class BroadcastTransformer(OneToManyTransformer):
    """Transformer that forwards its input token to every output port."""

    async def transform(
        self, inputs: MutableMapping[str, Token]
    ) -> MutableMapping[str, Token | MutableSequence[Token]]:
        """Replicate the incoming token once per output port.

        :param inputs: mapping from input port names to the received tokens
        :return: mapping from each output port name to the result of calling
            ``update`` on the input token with its own value
        """
        # The last entry is selected; the base class is expected to enforce a
        # single input port, so this is normally the only token available.
        source = tuple(inputs.values())[-1]
        outputs = {}
        for port_name in self.output_ports:
            # A separate ``update`` call is issued for every output port
            # (presumably so each port gets its own token object - TODO confirm).
            outputs[port_name] = source.update(source.value)
        return outputs
b/streamflow/cwl/translator.py @@ -75,6 +75,7 @@ ) from streamflow.cwl.transformer import ( AllNonNullTransformer, + BroadcastTransformer, CartesianProductSizeTransformer, CloneTransformer, CWLTokenTransformer, @@ -1910,10 +1911,30 @@ def _inject_inputs(self, workflow: Workflow) -> None: value=self.cwl_inputs[port_name], ) # Search empty unbound input ports + empty_ports = [] for input_port in workflow.ports.values(): if input_port.empty() and not input_port.get_input_steps(): - input_port.put(Token(value=None, recoverable=True)) - input_port.put(TerminationToken()) + empty_ports.append(input_port) + if len(empty_ports) > 0: + if len(empty_ports) > 1: + step = workflow.create_step( + cls=BroadcastTransformer, + name="/__empty_unbound_inputs__-bcast", + ) + upstream_port = workflow.create_port() + step.add_input_port("__upstream__", upstream_port) + for i, downstream_port in enumerate(empty_ports): + step.add_output_port(f"__downstream_{i}__", downstream_port) + else: + upstream_port = empty_ports[0] + self._inject_input( + workflow=workflow, + global_name="/__empty_unbound_inputs__", + port_name="__empty_unbound_inputs__", + port=upstream_port, + output_directory=output_directory, + value=None, + ) def _recursive_translate( self, diff --git a/streamflow/workflow/step.py b/streamflow/workflow/step.py index c24b29e98..44d0b66b9 100644 --- a/streamflow/workflow/step.py +++ b/streamflow/workflow/step.py @@ -149,10 +149,15 @@ async def _persist_token( ) -> Token: if token.persistent_id: raise WorkflowDefinitionException( - f"Token already has an id: {token.persistent_id}" + f"Step {self.name} failed to save token: Token already has a persistent id: {token.persistent_id}" ) await token.save(self.workflow.context.database, port_id=port.persistent_id) if input_token_ids: + if any(id_ is None for id_ in input_token_ids): + raise WorkflowExecutionException( + f"Step {self.name} cannot establish provenance: " + f"One or more input tokens have not been persisted: 
class OneToManyTransformer(Transformer, ABC):
    """Base class for transformers that accept exactly one input port."""

    def add_input_port(self, name: str, port: Port) -> None:
        """Register the input port, refusing any port after the first one.

        :param name: the name of the input port
        :param port: the port object to attach
        :raises WorkflowDefinitionException: if an input port is already set
        """
        # Guard clause: an input port has already been registered
        if self.input_ports:
            raise WorkflowDefinitionException(
                f"Step {self.name} must contain a single input port."
            )
        super().add_input_port(name, port)

    async def run(self) -> None:
        """Check the single-input constraint, then delegate to the parent.

        :raises WorkflowDefinitionException: if the number of input ports
            is different from one
        """
        input_count = len(self.input_ports)
        if input_count != 1:
            raise WorkflowDefinitionException(
                f"Step {self.name} must contain a single input port."
            )
        await super().run()
+ f"Step {self.name} must contain a single output port." ) await super().run()