From 6d993705efa2235272c00c5d8ab2e0dfb89fa735 Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Fri, 13 Feb 2026 10:03:08 +0000 Subject: [PATCH 1/4] compiler: Refactor kwargs forwarding throughout lowering --- devito/core/cpu.py | 25 ++++++------------------- devito/core/gpu.py | 29 +++++++---------------------- devito/operator/operator.py | 4 ++-- devito/passes/iet/instrument.py | 2 +- devito/passes/iet/langbase.py | 22 ++++++++++++++++------ devito/passes/iet/parpragma.py | 8 ++++---- 6 files changed, 36 insertions(+), 54 deletions(-) diff --git a/devito/core/cpu.py b/devito/core/cpu.py index 5b5aa8448e..a1c70ccdfc 100644 --- a/devito/core/cpu.py +++ b/devito/core/cpu.py @@ -140,18 +140,15 @@ class Cpu64NoopOperator(Cpu64OperatorMixin, CoreOperator): @timed_pass(name='specializing.IET') def _specialize_iet(cls, graph, **kwargs): options = kwargs['options'] - platform = kwargs['platform'] - compiler = kwargs['compiler'] - sregistry = kwargs['sregistry'] # Distributed-memory parallelism mpiize(graph, **kwargs) # Shared-memory parallelism if options['openmp']: - parizer = cls._Target.Parizer(sregistry, options, platform, compiler) + parizer = cls._Target.Parizer(**kwargs) parizer.make_parallel(graph) - parizer.initialize(graph, options=options) + parizer.initialize(graph) # Symbol definitions cls._Target.DataManager(**kwargs).process(graph) @@ -205,11 +202,6 @@ def _specialize_clusters(cls, clusters, **kwargs): @classmethod @timed_pass(name='specializing.IET') def _specialize_iet(cls, graph, **kwargs): - options = kwargs['options'] - platform = kwargs['platform'] - compiler = kwargs['compiler'] - sregistry = kwargs['sregistry'] - # Flush denormal numbers avoid_denormals(graph, **kwargs) @@ -220,10 +212,10 @@ def _specialize_iet(cls, graph, **kwargs): relax_incr_dimensions(graph, **kwargs) # Parallelism - parizer = cls._Target.Parizer(sregistry, options, platform, compiler) + parizer = cls._Target.Parizer(**kwargs) parizer.make_simd(graph) parizer.make_parallel(graph) - parizer.initialize(graph, options=options) + parizer.initialize(graph) # Misc optimizations hoist_prodders(graph) @@ -300,12 +292,7 @@ def callback(f, *args): @classmethod def _make_iet_passes_mapper(cls, **kwargs): - options = kwargs['options'] - platform = kwargs['platform'] - compiler = kwargs['compiler'] - sregistry = kwargs['sregistry'] - - parizer = cls._Target.Parizer(sregistry, options, platform, compiler) + parizer = cls._Target.Parizer(**kwargs) return { 'denormals': partial(avoid_denormals, **kwargs), @@ -316,7 +303,7 @@ def _make_iet_passes_mapper(cls, **kwargs): 'linearize': partial(linearize, **kwargs), 'simd': partial(parizer.make_simd), 'prodders': hoist_prodders, - 'init': partial(parizer.initialize, options=options) + 'init': partial(parizer.initialize) } _known_passes = ( diff --git a/devito/core/gpu.py b/devito/core/gpu.py index 7f88d9aa0e..ca6c288c7d 100644 --- a/devito/core/gpu.py +++ b/devito/core/gpu.py @@ -180,18 +180,13 @@ class DeviceNoopOperator(DeviceOperatorMixin, CoreOperator): @classmethod @timed_pass(name='specializing.IET') def _specialize_iet(cls, graph, **kwargs): - options = kwargs['options'] - platform = kwargs['platform'] - compiler = kwargs['compiler'] - sregistry = kwargs['sregistry'] - # Distributed-memory parallelism mpiize(graph, **kwargs) # GPU parallelism - parizer = cls._Target.Parizer(sregistry, options, platform, compiler) + parizer = cls._Target.Parizer(**kwargs) parizer.make_parallel(graph) - parizer.initialize(graph, options=options) + parizer.initialize(graph) # Symbol definitions cls._Target.DataManager(**kwargs).process(graph) @@ -248,11 +243,6 @@ def _specialize_clusters(cls, clusters, **kwargs): @classmethod @timed_pass(name='specializing.IET') def _specialize_iet(cls, graph, **kwargs): - options = kwargs['options'] - platform = kwargs['platform'] - compiler = kwargs['compiler'] - sregistry = kwargs['sregistry'] - # Distributed-memory parallelism mpiize(graph, **kwargs) @@ -260,9 +250,9 @@ def _specialize_iet(cls, graph, **kwargs): relax_incr_dimensions(graph, **kwargs) # GPU parallelism - parizer = cls._Target.Parizer(sregistry, options, platform, compiler) + parizer = cls._Target.Parizer(**kwargs) parizer.make_parallel(graph) - parizer.initialize(graph, options=options) + parizer.initialize(graph) # Misc optimizations hoist_prodders(graph) @@ -325,22 +315,17 @@ def _make_clusters_passes_mapper(cls, **kwargs): @classmethod def _make_iet_passes_mapper(cls, **kwargs): - options = kwargs['options'] - platform = kwargs['platform'] - compiler = kwargs['compiler'] - sregistry = kwargs['sregistry'] - - parizer = cls._Target.Parizer(sregistry, options, platform, compiler) + parizer = cls._Target.Parizer(**kwargs) orchestrator = cls._Target.Orchestrator(**kwargs) return { 'parallel': parizer.make_parallel, 'orchestrate': partial(orchestrator.process), - 'pthreadify': partial(pthreadify, sregistry=sregistry), + 'pthreadify': partial(pthreadify, **kwargs), 'mpi': partial(mpiize, **kwargs), 'linearize': partial(linearize, **kwargs), 'prodders': partial(hoist_prodders), - 'init': partial(parizer.initialize, options=options) + 'init': partial(parizer.initialize) } _known_passes = ( diff --git a/devito/operator/operator.py b/devito/operator/operator.py index 779106d75a..5ee6e5db35 100644 --- a/devito/operator/operator.py +++ b/devito/operator/operator.py @@ -474,7 +474,7 @@ def _lower_uiet(cls, stree, profiler=None, **kwargs): @classmethod @timed_pass(name='lowering.IET') - def _lower_iet(cls, uiet, profiler=None, **kwargs): + def _lower_iet(cls, uiet, **kwargs): """ Iteration/Expression tree lowering: @@ -496,7 +496,7 @@ def _lower_iet(cls, uiet, profiler=None, **kwargs): # Instrument the IET for C-level profiling # Note: this is postponed until after _specialize_iet because during # specialization further Sections may be introduced - cls._Target.instrument(graph, profiler=profiler, **kwargs) + cls._Target.instrument(graph, **kwargs) # Extract the necessary macros from the symbolic objects generate_macros(graph, **kwargs) diff --git a/devito/passes/iet/instrument.py b/devito/passes/iet/instrument.py index 7e0e49d2cb..4bc75b88b0 100644 --- a/devito/passes/iet/instrument.py +++ b/devito/passes/iet/instrument.py @@ -14,7 +14,7 @@ def instrument(graph, **kwargs): - profiler = kwargs['profiler'] + profiler = kwargs.get('profiler') if profiler is None: return diff --git a/devito/passes/iet/langbase.py b/devito/passes/iet/langbase.py index d4d00f6e8a..e6eea069f9 100644 --- a/devito/passes/iet/langbase.py +++ b/devito/passes/iet/langbase.py @@ -160,27 +160,36 @@ class LangTransformer: The constructs of the target language. To be specialized by a subclass. """ - def __init__(self, key, sregistry, platform, compiler): + def __init__(self, key=None, options=None, sregistry=None, platform=None, + compiler=None, profiler=None, **kwargs): """ Parameters ---------- key : callable, optional Return True if an Iteration can and should be parallelized, False otherwise. + options : dict, optional + The optimization options. sregistry : SymbolRegistry The symbol registry, to access the symbols appearing in an IET. platform : Platform The underlying platform. compiler : Compiler The underlying JIT compiler. + profiler : Profiler + The underlying Profiler, used to instrument the IET. """ if key is not None: self.key = key else: self.key = lambda i: False + + self.uses_mpi = options['mpi'] + self.sregistry = sregistry self.platform = platform self.compiler = compiler + self.profiler = profiler @iet_pass def make_parallel(self, iet): @@ -228,11 +237,11 @@ class ShmTransformer(LangTransformer): shared-memory-parallel IETs for CPUs. """ - def __init__(self, key, sregistry, options, platform, compiler): + def __init__(self, key, options=None, **kwargs): """ Parameters ---------- - key : callable, optional + key : callable Return True if an Iteration can and should be parallelized, False otherwise. sregistry : SymbolRegistry @@ -251,12 +260,13 @@ def __init__(self, key, sregistry, options, platform, compiler): iteration exceeds this threshold. Otherwise, use static scheduling. * 'par-nested': nested parallelism if the number of hyperthreads per core is greater than this threshold. + * 'mpi': tells whether MPI is enabled. platform : Platform The underlying platform. compiler : Compiler The underlying JIT compiler. """ - super().__init__(key, sregistry, platform, compiler) + super().__init__(key, options=options, **kwargs) self.collapse_ncores = options['par-collapse-ncores'] self.collapse_work = options['par-collapse-work'] @@ -391,7 +401,7 @@ def deviceid(self): return self.sregistry.deviceid @iet_pass - def initialize(self, iet, options=None): + def initialize(self, iet): """ An `iet_pass` which transforms an IET such that the target language runtime is initialized. @@ -416,7 +426,7 @@ def _extract_objcomm(iet): # Fallback -- might end up here because the Operator has no # halo exchanges, but we now need it nonetheless to perform # the rank-GPU assignment - if options['mpi']: + if self.uses_mpi: for i in iet.parameters: try: return i.grid.distributor._obj_comm diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py index eb6916622e..3cb072104c 100644 --- a/devito/passes/iet/parpragma.py +++ b/devito/passes/iet/parpragma.py @@ -225,9 +225,9 @@ class PragmaShmTransformer(ShmTransformer, PragmaSimdTransformer): IETs for CPUs. """ - def __init__(self, sregistry, options, platform, compiler): + def __init__(self, **kwargs): key = lambda i: i.is_ParallelRelaxed and not i.is_Vectorized - super().__init__(key, sregistry, options, platform, compiler) + super().__init__(key, **kwargs) def _make_reductions(self, partree): if not any(i.is_ParallelAtomic for i in partree.collapsed): @@ -491,8 +491,8 @@ class PragmaDeviceAwareTransformer(DeviceAwareMixin, PragmaShmTransformer): shared-memory-parallel, and device-parallel IETs. """ - def __init__(self, sregistry, options, platform, compiler): - super().__init__(sregistry, options, platform, compiler) + def __init__(self, options=None, **kwargs): + super().__init__(options=options, **kwargs) self.gpu_fit = options['gpu-fit'] # Need to reset the tile in case was already used and iter over by blocking From 71098c22f4206601f8915485ffdb2bdd93b20ca4 Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Fri, 13 Feb 2026 10:21:15 +0000 Subject: [PATCH 2/4] compiler: Add Profiler.high_verbosity --- devito/operator/profiling.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/devito/operator/profiling.py b/devito/operator/profiling.py index 6a82928277..49e4417742 100644 --- a/devito/operator/profiling.py +++ b/devito/operator/profiling.py @@ -180,6 +180,10 @@ def record_ops_variation(self, initial, final): def all_sections(self): return list(self._sections) + flatten(self._subsections.values()) + @property + def high_verbosity(self): + return self._verbosity >= 2 + def summary(self, args, dtype, reduce_over=None): """ Return a PerformanceSummary of the profiled sections. From 8bc4c9a2cbd226b49f688c549855c15a9cc8af58 Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Tue, 3 Mar 2026 09:14:33 +0000 Subject: [PATCH 3/4] tests: Relax tolerance for some CI configs --- tests/test_linearize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_linearize.py b/tests/test_linearize.py index 9f40e4c7e0..f63fa50dfa 100644 --- a/tests/test_linearize.py +++ b/tests/test_linearize.py @@ -688,4 +688,4 @@ def test_cire_n_strides(): # NOTE: not exact equality because `op2` slightly changes the order of # arithmetic operations, which in turn causes some rounding differences - assert np.allclose(u.data, u1.data, rtol=1e-5) + assert np.allclose(u.data, u1.data, rtol=1e-4) From 3a3df47765d8b4c618dea323a83684ebe64fcf31 Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Tue, 3 Mar 2026 09:50:56 +0000 Subject: [PATCH 4/4] examples: Fix typos --- examples/userapi/05_conditional_dimension.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/userapi/05_conditional_dimension.ipynb b/examples/userapi/05_conditional_dimension.ipynb index 91cb9cdda8..f0a7a1b531 100644 --- a/examples/userapi/05_conditional_dimension.ipynb +++ b/examples/userapi/05_conditional_dimension.ipynb @@ -831,13 +831,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Example C: Combining ConditionaDimension\n", + "# Example C: Combining ConditionalDimension\n", "\n", - "In some cases, a `ConditionaDimension` might be used in combination with an implicit_dim to handle specific cases. This combination can be made mutually exclusive (And) or inclusive (Or).\n", + "In some cases, a `ConditionalDimension` might be used in combination with an implicit_dim to handle specific cases. This combination can be made mutually exclusive (And) or inclusive (Or).\n", "\n", "As an example, let's consider the following case:\n", "\n", - "- Set all even x indices to 1 using the standard subsampling `factor` ConditionaDimension\n", + "- Set all even x indices to 1 using the standard subsampling `factor` ConditionalDimension\n", "- Set the edges to 2" ] },