Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 6 additions & 19 deletions devito/core/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,18 +140,15 @@ class Cpu64NoopOperator(Cpu64OperatorMixin, CoreOperator):
@timed_pass(name='specializing.IET')
def _specialize_iet(cls, graph, **kwargs):
options = kwargs['options']
platform = kwargs['platform']
compiler = kwargs['compiler']
sregistry = kwargs['sregistry']

# Distributed-memory parallelism
mpiize(graph, **kwargs)

# Shared-memory parallelism
if options['openmp']:
parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
parizer = cls._Target.Parizer(**kwargs)
parizer.make_parallel(graph)
parizer.initialize(graph, options=options)
parizer.initialize(graph)

# Symbol definitions
cls._Target.DataManager(**kwargs).process(graph)
Expand Down Expand Up @@ -205,11 +202,6 @@ def _specialize_clusters(cls, clusters, **kwargs):
@classmethod
@timed_pass(name='specializing.IET')
def _specialize_iet(cls, graph, **kwargs):
options = kwargs['options']
platform = kwargs['platform']
compiler = kwargs['compiler']
sregistry = kwargs['sregistry']

# Flush denormal numbers
avoid_denormals(graph, **kwargs)

Expand All @@ -220,10 +212,10 @@ def _specialize_iet(cls, graph, **kwargs):
relax_incr_dimensions(graph, **kwargs)

# Parallelism
parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
parizer = cls._Target.Parizer(**kwargs)
parizer.make_simd(graph)
parizer.make_parallel(graph)
parizer.initialize(graph, options=options)
parizer.initialize(graph)

# Misc optimizations
hoist_prodders(graph)
Expand Down Expand Up @@ -300,12 +292,7 @@ def callback(f, *args):

@classmethod
def _make_iet_passes_mapper(cls, **kwargs):
options = kwargs['options']
platform = kwargs['platform']
compiler = kwargs['compiler']
sregistry = kwargs['sregistry']

parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
parizer = cls._Target.Parizer(**kwargs)

return {
'denormals': partial(avoid_denormals, **kwargs),
Expand All @@ -316,7 +303,7 @@ def _make_iet_passes_mapper(cls, **kwargs):
'linearize': partial(linearize, **kwargs),
'simd': partial(parizer.make_simd),
'prodders': hoist_prodders,
'init': partial(parizer.initialize, options=options)
'init': partial(parizer.initialize)
}

_known_passes = (
Expand Down
29 changes: 7 additions & 22 deletions devito/core/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,18 +180,13 @@ class DeviceNoopOperator(DeviceOperatorMixin, CoreOperator):
@classmethod
@timed_pass(name='specializing.IET')
def _specialize_iet(cls, graph, **kwargs):
options = kwargs['options']
platform = kwargs['platform']
compiler = kwargs['compiler']
sregistry = kwargs['sregistry']

# Distributed-memory parallelism
mpiize(graph, **kwargs)

# GPU parallelism
parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
parizer = cls._Target.Parizer(**kwargs)
parizer.make_parallel(graph)
parizer.initialize(graph, options=options)
parizer.initialize(graph)

# Symbol definitions
cls._Target.DataManager(**kwargs).process(graph)
Expand Down Expand Up @@ -248,21 +243,16 @@ def _specialize_clusters(cls, clusters, **kwargs):
@classmethod
@timed_pass(name='specializing.IET')
def _specialize_iet(cls, graph, **kwargs):
options = kwargs['options']
platform = kwargs['platform']
compiler = kwargs['compiler']
sregistry = kwargs['sregistry']

# Distributed-memory parallelism
mpiize(graph, **kwargs)

# Lower BlockDimensions so that blocks of arbitrary shape may be used
relax_incr_dimensions(graph, **kwargs)

# GPU parallelism
parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
parizer = cls._Target.Parizer(**kwargs)
parizer.make_parallel(graph)
parizer.initialize(graph, options=options)
parizer.initialize(graph)

# Misc optimizations
hoist_prodders(graph)
Expand Down Expand Up @@ -325,22 +315,17 @@ def _make_clusters_passes_mapper(cls, **kwargs):

@classmethod
def _make_iet_passes_mapper(cls, **kwargs):
options = kwargs['options']
platform = kwargs['platform']
compiler = kwargs['compiler']
sregistry = kwargs['sregistry']

parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
parizer = cls._Target.Parizer(**kwargs)
orchestrator = cls._Target.Orchestrator(**kwargs)

return {
'parallel': parizer.make_parallel,
'orchestrate': partial(orchestrator.process),
'pthreadify': partial(pthreadify, sregistry=sregistry),
'pthreadify': partial(pthreadify, **kwargs),
'mpi': partial(mpiize, **kwargs),
'linearize': partial(linearize, **kwargs),
'prodders': partial(hoist_prodders),
'init': partial(parizer.initialize, options=options)
'init': partial(parizer.initialize)
}

_known_passes = (
Expand Down
4 changes: 2 additions & 2 deletions devito/operator/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ def _lower_uiet(cls, stree, profiler=None, **kwargs):

@classmethod
@timed_pass(name='lowering.IET')
def _lower_iet(cls, uiet, profiler=None, **kwargs):
def _lower_iet(cls, uiet, **kwargs):
"""
Iteration/Expression tree lowering:

Expand All @@ -496,7 +496,7 @@ def _lower_iet(cls, uiet, profiler=None, **kwargs):
# Instrument the IET for C-level profiling
# Note: this is postponed until after _specialize_iet because during
# specialization further Sections may be introduced
cls._Target.instrument(graph, profiler=profiler, **kwargs)
cls._Target.instrument(graph, **kwargs)

# Extract the necessary macros from the symbolic objects
generate_macros(graph, **kwargs)
Expand Down
4 changes: 4 additions & 0 deletions devito/operator/profiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,10 @@ def record_ops_variation(self, initial, final):
def all_sections(self):
return list(self._sections) + flatten(self._subsections.values())

@property
def high_verbosity(self):
return self._verbosity >= 2

def summary(self, args, dtype, reduce_over=None):
"""
Return a PerformanceSummary of the profiled sections.
Expand Down
2 changes: 1 addition & 1 deletion devito/passes/iet/instrument.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@


def instrument(graph, **kwargs):
profiler = kwargs['profiler']
profiler = kwargs.get('profiler')
if profiler is None:
return

Expand Down
22 changes: 16 additions & 6 deletions devito/passes/iet/langbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,27 +160,36 @@ class LangTransformer:
The constructs of the target language. To be specialized by a subclass.
"""

def __init__(self, key, sregistry, platform, compiler):
def __init__(self, key=None, options=None, sregistry=None, platform=None,
compiler=None, profiler=None, **kwargs):
"""
Parameters
----------
key : callable, optional
Return True if an Iteration can and should be parallelized,
False otherwise.
options : dict, optional
The optimization options.
sregistry : SymbolRegistry
The symbol registry, to access the symbols appearing in an IET.
platform : Platform
The underlying platform.
compiler : Compiler
The underlying JIT compiler.
profiler : Profiler
The underlying Profiler, used to instrument the IET.
"""
if key is not None:
self.key = key
else:
self.key = lambda i: False

self.uses_mpi = options['mpi']

self.sregistry = sregistry
self.platform = platform
self.compiler = compiler
self.profiler = profiler

@iet_pass
def make_parallel(self, iet):
Expand Down Expand Up @@ -228,11 +237,11 @@ class ShmTransformer(LangTransformer):
shared-memory-parallel IETs for CPUs.
"""

def __init__(self, key, sregistry, options, platform, compiler):
def __init__(self, key, options=None, **kwargs):
"""
Parameters
----------
key : callable, optional
key : callable
Return True if an Iteration can and should be parallelized,
False otherwise.
sregistry : SymbolRegistry
Expand All @@ -251,12 +260,13 @@ def __init__(self, key, sregistry, options, platform, compiler):
iteration exceeds this threshold. Otherwise, use static scheduling.
* 'par-nested': nested parallelism if the number of hyperthreads
per core is greater than this threshold.
* 'mpi': tells whether MPI is enabled.
platform : Platform
The underlying platform.
compiler : Compiler
The underlying JIT compiler.
"""
super().__init__(key, sregistry, platform, compiler)
super().__init__(key, options=options, **kwargs)

self.collapse_ncores = options['par-collapse-ncores']
self.collapse_work = options['par-collapse-work']
Expand Down Expand Up @@ -391,7 +401,7 @@ def deviceid(self):
return self.sregistry.deviceid

@iet_pass
def initialize(self, iet, options=None):
def initialize(self, iet):
"""
An `iet_pass` which transforms an IET such that the target language
runtime is initialized.
Expand All @@ -416,7 +426,7 @@ def _extract_objcomm(iet):
# Fallback -- might end up here because the Operator has no
# halo exchanges, but we now need it nonetheless to perform
# the rank-GPU assignment
if options['mpi']:
if self.uses_mpi:
for i in iet.parameters:
try:
return i.grid.distributor._obj_comm
Expand Down
8 changes: 4 additions & 4 deletions devito/passes/iet/parpragma.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,9 @@ class PragmaShmTransformer(ShmTransformer, PragmaSimdTransformer):
IETs for CPUs.
"""

def __init__(self, sregistry, options, platform, compiler):
def __init__(self, **kwargs):
key = lambda i: i.is_ParallelRelaxed and not i.is_Vectorized
super().__init__(key, sregistry, options, platform, compiler)
super().__init__(key, **kwargs)

def _make_reductions(self, partree):
if not any(i.is_ParallelAtomic for i in partree.collapsed):
Expand Down Expand Up @@ -491,8 +491,8 @@ class PragmaDeviceAwareTransformer(DeviceAwareMixin, PragmaShmTransformer):
shared-memory-parallel, and device-parallel IETs.
"""

def __init__(self, sregistry, options, platform, compiler):
super().__init__(sregistry, options, platform, compiler)
def __init__(self, options=None, **kwargs):
super().__init__(options=options, **kwargs)

self.gpu_fit = options['gpu-fit']
# Need to reset the tile in case was already used and iter over by blocking
Expand Down
6 changes: 3 additions & 3 deletions examples/userapi/05_conditional_dimension.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -831,13 +831,13 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Example C: Combining ConditionaDimension\n",
"# Example C: Combining ConditionalDimension\n",
"\n",
"In some cases, a `ConditionaDimension` might be used in combination with an implicit_dim to handle specific cases. This combination can be made mutually exclusive (And) or inclusive (Or).\n",
"In some cases, a `ConditionalDimension` might be used in combination with an implicit_dim to handle specific cases. This combination can be made mutually exclusive (And) or inclusive (Or).\n",
"\n",
"As an example, let's consider the following case:\n",
"\n",
"- Set all even x indices to 1 using the standard subsampling `factor` ConditionaDimension\n",
"- Set all even x indices to 1 using the standard subsampling `factor` ConditionalDimension\n",
"- Set the edges to 2"
]
},
Expand Down
2 changes: 1 addition & 1 deletion tests/test_linearize.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,4 +688,4 @@ def test_cire_n_strides():

# NOTE: not exact equality because `op2` slightly changes the order of
# arithmetic operations, which in turn causes some rounding differences
assert np.allclose(u.data, u1.data, rtol=1e-5)
assert np.allclose(u.data, u1.data, rtol=1e-4)
Loading