Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,3 +479,9 @@ def check_array(array, exp_halo, exp_shape, rotate=False):

assert tuple(array.halo) == exp_halo
assert tuple(shape) == tuple(exp_shape)


# Main body in Operator IET, depending on ISA
def body0(op):
    """
    Return the entry of ``op.body.body`` regarded as the Operator's main body.

    The entry at index 1 is picked when ``'sse'`` is listed among the
    ``known_isas`` of the configured platform, otherwise the entry at index 0.
    NOTE(review): presumably an ISA-specific preamble occupies index 0 on
    SSE-capable platforms -- confirm against the generated IET.
    """
    has_sse = 'sse' in configuration['platform'].known_isas
    return op.body.body[1 if has_sse else 0]
7 changes: 5 additions & 2 deletions devito/passes/clusters/aliases.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ def _aliases_from_clusters(self, cgroup, exclude, meta):
for mapper in self._generate(cgroup, exclude):
# Clusters -> AliasList
found = collect(mapper.extracted, meta.ispace, self.opt_minstorage)
if not found:
continue
exprs, aliases = self._choose(found, cgroup, mapper)

# AliasList -> Schedule
Expand Down Expand Up @@ -271,14 +273,15 @@ def _do_generate(self, exprs, exclude, cbk_search, cbk_compose=None):
free_symbols = i.free_symbols
if {a.function for a in free_symbols} & exclude:
continue

mapper.add(i, make, terms)

return mapper


class CireInvariants(CireTransformerLegacy, Queue):

_q_guards_in_key = True

def __init__(self, sregistry, options, platform):
super().__init__(sregistry, options, platform)

Expand Down Expand Up @@ -928,7 +931,7 @@ def lower_schedule(schedule, meta, sregistry, opt_ftemps, opt_min_dtype,
assert writeto.size == 0

dtype = sympy_dtype(pivot, base=meta.dtype, smin=opt_min_dtype)
obj = Temp(name=name, dtype=dtype)
obj = Temp(name=name, dtype=dtype, is_const=True)
expression = Eq(obj, uxreplace(pivot, subs))

callback = lambda idx: obj # noqa: B023
Expand Down
11 changes: 9 additions & 2 deletions devito/passes/clusters/cse.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ def cse_dtype(exprdtype, cdtype):
Return the dtype of a CSE temporary given the dtype of the expression to be
captured and the cluster's dtype.
"""
if np.issubdtype(cdtype, np.floating) and np.issubdtype(exprdtype, np.integer):
# Integer expression in a floating-point cluster: use the cluster's dtype.
# np.promote_types upcasts integers (e.g. int32 -> float64), so we
# need to ensure that the promoted type is not larger than the cluster's dtype
return cdtype

if np.issubdtype(cdtype, np.complexfloating):
return np.promote_types(exprdtype, cdtype(0).real.__class__).type
else:
Expand Down Expand Up @@ -97,8 +103,9 @@ def cse(cluster, sregistry=None, options=None, **kwargs):
if cluster.is_fence:
return cluster

make_dtype = lambda e: cse_dtype(e.dtype, dtype)
make = lambda e: CTemp(name=sregistry.make_name(), dtype=make_dtype(e))
def make(e):
edtype = cse_dtype(e.dtype, dtype)
return CTemp(name=sregistry.make_name(), dtype=edtype)

exprs = _cse(cluster, make, min_cost=min_cost, mode=mode)

Expand Down
12 changes: 6 additions & 6 deletions devito/passes/clusters/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,12 @@ def callback(self, clusters, prefix):

properties = c.properties.filter(key)

# Lifted scalar clusters cannot be guarded
# as they would not be in the scope of the guarded clusters
# unless the guard is for an outer dimension
guards = {} if c.is_scalar and not (prefix[:-1] and c.guards) else c.guards

lifted.append(c.rebuild(ispace=ispace, properties=properties, guards=guards))
# If `c` is made of scalar expressions within guards, then we must keep
# it close to the adjacent Clusters for correctness
if c.is_scalar and c.guards and ispace:
processed.append(c.rebuild(ispace=ispace, properties=properties))
else:
lifted.append(c.rebuild(ispace=ispace, properties=properties))

return lifted + processed

Expand Down
7 changes: 5 additions & 2 deletions devito/symbolics/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,15 +96,18 @@ def visit_preorder_first_hit(self, expr: Expression) -> Iterator[Expression]:


def search(exprs: Expression | Iterable[Expression],
query: type | Callable[[Any], bool],
query: type | tuple[type, ...] | Callable[[Any], bool],
mode: Mode = 'unique',
visit: Literal['dfs', 'bfs', 'bfs_first_hit'] = 'dfs',
deep: bool = False) -> List | set[Expression]:
"""Interface to Search."""

assert mode in ('all', 'unique'), "Unknown mode"

Q = (lambda obj: isinstance(obj, query)) if isinstance(query, type) else query
if isinstance(query, (type, tuple)):
Q = lambda obj: isinstance(obj, query)
else:
Q = query

# Search doesn't actually use a BFS (rather, a preorder DFS), but the terminology
# is retained in this function's parameters for backwards compatibility
Expand Down
2 changes: 1 addition & 1 deletion devito/types/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1589,7 +1589,7 @@ def _rebuild(self, *args, **kwargs):
comps = [f.func(*args, name=f.name.replace(self.name, newname), **kwargs)
for f in self.flat()]
# Rebuild the matrix with the new components
return self._new(comps)
return self._new(*self.shape, comps)

func = _rebuild

Expand Down
2 changes: 1 addition & 1 deletion examples/mpi/overview.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@
" _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);\n",
" _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);\n",
"\n",
" float r0 = 1.0F/h_x;\n",
" const float r0 = 1.0F/h_x;\n",
"\n",
" for (int time = time_m, t0 = (time)%(2), t1 = (time + 1)%(2); time <= time_M; time += 1, t0 = (time)%(2), t1 = (time + 1)%(2))\n",
" {\n",
Expand Down
57 changes: 35 additions & 22 deletions examples/performance/00_overview.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,7 @@
"}\n",
"STOP(section0,timers)\n",
"\n",
"float r1 = 1.0F/h_y;\n",
"const float r1 = 1.0F/h_y;\n",
"\n",
"for (int time = time_m, t0 = (time)%(2), t1 = (time + 1)%(2); time <= time_M; time += 1, t0 = (time)%(2), t1 = (time + 1)%(2))\n",
"{\n",
Expand Down Expand Up @@ -572,7 +572,13 @@
"+ u[t1][x + 4][y + 4][z + 4] = (f[x + 1][y + 1][z + 1]*f[x + 1][y + 1][z + 1])*((-6.66666667e-1F*r0)*(8.33333333e-2F*r0*u[t0][x + 4][y + 1][z + 4] - 6.66666667e-1F*r0*u[t0][x + 4][y + 2][z + 4] + 6.66666667e-1F*r0*u[t0][x + 4][y + 4][z + 4] - 8.33333333e-2F*r0*u[t0][x + 4][y + 5][z + 4]) + (-8.33333333e-2F*r0)*(8.33333333e-2F*r0*u[t0][x + 4][y + 4][z + 4] - 6.66666667e-1F*r0*u[t0][x + 4][y + 5][z + 4] + 6.66666667e-1F*r0*u[t0][x + 4][y + 7][z + 4] - 8.33333333e-2F*r0*u[t0][x + 4][y + 8][z + 4]) + (8.33333333e-2F*r0)*(8.33333333e-2F*r0*u[t0][x + 4][y][z + 4] - 6.66666667e-1F*r0*u[t0][x + 4][y + 1][z + 4] + 6.66666667e-1F*r0*u[t0][x + 4][y + 3][z + 4] - 8.33333333e-2F*r0*u[t0][x + 4][y + 4][z + 4]) + (6.66666667e-1F*r0)*(8.33333333e-2F*r0*u[t0][x + 4][y + 3][z + 4] - 6.66666667e-1F*r0*u[t0][x + 4][y + 4][z + 4] + 6.66666667e-1F*r0*u[t0][x + 4][y + 6][z + 4] - 8.33333333e-2F*r0*u[t0][x + 4][y + 7][z + 4]))*sinf(f[x + 1][y + 1][z + 1]);\n",
" }\n",
" }\n",
" }\n",
" }\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
Expand Down Expand Up @@ -652,7 +658,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 12,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -712,7 +718,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 13,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -753,7 +759,14 @@
" }\n",
" }\n",
" STOP(section0,timers)\n",
"}\n"
"}"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
Expand All @@ -772,7 +785,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 14,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -863,7 +876,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 15,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -919,7 +932,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 16,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -976,7 +989,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -994,7 +1007,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 18,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -1044,7 +1057,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 19,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -1112,7 +1125,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 20,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -1192,7 +1205,7 @@
" }\n",
" STOP(section0,timers)\n",
"\n",
" float r1 = 1.0F/h_y;\n",
" const float r1 = 1.0F/h_y;\n",
"\n",
" for (int time = time_m, t0 = (time)%(2), t1 = (time + 1)%(2); time <= time_M; time += 1, t0 = (time)%(2), t1 = (time + 1)%(2))\n",
" {\n",
Expand Down Expand Up @@ -1279,7 +1292,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 21,
"metadata": {},
"outputs": [
{
Expand All @@ -1304,7 +1317,7 @@
"}\n",
"STOP(section0,timers)\n",
"\n",
"float r1 = 1.0F/h_y;\n",
"const float r1 = 1.0F/h_y;\n",
"\n",
"for (int time = time_m, t0 = (time)%(2), t1 = (time + 1)%(2); time <= time_M; time += 1, t0 = (time)%(2), t1 = (time + 1)%(2))\n",
"{\n",
Expand Down Expand Up @@ -1369,7 +1382,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 22,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -1404,7 +1417,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 23,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -1483,7 +1496,7 @@
" }\n",
" STOP(section0,timers)\n",
"\n",
" float r1 = 1.0F/h_y;\n",
" const float r1 = 1.0F/h_y;\n",
"\n",
" for (int time = time_m, t0 = (time)%(2), t1 = (time + 1)%(2); time <= time_M; time += 1, t0 = (time)%(2), t1 = (time + 1)%(2))\n",
" {\n",
Expand Down Expand Up @@ -1546,7 +1559,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 24,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -1624,8 +1637,8 @@
" }\n",
" STOP(section0,timers)\n",
"\n",
" float r1 = 1.0F/h_x;\n",
" float r2 = 1.0F/h_y;\n",
" const float r1 = 1.0F/h_x;\n",
" const float r2 = 1.0F/h_y;\n",
"\n",
" for (int time = time_m, t0 = (time)%(2), t1 = (time + 1)%(2); time <= time_M; time += 1, t0 = (time)%(2), t1 = (time + 1)%(2))\n",
" {\n",
Expand Down Expand Up @@ -1718,7 +1731,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.13.11"
}
},
"nbformat": 4,
Expand Down
17 changes: 11 additions & 6 deletions examples/performance/01_gpu.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,13 @@
"name": "stderr",
"output_type": "stream",
"text": [
"NUMA domain count autodetection failed, assuming 1\n",
"Operator `Kernel` ran in 0.01 s\n",
"NUMA domain count autodetection failed, assuming 1\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Operator `Kernel` ran in 0.01 s\n"
]
}
Expand Down Expand Up @@ -292,9 +297,9 @@
" const int x_stride0 = x_fsz0*y_fsz0;\n",
" const int y_stride0 = y_fsz0;\n",
"\n",
" float r0 = 1.0F/dt;\n",
" float r1 = 1.0F/(h_x*h_x);\n",
" float r2 = 1.0F/(h_y*h_y);\n",
" const float r0 = 1.0F/dt;\n",
" const float r1 = 1.0F/(h_x*h_x);\n",
" const float r2 = 1.0F/(h_y*h_y);\n",
"\n",
" for (int time = time_m; time <= time_M; time += 1)\n",
" {\n",
Expand Down Expand Up @@ -340,7 +345,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
"version": "3.13.11"
}
},
"nbformat": 4,
Expand Down
Loading
Loading