Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

compiler: Yet another batch of compilation tweaks #2396

Merged
merged 18 commits into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions devito/core/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,17 +120,17 @@ def _rcompile_wrapper(cls, **kwargs0):
options0 = kwargs0.pop('options')

def wrapper(expressions, mode='default', options=None, **kwargs1):
options = {**options0, **(options or {})}
kwargs = {**kwargs0, **kwargs1}

if mode == 'host':
par_disabled = options['par-disabled']
options = options or {}
EdCaunt marked this conversation as resolved.
Show resolved Hide resolved
target = {
'platform': 'cpu64',
'language': 'C' if par_disabled else 'openmp',
'language': 'C' if options0['par-disabled'] else 'openmp',
'compiler': 'custom'
}
else:
options = {**options0, **(options or {})}
target = None

return rcompile(expressions, kwargs, options, target=target)
Expand Down
21 changes: 14 additions & 7 deletions devito/ir/clusters/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,24 +343,31 @@ def rule(size, e):
# Reconstruct the Clusters
processed = []
for c in clusters:
exprs = c.exprs
EdCaunt marked this conversation as resolved.
Show resolved Hide resolved

sub_iterators = dict(c.ispace.sub_iterators)
sub_iterators[d] = [i for i in sub_iterators[d] if i not in subiters]

# Apply substitutions to expressions
# Note: In an expression, there could be `u[t+1, ...]` and `v[t+1,
# ...]`, where `u` and `v` are TimeFunction with circular time
# buffers (save=None) *but* different modulo extent. The `t+1`
# indices above are therefore conceptually different, so they will
# be replaced with the proper ModuloDimension through two different
# calls to `xreplace_indices`
exprs = c.exprs
groups = as_mapper(mds, lambda d: d.modulo)
for size, v in groups.items():
subs = {md.origin: md for md in v}
func = partial(xreplace_indices, mapper=subs, key=partial(rule, size))
key = partial(rule, size)
if size == 1:
# Optimization -- avoid useless "% 1" ModuloDimensions
subs = {md.origin: 0 for md in v}
else:
subs = {md.origin: md for md in v}
sub_iterators[d].extend(v)

func = partial(xreplace_indices, mapper=subs, key=key)
exprs = [e.apply(func) for e in exprs]

# Augment IterationSpace
sub_iterators = dict(c.ispace.sub_iterators)
sub_iterators[d] = tuple(i for i in sub_iterators[d] + tuple(mds)
if i not in subiters)
ispace = IterationSpace(c.ispace.intervals, sub_iterators,
c.ispace.directions)

Expand Down
4 changes: 0 additions & 4 deletions devito/ir/clusters/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,10 +187,6 @@ def scope(self):
def functions(self):
return self.scope.functions

@cached_property
def has_increments(self):
return any(e.is_Increment for e in self.exprs)

@cached_property
def grid(self):
grids = set(f.grid for f in self.functions if f.is_AbstractFunction)
Expand Down
16 changes: 10 additions & 6 deletions devito/ir/iet/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1217,21 +1217,25 @@ def __repr__(self):
class Pragma(Node):

"""
One or more pragmas floating in the IET constructed through a callback.
One or more pragmas floating in the IET.
"""

def __init__(self, callback, arguments=None):
def __init__(self, pragma, arguments=None):
super().__init__()

self.callback = callback
if not isinstance(pragma, str):
raise TypeError("Pragma name must be a string, not %s" % type(pragma))

self.pragma = pragma
self.arguments = as_tuple(arguments)

def __repr__(self):
return '<Pragmas>'
return '<Pragma>'

@cached_property
def pragmas(self):
return as_tuple(self.callback(*self.arguments))
def _generate(self):
# Subclasses may override this property to customize the pragma generation
return self.pragma % self.arguments


class Transfer:
Expand Down
3 changes: 2 additions & 1 deletion devito/ir/iet/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ def derive_parameters(iet, drop_locals=False, ordering='default'):
basics = FindSymbols('basics').visit(iet)
candidates.extend(i.function for i in basics)

# Filter off duplicates (e.g., `x_size` is extracted by both calls to FindSymbols)
# Filter off duplicates (e.g., `x_size` is extracted by both calls to
# FindSymbols)
candidates = filter_ordered(candidates)

# Filter off symbols which are defined somewhere within `iet`
Expand Down
15 changes: 7 additions & 8 deletions devito/ir/iet/visitors.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ def visit_Expression(self, o):
code = c.Assign(lhs, rhs)

if o.pragmas:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These four lines are repeated twice. Worth constructing a utility function?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

which four lines? I only see two, starting at if o.pragmas -- which is too little to deserve a separate function

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if o.pragmas:
            code = c.Module(self._visit(o.pragmas) + (code,))

return code

but tbh, you're probably right

code = c.Module(list(o.pragmas) + [code])
code = c.Module(self._visit(o.pragmas) + (code,))

return code

Expand All @@ -489,7 +489,7 @@ def visit_AugmentedExpression(self, o):
c_rhs = ccode(o.expr.rhs, dtype=o.dtype, compiler=self._compiler)
code = c.Statement("%s %s= %s" % (c_lhs, o.op, c_rhs))
if o.pragmas:
code = c.Module(list(o.pragmas) + [code])
code = c.Module(self._visit(o.pragmas) + (code,))
return code

def visit_Call(self, o, nested_call=False):
Expand Down Expand Up @@ -555,15 +555,13 @@ def visit_Iteration(self, o):

# Attach pragmas, if any
if o.pragmas:
handle = c.Module(o.pragmas + (handle,))
pragmas = tuple(self._visit(i) for i in o.pragmas)
handle = c.Module(pragmas + (handle,))

return handle

def visit_Pragma(self, o):
if len(o.pragmas) == 1:
return o.pragmas[0]
else:
return c.Collection(o.pragmas)
return c.Pragma(o._generate)

def visit_While(self, o):
condition = ccode(o.condition)
Expand Down Expand Up @@ -1230,9 +1228,10 @@ def visit_Iteration(self, o):
nodes = self._visit(o.nodes)
dimension = uxreplace(o.dim, self.mapper)
limits = [uxreplace(i, self.mapper) for i in o.limits]
pragmas = self._visit(o.pragmas)
uindices = [uxreplace(i, self.mapper) for i in o.uindices]
return o._rebuild(nodes=nodes, dimension=dimension, limits=limits,
uindices=uindices)
pragmas=pragmas, uindices=uindices)

def visit_Definition(self, o):
try:
Expand Down
137 changes: 94 additions & 43 deletions devito/ir/support/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from functools import cached_property

from sympy import S
import sympy
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just import Interval above, no?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would, but to make a neat distinction with our own Interval...


from devito.ir.support.space import Backward, null_ispace
from devito.ir.support.utils import AccessMode, extrema
Expand Down Expand Up @@ -345,21 +346,48 @@ def distance(self, other):
if not (sit == oit and sai.root is oai.root):
# E.g., `self=R<f,[x + 2]>` and `other=W<f,[i + 1]>`
# E.g., `self=R<f,[x]>`, `other=W<f,[x + 1]>`,
# `self.itintervals=(x<0>,)` and `other.itintervals=(x<1>,)`
ret.append(S.Infinity)
break
# `self.itintervals=(x<0>,)`, `other.itintervals=(x<1>,)`
return vinf(ret)
except AttributeError:
# E.g., `self=R<f,[cy]>` and `self.itintervals=(y,)` => `sai=None`
pass

# In some cases, the distance degenerates because `self` and
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: aside from "Case 3", the rest is just lifted from approximately 40 lines below.

# `other` never intersect, which essentially means there's no
# dependence between them. In this case, we set the distance to a
# dummy value (the imaginary unit). Hence, we call these "imaginary
# dependences". This occurs in just a small set of special cases,
# which we attempt to handle here
if any(d and d._defines & sit.dim._defines for d in (sai, oai)):
# Case 1: `sit` is an IterationInterval with statically known
# trip count. E.g. it ranges from 0 to 3; `other` performs a
# constant access at 4
for v in (self[n], other[n]):
try:
if bool(v < sit.symbolic_min or v > sit.symbolic_max):
return Vector(S.ImaginaryUnit)
except TypeError:
pass

# Case 2: `sit` is an IterationInterval over a local SubDimension
# and `other` performs a constant access
for d0, d1 in ((sai, oai), (oai, sai)):
if d0 is None and d1.is_Sub and d1.local:
return Vector(S.ImaginaryUnit)

# Case 3: `self` and `other` have some special form such that
# it's provable that they never intersect
if sai and sit == oit:
if disjoint_test(self[n], other[n], sai, sit):
return Vector(S.ImaginaryUnit)

if self.function._mem_shared:
# Special case: the distance between two regular, thread-shared
# objects fallbacks to zero, as any other value would be nonsensical.
# objects fallbacks to zero, as any other value would be nonsensical
ret.append(S.Zero)

elif sai and oai and sai._defines & sit.dim._defines:
# E.g., `self=R<f,[t + 1, x]>`, `self.itintervals=(time, x)`
# and `ai=t`
# E.g., `self=R<f,[t + 1, x]>`, `self.itintervals=(time, x)`, `ai=t`
if sit.direction is Backward:
ret.append(other[n] - self[n])
else:
Expand All @@ -373,45 +401,13 @@ def distance(self, other):
break

elif sai in self.ispace and oai in other.ispace:
# E.g., `self=R<f,[x, y]>`, `sai=time`, self.itintervals=(time, x, y)
# with `n=0`
# E.g., `self=R<f,[x, y]>`, `sai=time`,
# `self.itintervals=(time, x, y)`, `n=0`
continue

elif any(d and d._defines & sit.dim._defines for d in (sai, oai)):
# In some cases, the distance degenerates because `self` and
# `other` never intersect, which essentially means there's no
# dependence between them. In this case, we set the distance to
# a dummy value (the imaginary unit). Hence, we call these
# "imaginary dependences". This occurs in just a small set of
# special cases, which we handle here

# Case 1: `sit` is an IterationInterval with statically known
# trip count. E.g. it ranges from 0 to 3; `other` performs a
# constant access at 4
for v in (self[n], other[n]):
try:
if bool(v < sit.symbolic_min or v > sit.symbolic_max):
return Vector(S.ImaginaryUnit)
except TypeError:
pass

# Case 2: `sit` is an IterationInterval over a local SubDimension
# and `other` performs a constant access
for d0, d1 in ((sai, oai), (oai, sai)):
if d0 is None and d1.is_Sub and d1.local:
return Vector(S.ImaginaryUnit)

# Fallback
ret.append(S.Infinity)
break

elif self.findices[n] in sit.dim._defines:
# E.g., `self=R<u,[t+1, ii_src_0+1, ii_src_1+2]>` and `fi=p_src` (`n=1`)
ret.append(S.Infinity)
break

if S.Infinity in ret:
return Vector(*ret)
else:
# E.g., `self=R<u,[t+1, ii_src_0+1, ii_src_1+2]>`, `fi=p_src`, `n=1`
return vinf(ret)

n = len(ret)

Expand Down Expand Up @@ -1330,6 +1326,10 @@ def is_regular(self):

# *** Utils

def vinf(entries):
georgebisbas marked this conversation as resolved.
Show resolved Hide resolved
return Vector(*(entries + [S.Infinity]))


def retrieve_accesses(exprs, **kwargs):
"""
Like retrieve_terminals, but ensure that if a ComponentAccess is found,
Expand All @@ -1345,3 +1345,54 @@ def retrieve_accesses(exprs, **kwargs):
exprs1 = uxreplace(exprs, subs)

return compaccs | retrieve_terminals(exprs1, **kwargs) - set(subs.values())


def disjoint_test(e0, e1, d, it):
"""
A rudimentary test to check if two accesses `e0` and `e1` along `d` within
the IterationInterval `it` are independent.

This is inspired by the Banerjee test, but it's way more simplistic.

The test is conservative, meaning that if it returns False, then the accesses
might be independent, but it's not guaranteed. If it returns True, then the
accesses are definitely independent.

Our implementation focuses on tiny yet relevant cases, such as when the
iteration space's bounds are numeric constants, while the index accesses
functions reduce to numbers once the iteration variable is substituted with
one of the possible values in the iteration space.

Examples
--------
* e0 = 12 - zl, e1 = zl + 4, d = zl, it = zl[0,0]
where zl is a left SubDimension with thickness, say, 4
The test will return True, as the two index access functions never
overlap.
"""
if e0 == e1:
return False

if d.is_Custom:
subs = {}
elif d.is_Sub and d.is_left:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can middle/right subdimensions be ignored? Genuinely curious

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We just don't handle them because, practically speaking, we don't need to.

as per the docstring:

"A rudimentary test ..."

and

"Our implementation focuses on tiny yet relevant cases"

So basically this is a simplistic implementation, to be refined in the future if we ever have to.

subs = {d.root.symbolic_min: 0, **dict([d.thickness.left])}
else:
return False

m = it.symbolic_min.subs(subs)
M = it.symbolic_max.subs(subs)

p00 = e0._subs(d, m)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An explanatory comment on the 00, 01, etc. suffixes would be helpful here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just dummy variable names because you have two "dimensions":

  • e0 and e1
  • m and M

which leads to four objects

p01 = e0._subs(d, M)

p10 = e1._subs(d, m)
p11 = e1._subs(d, M)

if any(not i.is_Number for i in [p00, p01, p10, p11]):
return False

i0 = sympy.Interval(min(p00, p01), max(p00, p01))
i1 = sympy.Interval(min(p10, p11), max(p10, p11))

return not bool(i0.intersect(i1))
5 changes: 2 additions & 3 deletions devito/operator/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -1098,13 +1098,12 @@ def rcompile(expressions, kwargs, options, target=None):

if target is None:
cls = operator_selector(**kwargs)
kwargs['options'] = options
else:
kwargs = parse_kwargs(**target)
cls = operator_selector(**kwargs)
kwargs = cls._normalize_kwargs(**kwargs)

# Use the customized opt options
kwargs['options'] = options
kwargs['options'].update(options)

# Recursive profiling not supported -- would be a complete mess
kwargs.pop('profiler', None)
Expand Down
10 changes: 1 addition & 9 deletions devito/passes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,7 @@ def is_gpu_create(obj, gpu_create):
except AttributeError:
functions = as_tuple(obj)

for i in functions:
try:
f = i.alias or i
except AttributeError:
f = i
if f not in gpu_create:
return False

return True
return all(f in gpu_create for f in functions)


# Import all compiler passes
Expand Down
Loading
Loading