devitocodes · mloubout · Jul 8, 2024 · Jun 20, 2024 · Jun 20, 2024 · Jun 20, 2024
diff --git a/devito/core/gpu.py b/devito/core/gpu.py
@@ -120,17 +120,17 @@ def _rcompile_wrapper(cls, **kwargs0):
         options0 = kwargs0.pop('options')
 
         def wrapper(expressions, mode='default', options=None, **kwargs1):
-            options = {**options0, **(options or {})}
             kwargs = {**kwargs0, **kwargs1}
 
             if mode == 'host':
-                par_disabled = options['par-disabled']
+                options = options or {}  # host: `options0` is not merged in
                 target = {
                     'platform': 'cpu64',
-                    'language': 'C' if par_disabled else 'openmp',
+                    'language': 'C' if options0['par-disabled'] else 'openmp',
                     'compiler': 'custom'
                 }
             else:
+                options = {**options0, **(options or {})}  # caller overrides win
                 target = None
 
             return rcompile(expressions, kwargs, options, target=target)

diff --git a/devito/ir/clusters/algorithms.py b/devito/ir/clusters/algorithms.py
@@ -343,24 +343,31 @@ def rule(size, e):
         # Reconstruct the Clusters
         processed = []
         for c in clusters:
+            exprs = c.exprs
+
+            sub_iterators = dict(c.ispace.sub_iterators)
+            sub_iterators[d] = [i for i in sub_iterators[d] if i not in subiters]
+
             # Apply substitutions to expressions
             # Note: In an expression, there could be `u[t+1, ...]` and `v[t+1,
             # ...]`, where `u` and `v` are TimeFunction with circular time
             # buffers (save=None) *but* different modulo extent. The `t+1`
             # indices above are therefore conceptually different, so they will
             # be replaced with the proper ModuloDimension through two different
             # calls to `xreplace_indices`
-            exprs = c.exprs
             groups = as_mapper(mds, lambda d: d.modulo)
             for size, v in groups.items():
-                subs = {md.origin: md for md in v}
-                func = partial(xreplace_indices, mapper=subs, key=partial(rule, size))
+                key = partial(rule, size)
+                if size == 1:
+                    # Optimization -- avoid useless "% 1" ModuloDimensions
+                    subs = {md.origin: 0 for md in v}
+                else:
+                    subs = {md.origin: md for md in v}
+                    sub_iterators[d].extend(v)
+
+                func = partial(xreplace_indices, mapper=subs, key=key)
                 exprs = [e.apply(func) for e in exprs]
 
-            # Augment IterationSpace
-            sub_iterators = dict(c.ispace.sub_iterators)
-            sub_iterators[d] = tuple(i for i in sub_iterators[d] + tuple(mds)
-                                     if i not in subiters)
             ispace = IterationSpace(c.ispace.intervals, sub_iterators,
                                     c.ispace.directions)
 

diff --git a/devito/ir/clusters/cluster.py b/devito/ir/clusters/cluster.py
@@ -187,10 +187,6 @@ def scope(self):
     def functions(self):
         return self.scope.functions
 
-    @cached_property
-    def has_increments(self):
-        return any(e.is_Increment for e in self.exprs)
-
     @cached_property
     def grid(self):
         grids = set(f.grid for f in self.functions if f.is_AbstractFunction)

diff --git a/devito/ir/iet/nodes.py b/devito/ir/iet/nodes.py
@@ -1217,21 +1217,25 @@ def __repr__(self):
 class Pragma(Node):
 
     """
-    One or more pragmas floating in the IET constructed through a callback.
+    A pragma floating in the IET, given as a %-format string plus arguments.
     """
 
-    def __init__(self, callback, arguments=None):
+    def __init__(self, pragma, arguments=None):
         super().__init__()
 
-        self.callback = callback
+        if not isinstance(pragma, str):
+            raise TypeError("Pragma name must be a string, not %s" % type(pragma))
+
+        self.pragma = pragma
         self.arguments = as_tuple(arguments)
 
     def __repr__(self):
-        return '<Pragmas>'
+        return '<Pragma>'
 
     @cached_property
-    def pragmas(self):
-        return as_tuple(self.callback(*self.arguments))
+    def _generate(self):
+        # Interpolates `arguments` into `pragma`; subclasses may override this
+        return self.pragma % self.arguments
 
 
 class Transfer:

diff --git a/devito/ir/iet/utils.py b/devito/ir/iet/utils.py
@@ -106,7 +106,8 @@ def derive_parameters(iet, drop_locals=False, ordering='default'):
     basics = FindSymbols('basics').visit(iet)
     candidates.extend(i.function for i in basics)
 
-    # Filter off duplicates (e.g., `x_size` is extracted by both calls to FindSymbols)
+    # Filter off duplicates (e.g., `x_size` is extracted by both calls to
+    # FindSymbols)
     candidates = filter_ordered(candidates)
 
     # Filter off symbols which are defined somewhere within `iet`

diff --git a/devito/ir/iet/visitors.py b/devito/ir/iet/visitors.py
@@ -480,7 +480,7 @@ def visit_Expression(self, o):
             code = c.Assign(lhs, rhs)
 
         if o.pragmas:
-            code = c.Module(list(o.pragmas) + [code])
+            code = c.Module(self._visit(o.pragmas) + (code,))
 
         return code
 
@@ -489,7 +489,7 @@ def visit_AugmentedExpression(self, o):
         c_rhs = ccode(o.expr.rhs, dtype=o.dtype, compiler=self._compiler)
         code = c.Statement("%s %s= %s" % (c_lhs, o.op, c_rhs))
         if o.pragmas:
-            code = c.Module(list(o.pragmas) + [code])
+            code = c.Module(self._visit(o.pragmas) + (code,))
         return code
 
     def visit_Call(self, o, nested_call=False):
@@ -555,15 +555,13 @@ def visit_Iteration(self, o):
 
         # Attach pragmas, if any
         if o.pragmas:
-            handle = c.Module(o.pragmas + (handle,))
+            pragmas = tuple(self._visit(i) for i in o.pragmas)
+            handle = c.Module(pragmas + (handle,))
 
         return handle
 
     def visit_Pragma(self, o):
-        if len(o.pragmas) == 1:
-            return o.pragmas[0]
-        else:
-            return c.Collection(o.pragmas)
+        return c.Pragma(o._generate)  # one pragma from the node's `_generate` value
 
     def visit_While(self, o):
         condition = ccode(o.condition)
@@ -1230,9 +1228,10 @@ def visit_Iteration(self, o):
         nodes = self._visit(o.nodes)
         dimension = uxreplace(o.dim, self.mapper)
         limits = [uxreplace(i, self.mapper) for i in o.limits]
+        pragmas = self._visit(o.pragmas)
         uindices = [uxreplace(i, self.mapper) for i in o.uindices]
         return o._rebuild(nodes=nodes, dimension=dimension, limits=limits,
-                          uindices=uindices)
+                          pragmas=pragmas, uindices=uindices)
 
     def visit_Definition(self, o):
         try:

diff --git a/devito/ir/support/basic.py b/devito/ir/support/basic.py
@@ -2,6 +2,7 @@
 from functools import cached_property
 
 from sympy import S
+import sympy
 
 from devito.ir.support.space import Backward, null_ispace
 from devito.ir.support.utils import AccessMode, extrema
@@ -345,21 +346,48 @@ def distance(self, other):
                 if not (sit == oit and sai.root is oai.root):
                     # E.g., `self=R<f,[x + 2]>` and `other=W<f,[i + 1]>`
                     # E.g., `self=R<f,[x]>`, `other=W<f,[x + 1]>`,
-                    #       `self.itintervals=(x<0>,)` and `other.itintervals=(x<1>,)`
-                    ret.append(S.Infinity)
-                    break
+                    #       `self.itintervals=(x<0>,)`, `other.itintervals=(x<1>,)`
+                    return vinf(ret)
             except AttributeError:
                 # E.g., `self=R<f,[cy]>` and `self.itintervals=(y,)` => `sai=None`
                 pass
 
+            # In some cases, the distance degenerates because `self` and
+            # `other` never intersect, which essentially means there's no
+            # dependence between them. In this case, we set the distance to a
+            # dummy value (the imaginary unit). Hence, we call these "imaginary
+            # dependences". This occurs in just a small set of special cases,
+            # which we attempt to handle here
+            if any(d and d._defines & sit.dim._defines for d in (sai, oai)):
+                # Case 1: `sit` is an IterationInterval with statically known
+                # trip count. E.g. it ranges from 0 to 3; `other` performs a
+                # constant access at 4
+                for v in (self[n], other[n]):
+                    try:
+                        if bool(v < sit.symbolic_min or v > sit.symbolic_max):
+                            return Vector(S.ImaginaryUnit)
+                    except TypeError:
+                        pass
+
+                # Case 2: `sit` is an IterationInterval over a local SubDimension
+                # and `other` performs a constant access
+                for d0, d1 in ((sai, oai), (oai, sai)):
+                    if d0 is None and d1.is_Sub and d1.local:
+                        return Vector(S.ImaginaryUnit)
+
+                # Case 3: `self` and `other` have some special form such that
+                # it's provable that they never intersect
+                if sai and sit == oit:
+                    if disjoint_test(self[n], other[n], sai, sit):
+                        return Vector(S.ImaginaryUnit)
+
             if self.function._mem_shared:
                 # Special case: the distance between two regular, thread-shared
-                # objects fallbacks to zero, as any other value would be nonsensical.
+                # objects fall back to zero, as any other value would be nonsensical
                 ret.append(S.Zero)
 
             elif sai and oai and sai._defines & sit.dim._defines:
-                # E.g., `self=R<f,[t + 1, x]>`, `self.itintervals=(time, x)`
-                # and `ai=t`
+                # E.g., `self=R<f,[t + 1, x]>`, `self.itintervals=(time, x)`, `ai=t`
                 if sit.direction is Backward:
                     ret.append(other[n] - self[n])
                 else:
@@ -373,45 +401,13 @@ def distance(self, other):
                     break
 
             elif sai in self.ispace and oai in other.ispace:
-                # E.g., `self=R<f,[x, y]>`, `sai=time`, self.itintervals=(time, x, y)
-                # with `n=0`
+                # E.g., `self=R<f,[x, y]>`, `sai=time`,
+                #       `self.itintervals=(time, x, y)`, `n=0`
                 continue
 
-            elif any(d and d._defines & sit.dim._defines for d in (sai, oai)):
-                # In some cases, the distance degenerates because `self` and
-                # `other` never intersect, which essentially means there's no
-                # dependence between them. In this case, we set the distance to
-                # a dummy value (the imaginary unit). Hence, we call these
-                # "imaginary dependences". This occurs in just a small set of
-                # special cases, which we handle here
-
-                # Case 1: `sit` is an IterationInterval with statically known
-                # trip count. E.g. it ranges from 0 to 3; `other` performs a
-                # constant access at 4
-                for v in (self[n], other[n]):
-                    try:
-                        if bool(v < sit.symbolic_min or v > sit.symbolic_max):
-                            return Vector(S.ImaginaryUnit)
-                    except TypeError:
-                        pass
-
-                # Case 2: `sit` is an IterationInterval over a local SubDimension
-                # and `other` performs a constant access
-                for d0, d1 in ((sai, oai), (oai, sai)):
-                    if d0 is None and d1.is_Sub and d1.local:
-                        return Vector(S.ImaginaryUnit)
-
-                # Fallback
-                ret.append(S.Infinity)
-                break
-
-            elif self.findices[n] in sit.dim._defines:
-                # E.g., `self=R<u,[t+1, ii_src_0+1, ii_src_1+2]>` and `fi=p_src` (`n=1`)
-                ret.append(S.Infinity)
-                break
-
-        if S.Infinity in ret:
-            return Vector(*ret)
+            else:
+                # E.g., `self=R<u,[t+1, ii_src_0+1, ii_src_1+2]>`, `fi=p_src`, `n=1`
+                return vinf(ret)
 
         n = len(ret)
 
@@ -1330,6 +1326,10 @@ def is_regular(self):
 
 # *** Utils
 
+def vinf(entries):
+    return Vector(*(entries + [S.Infinity]))  # list `entries` plus a final oo
+
+
 def retrieve_accesses(exprs, **kwargs):
     """
     Like retrieve_terminals, but ensure that if a ComponentAccess is found,
@@ -1345,3 +1345,54 @@ def retrieve_accesses(exprs, **kwargs):
     exprs1 = uxreplace(exprs, subs)
 
     return compaccs | retrieve_terminals(exprs1, **kwargs) - set(subs.values())
+
+
+def disjoint_test(e0, e1, d, it):
+    """
+    A rudimentary test to check if two accesses `e0` and `e1` along `d` within
+    the IterationInterval `it` are independent.
+
+    This is inspired by the Banerjee test, but it is far more simplistic.
+
+    The test is conservative: if it returns True, the accesses are definitely
+    independent; if it returns False, they might still be independent, but
+    this could not be proven.
+
+    Our implementation focuses on tiny yet relevant cases, such as when the
+    iteration space's bounds are numeric constants, while the index access
+    functions reduce to numbers once the iteration variable is substituted with
+    one of the possible values in the iteration space.
+
+    Examples
+    --------
+      * e0 = 12 - zl, e1 = zl + 4, d = zl, it = zl[0,0]
+        where zl is a left SubDimension with thickness, say, 4
+        The test will return True, as the two index access functions never
+        overlap.
+    """
+    if e0 == e1:
+        return False  # identical index functions always overlap
+
+    if d.is_Custom:
+        subs = {}  # use the bounds of `it` as they are
+    elif d.is_Sub and d.is_left:  # root min -> 0, plus the `thickness.left` pair
+        subs = {d.root.symbolic_min: 0, **dict([d.thickness.left])}
+    else:
+        return False  # any other kind of `d` is not handled
+
+    m = it.symbolic_min.subs(subs)
+    M = it.symbolic_max.subs(subs)
+
+    p00 = e0._subs(d, m)  # NOTE(review): endpoints bound e0 only if monotonic in d
+    p01 = e0._subs(d, M)
+
+    p10 = e1._subs(d, m)
+    p11 = e1._subs(d, M)
+
+    if any(not i.is_Number for i in [p00, p01, p10, p11]):
+        return False  # cannot compare symbolic endpoints
+
+    i0 = sympy.Interval(min(p00, p01), max(p00, p01))
+    i1 = sympy.Interval(min(p10, p11), max(p10, p11))
+
+    return not bool(i0.intersect(i1))  # True when the two ranges do not intersect
diff --git a/devito/operator/operator.py b/devito/operator/operator.py
@@ -1098,13 +1098,12 @@ def rcompile(expressions, kwargs, options, target=None):
 
     if target is None:
         cls = operator_selector(**kwargs)
+        kwargs['options'] = options
     else:
         kwargs = parse_kwargs(**target)
         cls = operator_selector(**kwargs)
         kwargs = cls._normalize_kwargs(**kwargs)
-
-    # Use the customized opt options
-    kwargs['options'] = options
+        kwargs['options'].update(options)
 
     # Recursive profiling not supported -- would be a complete mess
     kwargs.pop('profiler', None)

diff --git a/devito/passes/__init__.py b/devito/passes/__init__.py
@@ -82,15 +82,7 @@ def is_gpu_create(obj, gpu_create):
     except AttributeError:
         functions = as_tuple(obj)
 
-    for i in functions:
-        try:
-            f = i.alias or i
-        except AttributeError:
-            f = i
-        if f not in gpu_create:
-            return False
-
-    return True
+    return all(f in gpu_create for f in functions)
 
 
 # Import all compiler passes