diff --git a/loopy/schedule/tools.py b/loopy/schedule/tools.py index f2164b6dd..8c196e205 100644 --- a/loopy/schedule/tools.py +++ b/loopy/schedule/tools.py @@ -30,6 +30,7 @@ from loopy.kernel.data import AddressSpace, TemporaryVariable, ArrayArg from loopy.kernel import LoopKernel +from loopy.isl_helpers import static_min_of_pw_aff # {{{ block boundary finder @@ -381,6 +382,23 @@ def _check_for_access_races(map_a, insn_a, map_b, insn_b, knl, callables_table): if dt == isl.dim_type.in_: tag, = filter_iname_tags_by_type(knl.inames[name].tags, HardwareConcurrentTag) + if any(not aff.plain_is_zero() + for _, aff in (knl + .get_iname_bounds(name) + .lower_bound_pw_aff + .get_pieces()) + ): + # Non-zero hardware inames are offseted in loopy's codegen. See + # https://github.com/inducer/loopy/issues/600#issuecomment-1104066735 + # => Accordingly offset the access maps + lbound_val = (static_min_of_pw_aff(knl + .get_iname_bounds(name) + .lower_bound_pw_aff, + constants_only=True) + .get_constant_val().to_python()) + map_ = map_.apply_domain( + isl.Map(f"{{[{name}] -> [{name}-{lbound_val}]}}")) + map_ = map_.set_dim_name(dt, pos, str(tag)) for i_l in lsize: