diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 160855458d..9ba202757e 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -1845,7 +1845,7 @@ def _generate_MapEntry( # Define all input connectors of this map entry for e in dynamic_map_inputs(state_dfg, node): - if e.data.data != e.dst_conn: + if cpp.ptr(e.data.data, sdfg.arrays[e.data.data], sdfg, self._frame) != e.dst_conn: callsite_stream.write( self.memlet_definition(sdfg, e.data, False, e.dst_conn, e.dst.in_connectors[e.dst_conn]), cfg, state_id, node) diff --git a/tests/codegen/allocation_lifetime_test.py b/tests/codegen/allocation_lifetime_test.py index 380367057a..2b53e87644 100644 --- a/tests/codegen/allocation_lifetime_test.py +++ b/tests/codegen/allocation_lifetime_test.py @@ -398,6 +398,31 @@ def perscal(a: dace.float64[20]): assert np.allclose(a[3], 5) +def test_persistent_loop_bound(): + """ + Code originates from Issue #1550. + Tests both ``for`` and OpenMP parallel ``for`` loop bounds with persistent storage. + """ + N = dace.symbol('N') + + @dace.program(auto_optimize=True) + def tester(L: dace.float64[N, N], index: dace.uint64, active_size: dace.uint64): + for i in range(index, active_size - 1): + L[i + 1][i] = 1.0 + + for j in range(i, dace.int64(active_size - 1)): + L[j + 1][i] = 2.0 + + l = np.random.rand(10, 10) + index = 2 + active_size = 7 + l_ref = np.copy(l) + tester.f(l_ref, index, active_size) + tester(l, index, active_size) + + assert np.allclose(l, l_ref) + + def test_double_nested_persistent_write(): sdfg = dace.SDFG('npw_inner') sdfg.add_array('pers', [20], dace.float64) @@ -451,10 +476,16 @@ def test_branched_allocation(mode): sdfg.add_edge(state_br1_1, state_merge, dace.InterstateEdge()) sdfg.add_edge(state_br2_1, state_merge, dace.InterstateEdge()) - tasklet1 = state_br1.add_tasklet(name="br1", inputs=[], outputs=["out"], \ - code="out = 1;", language=dace.Language.CPP) - tasklet2 = state_br2.add_tasklet(name="br2", inputs=[], outputs=["out"], \ - code="out = 1;", language=dace.Language.CPP) + tasklet1 = state_br1.add_tasklet(name="br1", + inputs=[], + outputs=["out"], + code="out = 1;", + language=dace.Language.CPP) + tasklet2 = state_br2.add_tasklet(name="br2", + inputs=[], + outputs=["out"], + code="out = 1;", + language=dace.Language.CPP) arr_A = state_br1.add_write("A") memlet = dace.Memlet(expr="A[1]") @@ -576,6 +607,7 @@ def test_multisize(): test_persistent_scalar() test_persistent_scalar_in_map() test_persistent_array_access() + test_persistent_loop_bound() test_double_nested_persistent_write() test_branched_allocation('global') test_branched_allocation('singlevalue')