From 144c0e8d7c2d60fca3e3920b8652391e57b9f935 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Thu, 3 Aug 2023 17:16:41 -0700 Subject: [PATCH 001/163] Return types are arrays too, bypass check in codegen --- dace/codegen/compiled_sdfg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dace/codegen/compiled_sdfg.py b/dace/codegen/compiled_sdfg.py index d0d29cfa1e..dcd529865f 100644 --- a/dace/codegen/compiled_sdfg.py +++ b/dace/codegen/compiled_sdfg.py @@ -449,8 +449,8 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]: raise TypeError('Passing an object (type %s) to an array in argument "%s"' % (type(arg).__name__, a)) elif dtypes.is_array(arg) and not isinstance(atype, dt.Array): - # GPU scalars are pointers, so this is fine - if atype.storage != dtypes.StorageType.GPU_Global: + # GPU scalars and return values are pointers, so this is fine + if atype.storage != dtypes.StorageType.GPU_Global and not a.startswith('__return'): raise TypeError('Passing an array to a scalar (type %s) in argument "%s"' % (atype.dtype.ctype, a)) elif not isinstance(atype, dt.Array) and not isinstance(atype.dtype, dtypes.callback) and not isinstance( arg, From 68764c7b4489789908bcdffe9a81f078f2873314 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Thu, 3 Aug 2023 17:17:40 -0700 Subject: [PATCH 002/163] Fix case where nested SDFGs would define more symbols than used --- dace/codegen/targets/cpu.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 3b7b592775..88bb616063 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -1469,9 +1469,10 @@ def make_restrict(expr: str) -> str: arguments += [ f'{atype} {restrict} {aname}' for (atype, aname, _), restrict in zip(memlet_references, restrict_args) ] + fsyms = self._frame.free_symbols(node.sdfg) arguments += [ f'{node.sdfg.symbols[aname].as_arg(aname)}' for aname in sorted(node.symbol_mapping.keys()) - if aname not in sdfg.constants + if aname not in sdfg.constants and aname in fsyms ] arguments = ', '.join(arguments) return f'void {sdfg_label}({arguments}) {{' @@ -1480,9 +1481,10 @@ def generate_nsdfg_call(self, sdfg, state, node, memlet_references, sdfg_label, prepend = [] if state_struct: prepend = ['__state'] + fsyms = self._frame.free_symbols(node.sdfg) args = ', '.join(prepend + [argval for _, _, argval in memlet_references] + [ cpp.sym2cpp(symval) - for symname, symval in sorted(node.symbol_mapping.items()) if symname not in sdfg.constants + for symname, symval in sorted(node.symbol_mapping.items()) if symname not in sdfg.constants and symname in fsyms ]) return f'{sdfg_label}({args});' @@ -1766,11 +1768,11 @@ def _generate_MapEntry( # Find if bounds are used within the scope scope = state_dfg.scope_subgraph(node, False, False) - fsyms = scope.free_symbols + fsyms = self._frame.free_symbols(scope) # Include external edges for n in scope.nodes(): for e in state_dfg.all_edges(n): - fsyms |= e.data.free_symbols + fsyms |= self._frame.free_symbols(e.data) fsyms = set(map(str, fsyms)) ntid_is_used = '__omp_num_threads' in fsyms From e2e2f32d5f9523541ae448f506f4400442c5b4c8 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Thu, 3 Aug 2023 17:21:45 -0700 Subject: [PATCH 003/163] Specialize used_symbols behavior for memlets --- dace/memlet.py | 35 +++++++++++++++++++++++++++++------ dace/sdfg/sdfg.py | 11 ++--------- dace/sdfg/state.py | 6 +++++- 3 files changed, 36 insertions(+), 16 deletions(-) diff --git 
a/dace/memlet.py b/dace/memlet.py index 74a1320a3b..49b3084390 100644 --- a/dace/memlet.py +++ b/dace/memlet.py @@ -512,22 +512,45 @@ def validate(self, sdfg, state): if self.data is not None and self.data not in sdfg.arrays: raise KeyError('Array "%s" not found in SDFG' % self.data) - def used_symbols(self, all_symbols: bool) -> Set[str]: + def used_symbols(self, all_symbols: bool, edge=None) -> Set[str]: """ Returns a set of symbols used in this edge's properties. :param all_symbols: If False, only returns the set of symbols that will be used in the generated code and are needed as arguments. + :param edge: If given, provides richer context-based tests for the case + of ``all_symbols=False``. """ # Symbolic properties are in volume, and the two subsets result = set() + view_edge = False if all_symbols: result |= set(map(str, self.volume.free_symbols)) - if self.src_subset: - result |= self.src_subset.free_symbols - - if self.dst_subset: - result |= self.dst_subset.free_symbols + elif edge is not None: # Not all symbols are requested, and an edge is given + view_edge = False + from dace.sdfg import nodes + if isinstance(edge.dst, nodes.CodeNode) or isinstance(edge.src, nodes.CodeNode): + view_edge = True + elif edge.dst_conn == 'views' and isinstance(edge.dst, nodes.AccessNode): + view_edge = True + elif edge.src_conn == 'views' and isinstance(edge.src, nodes.AccessNode): + view_edge = True + + if not view_edge: + if self.src_subset: + result |= self.src_subset.free_symbols + + if self.dst_subset: + result |= self.dst_subset.free_symbols + else: + # View edges do not require the end of the range nor strides + if self.src_subset: + for rb, _, _ in self.src_subset: + result |= set(map(str, rb.free_symbols)) + + if self.dst_subset: + for rb, _, _ in self.dst_subset: + result |= set(map(str, rb.free_symbols)) return result diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index f3a37ef08c..f8776f4670 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -1294,23 +1294,16 @@ def used_symbols(self, all_symbols: bool) -> Set[str]: defined_syms = set() free_syms = set() - # Exclude data descriptor names, constants, and shapes of global data descriptors - not_strictly_necessary_global_symbols = set() + # Exclude data descriptor names and constants for name, desc in self.arrays.items(): defined_syms.add(name) - if not all_symbols: - used_desc_symbols = desc.used_symbols(all_symbols) - not_strictly_necessary = (desc.used_symbols(all_symbols=True) - used_desc_symbols) - not_strictly_necessary_global_symbols |= set(map(str, not_strictly_necessary)) - defined_syms |= set(self.constants_prop.keys()) # Start with the set of SDFG free symbols if all_symbols: free_syms |= set(self.symbols.keys()) - else: - free_syms |= set(s for s in self.symbols.keys() if s not in not_strictly_necessary_global_symbols) + # If all_symbols is False, those symbols would only be added in the case of non-Python tasklets # Add free state symbols used_before_assignment = set() diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py index a4a6648401..c5fb16503b 100644 --- a/dace/sdfg/state.py +++ b/dace/sdfg/state.py @@ -435,6 +435,10 @@ def used_symbols(self, all_symbols: bool) -> Set[str]: if (isinstance(astnode, ast.Call) and isinstance(astnode.func, ast.Name) and astnode.func.id in sdfg.symbols): freesyms.add(astnode.func.id) + elif (not all_symbols and isinstance(n, nd.Tasklet) and n.language != dtypes.Language.Python): + # If a non-Python tasklet, conservatively assume all SDFG global symbols are used for now + # See 
SDFG.used_symbols for more information + freesyms |= set(sdfg.symbols.keys()) if hasattr(n, 'used_symbols'): freesyms |= n.used_symbols(all_symbols) @@ -454,7 +458,7 @@ def _is_leaf_memlet(e): if not all_symbols and not _is_leaf_memlet(e): continue - freesyms |= e.data.used_symbols(all_symbols) + freesyms |= e.data.used_symbols(all_symbols, e) # Do not consider SDFG constants as symbols new_symbols.update(set(sdfg.constants.keys())) From 7d9ab17dc8ae25b8d07de6dba3b17dac405e6f56 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Thu, 3 Aug 2023 17:58:50 -0700 Subject: [PATCH 004/163] Minor fix --- dace/memlet.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/dace/memlet.py b/dace/memlet.py index 49b3084390..d448ca1134 100644 --- a/dace/memlet.py +++ b/dace/memlet.py @@ -545,12 +545,14 @@ def used_symbols(self, all_symbols: bool, edge=None) -> Set[str]: else: # View edges do not require the end of the range nor strides if self.src_subset: - for rb, _, _ in self.src_subset: - result |= set(map(str, rb.free_symbols)) + for rb, _, _ in self.src_subset.ndrange(): + if symbolic.issymbolic(rb): + result |= set(map(str, rb.free_symbols)) if self.dst_subset: - for rb, _, _ in self.dst_subset: - result |= set(map(str, rb.free_symbols)) + for rb, _, _ in self.dst_subset.ndrange(): + if symbolic.issymbolic(rb): + result |= set(map(str, rb.free_symbols)) return result From 5ae2f74d057b3b0e021f15c2329795f45054012b Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Fri, 4 Aug 2023 09:09:37 -0700 Subject: [PATCH 005/163] Fix issue with filtering out nested SDFG symbol mapping --- dace/sdfg/nodes.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/dace/sdfg/nodes.py b/dace/sdfg/nodes.py index 28431deeea..d29280d22b 100644 --- a/dace/sdfg/nodes.py +++ b/dace/sdfg/nodes.py @@ -581,16 +581,17 @@ def from_json(json_obj, context=None): return ret def used_symbols(self, all_symbols: bool) -> Set[str]: - free_syms = set().union(*(map(str, - pystr_to_symbolic(v).free_symbols) for v in self.symbol_mapping.values()), - *(map(str, - pystr_to_symbolic(v).free_symbols) for v in self.location.values())) + free_syms = set().union(*(map(str, pystr_to_symbolic(v).free_symbols) for v in self.location.values())) + + keys_to_use = set(self.symbol_mapping.keys()) # Filter out unused internal symbols from symbol mapping if not all_symbols: internally_used_symbols = self.sdfg.used_symbols(all_symbols=False) - free_syms &= internally_used_symbols - + keys_to_use &= internally_used_symbols + + free_syms |= set().union(*(map(str, pystr_to_symbolic(v).free_symbols) for k, v in self.symbol_mapping.items() if k in keys_to_use)) + return free_syms @property From 924ecafbb99ec7634147c96e7a4ab06f34f911cf Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Tue, 22 Aug 2023 08:43:34 -0700 Subject: [PATCH 006/163] Interstate edge free symbols: use symbols from AST directly to avoid simplifying --- dace/sdfg/sdfg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index f8776f4670..bbdf7de041 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -232,7 +232,7 @@ def used_symbols(self, all_symbols: bool) -> Set[str]: rhs_symbols = set() for lhs, rhs in self.assignments.items(): # Always add LHS symbols to the set of candidate free symbols - rhs_symbols |= symbolic.free_symbols_and_functions(rhs) + rhs_symbols |= set(map(str, dace.symbolic.symbols_in_ast(ast.parse(rhs)))) # Add the RHS to the set of candidate defined symbols ONLY if 
it has not been read yet # This also solves the ordering issue that may arise in cases like the 3rd example above if lhs not in cond_symbols and lhs not in rhs_symbols: From 22289796415c7b777c7b723afe34d1498660290c Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Tue, 22 Aug 2023 09:05:26 -0700 Subject: [PATCH 007/163] Revert unnecessary code generator changes. If nested symbols are not to be used, PruneSymbols should be called --- dace/codegen/targets/cpu.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 969c42fc60..ef97b0bbad 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -1469,10 +1469,9 @@ def make_restrict(expr: str) -> str: arguments += [ f'{atype} {restrict} {aname}' for (atype, aname, _), restrict in zip(memlet_references, restrict_args) ] - fsyms = self._frame.free_symbols(node.sdfg) arguments += [ f'{node.sdfg.symbols[aname].as_arg(aname)}' for aname in sorted(node.symbol_mapping.keys()) - if aname not in sdfg.constants and aname in fsyms + if aname not in sdfg.constants ] arguments = ', '.join(arguments) return f'void {sdfg_label}({arguments}) {{' @@ -1481,10 +1480,9 @@ def generate_nsdfg_call(self, sdfg, state, node, memlet_references, sdfg_label, prepend = [] if state_struct: prepend = ['__state'] - fsyms = self._frame.free_symbols(node.sdfg) args = ', '.join(prepend + [argval for _, _, argval in memlet_references] + [ cpp.sym2cpp(symval) - for symname, symval in sorted(node.symbol_mapping.items()) if symname not in sdfg.constants and symname in fsyms + for symname, symval in sorted(node.symbol_mapping.items()) if symname not in sdfg.constants ]) return f'{sdfg_label}({args});' From 1bb60e26f34bd7e43ba96034f28632fa07f705dc Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Sun, 1 Oct 2023 12:46:53 -0700 Subject: [PATCH 008/163] Don't pass unused symbols to nested SDFG calls --- dace/codegen/targets/cpu.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index d39ae5fc9d..0b48b60c25 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -1513,7 +1513,7 @@ def make_restrict(expr: str) -> str: ] arguments += [ f'{node.sdfg.symbols[aname].as_arg(aname)}' for aname in sorted(node.symbol_mapping.keys()) - if aname not in sdfg.constants + if aname not in sdfg.constants and aname in self._frame.free_symbols(node.sdfg) ] arguments = ', '.join(arguments) return f'void {sdfg_label}({arguments}) {{' @@ -1523,8 +1523,8 @@ def generate_nsdfg_call(self, sdfg, state, node, memlet_references, sdfg_label, if state_struct: prepend = ['__state'] args = ', '.join(prepend + [argval for _, _, argval in memlet_references] + [ - cpp.sym2cpp(symval) - for symname, symval in sorted(node.symbol_mapping.items()) if symname not in sdfg.constants + cpp.sym2cpp(symval) for symname, symval in sorted(node.symbol_mapping.items()) + if symname not in sdfg.constants and symname in self._frame.free_symbols(node.sdfg) ]) return f'{sdfg_label}({args});' From 890965848bb2e490dea29d6414618d323ec1a597 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Sun, 1 Oct 2023 18:09:00 -0700 Subject: [PATCH 009/163] Revert changes (again) --- dace/codegen/targets/cpu.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 0b48b60c25..737c0f9ea3 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -1513,7 
+1513,7 @@ def make_restrict(expr: str) -> str: ] arguments += [ f'{node.sdfg.symbols[aname].as_arg(aname)}' for aname in sorted(node.symbol_mapping.keys()) - if aname not in sdfg.constants and aname in self._frame.free_symbols(node.sdfg) + if aname not in sdfg.constants ] arguments = ', '.join(arguments) return f'void {sdfg_label}({arguments}) {{' @@ -1524,7 +1524,7 @@ def generate_nsdfg_call(self, sdfg, state, node, memlet_references, sdfg_label, prepend = ['__state'] args = ', '.join(prepend + [argval for _, _, argval in memlet_references] + [ cpp.sym2cpp(symval) for symname, symval in sorted(node.symbol_mapping.items()) - if symname not in sdfg.constants and symname in self._frame.free_symbols(node.sdfg) + if symname not in sdfg.constants ]) return f'{sdfg_label}({args});' From 1dd43a054570dee06116295ed9d07ebf035ca8c6 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Sun, 1 Oct 2023 18:16:47 -0700 Subject: [PATCH 010/163] Richer analysis --- dace/codegen/targets/cpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 737c0f9ea3..0497f0ddc7 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -1812,7 +1812,7 @@ def _generate_MapEntry( # Include external edges for n in scope.nodes(): for e in state_dfg.all_edges(n): - fsyms |= self._frame.free_symbols(e.data) + fsyms |= e.data.used_symbols(False, e) fsyms = set(map(str, fsyms)) ntid_is_used = '__omp_num_threads' in fsyms From 1cc6be4dc9e880a61f158868e28f06efb26b2300 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 2 Oct 2023 11:42:42 +0200 Subject: [PATCH 011/163] When generating a nested SDFG's header and call, add to the arguments only the frame's free (used) symbols. --- dace/codegen/targets/cpu.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 0497f0ddc7..ef1a0654a6 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -1513,7 +1513,7 @@ def make_restrict(expr: str) -> str: ] arguments += [ f'{node.sdfg.symbols[aname].as_arg(aname)}' for aname in sorted(node.symbol_mapping.keys()) - if aname not in sdfg.constants + if aname in self._frame.free_symbols(node) and aname not in sdfg.constants ] arguments = ', '.join(arguments) return f'void {sdfg_label}({arguments}) {{' @@ -1524,7 +1524,7 @@ def generate_nsdfg_call(self, sdfg, state, node, memlet_references, sdfg_label, prepend = ['__state'] args = ', '.join(prepend + [argval for _, _, argval in memlet_references] + [ cpp.sym2cpp(symval) for symname, symval in sorted(node.symbol_mapping.items()) - if symname not in sdfg.constants + if symname in self._frame.free_symbols(node) and symname not in sdfg.constants ]) return f'{sdfg_label}({args});' From eb29c7009537e6954f35dfb1a1e7c5d6e946ab03 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 2 Oct 2023 11:43:30 +0200 Subject: [PATCH 012/163] When generating the arglist of an SDFGState Subgraph, add to the scalar arguments only "used" symbols. 
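Illustration (a hypothetical toy program, not part of this commit's diff): the SDFG-level free_symbols property collects every symbol, including ones that only appear in data-descriptor shapes, whereas used_symbols(all_symbols=False) keeps only the symbols the generated code needs as scalar arguments — which is what the arglist should contain. Exact result sets depend on simplification, hence the "e.g." below.

    import dace

    N = dace.symbol('N')
    M = dace.symbol('M')

    @dace.program
    def copy_row(a: dace.float64[N, M], b: dace.float64[M]):
        b[:] = a[1, :]

    sdfg = copy_row.to_sdfg()
    # N only describes the shape of 'a'; the copy itself only reads M.
    print(sdfg.free_symbols)                     # e.g. {'M', 'N'}
    print(sdfg.used_symbols(all_symbols=False))  # e.g. {'M'}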
--- dace/sdfg/state.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py index fa51103b7a..1ff8fe4cf1 100644 --- a/dace/sdfg/state.py +++ b/dace/sdfg/state.py @@ -688,14 +688,15 @@ def arglist(self, defined_syms=None, shared_transients=None) -> Dict[str, dt.Dat defined_syms = defined_syms or self.defined_symbols() scalar_args.update({ k: dt.Scalar(defined_syms[k]) if k in defined_syms else sdfg.arrays[k] - for k in self.free_symbols if not k.startswith('__dace') and k not in sdfg.constants + for k in self.used_symbols(all_symbols=False) if not k.startswith('__dace') and k not in sdfg.constants }) # Add scalar arguments from free symbols of data descriptors for arg in data_args.values(): scalar_args.update({ str(k): dt.Scalar(k.dtype) - for k in arg.free_symbols if not str(k).startswith('__dace') and str(k) not in sdfg.constants + for k in arg.used_symbols(all_symbols=False) + if not str(k).startswith('__dace') and str(k) not in sdfg.constants }) # Fill up ordered dictionary From 48e138142fd0cd1b186100541ebe56cdb8494b7e Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 2 Oct 2023 11:44:33 +0200 Subject: [PATCH 013/163] Added tests. --- tests/codegen/codegen_used_symbols_test.py | 95 ++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 tests/codegen/codegen_used_symbols_test.py diff --git a/tests/codegen/codegen_used_symbols_test.py b/tests/codegen/codegen_used_symbols_test.py new file mode 100644 index 0000000000..afa0ca0a05 --- /dev/null +++ b/tests/codegen/codegen_used_symbols_test.py @@ -0,0 +1,95 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +""" Tests used-symbols in code generation.""" +import dace +import numpy +import pytest + + +n0i, n0j, n0k = (dace.symbol(s, dtype=dace.int32) for s in ('n0i', 'n0j', 'n0k')) +n1i, n1j, n1k = (dace.symbol(s, dtype=dace.int64) for s in ('n1i', 'n1j', 'n1k')) + + +@dace.program +def rprj3(r: dace.float64[n0i, n0j, n0k], s: dace.float64[n1i, n1j, n1k]): + + for i, j, k in dace.map[1:s.shape[0] - 1, 1:s.shape[1] - 1, 1:s.shape[2] - 1]: + + s[i, j, k] = ( + 0.5000 * r[2 * i, 2 * j, 2 * k] + + 0.2500 * (r[2 * i - 1, 2 * j, 2 * k] + r[2 * i + 1, 2 * j, 2 * k] + r[2 * i, 2 * j - 1, 2 * k] + + r[2 * i, 2 * j + 1, 2 * k] + r[2 * i, 2 * j, 2 * k - 1] + r[2 * i, 2 * j, 2 * k + 1]) + + 0.1250 * (r[2 * i - 1, 2 * j - 1, 2 * k] + r[2 * i - 1, 2 * j + 1, 2 * k] + + r[2 * i + 1, 2 * j - 1, 2 * k] + r[2 * i + 1, 2 * j + 1, 2 * k] + + r[2 * i - 1, 2 * j, 2 * k - 1] + r[2 * i - 1, 2 * j, 2 * k + 1] + + r[2 * i + 1, 2 * j, 2 * k - 1] + r[2 * i + 1, 2 * j, 2 * k + 1] + + r[2 * i, 2 * j - 1, 2 * k - 1] + r[2 * i, 2 * j - 1, 2 * k + 1] + + r[2 * i, 2 * j + 1, 2 * k - 1] + r[2 * i, 2 * j + 1, 2 * k + 1]) + + 0.0625 * (r[2 * i - 1, 2 * j - 1, 2 * k - 1] + r[2 * i - 1, 2 * j - 1, 2 * k + 1] + + r[2 * i - 1, 2 * j + 1, 2 * k - 1] + r[2 * i - 1, 2 * j + 1, 2 * k + 1] + + r[2 * i + 1, 2 * j - 1, 2 * k - 1] + r[2 * i + 1, 2 * j - 1, 2 * k + 1] + + r[2 * i + 1, 2 * j + 1, 2 * k - 1] + r[2 * i + 1, 2 * j + 1, 2 * k + 1])) + + +def test_codegen_used_symbols_cpu(): + + rng = numpy.random.default_rng(42) + r = rng.random((10, 10, 10)) + s_ref = numpy.zeros((4, 4, 4)) + s_val = numpy.zeros((4, 4, 4)) + + rprj3.f(r, s_ref) + rprj3(r, s_val) + + assert numpy.allclose(s_ref, s_val) + + +def test_codegen_used_symbols_cpu_2(): + + @dace.program + def rprj3_nested(r: dace.float64[n0i, n0j, n0k], s: dace.float64[n1i, n1j, n1k]): + rprj3(r, s) + + rng = 
numpy.random.default_rng(42) + r = rng.random((10, 10, 10)) + s_ref = numpy.zeros((4, 4, 4)) + s_val = numpy.zeros((4, 4, 4)) + + rprj3.f(r, s_ref) + rprj3_nested(r, s_val) + + assert numpy.allclose(s_ref, s_val) + + +@pytest.mark.gpu +def test_codegen_used_symbols_gpu(): + + sdfg = rprj3.to_sdfg() + for _, desc in sdfg.arrays.items(): + if not desc.transient and isinstance(desc, dace.data.Array): + desc.storage = dace.StorageType.GPU_Global + sdfg.apply_gpu_transformations() + func = sdfg.compile() + + try: + import cupy + + rng = numpy.random.default_rng(42) + r = rng.random((10, 10, 10)) + r_dev = cupy.asarray(r) + s_ref = numpy.zeros((4, 4, 4)) + s_val = cupy.zeros((4, 4, 4)) + + rprj3.f(r, s_ref) + func(r=r_dev, s=s_val, n0i=10, n0j=10, n0k=10, n1i=4, n1j=4, n1k=4) + + assert numpy.allclose(s_ref, s_val) + + except (ImportError, ModuleNotFoundError): + pass + + +if __name__ == "__main__": + + test_codegen_used_symbols_cpu() + test_codegen_used_symbols_cpu_2() + test_codegen_used_symbols_gpu() From f75627fcec73ad88968f45263ff91d00d4126b00 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 2 Oct 2023 12:28:43 +0200 Subject: [PATCH 014/163] Get the free/used symbols of the nested SDFG instead of the NestedSDFG node. --- dace/codegen/targets/cpu.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index ef1a0654a6..995cc2f3a9 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -1513,7 +1513,7 @@ def make_restrict(expr: str) -> str: ] arguments += [ f'{node.sdfg.symbols[aname].as_arg(aname)}' for aname in sorted(node.symbol_mapping.keys()) - if aname in self._frame.free_symbols(node) and aname not in sdfg.constants + if aname in self._frame.free_symbols(node.sdfg) and aname not in sdfg.constants ] arguments = ', '.join(arguments) return f'void {sdfg_label}({arguments}) {{' @@ -1524,7 +1524,7 @@ def generate_nsdfg_call(self, sdfg, state, node, memlet_references, sdfg_label, prepend = ['__state'] args = ', '.join(prepend + [argval for _, _, argval in memlet_references] + [ cpp.sym2cpp(symval) for symname, symval in sorted(node.symbol_mapping.items()) - if symname in self._frame.free_symbols(node) and symname not in sdfg.constants + if symname in self._frame.free_symbols(node.sdfg) and symname not in sdfg.constants ]) return f'{sdfg_label}({args});' From a2f1a13aabedd9364b399551e38e71c0f4c005b7 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 2 Oct 2023 12:29:34 +0200 Subject: [PATCH 015/163] Updated test to reflect that temporarily setting config value does not work in the CI. 
--- tests/symbol_mapping_replace_test.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/symbol_mapping_replace_test.py b/tests/symbol_mapping_replace_test.py index cd47320bf1..cbb572bc81 100644 --- a/tests/symbol_mapping_replace_test.py +++ b/tests/symbol_mapping_replace_test.py @@ -27,14 +27,15 @@ def outer(A, inp1: float, inp2: float): def test_symbol_mapping_replace(): - with dace.config.set_temporary('optimizer', 'automatic_simplification', value=True): - A = np.ones((10, 10, 10)) - ref = A.copy() - b = 2.0 - c = 2.0 - outer(A, inp1=b, inp2=c) - outer.f(ref, inp1=b, inp2=c) - assert (np.allclose(A, ref)) + # TODO/NOTE: Setting temporary config values does not work in the CI + # with dace.config.set_temporary('optimizer', 'automatic_simplification', value=True): + A = np.ones((10, 10, 10)) + ref = A.copy() + b = 2.0 + c = 2.0 + outer(A, inp1=b, inp2=c) + outer.f(ref, inp1=b, inp2=c) + assert (np.allclose(A, ref)) if __name__ == '__main__': From fba42853d1b97319ed5a567c1d7165b32bb623de Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Oct 2023 23:47:05 +0000 Subject: [PATCH 016/163] Bump urllib3 from 2.0.3 to 2.0.6 Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.0.3 to 2.0.6. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.0.3...2.0.6) --- updated-dependencies: - dependency-name: urllib3 dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index ea4db45916..996449dbef 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,7 +20,7 @@ PyYAML==6.0 requests==2.31.0 six==1.16.0 sympy==1.9 -urllib3==2.0.3 +urllib3==2.0.6 websockets==11.0.3 Werkzeug==2.3.5 zipp==3.15.0 From d713e6e351c3ca3d2a599db7a2e8b28630013d94 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Tue, 3 Oct 2023 16:35:28 +0200 Subject: [PATCH 017/163] Code generation/used symbols fix for symbols in the symbol mapping that may or may not be (re-)defined in an InterstateEdge. 
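A sketch of the corner case, built with the SDFG API purely for illustration (state and symbol names are assumed): a symbol assigned on an inter-state edge before it is read counts as defined inside the nested SDFG, so it drops out of the used symbols on its own — yet the parent still passes it through the symbol mapping, and the generated nested-SDFG signature must keep it.

    import dace

    inner = dace.SDFG('inner')
    inner.add_symbol('i', dace.int32)
    s0, s1, s2 = (inner.add_state(f's{n}') for n in range(3))
    inner.add_edge(s0, s1, dace.InterstateEdge(assignments={'i': '5'}))
    inner.add_edge(s1, s2, dace.InterstateEdge(condition='i > 3'))

    outer = dace.SDFG('outer')
    outer.add_symbol('n', dace.int32)
    outer.add_state().add_nested_sdfg(inner, outer, {}, {}, symbol_mapping={'i': 'n'})

    # 'i' is (re-)defined on an inter-state edge before it is read:
    print(inner.used_symbols(all_symbols=False))   # e.g. set()
    # ...but it is in the symbol mapping, so the signature must keep it:
    print(inner.used_symbols(all_symbols=False,
                             keep_defined_in_mapping=True))  # e.g. {'i'}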
--- dace/codegen/targets/cpu.py | 6 ++++-- dace/sdfg/sdfg.py | 8 +++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 995cc2f3a9..88dda0058f 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -1511,9 +1511,10 @@ def make_restrict(expr: str) -> str: arguments += [ f'{atype} {restrict} {aname}' for (atype, aname, _), restrict in zip(memlet_references, restrict_args) ] + fsyms = node.sdfg.used_symbols(all_symbols=False, keep_defined_in_mapping=True) arguments += [ f'{node.sdfg.symbols[aname].as_arg(aname)}' for aname in sorted(node.symbol_mapping.keys()) - if aname in self._frame.free_symbols(node.sdfg) and aname not in sdfg.constants + if aname in fsyms and aname not in sdfg.constants ] arguments = ', '.join(arguments) return f'void {sdfg_label}({arguments}) {{' @@ -1522,9 +1523,10 @@ def generate_nsdfg_call(self, sdfg, state, node, memlet_references, sdfg_label, prepend = [] if state_struct: prepend = ['__state'] + fsyms = node.sdfg.used_symbols(all_symbols=False, keep_defined_in_mapping=True) args = ', '.join(prepend + [argval for _, _, argval in memlet_references] + [ cpp.sym2cpp(symval) for symname, symval in sorted(node.symbol_mapping.items()) - if symname in self._frame.free_symbols(node.sdfg) and symname not in sdfg.constants + if symname in fsyms and symname not in sdfg.constants ]) return f'{sdfg_label}({args});' diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index a7b5d90b2b..a85e773337 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -1323,7 +1323,7 @@ def arrays_recursive(self): if isinstance(node, nd.NestedSDFG): yield from node.sdfg.arrays_recursive() - def used_symbols(self, all_symbols: bool) -> Set[str]: + def used_symbols(self, all_symbols: bool, keep_defined_in_mapping: bool=False) -> Set[str]: """ Returns a set of symbol names that are used by the SDFG, but not defined within it. This property is used to determine the symbolic @@ -1331,6 +1331,8 @@ def used_symbols(self, all_symbols: bool) -> Set[str]: :param all_symbols: If False, only returns the set of symbols that will be used in the generated code and are needed as arguments. + :param keep_defined_in_mapping: If True, symbols defined in inter-state edges that are in the symbol mapping + will be removed from the set of defined symbols. 
""" defined_syms = set() free_syms = set() @@ -1372,6 +1374,10 @@ def used_symbols(self, all_symbols: bool) -> Set[str]: # Remove symbols that were used before they were assigned defined_syms -= used_before_assignment + # Remove from defined symbols those that are in the symbol mapping + if self.parent_nsdfg_node is not None and keep_defined_in_mapping: + defined_syms -= set(self.parent_nsdfg_node.symbol_mapping.keys()) + # Add the set of SDFG symbol parameters # If all_symbols is False, those symbols would only be added in the case of non-Python tasklets if all_symbols: From 8ba05f15da78008b9b6973635cf369a0e8aa433e Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Tue, 3 Oct 2023 10:47:52 -0700 Subject: [PATCH 018/163] Adapt FPGA code generator to address used symbols --- dace/codegen/targets/intel_fpga.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dace/codegen/targets/intel_fpga.py b/dace/codegen/targets/intel_fpga.py index 095a5ce9df..d3c46b0069 100644 --- a/dace/codegen/targets/intel_fpga.py +++ b/dace/codegen/targets/intel_fpga.py @@ -729,9 +729,10 @@ def generate_module(self, sdfg, state, kernel_name, module_name, subgraph, param def generate_nsdfg_header(self, sdfg, state, state_id, node, memlet_references, sdfg_label): # Intel FPGA needs to deal with streams arguments = [f'{atype} {aname}' for atype, aname, _ in memlet_references] + fsyms = node.sdfg.used_symbols(all_symbols=False, keep_defined_in_mapping=True) arguments += [ f'{node.sdfg.symbols[aname].as_arg(aname)}' for aname in sorted(node.symbol_mapping.keys()) - if aname not in sdfg.constants + if aname in fsyms and aname not in sdfg.constants ] arguments = ', '.join(arguments) function_header = f'void {sdfg_label}({arguments}) {{' From 17fa4c1ed3738c3fc4261262ad4bac1872d84ac7 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Wed, 4 Oct 2023 21:10:20 +0200 Subject: [PATCH 019/163] Using used-symbols in Xilinx code generator. 
--- dace/codegen/targets/xilinx.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dace/codegen/targets/xilinx.py b/dace/codegen/targets/xilinx.py index e802907652..5d82cfeafc 100644 --- a/dace/codegen/targets/xilinx.py +++ b/dace/codegen/targets/xilinx.py @@ -368,9 +368,10 @@ def generate_flatten_loop_post(kernel_stream, sdfg, state_id, node): def generate_nsdfg_header(self, sdfg, state, state_id, node, memlet_references, sdfg_label): # TODO: Use a single method for GPU kernels, FPGA modules, and NSDFGs arguments = [f'{atype} {aname}' for atype, aname, _ in memlet_references] + fsyms = node.sdfg.used_symbols(all_symbols=False, keep_defined_in_mapping=True) arguments += [ f'{node.sdfg.symbols[aname].as_arg(aname)}' for aname in sorted(node.symbol_mapping.keys()) - if aname not in sdfg.constants + if aname in fsyms and aname not in sdfg.constants ] arguments = ', '.join(arguments) return f'void {sdfg_label}({arguments}) {{\n#pragma HLS INLINE' From ea326951cdf72a2833501fcab01e362a10313088 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 6 Oct 2023 22:09:04 +0200 Subject: [PATCH 020/163] Trying Python 3.12 --- .github/workflows/general-ci.yml | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/general-ci.yml b/.github/workflows/general-ci.yml index 138726ef1d..063c1f3e7d 100644 --- a/.github/workflows/general-ci.yml +++ b/.github/workflows/general-ci.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7,'3.11'] + python-version: [3.7,'3.12'] simplify: [0,1,autoopt] steps: diff --git a/setup.py b/setup.py index 6f97086543..a0ac2e2d49 100644 --- a/setup.py +++ b/setup.py @@ -63,7 +63,7 @@ "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", ], - python_requires='>=3.6, <3.12', + python_requires='>=3.6, <3.13', packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]), package_data={ '': [ From 6a320d1b0cfca3ce2588399acc3786aac9db794e Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Sat, 7 Oct 2023 12:11:04 +0200 Subject: [PATCH 021/163] Preparing for deprecation. --- dace/frontend/python/newast.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/dace/frontend/python/newast.py b/dace/frontend/python/newast.py index 0329e31641..71d834e955 100644 --- a/dace/frontend/python/newast.py +++ b/dace/frontend/python/newast.py @@ -49,6 +49,11 @@ Shape = Union[ShapeTuple, ShapeList] DependencyType = Dict[str, Tuple[SDFGState, Union[Memlet, nodes.Tasklet], Tuple[int]]] +if sys.version_info < (3, 8): + _simple_ast_nodes = (ast.Constant, ast.Name, ast.NameConstant, ast.Num) +else: + _simple_ast_nodes = (ast.Constant, ast.Name) + class SkipCall(Exception): """ Exception used to skip calls to functions that cannot be parsed. """ @@ -2344,12 +2349,11 @@ def _is_test_simple(self, node: ast.AST): # Fix for scalar promotion tests # TODO: Maybe those tests should use the SDFG API instead of the # Python frontend which can change how it handles conditions. 
- simple_ast_nodes = (ast.Constant, ast.Name, ast.NameConstant, ast.Num) - is_test_simple = isinstance(node, simple_ast_nodes) + is_test_simple = isinstance(node, _simple_ast_nodes) if not is_test_simple: if isinstance(node, ast.Compare): - is_left_simple = isinstance(node.left, simple_ast_nodes) - is_right_simple = (len(node.comparators) == 1 and isinstance(node.comparators[0], simple_ast_nodes)) + is_left_simple = isinstance(node.left, _simple_ast_nodes) + is_right_simple = (len(node.comparators) == 1 and isinstance(node.comparators[0], _simple_ast_nodes)) if is_left_simple and is_right_simple: return True elif isinstance(node, ast.BoolOp): From b6f56d56c1c901c48ad5111e7ed95565d6a5d4eb Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Sat, 7 Oct 2023 12:11:27 +0200 Subject: [PATCH 022/163] Fixed assertEqual(s) call. --- tests/transformations/move_loop_into_map_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/transformations/move_loop_into_map_test.py b/tests/transformations/move_loop_into_map_test.py index 67c60c01bf..dca775bb7a 100644 --- a/tests/transformations/move_loop_into_map_test.py +++ b/tests/transformations/move_loop_into_map_test.py @@ -96,17 +96,17 @@ def test_multiple_edges(self): def test_itervar_in_map_range(self): sdfg = should_not_apply_1.to_sdfg(simplify=True) count = sdfg.apply_transformations(MoveLoopIntoMap) - self.assertEquals(count, 0) + self.assertEqual(count, 0) def test_itervar_in_data(self): sdfg = should_not_apply_2.to_sdfg(simplify=True) count = sdfg.apply_transformations(MoveLoopIntoMap) - self.assertEquals(count, 0) + self.assertEqual(count, 0) def test_non_injective_index(self): sdfg = should_not_apply_3.to_sdfg(simplify=True) count = sdfg.apply_transformations(MoveLoopIntoMap) - self.assertEquals(count, 0) + self.assertEqual(count, 0) def test_apply_multiple_times(self): sdfg = apply_multiple_times.to_sdfg(simplify=True) From dcbfd2a7e51e631a36bd9f7559289c5eb1e5cb3a Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Sat, 7 Oct 2023 13:49:11 +0200 Subject: [PATCH 023/163] Reworked code to avoid deprecation warnings and errors. --- dace/frontend/python/astutils.py | 41 ++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/dace/frontend/python/astutils.py b/dace/frontend/python/astutils.py index faf214fdeb..ff2d191752 100644 --- a/dace/frontend/python/astutils.py +++ b/dace/frontend/python/astutils.py @@ -15,6 +15,12 @@ from dace import dtypes, symbolic +if sys.version_info >= (3, 8): + NumConstant = ast.Constant +else: + NumConstant = ast.Num + + def _remove_outer_indentation(src: str): """ Removes extra indentation from a source Python function. 
@@ -66,8 +72,9 @@ def is_constant(node: ast.AST) -> bool: if sys.version_info >= (3, 8): if isinstance(node, ast.Constant): return True - if isinstance(node, (ast.Num, ast.Str, ast.NameConstant)): # For compatibility - return True + else: + if isinstance(node, (ast.Num, ast.Str, ast.NameConstant)): # For compatibility + return True return False @@ -82,13 +89,14 @@ def evalnode(node: ast.AST, gvars: Dict[str, Any]) -> Any: """ if not isinstance(node, ast.AST): return node - if isinstance(node, ast.Index): # For compatibility + if sys.version_info < (3, 9) and isinstance(node, ast.Index): # For compatibility node = node.value - if isinstance(node, ast.Num): # For compatibility - return node.n if sys.version_info >= (3, 8): if isinstance(node, ast.Constant): return node.value + else: + if isinstance(node, ast.Num): # For compatibility + return node.n # Replace internal constants with their values node = copy_tree(node) @@ -112,7 +120,7 @@ def rname(node): if isinstance(node, str): return node - if isinstance(node, ast.Num): + if sys.version_info < (3, 8) and isinstance(node, ast.Num): return str(node.n) if isinstance(node, ast.Name): # form x return node.id @@ -174,12 +182,15 @@ def subscript_to_ast_slice(node, without_array=False): # Python <3.9 compatibility result_slice = None - if isinstance(node.slice, ast.Index): - slc = node.slice.value - if not isinstance(slc, ast.Tuple): - result_slice = [slc] - elif isinstance(node.slice, ast.ExtSlice): - slc = tuple(node.slice.dims) + if sys.version_info < (3, 9): + if isinstance(node.slice, ast.Index): + slc = node.slice.value + if not isinstance(slc, ast.Tuple): + result_slice = [slc] + elif isinstance(node.slice, ast.ExtSlice): + slc = tuple(node.slice.dims) + else: + raise TypeError('Unsupported slicing type: ' + str(type(node.slice))) else: slc = node.slice @@ -196,7 +207,7 @@ def subscript_to_ast_slice(node, without_array=False): # Slice if isinstance(s, ast.Slice): result_slice.append((s.lower, s.upper, s.step)) - elif isinstance(s, ast.Index): # Index (Python <3.9) + elif sys.version_info < (3, 9) and isinstance(s, ast.Index): # Index (Python <3.9) result_slice.append(s.value) else: # Index result_slice.append(s) @@ -226,7 +237,7 @@ def _Subscript(self, t): self.dispatch(t.value) self.write('[') # Compatibility - if isinstance(t.slice, ast.Index): + if sys.version_info < (3, 9) and isinstance(t.slice, ast.Index): slc = t.slice.value else: slc = t.slice @@ -600,7 +611,7 @@ def visit_Name(self, node: ast.Name): def visit_Constant(self, node): return self.visit_Num(node) - def visit_Num(self, node: ast.Num): + def visit_Num(self, node: NumConstant): newname = f'__uu{self.id}' self.gvars[newname] = node.n self.id += 1 From a8d7431d52fd936579faac1cf7636bb961436555 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Sat, 7 Oct 2023 14:03:05 +0200 Subject: [PATCH 024/163] Reworked code to avoid deprecation warnings and errors. 
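The compatibility pattern, as a standalone sketch (the alias name mirrors the one introduced in this series): gate the deprecated AST classes behind a single version check, then use the alias everywhere.

    import ast
    import sys

    if sys.version_info < (3, 8):
        NumConstant = ast.Num
    else:
        NumConstant = ast.Constant  # ast.Num is deprecated since Python 3.8

    node = ast.parse('42', mode='eval').body
    assert isinstance(node, NumConstant)
    print(node.value if sys.version_info >= (3, 8) else node.n)  # 42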
--- dace/frontend/python/newast.py | 53 +++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/dace/frontend/python/newast.py b/dace/frontend/python/newast.py index 71d834e955..3b0023c842 100644 --- a/dace/frontend/python/newast.py +++ b/dace/frontend/python/newast.py @@ -49,10 +49,29 @@ Shape = Union[ShapeTuple, ShapeList] DependencyType = Dict[str, Tuple[SDFGState, Union[Memlet, nodes.Tasklet], Tuple[int]]] + if sys.version_info < (3, 8): _simple_ast_nodes = (ast.Constant, ast.Name, ast.NameConstant, ast.Num) + BytesConstant = ast.Bytes + EllipsisConstant = ast.Ellipsis + NameConstant = ast.NameConstant + NumConstant = ast.Num + StrConstant = ast.Str else: _simple_ast_nodes = (ast.Constant, ast.Name) + BytesConstant = ast.Constant + EllipsisConstant = ast.Constant + NameConstant = ast.Constant + NumConstant = ast.Constant + StrConstant = ast.Constant + + +if sys.version_info < (3, 9): + Index = ast.Index + ExtSlice = ast.ExtSlice +else: + Index = type(None) + ExtSlice = type(None) class SkipCall(Exception): @@ -986,13 +1005,13 @@ def visit_TopLevelExpr(self, node): raise DaceSyntaxError(self, node, 'Local variable is already a tasklet input or output') self.outputs[connector] = memlet return None # Remove from final tasklet code - elif isinstance(node.value, ast.Str): + elif isinstance(node.value, StrConstant): return self.visit_TopLevelStr(node.value) return self.generic_visit(node) # Detect external tasklet code - def visit_TopLevelStr(self, node: ast.Str): + def visit_TopLevelStr(self, node: StrConstant): if self.extcode != None: raise DaceSyntaxError(self, node, 'Cannot provide more than one intrinsic implementation ' + 'for tasklet') self.extcode = node.s @@ -1616,7 +1635,7 @@ def _parse_for_indices(self, node: ast.Expr): return indices - def _parse_value(self, node: Union[ast.Name, ast.Num, ast.Constant]): + def _parse_value(self, node: Union[ast.Name, NumConstant, ast.Constant]): """Parses a value Arguments: @@ -1631,7 +1650,7 @@ def _parse_value(self, node: Union[ast.Name, ast.Num, ast.Constant]): if isinstance(node, ast.Name): return node.id - elif isinstance(node, ast.Num): + elif sys.version_info < (3.8) and isinstance(node, ast.Num): return str(node.n) elif isinstance(node, ast.Constant): return str(node.value) @@ -1651,14 +1670,14 @@ def _parse_slice(self, node: ast.Slice): return (self._parse_value(node.lower), self._parse_value(node.upper), self._parse_value(node.step) if node.step is not None else "1") - def _parse_index_as_range(self, node: Union[ast.Index, ast.Tuple]): + def _parse_index_as_range(self, node: Union[Index, ast.Tuple]): """ Parses an index as range :param node: Index node :return: Range in (from, to, step) format """ - if isinstance(node, ast.Index): + if sys.version_info < (3.9) and isinstance(node, ast.Index): val = self._parse_value(node.value) elif isinstance(node, ast.Tuple): val = self._parse_value(node.elts) @@ -1765,7 +1784,7 @@ def visit_ast_or_value(arg): iterator = 'dace.map' else: ranges = [] - if isinstance(node.slice, (ast.Tuple, ast.ExtSlice)): + if isinstance(node.slice, (ast.Tuple, ExtSlice)): for s in node.slice.dims: ranges.append(self._parse_slice(s)) elif isinstance(node.slice, ast.Slice): @@ -4297,7 +4316,7 @@ def visit_Call(self, node: ast.Call, create_callbacks=False): func = None funcname = None # If the call directly refers to an SDFG or dace-compatible program - if isinstance(node.func, ast.Num): + if sys.version_info < (3, 8) and isinstance(node.func, ast.Num): if 
self._has_sdfg(node.func.n): func = node.func.n elif isinstance(node.func, ast.Constant): @@ -4620,11 +4639,11 @@ def visit_Str(self, node: ast.Str): # A string constant returns a string literal return StringLiteral(node.s) - def visit_Bytes(self, node: ast.Bytes): + def visit_Bytes(self, node: BytesConstant): # A bytes constant returns a string literal return StringLiteral(node.s) - def visit_Num(self, node: ast.Num): + def visit_Num(self, node: NumConstant): if isinstance(node.n, bool): return dace.bool_(node.n) if isinstance(node.n, (int, float, complex)): @@ -4644,7 +4663,7 @@ def visit_Name(self, node: ast.Name): # If visiting a name, check if it is a defined variable or a global return self._visitname(node.id, node) - def visit_NameConstant(self, node: ast.NameConstant): + def visit_NameConstant(self, node: NameConstant): return self.visit_Constant(node) def visit_Attribute(self, node: ast.Attribute): @@ -4919,7 +4938,7 @@ def _promote(node: ast.AST) -> Union[Any, str, symbolic.symbol]: res = self.visit(s) else: res = self._visit_ast_or_value(s) - elif isinstance(s, ast.Index): + elif sys.version_info < (3.9) and isinstance(s, ast.Index): res = self._parse_subscript_slice(s.value) elif isinstance(s, ast.Slice): lower = s.lower @@ -4937,7 +4956,7 @@ def _promote(node: ast.AST) -> Union[Any, str, symbolic.symbol]: res = ((lower, upper, step), ) elif isinstance(s, ast.Tuple): res = tuple(self._parse_subscript_slice(d, multidim=True) for d in s.elts) - elif isinstance(s, ast.ExtSlice): + elif sys.version_info < (3, 9) and isinstance(s, ast.ExtSlice): res = tuple(self._parse_subscript_slice(d, multidim=True) for d in s.dims) else: res = _promote(s) @@ -4999,8 +5018,8 @@ def visit_Subscript(self, node: ast.Subscript, inference: bool = False): # If the value is a tuple of constants (e.g., array.shape) and the # slice is constant, return the value itself nslice = self.visit(node.slice) - if isinstance(nslice, (ast.Index, Number)): - if isinstance(nslice, ast.Index): + if isinstance(nslice, (Index, Number)): + if sys.version_info < (3, 9) and isinstance(nslice, ast.Index): v = self._parse_value(nslice.value) else: v = nslice @@ -5064,7 +5083,7 @@ def _visit_ast_or_value(self, node: ast.AST) -> Any: out = out[0] return out - def visit_Index(self, node: ast.Index) -> Any: + def visit_Index(self, node: Index) -> Any: if isinstance(node.value, ast.Tuple): for i, elt in enumerate(node.value.elts): node.value.elts[i] = self._visit_ast_or_value(elt) @@ -5072,7 +5091,7 @@ def visit_Index(self, node: ast.Index) -> Any: node.value = self._visit_ast_or_value(node.value) return node - def visit_ExtSlice(self, node: ast.ExtSlice) -> Any: + def visit_ExtSlice(self, node: ExtSlice) -> Any: for i, dim in enumerate(node.dims): node.dims[i] = self._visit_ast_or_value(dim) From 10108c7d0e723ba2786c8441fc5e7c42e9366b49 Mon Sep 17 00:00:00 2001 From: alexnick83 <31545860+alexnick83@users.noreply.github.com> Date: Sat, 7 Oct 2023 15:37:08 +0200 Subject: [PATCH 025/163] Fixed comma/dots. 
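Why the comma matters, as a tiny standalone example: (3.8) is just the float 3.8, not a one-element tuple, so the comparison raises at runtime instead of version-gating.

    import sys

    try:
        sys.version_info < (3.8)  # float comparison raises TypeError
    except TypeError as exc:
        print(exc)  # '<' not supported between ... and 'float'

    print(sys.version_info < (3, 8))  # correct tuple comparison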
---
 dace/frontend/python/newast.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dace/frontend/python/newast.py b/dace/frontend/python/newast.py
index 3b0023c842..db4e716445 100644
--- a/dace/frontend/python/newast.py
+++ b/dace/frontend/python/newast.py
@@ -1650,7 +1650,7 @@ def _parse_value(self, node: Union[ast.Name, NumConstant, ast.Constant]):
 
         if isinstance(node, ast.Name):
             return node.id
-        elif sys.version_info < (3.8) and isinstance(node, ast.Num):
+        elif sys.version_info < (3, 8) and isinstance(node, ast.Num):
             return str(node.n)
         elif isinstance(node, ast.Constant):
             return str(node.value)
@@ -1677,7 +1677,7 @@ def _parse_index_as_range(self, node: Union[Index, ast.Tuple]):
            :param node: Index node
            :return: Range in (from, to, step) format
        """
-        if sys.version_info < (3.9) and isinstance(node, ast.Index):
+        if sys.version_info < (3, 9) and isinstance(node, ast.Index):
            val = self._parse_value(node.value)
        elif isinstance(node, ast.Tuple):
            val = self._parse_value(node.elts)

From d8efaca067e471ddef3d65dae99a18ac3a26be1a Mon Sep 17 00:00:00 2001
From: alexnick83 <31545860+alexnick83@users.noreply.github.com>
Date: Sat, 7 Oct 2023 16:26:58 +0200
Subject: [PATCH 026/163] Another comma/dot fix.

---
 dace/frontend/python/newast.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dace/frontend/python/newast.py b/dace/frontend/python/newast.py
index db4e716445..1d0dbc34dd 100644
--- a/dace/frontend/python/newast.py
+++ b/dace/frontend/python/newast.py
@@ -4938,7 +4938,7 @@ def _promote(node: ast.AST) -> Union[Any, str, symbolic.symbol]:
                 res = self.visit(s)
             else:
                 res = self._visit_ast_or_value(s)
-        elif sys.version_info < (3.9) and isinstance(s, ast.Index):
+        elif sys.version_info < (3, 9) and isinstance(s, ast.Index):
             res = self._parse_subscript_slice(s.value)
         elif isinstance(s, ast.Slice):
             lower = s.lower

From f4cb38aeca4c3a316312e7043cc82670b08d384c Mon Sep 17 00:00:00 2001
From: Alexandros Nikolaos Ziogas
Date: Sun, 8 Oct 2023 11:03:53 +0200
Subject: [PATCH 027/163] Reworked access to slice attribute.

---
 dace/frontend/python/astutils.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/dace/frontend/python/astutils.py b/dace/frontend/python/astutils.py
index ff2d191752..49d7278462 100644
--- a/dace/frontend/python/astutils.py
+++ b/dace/frontend/python/astutils.py
@@ -182,15 +182,12 @@ def subscript_to_ast_slice(node, without_array=False):
 
     # Python <3.9 compatibility
     result_slice = None
-    if sys.version_info < (3, 9):
-        if isinstance(node.slice, ast.Index):
-            slc = node.slice.value
-            if not isinstance(slc, ast.Tuple):
-                result_slice = [slc]
-        elif isinstance(node.slice, ast.ExtSlice):
-            slc = tuple(node.slice.dims)
-        else:
-            raise TypeError('Unsupported slicing type: ' + str(type(node.slice)))
+    if sys.version_info < (3, 9) and isinstance(node.slice, ast.Index):
+        slc = node.slice.value
+        if not isinstance(slc, ast.Tuple):
+            result_slice = [slc]
+    elif sys.version_info < (3, 9) and isinstance(node.slice, ast.ExtSlice):
+        slc = tuple(node.slice.dims)
     else:
         slc = node.slice
 

From 772a6299cd7301856a1f6f027f10a45d1b8183d4 Mon Sep 17 00:00:00 2001
From: Alexandros Nikolaos Ziogas
Date: Sun, 8 Oct 2023 11:56:47 +0200
Subject: [PATCH 028/163] Fixed invalid escape sequence backslash-space.
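For reference, a sketch of the two standard fixes for this warning class — doubling the backslash (as this commit does) or using a raw string; both keep the rendered ASCII art intact:

    print('x < 5 \\ ->[s5]')   # doubled backslash renders as a single one
    print(r'x < 5 \ ->[s5]')   # raw string: an equivalent alternative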
---
 dace/codegen/control_flow.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dace/codegen/control_flow.py b/dace/codegen/control_flow.py
index 28bf38f14d..a198ed371b 100644
--- a/dace/codegen/control_flow.py
+++ b/dace/codegen/control_flow.py
@@ -30,7 +30,7 @@
 
       x < 5
      /------>[s2]--------\\
-[s1] \                    ->[s5]
+[s1] \\                    ->[s5]
      ------>[s3]->[s4]--/
       x >= 5
 

From c9277cefc2f52a06adbbb02c4a9efd89eebf8d6b Mon Sep 17 00:00:00 2001
From: Alexandros Nikolaos Ziogas
Date: Sun, 8 Oct 2023 11:57:07 +0200
Subject: [PATCH 029/163] Using StrConstant instead of ast.Str.

---
 dace/frontend/python/newast.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dace/frontend/python/newast.py b/dace/frontend/python/newast.py
index 1d0dbc34dd..eee6719825 100644
--- a/dace/frontend/python/newast.py
+++ b/dace/frontend/python/newast.py
@@ -4635,7 +4635,7 @@ def _visitname(self, name: str, node: ast.AST):
         return rname
 
     #### Visitors that return arrays
-    def visit_Str(self, node: ast.Str):
+    def visit_Str(self, node: StrConstant):
         # A string constant returns a string literal
         return StringLiteral(node.s)
 

From 188480931d19954df770e663e5b684ae3f4ec822 Mon Sep 17 00:00:00 2001
From: Alexandros Nikolaos Ziogas
Date: Sun, 8 Oct 2023 12:00:46 +0200
Subject: [PATCH 030/163] Fixed invalid escape sequence backslash-asterisk.

---
 dace/codegen/instrumentation/papi.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dace/codegen/instrumentation/papi.py b/dace/codegen/instrumentation/papi.py
index bc7163ea9b..c0d3b657a1 100644
--- a/dace/codegen/instrumentation/papi.py
+++ b/dace/codegen/instrumentation/papi.py
@@ -448,7 +448,7 @@ class PAPIUtils(object):
     def available_counters() -> Dict[str, int]:
         """
         Returns the available PAPI counters on this machine. Only works on
-        \*nix based systems with ``grep`` and ``papi-tools`` installed.
+        *nix based systems with ``grep`` and ``papi-tools`` installed.
 
         :return: A set of available PAPI counters in the form of a dictionary
                  mapping from counter name to the number of native hardware

From 209d44abfc2b556b9b132afbd576f11bd7bc7c55 Mon Sep 17 00:00:00 2001
From: Alexandros Nikolaos Ziogas
Date: Sun, 8 Oct 2023 12:02:30 +0200
Subject: [PATCH 031/163] Removed extraneous and deprecated alias.

---
 tests/fpga/hbm_transform_test.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/fpga/hbm_transform_test.py b/tests/fpga/hbm_transform_test.py
index 6438ac7492..0346837fbc 100644
--- a/tests/fpga/hbm_transform_test.py
+++ b/tests/fpga/hbm_transform_test.py
@@ -1,7 +1,6 @@
-# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
+# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved.
 
 from dace.fpga_testing import xilinx_test
-from numpy.lib import math
 from dace.sdfg.state import SDFGState
 import numpy as np
 from dace import dtypes

From b5cb4b6666143c4b8dc4e0b297e2a171c7cc752f Mon Sep 17 00:00:00 2001
From: Alexandros Nikolaos Ziogas
Date: Sun, 8 Oct 2023 12:09:53 +0200
Subject: [PATCH 032/163] Accessing numerical constant value with t.value for Python >= 3.8.
--- dace/codegen/cppunparse.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/dace/codegen/cppunparse.py b/dace/codegen/cppunparse.py index 77dd34d478..2b1328ca8b 100644 --- a/dace/codegen/cppunparse.py +++ b/dace/codegen/cppunparse.py @@ -729,25 +729,26 @@ def _Repr(self, t): raise NotImplementedError('Invalid C++') def _Num(self, t): - repr_n = repr(t.n) + t_n = t.value if sys.version_info >= (3, 8) else t.n + repr_n = repr(t_n) # For complex values, use DTYPE_TO_TYPECLASS dictionary - if isinstance(t.n, complex): + if isinstance(t_n, complex): dtype = dtypes.DTYPE_TO_TYPECLASS[complex] # Handle large integer values - if isinstance(t.n, int): - bits = t.n.bit_length() + if isinstance(t_n, int): + bits = t_n.bit_length() if bits == 32: # Integer, potentially unsigned - if t.n >= 0: # unsigned + if t_n >= 0: # unsigned repr_n += 'U' else: # signed, 64-bit repr_n += 'LL' elif 32 < bits <= 63: repr_n += 'LL' - elif bits == 64 and t.n >= 0: + elif bits == 64 and t_n >= 0: repr_n += 'ULL' elif bits >= 64: - warnings.warn(f'Value wider than 64 bits encountered in expression ({t.n}), emitting as-is') + warnings.warn(f'Value wider than 64 bits encountered in expression ({t_n}), emitting as-is') if repr_n.endswith("j"): self.write("%s(0, %s)" % (dtype, repr_n.replace("inf", INFSTR)[:-1])) From 52011cbde9ebd427797ebbd456f9845fdc456b36 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Sun, 8 Oct 2023 12:12:32 +0200 Subject: [PATCH 033/163] Accessing numerical constant value with node.value for Python >= 3.8. --- dace/frontend/python/astutils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/frontend/python/astutils.py b/dace/frontend/python/astutils.py index 49d7278462..67d8b6aded 100644 --- a/dace/frontend/python/astutils.py +++ b/dace/frontend/python/astutils.py @@ -610,7 +610,7 @@ def visit_Constant(self, node): def visit_Num(self, node: NumConstant): newname = f'__uu{self.id}' - self.gvars[newname] = node.n + self.gvars[newname] = node.value if sys.version_info >= (3, 8) else node.n self.id += 1 return ast.copy_location(ast.Name(id=newname, ctx=ast.Load()), node) From e4288ed4a07f0fa0b03aa81e04238275f9952b20 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Sun, 8 Oct 2023 12:15:52 +0200 Subject: [PATCH 034/163] Accessing numerical constant value with node.func.value for Python >= 3.8. --- dace/frontend/python/preprocessing.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/dace/frontend/python/preprocessing.py b/dace/frontend/python/preprocessing.py index 239875118f..f65f4c4a01 100644 --- a/dace/frontend/python/preprocessing.py +++ b/dace/frontend/python/preprocessing.py @@ -20,6 +20,20 @@ from dace.frontend.python.common import (DaceSyntaxError, SDFGConvertible, SDFGClosure, StringLiteral) +if sys.version_info < (3, 8): + BytesConstant = ast.Bytes + EllipsisConstant = ast.Ellipsis + NameConstant = ast.NameConstant + NumConstant = ast.Num + StrConstant = ast.Str +else: + BytesConstant = ast.Constant + EllipsisConstant = ast.Constant + NameConstant = ast.Constant + NumConstant = ast.Constant + StrConstant = ast.Constant + + class DaceRecursionError(Exception): """ Exception that indicates a recursion in a data-centric parsed context. 
@@ -1358,7 +1372,7 @@ def _get_given_args(self, node: ast.Call, function: 'DaceProgram') -> Set[str]: def visit_Call(self, node: ast.Call): # Only parse calls to parsed SDFGConvertibles - if not isinstance(node.func, (ast.Num, ast.Constant)): + if not isinstance(node.func, (NumConstant, ast.Constant)): self.seen_calls.add(astutils.unparse(node.func)) return self.generic_visit(node) if hasattr(node.func, 'oldnode'): @@ -1366,10 +1380,7 @@ def visit_Call(self, node: ast.Call): self.seen_calls.add(astutils.unparse(node.func.oldnode.func)) else: self.seen_calls.add(astutils.rname(node.func.oldnode)) - if isinstance(node.func, ast.Num): - value = node.func.n - else: - value = node.func.value + value = node.func.value if sys.version_info >= (3, 8) else node.func.n if not hasattr(value, '__sdfg__') or isinstance(value, SDFG): return self.generic_visit(node) From 6afee58aedfedc5635e3607c328d6b7e56dfa77e Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Sun, 8 Oct 2023 12:19:51 +0200 Subject: [PATCH 035/163] ast.Ellipsis check predicated by Python < 3.8. --- dace/frontend/python/memlet_parser.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dace/frontend/python/memlet_parser.py b/dace/frontend/python/memlet_parser.py index aa9d4ddb0d..9bd051be5c 100644 --- a/dace/frontend/python/memlet_parser.py +++ b/dace/frontend/python/memlet_parser.py @@ -1,7 +1,7 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. import ast import copy -import re +import sys from collections import namedtuple from typing import Any, Dict, List, Optional, Tuple, Union from dataclasses import dataclass @@ -114,7 +114,7 @@ def _fill_missing_slices(das, ast_ndslice, array, indices): offsets.append(idx) idx += 1 new_idx += 1 - elif (isinstance(dim, ast.Ellipsis) or dim is Ellipsis + elif ((sys.version_info < (3, 8) and isinstance(dim, ast.Ellipsis)) or dim is Ellipsis or (isinstance(dim, ast.Constant) and dim.value is Ellipsis) or (isinstance(dim, ast.Name) and dim.id is Ellipsis)): if has_ellipsis: From b302ec5157ac4107068210e33604c90d9b63fb50 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Sun, 8 Oct 2023 12:20:49 +0200 Subject: [PATCH 036/163] Using NameConstant instead of ast.NameConstant. 
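A standalone sketch of why the alias is needed: None, True and False parse to ast.Constant on Python 3.8+, and to ast.NameConstant on older versions.

    import ast
    import sys

    NameConstant = ast.Constant if sys.version_info >= (3, 8) else ast.NameConstant
    node = ast.parse('None', mode='eval').body
    print(isinstance(node, NameConstant) and node.value is None)  # True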
--- dace/frontend/python/memlet_parser.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/dace/frontend/python/memlet_parser.py b/dace/frontend/python/memlet_parser.py index 9bd051be5c..a95bf82046 100644 --- a/dace/frontend/python/memlet_parser.py +++ b/dace/frontend/python/memlet_parser.py @@ -16,6 +16,22 @@ MemletType = Union[ast.Call, ast.Attribute, ast.Subscript, ast.Name] +if sys.version_info < (3, 8): + _simple_ast_nodes = (ast.Constant, ast.Name, ast.NameConstant, ast.Num) + BytesConstant = ast.Bytes + EllipsisConstant = ast.Ellipsis + NameConstant = ast.NameConstant + NumConstant = ast.Num + StrConstant = ast.Str +else: + _simple_ast_nodes = (ast.Constant, ast.Name) + BytesConstant = ast.Constant + EllipsisConstant = ast.Constant + NameConstant = ast.Constant + NumConstant = ast.Constant + StrConstant = ast.Constant + + @dataclass class MemletExpr: name: str @@ -125,7 +141,7 @@ def _fill_missing_slices(das, ast_ndslice, array, indices): ndslice[j] = (0, array.shape[j] - 1, 1) idx += 1 new_idx += 1 - elif (dim is None or (isinstance(dim, (ast.Constant, ast.NameConstant)) and dim.value is None)): + elif (dim is None or (isinstance(dim, (ast.Constant, NameConstant)) and dim.value is None)): new_axes.append(new_idx) new_idx += 1 # NOTE: Do not increment idx here From fa1d5c78eba1b83dc043c59b1c26d74559de0513 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Sun, 8 Oct 2023 12:23:34 +0200 Subject: [PATCH 037/163] Check for ast.Num predicated by Python < 3.8. --- dace/codegen/cppunparse.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dace/codegen/cppunparse.py b/dace/codegen/cppunparse.py index 2b1328ca8b..58d4b2cb66 100644 --- a/dace/codegen/cppunparse.py +++ b/dace/codegen/cppunparse.py @@ -985,7 +985,9 @@ def _Attribute(self, t): # Special case: 3.__abs__() is a syntax error, so if t.value # is an integer literal then we need to either parenthesize # it or add an extra space to get 3 .__abs__(). - if (isinstance(t.value, (ast.Num, ast.Constant)) and isinstance(t.value.n, int)): + if isinstance(t.value, ast.Constant) and isinstance(t.value.value, int): + self.write(" ") + elif sys.version_info < (3, 8) and isinstance(t.value, ast.Num) and isinstance(t.value.n, int): self.write(" ") if (isinstance(t.value, ast.Name) and t.value.id in ('dace', 'dace::math', 'dace::cmath')): self.write("::") From fa805f3f25b49aceb3b984f17bfd92f20eb8e379 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 09:10:31 +0200 Subject: [PATCH 038/163] Fixed invalid escape sequence backslash-space. --- dace/transformation/dataflow/mpi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dace/transformation/dataflow/mpi.py b/dace/transformation/dataflow/mpi.py index 8138b86b26..b6a467dc21 100644 --- a/dace/transformation/dataflow/mpi.py +++ b/dace/transformation/dataflow/mpi.py @@ -23,9 +23,9 @@ class MPITransformMap(transformation.SingleStateTransformation): .. code-block:: text Input1 - Output1 - \ / + \\ / Input2 --- MapEntry -- Arbitrary R -- MapExit -- Output2 - / \ + / \\ InputN - OutputN From dbd286ddbb49121e3be0373cb113aa5d81c8f85c Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 09:24:29 +0200 Subject: [PATCH 039/163] Predicated access to `n` attribute. 
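Context: on the pre-3.8 node classes the literal payload lives in the `n` attribute, while ast.Constant stores it in `value`; hence the predication below. A hedged helper capturing the idiom used throughout these patches (hypothetical name, not from the source):

    import ast
    import sys

    def literal_value(node: ast.AST):
        # ast.Num.n became ast.Constant.value in Python 3.8
        return node.value if sys.version_info >= (3, 8) else node.n

    assert literal_value(ast.parse('42', mode='eval').body) == 42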
--- dace/frontend/python/preprocessing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dace/frontend/python/preprocessing.py b/dace/frontend/python/preprocessing.py index f65f4c4a01..052e823a2f 100644 --- a/dace/frontend/python/preprocessing.py +++ b/dace/frontend/python/preprocessing.py @@ -768,7 +768,8 @@ def visit_Subscript(self, node: ast.Subscript) -> Any: def visit_Call(self, node: ast.Call) -> Any: from dace.frontend.python.interface import in_program, inline # Avoid import loop - if hasattr(node.func, 'n') and isinstance(node.func.n, SDFGConvertible): + if (hasattr(node.func, 'value') and isinstance(node.func.value, SDFGConvertible) or + sys.version_info < (3, 8) and hasattr(node.func, 'n') and isinstance(node.func.n, SDFGConvertible)): # Skip already-parsed calls return self.generic_visit(node) From 1bfaee5807826709216285fb12e3561cd17b0ccc Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 09:26:36 +0200 Subject: [PATCH 040/163] Fixed pytest None warning deprecation. --- tests/compile_sdfg_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/compile_sdfg_test.py b/tests/compile_sdfg_test.py index 33ace1156a..3120359262 100644 --- a/tests/compile_sdfg_test.py +++ b/tests/compile_sdfg_test.py @@ -51,7 +51,7 @@ def tester(a: int): return a + 1 csdfg = tester.to_sdfg().compile() - with pytest.warns(None, match='Casting'): + with pytest.warns(UserWarning, match='Casting'): result = csdfg(0.1) assert result.item() == 1 From 8e0f88328cab1f0584382ccdff0131743e534fd0 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 09:32:19 +0200 Subject: [PATCH 041/163] Fixed pytest incorrect return deprecation warning. --- tests/blas/nodes/dot_test.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/blas/nodes/dot_test.py b/tests/blas/nodes/dot_test.py index d5f1d24263..e30f03785c 100755 --- a/tests/blas/nodes/dot_test.py +++ b/tests/blas/nodes/dot_test.py @@ -92,23 +92,23 @@ def run_test(target, size, vector_length): def test_dot_pure(): - return run_test("pure", 64, 1) + assert isinstance(run_test("pure", 64, 1), dace.SDFG) @xilinx_test() def test_dot_xilinx(): - return run_test("xilinx", 64, 16) + assert isinstance(run_test("xilinx", 64, 16), dace.SDFG) @xilinx_test() def test_dot_xilinx_decoupled(): with set_temporary("compiler", "xilinx", "decouple_array_interfaces", value=True): - return run_test("xilinx", 64, 16) + assert isinstance(run_test("xilinx", 64, 16), dace.SDFG) @intel_fpga_test() def test_dot_intel_fpga(): - return run_test("intel_fpga", 64, 16) + assert isinstance(run_test("intel_fpga", 64, 16), dace.SDFG) if __name__ == "__main__": @@ -119,4 +119,4 @@ def test_dot_intel_fpga(): args = parser.parse_args() size = args.N - run_test(target, size, vector_length) + run_test(args.target, size, args.vector_length) From b64f12486ad3b1ff7e908037e1c531d52b5118c1 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 09:33:42 +0200 Subject: [PATCH 042/163] Predicated access to s attribute. 
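This is the string analogue of the numeric case above: ast.Str keeps the text in `s`, ast.Constant in `value`. Sketch (standalone illustration, assuming the node is a string literal):

    import ast
    import sys

    def string_value(node: ast.AST) -> str:
        return node.value if sys.version_info >= (3, 8) else node.s

    assert string_value(ast.parse("'code'", mode='eval').body) == 'code'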
--- dace/frontend/python/newast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/frontend/python/newast.py b/dace/frontend/python/newast.py index eee6719825..1d1294809c 100644 --- a/dace/frontend/python/newast.py +++ b/dace/frontend/python/newast.py @@ -1014,7 +1014,7 @@ def visit_TopLevelExpr(self, node): def visit_TopLevelStr(self, node: StrConstant): if self.extcode != None: raise DaceSyntaxError(self, node, 'Cannot provide more than one intrinsic implementation ' + 'for tasklet') - self.extcode = node.s + self.extcode = node.value if sys.version_info >= (3, 8) else node.s # TODO: Should get detected by _parse_Tasklet() if self.lang is None: From 70a61bb2590f8be414fa31992e357982e04a0ef7 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 09:35:33 +0200 Subject: [PATCH 043/163] Using NameConstant alias. --- dace/codegen/cppunparse.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/dace/codegen/cppunparse.py b/dace/codegen/cppunparse.py index 58d4b2cb66..d8d52846ac 100644 --- a/dace/codegen/cppunparse.py +++ b/dace/codegen/cppunparse.py @@ -87,6 +87,21 @@ from dace import dtypes from dace.codegen.tools import type_inference + +if sys.version_info < (3, 8): + BytesConstant = ast.Bytes + EllipsisConstant = ast.Ellipsis + NameConstant = ast.NameConstant + NumConstant = ast.Num + StrConstant = ast.Str +else: + BytesConstant = ast.Constant + EllipsisConstant = ast.Constant + NameConstant = ast.Constant + NumConstant = ast.Constant + StrConstant = ast.Constant + + # Large float and imaginary literals get turned into infinities in the AST. # We unparse those infinities to INFSTR. INFSTR = "1e" + repr(sys.float_info.max_10_exp + 1) @@ -574,7 +589,7 @@ def _generic_FunctionDef(self, t, is_async=False): self.write('/* async */ ') if getattr(t, "returns", False): - if isinstance(t.returns, ast.NameConstant): + if isinstance(t.returns, NameConstant): if t.returns.value is None: self.write('void') else: From f34cb508f1208c11868db4300732ccdd25812d79 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 09:37:51 +0200 Subject: [PATCH 044/163] Using NumConstant and predication for n attribute access. --- dace/codegen/cppunparse.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dace/codegen/cppunparse.py b/dace/codegen/cppunparse.py index d8d52846ac..e4456e3e18 100644 --- a/dace/codegen/cppunparse.py +++ b/dace/codegen/cppunparse.py @@ -914,13 +914,13 @@ def _BinOp(self, t): self.write(")") # Special cases for powers elif t.op.__class__.__name__ == 'Pow': - if isinstance(t.right, (ast.Num, ast.Constant, ast.UnaryOp)): + if isinstance(t.right, (NumConstant, ast.Constant, ast.UnaryOp)): power = None - if isinstance(t.right, (ast.Num, ast.Constant)): - power = t.right.n + if isinstance(t.right, (NumConstant, ast.Constant)): + power = t.right.value if sys.version_info >= (3, 8) else t.right.n elif isinstance(t.right, ast.UnaryOp) and isinstance(t.right.op, ast.USub): - if isinstance(t.right.operand, (ast.Num, ast.Constant)): - power = -t.right.operand.n + if isinstance(t.right.operand, (NumConstant, ast.Constant)): + power = - (t.right.operand.value if sys.version_info >= (3, 8) else t.right.operand.n) if power is not None and int(power) == power: negative = power < 0 From 8689c63f3fa6b9d17913f841fbb2123e3876b87c Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 09:40:49 +0200 Subject: [PATCH 045/163] Predicated access to n attribute.
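Aside on the power handling touched in the previous patch: constant integer exponents are folded into repeated multiplication rather than a pow() call. A rough sketch of the idea (hypothetical helper, not cppunparse's actual code; assumes a nonzero integer exponent):

    def expand_pow(base: str, power: int) -> str:
        # Fold x ** k into repeated multiplication (k a nonzero integer)
        body = ' * '.join([base] * abs(power))
        return f'({body})' if power > 0 else f'(1.0 / ({body}))'

    assert expand_pow('x', 3) == '(x * x * x)'
    assert expand_pow('x', -2) == '(1.0 / (x * x))'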
--- dace/codegen/targets/cpp.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dace/codegen/targets/cpp.py b/dace/codegen/targets/cpp.py index c3bf9c4027..960519e310 100644 --- a/dace/codegen/targets/cpp.py +++ b/dace/codegen/targets/cpp.py @@ -1,4 +1,4 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. """ Helper functions for C++ code generation. NOTE: The C++ code generator is currently located in cpu.py. @@ -9,6 +9,7 @@ import itertools import math import numbers +import sys import warnings import sympy as sp @@ -1275,7 +1276,8 @@ def visit_BinOp(self, node: ast.BinOp): evaluated_constant = symbolic.evaluate(unparsed, self.constants) evaluated = symbolic.symstr(evaluated_constant, cpp_mode=True) value = ast.parse(evaluated).body[0].value - if isinstance(evaluated_node, numbers.Number) and evaluated_node != value.n: + if isinstance(evaluated_node, numbers.Number) and evaluated_node != ( + value.value if sys.info_version >= (3, 8) else value.n): raise TypeError node.right = ast.parse(evaluated).body[0].value except (TypeError, AttributeError, NameError, KeyError, ValueError, SyntaxError): From 2a756b95524075abbffde8012771f7214366a3a5 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 09:46:19 +0200 Subject: [PATCH 046/163] Fixed pytest mark misconfiguration. --- tests/library/gemm_test.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/library/gemm_test.py b/tests/library/gemm_test.py index df60d1aa43..07e9006ece 100644 --- a/tests/library/gemm_test.py +++ b/tests/library/gemm_test.py @@ -1,4 +1,4 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. import pytest import warnings import itertools @@ -132,7 +132,10 @@ def numpy_gemm(A, B, C, transA, transB, alpha, beta): assert diff <= 1e-5 -@pytest.mark.parametrize(('implementation', ), [('pure', ), ('MKL', ), pytest.param('cuBLAS', marks=pytest.mark.gpu)]) +@pytest.mark.parametrize( + ('implementation', ), + [('pure', ), pytest.param('MKL', marks=pytest.mark.mkl), + pytest.param('cuBLAS', marks=pytest.mark.gpu)]) def test_library_gemm(implementation): param_grid_trans = dict( transA=[True, False], From 736bd0b4c78e02ea58fdc35cfc9ae058c91cc450 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 09:49:27 +0200 Subject: [PATCH 047/163] Using Index and NumConstant. --- dace/libraries/stencil/subscript_converter.py | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/dace/libraries/stencil/subscript_converter.py b/dace/libraries/stencil/subscript_converter.py index 8abb3fc6c8..d159b345cb 100644 --- a/dace/libraries/stencil/subscript_converter.py +++ b/dace/libraries/stencil/subscript_converter.py @@ -1,9 +1,34 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. 
import ast +import sys from collections import defaultdict from typing import Tuple +if sys.version_info < (3, 8): + _simple_ast_nodes = (ast.Constant, ast.Name, ast.NameConstant, ast.Num) + BytesConstant = ast.Bytes + EllipsisConstant = ast.Ellipsis + NameConstant = ast.NameConstant + NumConstant = ast.Num + StrConstant = ast.Str +else: + _simple_ast_nodes = (ast.Constant, ast.Name) + BytesConstant = ast.Constant + EllipsisConstant = ast.Constant + NameConstant = ast.Constant + NumConstant = ast.Constant + StrConstant = ast.Constant + + +if sys.version_info < (3, 9): + Index = ast.Index + ExtSlice = ast.ExtSlice +else: + Index = type(None) + ExtSlice = type(None) + + class SubscriptConverter(ast.NodeTransformer): """ Finds all subscript accesses using constant indices in the given code, and @@ -67,9 +92,9 @@ def visit_Subscript(self, node: ast.Subscript): # This can be a bunch of different things, varying between Python 3.8 # and Python 3.9, so try hard to unpack it into an index we can use. index_tuple = node.slice - if isinstance(index_tuple, (ast.Subscript, ast.Index)): + if isinstance(index_tuple, (ast.Subscript, Index)): index_tuple = index_tuple.value - if isinstance(index_tuple, (ast.Constant, ast.Num)): + if isinstance(index_tuple, (ast.Constant, NumConstant)): index_tuple = (index_tuple, ) if isinstance(index_tuple, ast.Tuple): index_tuple = index_tuple.elts From e553510c476c1eee822e31a8fe97c52b5f4dec8f Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 09:55:33 +0200 Subject: [PATCH 048/163] Refactored if chain/nest --- dace/frontend/python/preprocessing.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dace/frontend/python/preprocessing.py b/dace/frontend/python/preprocessing.py index 052e823a2f..3f06b81f63 100644 --- a/dace/frontend/python/preprocessing.py +++ b/dace/frontend/python/preprocessing.py @@ -356,13 +356,13 @@ def remake_dict(args): # Remake keyword argument names from AST kwarg_names = [] for kw in arg.keys: - if isinstance(kw, ast.Num): + if sys.version_info >= (3, 8) and isinstance(kw, ast.Constant): + kwarg_names.append(kw.value) + elif sys.version_info < (3, 8) and isinstance(kw, ast.Num): kwarg_names.append(kw.n) - elif isinstance(kw, (ast.Str, ast.Bytes)): + elif sys.version_info < (3, 8) and isinstance(kw, (ast.Str, ast.Bytes)): kwarg_names.append(kw.s) - elif isinstance(kw, ast.NameConstant): - kwarg_names.append(kw.value) - elif sys.version_info >= (3, 8) and isinstance(kw, ast.Constant): + elif sys.version_info < (3, 8) and isinstance(kw, ast.NameConstant): kwarg_names.append(kw.value) else: raise NotImplementedError(f'Key type {type(kw).__name__} is not supported') From 5ae0b4731826faa38847cd084d3c8e9a9cd0eccb Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 10:00:52 +0200 Subject: [PATCH 049/163] Fixed use of ast.Str. --- dace/frontend/python/preprocessing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dace/frontend/python/preprocessing.py b/dace/frontend/python/preprocessing.py index 3f06b81f63..c2d8cebd10 100644 --- a/dace/frontend/python/preprocessing.py +++ b/dace/frontend/python/preprocessing.py @@ -873,7 +873,8 @@ def visit_JoinedStr(self, node: ast.JoinedStr) -> Any: parsed = [ not isinstance(v, ast.FormattedValue) or isinstance(v.value, ast.Constant) for v in visited.values ] - values = [v.s if isinstance(v, ast.Str) else astutils.unparse(v.value) for v in visited.values] + # NOTE: In Python < 3.8, v should be ast.Str. 
In Python 3.8 and later, it is (probably) ast.Constant. + values = [astutils.unparse(v.value) if sys.vesion_info >= (3, 8) else v.s for v in visited.values] return ast.copy_location( ast.Constant(kind='', value=''.join(('{%s}' % v) if not p else v for p, v in zip(parsed, values))), node) From fead1d6b60b95c14966216081b764cef7eb742b1 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 10:05:00 +0200 Subject: [PATCH 050/163] Fixed ast.Num and n attribute. --- dace/codegen/tools/type_inference.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/dace/codegen/tools/type_inference.py b/dace/codegen/tools/type_inference.py index 8ee8632c65..f159088461 100644 --- a/dace/codegen/tools/type_inference.py +++ b/dace/codegen/tools/type_inference.py @@ -338,7 +338,15 @@ def _BinOp(t, symbols, inferred_symbols): return dtypes.result_type_of(type_left, type_right) # Special case for integer power elif t.op.__class__.__name__ == 'Pow': - if (isinstance(t.right, (ast.Num, ast.Constant)) and int(t.right.n) == t.right.n and t.right.n >= 0): + if (sys.version_info >= (3, 8) and isinstance(t.right, ast.Constant) and + int(t.right.value) == t.right.value and t.right.value >= 0): + if t.right.value != 0: + type_left = _dispatch(t.left, symbols, inferred_symbols) + for i in range(int(t.right.n) - 1): + _dispatch(t.left, symbols, inferred_symbols) + return dtypes.result_type_of(type_left, dtypes.typeclass(np.uint32)) + elif (sys.version_info < (3, 8) and isinstance(t.right, ast.Num) and + int(t.right.n) == t.right.n and t.right.n >= 0): if t.right.n != 0: type_left = _dispatch(t.left, symbols, inferred_symbols) for i in range(int(t.right.n) - 1): From e5c6451dd4dce2cd84c2ca0301e91ba3bf9c277c Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 10:30:22 +0200 Subject: [PATCH 051/163] Disallowing type aliases. --- dace/frontend/python/newast.py | 5 ++++- tests/python_frontend/type_statement_test.py | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 tests/python_frontend/type_statement_test.py diff --git a/dace/frontend/python/newast.py b/dace/frontend/python/newast.py index 1d1294809c..e6f9247157 100644 --- a/dace/frontend/python/newast.py +++ b/dace/frontend/python/newast.py @@ -303,7 +303,7 @@ def repl_callback(repldict): # Extra AST node types that are disallowed after preprocessing _DISALLOWED_STMTS = DISALLOWED_STMTS + [ 'Global', 'Assert', 'Print', 'Nonlocal', 'Raise', 'Starred', 'AsyncFor', 'ListComp', 'GeneratorExp', 'SetComp', - 'DictComp', 'comprehension' + 'DictComp', 'comprehension', 'TypeAlias' ] TaskletType = Union[ast.FunctionDef, ast.With, ast.For] @@ -4712,6 +4712,9 @@ def visit_Dict(self, node: ast.Dict): def visit_Lambda(self, node: ast.Lambda): # Return a string representation of the function return astutils.unparse(node) + + def visit_TypeAlias(self, node: ast.TypeAlias): + raise NotImplementedError('Type aliases are not supported in DaCe') ############################################################ diff --git a/tests/python_frontend/type_statement_test.py b/tests/python_frontend/type_statement_test.py new file mode 100644 index 0000000000..bdd168a158 --- /dev/null +++ b/tests/python_frontend/type_statement_test.py @@ -0,0 +1,19 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. 
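Note that the `t.right.n` reads left inside the new branch still resolve on 3.8+, since CPython keeps `n`/`s` as deprecated backward-compatibility property aliases of Constant.value. The guard itself, slightly simplified (sketch, not the source):

    import ast
    import sys

    def is_const_nonneg_int_power(t: ast.BinOp) -> bool:
        # True for expressions like a ** 3: constant, integral, non-negative
        if not isinstance(t.op, ast.Pow):
            return False
        r = t.right
        if sys.version_info >= (3, 8):
            return (isinstance(r, ast.Constant) and isinstance(r.value, (int, float))
                    and int(r.value) == r.value and r.value >= 0)
        return (isinstance(r, ast.Num) and isinstance(r.n, (int, float))
                and int(r.n) == r.n and r.n >= 0)

    assert is_const_nonneg_int_power(ast.parse('a ** 3', mode='eval').body)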
+import dace +import pytest + + +def test_type_statement(): + + @dace.program + def type_statement(): + type Scalar[T] = T + A: Scalar[dace.float32] = 0 + return A + + with pytest.raises(dace.frontend.python.common.DaceSyntaxError): + type_statement() + + +if __name__ == '__main__': + test_type_statement() From 9a96ef3606e80c049a3678af154744d013b44d8e Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 11:06:38 +0200 Subject: [PATCH 052/163] Fixed TypeAlias for older Python versions. --- dace/frontend/python/newast.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dace/frontend/python/newast.py b/dace/frontend/python/newast.py index e6f9247157..7831b4d81a 100644 --- a/dace/frontend/python/newast.py +++ b/dace/frontend/python/newast.py @@ -74,6 +74,12 @@ ExtSlice = type(None) +if sys.version_info < (3, 12): + TypeAlias = type(None) +else: + TypeAlias = ast.TypeAlias + + class SkipCall(Exception): """ Exception used to skip calls to functions that cannot be parsed. """ pass @@ -4713,7 +4719,7 @@ def visit_Lambda(self, node: ast.Lambda): # Return a string representation of the function return astutils.unparse(node) - def visit_TypeAlias(self, node: ast.TypeAlias): + def visit_TypeAlias(self, node: TypeAlias): raise NotImplementedError('Type aliases are not supported in DaCe') ############################################################ From e8317ed137376c11999d8d30567ff1fb1b2ef1b4 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 11:14:03 +0200 Subject: [PATCH 053/163] Don't run test for Python < 3.12. --- tests/python_frontend/type_statement_test.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/python_frontend/type_statement_test.py b/tests/python_frontend/type_statement_test.py index bdd168a158..1b8a27c72e 100644 --- a/tests/python_frontend/type_statement_test.py +++ b/tests/python_frontend/type_statement_test.py @@ -1,6 +1,7 @@ # Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. import dace import pytest +import sys def test_type_statement(): @@ -11,8 +12,11 @@ def type_statement(): A: Scalar[dace.float32] = 0 return A - with pytest.raises(dace.frontend.python.common.DaceSyntaxError): - type_statement() + if sys.version_info >= (3, 12): + with pytest.raises(dace.frontend.python.common.DaceSyntaxError): + type_statement() + else: + assert True if __name__ == '__main__': From d1d461649e062edeac98bef827493e5c6a2af9da Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 11:54:02 +0200 Subject: [PATCH 054/163] Fixed typo. --- dace/frontend/python/preprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/frontend/python/preprocessing.py b/dace/frontend/python/preprocessing.py index c2d8cebd10..3786c4caea 100644 --- a/dace/frontend/python/preprocessing.py +++ b/dace/frontend/python/preprocessing.py @@ -874,7 +874,7 @@ def visit_JoinedStr(self, node: ast.JoinedStr) -> Any: not isinstance(v, ast.FormattedValue) or isinstance(v.value, ast.Constant) for v in visited.values ] # NOTE: In Python < 3.8, v should be ast.Str. In Python 3.8 and later, it is (probably) ast.Constant. 
- values = [astutils.unparse(v.value) if sys.vesion_info >= (3, 8) else v.s for v in visited.values] + values = [astutils.unparse(v.value) if sys.version_info >= (3, 8) else v.s for v in visited.values] return ast.copy_location( ast.Constant(kind='', value=''.join(('{%s}' % v) if not p else v for p, v in zip(parsed, values))), node) From b41ba7e236fdc80b3ffd800e483cf1c6f278a16d Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 12:10:22 +0200 Subject: [PATCH 055/163] Fixed typo. --- dace/codegen/targets/cpp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/codegen/targets/cpp.py b/dace/codegen/targets/cpp.py index 960519e310..3d26f76214 100644 --- a/dace/codegen/targets/cpp.py +++ b/dace/codegen/targets/cpp.py @@ -1277,7 +1277,7 @@ def visit_BinOp(self, node: ast.BinOp): evaluated = symbolic.symstr(evaluated_constant, cpp_mode=True) value = ast.parse(evaluated).body[0].value if isinstance(evaluated_node, numbers.Number) and evaluated_node != ( - value.value if sys.info_version >= (3, 8) else value.n): + value.value if sys.version_info >= (3, 8) else value.n): raise TypeError node.right = ast.parse(evaluated).body[0].value except (TypeError, AttributeError, NameError, KeyError, ValueError, SyntaxError): From 52002ac1a67235b85856462ec4853d35e20287c2 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 12:13:58 +0200 Subject: [PATCH 056/163] Trying to disable test for Python < 3.12. --- tests/python_frontend/type_statement_test.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/python_frontend/type_statement_test.py b/tests/python_frontend/type_statement_test.py index 1b8a27c72e..bf53ca5150 100644 --- a/tests/python_frontend/type_statement_test.py +++ b/tests/python_frontend/type_statement_test.py @@ -6,16 +6,19 @@ def test_type_statement(): - @dace.program - def type_statement(): - type Scalar[T] = T - A: Scalar[dace.float32] = 0 - return A - if sys.version_info >= (3, 12): + + @dace.program + def type_statement(): + type Scalar[T] = T + A: Scalar[dace.float32] = 0 + return A + with pytest.raises(dace.frontend.python.common.DaceSyntaxError): type_statement() + else: + assert True From 6c90205424cbe4d72d4ff7e48c923fb52564bfce Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 12:19:07 +0200 Subject: [PATCH 057/163] Added py312 mark. --- pytest.ini | 1 + tests/python_frontend/type_statement_test.py | 22 +++++++------------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/pytest.ini b/pytest.ini index 087be3d897..513158f531 100644 --- a/pytest.ini +++ b/pytest.ini @@ -14,6 +14,7 @@ markers = scalapack: Test requires ScaLAPACK (Intel MKL and OpenMPI). (select with '-m scalapack') datainstrument: Test uses data instrumentation (select with '-m datainstrument') hptt: Test requires the HPTT library (select with '-m "hptt') + py312: Test requires Python 3.12 or later (select with '-m "py312"') python_files = *_test.py *_cudatest.py diff --git a/tests/python_frontend/type_statement_test.py b/tests/python_frontend/type_statement_test.py index bf53ca5150..2009529f3a 100644 --- a/tests/python_frontend/type_statement_test.py +++ b/tests/python_frontend/type_statement_test.py @@ -1,25 +1,19 @@ # Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. 
import dace import pytest -import sys +@pytest.mark.py312 def test_type_statement(): - if sys.version_info >= (3, 12): - - @dace.program - def type_statement(): - type Scalar[T] = T - A: Scalar[dace.float32] = 0 - return A - - with pytest.raises(dace.frontend.python.common.DaceSyntaxError): - type_statement() + @dace.program + def type_statement(): + type Scalar[T] = T + A: Scalar[dace.float32] = 0 + return A - else: - - assert True + with pytest.raises(dace.frontend.python.common.DaceSyntaxError): + type_statement() if __name__ == '__main__': From a9cc68652f77a795505519a4351f2f71b2d5c858 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 12:23:54 +0200 Subject: [PATCH 058/163] Comment out test. --- tests/python_frontend/type_statement_test.py | 22 +++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/tests/python_frontend/type_statement_test.py b/tests/python_frontend/type_statement_test.py index 2009529f3a..16ec1613db 100644 --- a/tests/python_frontend/type_statement_test.py +++ b/tests/python_frontend/type_statement_test.py @@ -3,18 +3,20 @@ import pytest -@pytest.mark.py312 -def test_type_statement(): +# TODO: Investigate why pytest parses the DaCeProgram, even when the test is not supposed to run. +# @pytest.mark.py312 +# def test_type_statement(): - @dace.program - def type_statement(): - type Scalar[T] = T - A: Scalar[dace.float32] = 0 - return A +# @dace.program +# def type_statement(): +# type Scalar[T] = T +# A: Scalar[dace.float32] = 0 +# return A - with pytest.raises(dace.frontend.python.common.DaceSyntaxError): - type_statement() +# with pytest.raises(dace.frontend.python.common.DaceSyntaxError): +# type_statement() if __name__ == '__main__': - test_type_statement() + # test_type_statement() + pass From d5e656ea79c38ba48fe535f5ad5e9925da70f964 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 16:10:27 +0200 Subject: [PATCH 059/163] Fixed JoinedStr visitor method. --- dace/frontend/python/preprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/frontend/python/preprocessing.py b/dace/frontend/python/preprocessing.py index 3786c4caea..af02d6f7d9 100644 --- a/dace/frontend/python/preprocessing.py +++ b/dace/frontend/python/preprocessing.py @@ -874,7 +874,7 @@ def visit_JoinedStr(self, node: ast.JoinedStr) -> Any: not isinstance(v, ast.FormattedValue) or isinstance(v.value, ast.Constant) for v in visited.values ] # NOTE: In Python < 3.8, v should be ast.Str. In Python 3.8 and later, it is (probably) ast.Constant. - values = [astutils.unparse(v.value) if sys.version_info >= (3, 8) else v.s for v in visited.values] + values = [v.s if sys.version_info < (3, 8) and isinstance(v, ast.Str) else v.value for v in visited.values] return ast.copy_location( ast.Constant(kind='', value=''.join(('{%s}' % v) if not p else v for p, v in zip(parsed, values))), node) From 650e386f55279eeb48c93c35fe72bd036960d8cd Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 16:21:36 +0200 Subject: [PATCH 060/163] Added more disallowed statements. 
--- dace/frontend/python/newast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/frontend/python/newast.py b/dace/frontend/python/newast.py index 7831b4d81a..733c3c7f62 100644 --- a/dace/frontend/python/newast.py +++ b/dace/frontend/python/newast.py @@ -309,7 +309,7 @@ def repl_callback(repldict): # Extra AST node types that are disallowed after preprocessing _DISALLOWED_STMTS = DISALLOWED_STMTS + [ 'Global', 'Assert', 'Print', 'Nonlocal', 'Raise', 'Starred', 'AsyncFor', 'ListComp', 'GeneratorExp', 'SetComp', - 'DictComp', 'comprehension', 'TypeAlias' + 'DictComp', 'comprehension', 'TypeAlias', 'TypeVar', 'ParamSpec', 'TypeVarTuple' ] TaskletType = Union[ast.FunctionDef, ast.With, ast.For] From 4cf69590084cb2010cb66d3bd080a0c1162f0892 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 18:08:29 +0200 Subject: [PATCH 061/163] Unparsing constant. --- dace/frontend/python/preprocessing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dace/frontend/python/preprocessing.py b/dace/frontend/python/preprocessing.py index af02d6f7d9..1636e57ad0 100644 --- a/dace/frontend/python/preprocessing.py +++ b/dace/frontend/python/preprocessing.py @@ -873,8 +873,8 @@ def visit_JoinedStr(self, node: ast.JoinedStr) -> Any: parsed = [ not isinstance(v, ast.FormattedValue) or isinstance(v.value, ast.Constant) for v in visited.values ] - # NOTE: In Python < 3.8, v should be ast.Str. In Python 3.8 and later, it is (probably) ast.Constant. - values = [v.s if sys.version_info < (3, 8) and isinstance(v, ast.Str) else v.value for v in visited.values] + values = [v.s if sys.version_info < (3, 8) and isinstance(v, ast.Str) else astutils.unparse(v.value) + for v in visited.values] return ast.copy_location( ast.Constant(kind='', value=''.join(('{%s}' % v) if not p else v for p, v in zip(parsed, values))), node) From d79a4039c893a5ecb4d706b410b7314da8044189 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 19:27:14 +0200 Subject: [PATCH 062/163] Reverted changes to FPGA tests. --- tests/blas/nodes/dot_test.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/blas/nodes/dot_test.py b/tests/blas/nodes/dot_test.py index e30f03785c..a936be60a9 100755 --- a/tests/blas/nodes/dot_test.py +++ b/tests/blas/nodes/dot_test.py @@ -95,20 +95,23 @@ def test_dot_pure(): assert isinstance(run_test("pure", 64, 1), dace.SDFG) +# TODO: Refactor to use assert or return True/False (pytest deprecation of returning non-booleans) @xilinx_test() def test_dot_xilinx(): - assert isinstance(run_test("xilinx", 64, 16), dace.SDFG) + return run_test("xilinx", 64, 16) +# TODO: Refactor to use assert or return True/False (pytest deprecation of returning non-booleans) @xilinx_test() def test_dot_xilinx_decoupled(): with set_temporary("compiler", "xilinx", "decouple_array_interfaces", value=True): - assert isinstance(run_test("xilinx", 64, 16), dace.SDFG) + return run_test("xilinx", 64, 16) +# TODO: Refactor to use assert or return True/False (pytest deprecation of returning non-booleans) @intel_fpga_test() def test_dot_intel_fpga(): - assert isinstance(run_test("intel_fpga", 64, 16), dace.SDFG) + return run_test("intel_fpga", 64, 16) if __name__ == "__main__": From 159033e817f86bb3f35b14dca10fcfc71e798b02 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Mon, 9 Oct 2023 21:24:45 +0200 Subject: [PATCH 063/163] Removed py312 mark.
--- pytest.ini | 1 - 1 file changed, 1 deletion(-) diff --git a/pytest.ini b/pytest.ini index 513158f531..087be3d897 100644 --- a/pytest.ini +++ b/pytest.ini @@ -14,7 +14,6 @@ markers = scalapack: Test requires ScaLAPACK (Intel MKL and OpenMPI). (select with '-m scalapack') datainstrument: Test uses data instrumentation (select with '-m datainstrument') hptt: Test requires the HPTT library (select with '-m "hptt') - py312: Test requires Python 3.12 or later (select with '-m "py312"') python_files = *_test.py *_cudatest.py From fa0333fbed8ed998f79830e13419e1d4296e0889 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 12 Oct 2023 14:35:21 +0200 Subject: [PATCH 064/163] Fix incorrect generation of sum to loop code for Fortran frontend --- dace/frontend/fortran/ast_transforms.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dace/frontend/fortran/ast_transforms.py b/dace/frontend/fortran/ast_transforms.py index e2a7246aed..d893fad4f0 100644 --- a/dace/frontend/fortran/ast_transforms.py +++ b/dace/frontend/fortran/ast_transforms.py @@ -737,8 +737,7 @@ def par_Decl_Range_Finder(node: ast_internal_classes.Array_Subscript_Node, count: int, newbody: list, scope_vars: ScopeVarsDeclarations, - declaration=True, - is_sum_to_loop=False): + declaration=True): """ Helper function for the transformation of array operations and sums to loops :param node: The AST to be transformed @@ -933,7 +932,7 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No rangeposrval = [] rangesrval = [] - par_Decl_Range_Finder(val, rangesrval, rangeposrval, self.count, newbody, self.scope_vars, False, True) + par_Decl_Range_Finder(val, rangesrval, rangeposrval, self.count, newbody, self.scope_vars, True) range_index = 0 body = ast_internal_classes.BinOp_Node(lval=current, From 362bf626710312072f694619b9447052012027f4 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 12 Oct 2023 15:37:35 +0200 Subject: [PATCH 065/163] Support passing array with no bounds in Fortran sum() --- dace/frontend/fortran/ast_transforms.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/dace/frontend/fortran/ast_transforms.py b/dace/frontend/fortran/ast_transforms.py index d893fad4f0..451fc66c62 100644 --- a/dace/frontend/fortran/ast_transforms.py +++ b/dace/frontend/fortran/ast_transforms.py @@ -752,6 +752,7 @@ def par_Decl_Range_Finder(node: ast_internal_classes.Array_Subscript_Node, currentindex = 0 indices = [] + offsets = scope_vars.get_var(node.parent, node.name.name).offsets for idx, i in enumerate(node.indices): @@ -925,7 +926,24 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No current = child.lval val = child.rval - rvals = [i for i in mywalk(val) if isinstance(i, ast_internal_classes.Array_Subscript_Node)] + + rvals = [] + for i in mywalk(val): + if isinstance(i, ast_internal_classes.Call_Expr_Node) and i.name.name == '__dace_sum': + + for arg in i.args: + + # supports syntax SUM(arr) + if isinstance(arg, ast_internal_classes.Name_Node): + array_node = ast_internal_classes.Array_Subscript_Node(parent=arg.parent) + array_node.name = arg + array_node.indices = [ast_internal_classes.ParDecl_Node(type='ALL')] + rvals.append(array_node) + + # supports syntax SUM(arr(:)) + if isinstance(arg, ast_internal_classes.Array_Subscript_Node): + rvals.append(arg) + if len(rvals) != 1: raise NotImplementedError("Only one array can be summed") val = rvals[0] From 6197b1e4c6092f33c514b5b35a02a12f260e5398 Mon Sep 17 00:00:00 
2001 From: Marcin Copik Date: Thu, 12 Oct 2023 15:38:57 +0200 Subject: [PATCH 066/163] Add test case for Fortran sum --- tests/fortran/sum_to_loop_offset.py | 51 +++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 tests/fortran/sum_to_loop_offset.py diff --git a/tests/fortran/sum_to_loop_offset.py b/tests/fortran/sum_to_loop_offset.py new file mode 100644 index 0000000000..6c868edc34 --- /dev/null +++ b/tests/fortran/sum_to_loop_offset.py @@ -0,0 +1,51 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. + +import numpy as np + +from dace.frontend.fortran import ast_transforms, fortran_parser + +def test_fortran_frontend_sum2loop_1d_without_offset(): + """ + Tests that the generated array map correctly handles offsets. + """ + test_string = """ + PROGRAM index_offset_test + implicit none + double precision, dimension(7) :: d + double precision, dimension(3) :: res + CALL index_test_function(d, res) + end + + SUBROUTINE index_test_function(d, res) + double precision, dimension(7) :: d + double precision, dimension(3) :: res + + res(1) = SUM(d(:)) + res(2) = SUM(d) + res(3) = SUM(d(2:6)) + + END SUBROUTINE index_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "index_offset_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + size = 7 + d = np.full([size], 0, order="F", dtype=np.float64) + for i in range(size): + d[i] = i + 1 + res = np.full([3], 42, order="F", dtype=np.float64) + sdfg(d=d, res=res) + assert res[0] == (1 + size) * size / 2 + assert res[1] == (1 + size) * size / 2 + assert res[2] == (2 + size - 1) * (size - 2)/ 2 +if __name__ == "__main__": + + test_fortran_frontend_sum2loop_1d_without_offset() + #test_fortran_frontend_sum2loop_1d_offset() + #test_fortran_frontend_arr2loop_2d_offset() + #test_fortran_frontend_arr2loop_without_offset() From e7b736834bde7ce2392892d297657364b248dbc1 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 12 Oct 2023 16:42:30 +0200 Subject: [PATCH 067/163] Fix bug in offset normalization and support Fortran SUM for arrays with offsets --- dace/frontend/fortran/ast_transforms.py | 10 ++++-- tests/fortran/sum_to_loop_offset.py | 41 ++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/dace/frontend/fortran/ast_transforms.py b/dace/frontend/fortran/ast_transforms.py index 451fc66c62..d15846193b 100644 --- a/dace/frontend/fortran/ast_transforms.py +++ b/dace/frontend/fortran/ast_transforms.py @@ -268,7 +268,7 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No ast_internal_classes.Var_Decl_Node( name="tmp_call_" + str(temp), type=res[i].type, - sizes=None, + sizes=None ) ])) newbody.append( @@ -284,7 +284,7 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No ast_internal_classes.Var_Decl_Node( name="tmp_call_" + str(temp), type=res[i].type, - sizes=None, + sizes=None ) ])) newbody.append( @@ -458,7 +458,11 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No if self.normalize_offsets: # Find the offset of a variable to which we are assigning - var_name = child.lval.name.name + var_name = "" + if isinstance(j, ast_internal_classes.Name_Node): + var_name = j.name + else: + var_name = j.name.name variable = self.scope_vars.get_var(child.parent, var_name) offset = variable.offsets[idx] diff --git a/tests/fortran/sum_to_loop_offset.py
b/tests/fortran/sum_to_loop_offset.py index 6c868edc34..6b16325cd0 100644 --- a/tests/fortran/sum_to_loop_offset.py +++ b/tests/fortran/sum_to_loop_offset.py @@ -43,9 +43,48 @@ def test_fortran_frontend_sum2loop_1d_without_offset(): assert res[1] == (1 + size) * size / 2 assert res[2] == (2 + size - 1) * (size - 2)/ 2 +def test_fortran_frontend_sum2loop_1d_offset(): + """ + Tests that the generated array map correctly handles offsets. + """ + test_string = """ + PROGRAM index_offset_test + implicit none + double precision, dimension(2:6) :: d + double precision, dimension(3) :: res + CALL index_test_function(d,res) + end + + SUBROUTINE index_test_function(d, res) + double precision, dimension(2:6) :: d + double precision, dimension(3) :: res + + res(1) = SUM(d) + res(2) = SUM(d(:)) + res(3) = SUM(d(3:5)) + + END SUBROUTINE index_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "index_offset_test", True) + sdfg.simplify(verbose=True) + sdfg.compile() + + size = 5 + d = np.full([size], 0, order="F", dtype=np.float64) + for i in range(size): + d[i] = i + 1 + res = np.full([3], 42, order="F", dtype=np.float64) + sdfg(d=d, res=res) + assert res[0] == (1 + size) * size / 2 + assert res[1] == (1 + size) * size / 2 + assert res[2] == (2 + size - 1) * (size - 2) / 2 + if __name__ == "__main__": test_fortran_frontend_sum2loop_1d_without_offset() - #test_fortran_frontend_sum2loop_1d_offset() + test_fortran_frontend_sum2loop_1d_offset() #test_fortran_frontend_arr2loop_2d_offset() #test_fortran_frontend_arr2loop_without_offset() From 0fcbce50ba9b387402ec56428371a38045c0630b Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 12 Oct 2023 17:27:53 +0200 Subject: [PATCH 068/163] Expand tests for array2loop in Fortran --- tests/fortran/array_to_loop_offset.py | 104 ++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/tests/fortran/array_to_loop_offset.py b/tests/fortran/array_to_loop_offset.py index 43d01d9b6b..5042859f8c 100644 --- a/tests/fortran/array_to_loop_offset.py +++ b/tests/fortran/array_to_loop_offset.py @@ -112,8 +112,112 @@ def test_fortran_frontend_arr2loop_2d_offset(): for j in range(7,10): assert a[i-1, j-1] == i * 2 +def test_fortran_frontend_arr2loop_2d_offset2(): + """ + Tests that the generated array map correctly handles offsets. 
+ """ + test_string = """ + PROGRAM index_offset_test + implicit none + double precision, dimension(5,7:9) :: d + CALL index_test_function(d) + end + + SUBROUTINE index_test_function(d) + double precision, dimension(5,7:9) :: d + + d(:,:) = 43 + + END SUBROUTINE index_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "index_offset_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + assert len(sdfg.data('d').shape) == 2 + assert sdfg.data('d').shape[0] == 5 + assert sdfg.data('d').shape[1] == 3 + + a = np.full([5,9], 42, order="F", dtype=np.float64) + sdfg(d=a) + for i in range(1,6): + for j in range(7,10): + assert a[i-1, j-1] == 43 + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "index_offset_test", True) + sdfg.simplify(verbose=True) + sdfg.compile() + + a = np.full([5,3], 42, order="F", dtype=np.float64) + sdfg(d=a) + for i in range(0,5): + for j in range(0,3): + assert a[i, j] == 43 + +def test_fortran_frontend_arr2loop_2d_offset3(): + """ + Tests that the generated array map correctly handles offsets. + """ + test_string = """ + PROGRAM index_offset_test + implicit none + double precision, dimension(5,7:9) :: d + CALL index_test_function(d) + end + + SUBROUTINE index_test_function(d) + double precision, dimension(5,7:9) :: d + + d(2:4, 7:8) = 43 + + END SUBROUTINE index_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "index_offset_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + assert len(sdfg.data('d').shape) == 2 + assert sdfg.data('d').shape[0] == 5 + assert sdfg.data('d').shape[1] == 3 + + a = np.full([5,9], 42, order="F", dtype=np.float64) + sdfg(d=a) + for i in range(2,4): + for j in range(7,9): + assert a[i-1, j-1] == 43 + for j in range(9,10): + assert a[i-1, j-1] == 42 + + for i in [1, 5]: + for j in range(7,10): + assert a[i-1, j-1] == 42 + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "index_offset_test", True) + sdfg.simplify(verbose=True) + sdfg.compile() + + a = np.full([5,3], 42, order="F", dtype=np.float64) + sdfg(d=a) + for i in range(1,4): + for j in range(0,2): + assert a[i, j] == 43 + for j in range(2,3): + assert a[i, j] == 42 + + for i in [0, 4]: + for j in range(0,3): + assert a[i, j] == 42 + if __name__ == "__main__": test_fortran_frontend_arr2loop_1d_offset() test_fortran_frontend_arr2loop_2d_offset() + test_fortran_frontend_arr2loop_2d_offset2() + test_fortran_frontend_arr2loop_2d_offset3() test_fortran_frontend_arr2loop_without_offset() From 81786a89f6522a3f94be96b3ae01221c1be0dbc1 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 12 Oct 2023 17:41:55 +0200 Subject: [PATCH 069/163] Add more tests covering 2D sum in Fortran --- tests/fortran/sum_to_loop_offset.py | 45 ++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/tests/fortran/sum_to_loop_offset.py b/tests/fortran/sum_to_loop_offset.py index 6b16325cd0..93b446b229 100644 --- a/tests/fortran/sum_to_loop_offset.py +++ b/tests/fortran/sum_to_loop_offset.py @@ -82,9 +82,52 @@ def test_fortran_frontend_sum2loop_1d_offset(): assert res[1] == (1 + size) * size / 2 assert res[2] == (2 + size - 1) * (size - 2) / 2 +def test_fortran_frontend_arr2loop_2d(): + """ + Tests that the generated array map correctly handles offsets. 
+ """ + test_string = """ + PROGRAM index_offset_test + implicit none + double precision, dimension(5,3) :: d + double precision, dimension(4) :: res + CALL index_test_function(d,res) + end + + SUBROUTINE index_test_function(d, res) + double precision, dimension(5,3) :: d + double precision, dimension(4) :: res + + !res(1) = SUM(d) + res(2) = SUM(d(:,:)) + res(3) = SUM(d(2:4, 2)) + res(4) = SUM(d(2:4, 2:3)) + + END SUBROUTINE index_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "index_offset_test", True) + sdfg.simplify(verbose=True) + sdfg.compile() + + sizes = [5, 3] + d = np.full(sizes, 42, order="F", dtype=np.float64) + cnt = 0 + for i in range(sizes[0]): + for j in range(sizes[1]): + d[i, j] = cnt + cnt += 1 + res = np.full([4], 42, order="F", dtype=np.float64) + sdfg(d=d, res=res) + assert res[1] == 105 + assert res[2] == 21 + assert res[3] == 45 + if __name__ == "__main__": test_fortran_frontend_sum2loop_1d_without_offset() test_fortran_frontend_sum2loop_1d_offset() + test_fortran_frontend_arr2loop_2d() #test_fortran_frontend_arr2loop_2d_offset() - #test_fortran_frontend_arr2loop_without_offset() From 65446735b5c8fb9f2c61e3334b87f343dc025ebd Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 12 Oct 2023 17:58:49 +0200 Subject: [PATCH 070/163] Support Fortran sum for arrays without explicit dimension access declaration --- dace/frontend/fortran/ast_transforms.py | 7 ++++++- tests/fortran/sum_to_loop_offset.py | 4 +++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/dace/frontend/fortran/ast_transforms.py b/dace/frontend/fortran/ast_transforms.py index d15846193b..32744c5120 100644 --- a/dace/frontend/fortran/ast_transforms.py +++ b/dace/frontend/fortran/ast_transforms.py @@ -941,7 +941,12 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No if isinstance(arg, ast_internal_classes.Name_Node): array_node = ast_internal_classes.Array_Subscript_Node(parent=arg.parent) array_node.name = arg - array_node.indices = [ast_internal_classes.ParDecl_Node(type='ALL')] + + # If we access SUM(arr) where arr has many dimensions, + # We need to create a ParDecl_Node for each dimension + dims = len(self.scope_vars.get_var(node.parent, arg.name).sizes) + array_node.indices = [ast_internal_classes.ParDecl_Node(type='ALL')] * dims + rvals.append(array_node) # supports syntax SUM(arr(:)) diff --git a/tests/fortran/sum_to_loop_offset.py b/tests/fortran/sum_to_loop_offset.py index 93b446b229..1898f4a182 100644 --- a/tests/fortran/sum_to_loop_offset.py +++ b/tests/fortran/sum_to_loop_offset.py @@ -98,7 +98,7 @@ def test_fortran_frontend_arr2loop_2d(): double precision, dimension(5,3) :: d double precision, dimension(4) :: res - !res(1) = SUM(d) + res(1) = SUM(d) res(2) = SUM(d(:,:)) res(3) = SUM(d(2:4, 2)) res(4) = SUM(d(2:4, 2:3)) @@ -121,10 +121,12 @@ def test_fortran_frontend_arr2loop_2d(): cnt += 1 res = np.full([4], 42, order="F", dtype=np.float64) sdfg(d=d, res=res) + assert res[0] == 105 assert res[1] == 105 assert res[2] == 21 assert res[3] == 45 + if __name__ == "__main__": test_fortran_frontend_sum2loop_1d_without_offset() From bd21926db2b5ad9a382deb087f32d709c12f7c25 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 12 Oct 2023 18:17:54 +0200 Subject: [PATCH 071/163] Add more tests for Fortran sum over 2D arrays --- tests/fortran/sum_to_loop_offset.py | 43 ++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) 
diff --git a/tests/fortran/sum_to_loop_offset.py b/tests/fortran/sum_to_loop_offset.py index 1898f4a182..e933589e0f 100644 --- a/tests/fortran/sum_to_loop_offset.py +++ b/tests/fortran/sum_to_loop_offset.py @@ -126,10 +126,51 @@ def test_fortran_frontend_arr2loop_2d(): assert res[2] == 21 assert res[3] == 45 +def test_fortran_frontend_arr2loop_2d_offset(): + """ + Tests that the generated array map correctly handles offsets. + """ + test_string = """ + PROGRAM index_offset_test + implicit none + double precision, dimension(2:6,7:10) :: d + double precision, dimension(3) :: res + CALL index_test_function(d,res) + end + + SUBROUTINE index_test_function(d, res) + double precision, dimension(2:6,7:10) :: d + double precision, dimension(3) :: res + + res(1) = SUM(d) + res(2) = SUM(d(:,:)) + res(3) = SUM(d(3:5, 8:9)) + + END SUBROUTINE index_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "index_offset_test", True) + sdfg.simplify(verbose=True) + sdfg.compile() + + sizes = [5, 4] + d = np.full(sizes, 42, order="F", dtype=np.float64) + cnt = 0 + for i in range(sizes[0]): + for j in range(sizes[1]): + d[i, j] = cnt + cnt += 1 + res = np.full([3], 42, order="F", dtype=np.float64) + sdfg(d=d, res=res) + assert res[0] == 190 + assert res[1] == 190 + assert res[2] == 57 if __name__ == "__main__": test_fortran_frontend_sum2loop_1d_without_offset() test_fortran_frontend_sum2loop_1d_offset() test_fortran_frontend_arr2loop_2d() - #test_fortran_frontend_arr2loop_2d_offset() + test_fortran_frontend_arr2loop_2d_offset() From 6545e3ef3a8c380aebe2fefdc728a0e8288f3767 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 12 Oct 2023 22:39:49 +0200 Subject: [PATCH 072/163] Add new class handling Fortran intrinsics --- dace/frontend/fortran/ast_components.py | 59 ++-------------------- dace/frontend/fortran/intrinsics.py | 67 +++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 55 deletions(-) create mode 100644 dace/frontend/fortran/intrinsics.py diff --git a/dace/frontend/fortran/ast_components.py b/dace/frontend/fortran/ast_components.py index d95fa87e58..bbe71394ad 100644 --- a/dace/frontend/fortran/ast_components.py +++ b/dace/frontend/fortran/ast_components.py @@ -6,6 +6,7 @@ import copy from dace.frontend.fortran import ast_internal_classes from dace.frontend.fortran.ast_internal_classes import FNode, Name_Node +from dace.frontend.fortran.intrinsics import FortranIntrinsics from typing import Any, List, Tuple, Type, TypeVar, Union, overload #We rely on fparser to provide an initial AST and convert to a version that is more suitable for our purposes @@ -122,6 +123,7 @@ def __init__(self, ast: f03.Program, tables: symbol_table.SymbolTables): "DOUBLE PRECISION": "DOUBLE", "REAL": "REAL", } + self.intrinsic_handler = FortranIntrinsics() self.supported_fortran_syntax = { "str": self.str_node, "tuple": self.tuple_node, @@ -242,7 +244,7 @@ def __init__(self, ast: f03.Program, tables: symbol_table.SymbolTables): "Level_2_Unary_Expr": self.level_2_expr, "Mult_Operand": self.power_expr, "Parenthesis": self.parenthesis_expr, - "Intrinsic_Name": self.intrinsic_name, + "Intrinsic_Name": self.intrinsic_handler.replace_function_name, "Intrinsic_Function_Reference": self.intrinsic_function_reference, "Only_List": self.only_list, "Structure_Constructor": self.structure_constructor, @@ -395,65 +397,12 @@ def structure_constructor(self, node: FASTNode): args = get_child(children, 
ast_internal_classes.Component_Spec_List_Node) return ast_internal_classes.Structure_Constructor_Node(name=name, args=args.args, type=None) - def intrinsic_name(self, node: FASTNode): - name = node.string - replacements = { - "INT": "__dace_int", - "DBLE": "__dace_dble", - "SQRT": "sqrt", - "COSH": "cosh", - "ABS": "abs", - "MIN": "min", - "MAX": "max", - "EXP": "exp", - "EPSILON": "__dace_epsilon", - "TANH": "tanh", - "SUM": "__dace_sum", - "SIGN": "__dace_sign", - "EXP": "exp", - "SELECTED_INT_KIND": "__dace_selected_int_kind", - "SELECTED_REAL_KIND": "__dace_selected_real_kind", - } - return ast_internal_classes.Name_Node(name=replacements[name]) - def intrinsic_function_reference(self, node: FASTNode): children = self.create_children(node) line = get_line(node) name = get_child(children, ast_internal_classes.Name_Node) args = get_child(children, ast_internal_classes.Arg_List_Node) - if name.name == "__dace_selected_int_kind": - import math - return ast_internal_classes.Int_Literal_Node(value=str( - math.ceil((math.log2(math.pow(10, int(args.args[0].value))) + 1) / 8)), - line_number=line) - # This selects the smallest kind that can hold the given number of digits (fp64,fp32 or fp16) - elif name.name == "__dace_selected_real_kind": - if int(args.args[0].value) >= 9 or int(args.args[1].value) > 126: - return ast_internal_classes.Int_Literal_Node(value="8", line_number=line) - elif int(args.args[0].value) >= 3 or int(args.args[1].value) > 14: - return ast_internal_classes.Int_Literal_Node(value="4", line_number=line) - else: - return ast_internal_classes.Int_Literal_Node(value="2", line_number=line) - - func_types = { - "__dace_int": "INT", - "__dace_dble": "DOUBLE", - "sqrt": "DOUBLE", - "cosh": "DOUBLE", - "abs": "DOUBLE", - "min": "DOUBLE", - "max": "DOUBLE", - "exp": "DOUBLE", - "__dace_epsilon": "DOUBLE", - "tanh": "DOUBLE", - "__dace_sum": "DOUBLE", - "__dace_sign": "DOUBLE", - "exp": "DOUBLE", - "__dace_selected_int_kind": "INT", - "__dace_selected_real_kind": "INT", - } - call_type = func_types[name.name] - return ast_internal_classes.Call_Expr_Node(name=name, type=call_type, args=args.args, line_number=line) + return self.intrinsic_handler.replace_function_reference(name, args, line) def function_stmt(self, node: FASTNode): raise NotImplementedError( diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py new file mode 100644 index 0000000000..45b0113427 --- /dev/null +++ b/dace/frontend/fortran/intrinsics.py @@ -0,0 +1,67 @@ + +import math +from typing import Any + +from dace.frontend.fortran import ast_internal_classes + +FASTNode = Any + +class FortranIntrinsics: + + def replace_function_name(self, node: FASTNode) -> ast_internal_classes.Name_Node: + + func_name = node.string + replacements = { + "INT": "__dace_int", + "DBLE": "__dace_dble", + "SQRT": "sqrt", + "COSH": "cosh", + "ABS": "abs", + "MIN": "min", + "MAX": "max", + "EXP": "exp", + "EPSILON": "__dace_epsilon", + "TANH": "tanh", + "SUM": "__dace_sum", + "SIGN": "__dace_sign", + "EXP": "exp", + "SELECTED_INT_KIND": "__dace_selected_int_kind", + "SELECTED_REAL_KIND": "__dace_selected_real_kind", + } + return ast_internal_classes.Name_Node(name=replacements[func_name]) + + def replace_function_reference(self, name: ast_internal_classes.Name_Node, args: ast_internal_classes.Arg_List_Node, line): + + if name.name == "__dace_selected_int_kind": + return ast_internal_classes.Int_Literal_Node(value=str( + math.ceil((math.log2(math.pow(10, int(args.args[0].value))) + 1) / 8)), + line_number=line) 
+ # This selects the smallest kind that can hold the given number of digits (fp64,fp32 or fp16) + elif name.name == "__dace_selected_real_kind": + if int(args.args[0].value) >= 9 or int(args.args[1].value) > 126: + return ast_internal_classes.Int_Literal_Node(value="8", line_number=line) + elif int(args.args[0].value) >= 3 or int(args.args[1].value) > 14: + return ast_internal_classes.Int_Literal_Node(value="4", line_number=line) + else: + return ast_internal_classes.Int_Literal_Node(value="2", line_number=line) + + func_types = { + "__dace_int": "INT", + "__dace_dble": "DOUBLE", + "sqrt": "DOUBLE", + "cosh": "DOUBLE", + "abs": "DOUBLE", + "min": "DOUBLE", + "max": "DOUBLE", + "exp": "DOUBLE", + "__dace_epsilon": "DOUBLE", + "tanh": "DOUBLE", + "__dace_sum": "DOUBLE", + "__dace_sign": "DOUBLE", + "exp": "DOUBLE", + "__dace_selected_int_kind": "INT", + "__dace_selected_real_kind": "INT", + } + call_type = func_types[name.name] + return ast_internal_classes.Call_Expr_Node(name=name, type=call_type, args=args.args, line_number=line) + From bee2244bc9b279d6553df27a8105f6be1d4e129c Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 12 Oct 2023 23:06:07 +0200 Subject: [PATCH 073/163] Move implementation of two Fortran intrinsics to separate classes --- dace/frontend/fortran/intrinsics.py | 89 +++++++++++++++++++++-------- 1 file changed, 66 insertions(+), 23 deletions(-) diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py index 45b0113427..ae016694f1 100644 --- a/dace/frontend/fortran/intrinsics.py +++ b/dace/frontend/fortran/intrinsics.py @@ -1,4 +1,5 @@ +from abc import abstractmethod import math from typing import Any @@ -6,8 +7,61 @@ FASTNode = Any +class IntrinsicTransformation: + + def __init__(self, func_name: str, args: ast_internal_classes.Arg_List_Node, line): + self.func_name = func_name + self.args = args + self.line = line + + @staticmethod + @abstractmethod + def replaced_name(func_name: str) -> str: + pass + +class SelectedKind(IntrinsicTransformation): + + FUNCTIONS = { + "SELECTED_INT_KIND": "__dace_selected_int_kind", + "SELECTED_REAL_KIND": "__dace_selected_real_kind", + } + + def __init__(self, func_name: str, args: ast_internal_classes.Arg_List_Node, line): + super().__init__(func_name, args, line) + + @staticmethod + def replaced_name(func_name: str) -> str: + return SelectedKind.FUNCTIONS[func_name] + + def replace(self) -> ast_internal_classes.FNode: + + if self.func_name == "__dace_selected_int_kind": + return ast_internal_classes.Int_Literal_Node(value=str( + math.ceil((math.log2(math.pow(10, int(self.args.args[0].value))) + 1) / 8)), + line_number=self.line) + # This selects the smallest kind that can hold the given number of digits (fp64,fp32 or fp16) + elif self.func_name == "__dace_selected_real_kind": + if int(self.args.args[0].value) >= 9 or int(self.args.args[1].value) > 126: + return ast_internal_classes.Int_Literal_Node(value="8", line_number=self.line) + elif int(self.args.args[0].value) >= 3 or int(self.args.args[1].value) > 14: + return ast_internal_classes.Int_Literal_Node(value="4", line_number=self.line) + else: + return ast_internal_classes.Int_Literal_Node(value="2", line_number=self.line) + + raise NotImplemented() + class FortranIntrinsics: + IMPLEMENTATIONS_AST = { + "SELECTED_INT_KIND": SelectedKind, + "SELECTED_REAL_KIND": SelectedKind + } + + IMPLEMENTATIONS_DACE = { + "__dace_selected_int_kind": SelectedKind, + "__dace_selected_real_kind": SelectedKind + } + def replace_function_name(self, node: FASTNode) -> 
ast_internal_classes.Name_Node:
         func_name = node.string
@@ -24,27 +78,15 @@ def replace_function_name(self, node: FASTNode) -> ast_internal_classes.Name_Nod
             "TANH": "tanh",
             "SUM": "__dace_sum",
             "SIGN": "__dace_sign",
-            "EXP": "exp",
-            "SELECTED_INT_KIND": "__dace_selected_int_kind",
-            "SELECTED_REAL_KIND": "__dace_selected_real_kind",
+            "EXP": "exp"
         }
-        return ast_internal_classes.Name_Node(name=replacements[func_name])
+        if func_name in replacements:
+            return ast_internal_classes.Name_Node(name=replacements[func_name])
+        else:
+            return ast_internal_classes.Name_Node(name=self.IMPLEMENTATIONS_AST[func_name].replaced_name(func_name))
 
     def replace_function_reference(self, name: ast_internal_classes.Name_Node, args: ast_internal_classes.Arg_List_Node, line):
-        if name.name == "__dace_selected_int_kind":
-            return ast_internal_classes.Int_Literal_Node(value=str(
-                math.ceil((math.log2(math.pow(10, int(args.args[0].value))) + 1) / 8)),
-                                                         line_number=line)
-        # This selects the smallest kind that can hold the given number of digits (fp64, fp32, or fp16)
-        elif name.name == "__dace_selected_real_kind":
-            if int(args.args[0].value) >= 9 or int(args.args[1].value) > 126:
-                return ast_internal_classes.Int_Literal_Node(value="8", line_number=line)
-            elif int(args.args[0].value) >= 3 or int(args.args[1].value) > 14:
-                return ast_internal_classes.Int_Literal_Node(value="4", line_number=line)
-            else:
-                return ast_internal_classes.Int_Literal_Node(value="2", line_number=line)
-
         func_types = {
             "__dace_int": "INT",
             "__dace_dble": "DOUBLE",
             "sqrt": "DOUBLE",
             "cosh": "DOUBLE",
             "abs": "DOUBLE",
             "min": "DOUBLE",
             "max": "DOUBLE",
             "exp": "DOUBLE",
             "__dace_epsilon": "DOUBLE",
             "tanh": "DOUBLE",
             "__dace_sum": "DOUBLE",
             "__dace_sign": "DOUBLE",
-            "exp": "DOUBLE",
-            "__dace_selected_int_kind": "INT",
-            "__dace_selected_real_kind": "INT",
+            "exp": "DOUBLE"
         }
-        call_type = func_types[name.name]
-        return ast_internal_classes.Call_Expr_Node(name=name, type=call_type, args=args.args, line_number=line)
-
+        if name.name in func_types:
+            # FIXME: this will be progressively removed
+            call_type = func_types[name.name]
+            return ast_internal_classes.Call_Expr_Node(name=name, type=call_type, args=args.args, line_number=line)
+        else:
+            return self.IMPLEMENTATIONS_DACE[name.name](name.name, args, line).replace()
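For reference, the kind-selection rules that SelectedKind now centralizes can be sanity-checked in isolation. The snippet below is a minimal plain-Python sketch that mirrors the formulas above; the helper names are illustrative only and are not part of the DaCe API:

    import math

    def selected_int_kind(digits: int) -> int:
        # Mirrors __dace_selected_int_kind: bytes needed for a signed integer
        # that can represent values up to 10**digits.
        return math.ceil((math.log2(10 ** digits) + 1) / 8)

    def selected_real_kind(precision: int, exponent_range: int) -> int:
        # Mirrors __dace_selected_real_kind: kind 8 (fp64), 4 (fp32) or 2 (fp16).
        if precision >= 9 or exponent_range > 126:
            return 8
        elif precision >= 3 or exponent_range > 14:
            return 4
        return 2

    assert selected_int_kind(4) == 2         # 10**4 needs 15 bits -> 2 bytes
    assert selected_int_kind(9) == 4         # 10**9 needs ~31 bits -> 4 bytes
    assert selected_real_kind(6, 37) == 4    # single precision suffices
    assert selected_real_kind(12, 307) == 8  # double precision

From a9aa37ecf724663d05c9c6535054bdaaf9f95fa2 Mon Sep 17 00:00:00 2001
From: Marcin Copik
Date: Thu, 12 Oct 2023 23:21:21 +0200
Subject: [PATCH 074/163] Move code specialized to __dace_sum to the actual
 transformation

---
 dace/frontend/fortran/ast_transforms.py | 42 ++++++++++---------------
 1 file changed, 17 insertions(+), 25 deletions(-)

diff --git a/dace/frontend/fortran/ast_transforms.py b/dace/frontend/fortran/ast_transforms.py
index 32744c5120..c44ee22ea9 100644
--- a/dace/frontend/fortran/ast_transforms.py
+++ b/dace/frontend/fortran/ast_transforms.py
@@ -262,31 +262,14 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No
 
         if res is not None:
             for i in range(0, len(res)):
-                if (res[i].name.name == "__dace_sum"):
-                    newbody.append(
-                        ast_internal_classes.Decl_Stmt_Node(vardecl=[
-                            ast_internal_classes.Var_Decl_Node(
-                                name="tmp_call_" + str(temp),
-                                type=res[i].type,
-                                sizes=None
-                            )
-                        ]))
-                    newbody.append(
-                        ast_internal_classes.BinOp_Node(lval=ast_internal_classes.Name_Node(name="tmp_call_" +
-                                                                                            str(temp)),
-                                                        op="=",
-                                                        rval=ast_internal_classes.Int_Literal_Node(value="0"),
-                                                        line_number=child.line_number))
-                else:
-
-                    newbody.append(
-                        ast_internal_classes.Decl_Stmt_Node(vardecl=[
-                            ast_internal_classes.Var_Decl_Node(
-  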
name="tmp_call_" + str(temp), - type=res[i].type, - sizes=None - ) - ])) + newbody.append( + ast_internal_classes.Decl_Stmt_Node(vardecl=[ + ast_internal_classes.Var_Decl_Node( + name="tmp_call_" + str(temp), + type=res[i].type, + sizes=None + ) + ])) newbody.append( ast_internal_classes.BinOp_Node(op="=", lval=ast_internal_classes.Name_Node(name="tmp_call_" + @@ -961,6 +944,15 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No par_Decl_Range_Finder(val, rangesrval, rangeposrval, self.count, newbody, self.scope_vars, True) + # Initialize the result variable + newbody.append( + ast_internal_classes.BinOp_Node( + lval=current, + op="=", + rval=ast_internal_classes.Int_Literal_Node(value="0"), + line_number=child.line_number + ) + ) range_index = 0 body = ast_internal_classes.BinOp_Node(lval=current, op="=", From cf59ac8e616ae8e7a99628b918b860873fbbd0fa Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Fri, 13 Oct 2023 00:04:59 +0200 Subject: [PATCH 075/163] Move sum-to-loop transformation to Fortran intrinsics --- dace/frontend/fortran/ast_components.py | 10 +- dace/frontend/fortran/ast_transforms.py | 125 -------------- dace/frontend/fortran/fortran_parser.py | 15 +- dace/frontend/fortran/intrinsics.py | 208 +++++++++++++++++++++--- 4 files changed, 203 insertions(+), 155 deletions(-) diff --git a/dace/frontend/fortran/ast_components.py b/dace/frontend/fortran/ast_components.py index bbe71394ad..332c3a563f 100644 --- a/dace/frontend/fortran/ast_components.py +++ b/dace/frontend/fortran/ast_components.py @@ -6,8 +6,10 @@ import copy from dace.frontend.fortran import ast_internal_classes from dace.frontend.fortran.ast_internal_classes import FNode, Name_Node -from dace.frontend.fortran.intrinsics import FortranIntrinsics -from typing import Any, List, Tuple, Type, TypeVar, Union, overload +from typing import Any, List, Tuple, Type, TypeVar, Union, overload, TYPE_CHECKING + +if TYPE_CHECKING: + from dace.frontend.fortran.intrinsics import FortranIntrinsics #We rely on fparser to provide an initial AST and convert to a version that is more suitable for our purposes @@ -123,6 +125,7 @@ def __init__(self, ast: f03.Program, tables: symbol_table.SymbolTables): "DOUBLE PRECISION": "DOUBLE", "REAL": "REAL", } + from dace.frontend.fortran.intrinsics import FortranIntrinsics self.intrinsic_handler = FortranIntrinsics() self.supported_fortran_syntax = { "str": self.str_node, @@ -258,6 +261,9 @@ def __init__(self, ast: f03.Program, tables: symbol_table.SymbolTables): "Allocate_Shape_Spec_List": self.allocate_shape_spec_list, } + def fortran_intrinsics(self) -> "FortranIntrinsics": + return self.intrinsic_handler + def list_tables(self): for i in self.tables._symbol_tables: print(i) diff --git a/dace/frontend/fortran/ast_transforms.py b/dace/frontend/fortran/ast_transforms.py index c44ee22ea9..630021e02c 100644 --- a/dace/frontend/fortran/ast_transforms.py +++ b/dace/frontend/fortran/ast_transforms.py @@ -701,23 +701,6 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No return -class SumLoopNodeLister(NodeVisitor): - """ - Finds all sum operations that have to be transformed to loops in the AST - """ - def __init__(self): - self.nodes: List[ast_internal_classes.FNode] = [] - - def visit_BinOp_Node(self, node: ast_internal_classes.BinOp_Node): - - if isinstance(node.rval, ast_internal_classes.Call_Expr_Node): - if node.rval.name.name == "__dace_sum": - self.nodes.append(node) - - def visit_Execution_Part_Node(self, node: 
ast_internal_classes.Execution_Part_Node): - return - - def par_Decl_Range_Finder(node: ast_internal_classes.Array_Subscript_Node, ranges: list, rangepos: list, @@ -892,114 +875,6 @@ def mywalk(node): todo.extend(iter_child_nodes(node)) yield node - -class SumToLoop(NodeTransformer): - """ - Transforms the AST by removing array sums and replacing them with loops - """ - def __init__(self, ast): - self.count = 0 - ParentScopeAssigner().visit(ast) - self.scope_vars = ScopeVarsDeclarations() - self.scope_vars.visit(ast) - - def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_Node): - newbody = [] - for child in node.execution: - lister = SumLoopNodeLister() - lister.visit(child) - res = lister.nodes - if res is not None and len(res) > 0: - - current = child.lval - val = child.rval - - rvals = [] - for i in mywalk(val): - if isinstance(i, ast_internal_classes.Call_Expr_Node) and i.name.name == '__dace_sum': - - for arg in i.args: - - # supports syntax SUM(arr) - if isinstance(arg, ast_internal_classes.Name_Node): - array_node = ast_internal_classes.Array_Subscript_Node(parent=arg.parent) - array_node.name = arg - - # If we access SUM(arr) where arr has many dimensions, - # We need to create a ParDecl_Node for each dimension - dims = len(self.scope_vars.get_var(node.parent, arg.name).sizes) - array_node.indices = [ast_internal_classes.ParDecl_Node(type='ALL')] * dims - - rvals.append(array_node) - - # supports syntax SUM(arr(:)) - if isinstance(arg, ast_internal_classes.Array_Subscript_Node): - rvals.append(arg) - - if len(rvals) != 1: - raise NotImplementedError("Only one array can be summed") - val = rvals[0] - rangeposrval = [] - rangesrval = [] - - par_Decl_Range_Finder(val, rangesrval, rangeposrval, self.count, newbody, self.scope_vars, True) - - # Initialize the result variable - newbody.append( - ast_internal_classes.BinOp_Node( - lval=current, - op="=", - rval=ast_internal_classes.Int_Literal_Node(value="0"), - line_number=child.line_number - ) - ) - range_index = 0 - body = ast_internal_classes.BinOp_Node(lval=current, - op="=", - rval=ast_internal_classes.BinOp_Node( - lval=current, - op="+", - rval=val, - line_number=child.line_number), - line_number=child.line_number) - for i in rangesrval: - initrange = i[0] - finalrange = i[1] - init = ast_internal_classes.BinOp_Node( - lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)), - op="=", - rval=initrange, - line_number=child.line_number) - cond = ast_internal_classes.BinOp_Node( - lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)), - op="<=", - rval=finalrange, - line_number=child.line_number) - iter = ast_internal_classes.BinOp_Node( - lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)), - op="=", - rval=ast_internal_classes.BinOp_Node( - lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)), - op="+", - rval=ast_internal_classes.Int_Literal_Node(value="1")), - line_number=child.line_number) - current_for = ast_internal_classes.Map_Stmt_Node( - init=init, - cond=cond, - iter=iter, - body=ast_internal_classes.Execution_Part_Node(execution=[body]), - line_number=child.line_number) - body = current_for - range_index += 1 - - newbody.append(body) - - self.count = self.count + range_index - else: - newbody.append(self.visit(child)) - return ast_internal_classes.Execution_Part_Node(execution=newbody) - - class RenameVar(NodeTransformer): def __init__(self, oldname: str, 
newname: str):
         self.oldname = oldname
diff --git a/dace/frontend/fortran/fortran_parser.py b/dace/frontend/fortran/fortran_parser.py
index b15435f4ff..45883f2070 100644
--- a/dace/frontend/fortran/fortran_parser.py
+++ b/dace/frontend/fortran/fortran_parser.py
@@ -1045,7 +1045,10 @@ def create_ast_from_string(
     program = ast_transforms.CallExtractor().visit(program)
     program = ast_transforms.SignToIf().visit(program)
     program = ast_transforms.ArrayToLoop(program).visit(program)
-    program = ast_transforms.SumToLoop(program).visit(program)
+
+    for transformation in own_ast.fortran_intrinsics().transformations():
+        program = transformation(program).visit(program)
+
     program = ast_transforms.ForDeclarer().visit(program)
     program = ast_transforms.IndexExtractor(program, normalize_offsets).visit(program)
 
@@ -1077,7 +1080,10 @@ def create_sdfg_from_string(
     program = ast_transforms.CallExtractor().visit(program)
     program = ast_transforms.SignToIf().visit(program)
    program = ast_transforms.ArrayToLoop(program).visit(program)
-    program = ast_transforms.SumToLoop(program).visit(program)
+
+    for transformation in own_ast.fortran_intrinsics().transformations():
+        program = transformation(program).visit(program)
+
     program = ast_transforms.ForDeclarer().visit(program)
     program = ast_transforms.IndexExtractor(program, normalize_offsets).visit(program)
     ast2sdfg = AST_translator(own_ast, __file__)
@@ -1119,7 +1125,10 @@ def create_sdfg_from_fortran_file(source_string: str):
     program = ast_transforms.CallExtractor().visit(program)
     program = ast_transforms.SignToIf().visit(program)
     program = ast_transforms.ArrayToLoop(program).visit(program)
-    program = ast_transforms.SumToLoop(program).visit(program)
+
+    for transformation in own_ast.fortran_intrinsics().transformations():
+        program = transformation(program).visit(program)
+
     program = ast_transforms.ForDeclarer().visit(program)
     program = ast_transforms.IndexExtractor(program).visit(program)
     ast2sdfg = AST_translator(own_ast, __file__)
diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py
index ae016694f1..f61586325c 100644
--- a/dace/frontend/fortran/intrinsics.py
+++ b/dace/frontend/fortran/intrinsics.py
@@ -1,24 +1,29 @@
 
 from abc import abstractmethod
 import math
-from typing import Any
+from typing import Any, List, Set, Type
 
 from dace.frontend.fortran import ast_internal_classes
+from dace.frontend.fortran.ast_transforms import NodeVisitor, NodeTransformer, ParentScopeAssigner, ScopeVarsDeclarations, par_Decl_Range_Finder, mywalk
 
 FASTNode = Any
 
 class IntrinsicTransformation:
 
-    def __init__(self, func_name: str, args: ast_internal_classes.Arg_List_Node, line):
-        self.func_name = func_name
-        self.args = args
-        self.line = line
-
     @staticmethod
     @abstractmethod
     def replaced_name(func_name: str) -> str:
         pass
 
+    @staticmethod
+    @abstractmethod
+    def replace(func_name: ast_internal_classes.Name_Node, args: ast_internal_classes.Arg_List_Node, line) -> ast_internal_classes.FNode:
+        pass
+
+    @staticmethod
+    def has_transformation() -> bool:
+        return False
+
 class SelectedKind(IntrinsicTransformation):
 
     FUNCTIONS = {
@@ -26,42 +31,194 @@ class SelectedKind(IntrinsicTransformation):
         "SELECTED_REAL_KIND": "__dace_selected_real_kind",
     }
 
-    def __init__(self, func_name: str, args: ast_internal_classes.Arg_List_Node, line):
-        super().__init__(func_name, args, line)
-
     @staticmethod
     def replaced_name(func_name: str) -> str:
         return SelectedKind.FUNCTIONS[func_name]
 
-    def replace(self) -> ast_internal_classes.FNode:
+    @staticmethod
+    def replace(func_name: 
ast_internal_classes.Name_Node, args: ast_internal_classes.Arg_List_Node, line) -> ast_internal_classes.FNode: - if self.func_name == "__dace_selected_int_kind": + if func_name.name == "__dace_selected_int_kind": return ast_internal_classes.Int_Literal_Node(value=str( - math.ceil((math.log2(math.pow(10, int(self.args.args[0].value))) + 1) / 8)), - line_number=self.line) + math.ceil((math.log2(math.pow(10, int(args.args[0].value))) + 1) / 8)), + line_number=line) # This selects the smallest kind that can hold the given number of digits (fp64,fp32 or fp16) - elif self.func_name == "__dace_selected_real_kind": - if int(self.args.args[0].value) >= 9 or int(self.args.args[1].value) > 126: - return ast_internal_classes.Int_Literal_Node(value="8", line_number=self.line) - elif int(self.args.args[0].value) >= 3 or int(self.args.args[1].value) > 14: - return ast_internal_classes.Int_Literal_Node(value="4", line_number=self.line) + elif func_name.name == "__dace_selected_real_kind": + if int(args.args[0].value) >= 9 or int(args.args[1].value) > 126: + return ast_internal_classes.Int_Literal_Node(value="8", line_number=line) + elif int(args.args[0].value) >= 3 or int(args.args[1].value) > 14: + return ast_internal_classes.Int_Literal_Node(value="4", line_number=line) else: - return ast_internal_classes.Int_Literal_Node(value="2", line_number=self.line) + return ast_internal_classes.Int_Literal_Node(value="2", line_number=line) raise NotImplemented() +class LoopBasedReplacement: + + @staticmethod + def replaced_name(func_name: str) -> str: + replacements = { + "SUM": "__dace_sum" + } + return replacements[func_name] + + @staticmethod + def replace(func_name: ast_internal_classes.Name_Node, args: ast_internal_classes.Arg_List_Node, line) -> ast_internal_classes.FNode: + func_types = { + "__dace_sum": "DOUBLE" + } + call_type = func_types[func_name.name] + return ast_internal_classes.Call_Expr_Node(name=func_name, type=call_type, args=args.args, line_number=line) + + @staticmethod + def has_transformation() -> bool: + return True + +class Sum(LoopBasedReplacement): + + class SumLoopNodeLister(NodeVisitor): + """ + Finds all sum operations that have to be transformed to loops in the AST + """ + def __init__(self): + self.nodes: List[ast_internal_classes.FNode] = [] + + def visit_BinOp_Node(self, node: ast_internal_classes.BinOp_Node): + + if isinstance(node.rval, ast_internal_classes.Call_Expr_Node): + if node.rval.name.name == "__dace_sum": + self.nodes.append(node) + + def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_Node): + return + + class Transformation(NodeTransformer): + + """ + Transforms the AST by removing array sums and replacing them with loops + """ + def __init__(self, ast): + self.count = 0 + ParentScopeAssigner().visit(ast) + self.scope_vars = ScopeVarsDeclarations() + self.scope_vars.visit(ast) + + def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_Node): + newbody = [] + for child in node.execution: + lister = Sum.SumLoopNodeLister() + lister.visit(child) + res = lister.nodes + if res is not None and len(res) > 0: + + current = child.lval + val = child.rval + + rvals = [] + for i in mywalk(val): + if isinstance(i, ast_internal_classes.Call_Expr_Node) and i.name.name == '__dace_sum': + + for arg in i.args: + + # supports syntax SUM(arr) + if isinstance(arg, ast_internal_classes.Name_Node): + array_node = ast_internal_classes.Array_Subscript_Node(parent=arg.parent) + array_node.name = arg + + # If we access SUM(arr) where arr has 
many dimensions, + # We need to create a ParDecl_Node for each dimension + dims = len(self.scope_vars.get_var(node.parent, arg.name).sizes) + array_node.indices = [ast_internal_classes.ParDecl_Node(type='ALL')] * dims + + rvals.append(array_node) + + # supports syntax SUM(arr(:)) + if isinstance(arg, ast_internal_classes.Array_Subscript_Node): + rvals.append(arg) + + if len(rvals) != 1: + raise NotImplementedError("Only one array can be summed") + val = rvals[0] + rangeposrval = [] + rangesrval = [] + + par_Decl_Range_Finder(val, rangesrval, rangeposrval, self.count, newbody, self.scope_vars, True) + + # Initialize the result variable + newbody.append( + ast_internal_classes.BinOp_Node( + lval=current, + op="=", + rval=ast_internal_classes.Int_Literal_Node(value="0"), + line_number=child.line_number + ) + ) + range_index = 0 + body = ast_internal_classes.BinOp_Node(lval=current, + op="=", + rval=ast_internal_classes.BinOp_Node( + lval=current, + op="+", + rval=val, + line_number=child.line_number), + line_number=child.line_number) + for i in rangesrval: + initrange = i[0] + finalrange = i[1] + init = ast_internal_classes.BinOp_Node( + lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)), + op="=", + rval=initrange, + line_number=child.line_number) + cond = ast_internal_classes.BinOp_Node( + lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)), + op="<=", + rval=finalrange, + line_number=child.line_number) + iter = ast_internal_classes.BinOp_Node( + lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)), + op="=", + rval=ast_internal_classes.BinOp_Node( + lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)), + op="+", + rval=ast_internal_classes.Int_Literal_Node(value="1")), + line_number=child.line_number) + current_for = ast_internal_classes.Map_Stmt_Node( + init=init, + cond=cond, + iter=iter, + body=ast_internal_classes.Execution_Part_Node(execution=[body]), + line_number=child.line_number) + body = current_for + range_index += 1 + + newbody.append(body) + + self.count = self.count + range_index + else: + newbody.append(self.visit(child)) + return ast_internal_classes.Execution_Part_Node(execution=newbody) + class FortranIntrinsics: IMPLEMENTATIONS_AST = { "SELECTED_INT_KIND": SelectedKind, - "SELECTED_REAL_KIND": SelectedKind + "SELECTED_REAL_KIND": SelectedKind, + "SUM": Sum } IMPLEMENTATIONS_DACE = { "__dace_selected_int_kind": SelectedKind, - "__dace_selected_real_kind": SelectedKind + "__dace_selected_real_kind": SelectedKind, + "__dace_sum": Sum } + def __init__(self): + self._transformations_to_run = set() + + def transformations(self) -> Set[Type[NodeTransformer]]: + return self._transformations_to_run + def replace_function_name(self, node: FASTNode) -> ast_internal_classes.Name_Node: func_name = node.string @@ -76,13 +233,16 @@ def replace_function_name(self, node: FASTNode) -> ast_internal_classes.Name_Nod "EXP": "exp", "EPSILON": "__dace_epsilon", "TANH": "tanh", - "SUM": "__dace_sum", "SIGN": "__dace_sign", "EXP": "exp" } if func_name in replacements: return ast_internal_classes.Name_Node(name=replacements[func_name]) else: + + if self.IMPLEMENTATIONS_AST[func_name].has_transformation(): + self._transformations_to_run.add(self.IMPLEMENTATIONS_AST[func_name].Transformation) + return ast_internal_classes.Name_Node(name=self.IMPLEMENTATIONS_AST[func_name].replaced_name(func_name)) def replace_function_reference(self, name: 
ast_internal_classes.Name_Node, args: ast_internal_classes.Arg_List_Node, line):
@@ -98,13 +258,11 @@ def replace_function_reference(self, name: ast_internal_classes.Name_Node, args:
             "exp": "DOUBLE",
             "__dace_epsilon": "DOUBLE",
             "tanh": "DOUBLE",
-            "__dace_sum": "DOUBLE",
             "__dace_sign": "DOUBLE",
-            "exp": "DOUBLE"
         }
         if name.name in func_types:
             # FIXME: this will be progressively removed
             call_type = func_types[name.name]
             return ast_internal_classes.Call_Expr_Node(name=name, type=call_type, args=args.args, line_number=line)
         else:
-            return self.IMPLEMENTATIONS_DACE[name.name](name.name, args, line).replace()
+            return self.IMPLEMENTATIONS_DACE[name.name].replace(name, args, line)

From 24cc5477ef2985e8c1e09bb9e4db8de82f25935a Mon Sep 17 00:00:00 2001
From: Marcin Copik
Date: Fri, 13 Oct 2023 14:22:40 +0200
Subject: [PATCH 076/163] Support generation of break statement when
 translating from Fortran AST to SDFG

---
 dace/frontend/fortran/ast_internal_classes.py | 4 ++++
 dace/frontend/fortran/fortran_parser.py       | 8 +++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/dace/frontend/fortran/ast_internal_classes.py b/dace/frontend/fortran/ast_internal_classes.py
index 70a43e21b8..d1e68572de 100644
--- a/dace/frontend/fortran/ast_internal_classes.py
+++ b/dace/frontend/fortran/ast_internal_classes.py
@@ -386,3 +386,7 @@ class Use_Stmt_Node(FNode):
 class Write_Stmt_Node(FNode):
     _attributes = ()
     _fields = ('args', )
+
+class Break_Node(FNode):
+    _attributes = ()
+    _fields = ()
diff --git a/dace/frontend/fortran/fortran_parser.py b/dace/frontend/fortran/fortran_parser.py
index 45883f2070..21f61a171a 100644
--- a/dace/frontend/fortran/fortran_parser.py
+++ b/dace/frontend/fortran/fortran_parser.py
@@ -66,6 +66,7 @@ def __init__(self, ast: ast_components.InternalFortranAst, source: str):
             ast_internal_classes.Program_Node: self.ast2sdfg,
             ast_internal_classes.Write_Stmt_Node: self.write2sdfg,
             ast_internal_classes.Allocate_Stmt_Node: self.allocate2sdfg,
+            ast_internal_classes.Break_Node: self.break2sdfg,
         }
 
     def get_dace_type(self, type):
@@ -295,7 +296,7 @@ def forstmt2sdfg(self, node: ast_internal_classes.For_Stmt_Node, sdfg: SDFG):
         begin_loop_state = sdfg.add_state("BeginLoop" + name)
         end_loop_state = sdfg.add_state("EndLoop" + name)
         self.last_sdfg_states[sdfg] = begin_loop_state
-        self.last_loop_continues[sdfg] = end_loop_state
+        self.last_loop_continues[sdfg] = final_substate
 
         self.translate(node.body, sdfg)
         sdfg.add_edge(self.last_sdfg_states[sdfg], end_loop_state, InterstateEdge())
@@ -1015,6 +1016,11 @@ def vardecl2sdfg(self, node: ast_internal_classes.Var_Decl_Node, sdfg: SDFG):
         if node.name not in self.contexts[sdfg.name].containers:
             self.contexts[sdfg.name].containers.append(node.name)
 
+    def break2sdfg(self, node: ast_internal_classes.Break_Node, sdfg: SDFG):
+
+        self.last_loop_breaks[sdfg] = self.last_sdfg_states[sdfg]
+        sdfg.add_edge(self.last_sdfg_states[sdfg], self.last_loop_continues.get(sdfg), InterstateEdge())
+
 def create_ast_from_string(
     source_string: str,
     sdfg_name: str,
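The break handling above ships without a regression test. A hypothetical check in the style of the existing frontend tests could look as follows; it assumes the parser lowers Fortran EXIT statements onto Break_Node, which this series does not show, so treat it as a sketch rather than a verified test:

    import numpy as np
    from dace.frontend.fortran import fortran_parser

    def test_fortran_frontend_loop_exit():
        # Assumption: fparser maps EXIT inside a DO loop to Break_Node.
        test_string = """
        PROGRAM loop_exit_test
        implicit none
        integer, dimension(5) :: d
        CALL loop_exit_test_function(d)
        end

        SUBROUTINE loop_exit_test_function(d)
        integer, dimension(5) :: d
        integer :: i
        DO i = 1, 5
            IF (i .GT. 3) EXIT
            d(i) = i
        END DO
        END SUBROUTINE loop_exit_test_function
        """
        sdfg = fortran_parser.create_sdfg_from_string(test_string, "loop_exit_test", False)
        sdfg.simplify(verbose=True)
        d = np.full([5], 42, order="F", dtype=np.int32)
        sdfg(d=d)
        # Iterations 4 and 5 never run, so those entries keep their old value.
        assert list(d) == [1, 2, 3, 42, 42]

From cbd8bb918a599d2fff283be9e4210b249ad04570 Mon Sep 17 00:00:00 2001
From: Marcin Copik
Date: Fri, 13 Oct 2023 14:37:36 +0200
Subject: [PATCH 077/163] Implement basic handling of the any intrinsic of
 Fortran

---
 dace/frontend/fortran/ast_transforms.py |   6 +-
 dace/frontend/fortran/intrinsics.py     | 164 +++++++++++++++++++++++-
 tests/fortran/intrinsic_any.py          |  54 ++++++++
 3 files changed, 218 insertions(+), 6 deletions(-)
 create mode 100644 tests/fortran/intrinsic_any.py

diff --git 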
a/dace/frontend/fortran/ast_transforms.py b/dace/frontend/fortran/ast_transforms.py
index 630021e02c..1990597d2a 100644
--- a/dace/frontend/fortran/ast_transforms.py
+++ b/dace/frontend/fortran/ast_transforms.py
@@ -181,9 +181,11 @@ def __init__(self, funcs=None):
         if funcs is None:
             funcs = []
         self.funcs = funcs
+
+        from dace.frontend.fortran.intrinsics import FortranIntrinsics
         self.excepted_funcs = [
-            "malloc", "exp", "pow", "sqrt", "cbrt", "max", "abs", "min", "__dace_sum", "__dace_sign", "tanh",
-            "__dace_epsilon"
+            "malloc", "exp", "pow", "sqrt", "cbrt", "max", "abs", "min", "__dace_sign", "tanh",
+            "__dace_epsilon", *FortranIntrinsics.function_names()
         ]
 
     def visit_Call_Expr_Node(self, node: ast_internal_classes.Call_Expr_Node):
diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py
index f61586325c..5c4f2368c5 100644
--- a/dace/frontend/fortran/intrinsics.py
+++ b/dace/frontend/fortran/intrinsics.py
@@ -1,5 +1,6 @@
 
 from abc import abstractmethod
+import copy
 import math
 from typing import Any, List, Set, Type
 
@@ -58,15 +59,18 @@ class LoopBasedReplacement:
     @staticmethod
     def replaced_name(func_name: str) -> str:
         replacements = {
-            "SUM": "__dace_sum"
+            "SUM": "__dace_sum",
+            "ANY": "__dace_any"
         }
         return replacements[func_name]
 
     @staticmethod
     def replace(func_name: ast_internal_classes.Name_Node, args: ast_internal_classes.Arg_List_Node, line) -> ast_internal_classes.FNode:
         func_types = {
-            "__dace_sum": "DOUBLE"
+            "__dace_sum": "DOUBLE",
+            "__dace_any": "DOUBLE"
         }
+        # FIXME: Any requires sometimes returning an array of booleans
         call_type = func_types[func_name.name]
         return ast_internal_classes.Call_Expr_Node(name=func_name, type=call_type, args=args.args, line_number=line)
 
@@ -199,18 +203,166 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No
                 newbody.append(self.visit(child))
         return ast_internal_classes.Execution_Part_Node(execution=newbody)
 
+class Any(LoopBasedReplacement):
+
+    class AnyLoopNodeLister(NodeVisitor):
+        """
+        Finds all ANY operations that have to be transformed to loops in the AST
+        """
+        def __init__(self):
+            self.nodes: List[ast_internal_classes.FNode] = []
+
+        def visit_BinOp_Node(self, node: ast_internal_classes.BinOp_Node):
+
+            if isinstance(node.rval, ast_internal_classes.Call_Expr_Node):
+                if node.rval.name.name == "__dace_any":
+                    self.nodes.append(node)
+
+        def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_Node):
+            return
+
+    class Transformation(NodeTransformer):
+
+        """
+        Transforms the AST by removing ANY operations over arrays and replacing them with loops
+        """
+        def __init__(self, ast):
+            self.count = 0
+            ParentScopeAssigner().visit(ast)
+            self.scope_vars = ScopeVarsDeclarations()
+            self.scope_vars.visit(ast)
+
+        def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_Node):
+            newbody = []
+            for child in node.execution:
+                lister = Any.AnyLoopNodeLister()
+                lister.visit(child)
+                res = lister.nodes
+                print(res)
+                if res is not None and len(res) > 0:
+
+                    current = child.lval
+                    val = child.rval
+
+                    rvals = []
+                    for i in mywalk(val):
+                        if isinstance(i, ast_internal_classes.Call_Expr_Node) and i.name.name == '__dace_any':
+
+                            for arg in i.args:
+
+                                # supports syntax SUM(arr)
+                                if isinstance(arg, ast_internal_classes.Name_Node):
+                                    array_node = ast_internal_classes.Array_Subscript_Node(parent=arg.parent)
+                                    array_node.name = arg
+
+                                    # If we access SUM(arr) where arr has many dimensions,
+                                    # We need to create a ParDecl_Node for each dimension
+                                    dims = 
len(self.scope_vars.get_var(node.parent, arg.name).sizes) + array_node.indices = [ast_internal_classes.ParDecl_Node(type='ALL')] * dims + + rvals.append(array_node) + + # supports syntax SUM(arr(:)) + if isinstance(arg, ast_internal_classes.Array_Subscript_Node): + rvals.append(arg) + + if len(rvals) != 1: + raise NotImplementedError("Only one array can be summed") + val = rvals[0] + rangeposrval = [] + rangesrval = [] + + par_Decl_Range_Finder(val, rangesrval, rangeposrval, self.count, newbody, self.scope_vars, True) + + # Initialize the result variable + newbody.append( + ast_internal_classes.BinOp_Node( + lval=current, + op="=", + rval=ast_internal_classes.Int_Literal_Node(value="0"), + line_number=child.line_number + ) + ) + range_index = 0 + + # Here begins the specialized implementation + cond = ast_internal_classes.BinOp_Node(op="==", + rval=ast_internal_classes.Int_Literal_Node(value="1"), + lval=copy.deepcopy(val), + line_number=child.line_number) + body_if = ast_internal_classes.Execution_Part_Node(execution=[ + ast_internal_classes.BinOp_Node( + lval=copy.deepcopy(current), + op="=", + rval=ast_internal_classes.Int_Literal_Node(value="1"), + line_number=child.line_number + ), + # TODO: we should make the `break` generation conditional based on the architecture + # For parallel maps, we should have no breaks + # For sequential loop, we want a break to be faster + #ast_internal_classes.Break_Node( + # line_number=child.line_number + #) + ]) + body = ast_internal_classes.If_Stmt_Node( + cond=cond, + body=body_if, + body_else=ast_internal_classes.Execution_Part_Node(execution=[]), + line_number=child.line_number + ) + # Here ends the specialized implementation + + for i in rangesrval: + initrange = i[0] + finalrange = i[1] + init = ast_internal_classes.BinOp_Node( + lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)), + op="=", + rval=initrange, + line_number=child.line_number) + cond = ast_internal_classes.BinOp_Node( + lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)), + op="<=", + rval=finalrange, + line_number=child.line_number) + iter = ast_internal_classes.BinOp_Node( + lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)), + op="=", + rval=ast_internal_classes.BinOp_Node( + lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)), + op="+", + rval=ast_internal_classes.Int_Literal_Node(value="1")), + line_number=child.line_number) + current_for = ast_internal_classes.Map_Stmt_Node( + init=init, + cond=cond, + iter=iter, + body=ast_internal_classes.Execution_Part_Node(execution=[body]), + line_number=child.line_number) + body = current_for + range_index += 1 + + newbody.append(body) + + self.count = self.count + range_index + else: + newbody.append(self.visit(child)) + return ast_internal_classes.Execution_Part_Node(execution=newbody) + class FortranIntrinsics: IMPLEMENTATIONS_AST = { "SELECTED_INT_KIND": SelectedKind, "SELECTED_REAL_KIND": SelectedKind, - "SUM": Sum + "SUM": Sum, + "ANY": Any } IMPLEMENTATIONS_DACE = { "__dace_selected_int_kind": SelectedKind, "__dace_selected_real_kind": SelectedKind, - "__dace_sum": Sum + "__dace_sum": Sum, + "__dace_any": Any } def __init__(self): @@ -219,6 +371,10 @@ def __init__(self): def transformations(self) -> Set[Type[NodeTransformer]]: return self._transformations_to_run + @staticmethod + def function_names() -> List[str]: + return list(FortranIntrinsics.IMPLEMENTATIONS_DACE.keys()) + def 
replace_function_name(self, node: FASTNode) -> ast_internal_classes.Name_Node: func_name = node.string diff --git a/tests/fortran/intrinsic_any.py b/tests/fortran/intrinsic_any.py new file mode 100644 index 0000000000..310f80855e --- /dev/null +++ b/tests/fortran/intrinsic_any.py @@ -0,0 +1,54 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. + +import numpy as np + +from dace.frontend.fortran import ast_transforms, fortran_parser + +def test_fortran_frontend_any_array(): + """ + Tests that the generated array map correctly handles offsets. + """ + test_string = """ + PROGRAM intrinsic_any_test + implicit none + logical, dimension(5) :: d + logical, dimension(2) :: res + CALL intrinsic_any_test_function(d, res) + end + + SUBROUTINE intrinsic_any_test_function(d, res) + logical, dimension(5) :: d + logical, dimension(2) :: res + + res(1) = ANY(d) + + !res(1) = ANY(d == .True.) + !d(3) = .False. + !res(2) = ANY(d == .True.) + + !res(1) = ANY(d == e) + !d(3) = .False. + !res(2) = ANY(d == + + END SUBROUTINE intrinsic_any_test_function + """ + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_any_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + size = 5 + d = np.full([size], False, order="F", dtype=np.int32) + res = np.full([2], 42, order="F", dtype=np.int32) + + d[2] = True + sdfg(d=d, res=res) + assert res[0] == True + + d[2] = False + sdfg(d=d, res=res) + assert res[0] == False + +if __name__ == "__main__": + + test_fortran_frontend_any_array() From 2124e4ab559de350a149ac366d2a87e6e3196018 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Fri, 13 Oct 2023 14:43:37 +0200 Subject: [PATCH 078/163] Explicitly not support the DIM parameter of Fortran ANY --- dace/frontend/fortran/intrinsics.py | 29 +++++++++++++++-------------- tests/fortran/intrinsic_any.py | 28 +++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 15 deletions(-) diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py index 5c4f2368c5..fad9ba175c 100644 --- a/dace/frontend/fortran/intrinsics.py +++ b/dace/frontend/fortran/intrinsics.py @@ -238,7 +238,6 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No lister = Any.AnyLoopNodeLister() lister.visit(child) res = lister.nodes - print(res) if res is not None and len(res) > 0: current = child.lval @@ -248,23 +247,25 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No for i in mywalk(val): if isinstance(i, ast_internal_classes.Call_Expr_Node) and i.name.name == '__dace_any': - for arg in i.args: + if len(i.args) > 1: + raise NotImplementedError("Fortran ANY with the DIM parameter is not supported!") + arg = i.args[0] - # supports syntax SUM(arr) - if isinstance(arg, ast_internal_classes.Name_Node): - array_node = ast_internal_classes.Array_Subscript_Node(parent=arg.parent) - array_node.name = arg + # supports syntax SUM(arr) + if isinstance(arg, ast_internal_classes.Name_Node): + array_node = ast_internal_classes.Array_Subscript_Node(parent=arg.parent) + array_node.name = arg - # If we access SUM(arr) where arr has many dimensions, - # We need to create a ParDecl_Node for each dimension - dims = len(self.scope_vars.get_var(node.parent, arg.name).sizes) - array_node.indices = [ast_internal_classes.ParDecl_Node(type='ALL')] * dims + # If we access SUM(arr) where arr has many dimensions, + # We need to create a ParDecl_Node for each dimension + dims = len(self.scope_vars.get_var(node.parent, arg.name).sizes) + 
array_node.indices = [ast_internal_classes.ParDecl_Node(type='ALL')] * dims - rvals.append(array_node) + rvals.append(array_node) - # supports syntax SUM(arr(:)) - if isinstance(arg, ast_internal_classes.Array_Subscript_Node): - rvals.append(arg) + # supports syntax SUM(arr(:)) + if isinstance(arg, ast_internal_classes.Array_Subscript_Node): + rvals.append(arg) if len(rvals) != 1: raise NotImplementedError("Only one array can be summed") diff --git a/tests/fortran/intrinsic_any.py b/tests/fortran/intrinsic_any.py index 310f80855e..81574c7776 100644 --- a/tests/fortran/intrinsic_any.py +++ b/tests/fortran/intrinsic_any.py @@ -1,8 +1,9 @@ # Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. import numpy as np +import pytest -from dace.frontend.fortran import ast_transforms, fortran_parser +from dace.frontend.fortran import fortran_parser def test_fortran_frontend_any_array(): """ @@ -49,6 +50,31 @@ def test_fortran_frontend_any_array(): sdfg(d=d, res=res) assert res[0] == False +def test_fortran_frontend_any_array_dim(): + """ + Tests that the generated array map correctly handles offsets. + """ + test_string = """ + PROGRAM intrinsic_any_test + implicit none + logical, dimension(5) :: d + logical, dimension(2) :: res + CALL intrinsic_any_test_function(d, res) + end + + SUBROUTINE intrinsic_any_test_function(d, res) + logical, dimension(5) :: d + logical, dimension(2) :: res + + res(1) = ANY(d, 1) + + END SUBROUTINE intrinsic_any_test_function + """ + + with pytest.raises(NotImplementedError): + sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_any_test", False) + if __name__ == "__main__": test_fortran_frontend_any_array() + test_fortran_frontend_any_array_dim() From da90d7d482dabb3238af0fe5afa93469385e33b2 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Fri, 13 Oct 2023 17:00:12 +0200 Subject: [PATCH 079/163] Extend pardecl_node parser to compute array range size --- dace/frontend/fortran/ast_transforms.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/dace/frontend/fortran/ast_transforms.py b/dace/frontend/fortran/ast_transforms.py index 1990597d2a..58146563da 100644 --- a/dace/frontend/fortran/ast_transforms.py +++ b/dace/frontend/fortran/ast_transforms.py @@ -706,6 +706,7 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No def par_Decl_Range_Finder(node: ast_internal_classes.Array_Subscript_Node, ranges: list, rangepos: list, + rangeslen: list, count: int, newbody: list, scope_vars: ScopeVarsDeclarations, @@ -714,6 +715,7 @@ def par_Decl_Range_Finder(node: ast_internal_classes.Array_Subscript_Node, Helper function for the transformation of array operations and sums to loops :param node: The AST to be transformed :param ranges: The ranges of the loop + :param rangeslength: The length of ranges of the loop :param rangepos: The positions of the ranges :param count: The current count of the loop :param newbody: The new basic block that will contain the loop @@ -758,9 +760,24 @@ def par_Decl_Range_Finder(node: ast_internal_classes.Array_Subscript_Node, rval=ast_internal_classes.Int_Literal_Node(value="1") ) ranges.append([lower_boundary, upper_boundary]) + rangeslen.append(-1) else: ranges.append([i.range[0], i.range[1]]) + + start = 0 + if isinstance(i.range[0], ast_internal_classes.Int_Literal_Node): + start = int(i.range[0].value) + else: + start = i.range[0] + + end = 0 + if isinstance(i.range[1], ast_internal_classes.Int_Literal_Node): + end = int(i.range[1].value) + 
else: + end = i.range[1] + + rangeslen.append(end - start + 1) rangepos.append(currentindex) if declaration: newbody.append( @@ -800,7 +817,7 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No val = child.rval ranges = [] rangepos = [] - par_Decl_Range_Finder(current, ranges, rangepos, self.count, newbody, self.scope_vars, True) + par_Decl_Range_Finder(current, ranges, rangepos, [], self.count, newbody, self.scope_vars, True) if res_range is not None and len(res_range) > 0: rvals = [i for i in mywalk(val) if isinstance(i, ast_internal_classes.Array_Subscript_Node)] @@ -808,7 +825,7 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No rangeposrval = [] rangesrval = [] - par_Decl_Range_Finder(i, rangesrval, rangeposrval, self.count, newbody, self.scope_vars, False) + par_Decl_Range_Finder(i, rangesrval, rangeposrval, [], self.count, newbody, self.scope_vars, False) for i, j in zip(ranges, rangesrval): if i != j: From 514df46ea3efd60745e9705fc0fcd6969eb00474 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Fri, 13 Oct 2023 17:03:21 +0200 Subject: [PATCH 080/163] Implement Fortran intrinsic ANY for binary operation --- dace/frontend/fortran/intrinsics.py | 93 +++++++++++++++++++++-------- tests/fortran/intrinsic_any.py | 55 ++++++++++++++++- 2 files changed, 121 insertions(+), 27 deletions(-) diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py index fad9ba175c..046cbdbdff 100644 --- a/dace/frontend/fortran/intrinsics.py +++ b/dace/frontend/fortran/intrinsics.py @@ -146,7 +146,7 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No rangeposrval = [] rangesrval = [] - par_Decl_Range_Finder(val, rangesrval, rangeposrval, self.count, newbody, self.scope_vars, True) + par_Decl_Range_Finder(val, rangesrval, rangeposrval, [], self.count, newbody, self.scope_vars, True) # Initialize the result variable newbody.append( @@ -232,6 +232,24 @@ def __init__(self, ast): self.scope_vars = ScopeVarsDeclarations() self.scope_vars.visit(ast) + def _parse_array(self, node: ast_internal_classes.Execution_Part_Node, arg: ast_internal_classes.FNode) -> ast_internal_classes.Array_Subscript_Node: + + # supports syntax ANY(arr) + if isinstance(arg, ast_internal_classes.Name_Node): + array_node = ast_internal_classes.Array_Subscript_Node(parent=arg.parent) + array_node.name = arg + + # If we access SUM(arr) where arr has many dimensions, + # We need to create a ParDecl_Node for each dimension + dims = len(self.scope_vars.get_var(node.parent, arg.name).sizes) + array_node.indices = [ast_internal_classes.ParDecl_Node(type='ALL')] * dims + + return array_node + + # supports syntax ANY(arr(:)) + if isinstance(arg, ast_internal_classes.Array_Subscript_Node): + return arg + def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_Node): newbody = [] for child in node.execution: @@ -244,6 +262,7 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No val = child.rval rvals = [] + rangesrval = [] for i in mywalk(val): if isinstance(i, ast_internal_classes.Call_Expr_Node) and i.name.name == '__dace_any': @@ -251,29 +270,55 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No raise NotImplementedError("Fortran ANY with the DIM parameter is not supported!") arg = i.args[0] - # supports syntax SUM(arr) - if isinstance(arg, ast_internal_classes.Name_Node): - array_node = 
ast_internal_classes.Array_Subscript_Node(parent=arg.parent) - array_node.name = arg - - # If we access SUM(arr) where arr has many dimensions, - # We need to create a ParDecl_Node for each dimension - dims = len(self.scope_vars.get_var(node.parent, arg.name).sizes) - array_node.indices = [ast_internal_classes.ParDecl_Node(type='ALL')] * dims - + array_node = self._parse_array(node, arg) + if array_node is not None: rvals.append(array_node) - # supports syntax SUM(arr(:)) - if isinstance(arg, ast_internal_classes.Array_Subscript_Node): - rvals.append(arg) - - if len(rvals) != 1: - raise NotImplementedError("Only one array can be summed") - val = rvals[0] - rangeposrval = [] - rangesrval = [] - - par_Decl_Range_Finder(val, rangesrval, rangeposrval, self.count, newbody, self.scope_vars, True) + if len(rvals) != 1: + raise NotImplementedError("Only one array can be summed") + val = rvals[0] + rangeposrval = [] + + par_Decl_Range_Finder(val, rangesrval, rangeposrval, [], self.count, newbody, self.scope_vars, True) + cond = ast_internal_classes.BinOp_Node(op="==", + rval=ast_internal_classes.Int_Literal_Node(value="1"), + lval=copy.deepcopy(val), + line_number=child.line_number) + else: + + # supports syntax ANY(logical op) + # the logical op can be: + # + # (1) arr1 op arr2 + # where arr1 and arr2 are name node or array subscript node + # there, we need to extract shape and verify they are the same + # + # (2) arr1 op scalar + # there, we ignore the scalar because it's not an array + if isinstance(arg, ast_internal_classes.BinOp_Node): + + left_side_arr = self._parse_array(node, arg.lval) + right_side_arr = self._parse_array(node, arg.rval) + if len(left_side_arr.indices) != len(right_side_arr.indices): + raise TypeError("Can't parse Fortran ANY with different array ranks!") + + for left_idx, right_idx in zip(left_side_arr.indices, right_side_arr.indices): + if left_idx.type != right_idx.type: + raise TypeError("Can't parse Fortran ANY with different array ranks!") + + rangeposrval = [] + rangesrval_right = [] + rangeslen_left = [] + rangeslen_right = [] + par_Decl_Range_Finder(left_side_arr, rangesrval, rangeposrval, rangeslen_left, self.count, newbody, self.scope_vars, True) + rangeposrval = [] + par_Decl_Range_Finder(right_side_arr, rangesrval_right, rangeposrval, rangeslen_right, self.count, newbody, self.scope_vars, True) + val = arg + + # Now, we need to convert the array to a proper subscript node + cond = copy.deepcopy(val) + cond.lval = left_side_arr + cond.rval = right_side_arr # Initialize the result variable newbody.append( @@ -287,10 +332,6 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No range_index = 0 # Here begins the specialized implementation - cond = ast_internal_classes.BinOp_Node(op="==", - rval=ast_internal_classes.Int_Literal_Node(value="1"), - lval=copy.deepcopy(val), - line_number=child.line_number) body_if = ast_internal_classes.Execution_Part_Node(execution=[ ast_internal_classes.BinOp_Node( lval=copy.deepcopy(current), diff --git a/tests/fortran/intrinsic_any.py b/tests/fortran/intrinsic_any.py index 81574c7776..e3648e22b2 100644 --- a/tests/fortran/intrinsic_any.py +++ b/tests/fortran/intrinsic_any.py @@ -5,6 +5,7 @@ from dace.frontend.fortran import fortran_parser + def test_fortran_frontend_any_array(): """ Tests that the generated array map correctly handles offsets. 
@@ -50,6 +51,7 @@ def test_fortran_frontend_any_array():
     sdfg(d=d, res=res)
     assert res[0] == False
 
+
 def test_fortran_frontend_any_array_dim():
     """
     Tests that the generated array map correctly handles offsets.
@@ -72,9 +74,60 @@ def test_fortran_frontend_any_array_dim():
     """
 
     with pytest.raises(NotImplementedError):
-        sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_any_test", False)
+        fortran_parser.create_sdfg_from_string(test_string, "intrinsic_any_test", False)
+
+
+def test_fortran_frontend_any_array_comparison():
+    """
+    Tests that the generated array map correctly handles offsets.
+    """
+    test_string = """
+    PROGRAM intrinsic_any_test
+    implicit none
+    integer, dimension(5) :: first
+    integer, dimension(5) :: second
+    logical, dimension(6) :: res
+    CALL intrinsic_any_test_function(first, second, res)
+    end
+
+    SUBROUTINE intrinsic_any_test_function(first, second, res)
+    integer, dimension(5) :: first
+    integer, dimension(5) :: second
+    logical, dimension(6) :: res
+
+    res(1) = ANY(first .eq. second)
+    !res(2) = ANY(first(:) .eq. second)
+    !res(3) = ANY(first .eq. second(:))
+    !res(4) = ANY(first(:) .eq. second(:))
+    !res(5) = any(first(1:5) .eq. second(1:5))
+    !res(6) = any(first(1:3) .eq. second(3:5))
+
+    END SUBROUTINE intrinsic_any_test_function
+    """
+
+    sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_any_test", False)
+    sdfg.simplify(verbose=True)
+    sdfg.compile()
+
+    size = 5
+    first = np.full([size], 1, order="F", dtype=np.int32)
+    second = np.full([size], 2, order="F", dtype=np.int32)
+    second[3] = 1
+    res = np.full([6], 1, order="F", dtype=np.int32)
+
+    sdfg(first=first, second=second, res=res)
+    for val in res:
+        assert val == True
+
+    second = np.full([size], 2, order="F", dtype=np.int32)
+    res = np.full([6], 0, order="F", dtype=np.int32)
+    sdfg(first=first, second=second, res=res)
+    for val in res:
+        assert val == False
+
 
 if __name__ == "__main__":
 
     test_fortran_frontend_any_array()
     test_fortran_frontend_any_array_dim()
+    test_fortran_frontend_any_array_comparison()

From 4156ff50ebd3d7ba97f27b9f8586b6058643f135 Mon Sep 17 00:00:00 2001
From: Marcin Copik
Date: Fri, 13 Oct 2023 17:29:06 +0200
Subject: [PATCH 081/163] Support Fortran ANY with different array ranges

---
 dace/frontend/fortran/intrinsics.py | 26 ++++++++++++++++++++++++++
 tests/fortran/intrinsic_any.py      | 24 ++++++++++++++----------
 2 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py
index 046cbdbdff..06b094c33e 100644
--- a/dace/frontend/fortran/intrinsics.py
+++ b/dace/frontend/fortran/intrinsics.py
@@ -315,6 +315,32 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No
                         par_Decl_Range_Finder(right_side_arr, rangesrval_right, rangeposrval, rangeslen_right, self.count, newbody, self.scope_vars, True)
                         val = arg
 
+                    for left_len, right_len in zip(rangeslen_left, rangeslen_right):
+                        if left_len != right_len:
+                            raise TypeError("Can't support Fortran ANY with different array ranks!")
+
+                    # Now, the loop will be dictated by the left array
+                    # If the access pattern on the right array is different, we need to shift it - for every dimension.
+ # For example, we can have arr(1:3) == arr2(3:5) + # Then, loop_idx is from 1 to 3 + # arr becomes arr[loop_idx] + # but arr2 must be arr2[loop_idx + 2] + for i in range(len(right_side_arr.indices)): + + idx_var = right_side_arr.indices[i] + start_loop = rangesrval[i][0] + end_loop = rangesrval_right[i][0] + + difference = int(end_loop.value) - int(start_loop.value) + 1 + if difference != 0: + new_index = ast_internal_classes.BinOp_Node( + lval=idx_var, + op="+", + rval=ast_internal_classes.Int_Literal_Node(value=str(difference)), + line_number=child.line_number + ) + right_side_arr.indices[i] = new_index + # Now, we need to convert the array to a proper subscript node cond = copy.deepcopy(val) cond.lval = left_side_arr diff --git a/tests/fortran/intrinsic_any.py b/tests/fortran/intrinsic_any.py index e3648e22b2..80b74363b3 100644 --- a/tests/fortran/intrinsic_any.py +++ b/tests/fortran/intrinsic_any.py @@ -86,21 +86,24 @@ def test_fortran_frontend_any_array_comparison(): implicit none integer, dimension(5) :: first integer, dimension(5) :: second - logical, dimension(6) :: res + logical, dimension(7) :: res CALL intrinsic_any_test_function(first, second, res) end SUBROUTINE intrinsic_any_test_function(first, second, res) integer, dimension(5) :: first integer, dimension(5) :: second - logical, dimension(6) :: res + logical, dimension(7) :: res res(1) = ANY(first .eq. second) - !res(2) = ANY(first(:) .eq. second) - !res(3) = ANY(first .eq. second(:)) - !res(4) = ANY(first(:) .eq. second(:)) - !res(5) = any(first(1:5) .eq. second(1:5)) - !res(6) = any(first(1:3) .eq. second(3:5)) + res(2) = ANY(first(:) .eq. second) + res(3) = ANY(first .eq. second(:)) + res(4) = ANY(first(:) .eq. second(:)) + res(5) = any(first(1:5) .eq. second(1:5)) + ! This will also be true - the only same + ! element is at position 3. + res(6) = any(first(1:3) .eq. second(3:5)) + res(7) = any(first(1:2) .eq. 
second(4:5)) END SUBROUTINE intrinsic_any_test_function """ @@ -113,14 +116,15 @@ def test_fortran_frontend_any_array_comparison(): first = np.full([size], 1, order="F", dtype=np.int32) second = np.full([size], 2, order="F", dtype=np.int32) second[3] = 1 - res = np.full([6], 1, order="F", dtype=np.int32) + res = np.full([7], 0, order="F", dtype=np.int32) sdfg(first=first, second=second, res=res) - for val in res: + for val in res[0:-1]: assert val == True + assert res[-1] == False second = np.full([size], 2, order="F", dtype=np.int32) - res = np.full([6], 0, order="F", dtype=np.int32) + res = np.full([7], 0, order="F", dtype=np.int32) sdfg(first=first, second=second, res=res) for val in res: assert val == False From 0873a3fb31c059f6fa44ecf20dca0d236b46d22c Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Fri, 13 Oct 2023 18:01:13 +0200 Subject: [PATCH 082/163] Support comparison against a scalar value in Fortran ANY --- dace/frontend/fortran/intrinsics.py | 36 ++++++++++++++--- tests/fortran/intrinsic_any.py | 61 +++++++++++++++++++++++++++-- 2 files changed, 89 insertions(+), 8 deletions(-) diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py index 06b094c33e..991b4caeff 100644 --- a/dace/frontend/fortran/intrinsics.py +++ b/dace/frontend/fortran/intrinsics.py @@ -299,6 +299,37 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No left_side_arr = self._parse_array(node, arg.lval) right_side_arr = self._parse_array(node, arg.rval) + has_two_arrays = left_side_arr is not None and right_side_arr is not None + + + if not has_two_arrays: + + # if one side of the operator is scalar, then parsing array + # will return none + dominant_array = left_side_arr + if left_side_arr is None: + dominant_array = right_side_arr + + rangeposrval = [] + rangeslen_left = [] + rangeposrval = [] + par_Decl_Range_Finder(dominant_array, rangesrval, rangeposrval, rangeslen_left, self.count, newbody, self.scope_vars, True) + val = arg + + cond = copy.deepcopy(val) + if left_side_arr is not None: + cond.lval = dominant_array + if right_side_arr is not None: + cond.rval = dominant_array + + continue + + rangeposrval = [] + rangeslen_left = [] + rangeposrval = [] + par_Decl_Range_Finder(left_side_arr, rangesrval, rangeposrval, rangeslen_left, self.count, newbody, self.scope_vars, True) + val = arg + if len(left_side_arr.indices) != len(right_side_arr.indices): raise TypeError("Can't parse Fortran ANY with different array ranks!") @@ -306,14 +337,9 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No if left_idx.type != right_idx.type: raise TypeError("Can't parse Fortran ANY with different array ranks!") - rangeposrval = [] rangesrval_right = [] - rangeslen_left = [] rangeslen_right = [] - par_Decl_Range_Finder(left_side_arr, rangesrval, rangeposrval, rangeslen_left, self.count, newbody, self.scope_vars, True) - rangeposrval = [] par_Decl_Range_Finder(right_side_arr, rangesrval_right, rangeposrval, rangeslen_right, self.count, newbody, self.scope_vars, True) - val = arg for left_len, right_len in zip(rangeslen_left, rangeslen_right): if left_len != right_len: diff --git a/tests/fortran/intrinsic_any.py b/tests/fortran/intrinsic_any.py index 80b74363b3..ae1dab564e 100644 --- a/tests/fortran/intrinsic_any.py +++ b/tests/fortran/intrinsic_any.py @@ -129,9 +129,64 @@ def test_fortran_frontend_any_array_comparison(): for val in res: assert val == False +def test_fortran_frontend_any_array_scalar_comparison(): + """ + Tests that 
the generated array map correctly handles offsets.
+    """
+    test_string = """
+    PROGRAM intrinsic_any_test
+    implicit none
+    integer, dimension(5) :: first
+    logical, dimension(7) :: res
+    CALL intrinsic_any_test_function(first, res)
+    end
+
+    SUBROUTINE intrinsic_any_test_function(first, res)
+    integer, dimension(5) :: first
+    logical, dimension(7) :: res
+
+    res(1) = ANY(first .eq. 42)
+    res(2) = ANY(first(:) .eq. 42)
+    res(3) = ANY(first(1:2) .eq. 42)
+    res(4) = ANY(first(3) .eq. 42)
+    res(5) = ANY(first(3:5) .eq. 42)
+    res(6) = ANY(42 .eq. first)
+    res(7) = ANY(42 .ne. first)
+
+    END SUBROUTINE intrinsic_any_test_function
+    """
+
+    sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_any_test", False)
+    sdfg.simplify(verbose=True)
+    sdfg.compile()
+
+    size = 5
+    first = np.full([size], 1, order="F", dtype=np.int32)
+    res = np.full([7], 0, order="F", dtype=np.int32)
+
+    sdfg(first=first, res=res)
+    for val in res[0:-1]:
+        assert val == False
+    assert res[-1] == True
+
+    first[1] = 42
+    sdfg(first=first, res=res)
+    assert list(res) == [1, 1, 1, 0, 0, 1, 1]
+
+    first[1] = 5
+    first[3] = 42
+    sdfg(first=first, res=res)
+    assert list(res) == [1, 1, 0, 0, 1, 1, 1]
+
+    first[3] = 7
+    first[2] = 42
+    sdfg(first=first, res=res)
+    assert list(res) == [1, 1, 0, 1, 1, 1, 1]
 
 if __name__ == "__main__":
 
-    test_fortran_frontend_any_array()
-    test_fortran_frontend_any_array_dim()
-    test_fortran_frontend_any_array_comparison()
+    #test_fortran_frontend_any_array()
+    #test_fortran_frontend_any_array_dim()
+    #test_fortran_frontend_any_array_comparison()
     test_fortran_frontend_any_array_scalar_comparison()
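As a point of reference, the behaviour the lowered loops must reproduce for the scalar form can be written directly with NumPy, which the tests already use. This is an illustration of the expected semantics only, not code from the frontend:

    import numpy as np

    first = np.full(5, 1, dtype=np.int32)
    first[1] = 42  # Fortran position 2

    # ANY(first .eq. 42): broadcast the scalar, compare elementwise, reduce.
    assert np.any(first == 42)
    # ANY(first(1:2) .eq. 42): 1-based inclusive slice -> 0-based half-open slice.
    assert np.any(first[0:2] == 42)
    # ANY(first(3:5) .eq. 42): position 2 lies outside this window.
    assert not np.any(first[2:5] == 42)

From cb6190f02464534fccca48a828ddfb81dd618910 Mon Sep 17 00:00:00 2001
From: Marcin Copik
Date: Fri, 13 Oct 2023 18:05:28 +0200
Subject: [PATCH 083/163] Fix regression in generation of array ranges for ANY

---
 dace/frontend/fortran/intrinsics.py | 14 +++++++-------
 tests/fortran/intrinsic_any.py      |  6 +++---
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py
index 991b4caeff..6362fe1360 100644
--- a/dace/frontend/fortran/intrinsics.py
+++ b/dace/frontend/fortran/intrinsics.py
@@ -301,7 +301,6 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No
                     right_side_arr = self._parse_array(node, arg.rval)
                     has_two_arrays = left_side_arr is not None and right_side_arr is not None
 
-
                     if not has_two_arrays:
 
                         # if one side of the operator is scalar, then parsing array
@@ -324,11 +323,6 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No
 
                         continue
 
-                    rangeposrval = []
-                    rangeslen_left = []
-                    rangeposrval = []
-                    par_Decl_Range_Finder(left_side_arr, rangesrval, rangeposrval, rangeslen_left, self.count, newbody, self.scope_vars, True)
-                    val = arg
 
                     if len(left_side_arr.indices) != len(right_side_arr.indices):
                         raise TypeError("Can't parse Fortran ANY with different array ranks!")
@@ -337,9 +331,15 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No
                         if left_idx.type != right_idx.type:
                             raise TypeError("Can't parse Fortran ANY with different array ranks!")
 
+                    rangeposrval = []
+                    rangeslen_left = []
+                    rangeposrval = []
+                    par_Decl_Range_Finder(left_side_arr, rangesrval, rangeposrval, rangeslen_left, self.count, newbody, 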
self.scope_vars, True) + par_Decl_Range_Finder(right_side_arr, rangesrval_right, [], rangeslen_right, self.count, newbody, self.scope_vars, True) for left_len, right_len in zip(rangeslen_left, rangeslen_right): if left_len != right_len: diff --git a/tests/fortran/intrinsic_any.py b/tests/fortran/intrinsic_any.py index ae1dab564e..9d742bb388 100644 --- a/tests/fortran/intrinsic_any.py +++ b/tests/fortran/intrinsic_any.py @@ -186,7 +186,7 @@ def test_fortran_frontend_any_array_scalar_comparison(): if __name__ == "__main__": - #test_fortran_frontend_any_array() - #test_fortran_frontend_any_array_dim() - #test_fortran_frontend_any_array_comparison() + test_fortran_frontend_any_array() + test_fortran_frontend_any_array_dim() + test_fortran_frontend_any_array_comparison() test_fortran_frontend_any_array_scalar_comparison() From e8f701d07961d9f815dcd4d641c70a84698fc7c9 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Fri, 13 Oct 2023 18:08:39 +0200 Subject: [PATCH 084/163] Add test verifying that we do not generate code for incorrect Fortran ANY --- tests/fortran/intrinsic_any.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/fortran/intrinsic_any.py b/tests/fortran/intrinsic_any.py index 9d742bb388..5ca40c17b2 100644 --- a/tests/fortran/intrinsic_any.py +++ b/tests/fortran/intrinsic_any.py @@ -184,9 +184,36 @@ def test_fortran_frontend_any_array_scalar_comparison(): sdfg(first=first, res=res) assert list(res) == [1, 1, 0, 1, 1, 1, 1] +def test_fortran_frontend_any_array_comparison_wrong_subset(): + """ + Tests that the generated array map correctly handles offsets. + """ + test_string = """ + PROGRAM intrinsic_any_test + implicit none + logical, dimension(5) :: first + logical, dimension(5) :: second + logical, dimension(2) :: res + CALL intrinsic_any_test_function(first, second, res) + end + + SUBROUTINE intrinsic_any_test_function(first, second, res) + logical, dimension(5) :: first + logical, dimension(5) :: second + logical, dimension(2) :: res + + res(1) = ANY(first(1:2) .eq. 
second(2:5)) + + END SUBROUTINE intrinsic_any_test_function + """ + + with pytest.raises(TypeError): + fortran_parser.create_sdfg_from_string(test_string, "intrinsic_any_test", False) + if __name__ == "__main__": test_fortran_frontend_any_array() test_fortran_frontend_any_array_dim() test_fortran_frontend_any_array_comparison() test_fortran_frontend_any_array_scalar_comparison() + test_fortran_frontend_any_array_comparison_wrong_subset() From 3b39758c255e68ee362007a8af976dffa9d48857 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Fri, 13 Oct 2023 18:41:52 +0200 Subject: [PATCH 085/163] Fix off-by-one error --- dace/frontend/fortran/intrinsics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py index 6362fe1360..e2a24f5ff6 100644 --- a/dace/frontend/fortran/intrinsics.py +++ b/dace/frontend/fortran/intrinsics.py @@ -357,7 +357,7 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No start_loop = rangesrval[i][0] end_loop = rangesrval_right[i][0] - difference = int(end_loop.value) - int(start_loop.value) + 1 + difference = int(end_loop.value) - int(start_loop.value) if difference != 0: new_index = ast_internal_classes.BinOp_Node( lval=idx_var, From 3b5f9f367b2dd281e693b38437c0d6d55b034ec6 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Fri, 13 Oct 2023 19:14:56 +0200 Subject: [PATCH 086/163] Add extensive testing for 2d ANY Fortran --- tests/fortran/intrinsic_any.py | 194 +++++++++++++++++++++++++++++---- 1 file changed, 170 insertions(+), 24 deletions(-) diff --git a/tests/fortran/intrinsic_any.py b/tests/fortran/intrinsic_any.py index 5ca40c17b2..4bfabfcec0 100644 --- a/tests/fortran/intrinsic_any.py +++ b/tests/fortran/intrinsic_any.py @@ -7,9 +7,6 @@ def test_fortran_frontend_any_array(): - """ - Tests that the generated array map correctly handles offsets. - """ test_string = """ PROGRAM intrinsic_any_test implicit none @@ -24,14 +21,6 @@ def test_fortran_frontend_any_array(): res(1) = ANY(d) - !res(1) = ANY(d == .True.) - !d(3) = .False. - !res(2) = ANY(d == .True.) - - !res(1) = ANY(d == e) - !d(3) = .False. - !res(2) = ANY(d == - END SUBROUTINE intrinsic_any_test_function """ @@ -53,9 +42,6 @@ def test_fortran_frontend_any_array(): def test_fortran_frontend_any_array_dim(): - """ - Tests that the generated array map correctly handles offsets. - """ test_string = """ PROGRAM intrinsic_any_test implicit none @@ -78,9 +64,6 @@ def test_fortran_frontend_any_array_dim(): def test_fortran_frontend_any_array_comparison(): - """ - Tests that the generated array map correctly handles offsets. - """ test_string = """ PROGRAM intrinsic_any_test implicit none @@ -115,7 +98,7 @@ def test_fortran_frontend_any_array_comparison(): size = 5 first = np.full([size], 1, order="F", dtype=np.int32) second = np.full([size], 2, order="F", dtype=np.int32) - second[3] = 1 + second[2] = 1 res = np.full([7], 0, order="F", dtype=np.int32) sdfg(first=first, second=second, res=res) @@ -130,9 +113,6 @@ def test_fortran_frontend_any_array_comparison(): assert val == False def test_fortran_frontend_any_array_scalar_comparison(): - """ - Tests that the generated array map correctly handles offsets. 
- """ test_string = """ PROGRAM intrinsic_any_test implicit none @@ -185,9 +165,6 @@ def test_fortran_frontend_any_array_scalar_comparison(): assert list(res) == [1, 1, 0, 1, 1, 1, 1] def test_fortran_frontend_any_array_comparison_wrong_subset(): - """ - Tests that the generated array map correctly handles offsets. - """ test_string = """ PROGRAM intrinsic_any_test implicit none @@ -210,6 +187,171 @@ def test_fortran_frontend_any_array_comparison_wrong_subset(): with pytest.raises(TypeError): fortran_parser.create_sdfg_from_string(test_string, "intrinsic_any_test", False) +def test_fortran_frontend_any_array_2d(): + test_string = """ + PROGRAM intrinsic_any_test + implicit none + logical, dimension(5,7) :: d + logical, dimension(2) :: res + CALL intrinsic_any_test_function(d, res) + end + + SUBROUTINE intrinsic_any_test_function(d, res) + logical, dimension(5,7) :: d + logical, dimension(2) :: res + + res(1) = ANY(d) + + END SUBROUTINE intrinsic_any_test_function + """ + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_any_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + sizes = [5, 7] + d = np.full(sizes, False, order="F", dtype=np.int32) + res = np.full([2], 42, order="F", dtype=np.int32) + + d[2,2] = True + sdfg(d=d, res=res) + assert res[0] == True + + d[2,2] = False + sdfg(d=d, res=res) + assert res[0] == False + +def test_fortran_frontend_any_array_comparison_2d(): + test_string = """ + PROGRAM intrinsic_any_test + implicit none + integer, dimension(5,4) :: first + integer, dimension(5,4) :: second + logical, dimension(7) :: res + CALL intrinsic_any_test_function(first, second, res) + end + + SUBROUTINE intrinsic_any_test_function(first, second, res) + integer, dimension(5,4) :: first + integer, dimension(5,4) :: second + logical, dimension(7) :: res + + res(1) = ANY(first .eq. second) + res(2) = ANY(first(:,:) .eq. second) + res(3) = ANY(first .eq. second(:,:)) + res(4) = ANY(first(:,:) .eq. second(:,:)) + res(5) = any(first(1:5,:) .eq. second(1:5,:)) + res(6) = any(first(:,1:4) .eq. second(:,1:4)) + ! Now test subsets. + res(7) = any(first(2:3, 3:4) .eq. second(2:3, 3:4)) + + END SUBROUTINE intrinsic_any_test_function + """ + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_any_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + sizes = [5, 4] + first = np.full(sizes, 1, order="F", dtype=np.int32) + second = np.full(sizes, 2, order="F", dtype=np.int32) + second[2,2] = 1 + res = np.full([7], 0, order="F", dtype=np.int32) + + sdfg(first=first, second=second, res=res) + for val in res: + assert val == True + + second = np.full(sizes, 2, order="F", dtype=np.int32) + res = np.full([7], 0, order="F", dtype=np.int32) + sdfg(first=first, second=second, res=res) + for val in res: + assert val == False + +def test_fortran_frontend_any_array_comparison_2d_subset(): + test_string = """ + PROGRAM intrinsic_any_test + implicit none + integer, dimension(5,4) :: first + integer, dimension(5,4) :: second + logical, dimension(2) :: res + CALL intrinsic_any_test_function(first, second, res) + end + + SUBROUTINE intrinsic_any_test_function(first, second, res) + integer, dimension(5,4) :: first + integer, dimension(5,4) :: second + logical, dimension(2) :: res + + ! Now test subsets - make sure the equal values are only + ! in the tested area. + res(1) = any(first(1:2, 3:4) .ne. second(4:5, 2:3)) + res(2) = any(first(1:2, 3:4) .eq. 
second(4:5, 2:3)) + + END SUBROUTINE intrinsic_any_test_function + """ + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_any_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + sizes = [5, 4] + first = np.full(sizes, 1, order="F", dtype=np.int32) + first[2:5, :] = 2 + first[0:2, 0:2] = 2 + + second = np.full(sizes, 1, order="F", dtype=np.int32) + second[0:3, :] = 3 + second[3:5, 0] = 3 + second[3:5, 3:5] = 3 + + res = np.full([2], 0, order="F", dtype=np.int32) + + sdfg(first=first, second=second, res=res) + assert list(res) == [0, 1] + +def test_fortran_frontend_any_array_comparison_2d_subset_offset(): + test_string = """ + PROGRAM intrinsic_any_test + implicit none + integer, dimension(20:24,4) :: first + integer, dimension(5,7:10) :: second + logical, dimension(2) :: res + CALL intrinsic_any_test_function(first, second, res) + end + + SUBROUTINE intrinsic_any_test_function(first, second, res) + integer, dimension(20:24,4) :: first + integer, dimension(5,7:10) :: second + logical, dimension(2) :: res + + ! Now test subsets - make sure the equal values are only + ! in the tested area. + res(1) = any(first(20:21, 3:4) .ne. second(4:5, 8:9)) + res(2) = any(first(20:21, 3:4) .eq. second(4:5, 8:9)) + + END SUBROUTINE intrinsic_any_test_function + """ + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_any_test", True) + sdfg.simplify(verbose=True) + sdfg.compile() + + sizes = [5, 4] + first = np.full(sizes, 1, order="F", dtype=np.int32) + first[2:5, :] = 2 + first[0:2, 0:2] = 2 + + second = np.full(sizes, 1, order="F", dtype=np.int32) + second[0:3, :] = 3 + second[3:5, 0] = 3 + second[3:5, 3:5] = 3 + + res = np.full([2], 0, order="F", dtype=np.int32) + + sdfg(first=first, second=second, res=res) + assert list(res) == [0, 1] + if __name__ == "__main__": test_fortran_frontend_any_array() @@ -217,3 +359,7 @@ def test_fortran_frontend_any_array_comparison_wrong_subset(): test_fortran_frontend_any_array_comparison() test_fortran_frontend_any_array_scalar_comparison() test_fortran_frontend_any_array_comparison_wrong_subset() + test_fortran_frontend_any_array_2d() + test_fortran_frontend_any_array_comparison_2d() + test_fortran_frontend_any_array_comparison_2d_subset() + test_fortran_frontend_any_array_comparison_2d_subset_offset() From a78588685784eaf635819596f2e7e9cf296d5b69 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Fri, 13 Oct 2023 22:58:43 +0200 Subject: [PATCH 087/163] Rename test for sum intrinsic --- tests/fortran/{sum_to_loop_offset.py => intrinsic_sum.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/fortran/{sum_to_loop_offset.py => intrinsic_sum.py} (100%) diff --git a/tests/fortran/sum_to_loop_offset.py b/tests/fortran/intrinsic_sum.py similarity index 100% rename from tests/fortran/sum_to_loop_offset.py rename to tests/fortran/intrinsic_sum.py From da697355bcb46ae8bb8109c7bc4dfaf6dc248a82 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Fri, 13 Oct 2023 23:32:05 +0200 Subject: [PATCH 088/163] Reorganize code of loop-based intrinsics to create a common parent --- dace/frontend/fortran/intrinsics.py | 492 +++++++++++++++------------- 1 file changed, 260 insertions(+), 232 deletions(-) diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py index e2a24f5ff6..d98c9f658b 100644 --- a/dace/frontend/fortran/intrinsics.py +++ b/dace/frontend/fortran/intrinsics.py @@ -78,159 +78,184 @@ def replace(func_name: ast_internal_classes.Name_Node, args: ast_internal_classe def 
has_transformation() -> bool:
         return True
 
-class Sum(LoopBasedReplacement):
+class LoopBasedReplacementVisitor(NodeVisitor):
 
-    class SumLoopNodeLister(NodeVisitor):
     """
-    Finds all sum operations that have to be transformed to loops in the AST
+    Finds all intrinsic operations that have to be transformed to loops in the AST
     """
-    def __init__(self):
+    def __init__(self, func_name: str):
+        self._func_name = func_name
         self.nodes: List[ast_internal_classes.FNode] = []
 
     def visit_BinOp_Node(self, node: ast_internal_classes.BinOp_Node):
 
         if isinstance(node.rval, ast_internal_classes.Call_Expr_Node):
-            if node.rval.name.name == "__dace_sum":
+            if node.rval.name.name == self._func_name:
                 self.nodes.append(node)
 
     def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_Node):
         return
 
+class LoopBasedReplacementTransformation(NodeTransformer):
+
+    """
+    Transforms the AST by removing intrinsic calls and replacing them with loops
+    """
+    def __init__(self, ast):
+        self.count = 0
+        ParentScopeAssigner().visit(ast)
+        self.scope_vars = ScopeVarsDeclarations()
+        self.scope_vars.visit(ast)
+
+        self.rvals = []
+
+
+    @abstractmethod
+    def func_name(self) -> str:
+        pass
+
+    def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_Node):
+
+        newbody = []
+        for child in node.execution:
+            lister = LoopBasedReplacementVisitor(self.func_name())
+            lister.visit(child)
+            res = lister.nodes
+
+            if res is None or len(res) == 0:
+                newbody.append(self.visit(child))
+                continue
+
+            self.loop_ranges = []
+            # We need to reinitialize variables as the class is reused for transformation between different
+            # calls to the same intrinsic.
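+            # (otherwise per-call state such as self.rvals would leak from the previous match)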
+            self._initialize()
+
+            # Visit all intrinsic arguments and extract arrays
+            for i in mywalk(child.rval):
+                if isinstance(i, ast_internal_classes.Call_Expr_Node) and i.name.name == self.func_name():
+                    self._parse_call_expr_node(i)
+
+            # Verify that all of the intrinsic's args are correct and prepare them for loop generation
+            self._summarize_args(child, newbody)
+
+            # Initialize the result variable
+            newbody.append(self._initialize_result(child))
+
+            # Generate the intrinsic-specific logic inside loop body
+            body = self._generate_loop_body(child)
+
+            # Now generate the multi-dimensional loop header and updates
+            range_index = 0
+            for i in self.loop_ranges:
+                initrange = i[0]
+                finalrange = i[1]
+                init = ast_internal_classes.BinOp_Node(
+                    lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)),
+                    op="=",
+                    rval=initrange,
+                    line_number=child.line_number)
+                cond = ast_internal_classes.BinOp_Node(
+                    lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)),
+                    op="<=",
+                    rval=finalrange,
+                    line_number=child.line_number)
+                iter = ast_internal_classes.BinOp_Node(
+                    lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)),
+                    op="=",
+                    rval=ast_internal_classes.BinOp_Node(
+                        lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)),
+                        op="+",
+                        rval=ast_internal_classes.Int_Literal_Node(value="1")),
+                    line_number=child.line_number)
+                current_for = ast_internal_classes.Map_Stmt_Node(
+                    init=init,
+                    cond=cond,
+                    iter=iter,
+                    body=ast_internal_classes.Execution_Part_Node(execution=[body]),
+                    line_number=child.line_number)
+                body = current_for
+                range_index += 1
+
+            newbody.append(body)
+
+            self.count = self.count + range_index
+        return ast_internal_classes.Execution_Part_Node(execution=newbody)
+
+class Sum(LoopBasedReplacement):
 
-    class Transformation(NodeTransformer):
+    class Transformation(LoopBasedReplacementTransformation):
 
-        """
-        Transforms the AST by removing array sums and replacing them with loops
-        """
         def __init__(self, ast):
-            self.count = 0
-            ParentScopeAssigner().visit(ast)
-            self.scope_vars = ScopeVarsDeclarations()
-            self.scope_vars.visit(ast)
+            super().__init__(ast)
 
-        def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_Node):
-            newbody = []
-            for child in node.execution:
-                lister = Sum.SumLoopNodeLister()
-                lister.visit(child)
-                res = lister.nodes
-                if res is not None and len(res) > 0:
+        def func_name(self) -> str:
+            return "__dace_sum"
 
-                    current = child.lval
-                    val = child.rval
+        def _initialize(self):
+            self.rvals = []
+            self.argument_variable = None
 
-                    rvals = []
-                    for i in mywalk(val):
-                        if isinstance(i, ast_internal_classes.Call_Expr_Node) and i.name.name == '__dace_sum':
+        def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node):
 
-                            for arg in i.args:
+            for arg in node.args:
 
                 # supports syntax SUM(arr)
                 if isinstance(arg, ast_internal_classes.Name_Node):
                     array_node = ast_internal_classes.Array_Subscript_Node(parent=arg.parent)
                     array_node.name = arg
 
                     # If we access SUM(arr) where arr
has many dimensions, + # We need to create a ParDecl_Node for each dimension + dims = len(self.scope_vars.get_var(node.parent, arg.name).sizes) + array_node.indices = [ast_internal_classes.ParDecl_Node(type='ALL')] * dims - rvals.append(array_node) + self.rvals.append(array_node) - # supports syntax SUM(arr(:)) - if isinstance(arg, ast_internal_classes.Array_Subscript_Node): - rvals.append(arg) + # supports syntax SUM(arr(:)) + if isinstance(arg, ast_internal_classes.Array_Subscript_Node): + self.rvals.append(arg) - if len(rvals) != 1: - raise NotImplementedError("Only one array can be summed") - val = rvals[0] - rangeposrval = [] - rangesrval = [] - par_Decl_Range_Finder(val, rangesrval, rangeposrval, [], self.count, newbody, self.scope_vars, True) + def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): - # Initialize the result variable - newbody.append( - ast_internal_classes.BinOp_Node( - lval=current, - op="=", - rval=ast_internal_classes.Int_Literal_Node(value="0"), - line_number=child.line_number - ) - ) - range_index = 0 - body = ast_internal_classes.BinOp_Node(lval=current, - op="=", - rval=ast_internal_classes.BinOp_Node( - lval=current, - op="+", - rval=val, - line_number=child.line_number), - line_number=child.line_number) - for i in rangesrval: - initrange = i[0] - finalrange = i[1] - init = ast_internal_classes.BinOp_Node( - lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)), - op="=", - rval=initrange, - line_number=child.line_number) - cond = ast_internal_classes.BinOp_Node( - lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)), - op="<=", - rval=finalrange, - line_number=child.line_number) - iter = ast_internal_classes.BinOp_Node( - lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)), - op="=", - rval=ast_internal_classes.BinOp_Node( - lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)), - op="+", - rval=ast_internal_classes.Int_Literal_Node(value="1")), - line_number=child.line_number) - current_for = ast_internal_classes.Map_Stmt_Node( - init=init, - cond=cond, - iter=iter, - body=ast_internal_classes.Execution_Part_Node(execution=[body]), - line_number=child.line_number) - body = current_for - range_index += 1 + if len(self.rvals) != 1: + raise NotImplementedError("Only one array can be summed") - newbody.append(body) + self.argument_variable = self.rvals[0] - self.count = self.count + range_index - else: - newbody.append(self.visit(child)) - return ast_internal_classes.Execution_Part_Node(execution=newbody) + par_Decl_Range_Finder(self.argument_variable, self.loop_ranges, [], [], self.count, new_func_body, self.scope_vars, True) -class Any(LoopBasedReplacement): + def _initialize_result(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: - class AnyLoopNodeLister(NodeVisitor): - """ - Finds all sum operations that have to be transformed to loops in the AST - """ - def __init__(self): - self.nodes: List[ast_internal_classes.FNode] = [] + return ast_internal_classes.BinOp_Node( + lval=node.lval, + op="=", + rval=ast_internal_classes.Int_Literal_Node(value="0"), + line_number=node.line_number + ) - def visit_BinOp_Node(self, node: ast_internal_classes.BinOp_Node): + def _generate_loop_body(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: - if isinstance(node.rval, ast_internal_classes.Call_Expr_Node): - if 
node.rval.name.name == "__dace_any": - self.nodes.append(node) + return ast_internal_classes.BinOp_Node( + lval=node.lval, + op="=", + rval=ast_internal_classes.BinOp_Node( + lval=node.lval, + op="+", + rval=self.argument_variable, + line_number=node.line_number + ), + line_number=node.line_number + ) - def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_Node): - return - class Transformation(NodeTransformer): +class Any(LoopBasedReplacement): - """ - Transforms the AST by removing array sums and replacing them with loops - """ - def __init__(self, ast): - self.count = 0 - ParentScopeAssigner().visit(ast) - self.scope_vars = ScopeVarsDeclarations() - self.scope_vars.visit(ast) + class Transformation(LoopBasedReplacementTransformation): + + def func_name(self) -> str: + return "__dace_any" def _parse_array(self, node: ast_internal_classes.Execution_Part_Node, arg: ast_internal_classes.FNode) -> ast_internal_classes.Array_Subscript_Node: @@ -250,6 +275,114 @@ def _parse_array(self, node: ast_internal_classes.Execution_Part_Node, arg: ast_ if isinstance(arg, ast_internal_classes.Array_Subscript_Node): return arg + def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): + + if len(node.args) > 1: + raise NotImplementedError("Fortran ANY with the DIM parameter is not supported!") + arg = node.args[0] + + array_node = self._parse_array(node, arg) + if array_node is not None: + rvals.append(array_node) + + if len(rvals) != 1: + raise NotImplementedError("Only one array can be summed") + val = rvals[0] + rangeposrval = [] + + par_Decl_Range_Finder(val, rangesrval, rangeposrval, [], self.count, newbody, self.scope_vars, True) + cond = ast_internal_classes.BinOp_Node(op="==", + rval=ast_internal_classes.Int_Literal_Node(value="1"), + lval=copy.deepcopy(val), + line_number=child.line_number) + else: + + # supports syntax ANY(logical op) + # the logical op can be: + # + # (1) arr1 op arr2 + # where arr1 and arr2 are name node or array subscript node + # there, we need to extract shape and verify they are the same + # + # (2) arr1 op scalar + # there, we ignore the scalar because it's not an array + if isinstance(arg, ast_internal_classes.BinOp_Node): + + left_side_arr = self._parse_array(node, arg.lval) + right_side_arr = self._parse_array(node, arg.rval) + has_two_arrays = left_side_arr is not None and right_side_arr is not None + + if not has_two_arrays: + + # if one side of the operator is scalar, then parsing array + # will return none + dominant_array = left_side_arr + if left_side_arr is None: + dominant_array = right_side_arr + + rangeposrval = [] + rangeslen_left = [] + rangeposrval = [] + par_Decl_Range_Finder(dominant_array, rangesrval, rangeposrval, rangeslen_left, self.count, newbody, self.scope_vars, True) + val = arg + + cond = copy.deepcopy(val) + if left_side_arr is not None: + cond.lval = dominant_array + if right_side_arr is not None: + cond.rval = dominant_array + + return + + + if len(left_side_arr.indices) != len(right_side_arr.indices): + raise TypeError("Can't parse Fortran ANY with different array ranks!") + + for left_idx, right_idx in zip(left_side_arr.indices, right_side_arr.indices): + if left_idx.type != right_idx.type: + raise TypeError("Can't parse Fortran ANY with different array ranks!") + + rangeposrval = [] + rangeslen_left = [] + rangeposrval = [] + par_Decl_Range_Finder(left_side_arr, rangesrval, rangeposrval, rangeslen_left, self.count, newbody, self.scope_vars, True) + val = arg + + rangesrval_right = [] + 
rangeslen_right = [] + par_Decl_Range_Finder(right_side_arr, rangesrval_right, [], rangeslen_right, self.count, newbody, self.scope_vars, True) + + for left_len, right_len in zip(rangeslen_left, rangeslen_right): + if left_len != right_len: + raise TypeError("Can't support Fortran ANY with different array ranks!") + + # Now, the loop will be dictated by the left array + # If the access pattern on the right array is different, we need to shfit it - for every dimension. + # For example, we can have arr(1:3) == arr2(3:5) + # Then, loop_idx is from 1 to 3 + # arr becomes arr[loop_idx] + # but arr2 must be arr2[loop_idx + 2] + for i in range(len(right_side_arr.indices)): + + idx_var = right_side_arr.indices[i] + start_loop = rangesrval[i][0] + end_loop = rangesrval_right[i][0] + + difference = int(end_loop.value) - int(start_loop.value) + if difference != 0: + new_index = ast_internal_classes.BinOp_Node( + lval=idx_var, + op="+", + rval=ast_internal_classes.Int_Literal_Node(value=str(difference)), + line_number=child.line_number + ) + right_side_arr.indices[i] = new_index + + # Now, we need to convert the array to a proper subscript node + cond = copy.deepcopy(val) + cond.lval = left_side_arr + cond.rval = right_side_arr + def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_Node): newbody = [] for child in node.execution: @@ -265,112 +398,7 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No rangesrval = [] for i in mywalk(val): if isinstance(i, ast_internal_classes.Call_Expr_Node) and i.name.name == '__dace_any': - - if len(i.args) > 1: - raise NotImplementedError("Fortran ANY with the DIM parameter is not supported!") - arg = i.args[0] - - array_node = self._parse_array(node, arg) - if array_node is not None: - rvals.append(array_node) - - if len(rvals) != 1: - raise NotImplementedError("Only one array can be summed") - val = rvals[0] - rangeposrval = [] - - par_Decl_Range_Finder(val, rangesrval, rangeposrval, [], self.count, newbody, self.scope_vars, True) - cond = ast_internal_classes.BinOp_Node(op="==", - rval=ast_internal_classes.Int_Literal_Node(value="1"), - lval=copy.deepcopy(val), - line_number=child.line_number) - else: - - # supports syntax ANY(logical op) - # the logical op can be: - # - # (1) arr1 op arr2 - # where arr1 and arr2 are name node or array subscript node - # there, we need to extract shape and verify they are the same - # - # (2) arr1 op scalar - # there, we ignore the scalar because it's not an array - if isinstance(arg, ast_internal_classes.BinOp_Node): - - left_side_arr = self._parse_array(node, arg.lval) - right_side_arr = self._parse_array(node, arg.rval) - has_two_arrays = left_side_arr is not None and right_side_arr is not None - - if not has_two_arrays: - - # if one side of the operator is scalar, then parsing array - # will return none - dominant_array = left_side_arr - if left_side_arr is None: - dominant_array = right_side_arr - - rangeposrval = [] - rangeslen_left = [] - rangeposrval = [] - par_Decl_Range_Finder(dominant_array, rangesrval, rangeposrval, rangeslen_left, self.count, newbody, self.scope_vars, True) - val = arg - - cond = copy.deepcopy(val) - if left_side_arr is not None: - cond.lval = dominant_array - if right_side_arr is not None: - cond.rval = dominant_array - - continue - - - if len(left_side_arr.indices) != len(right_side_arr.indices): - raise TypeError("Can't parse Fortran ANY with different array ranks!") - - for left_idx, right_idx in zip(left_side_arr.indices, 
right_side_arr.indices): - if left_idx.type != right_idx.type: - raise TypeError("Can't parse Fortran ANY with different array ranks!") - - rangeposrval = [] - rangeslen_left = [] - rangeposrval = [] - par_Decl_Range_Finder(left_side_arr, rangesrval, rangeposrval, rangeslen_left, self.count, newbody, self.scope_vars, True) - val = arg - - rangesrval_right = [] - rangeslen_right = [] - par_Decl_Range_Finder(right_side_arr, rangesrval_right, [], rangeslen_right, self.count, newbody, self.scope_vars, True) - - for left_len, right_len in zip(rangeslen_left, rangeslen_right): - if left_len != right_len: - raise TypeError("Can't support Fortran ANY with different array ranks!") - - # Now, the loop will be dictated by the left array - # If the access pattern on the right array is different, we need to shfit it - for every dimension. - # For example, we can have arr(1:3) == arr2(3:5) - # Then, loop_idx is from 1 to 3 - # arr becomes arr[loop_idx] - # but arr2 must be arr2[loop_idx + 2] - for i in range(len(right_side_arr.indices)): - - idx_var = right_side_arr.indices[i] - start_loop = rangesrval[i][0] - end_loop = rangesrval_right[i][0] - - difference = int(end_loop.value) - int(start_loop.value) - if difference != 0: - new_index = ast_internal_classes.BinOp_Node( - lval=idx_var, - op="+", - rval=ast_internal_classes.Int_Literal_Node(value=str(difference)), - line_number=child.line_number - ) - right_side_arr.indices[i] = new_index - - # Now, we need to convert the array to a proper subscript node - cond = copy.deepcopy(val) - cond.lval = left_side_arr - cond.rval = right_side_arr + self._parse_call_expr_node(i) # Initialize the result variable newbody.append( From 988534d752f813508f7a5047e03f42866169e81b Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Fri, 13 Oct 2023 23:52:25 +0200 Subject: [PATCH 089/163] Reorganize the implementation of ANY to match the new code structure --- dace/frontend/fortran/intrinsics.py | 178 +++++++++++----------------- 1 file changed, 70 insertions(+), 108 deletions(-) diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py index d98c9f658b..260c020e3a 100644 --- a/dace/frontend/fortran/intrinsics.py +++ b/dace/frontend/fortran/intrinsics.py @@ -134,7 +134,7 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No # Visit all intrinsic arguments and extract arrays for i in mywalk(child.rval): if isinstance(i, ast_internal_classes.Call_Expr_Node) and i.name.name == self.func_name(): - self._parse_call_expr_node(i) + self._parse_call_expr_node(i, newbody) # Verify that all of intrinsic args are correct and prepare them for loop generation self._summarize_args(child, newbody) @@ -184,6 +184,15 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No class Sum(LoopBasedReplacement): + """ + In this class, we implement the transformation for Fortran intrinsic SUM(:) + We support two ways of invoking the function - by providing array name and array subscript. + We do NOT support the *DIM* argument. + + During the loop construction, we add a single variable storing the partial result. + Then, we generate a binary node accumulating the result. 
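+
+    As an illustrative sketch (names here are placeholders, with the loop counter named
+    after the generated tmp_parfor_ variables), a statement like res = SUM(arr(2:5))
+    is expanded into roughly:
+
+        res = 0
+        for tmp_parfor_0 in range(2, 5 + 1):
+            res = res + arr[tmp_parfor_0]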
+ """ + class Transformation(LoopBasedReplacementTransformation): def __init__(self, ast): @@ -196,7 +205,7 @@ def _initialize(self): self.rvals = [] self.argument_variable = None - def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): + def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node, new_func_body: List[ast_internal_classes.FNode]): for arg in node.args: @@ -254,6 +263,9 @@ class Any(LoopBasedReplacement): class Transformation(LoopBasedReplacementTransformation): + def __init__(self, ast): + super().__init__(ast) + def func_name(self) -> str: return "__dace_any" @@ -275,7 +287,10 @@ def _parse_array(self, node: ast_internal_classes.Execution_Part_Node, arg: ast_ if isinstance(arg, ast_internal_classes.Array_Subscript_Node): return arg - def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): + def _initialize(self): + self.rvals = [] + + def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node, new_func_body: List[ast_internal_classes.FNode]): if len(node.args) > 1: raise NotImplementedError("Fortran ANY with the DIM parameter is not supported!") @@ -283,18 +298,18 @@ def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): array_node = self._parse_array(node, arg) if array_node is not None: - rvals.append(array_node) + self.rvals.append(array_node) - if len(rvals) != 1: + if len(self.rvals) != 1: raise NotImplementedError("Only one array can be summed") - val = rvals[0] + val = self.rvals[0] rangeposrval = [] - par_Decl_Range_Finder(val, rangesrval, rangeposrval, [], self.count, newbody, self.scope_vars, True) - cond = ast_internal_classes.BinOp_Node(op="==", + par_Decl_Range_Finder(val, self.loop_ranges, [], [], self.count, new_func_body, self.scope_vars, True) + self.cond = ast_internal_classes.BinOp_Node(op="==", rval=ast_internal_classes.Int_Literal_Node(value="1"), lval=copy.deepcopy(val), - line_number=child.line_number) + line_number=node.line_number) else: # supports syntax ANY(logical op) @@ -323,14 +338,14 @@ def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): rangeposrval = [] rangeslen_left = [] rangeposrval = [] - par_Decl_Range_Finder(dominant_array, rangesrval, rangeposrval, rangeslen_left, self.count, newbody, self.scope_vars, True) + par_Decl_Range_Finder(dominant_array, self.loop_ranges, rangeposrval, rangeslen_left, self.count, new_func_body, self.scope_vars, True) val = arg - cond = copy.deepcopy(val) + self.cond = copy.deepcopy(val) if left_side_arr is not None: - cond.lval = dominant_array + self.cond.lval = dominant_array if right_side_arr is not None: - cond.rval = dominant_array + self.cond.rval = dominant_array return @@ -345,12 +360,12 @@ def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): rangeposrval = [] rangeslen_left = [] rangeposrval = [] - par_Decl_Range_Finder(left_side_arr, rangesrval, rangeposrval, rangeslen_left, self.count, newbody, self.scope_vars, True) + par_Decl_Range_Finder(left_side_arr, self.loop_ranges, rangeposrval, rangeslen_left, self.count, new_func_body, self.scope_vars, True) val = arg rangesrval_right = [] rangeslen_right = [] - par_Decl_Range_Finder(right_side_arr, rangesrval_right, [], rangeslen_right, self.count, newbody, self.scope_vars, True) + par_Decl_Range_Finder(right_side_arr, rangesrval_right, [], rangeslen_right, self.count, new_func_body, self.scope_vars, True) for left_len, right_len in zip(rangeslen_left, rangeslen_right): if left_len != right_len: @@ -365,7 +380,7 @@ 
def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): for i in range(len(right_side_arr.indices)): idx_var = right_side_arr.indices[i] - start_loop = rangesrval[i][0] + start_loop = self.loop_ranges[i][0] end_loop = rangesrval_right[i][0] difference = int(end_loop.value) - int(start_loop.value) @@ -374,102 +389,49 @@ def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): lval=idx_var, op="+", rval=ast_internal_classes.Int_Literal_Node(value=str(difference)), - line_number=child.line_number + line_number=node.line_number ) right_side_arr.indices[i] = new_index # Now, we need to convert the array to a proper subscript node - cond = copy.deepcopy(val) - cond.lval = left_side_arr - cond.rval = right_side_arr - - def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_Node): - newbody = [] - for child in node.execution: - lister = Any.AnyLoopNodeLister() - lister.visit(child) - res = lister.nodes - if res is not None and len(res) > 0: - - current = child.lval - val = child.rval - - rvals = [] - rangesrval = [] - for i in mywalk(val): - if isinstance(i, ast_internal_classes.Call_Expr_Node) and i.name.name == '__dace_any': - self._parse_call_expr_node(i) - - # Initialize the result variable - newbody.append( - ast_internal_classes.BinOp_Node( - lval=current, - op="=", - rval=ast_internal_classes.Int_Literal_Node(value="0"), - line_number=child.line_number - ) - ) - range_index = 0 - - # Here begins the specialized implementation - body_if = ast_internal_classes.Execution_Part_Node(execution=[ - ast_internal_classes.BinOp_Node( - lval=copy.deepcopy(current), - op="=", - rval=ast_internal_classes.Int_Literal_Node(value="1"), - line_number=child.line_number - ), - # TODO: we should make the `break` generation conditional based on the architecture - # For parallel maps, we should have no breaks - # For sequential loop, we want a break to be faster - #ast_internal_classes.Break_Node( - # line_number=child.line_number - #) - ]) - body = ast_internal_classes.If_Stmt_Node( - cond=cond, - body=body_if, - body_else=ast_internal_classes.Execution_Part_Node(execution=[]), - line_number=child.line_number - ) - # Here ends the specialized implementation - - for i in rangesrval: - initrange = i[0] - finalrange = i[1] - init = ast_internal_classes.BinOp_Node( - lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)), - op="=", - rval=initrange, - line_number=child.line_number) - cond = ast_internal_classes.BinOp_Node( - lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)), - op="<=", - rval=finalrange, - line_number=child.line_number) - iter = ast_internal_classes.BinOp_Node( - lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)), - op="=", - rval=ast_internal_classes.BinOp_Node( - lval=ast_internal_classes.Name_Node(name="tmp_parfor_" + str(self.count + range_index)), - op="+", - rval=ast_internal_classes.Int_Literal_Node(value="1")), - line_number=child.line_number) - current_for = ast_internal_classes.Map_Stmt_Node( - init=init, - cond=cond, - iter=iter, - body=ast_internal_classes.Execution_Part_Node(execution=[body]), - line_number=child.line_number) - body = current_for - range_index += 1 - - newbody.append(body) - - self.count = self.count + range_index - else: - newbody.append(self.visit(child)) - return ast_internal_classes.Execution_Part_Node(execution=newbody) + self.cond = copy.deepcopy(val) + self.cond.lval = left_side_arr + 
self.cond.rval = right_side_arr + + def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): + pass + + def _initialize_result(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: + + return ast_internal_classes.BinOp_Node( + lval=node.lval, + op="=", + rval=ast_internal_classes.Int_Literal_Node(value="0"), + line_number=node.line_number + ) + + def _generate_loop_body(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: + + body_if = ast_internal_classes.Execution_Part_Node(execution=[ + ast_internal_classes.BinOp_Node( + lval=copy.deepcopy(node.lval), + op="=", + rval=ast_internal_classes.Int_Literal_Node(value="1"), + line_number=node.line_number + ), + # TODO: we should make the `break` generation conditional based on the architecture + # For parallel maps, we should have no breaks + # For sequential loop, we want a break to be faster + #ast_internal_classes.Break_Node( + # line_number=node.line_number + #) + ]) + return ast_internal_classes.If_Stmt_Node( + cond=self.cond, + body=body_if, + body_else=ast_internal_classes.Execution_Part_Node(execution=[]), + line_number=node.line_number + ) class FortranIntrinsics: From 94ff1acd2489b52ff9d0ed22cce1077e63bfcc8a Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Sat, 14 Oct 2023 00:17:05 +0200 Subject: [PATCH 090/163] Add a single parent for loop-based implementations --- dace/frontend/fortran/intrinsics.py | 216 +++++++++++++++++----------- 1 file changed, 134 insertions(+), 82 deletions(-) diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py index 260c020e3a..f3466a9f95 100644 --- a/dace/frontend/fortran/intrinsics.py +++ b/dace/frontend/fortran/intrinsics.py @@ -114,6 +114,26 @@ def __init__(self, ast): def func_name(self) -> str: pass + @abstractmethod + def _initialize(self): + pass + + @abstractmethod + def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): + pass + + @abstractmethod + def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): + pass + + @abstractmethod + def _initialize_result(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: + pass + + @abstractmethod + def _generate_loop_body(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: + pass + def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_Node): newbody = [] @@ -134,7 +154,7 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No # Visit all intrinsic arguments and extract arrays for i in mywalk(child.rval): if isinstance(i, ast_internal_classes.Call_Expr_Node) and i.name.name == self.func_name(): - self._parse_call_expr_node(i, newbody) + self._parse_call_expr_node(i) # Verify that all of intrinsic args are correct and prepare them for loop generation self._summarize_args(child, newbody) @@ -205,7 +225,7 @@ def _initialize(self): self.rvals = [] self.argument_variable = None - def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node, new_func_body: List[ast_internal_classes.FNode]): + def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): for arg in node.args: @@ -261,6 +281,28 @@ def _generate_loop_body(self, node: ast_internal_classes.FNode) -> ast_internal_ class Any(LoopBasedReplacement): + """ + In this class, we implement the transformation for Fortran intrinsic ANY + We support three ways of invoking the function - by 
providing array name, array subscript, + and a binary operation. + We do NOT support the *DIM* argument. + + First, we split the implementation between three scenarios: + (1) ANY(arr) + (2) ANY(arr1 op arr2) + (3) ANY(arr1 op scalar) + Depending on the scenario, we verify if all participating arrays have the same rank. + We determine the loop range based on the arrays, and convert all array accesses to depend on + the loop. We take special care for situations where arrays have different subscripts, e.g., + arr1(1:3) op arr2(5:7) - the second array needs a shift when indexing based on loop iterator. + + During the loop construction, we add a single variable storing the partial result. + Then, we generate an if condition inside the loop to check if the value is true or not. + For (1), we check if the array entry is equal to 1. + For (2), we reuse the provided binary operation. + When the condition is true, we set the value to true and exit. + """ + class Transformation(LoopBasedReplacementTransformation): def __init__(self, ast): @@ -290,7 +332,12 @@ def _parse_array(self, node: ast_internal_classes.Execution_Part_Node, arg: ast_ def _initialize(self): self.rvals = [] - def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node, new_func_body: List[ast_internal_classes.FNode]): + self.first_array = None + self.second_array = None + self.dominant_array = None + self.cond = None + + def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): if len(node.args) > 1: raise NotImplementedError("Fortran ANY with the DIM parameter is not supported!") @@ -298,18 +345,9 @@ def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node, new_f array_node = self._parse_array(node, arg) if array_node is not None: - self.rvals.append(array_node) - - if len(self.rvals) != 1: - raise NotImplementedError("Only one array can be summed") - val = self.rvals[0] - rangeposrval = [] - - par_Decl_Range_Finder(val, self.loop_ranges, [], [], self.count, new_func_body, self.scope_vars, True) - self.cond = ast_internal_classes.BinOp_Node(op="==", - rval=ast_internal_classes.Int_Literal_Node(value="1"), - lval=copy.deepcopy(val), - line_number=node.line_number) + + self.first_array = array_node + else: # supports syntax ANY(logical op) @@ -321,85 +359,99 @@ def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node, new_f # # (2) arr1 op scalar # there, we ignore the scalar because it's not an array - if isinstance(arg, ast_internal_classes.BinOp_Node): + if not isinstance(arg, ast_internal_classes.BinOp_Node): + return - left_side_arr = self._parse_array(node, arg.lval) - right_side_arr = self._parse_array(node, arg.rval) - has_two_arrays = left_side_arr is not None and right_side_arr is not None + self.first_array = self._parse_array(node, arg.lval) + self.second_array = self._parse_array(node, arg.rval) + has_two_arrays = self.first_array is not None and self.second_array is not None - if not has_two_arrays: + # array and scalar - simplified case + if not has_two_arrays: - # if one side of the operator is scalar, then parsing array - # will return none - dominant_array = left_side_arr - if left_side_arr is None: - dominant_array = right_side_arr + # if one side of the operator is scalar, then parsing array + # will return none + self.dominant_array = self.first_array + if self.dominant_array is None: + self.dominant_array = self.second_array - rangeposrval = [] - rangeslen_left = [] - rangeposrval = [] - par_Decl_Range_Finder(dominant_array, self.loop_ranges, 
rangeposrval, rangeslen_left, self.count, new_func_body, self.scope_vars, True) - val = arg + # replace the array subscript node in the binary operation + # ignore this when the operand is a scalar + self.cond = copy.deepcopy(arg) + if self.first_array is not None: + self.cond.lval = self.dominant_array + if self.second_array is not None: + self.cond.rval = self.dominant_array - self.cond = copy.deepcopy(val) - if left_side_arr is not None: - self.cond.lval = dominant_array - if right_side_arr is not None: - self.cond.rval = dominant_array + return - return + if len(self.first_array.indices) != len(self.second_array.indices): + raise TypeError("Can't parse Fortran ANY with different array ranks!") - if len(left_side_arr.indices) != len(right_side_arr.indices): + for left_idx, right_idx in zip(self.first_array.indices, self.second_array.indices): + if left_idx.type != right_idx.type: raise TypeError("Can't parse Fortran ANY with different array ranks!") - for left_idx, right_idx in zip(left_side_arr.indices, right_side_arr.indices): - if left_idx.type != right_idx.type: - raise TypeError("Can't parse Fortran ANY with different array ranks!") - - rangeposrval = [] - rangeslen_left = [] - rangeposrval = [] - par_Decl_Range_Finder(left_side_arr, self.loop_ranges, rangeposrval, rangeslen_left, self.count, new_func_body, self.scope_vars, True) - val = arg - - rangesrval_right = [] - rangeslen_right = [] - par_Decl_Range_Finder(right_side_arr, rangesrval_right, [], rangeslen_right, self.count, new_func_body, self.scope_vars, True) - - for left_len, right_len in zip(rangeslen_left, rangeslen_right): - if left_len != right_len: - raise TypeError("Can't support Fortran ANY with different array ranks!") - - # Now, the loop will be dictated by the left array - # If the access pattern on the right array is different, we need to shfit it - for every dimension. 
- # For example, we can have arr(1:3) == arr2(3:5) - # Then, loop_idx is from 1 to 3 - # arr becomes arr[loop_idx] - # but arr2 must be arr2[loop_idx + 2] - for i in range(len(right_side_arr.indices)): - - idx_var = right_side_arr.indices[i] - start_loop = self.loop_ranges[i][0] - end_loop = rangesrval_right[i][0] - - difference = int(end_loop.value) - int(start_loop.value) - if difference != 0: - new_index = ast_internal_classes.BinOp_Node( - lval=idx_var, - op="+", - rval=ast_internal_classes.Int_Literal_Node(value=str(difference)), - line_number=node.line_number - ) - right_side_arr.indices[i] = new_index - - # Now, we need to convert the array to a proper subscript node - self.cond = copy.deepcopy(val) - self.cond.lval = left_side_arr - self.cond.rval = right_side_arr + # Now, we need to convert the array to a proper subscript node + self.cond = copy.deepcopy(arg) + self.cond.lval = self.first_array + self.cond.rval = self.second_array def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): - pass + + # The main argument is an array, not a binary operation + if self.cond is None: + + par_Decl_Range_Finder(self.first_array, self.loop_ranges, [], [], self.count, new_func_body, self.scope_vars, True) + self.cond = ast_internal_classes.BinOp_Node( + op="==", + rval=ast_internal_classes.Int_Literal_Node(value="1"), + lval=copy.deepcopy(self.first_array), + line_number=node.line_number + ) + return + + # we have a binary operation with an array and a scalar + if self.dominant_array is not None: + + par_Decl_Range_Finder(self.dominant_array, self.loop_ranges, [], [], self.count, new_func_body, self.scope_vars, True) + return + + # we have a binary operation with two arrays + + rangeslen_left = [] + par_Decl_Range_Finder(self.first_array, self.loop_ranges, [], rangeslen_left, self.count, new_func_body, self.scope_vars, True) + + loop_ranges_right = [] + rangeslen_right = [] + par_Decl_Range_Finder(self.second_array, loop_ranges_right, [], rangeslen_right, self.count, new_func_body, self.scope_vars, True) + + for left_len, right_len in zip(rangeslen_left, rangeslen_right): + if left_len != right_len: + raise TypeError("Can't support Fortran ANY with different array ranks!") + + # Now, the loop will be dictated by the left array + # If the access pattern on the right array is different, we need to shfit it - for every dimension. 
+ # For example, we can have arr(1:3) == arr2(3:5) + # Then, loop_idx is from 1 to 3 + # arr becomes arr[loop_idx] + # but arr2 must be arr2[loop_idx + 2] + for i in range(len(self.second_array.indices)): + + idx_var = self.second_array.indices[i] + start_loop = self.loop_ranges[i][0] + end_loop = loop_ranges_right[i][0] + + difference = int(end_loop.value) - int(start_loop.value) + if difference != 0: + new_index = ast_internal_classes.BinOp_Node( + lval=idx_var, + op="+", + rval=ast_internal_classes.Int_Literal_Node(value=str(difference)), + line_number=node.line_number + ) + self.second_array.indices[i] = new_index def _initialize_result(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: From 9fbea32b3b5f8056b8038e68f37c424076d0ec48 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Sat, 14 Oct 2023 01:07:07 +0200 Subject: [PATCH 091/163] Remove unnecessary SDFG save --- tests/fortran/intrinsic_any.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/fortran/intrinsic_any.py b/tests/fortran/intrinsic_any.py index 4bfabfcec0..c1d82cd2e0 100644 --- a/tests/fortran/intrinsic_any.py +++ b/tests/fortran/intrinsic_any.py @@ -137,7 +137,6 @@ def test_fortran_frontend_any_array_scalar_comparison(): """ sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_any_test", False) - sdfg.save('test.sdfg') sdfg.simplify(verbose=True) sdfg.compile() From 51f149d52a757d8378c9c841144936f020cc5193 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Sat, 14 Oct 2023 01:08:18 +0200 Subject: [PATCH 092/163] Implement Fortran ALL intrinsic --- dace/frontend/fortran/intrinsics.py | 387 ++++++++++++++++------------ tests/fortran/intrinsic_all.py | 361 ++++++++++++++++++++++++++ 2 files changed, 580 insertions(+), 168 deletions(-) create mode 100644 tests/fortran/intrinsic_all.py diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py index f3466a9f95..d481097968 100644 --- a/dace/frontend/fortran/intrinsics.py +++ b/dace/frontend/fortran/intrinsics.py @@ -60,7 +60,8 @@ class LoopBasedReplacement: def replaced_name(func_name: str) -> str: replacements = { "SUM": "__dace_sum", - "ANY": "__dace_any" + "ANY": "__dace_any", + "ALL": "__dace_all" } return replacements[func_name] @@ -68,7 +69,8 @@ def replaced_name(func_name: str) -> str: def replace(func_name: ast_internal_classes.Name_Node, args: ast_internal_classes.Arg_List_Node, line) -> ast_internal_classes.FNode: func_types = { "__dace_sum": "DOUBLE", - "__dace_any": "DOUBLE" + "__dace_any": "INTEGER", + "__dace_all": "INTEGER" } # FIXME: Any requires sometimes returning an array of booleans call_type = func_types[func_name.name] @@ -278,212 +280,259 @@ def _generate_loop_body(self, node: ast_internal_classes.FNode) -> ast_internal_ line_number=node.line_number ) +class AnyAllTransformation(LoopBasedReplacementTransformation): -class Any(LoopBasedReplacement): + def __init__(self, ast): + super().__init__(ast) - """ - In this class, we implement the transformation for Fortran intrinsic ANY - We support three ways of invoking the function - by providing array name, array subscript, - and a binary operation. - We do NOT support the *DIM* argument. 
+ def _parse_array(self, node: ast_internal_classes.Execution_Part_Node, arg: ast_internal_classes.FNode) -> ast_internal_classes.Array_Subscript_Node: - First, we split the implementation between three scenarios: - (1) ANY(arr) - (2) ANY(arr1 op arr2) - (3) ANY(arr1 op scalar) - Depending on the scenario, we verify if all participating arrays have the same rank. - We determine the loop range based on the arrays, and convert all array accesses to depend on - the loop. We take special care for situations where arrays have different subscripts, e.g., - arr1(1:3) op arr2(5:7) - the second array needs a shift when indexing based on loop iterator. + # supports syntax ANY(arr) + if isinstance(arg, ast_internal_classes.Name_Node): + array_node = ast_internal_classes.Array_Subscript_Node(parent=arg.parent) + array_node.name = arg - During the loop construction, we add a single variable storing the partial result. - Then, we generate an if condition inside the loop to check if the value is true or not. - For (1), we check if the array entry is equal to 1. - For (2), we reuse the provided binary operation. - When the condition is true, we set the value to true and exit. - """ + # If we access SUM(arr) where arr has many dimensions, + # We need to create a ParDecl_Node for each dimension + dims = len(self.scope_vars.get_var(node.parent, arg.name).sizes) + array_node.indices = [ast_internal_classes.ParDecl_Node(type='ALL')] * dims - class Transformation(LoopBasedReplacementTransformation): + return array_node - def __init__(self, ast): - super().__init__(ast) + # supports syntax ANY(arr(:)) + if isinstance(arg, ast_internal_classes.Array_Subscript_Node): + return arg - def func_name(self) -> str: - return "__dace_any" + def _initialize(self): + self.rvals = [] - def _parse_array(self, node: ast_internal_classes.Execution_Part_Node, arg: ast_internal_classes.FNode) -> ast_internal_classes.Array_Subscript_Node: + self.first_array = None + self.second_array = None + self.dominant_array = None + self.cond = None - # supports syntax ANY(arr) - if isinstance(arg, ast_internal_classes.Name_Node): - array_node = ast_internal_classes.Array_Subscript_Node(parent=arg.parent) - array_node.name = arg + def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): - # If we access SUM(arr) where arr has many dimensions, - # We need to create a ParDecl_Node for each dimension - dims = len(self.scope_vars.get_var(node.parent, arg.name).sizes) - array_node.indices = [ast_internal_classes.ParDecl_Node(type='ALL')] * dims + if len(node.args) > 1: + raise NotImplementedError("Fortran ANY with the DIM parameter is not supported!") + arg = node.args[0] - return array_node + array_node = self._parse_array(node, arg) + if array_node is not None: - # supports syntax ANY(arr(:)) - if isinstance(arg, ast_internal_classes.Array_Subscript_Node): - return arg + self.first_array = array_node - def _initialize(self): - self.rvals = [] + else: - self.first_array = None - self.second_array = None - self.dominant_array = None - self.cond = None + # supports syntax ANY(logical op) + # the logical op can be: + # + # (1) arr1 op arr2 + # where arr1 and arr2 are name node or array subscript node + # there, we need to extract shape and verify they are the same + # + # (2) arr1 op scalar + # there, we ignore the scalar because it's not an array + if not isinstance(arg, ast_internal_classes.BinOp_Node): + return - def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): + self.first_array = 
self._parse_array(node, arg.lval) + self.second_array = self._parse_array(node, arg.rval) + has_two_arrays = self.first_array is not None and self.second_array is not None - if len(node.args) > 1: - raise NotImplementedError("Fortran ANY with the DIM parameter is not supported!") - arg = node.args[0] + # array and scalar - simplified case + if not has_two_arrays: - array_node = self._parse_array(node, arg) - if array_node is not None: + # if one side of the operator is scalar, then parsing array + # will return none + self.dominant_array = self.first_array + if self.dominant_array is None: + self.dominant_array = self.second_array - self.first_array = array_node + # replace the array subscript node in the binary operation + # ignore this when the operand is a scalar + self.cond = copy.deepcopy(arg) + if self.first_array is not None: + self.cond.lval = self.dominant_array + if self.second_array is not None: + self.cond.rval = self.dominant_array + + return - else: - # supports syntax ANY(logical op) - # the logical op can be: - # - # (1) arr1 op arr2 - # where arr1 and arr2 are name node or array subscript node - # there, we need to extract shape and verify they are the same - # - # (2) arr1 op scalar - # there, we ignore the scalar because it's not an array - if not isinstance(arg, ast_internal_classes.BinOp_Node): - return - - self.first_array = self._parse_array(node, arg.lval) - self.second_array = self._parse_array(node, arg.rval) - has_two_arrays = self.first_array is not None and self.second_array is not None - - # array and scalar - simplified case - if not has_two_arrays: - - # if one side of the operator is scalar, then parsing array - # will return none - self.dominant_array = self.first_array - if self.dominant_array is None: - self.dominant_array = self.second_array - - # replace the array subscript node in the binary operation - # ignore this when the operand is a scalar - self.cond = copy.deepcopy(arg) - if self.first_array is not None: - self.cond.lval = self.dominant_array - if self.second_array is not None: - self.cond.rval = self.dominant_array - - return - - - if len(self.first_array.indices) != len(self.second_array.indices): + if len(self.first_array.indices) != len(self.second_array.indices): + raise TypeError("Can't parse Fortran ANY with different array ranks!") + + for left_idx, right_idx in zip(self.first_array.indices, self.second_array.indices): + if left_idx.type != right_idx.type: raise TypeError("Can't parse Fortran ANY with different array ranks!") - for left_idx, right_idx in zip(self.first_array.indices, self.second_array.indices): - if left_idx.type != right_idx.type: - raise TypeError("Can't parse Fortran ANY with different array ranks!") + # Now, we need to convert the array to a proper subscript node + self.cond = copy.deepcopy(arg) + self.cond.lval = self.first_array + self.cond.rval = self.second_array - # Now, we need to convert the array to a proper subscript node - self.cond = copy.deepcopy(arg) - self.cond.lval = self.first_array - self.cond.rval = self.second_array + def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): - def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): + # The main argument is an array, not a binary operation + if self.cond is None: - # The main argument is an array, not a binary operation - if self.cond is None: + par_Decl_Range_Finder(self.first_array, self.loop_ranges, [], [], self.count, new_func_body, 
self.scope_vars, True)
+            self.cond = ast_internal_classes.BinOp_Node(
+                op="==",
+                rval=ast_internal_classes.Int_Literal_Node(value="1"),
+                lval=copy.deepcopy(self.first_array),
+                line_number=node.line_number
+            )
+            return

-            par_Decl_Range_Finder(self.first_array, self.loop_ranges, [], [], self.count, new_func_body, self.scope_vars, True)
-            self.cond = ast_internal_classes.BinOp_Node(
-                op="==",
-                rval=ast_internal_classes.Int_Literal_Node(value="1"),
-                lval=copy.deepcopy(self.first_array),
-                line_number=node.line_number
-            )
-            return
+        # we have a binary operation with an array and a scalar
+        if self.dominant_array is not None:

-            # we have a binary operation with an array and a scalar
-            if self.dominant_array is not None:
+            par_Decl_Range_Finder(self.dominant_array, self.loop_ranges, [], [], self.count, new_func_body, self.scope_vars, True)
+            return

-                par_Decl_Range_Finder(self.dominant_array, self.loop_ranges, [], [], self.count, new_func_body, self.scope_vars, True)
-                return
+        # we have a binary operation with two arrays

-            # we have a binary operation with two arrays
+        rangeslen_left = []
+        par_Decl_Range_Finder(self.first_array, self.loop_ranges, [], rangeslen_left, self.count, new_func_body, self.scope_vars, True)

-            rangeslen_left = []
-            par_Decl_Range_Finder(self.first_array, self.loop_ranges, [], rangeslen_left, self.count, new_func_body, self.scope_vars, True)
+        loop_ranges_right = []
+        rangeslen_right = []
+        par_Decl_Range_Finder(self.second_array, loop_ranges_right, [], rangeslen_right, self.count, new_func_body, self.scope_vars, True)

-            loop_ranges_right = []
-            rangeslen_right = []
-            par_Decl_Range_Finder(self.second_array, loop_ranges_right, [], rangeslen_right, self.count, new_func_body, self.scope_vars, True)
+        for left_len, right_len in zip(rangeslen_left, rangeslen_right):
+            if left_len != right_len:
+                raise TypeError("Can't support Fortran ANY with different array ranks!")

-            for left_len, right_len in zip(rangeslen_left, rangeslen_right):
-                if left_len != right_len:
-                    raise TypeError("Can't support Fortran ANY with different array ranks!")
+        # Now, the loop will be dictated by the left array
+        # If the access pattern on the right array is different, we need to shift it - for every dimension.
+        # For example, we can have arr(1:3) == arr2(3:5)
+        # Then, loop_idx is from 1 to 3
+        # arr becomes arr[loop_idx]
+        # but arr2 must be arr2[loop_idx + 2]
+        for i in range(len(self.second_array.indices)):

-            # Now, the loop will be dictated by the left array
-            # If the access pattern on the right array is different, we need to shift it - for every dimension. 
- # For example, we can have arr(1:3) == arr2(3:5) - # Then, loop_idx is from 1 to 3 - # arr becomes arr[loop_idx] - # but arr2 must be arr2[loop_idx + 2] - for i in range(len(self.second_array.indices)): + idx_var = self.second_array.indices[i] + start_loop = self.loop_ranges[i][0] + end_loop = loop_ranges_right[i][0] - idx_var = self.second_array.indices[i] - start_loop = self.loop_ranges[i][0] - end_loop = loop_ranges_right[i][0] + difference = int(end_loop.value) - int(start_loop.value) + if difference != 0: + new_index = ast_internal_classes.BinOp_Node( + lval=idx_var, + op="+", + rval=ast_internal_classes.Int_Literal_Node(value=str(difference)), + line_number=node.line_number + ) + self.second_array.indices[i] = new_index - difference = int(end_loop.value) - int(start_loop.value) - if difference != 0: - new_index = ast_internal_classes.BinOp_Node( - lval=idx_var, - op="+", - rval=ast_internal_classes.Int_Literal_Node(value=str(difference)), - line_number=node.line_number - ) - self.second_array.indices[i] = new_index + def _initialize_result(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: - def _initialize_result(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: + init_value = None + if 'any' in self.func_name(): + init_value = "0" + else: + init_value = "1" - return ast_internal_classes.BinOp_Node( - lval=node.lval, + return ast_internal_classes.BinOp_Node( + lval=node.lval, + op="=", + rval=ast_internal_classes.Int_Literal_Node(value=init_value), + line_number=node.line_number + ) + + def _generate_loop_body(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: + + """ + For any, we check if the condition is true and then set the value to true + For all, we check if the condition is NOT true and then set the value to false + """ + + assign_value = None + if 'any' in self.func_name(): + assign_value = "1" + else: + assign_value = "0" + + body_if = ast_internal_classes.Execution_Part_Node(execution=[ + ast_internal_classes.BinOp_Node( + lval=copy.deepcopy(node.lval), op="=", - rval=ast_internal_classes.Int_Literal_Node(value="0"), + rval=ast_internal_classes.Int_Literal_Node(value=assign_value), line_number=node.line_number + ), + # TODO: we should make the `break` generation conditional based on the architecture + # For parallel maps, we should have no breaks + # For sequential loop, we want a break to be faster + #ast_internal_classes.Break_Node( + # line_number=node.line_number + #) + ]) + + condition = None + if 'any' in self.func_name(): + condition = self.cond + else: + condition = ast_internal_classes.UnOp_Node( + op="not", + lval=self.cond ) - def _generate_loop_body(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: + return ast_internal_classes.If_Stmt_Node( + cond=condition, + body=body_if, + body_else=ast_internal_classes.Execution_Part_Node(execution=[]), + line_number=node.line_number + ) + +class Any(LoopBasedReplacement): + + """ + In this class, we implement the transformation for Fortran intrinsic ANY + We support three ways of invoking the function - by providing array name, array subscript, + and a binary operation. + We do NOT support the *DIM* argument. + + First, we split the implementation between three scenarios: + (1) ANY(arr) + (2) ANY(arr1 op arr2) + (3) ANY(arr1 op scalar) + Depending on the scenario, we verify if all participating arrays have the same rank. 
+ We determine the loop range based on the arrays, and convert all array accesses to depend on + the loop. We take special care for situations where arrays have different subscripts, e.g., + arr1(1:3) op arr2(5:7) - the second array needs a shift when indexing based on loop iterator. + + During the loop construction, we add a single variable storing the partial result. + Then, we generate an if condition inside the loop to check if the value is true or not. + For (1), we check if the array entry is equal to 1. + For (2), we reuse the provided binary operation. + When the condition is true, we set the value to true and exit. + """ + class Transformation(AnyAllTransformation): + + def __init__(self, ast): + super().__init__(ast) + + def func_name(self) -> str: + return "__dace_any" + +class All(LoopBasedReplacement): + + """ + In this class, we implement the transformation for Fortran intrinsic ALL. + The implementation is very similar to ANY. + The main difference is that we initialize the partial result to 1, + and set it to 0 if any of the evaluated conditions is false. + """ + class Transformation(AnyAllTransformation): + + def __init__(self, ast): + super().__init__(ast) + + def func_name(self) -> str: + return "__dace_all" - body_if = ast_internal_classes.Execution_Part_Node(execution=[ - ast_internal_classes.BinOp_Node( - lval=copy.deepcopy(node.lval), - op="=", - rval=ast_internal_classes.Int_Literal_Node(value="1"), - line_number=node.line_number - ), - # TODO: we should make the `break` generation conditional based on the architecture - # For parallel maps, we should have no breaks - # For sequential loop, we want a break to be faster - #ast_internal_classes.Break_Node( - # line_number=node.line_number - #) - ]) - return ast_internal_classes.If_Stmt_Node( - cond=self.cond, - body=body_if, - body_else=ast_internal_classes.Execution_Part_Node(execution=[]), - line_number=node.line_number - ) class FortranIntrinsics: @@ -491,14 +540,16 @@ class FortranIntrinsics: "SELECTED_INT_KIND": SelectedKind, "SELECTED_REAL_KIND": SelectedKind, "SUM": Sum, - "ANY": Any + "ANY": Any, + "ALL": All } IMPLEMENTATIONS_DACE = { "__dace_selected_int_kind": SelectedKind, "__dace_selected_real_kind": SelectedKind, "__dace_sum": Sum, - "__dace_any": Any + "__dace_any": Any, + "__dace_all": All } def __init__(self): diff --git a/tests/fortran/intrinsic_all.py b/tests/fortran/intrinsic_all.py new file mode 100644 index 0000000000..4a368aff2c --- /dev/null +++ b/tests/fortran/intrinsic_all.py @@ -0,0 +1,361 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. 
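+#
+# Reference semantics for the assertions below (illustrative only; these tests
+# pass Fortran LOGICAL arrays as np.int32 buffers, so "true" means a nonzero entry):
+#   ALL(d)         behaves like bool(np.all(d))
+#   ALL(a .eq. b)  behaves like bool(np.all(a == b))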
+ +import numpy as np +import pytest + +from dace.frontend.fortran import fortran_parser + + +def test_fortran_frontend_all_array(): + test_string = """ + PROGRAM intrinsic_all_test + implicit none + logical, dimension(5) :: d + logical, dimension(2) :: res + CALL intrinsic_all_test_function(d, res) + end + + SUBROUTINE intrinsic_all_test_function(d, res) + logical, dimension(5) :: d + logical, dimension(2) :: res + + res(1) = ALL(d) + + END SUBROUTINE intrinsic_all_test_function + """ + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_all_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + size = 5 + d = np.full([size], False, order="F", dtype=np.int32) + res = np.full([2], 42, order="F", dtype=np.int32) + + d[2] = True + sdfg(d=d, res=res) + assert res[0] == False + + d = np.full([size], True, order="F", dtype=np.int32) + sdfg(d=d, res=res) + assert res[0] == True + + +def test_fortran_frontend_all_array_dim(): + test_string = """ + PROGRAM intrinsic_all_test + implicit none + logical, dimension(5) :: d + logical, dimension(2) :: res + CALL intrinsic_all_test_function(d, res) + end + + SUBROUTINE intrinsic_all_test_function(d, res) + logical, dimension(5) :: d + logical, dimension(2) :: res + + res(1) = ALL(d, 1) + + END SUBROUTINE intrinsic_all_test_function + """ + + with pytest.raises(NotImplementedError): + fortran_parser.create_sdfg_from_string(test_string, "intrinsic_all_test", False) + + +def test_fortran_frontend_all_array_comparison(): + test_string = """ + PROGRAM intrinsic_all_test + implicit none + integer, dimension(5) :: first + integer, dimension(5) :: second + logical, dimension(7) :: res + CALL intrinsic_all_test_function(first, second, res) + end + + SUBROUTINE intrinsic_all_test_function(first, second, res) + integer, dimension(5) :: first + integer, dimension(5) :: second + logical, dimension(7) :: res + + res(1) = ALL(first .eq. second) + res(2) = ALL(first(:) .eq. second) + res(3) = ALL(first .eq. second(:)) + res(4) = ALL(first(:) .eq. second(:)) + res(5) = ALL(first(1:5) .eq. second(1:5)) + ! This will also be true - the only same + ! element is at position 3. + res(6) = ALL(first(1:3) .eq. second(3:5)) + res(7) = ALL(first(1:2) .eq. second(4:5)) + + END SUBROUTINE intrinsic_all_test_function + """ + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_all_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + size = 5 + first = np.full([size], 1, order="F", dtype=np.int32) + second = np.full([size], 1, order="F", dtype=np.int32) + second[2] = 2 + res = np.full([7], 0, order="F", dtype=np.int32) + + sdfg(first=first, second=second, res=res) + assert list(res) == [0, 0, 0, 0, 0, 0, 1] + + second = np.full([size], 2, order="F", dtype=np.int32) + res = np.full([7], 0, order="F", dtype=np.int32) + sdfg(first=first, second=second, res=res) + for val in res: + assert val == False + +def test_fortran_frontend_all_array_scalar_comparison(): + test_string = """ + PROGRAM intrinsic_all_test + implicit none + integer, dimension(5) :: first + logical, dimension(7) :: res + CALL intrinsic_all_test_function(first, res) + end + + SUBROUTINE intrinsic_all_test_function(first, res) + integer, dimension(5) :: first + logical, dimension(7) :: res + + res(1) = ALL(first .eq. 42) + res(2) = ALL(first(:) .eq. 42) + res(3) = ALL(first(1:2) .eq. 42) + res(4) = ALL(first(3) .eq. 42) + res(5) = ALL(first(3:5) .eq. 42) + res(6) = ALL(42 .eq. first) + res(7) = ALL(42 .ne. 
first) + + END SUBROUTINE intrinsic_all_test_function + """ + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_all_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + size = 5 + first = np.full([size], 42, order="F", dtype=np.int32) + res = np.full([7], 0, order="F", dtype=np.int32) + + sdfg(first=first, res=res) + for val in res[0:-1]: + assert val == True + assert res[-1] == False + + first[1] = 5 + sdfg(first=first, res=res) + assert list(res) == [0, 0, 0, 1, 1, 0, 0] + + first[1] = 42 + first[3] = 7 + sdfg(first=first, res=res) + assert list(res) == [0, 0, 1, 1, 0, 0, 0] + + first = np.full([size], 41, order="F", dtype=np.int32) + sdfg(first=first, res=res) + assert list(res) == [0, 0, 0, 0, 0, 0, 1] + +def test_fortran_frontend_all_array_comparison_wrong_subset(): + test_string = """ + PROGRAM intrinsic_all_test + implicit none + logical, dimension(5) :: first + logical, dimension(5) :: second + logical, dimension(2) :: res + CALL intrinsic_all_test_function(first, second, res) + end + + SUBROUTINE intrinsic_all_test_function(first, second, res) + logical, dimension(5) :: first + logical, dimension(5) :: second + logical, dimension(2) :: res + + res(1) = ALL(first(1:2) .eq. second(2:5)) + + END SUBROUTINE intrinsic_all_test_function + """ + + with pytest.raises(TypeError): + fortran_parser.create_sdfg_from_string(test_string, "intrinsic_all_test", False) + +def test_fortran_frontend_all_array_2d(): + test_string = """ + PROGRAM intrinsic_all_test + implicit none + logical, dimension(5,7) :: d + logical, dimension(2) :: res + CALL intrinsic_all_test_function(d, res) + end + + SUBROUTINE intrinsic_all_test_function(d, res) + logical, dimension(5,7) :: d + logical, dimension(2) :: res + + res(1) = ALL(d) + + END SUBROUTINE intrinsic_all_test_function + """ + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_all_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + sizes = [5, 7] + d = np.full(sizes, True, order="F", dtype=np.int32) + res = np.full([2], 42, order="F", dtype=np.int32) + + d[2,2] = False + sdfg(d=d, res=res) + assert res[0] == False + + d[2,2] = True + sdfg(d=d, res=res) + assert res[0] == True + +def test_fortran_frontend_all_array_comparison_2d(): + test_string = """ + PROGRAM intrinsic_all_test + implicit none + integer, dimension(5,4) :: first + integer, dimension(5,4) :: second + logical, dimension(7) :: res + CALL intrinsic_all_test_function(first, second, res) + end + + SUBROUTINE intrinsic_all_test_function(first, second, res) + integer, dimension(5,4) :: first + integer, dimension(5,4) :: second + logical, dimension(7) :: res + + res(1) = ALL(first .eq. second) + res(2) = ALL(first(:,:) .eq. second) + res(3) = ALL(first .eq. second(:,:)) + res(4) = ALL(first(:,:) .eq. second(:,:)) + res(5) = ALL(first(1:5,:) .eq. second(1:5,:)) + res(6) = ALL(first(:,1:4) .eq. second(:,1:4)) + ! Now test subsets. + res(7) = ALL(first(2:3, 3:4) .eq. 
second(2:3, 3:4)) + + END SUBROUTINE intrinsic_all_test_function + """ + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_all_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + sizes = [5, 4] + first = np.full(sizes, 1, order="F", dtype=np.int32) + second = np.full(sizes, 1, order="F", dtype=np.int32) + second[2,2] = 2 + res = np.full([7], 0, order="F", dtype=np.int32) + + sdfg(first=first, second=second, res=res) + for val in res: + assert val == False + + second = np.full(sizes, 1, order="F", dtype=np.int32) + res = np.full([7], 0, order="F", dtype=np.int32) + sdfg(first=first, second=second, res=res) + for val in res: + assert val == True + +def test_fortran_frontend_all_array_comparison_2d_subset(): + test_string = """ + PROGRAM intrinsic_all_test + implicit none + integer, dimension(5,4) :: first + integer, dimension(5,4) :: second + logical, dimension(2) :: res + CALL intrinsic_all_test_function(first, second, res) + end + + SUBROUTINE intrinsic_all_test_function(first, second, res) + integer, dimension(5,4) :: first + integer, dimension(5,4) :: second + logical, dimension(2) :: res + + ! Now test subsets - make sure the equal values are only + ! in the tested area. + res(1) = ALL(first(1:2, 3:4) .ne. second(4:5, 2:3)) + res(2) = ALL(first(1:2, 3:4) .eq. second(4:5, 2:3)) + + END SUBROUTINE intrinsic_all_test_function + """ + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_all_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + sizes = [5, 4] + first = np.full(sizes, 1, order="F", dtype=np.int32) + first[2:5, :] = 2 + first[0:2, 0:2] = 2 + + second = np.full(sizes, 1, order="F", dtype=np.int32) + second[0:3, :] = 3 + second[3:5, 0] = 3 + second[3:5, 3:5] = 3 + + res = np.full([2], 0, order="F", dtype=np.int32) + + sdfg(first=first, second=second, res=res) + assert list(res) == [0, 1] + +def test_fortran_frontend_all_array_comparison_2d_subset_offset(): + test_string = """ + PROGRAM intrinsic_all_test + implicit none + integer, dimension(20:24,4) :: first + integer, dimension(5,7:10) :: second + logical, dimension(2) :: res + CALL intrinsic_all_test_function(first, second, res) + end + + SUBROUTINE intrinsic_all_test_function(first, second, res) + integer, dimension(20:24,4) :: first + integer, dimension(5,7:10) :: second + logical, dimension(2) :: res + + ! Now test subsets - make sure the equal values are only + ! in the tested area. + res(1) = ALL(first(20:21, 3:4) .ne. second(4:5, 8:9)) + res(2) = ALL(first(20:21, 3:4) .eq. 
second(4:5, 8:9)) + + END SUBROUTINE intrinsic_all_test_function + """ + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_all_test", True) + sdfg.simplify(verbose=True) + sdfg.compile() + + sizes = [5, 4] + first = np.full(sizes, 1, order="F", dtype=np.int32) + first[2:5, :] = 2 + first[0:2, 0:2] = 2 + + second = np.full(sizes, 1, order="F", dtype=np.int32) + second[0:3, :] = 3 + second[3:5, 0] = 3 + second[3:5, 3:5] = 3 + + res = np.full([2], 0, order="F", dtype=np.int32) + + sdfg(first=first, second=second, res=res) + assert list(res) == [0, 1] + +if __name__ == "__main__": + + test_fortran_frontend_all_array() + test_fortran_frontend_all_array_dim() + test_fortran_frontend_all_array_comparison() + test_fortran_frontend_all_array_scalar_comparison() + test_fortran_frontend_all_array_comparison_wrong_subset() + test_fortran_frontend_all_array_2d() + test_fortran_frontend_all_array_comparison_2d() + test_fortran_frontend_all_array_comparison_2d_subset() + test_fortran_frontend_all_array_comparison_2d_subset_offset() From 827bd1f5a7fb8c10eebf1777658b5e9f6f807fd6 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Sat, 14 Oct 2023 01:40:36 +0200 Subject: [PATCH 093/163] Implement Fortran COUNT intrinsic --- dace/frontend/fortran/intrinsics.py | 120 ++++++--- tests/fortran/intrinsic_count.py | 369 ++++++++++++++++++++++++++++ 2 files changed, 455 insertions(+), 34 deletions(-) create mode 100644 tests/fortran/intrinsic_count.py diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py index d481097968..e0301859d3 100644 --- a/dace/frontend/fortran/intrinsics.py +++ b/dace/frontend/fortran/intrinsics.py @@ -61,7 +61,8 @@ def replaced_name(func_name: str) -> str: replacements = { "SUM": "__dace_sum", "ANY": "__dace_any", - "ALL": "__dace_all" + "ALL": "__dace_all", + "COUNT": "__dace_count" } return replacements[func_name] @@ -70,7 +71,8 @@ def replace(func_name: ast_internal_classes.Name_Node, args: ast_internal_classe func_types = { "__dace_sum": "DOUBLE", "__dace_any": "INTEGER", - "__dace_all": "INTEGER" + "__dace_all": "INTEGER", + "__dace_count": "INTEGER" } # FIXME: Any requires sometimes returning an array of booleans call_type = func_types[func_name.name] @@ -280,7 +282,7 @@ def _generate_loop_body(self, node: ast_internal_classes.FNode) -> ast_internal_ line_number=node.line_number ) -class AnyAllTransformation(LoopBasedReplacementTransformation): +class AnyAllCountTransformation(LoopBasedReplacementTransformation): def __init__(self, ast): super().__init__(ast) @@ -429,11 +431,7 @@ def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ def _initialize_result(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: - init_value = None - if 'any' in self.func_name(): - init_value = "0" - else: - init_value = "1" + init_value = self._result_init_value() return ast_internal_classes.BinOp_Node( lval=node.lval, @@ -443,25 +441,14 @@ def _initialize_result(self, node: ast_internal_classes.FNode) -> ast_internal_c ) def _generate_loop_body(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: - + """ For any, we check if the condition is true and then set the value to true For all, we check if the condition is NOT true and then set the value to false """ - assign_value = None - if 'any' in self.func_name(): - assign_value = "1" - else: - assign_value = "0" - body_if = ast_internal_classes.Execution_Part_Node(execution=[ - ast_internal_classes.BinOp_Node( - 
lval=copy.deepcopy(node.lval), - op="=", - rval=ast_internal_classes.Int_Literal_Node(value=assign_value), - line_number=node.line_number - ), + self._result_loop_update(node), # TODO: we should make the `break` generation conditional based on the architecture # For parallel maps, we should have no breaks # For sequential loop, we want a break to be faster @@ -470,17 +457,8 @@ def _generate_loop_body(self, node: ast_internal_classes.FNode) -> ast_internal_ #) ]) - condition = None - if 'any' in self.func_name(): - condition = self.cond - else: - condition = ast_internal_classes.UnOp_Node( - op="not", - lval=self.cond - ) - return ast_internal_classes.If_Stmt_Node( - cond=condition, + cond=self._loop_condition(), body=body_if, body_else=ast_internal_classes.Execution_Part_Node(execution=[]), line_number=node.line_number @@ -509,11 +487,26 @@ class Any(LoopBasedReplacement): For (2), we reuse the provided binary operation. When the condition is true, we set the value to true and exit. """ - class Transformation(AnyAllTransformation): + class Transformation(AnyAllCountTransformation): def __init__(self, ast): super().__init__(ast) + def _result_init_value(self): + return "0" + + def _result_loop_update(self, node: ast_internal_classes.FNode): + + return ast_internal_classes.BinOp_Node( + lval=copy.deepcopy(node.lval), + op="=", + rval=ast_internal_classes.Int_Literal_Node(value="1"), + line_number=node.line_number + ) + + def _loop_condition(self): + return self.cond + def func_name(self) -> str: return "__dace_any" @@ -525,14 +518,71 @@ class All(LoopBasedReplacement): The main difference is that we initialize the partial result to 1, and set it to 0 if any of the evaluated conditions is false. """ - class Transformation(AnyAllTransformation): + class Transformation(AnyAllCountTransformation): def __init__(self, ast): super().__init__(ast) + def _result_init_value(self): + return "1" + + def _result_loop_update(self, node: ast_internal_classes.FNode): + + return ast_internal_classes.BinOp_Node( + lval=copy.deepcopy(node.lval), + op="=", + rval=ast_internal_classes.Int_Literal_Node(value="0"), + line_number=node.line_number + ) + + def _loop_condition(self): + return ast_internal_classes.UnOp_Node( + op="not", + lval=self.cond + ) + def func_name(self) -> str: return "__dace_all" +class Count(LoopBasedReplacement): + + """ + In this class, we implement the transformation for Fortran intrinsic COUNT. + The implementation is very similar to ANY and ALL. + The main difference is that we initialize the partial result to 0 + and increment it if any of the evaluated conditions is true. + + We do not support the KIND argument. 
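+
+    As a rough sketch (illustrative pseudocode, not the exact generated AST),
+    res = COUNT(a .eq. b) for one-dimensional arrays of size n lowers to:
+
+        res = 0
+        for i in range(n):
+            if a[i] == b[i]:
+                res = res + 1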
+ """ + class Transformation(AnyAllCountTransformation): + + def __init__(self, ast): + super().__init__(ast) + + def _result_init_value(self): + return "0" + + def _result_loop_update(self, node: ast_internal_classes.FNode): + + update = ast_internal_classes.BinOp_Node( + lval=copy.deepcopy(node.lval), + op="+", + rval=ast_internal_classes.Int_Literal_Node(value="1"), + line_number=node.line_number + ) + return ast_internal_classes.BinOp_Node( + lval=copy.deepcopy(node.lval), + op="=", + rval=update, + line_number=node.line_number + ) + + def _loop_condition(self): + return self.cond + + def func_name(self) -> str: + return "__dace_count" + class FortranIntrinsics: @@ -541,6 +591,7 @@ class FortranIntrinsics: "SELECTED_REAL_KIND": SelectedKind, "SUM": Sum, "ANY": Any, + "COUNT": Count, "ALL": All } @@ -549,7 +600,8 @@ class FortranIntrinsics: "__dace_selected_real_kind": SelectedKind, "__dace_sum": Sum, "__dace_any": Any, - "__dace_all": All + "__dace_all": All, + "__dace_count": Count } def __init__(self): diff --git a/tests/fortran/intrinsic_count.py b/tests/fortran/intrinsic_count.py new file mode 100644 index 0000000000..5e6666513c --- /dev/null +++ b/tests/fortran/intrinsic_count.py @@ -0,0 +1,369 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. + +import numpy as np +import pytest + +from dace.frontend.fortran import fortran_parser + + +def test_fortran_frontend_count_array(): + test_string = """ + PROGRAM intrinsic_count_test + implicit none + logical, dimension(5) :: d + integer, dimension(2) :: res + CALL intrinsic_count_test_function(d, res) + end + + SUBROUTINE intrinsic_count_test_function(d, res) + logical, dimension(5) :: d + integer, dimension(2) :: res + + res(1) = COUNT(d) + + END SUBROUTINE intrinsic_count_test_function + """ + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_count_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + size = 5 + d = np.full([size], False, order="F", dtype=np.int32) + res = np.full([2], 42, order="F", dtype=np.int32) + + d[2] = True + sdfg(d=d, res=res) + assert res[0] == 1 + + d[2] = False + sdfg(d=d, res=res) + assert res[0] == 0 + + +def test_fortran_frontend_count_array_dim(): + test_string = """ + PROGRAM intrinsic_count_test + implicit none + logical, dimension(5) :: d + logical, dimension(2) :: res + CALL intrinsic_count_test_function(d, res) + end + + SUBROUTINE intrinsic_count_test_function(d, res) + logical, dimension(5) :: d + logical, dimension(2) :: res + + res(1) = COUNT(d, 1) + + END SUBROUTINE intrinsic_count_test_function + """ + + with pytest.raises(NotImplementedError): + fortran_parser.create_sdfg_from_string(test_string, "intrinsic_count_test", False) + + +def test_fortran_frontend_count_array_comparison(): + test_string = """ + PROGRAM intrinsic_count_test + implicit none + integer, dimension(5) :: first + integer, dimension(5) :: second + logical, dimension(7) :: res + CALL intrinsic_count_test_function(first, second, res) + end + + SUBROUTINE intrinsic_count_test_function(first, second, res) + integer, dimension(5) :: first + integer, dimension(5) :: second + logical, dimension(7) :: res + + res(1) = COUNT(first .eq. second) + res(2) = COUNT(first(:) .eq. second) + res(3) = COUNT(first .eq. second(:)) + res(4) = COUNT(first(:) .eq. second(:)) + res(5) = COUNT(first(1:5) .eq. second(1:5)) + ! This will also be true - the only same + ! element is at position 3. + res(6) = COUNT(first(1:3) .eq. second(3:5)) + res(7) = COUNT(first(1:2) .eq. 
second(4:5)) + + END SUBROUTINE intrinsic_count_test_function + """ + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_count_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + size = 5 + first = np.full([size], 1, order="F", dtype=np.int32) + second = np.full([size], 1, order="F", dtype=np.int32) + second[2] = 2 + res = np.full([7], 0, order="F", dtype=np.int32) + + sdfg(first=first, second=second, res=res) + assert list(res) == [4, 4, 4, 4, 4, 2, 2] + + second = np.full([size], 2, order="F", dtype=np.int32) + res = np.full([7], 0, order="F", dtype=np.int32) + sdfg(first=first, second=second, res=res) + for val in res: + assert val == 0 + + second = np.full([size], 1, order="F", dtype=np.int32) + res = np.full([7], 0, order="F", dtype=np.int32) + sdfg(first=first, second=second, res=res) + assert list(res) == [5, 5, 5, 5, 5, 3, 2] + +def test_fortran_frontend_count_array_scalar_comparison(): + test_string = """ + PROGRAM intrinsic_count_test + implicit none + integer, dimension(5) :: first + logical, dimension(7) :: res + CALL intrinsic_count_test_function(first, res) + end + + SUBROUTINE intrinsic_count_test_function(first, res) + integer, dimension(5) :: first + logical, dimension(7) :: res + + res(1) = COUNT(first .eq. 42) + res(2) = COUNT(first(:) .eq. 42) + res(3) = COUNT(first(1:2) .eq. 42) + res(4) = COUNT(first(3) .eq. 42) + res(5) = COUNT(first(3:5) .eq. 42) + res(6) = COUNT(42 .eq. first) + res(7) = COUNT(42 .ne. first) + + END SUBROUTINE intrinsic_count_test_function + """ + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_count_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + size = 5 + first = np.full([size], 1, order="F", dtype=np.int32) + res = np.full([7], 0, order="F", dtype=np.int32) + + sdfg(first=first, res=res) + assert list(res) == [0, 0, 0, 0, 0, 0, 5] + + first[1] = 42 + sdfg(first=first, res=res) + assert list(res) == [1, 1, 1, 0, 0, 1, 4] + + first[1] = 5 + first[2] = 42 + sdfg(first=first, res=res) + assert list(res) == [1, 1, 0, 1, 1, 1, 4] + + first[2] = 7 + first[3] = 42 + sdfg(first=first, res=res) + assert list(res) == [1, 1, 0, 0, 1, 1, 4] + +def test_fortran_frontend_count_array_comparison_wrong_subset(): + test_string = """ + PROGRAM intrinsic_count_test + implicit none + logical, dimension(5) :: first + logical, dimension(5) :: second + logical, dimension(2) :: res + CALL intrinsic_count_test_function(first, second, res) + end + + SUBROUTINE intrinsic_count_test_function(first, second, res) + logical, dimension(5) :: first + logical, dimension(5) :: second + logical, dimension(2) :: res + + res(1) = COUNT(first(1:2) .eq. 
second(2:5)) + + END SUBROUTINE intrinsic_count_test_function + """ + + with pytest.raises(TypeError): + fortran_parser.create_sdfg_from_string(test_string, "intrinsic_count_test", False) + +def test_fortran_frontend_count_array_2d(): + test_string = """ + PROGRAM intrinsic_count_test + implicit none + logical, dimension(5,7) :: d + logical, dimension(2) :: res + CALL intrinsic_count_test_function(d, res) + end + + SUBROUTINE intrinsic_count_test_function(d, res) + logical, dimension(5,7) :: d + logical, dimension(2) :: res + + res(1) = COUNT(d) + + END SUBROUTINE intrinsic_count_test_function + """ + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_count_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + sizes = [5, 7] + d = np.full(sizes, True, order="F", dtype=np.int32) + res = np.full([2], 42, order="F", dtype=np.int32) + sdfg(d=d, res=res) + assert res[0] == 35 + + d[2,2] = False + sdfg(d=d, res=res) + assert res[0] == 34 + + d = np.full(sizes, False, order="F", dtype=np.int32) + sdfg(d=d, res=res) + assert res[0] == 0 + + d[2,2] = True + sdfg(d=d, res=res) + assert res[0] == 1 + +def test_fortran_frontend_count_array_comparison_2d(): + test_string = """ + PROGRAM intrinsic_count_test + implicit none + integer, dimension(5,4) :: first + integer, dimension(5,4) :: second + logical, dimension(7) :: res + CALL intrinsic_count_test_function(first, second, res) + end + + SUBROUTINE intrinsic_count_test_function(first, second, res) + integer, dimension(5,4) :: first + integer, dimension(5,4) :: second + logical, dimension(7) :: res + + res(1) = COUNT(first .eq. second) + res(2) = COUNT(first(:,:) .eq. second) + res(3) = COUNT(first .eq. second(:,:)) + res(4) = COUNT(first(:,:) .eq. second(:,:)) + res(5) = COUNT(first(1:5,:) .eq. second(1:5,:)) + res(6) = COUNT(first(:,1:4) .eq. second(:,1:4)) + ! Now test subsets. + res(7) = COUNT(first(2:3, 3:4) .eq. second(2:3, 3:4)) + + END SUBROUTINE intrinsic_count_test_function + """ + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_count_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + sizes = [5, 4] + first = np.full(sizes, 1, order="F", dtype=np.int32) + second = np.full(sizes, 2, order="F", dtype=np.int32) + second[1, 1] = 1 + res = np.full([7], 0, order="F", dtype=np.int32) + + sdfg(first=first, second=second, res=res) + assert list(res) == [1, 1, 1, 1, 1, 1, 0] + + second = np.full(sizes, 1, order="F", dtype=np.int32) + res = np.full([7], 0, order="F", dtype=np.int32) + sdfg(first=first, second=second, res=res) + assert list(res) == [20, 20, 20, 20, 20, 20, 4] + +def test_fortran_frontend_count_array_comparison_2d_subset(): + test_string = """ + PROGRAM intrinsic_count_test + implicit none + integer, dimension(5,4) :: first + integer, dimension(5,4) :: second + logical, dimension(2) :: res + CALL intrinsic_count_test_function(first, second, res) + end + + SUBROUTINE intrinsic_count_test_function(first, second, res) + integer, dimension(5,4) :: first + integer, dimension(5,4) :: second + logical, dimension(2) :: res + + ! Now test subsets - make sure the equal values are only + ! in the tested area. + res(1) = COUNT(first(1:2, 3:4) .ne. second(4:5, 2:3)) + res(2) = COUNT(first(1:2, 3:4) .eq. 
second(4:5, 2:3))
+
+    END SUBROUTINE intrinsic_count_test_function
+    """
+
+    sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_count_test", False)
+    sdfg.simplify(verbose=True)
+    sdfg.compile()
+
+    sizes = [5, 4]
+    first = np.full(sizes, 1, order="F", dtype=np.int32)
+    first[2:5, :] = 2
+    first[0:2, 0:2] = 2
+
+    second = np.full(sizes, 1, order="F", dtype=np.int32)
+    second[0:3, :] = 2
+    second[3:5, 0] = 2
+    second[3:5, 3:5] = 2
+
+    res = np.full([2], 0, order="F", dtype=np.int32)
+
+    sdfg(first=first, second=second, res=res)
+    assert list(res) == [0, 4]
+
+def test_fortran_frontend_count_array_comparison_2d_subset_offset():
+    test_string = """
+    PROGRAM intrinsic_count_test
+    implicit none
+    integer, dimension(20:24,4) :: first
+    integer, dimension(5,7:10) :: second
+    logical, dimension(2) :: res
+    CALL intrinsic_count_test_function(first, second, res)
+    end
+
+    SUBROUTINE intrinsic_count_test_function(first, second, res)
+    integer, dimension(20:24,4) :: first
+    integer, dimension(5,7:10) :: second
+    logical, dimension(2) :: res
+
+    ! Now test subsets - make sure the equal values are only
+    ! in the tested area.
+    res(1) = COUNT(first(20:21, 3:4) .ne. second(4:5, 8:9))
+    res(2) = COUNT(first(20:21, 3:4) .eq. second(4:5, 8:9))
+
+    END SUBROUTINE intrinsic_count_test_function
+    """
+
+    sdfg = fortran_parser.create_sdfg_from_string(test_string, "intrinsic_count_test", True)
+    sdfg.simplify(verbose=True)
+    sdfg.compile()
+
+    sizes = [5, 4]
+    first = np.full(sizes, 1, order="F", dtype=np.int32)
+    first[2:5, :] = 2
+    first[0:2, 0:2] = 2
+
+    second = np.full(sizes, 1, order="F", dtype=np.int32)
+    second[0:3, :] = 2
+    second[3:5, 0] = 2
+    second[3:5, 3:5] = 2
+
+    res = np.full([2], 0, order="F", dtype=np.int32)
+
+    sdfg(first=first, second=second, res=res)
+    assert list(res) == [0, 4]
+
+if __name__ == "__main__":
+
+    test_fortran_frontend_count_array()
+    test_fortran_frontend_count_array_dim()
+    test_fortran_frontend_count_array_comparison()
+    test_fortran_frontend_count_array_scalar_comparison()
+    test_fortran_frontend_count_array_comparison_wrong_subset()
+    test_fortran_frontend_count_array_2d()
+    test_fortran_frontend_count_array_comparison_2d()
+    test_fortran_frontend_count_array_comparison_2d_subset()
+    test_fortran_frontend_count_array_comparison_2d_subset_offset()

From 07553851cb10d339fc2752c99d62c1afb5d6ae29 Mon Sep 17 00:00:00 2001
From: BenWeber42
Date: Mon, 16 Oct 2023 19:31:42 +0200
Subject: [PATCH 094/163] Bump version to 0.15

---
 dace/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dace/version.py b/dace/version.py
index 9b67b07d2f..a3e6290df8 100644
--- a/dace/version.py
+++ b/dace/version.py
@@ -1 +1 @@
-__version__ = '0.14.4'
+__version__ = '0.15'

From 8fd8ba242ac0dd9bf8b49bf0893ca3eeecd66b14 Mon Sep 17 00:00:00 2001
From: Marcin Copik
Date: Wed, 18 Oct 2023 23:02:42 +0200
Subject: [PATCH 095/163] Implement Fortran intrinsic PRODUCT

---
 dace/frontend/fortran/intrinsics.py | 140 ++++++++++++++++++----------
 tests/fortran/intrinsic_product.py  | 118 +++++++++++++++++++++++
 2 files changed, 210 insertions(+), 48 deletions(-)
 create mode 100644 tests/fortran/intrinsic_product.py

diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py
index e0301859d3..ad990cfcba 100644
--- a/dace/frontend/fortran/intrinsics.py
+++ b/dace/frontend/fortran/intrinsics.py
@@ -60,6 +60,7 @@ class LoopBasedReplacement:
 def replaced_name(func_name: str) -> str:
 replacements = {
 "SUM": "__dace_sum",
+            "PRODUCT": "__dace_product", 
"ANY": "__dace_any", "ALL": "__dace_all", "COUNT": "__dace_count" @@ -70,6 +71,7 @@ def replaced_name(func_name: str) -> str: def replace(func_name: ast_internal_classes.Name_Node, args: ast_internal_classes.Arg_List_Node, line) -> ast_internal_classes.FNode: func_types = { "__dace_sum": "DOUBLE", + "__dace_product": "DOUBLE", "__dace_any": "INTEGER", "__dace_all": "INTEGER", "__dace_count": "INTEGER" @@ -206,6 +208,72 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No self.count = self.count + range_index return ast_internal_classes.Execution_Part_Node(execution=newbody) +class SumProduct(LoopBasedReplacementTransformation): + + def __init__(self, ast): + super().__init__(ast) + + def _initialize(self): + self.rvals = [] + self.argument_variable = None + + def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): + + for arg in node.args: + + # supports syntax SUM(arr) + if isinstance(arg, ast_internal_classes.Name_Node): + array_node = ast_internal_classes.Array_Subscript_Node(parent=arg.parent) + array_node.name = arg + + # If we access SUM(arr) where arr has many dimensions, + # We need to create a ParDecl_Node for each dimension + dims = len(self.scope_vars.get_var(node.parent, arg.name).sizes) + array_node.indices = [ast_internal_classes.ParDecl_Node(type='ALL')] * dims + + self.rvals.append(array_node) + + # supports syntax SUM(arr(:)) + elif isinstance(arg, ast_internal_classes.Array_Subscript_Node): + self.rvals.append(arg) + + else: + raise NotImplementedError("We do not support non-array arguments for SUM/PRODUCT") + + + def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): + + if len(self.rvals) != 1: + raise NotImplementedError("Only one array can be summed") + + self.argument_variable = self.rvals[0] + + par_Decl_Range_Finder(self.argument_variable, self.loop_ranges, [], [], self.count, new_func_body, self.scope_vars, True) + + def _initialize_result(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: + + return ast_internal_classes.BinOp_Node( + lval=node.lval, + op="=", + rval=ast_internal_classes.Int_Literal_Node(value=self._result_init_value()), + line_number=node.line_number + ) + + def _generate_loop_body(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: + + return ast_internal_classes.BinOp_Node( + lval=node.lval, + op="=", + rval=ast_internal_classes.BinOp_Node( + lval=node.lval, + op=self._result_update_op(), + rval=self.argument_variable, + line_number=node.line_number + ), + line_number=node.line_number + ) + + class Sum(LoopBasedReplacement): """ @@ -217,7 +285,7 @@ class Sum(LoopBasedReplacement): Then, we generate a binary node accumulating the result. 
""" - class Transformation(LoopBasedReplacementTransformation): + class Transformation(SumProduct): def __init__(self, ast): super().__init__(ast) @@ -225,62 +293,36 @@ def __init__(self, ast): def func_name(self) -> str: return "__dace_sum" - def _initialize(self): - self.rvals = [] - self.argument_variable = None - - def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): - - for arg in node.args: - - # supports syntax SUM(arr) - if isinstance(arg, ast_internal_classes.Name_Node): - array_node = ast_internal_classes.Array_Subscript_Node(parent=arg.parent) - array_node.name = arg - - # If we access SUM(arr) where arr has many dimensions, - # We need to create a ParDecl_Node for each dimension - dims = len(self.scope_vars.get_var(node.parent, arg.name).sizes) - array_node.indices = [ast_internal_classes.ParDecl_Node(type='ALL')] * dims - - self.rvals.append(array_node) - - # supports syntax SUM(arr(:)) - if isinstance(arg, ast_internal_classes.Array_Subscript_Node): - self.rvals.append(arg) + def _result_init_value(self): + return "0" + def _result_update_op(self): + return "+" - def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): +class Product(LoopBasedReplacement): - if len(self.rvals) != 1: - raise NotImplementedError("Only one array can be summed") + """ + In this class, we implement the transformation for Fortran intrinsic PRODUCT(:) + We support two ways of invoking the function - by providing array name and array subscript. + We do NOT support the *DIM* and *MASK* arguments. - self.argument_variable = self.rvals[0] + During the loop construction, we add a single variable storing the partial result. + Then, we generate a binary node accumulating the result. + """ - par_Decl_Range_Finder(self.argument_variable, self.loop_ranges, [], [], self.count, new_func_body, self.scope_vars, True) + class Transformation(SumProduct): - def _initialize_result(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: + def __init__(self, ast): + super().__init__(ast) - return ast_internal_classes.BinOp_Node( - lval=node.lval, - op="=", - rval=ast_internal_classes.Int_Literal_Node(value="0"), - line_number=node.line_number - ) + def func_name(self) -> str: + return "__dace_product" - def _generate_loop_body(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: + def _result_init_value(self): + return "1" - return ast_internal_classes.BinOp_Node( - lval=node.lval, - op="=", - rval=ast_internal_classes.BinOp_Node( - lval=node.lval, - op="+", - rval=self.argument_variable, - line_number=node.line_number - ), - line_number=node.line_number - ) + def _result_update_op(self): + return "*" class AnyAllCountTransformation(LoopBasedReplacementTransformation): @@ -590,6 +632,7 @@ class FortranIntrinsics: "SELECTED_INT_KIND": SelectedKind, "SELECTED_REAL_KIND": SelectedKind, "SUM": Sum, + "PRODUCT": Product, "ANY": Any, "COUNT": Count, "ALL": All @@ -599,6 +642,7 @@ class FortranIntrinsics: "__dace_selected_int_kind": SelectedKind, "__dace_selected_real_kind": SelectedKind, "__dace_sum": Sum, + "__dace_product": Product, "__dace_any": Any, "__dace_all": All, "__dace_count": Count diff --git a/tests/fortran/intrinsic_product.py b/tests/fortran/intrinsic_product.py new file mode 100644 index 0000000000..06d14e0a34 --- /dev/null +++ b/tests/fortran/intrinsic_product.py @@ -0,0 +1,118 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. 
+ +import numpy as np +import pytest + +from dace.frontend.fortran import ast_transforms, fortran_parser + +def test_fortran_frontend_product_array(): + """ + Tests that the generated array map correctly handles offsets. + """ + test_string = """ + PROGRAM index_offset_test + implicit none + double precision, dimension(7) :: d + double precision, dimension(3) :: res + CALL index_test_function(d, res) + end + + SUBROUTINE index_test_function(d, res) + double precision, dimension(7) :: d + double precision, dimension(3) :: res + + res(1) = PRODUCT(d) + res(2) = PRODUCT(d(:)) + res(3) = PRODUCT(d(2:5)) + + END SUBROUTINE index_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "index_offset_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + size = 7 + d = np.full([size], 0, order="F", dtype=np.float64) + for i in range(size): + d[i] = i + 1 + res = np.full([3], 42, order="F", dtype=np.float64) + sdfg(d=d, res=res) + print(d) + print(res) + assert res[0] == np.prod(d) + assert res[1] == np.prod(d) + assert res[2] == np.prod(d[1:5]) + +def test_fortran_frontend_product_array_dim(): + test_string = """ + PROGRAM intrinsic_count_test + implicit none + logical, dimension(5) :: d + logical, dimension(2) :: res + CALL intrinsic_count_test_function(d, res) + end + + SUBROUTINE intrinsic_count_test_function(d, res) + logical, dimension(5) :: d + logical, dimension(2) :: res + + res(1) = PRODUCT(d, 1) + + END SUBROUTINE intrinsic_count_test_function + """ + + with pytest.raises(NotImplementedError): + fortran_parser.create_sdfg_from_string(test_string, "intrinsic_count_test", False) + +def test_fortran_frontend_product_2d(): + """ + Tests that the generated array map correctly handles offsets. 
+ """ + test_string = """ + PROGRAM index_offset_test + implicit none + double precision, dimension(5,3) :: d + double precision, dimension(4) :: res + CALL index_test_function(d,res) + end + + SUBROUTINE index_test_function(d, res) + double precision, dimension(5,3) :: d + double precision, dimension(4) :: res + + res(1) = PRODUCT(d) + res(2) = PRODUCT(d(:,:)) + res(3) = PRODUCT(d(2:4, 2)) + res(4) = PRODUCT(d(2:4, 2:3)) + + END SUBROUTINE index_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "index_offset_test", True) + sdfg.simplify(verbose=True) + sdfg.compile() + + sizes = [5, 3] + d = np.full(sizes, 42, order="F", dtype=np.float64) + cnt = 1 + for i in range(sizes[0]): + for j in range(sizes[1]): + d[i, j] = cnt + cnt += 1 + res = np.full([4], 42, order="F", dtype=np.float64) + sdfg(d=d, res=res) + assert res[0] == np.prod(d) + assert res[1] == np.prod(d) + assert res[2] == np.prod(d[1:4, 1]) + assert res[3] == np.prod(d[1:4, 1:3]) + +if __name__ == "__main__": + + test_fortran_frontend_product_array() + test_fortran_frontend_product_array_dim() + test_fortran_frontend_product_2d() From fece069fd3d7e07297c7c4e8a3e95777c5b13d68 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Wed, 18 Oct 2023 23:16:52 +0200 Subject: [PATCH 096/163] Make the main array parsing function available to all loop-based intrinsics --- dace/frontend/fortran/intrinsics.py | 52 +++++++++++------------------ 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py index ad990cfcba..d7c6fcc7b3 100644 --- a/dace/frontend/fortran/intrinsics.py +++ b/dace/frontend/fortran/intrinsics.py @@ -140,6 +140,24 @@ def _initialize_result(self, node: ast_internal_classes.FNode) -> ast_internal_c def _generate_loop_body(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: pass + def _parse_array(self, node: ast_internal_classes.Execution_Part_Node, arg: ast_internal_classes.FNode) -> ast_internal_classes.Array_Subscript_Node: + + # supports syntax func(arr) + if isinstance(arg, ast_internal_classes.Name_Node): + array_node = ast_internal_classes.Array_Subscript_Node(parent=arg.parent) + array_node.name = arg + + # If we access SUM(arr) where arr has many dimensions, + # We need to create a ParDecl_Node for each dimension + dims = len(self.scope_vars.get_var(node.parent, arg.name).sizes) + array_node.indices = [ast_internal_classes.ParDecl_Node(type='ALL')] * dims + + return array_node + + # supports syntax func(arr(:)) + if isinstance(arg, ast_internal_classes.Array_Subscript_Node): + return arg + def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_Node): newbody = [] @@ -221,22 +239,10 @@ def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): for arg in node.args: - # supports syntax SUM(arr) - if isinstance(arg, ast_internal_classes.Name_Node): - array_node = ast_internal_classes.Array_Subscript_Node(parent=arg.parent) - array_node.name = arg - - # If we access SUM(arr) where arr has many dimensions, - # We need to create a ParDecl_Node for each dimension - dims = len(self.scope_vars.get_var(node.parent, arg.name).sizes) - array_node.indices = [ast_internal_classes.ParDecl_Node(type='ALL')] * dims + array_node = self._parse_array(node, arg) + if array_node is not None: self.rvals.append(array_node) - - # supports syntax SUM(arr(:)) - elif isinstance(arg, 
ast_internal_classes.Array_Subscript_Node): - self.rvals.append(arg) - else: raise NotImplementedError("We do not support non-array arguments for SUM/PRODUCT") @@ -329,24 +335,6 @@ class AnyAllCountTransformation(LoopBasedReplacementTransformation): def __init__(self, ast): super().__init__(ast) - def _parse_array(self, node: ast_internal_classes.Execution_Part_Node, arg: ast_internal_classes.FNode) -> ast_internal_classes.Array_Subscript_Node: - - # supports syntax ANY(arr) - if isinstance(arg, ast_internal_classes.Name_Node): - array_node = ast_internal_classes.Array_Subscript_Node(parent=arg.parent) - array_node.name = arg - - # If we access SUM(arr) where arr has many dimensions, - # We need to create a ParDecl_Node for each dimension - dims = len(self.scope_vars.get_var(node.parent, arg.name).sizes) - array_node.indices = [ast_internal_classes.ParDecl_Node(type='ALL')] * dims - - return array_node - - # supports syntax ANY(arr(:)) - if isinstance(arg, ast_internal_classes.Array_Subscript_Node): - return arg - def _initialize(self): self.rvals = [] From 41d2e279301f721f473de7e3d508c966f6c29209 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 19 Oct 2023 01:06:32 +0200 Subject: [PATCH 097/163] Return AST node from NodeVisitor --- dace/frontend/fortran/ast_transforms.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dace/frontend/fortran/ast_transforms.py b/dace/frontend/fortran/ast_transforms.py index 58146563da..7aa6205bd6 100644 --- a/dace/frontend/fortran/ast_transforms.py +++ b/dace/frontend/fortran/ast_transforms.py @@ -329,6 +329,8 @@ def visit(self, node: ast_internal_classes.FNode, parent_node: Optional[ast_inte elif isinstance(value, ast_internal_classes.FNode): self.visit(value, node) + return node + class ScopeVarsDeclarations(NodeVisitor): """ Creates a mapping (scope name, variable name) -> variable declaration. 
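
Note on the change above: since visit() now returns the node, a NodeVisitor pass
can be chained the same way as a NodeTransformer pass. A minimal sketch of the
calling pattern used by the intrinsic transformations (illustrative only; here
"ast" stands for any parsed internal Fortran AST):

    from dace.frontend.fortran.ast_transforms import ParentScopeAssigner, ScopeVarsDeclarations

    # Annotate each node with its parent scope. The pass mutates the AST in
    # place and, after this patch, also returns it, so both styles work:
    ast = ParentScopeAssigner().visit(ast)

    # Collect the (scope name, variable name) -> declaration mapping so that
    # later passes can query the sizes and types of array arguments.
    scope_vars = ScopeVarsDeclarations()
    scope_vars.visit(ast)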
From a17c4e87b97f5abc8fd788e5feae0a2900778e20 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 19 Oct 2023 01:07:31 +0200 Subject: [PATCH 098/163] Implement the basic MINVAL Fortran intrinsic --- dace/frontend/fortran/intrinsics.py | 111 ++++++++++++++++++++++++++-- 1 file changed, 106 insertions(+), 5 deletions(-) diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py index d7c6fcc7b3..efbe7bdc57 100644 --- a/dace/frontend/fortran/intrinsics.py +++ b/dace/frontend/fortran/intrinsics.py @@ -5,6 +5,7 @@ from typing import Any, List, Set, Type from dace.frontend.fortran import ast_internal_classes +from dace.frontend.fortran.ast_utils import fortrantypes2dacetypes from dace.frontend.fortran.ast_transforms import NodeVisitor, NodeTransformer, ParentScopeAssigner, ScopeVarsDeclarations, par_Decl_Range_Finder, mywalk FASTNode = Any @@ -63,7 +64,8 @@ def replaced_name(func_name: str) -> str: "PRODUCT": "__dace_product", "ANY": "__dace_any", "ALL": "__dace_all", - "COUNT": "__dace_count" + "COUNT": "__dace_count", + "MINVAL": "__dace_minval" } return replacements[func_name] @@ -74,7 +76,9 @@ def replace(func_name: ast_internal_classes.Name_Node, args: ast_internal_classe "__dace_product": "DOUBLE", "__dace_any": "INTEGER", "__dace_all": "INTEGER", - "__dace_count": "INTEGER" + "__dace_count": "INTEGER", + # FIXME: type should depend on array type + "__dace_minval": "DOUBLE" } # FIXME: Any requires sometimes returning an array of booleans call_type = func_types[func_name.name] @@ -112,7 +116,6 @@ def __init__(self, ast): ParentScopeAssigner().visit(ast) self.scope_vars = ScopeVarsDeclarations() self.scope_vars.visit(ast) - self.rvals = [] @@ -614,6 +617,102 @@ def func_name(self) -> str: return "__dace_count" +class MinMaxValTransformation(LoopBasedReplacementTransformation): + + def __init__(self, ast): + super().__init__(ast) + + def _initialize(self): + self.rvals = [] + self.argument_variable = None + + def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): + + for arg in node.args: + + array_node = self._parse_array(node, arg) + + if array_node is not None: + self.rvals.append(array_node) + else: + raise NotImplementedError("We do not support non-array arguments for MINVAL/MAXVAL") + + def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): + + if len(self.rvals) != 1: + raise NotImplementedError("Only one array can be summed") + + self.argument_variable = self.rvals[0] + + par_Decl_Range_Finder(self.argument_variable, self.loop_ranges, [], [], self.count, new_func_body, self.scope_vars, True) + + def _initialize_result(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: + + return ast_internal_classes.BinOp_Node( + lval=node.lval, + op="=", + rval=self._result_init_value(self.argument_variable), + line_number=node.line_number + ) + + def _generate_loop_body(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: + + cond = ast_internal_classes.BinOp_Node( + lval=self.argument_variable, + op=self._condition_op(), + rval=node.lval, + line_number=node.line_number + ) + body_if = ast_internal_classes.BinOp_Node( + lval=node.lval, + op="=", + rval=self.argument_variable, + line_number=node.line_number + ) + return ast_internal_classes.If_Stmt_Node( + cond=cond, + body=body_if, + body_else=ast_internal_classes.Execution_Part_Node(execution=[]), + line_number=node.line_number + ) + +class MinVal(LoopBasedReplacement): + + """ + In this class, 
we implement the transformation for Fortran intrinsic COUNT. + The implementation is very similar to ANY and ALL. + The main difference is that we initialize the partial result to 0 + and increment it if any of the evaluated conditions is true. + + We do not support the KIND argument. + """ + class Transformation(MinMaxValTransformation): + + def __init__(self, ast): + super().__init__(ast) + + def _result_init_value(self, array: ast_internal_classes.Array_Subscript_Node): + + var_decl = self.scope_vars.get_var(array.parent, array.name.name) + + # TODO: this should be used as a call to HUGE + fortran_type = var_decl.type + dace_type = fortrantypes2dacetypes[fortran_type] + from dace.dtypes import max_value + max_val = max_value(dace_type) + print(fortran_type, max_val) + + if fortran_type == "INTEGER": + return ast_internal_classes.Int_Literal_Node(value=str(max_val)) + elif fortran_type == "DOUBLE": + return ast_internal_classes.Real_Literal_Node(value=str(max_val)) + + def _condition_op(self): + return "<" + + def func_name(self) -> str: + return "__dace_minval" + class FortranIntrinsics: IMPLEMENTATIONS_AST = { @@ -623,7 +722,8 @@ class FortranIntrinsics: "PRODUCT": Product, "ANY": Any, "COUNT": Count, - "ALL": All + "ALL": All, + "MINVAL": MinVal } IMPLEMENTATIONS_DACE = { @@ -633,7 +733,8 @@ class FortranIntrinsics: "__dace_product": Product, "__dace_any": Any, "__dace_all": All, - "__dace_count": Count + "__dace_count": Count, + "__dace_minval": MinVal } def __init__(self): From 5ecba228a09b40c78cfaf272818ccf2235470e60 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 19 Oct 2023 03:00:27 +0200 Subject: [PATCH 099/163] Remove unnecessary output --- tests/fortran/intrinsic_product.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/fortran/intrinsic_product.py b/tests/fortran/intrinsic_product.py index 06d14e0a34..fcf9dc8057 100644 --- a/tests/fortran/intrinsic_product.py +++ b/tests/fortran/intrinsic_product.py @@ -40,8 +40,6 @@ def test_fortran_frontend_product_array(): d[i] = i + 1 res = np.full([3], 42, order="F", dtype=np.float64) sdfg(d=d, res=res) - print(d) - print(res) assert res[0] == np.prod(d) assert res[1] == np.prod(d) assert res[2] == np.prod(d[1:5]) From 1ccad58d91aa31548ad673071965e6bbae0db9ab Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 19 Oct 2023 03:05:03 +0200 Subject: [PATCH 100/163] Implement MINVAL intrinsic and add a new system for determining the return type of intrinsic --- dace/frontend/fortran/intrinsics.py | 111 ++++++++++++++++++---------- 1 file changed, 72 insertions(+), 39 deletions(-) diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py index efbe7bdc57..cef1334f6a 100644 --- a/dace/frontend/fortran/intrinsics.py +++ b/dace/frontend/fortran/intrinsics.py @@ -57,32 +57,18 @@ def replace(func_name: ast_internal_classes.Name_Node, args: ast_internal_classe class LoopBasedReplacement: - @staticmethod - def replaced_name(func_name: str) -> str: - replacements = { - "SUM": "__dace_sum", - "PRODUCT": "__dace_product", - "ANY": "__dace_any", - "ALL": "__dace_all", - "COUNT": "__dace_count", - "MINVAL": "__dace_minval" - } - return replacements[func_name] + INTRINSIC_TO_DACE = { + "SUM": "__dace_sum", + "PRODUCT": "__dace_product", + "ANY": "__dace_any", + "ALL": "__dace_all", + "COUNT": "__dace_count", + "MINVAL": "__dace_minval" + } @staticmethod - def replace(func_name: ast_internal_classes.Name_Node, args: ast_internal_classes.Arg_List_Node, line) -> ast_internal_classes.FNode: - func_types = { - 
"__dace_sum": "DOUBLE", - "__dace_product": "DOUBLE", - "__dace_any": "INTEGER", - "__dace_all": "INTEGER", - "__dace_count": "INTEGER", - # FIXME: type should depend on array type - "__dace_minval": "DOUBLE" - } - # FIXME: Any requires sometimes returning an array of booleans - call_type = func_types[func_name.name] - return ast_internal_classes.Call_Expr_Node(name=func_name, type=call_type, args=args.args, line_number=line) + def replaced_name(func_name: str) -> str: + return LoopBasedReplacement.INTRINSIC_TO_DACE[func_name] @staticmethod def has_transformation() -> bool: @@ -113,12 +99,14 @@ class LoopBasedReplacementTransformation(NodeTransformer): """ def __init__(self, ast): self.count = 0 + + # We need to rerun the assignment because transformations could have created + # new AST nodes ParentScopeAssigner().visit(ast) self.scope_vars = ScopeVarsDeclarations() self.scope_vars.visit(ast) self.rvals = [] - @abstractmethod def func_name(self) -> str: pass @@ -143,6 +131,21 @@ def _initialize_result(self, node: ast_internal_classes.FNode) -> ast_internal_c def _generate_loop_body(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: pass + """ + When replacing Fortran's AST reference to an intrinsic function, we set a dummy variable with VOID type. + The reason is that at the point, we do not know the types of arguments. For many intrinsics, the return + type will depend on the input types. + + When transforming the AST, we gather all scopes and variable declarations in that scope. + Then, we can query the types of input arguments and properly determine the return type. + + Both the type of the variable and its corresponding Var_Decl_node need to be updated! + """ + + @abstractmethod + def _update_result_type(self, node: ast_internal_classes.Name_Node): + pass + def _parse_array(self, node: ast_internal_classes.Execution_Part_Node, arg: ast_internal_classes.FNode) -> ast_internal_classes.Array_Subscript_Node: # supports syntax func(arr) @@ -186,6 +189,9 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No # Verify that all of intrinsic args are correct and prepare them for loop generation self._summarize_args(child, newbody) + # Change the type of result variable + self._update_result_type(child.lval) + # Initialize the result variable newbody.append(self._initialize_result(child)) @@ -238,6 +244,17 @@ def _initialize(self): self.rvals = [] self.argument_variable = None + def _update_result_type(self, var: ast_internal_classes.Name_Node): + + """ + For both SUM and PRODUCT, the result type depends on the input variable. + """ + input_type = self.scope_vars.get_var(var.parent, self.argument_variable.name.name) + + var_decl = self.scope_vars.get_var(var.parent, var.name) + var.type = input_type.type + var_decl.type = input_type.type + def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): for arg in node.args: @@ -346,6 +363,17 @@ def _initialize(self): self.dominant_array = None self.cond = None + def _update_result_type(self, var: ast_internal_classes.Name_Node): + + """ + For all functions, the result type is INTEGER. + Theoretically, we should return LOGICAL for ANY and ALL, + but we no longer use booleans on DaCe side. 
+ """ + var_decl = self.scope_vars.get_var(var.parent, var.name) + var.type = "INTEGER" + var_decl.type = "INTEGER" + def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): if len(node.args) > 1: @@ -626,6 +654,18 @@ def _initialize(self): self.rvals = [] self.argument_variable = None + def _update_result_type(self, var: ast_internal_classes.Name_Node): + + """ + For both MINVAL and MAXVAL, the result type depends on the input variable. + """ + + input_type = self.scope_vars.get_var(var.parent, self.argument_variable.name.name) + + var_decl = self.scope_vars.get_var(var.parent, var.name) + var.type = input_type.type + var_decl.type = input_type.type + def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): for arg in node.args: @@ -700,7 +740,6 @@ def _result_init_value(self, array: ast_internal_classes.Array_Subscript_Node): dace_type = fortrantypes2dacetypes[fortran_type] from dace.dtypes import max_value max_val = max_value(dace_type) - print(fortran_type, max_val) if fortran_type == "INTEGER": return ast_internal_classes.Int_Literal_Node(value=str(max_val)) @@ -726,17 +765,6 @@ class FortranIntrinsics: "MINVAL": MinVal } - IMPLEMENTATIONS_DACE = { - "__dace_selected_int_kind": SelectedKind, - "__dace_selected_real_kind": SelectedKind, - "__dace_sum": Sum, - "__dace_product": Product, - "__dace_any": Any, - "__dace_all": All, - "__dace_count": Count, - "__dace_minval": MinVal - } - def __init__(self): self._transformations_to_run = set() @@ -745,7 +773,7 @@ def transformations(self) -> Set[Type[NodeTransformer]]: @staticmethod def function_names() -> List[str]: - return list(FortranIntrinsics.IMPLEMENTATIONS_DACE.keys()) + return list(LoopBasedReplacement.INTRINSIC_TO_DACE.values()) def replace_function_name(self, node: FASTNode) -> ast_internal_classes.Name_Node: @@ -793,4 +821,9 @@ def replace_function_reference(self, name: ast_internal_classes.Name_Node, args: call_type = func_types[name.name] return ast_internal_classes.Call_Expr_Node(name=name, type=call_type, args=args.args, line_number=line) else: - return self.IMPLEMENTATIONS_DACE[name.name].replace(name, args, line) + # We will do the actual type replacement later + # To that end, we need to know the input types - but these we do not know at the moment. + return ast_internal_classes.Call_Expr_Node( + name=name, type="VOID", + args=args.args, line_number=line + ) From 2813a25db76c6ce9c95d7a6df9b43e6060254a36 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 19 Oct 2023 03:14:18 +0200 Subject: [PATCH 101/163] Add test for MINVAL and MAXVAL --- tests/fortran/intrinsic_minmaxval.py | 252 +++++++++++++++++++++++++++ 1 file changed, 252 insertions(+) create mode 100644 tests/fortran/intrinsic_minmaxval.py diff --git a/tests/fortran/intrinsic_minmaxval.py b/tests/fortran/intrinsic_minmaxval.py new file mode 100644 index 0000000000..3981374555 --- /dev/null +++ b/tests/fortran/intrinsic_minmaxval.py @@ -0,0 +1,252 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. + +import numpy as np + +from dace.frontend.fortran import ast_transforms, fortran_parser + +def test_fortran_frontend_minval_double(): + """ + Tests that the generated array map correctly handles offsets. 
+ """ + test_string = """ + PROGRAM minval_test + implicit none + double precision, dimension(7) :: d + double precision, dimension(4) :: res + CALL minval_test_function(d, res) + end + + SUBROUTINE minval_test_function(d, res) + double precision, dimension(7) :: d + double precision, dimension(0) :: dt + double precision, dimension(4) :: res + + res(1) = MINVAL(d) + res(2) = MINVAL(d(:)) + res(3) = MINVAL(d(3:6)) + res(4) = MINVAL(dt) + + END SUBROUTINE minval_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + sdfg = fortran_parser.create_sdfg_from_string(test_string, "minval_test", True) + #sdfg.simplify(verbose=True) + sdfg.compile() + size = 7 + + # Minimum is in the beginning + d = np.full([size], 0, order="F", dtype=np.float64) + for i in range(size): + d[i] = i + 1 + res = np.full([4], 42, order="F", dtype=np.float64) + sdfg(d=d, res=res) + + assert res[0] == d[0] + assert res[1] == d[0] + assert res[2] == d[2] + # It should be the dace max for integer + assert res[3] == np.finfo(np.float64).max + + # Minimum is in the beginning + for i in range(size): + d[i] = 10 - i + sdfg(d=d, res=res) + assert res[0] == d[-1] + assert res[1] == d[-1] + assert res[2] == d[5] + # It should be the dace max for integer + assert res[3] == np.finfo(np.float64).max + +def test_fortran_frontend_minval_int(): + """ + Tests that the generated array map correctly handles offsets. + """ + test_string = """ + PROGRAM minval_test + implicit none + integer, dimension(7) :: d + integer, dimension(4) :: res + CALL minval_test_function(d, res) + end + + SUBROUTINE minval_test_function(d, res) + integer, dimension(7) :: d + integer, dimension(0) :: dt + integer, dimension(4) :: res + + res(1) = MINVAL(d) + res(2) = MINVAL(d(:)) + res(3) = MINVAL(d(3:6)) + res(4) = MINVAL(dt) + + END SUBROUTINE minval_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + sdfg = fortran_parser.create_sdfg_from_string(test_string, "minval_test", True) + sdfg.simplify(verbose=True) + sdfg.compile() + size = 7 + + # Minimum is in the beginning + d = np.full([size], 0, order="F", dtype=np.int32) + for i in range(size): + d[i] = i + 1 + res = np.full([4], 42, order="F", dtype=np.int32) + sdfg(d=d, res=res) + + assert res[0] == d[0] + assert res[1] == d[0] + assert res[2] == d[2] + # It should be the dace max for integer + assert res[3] == np.iinfo(np.int32).max + + # Minimum is in the beginning + for i in range(size): + d[i] = 10 - i + sdfg(d=d, res=res) + assert res[0] == d[-1] + assert res[1] == d[-1] + assert res[2] == d[5] + # It should be the dace max for integer + assert res[3] == np.iinfo(np.int32).max + + # Minimum is in the middle + d = np.full([size], 0, order="F", dtype=np.int32) + d[:] = [-5, 10, -6, 4, 32, 42, -1] + res = np.full([4], 42, order="F", dtype=np.int32) + sdfg(d=d, res=res) + + assert res[0] == d[2] + assert res[1] == d[2] + assert res[2] == d[2] + # It should be the dace max for integer + assert res[3] == np.iinfo(np.int32).max + +def test_fortran_frontend_maxval_double(): + """ + Tests that the generated array map correctly handles offsets. 
+ """ + test_string = """ + PROGRAM minval_test + implicit none + double precision, dimension(7) :: d + double precision, dimension(4) :: res + CALL minval_test_function(d, res) + end + + SUBROUTINE minval_test_function(d, res) + double precision, dimension(7) :: d + double precision, dimension(0) :: dt + double precision, dimension(4) :: res + + res(1) = MAXVAL(d) + res(2) = MAXVAL(d(:)) + res(3) = MAXVAL(d(3:6)) + res(4) = MAXVAL(dt) + + END SUBROUTINE minval_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + sdfg = fortran_parser.create_sdfg_from_string(test_string, "minval_test", True) + #sdfg.simplify(verbose=True) + sdfg.compile() + size = 7 + + # Minimum is in the beginning + d = np.full([size], 0, order="F", dtype=np.float64) + for i in range(size): + d[i] = i + 1 + res = np.full([4], 42, order="F", dtype=np.float64) + sdfg(d=d, res=res) + + assert res[0] == d[-1] + assert res[1] == d[-1] + assert res[2] == d[5] + # It should be the dace max for integer + assert res[3] == np.finfo(np.float64).min + + # Minimum is in the beginning + for i in range(size): + d[i] = 10 - i + sdfg(d=d, res=res) + assert res[0] == d[0] + assert res[1] == d[0] + assert res[2] == d[2] + # It should be the dace max for integer + assert res[3] == np.finfo(np.float64).min + +def test_fortran_frontend_maxval_int(): + """ + Tests that the generated array map correctly handles offsets. + """ + test_string = """ + PROGRAM minval_test + implicit none + integer, dimension(7) :: d + integer, dimension(4) :: res + CALL minval_test_function(d, res) + end + + SUBROUTINE minval_test_function(d, res) + integer, dimension(7) :: d + integer, dimension(0) :: dt + integer, dimension(4) :: res + + res(1) = MAXVAL(d) + res(2) = MAXVAL(d(:)) + res(3) = MAXVAL(d(3:6)) + res(4) = MAXVAL(dt) + + END SUBROUTINE minval_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + sdfg = fortran_parser.create_sdfg_from_string(test_string, "minval_test", True) + sdfg.simplify(verbose=True) + sdfg.compile() + size = 7 + + # Minimum is in the beginning + d = np.full([size], 0, order="F", dtype=np.int32) + for i in range(size): + d[i] = i + 1 + res = np.full([4], 42, order="F", dtype=np.int32) + sdfg(d=d, res=res) + + assert res[0] == d[-1] + assert res[1] == d[-1] + assert res[2] == d[5] + # It should be the dace max for integer + assert res[3] == np.iinfo(np.int32).min + + # Minimum is in the beginning + for i in range(size): + d[i] = 10 - i + sdfg(d=d, res=res) + assert res[0] == d[0] + assert res[1] == d[0] + assert res[2] == d[2] + # It should be the dace max for integer + assert res[3] == np.iinfo(np.int32).min + + # Minimum is in the middle + d = np.full([size], 0, order="F", dtype=np.int32) + d[:] = [41, 10, 42, -5, 32, 41, 40] + res = np.full([4], 42, order="F", dtype=np.int32) + sdfg(d=d, res=res) + + assert res[0] == d[2] + assert res[1] == d[2] + assert res[2] == d[2] + # It should be the dace max for integer + assert res[3] == np.iinfo(np.int32).min + +if __name__ == "__main__": + + test_fortran_frontend_minval_double() + test_fortran_frontend_minval_int() + test_fortran_frontend_maxval_double() + test_fortran_frontend_maxval_int() From 24fde266bf6ae48d7c4c0553f94fa9aebe31056c Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 19 Oct 2023 15:35:58 +0200 Subject: [PATCH 102/163] Implement MAXVAL intrinsic --- dace/frontend/fortran/intrinsics.py | 55 +++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git 
a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py index cef1334f6a..0275d7878a 100644 --- a/dace/frontend/fortran/intrinsics.py +++ b/dace/frontend/fortran/intrinsics.py @@ -63,7 +63,8 @@ class LoopBasedReplacement: "ANY": "__dace_any", "ALL": "__dace_all", "COUNT": "__dace_count", - "MINVAL": "__dace_minval" + "MINVAL": "__dace_minval", + "MAXVAL": "__dace_maxval" } @staticmethod @@ -719,12 +720,9 @@ def _generate_loop_body(self, node: ast_internal_classes.FNode) -> ast_internal_ class MinVal(LoopBasedReplacement): """ - In this class, we implement the transformation for Fortran intrinsic COUNT. - The implementation is very similar to ANY and ALL. - The main difference is that we initialize the partial result to 0 - and increment it if any of the evaluated conditions is true. + In this class, we implement the transformation for Fortran intrinsic MINVAL. - We do not support the KIND argument. + We do not support the MASK and DIM argument. """ class Transformation(MinMaxValTransformation): @@ -752,6 +750,41 @@ def _condition_op(self): def func_name(self) -> str: return "__dace_minval" + +class MaxVal(LoopBasedReplacement): + + """ + In this class, we implement the transformation for Fortran intrinsic MAXVAL. + + We do not support the MASK and DIM argument. + """ + class Transformation(MinMaxValTransformation): + + def __init__(self, ast): + super().__init__(ast) + + def _result_init_value(self, array: ast_internal_classes.Array_Subscript_Node): + + var_decl = self.scope_vars.get_var(array.parent, array.name.name) + + # TODO: this should be used as a call to HUGE + fortran_type = var_decl.type + dace_type = fortrantypes2dacetypes[fortran_type] + from dace.dtypes import min_value + min_val = min_value(dace_type) + + if fortran_type == "INTEGER": + return ast_internal_classes.Int_Literal_Node(value=str(min_val)) + elif fortran_type == "DOUBLE": + return ast_internal_classes.Real_Literal_Node(value=str(min_val)) + + def _condition_op(self): + return ">" + + def func_name(self) -> str: + return "__dace_maxval" + + class FortranIntrinsics: IMPLEMENTATIONS_AST = { @@ -762,7 +795,13 @@ class FortranIntrinsics: "ANY": Any, "COUNT": Count, "ALL": All, - "MINVAL": MinVal + "MINVAL": MinVal, + "MAXVAL": MaxVal + } + + DIRECT_REPLACEMENTS = { + "__dace_selected_int_kind": SelectedKind, + "__dace_selected_real_kind": SelectedKind } def __init__(self): @@ -820,6 +859,8 @@ def replace_function_reference(self, name: ast_internal_classes.Name_Node, args: # FIXME: this will be progressively removed call_type = func_types[name.name] return ast_internal_classes.Call_Expr_Node(name=name, type=call_type, args=args.args, line_number=line) + elif name.name in self.DIRECT_REPLACEMENTS: + return self.DIRECT_REPLACEMENTS[name.name].replace(name, args, line) else: # We will do the actual type replacement later # To that end, we need to know the input types - but these we do not know at the moment. 
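
Taken together, patches 100 through 102 lower MINVAL and MAXVAL to an initialization followed by a guarded copy loop. For res = MINVAL(d), the generated code behaves roughly like the Python sketch below. This is illustrative only: the real transformation constructs AST nodes (BinOp_Node, If_Stmt_Node) rather than executing anything, and the float64 type is just an example.

    import numpy as np

    def minval_like(d: np.ndarray) -> float:
        # _result_init_value: the type's maximum, standing in for a call to HUGE
        res = np.finfo(np.float64).max
        # Loop bounds come from par_Decl_Range_Finder over the array subscript
        for i in range(d.shape[0]):
            if d[i] < res:   # _condition_op() is "<" for MINVAL, ">" for MAXVAL
                res = d[i]   # _generate_loop_body: copy the element on success
        return res           # an empty array keeps the init value

    assert minval_like(np.array([3.0, 1.0, 2.0])) == 1.0
    assert minval_like(np.zeros(0)) == np.finfo(np.float64).max

This also matches the assertions in the tests from PATCH 101: MINVAL of a zero-length array returns the largest representable value of the element type, and MAXVAL correspondingly returns the smallest.
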
From f74f061fa478d32d48db81851d65ea4e6074b45e Mon Sep 17 00:00:00 2001
From: Marcin Copik
Date: Thu, 19 Oct 2023 15:40:12 +0200
Subject: [PATCH 103/163] Reenable sdfg.simplify for testing

---
 tests/fortran/intrinsic_minmaxval.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/fortran/intrinsic_minmaxval.py b/tests/fortran/intrinsic_minmaxval.py
index 3981374555..6a32237d37 100644
--- a/tests/fortran/intrinsic_minmaxval.py
+++ b/tests/fortran/intrinsic_minmaxval.py
@@ -31,7 +31,7 @@ def test_fortran_frontend_minval_double():
 
     # Now test to verify it executes correctly with no offset normalization
     sdfg = fortran_parser.create_sdfg_from_string(test_string, "minval_test", True)
-    #sdfg.simplify(verbose=True)
+    sdfg.simplify(verbose=True)
     sdfg.compile()
     size = 7
 
@@ -151,7 +151,7 @@ def test_fortran_frontend_maxval_double():
 
     # Now test to verify it executes correctly with no offset normalization
    sdfg = fortran_parser.create_sdfg_from_string(test_string, "minval_test", True)
-    #sdfg.simplify(verbose=True)
+    sdfg.simplify(verbose=True)
     sdfg.compile()
     size = 7
 

From 6f471cf6bf6c1c915bdf57f94fdeadfa14215395 Mon Sep 17 00:00:00 2001
From: Timo Schneider
Date: Thu, 19 Oct 2023 16:17:36 +0200
Subject: [PATCH 104/163] replace |& which is not widely supported (#1399)

The test_all.sh script currently runs the C++ tests using g++. This is
not good for the following reasons:

* During normal DaCe compilation we use CMake (and thus whatever
  compiler CMake picks up).
* We don't check if g++ is available.

This change uses whatever the user set in the CXX env var. CMake also
uses CXX when it is set. Thus, if a user sets CXX, they will use the
same compiler for tests and during DaCe compilation. If CXX is not set,
we fall back to g++ as the hard-coded compiler.

The test script now also prints the current progress before each test.
---
 test_all.sh | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/test_all.sh b/test_all.sh
index c4240fa820..cc34b74b36 100755
--- a/test_all.sh
+++ b/test_all.sh
@@ -3,6 +3,12 @@
 
 set -a
 
+if [[ -z "${CXX}" ]]; then
+    CXX="g++" # I don't think that is a good default, but it was the hardcoded compiler before I made changes...
+else
+    CXX="${CXX}"
+fi
+
 SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
 PYTHONPATH=$SCRIPTPATH
 
@@ -53,7 +59,7 @@ bail_skip() {
 test_start() {
     TESTS=`expr $TESTS + 1`
     CURTEST="$TESTPREFIX$1"
-    echo "---------- TEST: $TESTPREFIX$1 ----------"
+    echo "---------- TEST: $TESTPREFIX$1 ---------- [ This is test $TESTS of $TOTAL_TESTS ]"
 }
 
 testcmd() {
@@ -64,14 +70,14 @@ testcmd() {
     #$* | tee -a test.log
     TESTCNT=`expr $TESTS - 1`
     MSG="($TESTCNT / $TOTAL_TESTS) $CURTEST (Fails: $ERRORS)"
-    ($* || echo "_TFAIL_ $?") |& awk "BEGIN{printf \"$MSG\r\"} /_TFAIL_/{printf \"$TGAP\r\"; exit \$NF} {printf \"$TGAP\r\"; print; printf \"$MSG\r\";} END{printf \"$TGAP\r\"}"
+    ($* || echo "_TFAIL_ $?") 2>&1 | awk "BEGIN{printf \"$MSG\r\"} /_TFAIL_/{printf \"$TGAP\r\"; exit \$NF} {printf \"$TGAP\r\"; print; printf \"$MSG\r\";} END{printf \"$TGAP\r\"}"
 }
 
 ################################################
 
 runtest_cpp() {
     test_start $1
-    testcmd g++ -std=c++14 -Wall -Wextra -O3 -march=native -ffast-math -fopenmp -fPIC \
+    testcmd $CXX -std=c++14 -Wall -Wextra -O3 -march=native -ffast-math -fopenmp -fPIC \
        -I $SCRIPTPATH/dace/runtime/include $1 -o ./$1.out
     if [ $?
-ne 0 ]; then bail "$1 (compilation)"; fi testcmd ./$1.out From 8402e526c5d5049204ff740e875c4dfc17e6c391 Mon Sep 17 00:00:00 2001 From: Carl Johnsen Date: Thu, 19 Oct 2023 19:15:21 +0200 Subject: [PATCH 105/163] Fixed error when an accessor from an RTL tasklet is a stream (#1403) * Copyright bump * Ensured all RTL samples' comments are of a consistent style, and mentions which target mode they're inteded for. * Added a comment about the temporal vectorization hardware test stalling in 2022.1. --- dace/codegen/targets/rtl.py | 17 ++- samples/fpga/rtl/add_fortytwo.py | 39 +++--- samples/fpga/rtl/axpy.py | 13 +- samples/fpga/rtl/axpy_double_pump.py | 143 +++++++++++----------- samples/fpga/rtl/fladd.py | 17 +-- samples/fpga/rtl/pipeline.py | 41 ++++--- samples/fpga/rtl/rtl_multi_tasklet.py | 44 +++---- samples/fpga/rtl/rtl_tasklet_parameter.py | 36 +++--- samples/fpga/rtl/rtl_tasklet_pipeline.py | 36 +++--- samples/fpga/rtl/rtl_tasklet_scalar.py | 30 ++--- samples/fpga/rtl/rtl_tasklet_vector.py | 40 +++--- tests/rtl/hardware_test.py | 22 ++-- tests/rtl/simulation_test.py | 6 +- 13 files changed, 256 insertions(+), 228 deletions(-) diff --git a/dace/codegen/targets/rtl.py b/dace/codegen/targets/rtl.py index dcb752e215..935615fad6 100644 --- a/dace/codegen/targets/rtl.py +++ b/dace/codegen/targets/rtl.py @@ -1,8 +1,8 @@ # Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. import itertools - from typing import List, Tuple, Dict +import warnings from dace import dtypes, config, registry, symbolic, nodes, sdfg, data from dace.sdfg import graph, state, find_input_arraynode, find_output_arraynode @@ -102,6 +102,21 @@ def copy_memory(self, sdfg: sdfg.SDFG, dfg: state.StateSubgraphView, state_id: i elif isinstance(arr, data.Scalar): line: str = "{} {} = {};".format(dst_node.in_connectors[edge.dst_conn].ctype, edge.dst_conn, edge.src.data) + elif isinstance(arr, data.Stream): + # TODO Streams are currently unsupported, as the proper + # behaviour has to be implemented to avoid deadlocking. It + # is only a warning, as the RTL backend is partially used + # by the Xilinx backend, which may hit this case, but will + # discard the errorneous code. + warnings.warn( + 'Streams are currently unsupported by the RTL backend.' \ + 'This may produce errors or deadlocks in the generated code.' + ) + line: str = "// WARNING: Unsupported read from ({}) variable '{}' from stream '{}'." \ + " This may lead to a deadlock if used in code.\n".format( + dst_node.in_connectors[edge.dst_conn].ctype, edge.dst_conn, edge.src_conn) + line += "{} {} = {}.pop();".format( + dst_node.in_connectors[edge.dst_conn].ctype, edge.dst_conn, edge.src.data) elif isinstance(edge.src, nodes.MapEntry) and isinstance(edge.dst, nodes.Tasklet): rtl_name = self.unique_name(edge.dst, sdfg.nodes()[state_id], sdfg) self.n_unrolled[rtl_name] = symbolic.evaluate(edge.src.map.range[0][1] + 1, sdfg.constants) diff --git a/samples/fpga/rtl/add_fortytwo.py b/samples/fpga/rtl/add_fortytwo.py index 9c14ad098b..5abcd76a5b 100644 --- a/samples/fpga/rtl/add_fortytwo.py +++ b/samples/fpga/rtl/add_fortytwo.py @@ -1,8 +1,9 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. -# -# This sample shows adding a constant integer value to a stream of integers. -# -# It is intended for running hardware_emulation or hardware xilinx targets. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +""" + This sample shows adding a constant integer value to a stream of integers. 
+ + It is intended for running hardware_emulation or hardware xilinx targets. +""" import dace import numpy as np @@ -116,21 +117,21 @@ ###################################################################### if __name__ == '__main__': + with dace.config.set_temporary('compiler', 'xilinx', 'mode', value='hardware_emulation'): + # init data structures + N.set(8192) + a = np.random.randint(0, 100, N.get()).astype(np.int32) + b = np.zeros((N.get(), )).astype(np.int32) - # init data structures - N.set(8192) - a = np.random.randint(0, 100, N.get()).astype(np.int32) - b = np.zeros((N.get(), )).astype(np.int32) - - # show initial values - print("a={}, b={}".format(a, b)) + # show initial values + print("a={}, b={}".format(a, b)) - # call program - sdfg(A=a, B=b, N=N) + # call program + sdfg(A=a, B=b, N=N) - # show result - print("a={}, b={}".format(a, b)) + # show result + print("a={}, b={}".format(a, b)) - # check result - for i in range(N.get()): - assert b[i] == a[i] + 42 + # check result + for i in range(N.get()): + assert b[i] == a[i] + 42 diff --git a/samples/fpga/rtl/axpy.py b/samples/fpga/rtl/axpy.py index 8b720aaa1e..4f386c82a4 100644 --- a/samples/fpga/rtl/axpy.py +++ b/samples/fpga/rtl/axpy.py @@ -1,7 +1,10 @@ -# Copyright 2019-2022 ETH Zurich and the DaCe authors. All rights reserved. -# -# This sample shows the AXPY BLAS routine. It is implemented through Xilinx IPs in order to utilize floating point -# operations. It is intended for running hardware_emulation or hardware xilinx targets. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +""" + This sample shows the AXPY BLAS routine. It is implemented through Xilinx IPs in order to utilize floating point + operations. + + It is intended for running hardware_emulation or hardware xilinx targets. +""" import dace import numpy as np @@ -259,4 +262,4 @@ def make_sdfg(veclen=2): expected = a * x + y diff = np.linalg.norm(expected - result) / N.get() print("Difference:", diff) - exit(0 if diff <= 1e-5 else 1) + assert diff <= 1e-5 diff --git a/samples/fpga/rtl/axpy_double_pump.py b/samples/fpga/rtl/axpy_double_pump.py index 2d44ab7689..c79948007b 100644 --- a/samples/fpga/rtl/axpy_double_pump.py +++ b/samples/fpga/rtl/axpy_double_pump.py @@ -1,73 +1,74 @@ -# Copyright 2019-2022 ETH Zurich and the DaCe authors. All rights reserved. -# -# This sample shows the AXPY BLAS routine. It is implemented through Xilinx -# IPs in order to utilize double pumping, which doubles the performance per -# consumed FPGA resource. The double pumping operation is "inwards", which -# means that the internal vectorization width of the core computation is half -# that of the external vectorization width. This translates into utilizing half -# the amount of internal computing resources, compared to a regular vectorized -# implementetation. 
The block diagram of the design for a 32-bit floating-point -# implementation using vectorization width 2 is: -# -# ap_aclk s_axis_y_in s_axis_x_in a -# │ │ │ │ -# │ │ │ │ -# │ │ │ │ -# ┌───────┼─────────┬────────┼─────────┐ │ │ -# │ │ │ │ │ │ │ -# │ │ │ ▼ │ ▼ │ -# │ │ │ ┌────────────┐ │ ┌────────────┐ │ -# │ │ └─►│ │ └─►│ │ │ -# │ │ │ Clock sync │ │ Clock sync │ │ -# │ │ ┌─►│ │ ┌─►│ │ │ -# │ ▼ 300 MHz │ └─────┬──────┘ │ └─────┬──────┘ │ -# │ ┌────────────┐ │ │ │ │ │ -# │ │ Clock │ │ │ │ │ │ -# │ │ │ ├────────┼─────────┤ │ │ -# │ │ Multiplier │ │ │ │ │ │ -# │ └─────┬──────┘ │ ▼ 64 bit │ ▼ 64 bit │ -# │ │ 600 MHz │ ┌────────────┐ │ ┌────────────┐ │ -# │ │ │ │ │ │ │ │ │ -# │ └─────────┼─►│ Data issue │ └─►│ Data issue │ │ -# │ │ │ │ │ │ │ -# │ │ └─────┬──────┘ └─────┬──────┘ │ -# │ │ │ 32 bit │ 32 bit │ -# │ │ │ │ │ -# │ │ │ │ │ -# │ │ │ ▼ ▼ -# │ │ │ ┌────────────┐ -# │ │ │ │ │ -# │ ├────────┼────────────────►│ Multiplier │ -# │ │ │ │ │ -# │ │ │ └─────┬──────┘ -# │ │ │ │ -# │ │ │ ┌──────────────┘ -# │ │ │ │ -# │ │ ▼ ▼ -# │ │ ┌────────────┐ -# │ │ │ │ -# │ ├─────►│ Adder │ -# │ │ │ │ -# │ │ └─────┬──────┘ -# │ │ │ -# │ │ ▼ 32 bit -# │ │ ┌─────────────┐ -# │ │ │ │ -# │ ├─────►│ Data packer │ -# │ │ │ │ -# │ │ └─────┬───────┘ -# │ │ │ 64 bit -# │ │ ▼ -# │ │ ┌────────────┐ -# │ └─────►│ │ -# │ │ Clock sync │ -# └───────────────────────►│ │ -# └─────┬──────┘ -# │ -# ▼ -# m_axis_result_out -# -# It is intended for running hardware_emulation or hardware xilinx targets. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +""" + This sample shows the AXPY BLAS routine. It is implemented through Xilinx + IPs in order to utilize double pumping, which doubles the performance per + consumed FPGA resource. The double pumping operation is "inwards", which + means that the internal vectorization width of the core computation is half + that of the external vectorization width. This translates into utilizing half + the amount of internal computing resources, compared to a regular vectorized + implementetation. The block diagram of the design for a 32-bit floating-point + implementation using vectorization width 2 is: + + ap_aclk s_axis_y_in s_axis_x_in a + │ │ │ │ + │ │ │ │ + │ │ │ │ + ┌───────┼─────────┬────────┼─────────┐ │ │ + │ │ │ │ │ │ │ + │ │ │ ▼ │ ▼ │ + │ │ │ ┌────────────┐ │ ┌────────────┐ │ + │ │ └─►│ │ └─►│ │ │ + │ │ │ Clock sync │ │ Clock sync │ │ + │ │ ┌─►│ │ ┌─►│ │ │ + │ ▼ 300 MHz │ └─────┬──────┘ │ └─────┬──────┘ │ + │ ┌────────────┐ │ │ │ │ │ + │ │ Clock │ │ │ │ │ │ + │ │ │ ├────────┼─────────┤ │ │ + │ │ Multiplier │ │ │ │ │ │ + │ └─────┬──────┘ │ ▼ 64 bit │ ▼ 64 bit │ + │ │ 600 MHz │ ┌────────────┐ │ ┌────────────┐ │ + │ │ │ │ │ │ │ │ │ + │ └─────────┼─►│ Data issue │ └─►│ Data issue │ │ + │ │ │ │ │ │ │ + │ │ └─────┬──────┘ └─────┬──────┘ │ + │ │ │ 32 bit │ 32 bit │ + │ │ │ │ │ + │ │ │ │ │ + │ │ │ ▼ ▼ + │ │ │ ┌────────────┐ + │ │ │ │ │ + │ ├────────┼────────────────►│ Multiplier │ + │ │ │ │ │ + │ │ │ └─────┬──────┘ + │ │ │ │ + │ │ │ ┌──────────────┘ + │ │ │ │ + │ │ ▼ ▼ + │ │ ┌────────────┐ + │ │ │ │ + │ ├─────►│ Adder │ + │ │ │ │ + │ │ └─────┬──────┘ + │ │ │ + │ │ ▼ 32 bit + │ │ ┌─────────────┐ + │ │ │ │ + │ ├─────►│ Data packer │ + │ │ │ │ + │ │ └─────┬───────┘ + │ │ │ 64 bit + │ │ ▼ + │ │ ┌────────────┐ + │ └─────►│ │ + │ │ Clock sync │ + └───────────────────────►│ │ + └─────┬──────┘ + │ + ▼ + m_axis_result_out + + It is intended for running hardware_emulation or hardware xilinx targets. 
+""" import dace import numpy as np @@ -452,4 +453,4 @@ def make_sdfg(veclen=2): diff = np.linalg.norm(expected - result) / N.get() print("Difference:", diff) - exit(0 if diff <= 1e-5 else 1) + assert diff <= 1e-5 diff --git a/samples/fpga/rtl/fladd.py b/samples/fpga/rtl/fladd.py index f22d419cbc..daf1ed269b 100644 --- a/samples/fpga/rtl/fladd.py +++ b/samples/fpga/rtl/fladd.py @@ -1,10 +1,11 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. -# -# This sample shows how to utilize an IP core in an RTL tasklet. This is done -# through the vector add problem, which adds two floating point vectors -# together. -# -# It is intended for running hardware_emulation or hardware xilinx targets. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +""" + This sample shows how to utilize an IP core in an RTL tasklet. This is done + through the vector add problem, which adds two floating point vectors + together. + + It is intended for running hardware_emulation or hardware xilinx targets. +""" import dace import numpy as np @@ -190,4 +191,4 @@ expected = a + b diff = np.linalg.norm(expected - c) / N.get() print("Difference:", diff) - exit(0 if diff <= 1e-5 else 1) + assert diff <= 1e-5 diff --git a/samples/fpga/rtl/pipeline.py b/samples/fpga/rtl/pipeline.py index b487da91ce..dbd0460fb0 100644 --- a/samples/fpga/rtl/pipeline.py +++ b/samples/fpga/rtl/pipeline.py @@ -1,9 +1,10 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. -# -# This sample shows a DEPTH deep pipeline, where each stage adds 1 to the -# integer input stream. -# -# It is intended for running hardware_emulation or hardware xilinx targets. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +""" + This sample shows a DEPTH deep pipeline, where each stage adds 1 to the + integer input stream. + + It is intended for running hardware_emulation or hardware xilinx targets. +""" import dace import numpy as np @@ -151,21 +152,21 @@ ###################################################################### if __name__ == '__main__': + with dace.config.set_temporary('compiler', 'xilinx', 'mode', value='hardware_emulation'): + # init data structures + N.set(8192) + a = np.random.randint(0, 100, N.get()).astype(np.int32) + b = np.zeros((N.get(), )).astype(np.int32) - # init data structures - N.set(8192) - a = np.random.randint(0, 100, N.get()).astype(np.int32) - b = np.zeros((N.get(), )).astype(np.int32) - - # show initial values - print("a={}, b={}".format(a, b)) + # show initial values + print("a={}, b={}".format(a, b)) - # call program - sdfg(A=a, B=b, N=N) + # call program + sdfg(A=a, B=b, N=N) - # show result - print("a={}, b={}".format(a, b)) + # show result + print("a={}, b={}".format(a, b)) - # check result - for i in range(N.get()): - assert b[i] == a[i] + depth + # check result + for i in range(N.get()): + assert b[i] == a[i] + depth diff --git a/samples/fpga/rtl/rtl_multi_tasklet.py b/samples/fpga/rtl/rtl_multi_tasklet.py index a646eb6be9..4a4a09deec 100644 --- a/samples/fpga/rtl/rtl_multi_tasklet.py +++ b/samples/fpga/rtl/rtl_multi_tasklet.py @@ -1,11 +1,11 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. """ Two sequential RTL tasklets connected through a memlet. + + It is intended for running simulation xilinx targets. 
""" import dace -import argparse - import numpy as np # add sdfg @@ -32,7 +32,7 @@ m_axis_b_tdata <= 0; s_axis_a_tready <= 1'b1; state <= READY; - end else if (s_axis_a_tvalid && state == READY) begin // case: load a + end else if (s_axis_a_tvalid && state == READY) begin // case: load a m_axis_b_tdata <= s_axis_a_tdata; s_axis_a_tready <= 1'b0; state <= BUSY; @@ -41,7 +41,7 @@ else m_axis_b_tdata <= m_axis_b_tdata; state <= DONE; -end +end assign m_axis_b_tvalid = (m_axis_b_tdata >= 80) ? 1'b1:1'b0; """, @@ -59,7 +59,7 @@ m_axis_c_tdata <= 0; s_axis_b_tready <= 1'b1; state <= READY; - end else if (s_axis_b_tvalid && state == READY) begin // case: load a + end else if (s_axis_b_tvalid && state == READY) begin // case: load a m_axis_c_tdata <= s_axis_b_tdata; s_axis_b_tready <= 1'b0; state <= BUSY; @@ -68,9 +68,9 @@ else m_axis_c_tdata <= m_axis_c_tdata; state <= DONE; -end +end -assign m_axis_c_tvalid = (m_axis_c_tdata >= 100) ? 1'b1:1'b0; +assign m_axis_c_tvalid = (m_axis_c_tdata >= 100) ? 1'b1:1'b0; """, language=dace.Language.SystemVerilog) @@ -92,21 +92,21 @@ ###################################################################### if __name__ == '__main__': + with dace.config.set_temporary('compiler', 'xilinx', 'mode', value='simulation'): + # init data structures + a = np.random.randint(0, 80, 1).astype(np.int32) + b = np.array([0]).astype(np.int32) + c = np.array([0]).astype(np.int32) - # init data structures - a = np.random.randint(0, 80, 1).astype(np.int32) - b = np.array([0]).astype(np.int32) - c = np.array([0]).astype(np.int32) - - # show initial values - print("a={}, b={}, c={}".format(a, b, c)) + # show initial values + print("a={}, b={}, c={}".format(a, b, c)) - # call program - sdfg(A=a, B=b, C=c) + # call program + sdfg(A=a, B=b, C=c) - # show result - print("a={}, b={}, c={}".format(a, b, c)) + # show result + print("a={}, b={}, c={}".format(a, b, c)) - # check result - assert b == 80 - assert c == 100 + # check result + assert b == 80 + assert c == 100 diff --git a/samples/fpga/rtl/rtl_tasklet_parameter.py b/samples/fpga/rtl/rtl_tasklet_parameter.py index d20688b385..112e88a6bf 100644 --- a/samples/fpga/rtl/rtl_tasklet_parameter.py +++ b/samples/fpga/rtl/rtl_tasklet_parameter.py @@ -1,11 +1,11 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. """ Simple RTL tasklet with a single scalar input and a single scalar output. It increments b from a up to 100. + + It is intended for running simulation xilinx targets. """ import dace -import argparse - import numpy as np # add sdfg @@ -47,7 +47,7 @@ m_axis_b_tdata <= 0; s_axis_a_tready <= 1'b1; state <= READY; - end else if (s_axis_a_tvalid && state == READY) begin // case: load a + end else if (s_axis_a_tvalid && state == READY) begin // case: load a m_axis_b_tdata <= s_axis_a_tdata; s_axis_a_tready <= 1'b0; state <= BUSY; @@ -56,9 +56,9 @@ else m_axis_b_tdata <= m_axis_b_tdata; state <= DONE; - end + end - assign m_axis_b_tvalid = (m_axis_b_tdata >= MAX_VAL) ? 1'b1:1'b0; + assign m_axis_b_tvalid = (m_axis_b_tdata >= MAX_VAL) ? 
1'b1:1'b0; ''', language=dace.Language.SystemVerilog) @@ -76,19 +76,19 @@ ###################################################################### if __name__ == '__main__': + with dace.config.set_temporary('compiler', 'xilinx', 'mode', value='simulation'): + # init data structures + a = np.random.randint(0, 100, 1).astype(np.int32) + b = np.array([0]).astype(np.int32) - # init data structures - a = np.random.randint(0, 100, 1).astype(np.int32) - b = np.array([0]).astype(np.int32) - - # show initial values - print("a={}, b={}".format(a, b)) + # show initial values + print("a={}, b={}".format(a, b)) - # call program - sdfg(A=a, B=b) + # call program + sdfg(A=a, B=b) - # show result - print("a={}, b={}".format(a, b)) + # show result + print("a={}, b={}".format(a, b)) - # check result - assert b == sdfg.constants["MAX_VAL"] + # check result + assert b == sdfg.constants["MAX_VAL"] diff --git a/samples/fpga/rtl/rtl_tasklet_pipeline.py b/samples/fpga/rtl/rtl_tasklet_pipeline.py index 9166806c63..3ef20cd03f 100644 --- a/samples/fpga/rtl/rtl_tasklet_pipeline.py +++ b/samples/fpga/rtl/rtl_tasklet_pipeline.py @@ -1,11 +1,11 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. """ Pipelined, AXI-handshake compliant example that increments b from a up to 100. + + It is intended for running simulation xilinx targets. """ import dace -import argparse - import numpy as np # add symbol @@ -59,7 +59,7 @@ state <= state_next; end - always_comb + always_comb begin state_next = state; case(state) @@ -132,21 +132,21 @@ ###################################################################### if __name__ == '__main__': + with dace.config.set_temporary('compiler', 'xilinx', 'mode', value='simulation'): + # init data structures + num_elements = dace.symbolic.evaluate(N, sdfg.constants) + a = np.random.randint(0, 100, num_elements).astype(np.int32) + b = np.array([0] * num_elements).astype(np.int32) - # init data structures - num_elements = dace.symbolic.evaluate(N, sdfg.constants) - a = np.random.randint(0, 100, num_elements).astype(np.int32) - b = np.array([0] * num_elements).astype(np.int32) - - # show initial values - print("a={}, b={}".format(a, b)) + # show initial values + print("a={}, b={}".format(a, b)) - # call program - sdfg(A=a, B=b) + # call program + sdfg(A=a, B=b) - # show result - print("a={}, b={}".format(a, b)) + # show result + print("a={}, b={}".format(a, b)) - assert b[ - 0] == 100 # TODO: implement detection of #elements to process, s.t. we can extend the assertion to the whole array - assert np.all(map((lambda x: x == 0), b[1:-1])) # should still be at the init value (for the moment) + assert b[ + 0] == 100 # TODO: implement detection of #elements to process, s.t. we can extend the assertion to the whole array + assert np.all(map((lambda x: x == 0), b[1:-1])) # should still be at the init value (for the moment) diff --git a/samples/fpga/rtl/rtl_tasklet_scalar.py b/samples/fpga/rtl/rtl_tasklet_scalar.py index c9f6380a2b..cf8d53ec91 100644 --- a/samples/fpga/rtl/rtl_tasklet_scalar.py +++ b/samples/fpga/rtl/rtl_tasklet_scalar.py @@ -1,11 +1,11 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. """ Simple RTL tasklet with a single scalar input and a single scalar output. It increments b from a up to 100. + + It is intended for running simulation xilinx targets. 
""" import dace -import argparse - import numpy as np # add sdfg @@ -79,19 +79,19 @@ ###################################################################### if __name__ == '__main__': + with dace.config.set_temporary('compiler', 'xilinx', 'mode', value='simulation'): + # init data structures + a = np.random.randint(0, 100, 1).astype(np.int32) + b = np.array([0]).astype(np.int32) - # init data structures - a = np.random.randint(0, 100, 1).astype(np.int32) - b = np.array([0]).astype(np.int32) - - # show initial values - print("a={}, b={}".format(a, b)) + # show initial values + print("a={}, b={}".format(a, b)) - # call program - sdfg(A=a, B=b) + # call program + sdfg(A=a, B=b) - # show result - print("a={}, b={}".format(a, b)) + # show result + print("a={}, b={}".format(a, b)) - # check result - assert b == 100 + # check result + assert b == 100 diff --git a/samples/fpga/rtl/rtl_tasklet_vector.py b/samples/fpga/rtl/rtl_tasklet_vector.py index c099a6a38d..9015b4f35e 100644 --- a/samples/fpga/rtl/rtl_tasklet_vector.py +++ b/samples/fpga/rtl/rtl_tasklet_vector.py @@ -1,11 +1,11 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. """ RTL tasklet with a vector input of 4 int32 (width=128bits) and a single scalar output. It increments b from a[31:0] up to 100. + + It is intended for running simulation xilinx targets. """ import dace -import argparse - import numpy as np # add symbol @@ -44,13 +44,13 @@ typedef enum [1:0] {READY, BUSY, DONE} state_e; state_e state; - + always@(posedge ap_aclk) begin if (ap_areset) begin // case: reset m_axis_b_tdata <= 0; s_axis_a_tready <= 1'b1; state <= READY; - end else if (s_axis_a_tvalid && state == READY) begin // case: load a + end else if (s_axis_a_tvalid && state == READY) begin // case: load a m_axis_b_tdata <= s_axis_a_tdata[0]; s_axis_a_tready <= 1'b0; state <= BUSY; @@ -60,9 +60,9 @@ m_axis_b_tdata <= m_axis_b_tdata; state <= DONE; end - end - - assign m_axis_b_tvalid = (m_axis_b_tdata >= s_axis_a_tdata[0] + s_axis_a_tdata[1] && (state == BUSY || state == DONE)) ? 1'b1:1'b0; + end + + assign m_axis_b_tvalid = (m_axis_b_tdata >= s_axis_a_tdata[0] + s_axis_a_tdata[1] && (state == BUSY || state == DONE)) ? 1'b1:1'b0; ''', language=dace.Language.SystemVerilog) @@ -80,19 +80,19 @@ ###################################################################### if __name__ == '__main__': + with dace.config.set_temporary('compiler', 'xilinx', 'mode', value='simulation'): + # init data structures + a = np.random.randint(0, 100, dace.symbolic.evaluate(WIDTH, sdfg.constants)).astype(np.int32) + b = np.array([0]).astype(np.int32) - # init data structures - a = np.random.randint(0, 100, dace.symbolic.evaluate(WIDTH, sdfg.constants)).astype(np.int32) - b = np.array([0]).astype(np.int32) - - # show initial values - print("a={}, b={}".format(a, b)) + # show initial values + print("a={}, b={}".format(a, b)) - # call program - sdfg(A=a, B=b) + # call program + sdfg(A=a, B=b) - # show result - print("a={}, b={}".format(a, b)) + # show result + print("a={}, b={}".format(a, b)) - # check result - assert b == a[0] + a[1] + # check result + assert b == a[0] + a[1] diff --git a/tests/rtl/hardware_test.py b/tests/rtl/hardware_test.py index 821688f481..727dc7362b 100644 --- a/tests/rtl/hardware_test.py +++ b/tests/rtl/hardware_test.py @@ -1,4 +1,7 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. 
All rights reserved. +""" + Test suite for testing RTL integration with DaCe targeting Xilinx FPGAs. +""" import dace from dace.fpga_testing import rtl_test import numpy as np @@ -13,7 +16,7 @@ def make_vadd_sdfg(N: dace.symbol, veclen: int = 8): ''' Function for generating a simple vector addition SDFG that adds a vector `A` of `N` elements to a scalar `B` into a vector `C` of `N` elements, all using SystemVerilog. - The tasklet creates `veclen` instances of a floating point adder that operates on `N` elements. + The tasklet creates `veclen` instances of a floating point adder that operates on `N` elements. :param N: The number of elements the SDFG takes as input and output. :param veclen: The number of floating point adders to instantiate. @@ -197,7 +200,7 @@ def make_vadd_multi_sdfg(N, M): :param N: The number of elements to compute on. :param M: The number of compute PEs to initialize. - :return: An SDFG that has arguments `A` and `B`. + :return: An SDFG that has arguments `A` and `B`. ''' # add sdfg sdfg = dace.SDFG(f'integer_vector_plus_42_multiple_kernels_{N.get() // M.get()}') @@ -321,7 +324,7 @@ def make_vadd_multi_sdfg(N, M): @rtl_test() def test_hardware_vadd(): ''' - Test for the simple vector addition. + Test for the simple vector addition. ''' # add symbol @@ -346,7 +349,7 @@ def test_hardware_vadd(): @rtl_test() def test_hardware_add42_single(): ''' - Test for adding a constant using a single PE. + Test for adding a constant using a single PE. ''' N = dace.symbol('N') M = dace.symbol('M') @@ -428,10 +431,11 @@ def test_hardware_vadd_temporal_vectorization(): ''' Tests whether the multi-pumping optimization can be applied automatically by applying the temporal vectorization transformation. It starts from a numpy vector addition for generating the SDFG. This SDFG is then optimized by applying the vectorization, streaming memory, fpga and temporal vectorization transformations in that order. ''' - # TODO !!!!! THIS TEST STALLS IN HARDWARE EMULATION WITH VITIS 2021.2 !!!!! - # But it works fine for 2020.2 and 2022.2. It seems like everything but the - # last transaction correctly goes through just fine. The last transaction - # is never output by the floating point adder, but the inputs are consumed. + # TODO !!!!! THIS TEST STALLS IN HARDWARE EMULATION WITH VITIS 2021.2 and 2022.1 !!!!! + # But it works fine for 2020.2, 2022.2, and 2023.1. It seems like + # everything but the last transaction correctly goes through just fine. The + # last transaction is never output by the floating point adder, but the + # inputs are consumed. with dace.config.set_temporary('compiler', 'xilinx', 'frequency', value='"0:300\\|1:600"'): # Generate the test data and expected results size_n = 1024 diff --git a/tests/rtl/simulation_test.py b/tests/rtl/simulation_test.py index f20ff6133a..6b7ac2cd15 100644 --- a/tests/rtl/simulation_test.py +++ b/tests/rtl/simulation_test.py @@ -1,5 +1,7 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. - +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +""" + Test suite for testing RTL tasklets in DaCe with Verilator as a backend for simulation. 
+""" import dace import numpy as np import pytest From 195ed59e0fce623188411f55e18ba27f87d2657a Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 19 Oct 2023 20:50:19 +0200 Subject: [PATCH 106/163] Add first version of Fortran MERGE intrinsic and update intrinsic API --- dace/frontend/fortran/intrinsics.py | 228 +++++++++++++++++++++++++--- tests/fortran/intrinsic_merge.py | 75 +++++++++ 2 files changed, 283 insertions(+), 20 deletions(-) create mode 100644 tests/fortran/intrinsic_merge.py diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py index 0275d7878a..ca37cc0019 100644 --- a/dace/frontend/fortran/intrinsics.py +++ b/dace/frontend/fortran/intrinsics.py @@ -2,7 +2,7 @@ from abc import abstractmethod import copy import math -from typing import Any, List, Set, Type +from typing import Any, List, Optional, Set, Type from dace.frontend.fortran import ast_internal_classes from dace.frontend.fortran.ast_utils import fortrantypes2dacetypes @@ -64,7 +64,8 @@ class LoopBasedReplacement: "ALL": "__dace_all", "COUNT": "__dace_count", "MINVAL": "__dace_minval", - "MAXVAL": "__dace_maxval" + "MAXVAL": "__dace_maxval", + "MERGE": "__dace_merge" } @staticmethod @@ -108,8 +109,9 @@ def __init__(self, ast): self.scope_vars.visit(ast) self.rvals = [] + @staticmethod @abstractmethod - def func_name(self) -> str: + def func_name() -> str: pass @abstractmethod @@ -125,13 +127,16 @@ def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ pass @abstractmethod - def _initialize_result(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: + def _initialize_result(self, node: ast_internal_classes.FNode) -> Optional[ast_internal_classes.BinOp_Node]: pass @abstractmethod def _generate_loop_body(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: pass + def _skip_result_assignment(self): + return False + """ When replacing Fortran's AST reference to an intrinsic function, we set a dummy variable with VOID type. The reason is that at the point, we do not know the types of arguments. For many intrinsics, the return @@ -144,7 +149,7 @@ def _generate_loop_body(self, node: ast_internal_classes.FNode) -> ast_internal_ """ @abstractmethod - def _update_result_type(self, node: ast_internal_classes.Name_Node): + def _update_result_type(self, node: ast_internal_classes.Execution_Part_Node, var: ast_internal_classes.Name_Node): pass def _parse_array(self, node: ast_internal_classes.Execution_Part_Node, arg: ast_internal_classes.FNode) -> ast_internal_classes.Array_Subscript_Node: @@ -168,6 +173,7 @@ def _parse_array(self, node: ast_internal_classes.Execution_Part_Node, arg: ast_ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_Node): newbody = [] + for child in node.execution: lister = LoopBasedReplacementVisitor(self.func_name()) lister.visit(child) @@ -182,6 +188,9 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No # calls to the same intrinsic. 
self._initialize() + # Change the type of result variable + self._update_result_type(node, child.lval) + # Visit all intrinsic arguments and extract arrays for i in mywalk(child.rval): if isinstance(i, ast_internal_classes.Call_Expr_Node) and i.name.name == self.func_name(): @@ -190,11 +199,10 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No # Verify that all of intrinsic args are correct and prepare them for loop generation self._summarize_args(child, newbody) - # Change the type of result variable - self._update_result_type(child.lval) - # Initialize the result variable - newbody.append(self._initialize_result(child)) + init_stm = self._initialize_result(child) + if init_stm is not None: + newbody.append(init_stm) # Generate the intrinsic-specific logic inside loop body body = self._generate_loop_body(child) @@ -245,7 +253,7 @@ def _initialize(self): self.rvals = [] self.argument_variable = None - def _update_result_type(self, var: ast_internal_classes.Name_Node): + def _update_result_type(self, node: ast_internal_classes.Execution_Part_Node, var: ast_internal_classes.Name_Node): """ For both SUM and PRODUCT, the result type depends on the input variable. @@ -317,7 +325,8 @@ class Transformation(SumProduct): def __init__(self, ast): super().__init__(ast) - def func_name(self) -> str: + @staticmethod + def func_name() -> str: return "__dace_sum" def _result_init_value(self): @@ -342,7 +351,8 @@ class Transformation(SumProduct): def __init__(self, ast): super().__init__(ast) - def func_name(self) -> str: + @staticmethod + def func_name() -> str: return "__dace_product" def _result_init_value(self): @@ -364,7 +374,7 @@ def _initialize(self): self.dominant_array = None self.cond = None - def _update_result_type(self, var: ast_internal_classes.Name_Node): + def _update_result_type(self, node: ast_internal_classes.Execution_Part_Node, var: ast_internal_classes.Name_Node): """ For all functions, the result type is INTEGER. @@ -569,7 +579,8 @@ def _result_loop_update(self, node: ast_internal_classes.FNode): def _loop_condition(self): return self.cond - def func_name(self) -> str: + @staticmethod + def func_name() -> str: return "__dace_any" class All(LoopBasedReplacement): @@ -603,7 +614,8 @@ def _loop_condition(self): lval=self.cond ) - def func_name(self) -> str: + @staticmethod + def func_name() -> str: return "__dace_all" class Count(LoopBasedReplacement): @@ -642,7 +654,8 @@ def _result_loop_update(self, node: ast_internal_classes.FNode): def _loop_condition(self): return self.cond - def func_name(self) -> str: + @staticmethod + def func_name() -> str: return "__dace_count" @@ -655,7 +668,7 @@ def _initialize(self): self.rvals = [] self.argument_variable = None - def _update_result_type(self, var: ast_internal_classes.Name_Node): + def _update_result_type(self, node: ast_internal_classes.Execution_Part_Node, var: ast_internal_classes.Name_Node): """ For both MINVAL and MAXVAL, the result type depends on the input variable. 
@@ -747,7 +760,8 @@ def _result_init_value(self, array: ast_internal_classes.Array_Subscript_Node): def _condition_op(self): return "<" - def func_name(self) -> str: + @staticmethod + def func_name() -> str: return "__dace_minval" @@ -781,9 +795,174 @@ def _result_init_value(self, array: ast_internal_classes.Array_Subscript_Node): def _condition_op(self): return ">" - def func_name(self) -> str: + @staticmethod + def func_name() -> str: return "__dace_maxval" +class Merge(LoopBasedReplacement): + + class Transformation(LoopBasedReplacementTransformation): + + def __init__(self, ast): + super().__init__(ast) + + def _initialize(self): + self.rvals = [] + + self.first_array = None + self.second_array = None + self.dominant_array = None + self.cond = None + self.destination_array = None + + @staticmethod + def func_name() -> str: + return "__dace_merge" + + def _update_result_type(self, node: ast_internal_classes.Execution_Part_Node, var: ast_internal_classes.Name_Node): + """ + We can ignore the result type, because we exempted this + transformation from generating a result. + In MERGE, we write directly to the destination array. + Thus, we store this result array for future use. + """ + + self.destination_array = self._parse_array(node, var) + + def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): + + if len(node.args) != 3: + raise NotImplementedError("Expected three arguments to MERGE!") + + # First argument is always an array + self.first_array = self._parse_array(node, node.args[0]) + assert self.first_array is not None + + # Second argument is always an array + self.second_array = self._parse_array(node, node.args[1]) + assert self.second_array is not None + + # Last argument is either an array or a binary op + array_node = self._parse_array(node, node.args[2]) + if array_node is not None: + + self.mask_array = array_node + + else: + + # supports syntax ANY(logical op) + # the logical op can be: + # + # (1) arr1 op arr2 + # where arr1 and arr2 are name node or array subscript node + # there, we need to extract shape and verify they are the same + # + # (2) arr1 op scalar + # there, we ignore the scalar because it's not an array + if not isinstance(arg, ast_internal_classes.BinOp_Node): + return + + self.mask_first_array = self._parse_array(node, arg.lval) + self.mask_second_array = self._parse_array(node, arg.rval) + has_two_arrays = self.mask_first_array is not None and self.mask_second_array is not None + + # array and scalar - simplified case + if not has_two_arrays: + + # if one side of the operator is scalar, then parsing array + # will return none + self.dominant_array = self.mask_first_array + if self.dominant_array is None: + self.dominant_array = self.mask_second_array + + # replace the array subscript node in the binary operation + # ignore this when the operand is a scalar + self.cond = copy.deepcopy(arg) + if self.mask_first_array is not None: + self.cond.lval = self.mask_dominant_array + if self.mask_second_array is not None: + self.cond.rval = self.mask_dominant_array + + return + + + if len(self.mask_first_array.indices) != len(self.mask_second_array.indices): + raise TypeError("Can't parse Fortran MERGE with different array ranks!") + + for left_idx, right_idx in zip(self.mask_first_array.indices, self.mask_second_array.indices): + if left_idx.type != right_idx.type: + raise TypeError("Can't parse Fortran MERGE with different array ranks!") + + # Now, we need to convert the array to a proper subscript node + self.cond = copy.deepcopy(arg) + 
self.cond.lval = self.mask_first_array + self.cond.rval = self.mask_second_array + + def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): + + # The first main argument is an array -> this dictates loop boundaries + # Other arrays, regardless if they appear as the second array or mask, need to have the same loop boundary. + par_Decl_Range_Finder(self.first_array, self.loop_ranges, [], [], self.count, new_func_body, self.scope_vars, True) + par_Decl_Range_Finder(self.second_array, [], [], [], self.count, new_func_body, self.scope_vars, True) + par_Decl_Range_Finder(self.destination_array, [], [], [], self.count, new_func_body, self.scope_vars, True) + #par_Decl_Range_Finder(self.mask_first_array, self.loop_ranges, [], [], self.count, new_func_body, self.scope_vars, True) + # FIXME: if condition for if creation + #self.cond = ast_internal_classes.BinOp_Node( + # op="==", + # rval=ast_internal_classes.Int_Literal_Node(value="1"), + # lval=copy.deepcopy(self.first_array), + # line_number=node.line_number + #) + par_Decl_Range_Finder(self.mask_array, [], [], [], self.count, new_func_body, self.scope_vars, True) + self.cond = ast_internal_classes.BinOp_Node( + op="==", + rval=ast_internal_classes.Int_Literal_Node(value="1"), + lval=copy.deepcopy(self.mask_array), + line_number=node.line_number + ) + return + + def _initialize_result(self, node: ast_internal_classes.FNode) -> Optional[ast_internal_classes.BinOp_Node]: + """ + We don't use result variable in MERGE. + """ + return None + + def _generate_loop_body(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: + + """ + We check if the condition is true. If yes, then we write from the first array. + Otherwise, we copy data from the second array. + """ + + copy_first = ast_internal_classes.BinOp_Node( + lval=copy.deepcopy(self.destination_array), + op="=", + rval=self.first_array, + line_number=node.line_number + ) + + copy_second = ast_internal_classes.BinOp_Node( + lval=copy.deepcopy(self.destination_array), + op="=", + rval=self.second_array, + line_number=node.line_number + ) + + body_if = ast_internal_classes.Execution_Part_Node(execution=[ + copy_first + ]) + + body_else = ast_internal_classes.Execution_Part_Node(execution=[ + copy_second + ]) + + return ast_internal_classes.If_Stmt_Node( + cond=self.cond, + body=body_if, + body_else=body_else, + line_number=node.line_number + ) class FortranIntrinsics: @@ -796,7 +975,8 @@ class FortranIntrinsics: "COUNT": Count, "ALL": All, "MINVAL": MinVal, - "MAXVAL": MaxVal + "MAXVAL": MaxVal, + "MERGE": Merge } DIRECT_REPLACEMENTS = { @@ -804,6 +984,10 @@ class FortranIntrinsics: "__dace_selected_real_kind": SelectedKind } + EXEMPTED_FROM_CALL_EXTRACTION = [ + Merge + ] + def __init__(self): self._transformations_to_run = set() @@ -814,6 +998,10 @@ def transformations(self) -> Set[Type[NodeTransformer]]: def function_names() -> List[str]: return list(LoopBasedReplacement.INTRINSIC_TO_DACE.values()) + @staticmethod + def call_extraction_exemptions() -> List[str]: + return [func.Transformation.func_name() for func in FortranIntrinsics.EXEMPTED_FROM_CALL_EXTRACTION] + def replace_function_name(self, node: FASTNode) -> ast_internal_classes.Name_Node: func_name = node.string diff --git a/tests/fortran/intrinsic_merge.py b/tests/fortran/intrinsic_merge.py new file mode 100644 index 0000000000..2ad49911c3 --- /dev/null +++ b/tests/fortran/intrinsic_merge.py @@ -0,0 +1,75 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. 
All rights reserved. + +import numpy as np + +from dace.frontend.fortran import ast_transforms, fortran_parser + +def test_fortran_frontend_merge_double(): + """ + Tests that the generated array map correctly handles offsets. + """ + test_string = """ + PROGRAM merge_test + implicit none + double precision, dimension(7) :: input1 + double precision, dimension(7) :: input2 + integer, dimension(7) :: mask + double precision, dimension(7) :: res + CALL merge_test_function(input1, input2, mask, res) + end + + SUBROUTINE merge_test_function(input1, input2, mask, res) + double precision, dimension(7) :: input1 + double precision, dimension(7) :: input2 + integer, dimension(7) :: mask + double precision, dimension(7) :: res + + res = MERGE(input1, input2, mask) + + END SUBROUTINE merge_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + sdfg = fortran_parser.create_sdfg_from_string(test_string, "merge_test", True) + sdfg.simplify(verbose=True) + sdfg.compile() + size = 7 + + # Minimum is in the beginning + first = np.full([size], 13, order="F", dtype=np.float64) + second = np.full([size], 42, order="F", dtype=np.float64) + mask = np.full([size], 0, order="F", dtype=np.int32) + res = np.full([size], 40, order="F", dtype=np.float64) + + sdfg(input1=first, input2=second, mask=mask, res=res) + for val in res: + assert val == 42 + + for i in range(int(size/2)): + mask[i] = 1 + sdfg(input1=first, input2=second, mask=mask, res=res) + for i in range(int(size/2)): + assert res[i] == 13 + for i in range(int(size/2), size): + assert res[i] == 42 + + mask[:] = 0 + for i in range(size): + if i % 2 == 1: + mask[i] = 1 + sdfg(input1=first, input2=second, mask=mask, res=res) + for i in range(size): + if i % 2 == 1: + assert res[i] == 13 + else: + assert res[i] == 42 + + # mask comparison scalar + # mask comparison two arrays + # mask array + # mask comparison on array participating + # mask comparison on two arrays participating + +if __name__ == "__main__": + + test_fortran_frontend_merge_double() From 7573a18d79a6423bd0912ca1e791a2c15e8a8862 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 19 Oct 2023 20:50:38 +0200 Subject: [PATCH 107/163] Exempt some of the Fortran intrinsics from CallExtractor transformation --- dace/frontend/fortran/ast_transforms.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dace/frontend/fortran/ast_transforms.py b/dace/frontend/fortran/ast_transforms.py index 7aa6205bd6..0c96560fba 100644 --- a/dace/frontend/fortran/ast_transforms.py +++ b/dace/frontend/fortran/ast_transforms.py @@ -217,8 +217,10 @@ def visit_Call_Expr_Node(self, node: ast_internal_classes.Call_Expr_Node): if hasattr(node, "subroutine"): if node.subroutine is True: stop = True + + from dace.frontend.fortran.intrinsics import FortranIntrinsics if not stop and node.name.name not in [ - "malloc", "exp", "pow", "sqrt", "cbrt", "max", "min", "abs", "tanh", "__dace_epsilon" + "malloc", "exp", "pow", "sqrt", "cbrt", "max", "min", "abs", "tanh", "__dace_epsilon", *FortranIntrinsics.call_extraction_exemptions() ]: self.nodes.append(node) return self.generic_visit(node) @@ -238,7 +240,8 @@ def __init__(self, count=0): def visit_Call_Expr_Node(self, node: ast_internal_classes.Call_Expr_Node): - if node.name.name in ["malloc", "exp", "pow", "sqrt", "cbrt", "max", "min", "abs", "tanh", "__dace_epsilon"]: + from dace.frontend.fortran.intrinsics import FortranIntrinsics + if node.name.name in ["malloc", "exp", "pow", "sqrt", "cbrt", "max", "min", "abs", 
"tanh", "__dace_epsilon", *FortranIntrinsics.call_extraction_exemptions()]: return self.generic_visit(node) if hasattr(node, "subroutine"): if node.subroutine is True: From 3eaeb249f605b0365dc4bdbc676cb86e815de1bd Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Fri, 20 Oct 2023 00:03:36 +0200 Subject: [PATCH 108/163] Refactor Fortran intrinsic to create common function for parsing binary operations with an array --- dace/frontend/fortran/intrinsics.py | 217 +++++++++++++++------------- 1 file changed, 119 insertions(+), 98 deletions(-) diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py index ca37cc0019..894c9c1b0b 100644 --- a/dace/frontend/fortran/intrinsics.py +++ b/dace/frontend/fortran/intrinsics.py @@ -2,7 +2,7 @@ from abc import abstractmethod import copy import math -from typing import Any, List, Optional, Set, Type +from typing import Any, List, Optional, Set, Tuple, Type from dace.frontend.fortran import ast_internal_classes from dace.frontend.fortran.ast_utils import fortrantypes2dacetypes @@ -170,6 +170,98 @@ def _parse_array(self, node: ast_internal_classes.Execution_Part_Node, arg: ast_ if isinstance(arg, ast_internal_classes.Array_Subscript_Node): return arg + def _parse_binary_op(self, node: ast_internal_classes.Call_Expr_Node, arg: ast_internal_classes.BinOp_Node) -> Tuple[ + ast_internal_classes.Array_Subscript_Node, + Optional[ast_internal_classes.Array_Subscript_Node], + ast_internal_classes.BinOp_Node + ]: + + """ + Supports passing binary operations as an input to function. + In both cases, we extract the arrays used, and return a brand + new binary operation that has array references replaced. + We return both arrays (second optionaly None) and the binary op. + + The binary op can be: + + (1) arr1 op arr2 + where arr1 and arr2 are name node or array subscript node + #there, we need to extract shape and verify they are the same + + (2) arr1 op scalar + there, we ignore the scalar because it's not an array + + """ + if not isinstance(arg, ast_internal_classes.BinOp_Node): + return False + + first_array = self._parse_array(node, arg.lval) + second_array = self._parse_array(node, arg.rval) + has_two_arrays = first_array is not None and second_array is not None + + # array and scalar - simplified case + if not has_two_arrays: + + # if one side of the operator is scalar, then parsing array + # will return none + dominant_array = first_array + if dominant_array is None: + dominant_array = second_array + + # replace the array subscript node in the binary operation + # ignore this when the operand is a scalar + cond = copy.deepcopy(arg) + if first_array is not None: + cond.lval = dominant_array + if second_array is not None: + cond.rval = dominant_array + + return (dominant_array, None, cond) + + if len(first_array.indices) != len(second_array.indices): + raise TypeError("Can't parse Fortran ANY with different array ranks!") + + for left_idx, right_idx in zip(first_array.indices, second_array.indices): + if left_idx.type != right_idx.type: + raise TypeError("Can't parse Fortran ANY with different array ranks!") + + # Now, we need to convert the array to a proper subscript node + cond = copy.deepcopy(arg) + cond.lval = first_array + cond.rval = second_array + + return (first_array, second_array, cond) + + def _adjust_array_ranges(self, node: ast_internal_classes.FNode, array: ast_internal_classes.Array_Subscript_Node, loop_ranges_main: list, loop_ranges_array: list): + + """ + When given a binary operator with arrays as an argument to the 
intrinsic,
+        one array will dictate the loop range.
+        However, the other array can potentially have a different access range.
+        Thus, we need to add an offset to the loop iterator when accessing array elements.
+
+        If the access pattern on the right array is different, we need to shift it - for every dimension.
+        For example, we can have arr(1:3) == arr2(3:5)
+        Then, loop_idx is from 1 to 3
+        arr becomes arr[loop_idx]
+        but arr2 must be arr2[loop_idx + 2]
+        """
+        for i in range(len(array.indices)):
+
+            idx_var = array.indices[i]
+            start_loop = loop_ranges_main[i][0]
+            end_loop = loop_ranges_array[i][0]
+
+            difference = int(end_loop.value) - int(start_loop.value)
+            if difference != 0:
+                new_index = ast_internal_classes.BinOp_Node(
+                    lval=idx_var,
+                    op="+",
+                    rval=ast_internal_classes.Int_Literal_Node(value=str(difference)),
+                    line_number=node.line_number
+                )
+                array.indices[i] = new_index
+
     def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_Node):
         newbody = []

@@ -393,83 +485,22 @@ def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node):
             array_node = self._parse_array(node, arg)

             if array_node is not None:
-                self.first_array = array_node
-
-            else:
-
-                # supports syntax ANY(logical op)
-                # the logical op can be:
-                #
-                # (1) arr1 op arr2
-                # where arr1 and arr2 are name node or array subscript node
-                # there, we need to extract shape and verify they are the same
-                #
-                # (2) arr1 op scalar
-                # there, we ignore the scalar because it's not an array
-                if not isinstance(arg, ast_internal_classes.BinOp_Node):
-                    return
-
-                self.first_array = self._parse_array(node, arg.lval)
-                self.second_array = self._parse_array(node, arg.rval)
-                has_two_arrays = self.first_array is not None and self.second_array is not None
-
-                # array and scalar - simplified case
-                if not has_two_arrays:
-
-                    # if one side of the operator is scalar, then parsing array
-                    # will return none
-                    self.dominant_array = self.first_array
-                    if self.dominant_array is None:
-                        self.dominant_array = self.second_array
-
-                    # replace the array subscript node in the binary operation
-                    # ignore this when the operand is a scalar
-                    self.cond = copy.deepcopy(arg)
-                    if self.first_array is not None:
-                        self.cond.lval = self.dominant_array
-                    if self.second_array is not None:
-                        self.cond.rval = self.dominant_array
-
-                    return
-
-
-                if len(self.first_array.indices) != len(self.second_array.indices):
-                    raise TypeError("Can't parse Fortran ANY with different array ranks!")
-
-                for left_idx, right_idx in zip(self.first_array.indices, self.second_array.indices):
-                    if left_idx.type != right_idx.type:
-                        raise TypeError("Can't parse Fortran ANY with different array ranks!")
-
-                # Now, we need to convert the array to a proper subscript node
-                self.cond = copy.deepcopy(arg)
-                self.cond.lval = self.first_array
-                self.cond.rval = self.second_array
-
-    def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]):
-
-        # The main argument is an array, not a binary operation
-        if self.cond is None:
-
-            par_Decl_Range_Finder(self.first_array, self.loop_ranges, [], [], self.count, new_func_body, self.scope_vars, True)
                 self.cond = ast_internal_classes.BinOp_Node(
                     op="==",
                     rval=ast_internal_classes.Int_Literal_Node(value="1"),
-                    lval=copy.deepcopy(self.first_array),
+                    lval=self.first_array,
                     line_number=node.line_number
                 )
-            return
-
-        # we have a binary operation with an array and a scalar
-        if self.dominant_array is not None:
-
-            par_Decl_Range_Finder(self.dominant_array, self.loop_ranges, 
[], [], self.count, new_func_body, self.scope_vars, True) - return + else: + self.first_array, self.second_array, self.cond = self._parse_binary_op(node, arg) - # we have a binary operation with two arrays + def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): rangeslen_left = [] par_Decl_Range_Finder(self.first_array, self.loop_ranges, [], rangeslen_left, self.count, new_func_body, self.scope_vars, True) + if self.second_array is None: + return loop_ranges_right = [] rangeslen_right = [] @@ -479,27 +510,10 @@ def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ if left_len != right_len: raise TypeError("Can't support Fortran ANY with different array ranks!") - # Now, the loop will be dictated by the left array - # If the access pattern on the right array is different, we need to shfit it - for every dimension. - # For example, we can have arr(1:3) == arr2(3:5) - # Then, loop_idx is from 1 to 3 - # arr becomes arr[loop_idx] - # but arr2 must be arr2[loop_idx + 2] - for i in range(len(self.second_array.indices)): - - idx_var = self.second_array.indices[i] - start_loop = self.loop_ranges[i][0] - end_loop = loop_ranges_right[i][0] + # In this intrinsic, the left array dictates loop range. + # Thus, we only need to adjust the second array + self._adjust_array_ranges(node, self.second_array, self.loop_ranges, loop_ranges_right) - difference = int(end_loop.value) - int(start_loop.value) - if difference != 0: - new_index = ast_internal_classes.BinOp_Node( - lval=idx_var, - op="+", - rval=ast_internal_classes.Int_Literal_Node(value=str(difference)), - line_number=node.line_number - ) - self.second_array.indices[i] = new_index def _initialize_result(self, node: ast_internal_classes.FNode) -> ast_internal_classes.BinOp_Node: @@ -843,6 +857,7 @@ def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): assert self.second_array is not None # Last argument is either an array or a binary op + arg = node.args[2] array_node = self._parse_array(node, node.args[2]) if array_node is not None: @@ -871,9 +886,9 @@ def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): # if one side of the operator is scalar, then parsing array # will return none - self.dominant_array = self.mask_first_array - if self.dominant_array is None: - self.dominant_array = self.mask_second_array + self.mask_dominant_array = self.mask_first_array + if self.mask_dominant_array is None: + self.mask_dominant_array = self.mask_second_array # replace the array subscript node in the binary operation # ignore this when the operand is a scalar @@ -882,6 +897,7 @@ def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): self.cond.lval = self.mask_dominant_array if self.mask_second_array is not None: self.cond.rval = self.mask_dominant_array + print('process', self.cond, self.cond.lval.name.name, self.cond.rval) return @@ -913,13 +929,18 @@ def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ # lval=copy.deepcopy(self.first_array), # line_number=node.line_number #) - par_Decl_Range_Finder(self.mask_array, [], [], [], self.count, new_func_body, self.scope_vars, True) - self.cond = ast_internal_classes.BinOp_Node( - op="==", - rval=ast_internal_classes.Int_Literal_Node(value="1"), - lval=copy.deepcopy(self.mask_array), - line_number=node.line_number - ) + if self.cond is None: + par_Decl_Range_Finder(self.mask_array, [], [], [], self.count, new_func_body, self.scope_vars, True) + 
self.cond = ast_internal_classes.BinOp_Node( + op="==", + rval=ast_internal_classes.Int_Literal_Node(value="1"), + lval=copy.deepcopy(self.mask_array), + line_number=node.line_number + ) + else: + # FIXME: move somewhere else + par_Decl_Range_Finder(self.cond.lval, [], [], [], self.count, new_func_body, self.scope_vars, True) + par_Decl_Range_Finder(self.cond.rval, [], [], [], self.count, new_func_body, self.scope_vars, True) return def _initialize_result(self, node: ast_internal_classes.FNode) -> Optional[ast_internal_classes.BinOp_Node]: From db7e31551a94d2ae4f59982fcf52dde564c117cc Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Fri, 20 Oct 2023 00:09:04 +0200 Subject: [PATCH 109/163] Add test to COUNT to verify we do not swap the order of operands in a binary op --- tests/fortran/intrinsic_count.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/fortran/intrinsic_count.py b/tests/fortran/intrinsic_count.py index 5e6666513c..ef55f9dd55 100644 --- a/tests/fortran/intrinsic_count.py +++ b/tests/fortran/intrinsic_count.py @@ -120,13 +120,13 @@ def test_fortran_frontend_count_array_scalar_comparison(): PROGRAM intrinsic_count_test implicit none integer, dimension(5) :: first - logical, dimension(7) :: res + logical, dimension(9) :: res CALL intrinsic_count_test_function(first, res) end SUBROUTINE intrinsic_count_test_function(first, res) integer, dimension(5) :: first - logical, dimension(7) :: res + logical, dimension(9) :: res res(1) = COUNT(first .eq. 42) res(2) = COUNT(first(:) .eq. 42) @@ -135,6 +135,8 @@ def test_fortran_frontend_count_array_scalar_comparison(): res(5) = COUNT(first(3:5) .eq. 42) res(6) = COUNT(42 .eq. first) res(7) = COUNT(42 .ne. first) + res(8) = COUNT(6 .lt. first) + res(9) = COUNT(6 .gt. 
first) END SUBROUTINE intrinsic_count_test_function """ @@ -145,24 +147,24 @@ def test_fortran_frontend_count_array_scalar_comparison(): size = 5 first = np.full([size], 1, order="F", dtype=np.int32) - res = np.full([7], 0, order="F", dtype=np.int32) + res = np.full([9], 0, order="F", dtype=np.int32) sdfg(first=first, res=res) - assert list(res) == [0, 0, 0, 0, 0, 0, 5] + assert list(res) == [0, 0, 0, 0, 0, 0, 5, 0, size] first[1] = 42 sdfg(first=first, res=res) - assert list(res) == [1, 1, 1, 0, 0, 1, 4] + assert list(res) == [1, 1, 1, 0, 0, 1, 4, 1, size - 1] first[1] = 5 first[2] = 42 sdfg(first=first, res=res) - assert list(res) == [1, 1, 0, 1, 1, 1, 4] + assert list(res) == [1, 1, 0, 1, 1, 1, 4, 1, size - 1] first[2] = 7 first[3] = 42 sdfg(first=first, res=res) - assert list(res) == [1, 1, 0, 0, 1, 1, 4] + assert list(res) == [1, 1, 0, 0, 1, 1, 4, 2, size - 2] def test_fortran_frontend_count_array_comparison_wrong_subset(): test_string = """ From e45d62dc7292bc3407e34f14145a9ed2e70cdac8 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Fri, 20 Oct 2023 00:17:33 +0200 Subject: [PATCH 110/163] Implement standard MERGE intrinsic version --- dace/frontend/fortran/intrinsics.py | 91 ++++---------------- tests/fortran/intrinsic_merge.py | 127 ++++++++++++++++++++++++++-- 2 files changed, 140 insertions(+), 78 deletions(-) diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py index 894c9c1b0b..dbcf626868 100644 --- a/dace/frontend/fortran/intrinsics.py +++ b/dace/frontend/fortran/intrinsics.py @@ -825,8 +825,9 @@ def _initialize(self): self.first_array = None self.second_array = None - self.dominant_array = None - self.cond = None + self.mask_first_array = None + self.mask_second_array = None + self.mask_cond = None self.destination_array = None @staticmethod @@ -861,58 +862,17 @@ def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): array_node = self._parse_array(node, node.args[2]) if array_node is not None: - self.mask_array = array_node + self.mask_first_array = array_node + self.mask_cond = ast_internal_classes.BinOp_Node( + op="==", + rval=ast_internal_classes.Int_Literal_Node(value="1"), + lval=self.mask_first_array, + line_number=node.line_number + ) else: - # supports syntax ANY(logical op) - # the logical op can be: - # - # (1) arr1 op arr2 - # where arr1 and arr2 are name node or array subscript node - # there, we need to extract shape and verify they are the same - # - # (2) arr1 op scalar - # there, we ignore the scalar because it's not an array - if not isinstance(arg, ast_internal_classes.BinOp_Node): - return - - self.mask_first_array = self._parse_array(node, arg.lval) - self.mask_second_array = self._parse_array(node, arg.rval) - has_two_arrays = self.mask_first_array is not None and self.mask_second_array is not None - - # array and scalar - simplified case - if not has_two_arrays: - - # if one side of the operator is scalar, then parsing array - # will return none - self.mask_dominant_array = self.mask_first_array - if self.mask_dominant_array is None: - self.mask_dominant_array = self.mask_second_array - - # replace the array subscript node in the binary operation - # ignore this when the operand is a scalar - self.cond = copy.deepcopy(arg) - if self.mask_first_array is not None: - self.cond.lval = self.mask_dominant_array - if self.mask_second_array is not None: - self.cond.rval = self.mask_dominant_array - print('process', self.cond, self.cond.lval.name.name, self.cond.rval) - - return - - - if 
len(self.mask_first_array.indices) != len(self.mask_second_array.indices): - raise TypeError("Can't parse Fortran MERGE with different array ranks!") - - for left_idx, right_idx in zip(self.mask_first_array.indices, self.mask_second_array.indices): - if left_idx.type != right_idx.type: - raise TypeError("Can't parse Fortran MERGE with different array ranks!") - - # Now, we need to convert the array to a proper subscript node - self.cond = copy.deepcopy(arg) - self.cond.lval = self.mask_first_array - self.cond.rval = self.mask_second_array + self.mask_first_array, self.mask_second_array, self.mask_cond = self._parse_binary_op(node, arg) def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): @@ -921,27 +881,12 @@ def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ par_Decl_Range_Finder(self.first_array, self.loop_ranges, [], [], self.count, new_func_body, self.scope_vars, True) par_Decl_Range_Finder(self.second_array, [], [], [], self.count, new_func_body, self.scope_vars, True) par_Decl_Range_Finder(self.destination_array, [], [], [], self.count, new_func_body, self.scope_vars, True) - #par_Decl_Range_Finder(self.mask_first_array, self.loop_ranges, [], [], self.count, new_func_body, self.scope_vars, True) - # FIXME: if condition for if creation - #self.cond = ast_internal_classes.BinOp_Node( - # op="==", - # rval=ast_internal_classes.Int_Literal_Node(value="1"), - # lval=copy.deepcopy(self.first_array), - # line_number=node.line_number - #) - if self.cond is None: - par_Decl_Range_Finder(self.mask_array, [], [], [], self.count, new_func_body, self.scope_vars, True) - self.cond = ast_internal_classes.BinOp_Node( - op="==", - rval=ast_internal_classes.Int_Literal_Node(value="1"), - lval=copy.deepcopy(self.mask_array), - line_number=node.line_number - ) - else: - # FIXME: move somewhere else - par_Decl_Range_Finder(self.cond.lval, [], [], [], self.count, new_func_body, self.scope_vars, True) - par_Decl_Range_Finder(self.cond.rval, [], [], [], self.count, new_func_body, self.scope_vars, True) - return + + if self.mask_first_array is not None: + par_Decl_Range_Finder(self.mask_first_array, [], [], [], self.count, new_func_body, self.scope_vars, True) + if self.mask_second_array is not None: + par_Decl_Range_Finder(self.mask_second_array, [], [], [], self.count, new_func_body, self.scope_vars, True) + def _initialize_result(self, node: ast_internal_classes.FNode) -> Optional[ast_internal_classes.BinOp_Node]: """ @@ -979,7 +924,7 @@ def _generate_loop_body(self, node: ast_internal_classes.FNode) -> ast_internal_ ]) return ast_internal_classes.If_Stmt_Node( - cond=self.cond, + cond=self.mask_cond, body=body_if, body_else=body_else, line_number=node.line_number diff --git a/tests/fortran/intrinsic_merge.py b/tests/fortran/intrinsic_merge.py index 2ad49911c3..1104eea72d 100644 --- a/tests/fortran/intrinsic_merge.py +++ b/tests/fortran/intrinsic_merge.py @@ -4,7 +4,7 @@ from dace.frontend.fortran import ast_transforms, fortran_parser -def test_fortran_frontend_merge_double(): +def test_fortran_frontend_merge_1d(): """ Tests that the generated array map correctly handles offsets. """ @@ -64,12 +64,129 @@ def test_fortran_frontend_merge_double(): else: assert res[i] == 42 - # mask comparison scalar - # mask comparison two arrays - # mask array +def test_fortran_frontend_merge_comparison_scalar(): + """ + Tests that the generated array map correctly handles offsets. 
+ """ + test_string = """ + PROGRAM merge_test + implicit none + double precision, dimension(7) :: input1 + double precision, dimension(7) :: input2 + double precision, dimension(7) :: res + CALL merge_test_function(input1, input2, res) + end + + SUBROUTINE merge_test_function(input1, input2, res) + double precision, dimension(7) :: input1 + double precision, dimension(7) :: input2 + double precision, dimension(7) :: res + + res = MERGE(input1, input2, input1 .eq. 3) + + END SUBROUTINE merge_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + sdfg = fortran_parser.create_sdfg_from_string(test_string, "merge_test", True) + sdfg.simplify(verbose=True) + sdfg.compile() + size = 7 + + # Minimum is in the beginning + first = np.full([size], 13, order="F", dtype=np.float64) + second = np.full([size], 42, order="F", dtype=np.float64) + res = np.full([size], 40, order="F", dtype=np.float64) + + sdfg(input1=first, input2=second, res=res) + for val in res: + assert val == 42 + + for i in range(int(size/2)): + first[i] = 3 + sdfg(input1=first, input2=second, res=res) + for i in range(int(size/2)): + assert res[i] == 3 + for i in range(int(size/2), size): + assert res[i] == 42 + + first[:] = 13 + for i in range(size): + if i % 2 == 1: + first[i] = 3 + sdfg(input1=first, input2=second, res=res) + for i in range(size): + if i % 2 == 1: + assert res[i] == 3 + else: + assert res[i] == 42 + +def test_fortran_frontend_merge_comparison_arrays(): + """ + Tests that the generated array map correctly handles offsets. + """ + test_string = """ + PROGRAM merge_test + implicit none + double precision, dimension(7) :: input1 + double precision, dimension(7) :: input2 + double precision, dimension(7) :: res + CALL merge_test_function(input1, input2, res) + end + + SUBROUTINE merge_test_function(input1, input2, res) + double precision, dimension(7) :: input1 + double precision, dimension(7) :: input2 + double precision, dimension(7) :: res + + res = MERGE(input1, input2, input1 .lt. input2) + + END SUBROUTINE merge_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + sdfg = fortran_parser.create_sdfg_from_string(test_string, "merge_test", True) + sdfg.simplify(verbose=True) + sdfg.compile() + size = 7 + + # Minimum is in the beginning + first = np.full([size], 13, order="F", dtype=np.float64) + second = np.full([size], 42, order="F", dtype=np.float64) + res = np.full([size], 40, order="F", dtype=np.float64) + + sdfg(input1=first, input2=second, res=res) + for val in res: + assert val == 13 + + for i in range(int(size/2)): + first[i] = 45 + sdfg(input1=first, input2=second, res=res) + for i in range(int(size/2)): + assert res[i] == 42 + for i in range(int(size/2), size): + assert res[i] == 13 + + first[:] = 13 + for i in range(size): + if i % 2 == 1: + first[i] = 45 + sdfg(input1=first, input2=second, res=res) + for i in range(size): + if i % 2 == 1: + assert res[i] == 42 + else: + assert res[i] == 13 + # mask comparison on array participating # mask comparison on two arrays participating + # mask comparison - second array with shift + # mask comparison - both arrays wiht a shift + # second array - shift! 
+ # merge 2d if __name__ == "__main__": - test_fortran_frontend_merge_double() + test_fortran_frontend_merge_1d() + test_fortran_frontend_merge_comparison_scalar() + test_fortran_frontend_merge_comparison_arrays() From 873c6dc42e06c2f24eb9738486330bff5e24ec5e Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Fri, 20 Oct 2023 01:15:51 +0200 Subject: [PATCH 111/163] Finish implementation of MERGE intrinsic --- dace/frontend/fortran/intrinsics.py | 20 ++++-- tests/fortran/intrinsic_merge.py | 103 ++++++++++++++++++++++++++-- 2 files changed, 111 insertions(+), 12 deletions(-) diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py index dbcf626868..92cb4619cc 100644 --- a/dace/frontend/fortran/intrinsics.py +++ b/dace/frontend/fortran/intrinsics.py @@ -219,11 +219,11 @@ def _parse_binary_op(self, node: ast_internal_classes.Call_Expr_Node, arg: ast_i return (dominant_array, None, cond) if len(first_array.indices) != len(second_array.indices): - raise TypeError("Can't parse Fortran ANY with different array ranks!") + raise TypeError("Can't parse Fortran binary op with different array ranks!") for left_idx, right_idx in zip(first_array.indices, second_array.indices): if left_idx.type != right_idx.type: - raise TypeError("Can't parse Fortran ANY with different array ranks!") + raise TypeError("Can't parse Fortran binary op with different array ranks!") # Now, we need to convert the array to a proper subscript node cond = copy.deepcopy(arg) @@ -879,14 +879,22 @@ def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ # The first main argument is an array -> this dictates loop boundaries # Other arrays, regardless if they appear as the second array or mask, need to have the same loop boundary. par_Decl_Range_Finder(self.first_array, self.loop_ranges, [], [], self.count, new_func_body, self.scope_vars, True) - par_Decl_Range_Finder(self.second_array, [], [], [], self.count, new_func_body, self.scope_vars, True) + + loop_ranges = [] + par_Decl_Range_Finder(self.second_array, loop_ranges, [], [], self.count, new_func_body, self.scope_vars, True) + self._adjust_array_ranges(node, self.second_array, self.loop_ranges, loop_ranges) + par_Decl_Range_Finder(self.destination_array, [], [], [], self.count, new_func_body, self.scope_vars, True) if self.mask_first_array is not None: - par_Decl_Range_Finder(self.mask_first_array, [], [], [], self.count, new_func_body, self.scope_vars, True) - if self.mask_second_array is not None: - par_Decl_Range_Finder(self.mask_second_array, [], [], [], self.count, new_func_body, self.scope_vars, True) + loop_ranges = [] + par_Decl_Range_Finder(self.mask_first_array, loop_ranges, [], [], self.count, new_func_body, self.scope_vars, True) + self._adjust_array_ranges(node, self.mask_first_array, self.loop_ranges, loop_ranges) + if self.mask_second_array is not None: + loop_ranges = [] + par_Decl_Range_Finder(self.mask_second_array, loop_ranges, [], [], self.count, new_func_body, self.scope_vars, True) + self._adjust_array_ranges(node, self.mask_second_array, self.loop_ranges, loop_ranges) def _initialize_result(self, node: ast_internal_classes.FNode) -> Optional[ast_internal_classes.BinOp_Node]: """ diff --git a/tests/fortran/intrinsic_merge.py b/tests/fortran/intrinsic_merge.py index 1104eea72d..1778b9c2fb 100644 --- a/tests/fortran/intrinsic_merge.py +++ b/tests/fortran/intrinsic_merge.py @@ -178,15 +178,106 @@ def test_fortran_frontend_merge_comparison_arrays(): else: assert res[i] == 13 - # mask comparison on array 
participating - # mask comparison on two arrays participating - # mask comparison - second array with shift - # mask comparison - both arrays wiht a shift - # second array - shift! - # merge 2d + +def test_fortran_frontend_merge_comparison_arrays_offset(): + """ + Tests that the generated array map correctly handles offsets. + """ + test_string = """ + PROGRAM merge_test + implicit none + double precision, dimension(7) :: input1 + double precision, dimension(7) :: input2 + double precision, dimension(14) :: mask1 + double precision, dimension(14) :: mask2 + double precision, dimension(7) :: res + CALL merge_test_function(input1, input2, mask1, mask2, res) + end + + SUBROUTINE merge_test_function(input1, input2, mask1, mask2, res) + double precision, dimension(7) :: input1 + double precision, dimension(7) :: input2 + double precision, dimension(14) :: mask1 + double precision, dimension(14) :: mask2 + double precision, dimension(7) :: res + + res = MERGE(input1, input2, mask1(3:9) .lt. mask2(5:11)) + + END SUBROUTINE merge_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + sdfg = fortran_parser.create_sdfg_from_string(test_string, "merge_test", True) + sdfg.simplify(verbose=True) + sdfg.compile() + size = 7 + + # Minimum is in the beginning + first = np.full([size], 13, order="F", dtype=np.float64) + second = np.full([size], 42, order="F", dtype=np.float64) + mask1 = np.full([size*2], 30, order="F", dtype=np.float64) + mask2 = np.full([size*2], 0, order="F", dtype=np.float64) + res = np.full([size], 40, order="F", dtype=np.float64) + + mask1[2:9] = 3 + mask2[4:11] = 4 + sdfg(input1=first, input2=second, mask1=mask1, mask2=mask2, res=res) + for val in res: + assert val == 13 + + +def test_fortran_frontend_merge_array_shift(): + """ + Tests that the generated array map correctly handles offsets. + """ + test_string = """ + PROGRAM merge_test + implicit none + double precision, dimension(7) :: input1 + double precision, dimension(21) :: input2 + double precision, dimension(14) :: mask1 + double precision, dimension(14) :: mask2 + double precision, dimension(7) :: res + CALL merge_test_function(input1, input2, mask1, mask2, res) + end + + SUBROUTINE merge_test_function(input1, input2, mask1, mask2, res) + double precision, dimension(7) :: input1 + double precision, dimension(21) :: input2 + double precision, dimension(14) :: mask1 + double precision, dimension(14) :: mask2 + double precision, dimension(7) :: res + + res = MERGE(input1, input2(13:19), mask1(3:9) .gt. 
mask2(5:11)) + + END SUBROUTINE merge_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + sdfg = fortran_parser.create_sdfg_from_string(test_string, "merge_test", True) + sdfg.simplify(verbose=True) + sdfg.compile() + size = 7 + + # Minimum is in the beginning + first = np.full([size], 13, order="F", dtype=np.float64) + second = np.full([size*3], 42, order="F", dtype=np.float64) + mask1 = np.full([size*2], 30, order="F", dtype=np.float64) + mask2 = np.full([size*2], 0, order="F", dtype=np.float64) + res = np.full([size], 40, order="F", dtype=np.float64) + + second[12:19] = 100 + mask1[2:9] = 3 + mask2[4:11] = 4 + sdfg(input1=first, input2=second, mask1=mask1, mask2=mask2, res=res) + for val in res: + assert val == 100 + if __name__ == "__main__": test_fortran_frontend_merge_1d() test_fortran_frontend_merge_comparison_scalar() test_fortran_frontend_merge_comparison_arrays() + test_fortran_frontend_merge_comparison_arrays_offset() + test_fortran_frontend_merge_array_shift() From 34cc173f23bedd110cff55944c73e1992a908c59 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Fri, 20 Oct 2023 01:38:36 +0200 Subject: [PATCH 112/163] Fix the order of intrinsic parsing --- dace/frontend/fortran/intrinsics.py | 33 +++++++++++++++-------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/dace/frontend/fortran/intrinsics.py b/dace/frontend/fortran/intrinsics.py index 92cb4619cc..c2e5afe79b 100644 --- a/dace/frontend/fortran/intrinsics.py +++ b/dace/frontend/fortran/intrinsics.py @@ -123,7 +123,7 @@ def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): pass @abstractmethod - def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): + def _summarize_args(self, exec_node: ast_internal_classes.Execution_Part_Node, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): pass @abstractmethod @@ -149,7 +149,7 @@ def _skip_result_assignment(self): """ @abstractmethod - def _update_result_type(self, node: ast_internal_classes.Execution_Part_Node, var: ast_internal_classes.Name_Node): + def _update_result_type(self, var: ast_internal_classes.Name_Node): pass def _parse_array(self, node: ast_internal_classes.Execution_Part_Node, arg: ast_internal_classes.FNode) -> ast_internal_classes.Array_Subscript_Node: @@ -280,16 +280,16 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No # calls to the same intrinsic. self._initialize() - # Change the type of result variable - self._update_result_type(node, child.lval) - # Visit all intrinsic arguments and extract arrays for i in mywalk(child.rval): if isinstance(i, ast_internal_classes.Call_Expr_Node) and i.name.name == self.func_name(): self._parse_call_expr_node(i) # Verify that all of intrinsic args are correct and prepare them for loop generation - self._summarize_args(child, newbody) + self._summarize_args(node, child, newbody) + + # Change the type of result variable + self._update_result_type(child.lval) # Initialize the result variable init_stm = self._initialize_result(child) @@ -345,7 +345,7 @@ def _initialize(self): self.rvals = [] self.argument_variable = None - def _update_result_type(self, node: ast_internal_classes.Execution_Part_Node, var: ast_internal_classes.Name_Node): + def _update_result_type(self, var: ast_internal_classes.Name_Node): """ For both SUM and PRODUCT, the result type depends on the input variable. 
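A note on why the reordering in this patch matters: MERGE writes straight into the destination of the enclosing assignment rather than through a result variable, so the arguments must be summarized (and the destination parsed from the assignment's left-hand side) before the result-type and result-initialization hooks run. For reference, the elementwise behavior the MERGE lowering produces can be sketched in plain Python as follows; this is an illustration only, with tsource/fsource following the standard Fortran argument names:

    def lowered_merge(tsource, fsource, mask, dest):
        # the generated If_Stmt_Node tests the mask and picks one source per element
        for i in range(len(dest)):
            if mask[i]:  # copy_first branch: mask(i) == 1
                dest[i] = tsource[i]
            else:        # copy_second branch
                dest[i] = fsource[i]

    res = [0] * 4
    lowered_merge([1, 2, 3, 4], [9, 8, 7, 6], [1, 0, 1, 0], res)
    assert res == [1, 8, 3, 6]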
@@ -368,7 +368,7 @@ def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): raise NotImplementedError("We do not support non-array arguments for SUM/PRODUCT") - def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): + def _summarize_args(self, exec_node: ast_internal_classes.Execution_Part_Node, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): if len(self.rvals) != 1: raise NotImplementedError("Only one array can be summed") @@ -466,7 +466,7 @@ def _initialize(self): self.dominant_array = None self.cond = None - def _update_result_type(self, node: ast_internal_classes.Execution_Part_Node, var: ast_internal_classes.Name_Node): + def _update_result_type(self, var: ast_internal_classes.Name_Node): """ For all functions, the result type is INTEGER. @@ -495,7 +495,7 @@ def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): else: self.first_array, self.second_array, self.cond = self._parse_binary_op(node, arg) - def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): + def _summarize_args(self, exec_node: ast_internal_classes.Execution_Part_Node, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): rangeslen_left = [] par_Decl_Range_Finder(self.first_array, self.loop_ranges, [], rangeslen_left, self.count, new_func_body, self.scope_vars, True) @@ -682,7 +682,7 @@ def _initialize(self): self.rvals = [] self.argument_variable = None - def _update_result_type(self, node: ast_internal_classes.Execution_Part_Node, var: ast_internal_classes.Name_Node): + def _update_result_type(self, var: ast_internal_classes.Name_Node): """ For both MINVAL and MAXVAL, the result type depends on the input variable. @@ -705,7 +705,7 @@ def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): else: raise NotImplementedError("We do not support non-array arguments for MINVAL/MAXVAL") - def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): + def _summarize_args(self, exec_node: ast_internal_classes.Execution_Part_Node, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): if len(self.rvals) != 1: raise NotImplementedError("Only one array can be summed") @@ -834,15 +834,14 @@ def _initialize(self): def func_name() -> str: return "__dace_merge" - def _update_result_type(self, node: ast_internal_classes.Execution_Part_Node, var: ast_internal_classes.Name_Node): + def _update_result_type(self, var: ast_internal_classes.Name_Node): """ We can ignore the result type, because we exempted this transformation from generating a result. In MERGE, we write directly to the destination array. Thus, we store this result array for future use. 
""" - - self.destination_array = self._parse_array(node, var) + pass def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): @@ -874,7 +873,9 @@ def _parse_call_expr_node(self, node: ast_internal_classes.Call_Expr_Node): self.mask_first_array, self.mask_second_array, self.mask_cond = self._parse_binary_op(node, arg) - def _summarize_args(self, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): + def _summarize_args(self, exec_node: ast_internal_classes.Execution_Part_Node, node: ast_internal_classes.FNode, new_func_body: List[ast_internal_classes.FNode]): + + self.destination_array = self._parse_array(exec_node, node.lval) # The first main argument is an array -> this dictates loop boundaries # Other arrays, regardless if they appear as the second array or mask, need to have the same loop boundary. From bdecb25064b103bdc481cb895e106f2bb7ae12f7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 20 Oct 2023 13:27:54 +0000 Subject: [PATCH 113/163] Bump urllib3 from 2.0.6 to 2.0.7 (#1400) Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.0.6 to 2.0.7. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.0.6...2.0.7) --- updated-dependencies: - dependency-name: urllib3 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: alexnick83 <31545860+alexnick83@users.noreply.github.com> --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 996449dbef..5f804e1b4c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,7 +20,7 @@ PyYAML==6.0 requests==2.31.0 six==1.16.0 sympy==1.9 -urllib3==2.0.6 +urllib3==2.0.7 websockets==11.0.3 Werkzeug==2.3.5 zipp==3.15.0 From af62440be7bd3a2756279288042b4c67de0b3411 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Fri, 20 Oct 2023 23:11:44 +0200 Subject: [PATCH 114/163] Bugfixes and extended testing for Fortran SUM (#1390) * Fix incorrect generation of sum to loop code for Fortran frontend * Support passing array with no bounds in Fortran sum() * Add test case for Foftran sum * Fix bug in offset normalization and support Fortran SUM for arrays with offsets * Expand tests for array2loop in Fortran * Add more tests covering 2D sum in Fortran * Support Fortran sum for arrays without explicit dimension access declaration * Add more tests for Fortran sum over 2D arrays --------- Co-authored-by: acalotoiu <61420859+acalotoiu@users.noreply.github.com> --- dace/frontend/fortran/ast_transforms.py | 40 +++++- tests/fortran/array_to_loop_offset.py | 104 ++++++++++++++ tests/fortran/sum_to_loop_offset.py | 176 ++++++++++++++++++++++++ 3 files changed, 313 insertions(+), 7 deletions(-) create mode 100644 tests/fortran/sum_to_loop_offset.py diff --git a/dace/frontend/fortran/ast_transforms.py b/dace/frontend/fortran/ast_transforms.py index e2a7246aed..32744c5120 100644 --- a/dace/frontend/fortran/ast_transforms.py +++ b/dace/frontend/fortran/ast_transforms.py @@ -268,7 +268,7 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No ast_internal_classes.Var_Decl_Node( name="tmp_call_" + str(temp), type=res[i].type, - sizes=None, + sizes=None ) ])) newbody.append( @@ -284,7 +284,7 @@ def 
visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No ast_internal_classes.Var_Decl_Node( name="tmp_call_" + str(temp), type=res[i].type, - sizes=None, + sizes=None ) ])) newbody.append( @@ -458,7 +458,11 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No if self.normalize_offsets: # Find the offset of a variable to which we are assigning - var_name = child.lval.name.name + var_name = "" + if isinstance(j, ast_internal_classes.Name_Node): + var_name = j.name + else: + var_name = j.name.name variable = self.scope_vars.get_var(child.parent, var_name) offset = variable.offsets[idx] @@ -737,8 +741,7 @@ def par_Decl_Range_Finder(node: ast_internal_classes.Array_Subscript_Node, count: int, newbody: list, scope_vars: ScopeVarsDeclarations, - declaration=True, - is_sum_to_loop=False): + declaration=True): """ Helper function for the transformation of array operations and sums to loops :param node: The AST to be transformed @@ -753,6 +756,7 @@ def par_Decl_Range_Finder(node: ast_internal_classes.Array_Subscript_Node, currentindex = 0 indices = [] + offsets = scope_vars.get_var(node.parent, node.name.name).offsets for idx, i in enumerate(node.indices): @@ -926,14 +930,36 @@ def visit_Execution_Part_Node(self, node: ast_internal_classes.Execution_Part_No current = child.lval val = child.rval - rvals = [i for i in mywalk(val) if isinstance(i, ast_internal_classes.Array_Subscript_Node)] + + rvals = [] + for i in mywalk(val): + if isinstance(i, ast_internal_classes.Call_Expr_Node) and i.name.name == '__dace_sum': + + for arg in i.args: + + # supports syntax SUM(arr) + if isinstance(arg, ast_internal_classes.Name_Node): + array_node = ast_internal_classes.Array_Subscript_Node(parent=arg.parent) + array_node.name = arg + + # If we access SUM(arr) where arr has many dimensions, + # We need to create a ParDecl_Node for each dimension + dims = len(self.scope_vars.get_var(node.parent, arg.name).sizes) + array_node.indices = [ast_internal_classes.ParDecl_Node(type='ALL')] * dims + + rvals.append(array_node) + + # supports syntax SUM(arr(:)) + if isinstance(arg, ast_internal_classes.Array_Subscript_Node): + rvals.append(arg) + if len(rvals) != 1: raise NotImplementedError("Only one array can be summed") val = rvals[0] rangeposrval = [] rangesrval = [] - par_Decl_Range_Finder(val, rangesrval, rangeposrval, self.count, newbody, self.scope_vars, False, True) + par_Decl_Range_Finder(val, rangesrval, rangeposrval, self.count, newbody, self.scope_vars, True) range_index = 0 body = ast_internal_classes.BinOp_Node(lval=current, diff --git a/tests/fortran/array_to_loop_offset.py b/tests/fortran/array_to_loop_offset.py index 43d01d9b6b..5042859f8c 100644 --- a/tests/fortran/array_to_loop_offset.py +++ b/tests/fortran/array_to_loop_offset.py @@ -112,8 +112,112 @@ def test_fortran_frontend_arr2loop_2d_offset(): for j in range(7,10): assert a[i-1, j-1] == i * 2 +def test_fortran_frontend_arr2loop_2d_offset2(): + """ + Tests that the generated array map correctly handles offsets. 
+ """ + test_string = """ + PROGRAM index_offset_test + implicit none + double precision, dimension(5,7:9) :: d + CALL index_test_function(d) + end + + SUBROUTINE index_test_function(d) + double precision, dimension(5,7:9) :: d + + d(:,:) = 43 + + END SUBROUTINE index_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "index_offset_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + assert len(sdfg.data('d').shape) == 2 + assert sdfg.data('d').shape[0] == 5 + assert sdfg.data('d').shape[1] == 3 + + a = np.full([5,9], 42, order="F", dtype=np.float64) + sdfg(d=a) + for i in range(1,6): + for j in range(7,10): + assert a[i-1, j-1] == 43 + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "index_offset_test", True) + sdfg.simplify(verbose=True) + sdfg.compile() + + a = np.full([5,3], 42, order="F", dtype=np.float64) + sdfg(d=a) + for i in range(0,5): + for j in range(0,3): + assert a[i, j] == 43 + +def test_fortran_frontend_arr2loop_2d_offset3(): + """ + Tests that the generated array map correctly handles offsets. + """ + test_string = """ + PROGRAM index_offset_test + implicit none + double precision, dimension(5,7:9) :: d + CALL index_test_function(d) + end + + SUBROUTINE index_test_function(d) + double precision, dimension(5,7:9) :: d + + d(2:4, 7:8) = 43 + + END SUBROUTINE index_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "index_offset_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + assert len(sdfg.data('d').shape) == 2 + assert sdfg.data('d').shape[0] == 5 + assert sdfg.data('d').shape[1] == 3 + + a = np.full([5,9], 42, order="F", dtype=np.float64) + sdfg(d=a) + for i in range(2,4): + for j in range(7,9): + assert a[i-1, j-1] == 43 + for j in range(9,10): + assert a[i-1, j-1] == 42 + + for i in [1, 5]: + for j in range(7,10): + assert a[i-1, j-1] == 42 + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "index_offset_test", True) + sdfg.simplify(verbose=True) + sdfg.compile() + + a = np.full([5,3], 42, order="F", dtype=np.float64) + sdfg(d=a) + for i in range(1,4): + for j in range(0,2): + assert a[i, j] == 43 + for j in range(2,3): + assert a[i, j] == 42 + + for i in [0, 4]: + for j in range(0,3): + assert a[i, j] == 42 + if __name__ == "__main__": test_fortran_frontend_arr2loop_1d_offset() test_fortran_frontend_arr2loop_2d_offset() + test_fortran_frontend_arr2loop_2d_offset2() + test_fortran_frontend_arr2loop_2d_offset3() test_fortran_frontend_arr2loop_without_offset() diff --git a/tests/fortran/sum_to_loop_offset.py b/tests/fortran/sum_to_loop_offset.py new file mode 100644 index 0000000000..e933589e0f --- /dev/null +++ b/tests/fortran/sum_to_loop_offset.py @@ -0,0 +1,176 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. + +import numpy as np + +from dace.frontend.fortran import ast_transforms, fortran_parser + +def test_fortran_frontend_sum2loop_1d_without_offset(): + """ + Tests that the generated array map correctly handles offsets. 
+ """ + test_string = """ + PROGRAM index_offset_test + implicit none + double precision, dimension(7) :: d + double precision, dimension(3) :: res + CALL index_test_function(d, res) + end + + SUBROUTINE index_test_function(d, res) + double precision, dimension(7) :: d + double precision, dimension(3) :: res + + res(1) = SUM(d(:)) + res(2) = SUM(d) + res(3) = SUM(d(2:6)) + + END SUBROUTINE index_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "index_offset_test", False) + sdfg.simplify(verbose=True) + sdfg.compile() + + size = 7 + d = np.full([size], 0, order="F", dtype=np.float64) + for i in range(size): + d[i] = i + 1 + res = np.full([3], 42, order="F", dtype=np.float64) + sdfg(d=d, res=res) + assert res[0] == (1 + size) * size / 2 + assert res[1] == (1 + size) * size / 2 + assert res[2] == (2 + size - 1) * (size - 2)/ 2 + +def test_fortran_frontend_sum2loop_1d_offset(): + """ + Tests that the generated array map correctly handles offsets. + """ + test_string = """ + PROGRAM index_offset_test + implicit none + double precision, dimension(2:6) :: d + double precision, dimension(3) :: res + CALL index_test_function(d,res) + end + + SUBROUTINE index_test_function(d, res) + double precision, dimension(2:6) :: d + double precision, dimension(3) :: res + + res(1) = SUM(d) + res(2) = SUM(d(:)) + res(3) = SUM(d(3:5)) + + END SUBROUTINE index_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "index_offset_test", True) + sdfg.simplify(verbose=True) + sdfg.compile() + + size = 5 + d = np.full([size], 0, order="F", dtype=np.float64) + for i in range(size): + d[i] = i + 1 + res = np.full([3], 42, order="F", dtype=np.float64) + sdfg(d=d, res=res) + assert res[0] == (1 + size) * size / 2 + assert res[1] == (1 + size) * size / 2 + assert res[2] == (2 + size - 1) * (size - 2) / 2 + +def test_fortran_frontend_arr2loop_2d(): + """ + Tests that the generated array map correctly handles offsets. + """ + test_string = """ + PROGRAM index_offset_test + implicit none + double precision, dimension(5,3) :: d + double precision, dimension(4) :: res + CALL index_test_function(d,res) + end + + SUBROUTINE index_test_function(d, res) + double precision, dimension(5,3) :: d + double precision, dimension(4) :: res + + res(1) = SUM(d) + res(2) = SUM(d(:,:)) + res(3) = SUM(d(2:4, 2)) + res(4) = SUM(d(2:4, 2:3)) + + END SUBROUTINE index_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "index_offset_test", True) + sdfg.simplify(verbose=True) + sdfg.compile() + + sizes = [5, 3] + d = np.full(sizes, 42, order="F", dtype=np.float64) + cnt = 0 + for i in range(sizes[0]): + for j in range(sizes[1]): + d[i, j] = cnt + cnt += 1 + res = np.full([4], 42, order="F", dtype=np.float64) + sdfg(d=d, res=res) + assert res[0] == 105 + assert res[1] == 105 + assert res[2] == 21 + assert res[3] == 45 + +def test_fortran_frontend_arr2loop_2d_offset(): + """ + Tests that the generated array map correctly handles offsets. 
+ """ + test_string = """ + PROGRAM index_offset_test + implicit none + double precision, dimension(2:6,7:10) :: d + double precision, dimension(3) :: res + CALL index_test_function(d,res) + end + + SUBROUTINE index_test_function(d, res) + double precision, dimension(2:6,7:10) :: d + double precision, dimension(3) :: res + + res(1) = SUM(d) + res(2) = SUM(d(:,:)) + res(3) = SUM(d(3:5, 8:9)) + + END SUBROUTINE index_test_function + """ + + # Now test to verify it executes correctly with no offset normalization + + sdfg = fortran_parser.create_sdfg_from_string(test_string, "index_offset_test", True) + sdfg.simplify(verbose=True) + sdfg.compile() + + sizes = [5, 4] + d = np.full(sizes, 42, order="F", dtype=np.float64) + cnt = 0 + for i in range(sizes[0]): + for j in range(sizes[1]): + d[i, j] = cnt + cnt += 1 + res = np.full([3], 42, order="F", dtype=np.float64) + sdfg(d=d, res=res) + assert res[0] == 190 + assert res[1] == 190 + assert res[2] == 57 + +if __name__ == "__main__": + + test_fortran_frontend_sum2loop_1d_without_offset() + test_fortran_frontend_sum2loop_1d_offset() + test_fortran_frontend_arr2loop_2d() + test_fortran_frontend_arr2loop_2d_offset() From 66913220ea600492db59cf8e536271b36c1554bd Mon Sep 17 00:00:00 2001 From: alexnick83 <31545860+alexnick83@users.noreply.github.com> Date: Sat, 21 Oct 2023 11:22:06 +0200 Subject: [PATCH 115/163] Option for utilizing GPU global memory (#1405) * Added option to change storage of non-transient data to GPU global memory. * Fixed typos. --- dace/transformation/auto/auto_optimize.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/dace/transformation/auto/auto_optimize.py b/dace/transformation/auto/auto_optimize.py index 54dbc8d4ac..644df59e5c 100644 --- a/dace/transformation/auto/auto_optimize.py +++ b/dace/transformation/auto/auto_optimize.py @@ -515,11 +515,29 @@ def make_transients_persistent(sdfg: SDFG, return result +def apply_gpu_storage(sdfg: SDFG) -> None: + """ Changes the storage of the SDFG's input and output data to GPU global memory. """ + + written_scalars = set() + for state in sdfg.nodes(): + for node in state.data_nodes(): + desc = node.desc(sdfg) + if isinstance(desc, dt.Scalar) and not desc.transient and state.in_degree(node) > 0: + written_scalars.add(node.data) + + for name, desc in sdfg.arrays.items(): + if not desc.transient and desc.storage == dtypes.StorageType.Default: + if isinstance(desc, dt.Scalar) and not name in written_scalars: + continue + desc.storage = dtypes.StorageType.GPU_Global + + def auto_optimize(sdfg: SDFG, device: dtypes.DeviceType, validate: bool = True, validate_all: bool = False, - symbols: Dict[str, int] = None) -> SDFG: + symbols: Dict[str, int] = None, + use_gpu_storage: bool = False) -> SDFG: """ Runs a basic sequence of transformations to optimize a given SDFG to decent performance. In particular, performs the following: @@ -539,6 +557,7 @@ def auto_optimize(sdfg: SDFG, have been applied. :param validate_all: If True, validates the SDFG after every step. :param symbols: Optional dict that maps symbols (str/symbolic) to int/float + :param use_gpu_storage: If True, changes the storage of non-transient data to GPU global memory. :return: The optimized SDFG. :note: Operates in-place on the given SDFG. 
:note: This function is still experimental and may harm correctness in @@ -565,6 +584,8 @@ def auto_optimize(sdfg: SDFG, # Apply GPU transformations and set library node implementations if device == dtypes.DeviceType.GPU: + if use_gpu_storage: + apply_gpu_storage(sdfg) sdfg.apply_gpu_transformations() sdfg.simplify() From 0f731d6c60fdbc26fa3963c6a4c7c58a24afeb9a Mon Sep 17 00:00:00 2001 From: Jan Kleine Date: Thu, 26 Oct 2023 18:25:58 +0200 Subject: [PATCH 116/163] Add tensor storage format abstraction (#1392) * Add tensor storage format abstraction Format abstraction is based on [https://doi.org/10.1145/3276493]. * Fix type signature from OrderedDict to Dict * Fix typos sefl and Singelton * Remove OrderedDict in favor of Dict * Replace |= with .update() for backwards compatibility * Fix serialization issues --- dace/data.py | 697 +++++++++++++++++++++++++++++++++ tests/sdfg/data/tensor_test.py | 131 +++++++ 2 files changed, 828 insertions(+) create mode 100644 tests/sdfg/data/tensor_test.py diff --git a/dace/data.py b/dace/data.py index 0a9858458b..199e7dabd4 100644 --- a/dace/data.py +++ b/dace/data.py @@ -1,8 +1,10 @@ # Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +import aenum import copy as cp import ctypes import functools +from abc import ABC, abstractmethod from collections import OrderedDict from numbers import Number from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union @@ -482,6 +484,701 @@ def __getitem__(self, s): if isinstance(s, list) or isinstance(s, tuple): return StructArray(self, tuple(s)) return StructArray(self, (s, )) + + +class TensorIterationTypes(aenum.AutoNumberEnum): + """ + Types of tensor iteration capabilities. + + Value (Coordinate Value Iteration) allows to directly iterate over + coordinates such as when using the Dense index type. + + Position (Coordinate Position Iteratation) iterates over coordinate + positions, at which the actual coordinates lie. This is for example the case + with a compressed index, in which the pos array enables one to iterate over + the positions in the crd array that hold the actual coordinates. + """ + Value = () + Position = () + + +class TensorAssemblyType(aenum.AutoNumberEnum): + """ + Types of possible assembly strategies for the individual indices. + + NoAssembly: Assembly is not possible as such. + + Insert: index allows inserting elements at random (e.g. Dense) + + Append: index allows appending to a list of existing coordinates. Depending + on append order, this affects whether the index is ordered or not. This + could be changed by sorting the index after assembly + """ + NoAssembly = () + Insert = () + Append = () + + +class TensorIndex(ABC): + """ + Abstract base class for tensor index implementations. + """ + + @property + @abstractmethod + def iteration_type(self) -> TensorIterationTypes: + """ + Iteration capability supported by this index. + + See TensorIterationTypes for reference. + """ + pass + + @property + @abstractmethod + def locate(self) -> bool: + """ + True if the index supports locate (aka random access), False otw. + """ + pass + + @property + @abstractmethod + def assembly(self) -> TensorAssemblyType: + """ + What assembly type is supported by the index. + + See TensorAssemblyType for reference. + """ + pass + + @property + @abstractmethod + def full(self) -> bool: + """ + True if the level is full, False otw. + + A level is considered full if it encompasses all valid coordinates along + the corresponding tensor dimension. 
+ """ + pass + + @property + @abstractmethod + def ordered(self) -> bool: + """ + True if the level is ordered, False otw. + + A level is ordered when all coordinates that share the same ancestor are + ordered by increasing value (e.g. in typical CSR). + """ + pass + + @property + @abstractmethod + def unique(self) -> bool: + """ + True if coordinate in the level are unique, False otw. + + A level is considered unique if no collection of coordinates that share + the same ancestor contains duplicates. In CSR this is True, in COO it is + not. + """ + pass + + @property + @abstractmethod + def branchless(self) -> bool: + """ + True if the level doesn't branch, false otw. + + A level is considered branchless if no coordinate has a sibling (another + coordinate with same ancestor) and all coordinates in parent level have + a child. In other words if there is a bijection between the coordinates + in this level and the parent level. An example of the is the Singleton + index level in the COO format. + """ + pass + + @property + @abstractmethod + def compact(self) -> bool: + """ + True if the level is compact, false otw. + + A level is compact if no two coordinates are separated by an unlabled + node that does not encode a coordinate. An example of a compact level + can be found in CSR, while the DIA formats range and offset levels are + not compact (they have entries that would coorespond to entries outside + the tensors index range, e.g. column -1). + """ + pass + + @abstractmethod + def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> Dict[str, Data]: + """ + Generates the fields needed for the index. + + :returns: a Dict of fields that need to be present in the struct + """ + pass + + + def to_json(self): + attrs = serialize.all_properties_to_json(self) + + retdict = {"type": type(self).__name__, "attributes": attrs} + + return retdict + + + @classmethod + def from_json(cls, json_obj, context=None): + + # Selecting proper subclass + if json_obj['type'] == "TensorIndexDense": + self = TensorIndexDense.__new__(TensorIndexDense) + elif json_obj['type'] == "TensorIndexCompressed": + self = TensorIndexCompressed.__new__(TensorIndexCompressed) + elif json_obj['type'] == "TensorIndexSingleton": + self = TensorIndexSingleton.__new__(TensorIndexSingleton) + elif json_obj['type'] == "TensorIndexRange": + self = TensorIndexRange.__new__(TensorIndexRange) + elif json_obj['type'] == "TensorIndexOffset": + self = TensorIndexOffset.__new__(TensorIndexOffset) + else: + raise TypeError(f"Invalid data type, got: {json_obj['type']}") + + serialize.set_properties_from_json(self, json_obj['attributes'], context=context) + + return self + + +@make_properties +class TensorIndexDense(TensorIndex): + """ + Dense tensor index. + + Levels of this type encode the the coordinate in the interval [0, N), where + N is the size of the corresponding dimension. This level doesn't need any + index structure beyond the corresponding dimension size. 
+ """ + + _ordered = Property(dtype=bool, default=False) + _unique = Property(dtype=bool) + + @property + def iteration_type(self) -> TensorIterationTypes: + return TensorIterationTypes.Value + + @property + def locate(self) -> bool: + return True + + @property + def assembly(self) -> TensorAssemblyType: + return TensorAssemblyType.Insert + + @property + def full(self) -> bool: + return True + + @property + def ordered(self) -> bool: + return self._ordered + + @property + def unique(self) -> bool: + return self._unique + + @property + def branchless(self) -> bool: + return False + + @property + def compact(self) -> bool: + return True + + def __init__(self, ordered: bool = True, unique: bool = True): + self._ordered = ordered + self._unique = unique + + def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> Dict[str, Data]: + return {} + + def __repr__(self) -> str: + s = "Dense" + + non_defaults = [] + if not self._ordered: + non_defaults.append("¬O") + if not self._unique: + non_defaults.append("¬U") + + if len(non_defaults) > 0: + s += f"({','.join(non_defaults)})" + + return s + + +@make_properties +class TensorIndexCompressed(TensorIndex): + """ + Tensor level that stores coordinates in segmented array. + + Levels of this type are compressed using a segented array. The pos array + holds the start and end positions of the segment in the crd (coordinate) + array that holds the child coordinates corresponding the parent. + """ + + _full = Property(dtype=bool, default=False) + _ordered = Property(dtype=bool, default=False) + _unique = Property(dtype=bool, default=False) + + @property + def iteration_type(self) -> TensorIterationTypes: + return TensorIterationTypes.Position + + @property + def locate(self) -> bool: + return False + + @property + def assembly(self) -> TensorAssemblyType: + return TensorAssemblyType.Append + + @property + def full(self) -> bool: + return self._full + + @property + def ordered(self) -> bool: + return self._ordered + + @property + def unique(self) -> bool: + return self._unique + + @property + def branchless(self) -> bool: + return False + + @property + def compact(self) -> bool: + return True + + def __init__(self, + full: bool = False, + ordered: bool = True, + unique: bool = True): + self._full = full + self._ordered = ordered + self._unique = unique + + def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> Dict[str, Data]: + return { + f"idx{lvl}_pos": dtypes.int32[dummy_symbol], # TODO (later) choose better length + f"idx{lvl}_crd": dtypes.int32[dummy_symbol], # TODO (later) choose better length + } + + def __repr__(self) -> str: + s = "Compressed" + + non_defaults = [] + if self._full: + non_defaults.append("F") + if not self._ordered: + non_defaults.append("¬O") + if not self._unique: + non_defaults.append("¬U") + + if len(non_defaults) > 0: + s += f"({','.join(non_defaults)})" + + return s + + +@make_properties +class TensorIndexSingleton(TensorIndex): + """ + Tensor index that encodes a single coordinate per parent coordinate. + + Levels of this type hold exactly one coordinate for every coordinate in the + parent level. An example can be seen in the COO format, where every + coordinate but the first is encoded in this manner. 
+ """ + + _full = Property(dtype=bool, default=False) + _ordered = Property(dtype=bool, default=False) + _unique = Property(dtype=bool, default=False) + + @property + def iteration_type(self) -> TensorIterationTypes: + return TensorIterationTypes.Position + + @property + def locate(self) -> bool: + return False + + @property + def assembly(self) -> TensorAssemblyType: + return TensorAssemblyType.Append + + @property + def full(self) -> bool: + return self._full + + @property + def ordered(self) -> bool: + return self._ordered + + @property + def unique(self) -> bool: + return self._unique + + @property + def branchless(self) -> bool: + return True + + @property + def compact(self) -> bool: + return True + + def __init__(self, + full: bool = False, + ordered: bool = True, + unique: bool = True): + self._full = full + self._ordered = ordered + self._unique = unique + + def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> Dict[str, Data]: + return { + f"idx{lvl}_crd": dtypes.int32[dummy_symbol], # TODO (later) choose better length + } + + def __repr__(self) -> str: + s = "Singleton" + + non_defaults = [] + if self._full: + non_defaults.append("F") + if not self._ordered: + non_defaults.append("¬O") + if not self._unique: + non_defaults.append("¬U") + + if len(non_defaults) > 0: + s += f"({','.join(non_defaults)})" + + return s + + +@make_properties +class TensorIndexRange(TensorIndex): + """ + Tensor index that encodes a interval of coordinates for every parent. + + The interval is computed from an offset for each parent together with the + tensor dimension size of this level (M) and the parent level (N) parents + corresponding tensor. Given the parent coordinate i, the level encodes the + range of coordinates between max(0, -offset[i]) and min(N, M - offset[i]). + """ + + _ordered = Property(dtype=bool, default=False) + _unique = Property(dtype=bool, default=False) + + @property + def iteration_type(self) -> TensorIterationTypes: + return TensorIterationTypes.Value + + @property + def locate(self) -> bool: + return False + + @property + def assembly(self) -> TensorAssemblyType: + return TensorAssemblyType.NoAssembly + + @property + def full(self) -> bool: + return False + + @property + def ordered(self) -> bool: + return self._ordered + + @property + def unique(self) -> bool: + return self._unique + + @property + def branchless(self) -> bool: + return False + + @property + def compact(self) -> bool: + return False + + def __init__(self, ordered: bool = True, unique: bool = True): + self._ordered = ordered + self._unique = unique + + def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> Dict[str, Data]: + return { + f"idx{lvl}_offset": dtypes.int32[dummy_symbol], # TODO (later) choose better length + } + + def __repr__(self) -> str: + s = "Range" + + non_defaults = [] + if not self._ordered: + non_defaults.append("¬O") + if not self._unique: + non_defaults.append("¬U") + + if len(non_defaults) > 0: + s += f"({','.join(non_defaults)})" + + return s + + +@make_properties +class TensorIndexOffset(TensorIndex): + """ + Tensor index that encodes the next coordinates as offset from parent. + + Given a parent coordinate i and an offset index k, the level encodes the + coordinate j = i + offset[k]. 
+ """ + + _ordered = Property(dtype=bool, default=False) + _unique = Property(dtype=bool, default=False) + + @property + def iteration_type(self) -> TensorIterationTypes: + return TensorIterationTypes.Position + + @property + def locate(self) -> bool: + return False + + @property + def assembly(self) -> TensorAssemblyType: + return TensorAssemblyType.NoAssembly + + @property + def full(self) -> bool: + return False + + @property + def ordered(self) -> bool: + return self._ordered + + @property + def unique(self) -> bool: + return self._unique + + @property + def branchless(self) -> bool: + return True + + @property + def compact(self) -> bool: + return False + + def __init__(self, ordered: bool = True, unique: bool = True): + self._ordered = ordered + self._unique = unique + + def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> Dict[str, Data]: + return { + f"idx{lvl}_offset": dtypes.int32[dummy_symbol], # TODO (later) choose better length + } + + def __repr__(self) -> str: + s = "Offset" + + non_defaults = [] + if not self._ordered: + non_defaults.append("¬O") + if not self._unique: + non_defaults.append("¬U") + + if len(non_defaults) > 0: + s += f"({','.join(non_defaults)})" + + return s + + +@make_properties +class Tensor(Structure): + """ + Abstraction for Tensor storage format. + + This abstraction is based on [https://doi.org/10.1145/3276493]. + """ + + value_dtype = TypeClassProperty(default=dtypes.int32, choices=dtypes.Typeclasses) + tensor_shape = ShapeProperty(default=[]) + indices = ListProperty(element_type=TensorIndex) + index_ordering = ListProperty(element_type=symbolic.SymExpr) + value_count = SymbolicProperty(default=0) + + def __init__( + self, + value_dtype: dtypes.Typeclasses, + tensor_shape, + indices: List[Tuple[TensorIndex, Union[int, symbolic.SymExpr]]], + value_count: symbolic.SymExpr, + name: str, + transient: bool = False, + storage: dtypes.StorageType = dtypes.StorageType.Default, + location: Dict[str, str] = None, + lifetime: dtypes.AllocationLifetime = dtypes.AllocationLifetime.Scope, + debuginfo: dtypes.DebugInfo = None): + """ + Constructor for Tensor storage format. + + Below are examples of common matrix storage formats: + + .. code-block:: python + + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + + csr = dace.data.Tensor( + dace.float32, + (M, N), + [(dace.data.Dense(), 0), (dace.data.Compressed(), 1)], + nnz, + "CSR_Matrix", + ) + + csc = dace.data.Tensor( + dace.float32, + (M, N), + [(dace.data.Dense(), 1), (dace.data.Compressed(), 0)], + nnz, + "CSC_Matrix", + ) + + coo = dace.data.Tensor( + dace.float32, + (M, N), + [ + (dace.data.Compressed(unique=False), 0), + (dace.data.Singleton(), 1), + ], + nnz, + "CSC_Matrix", + ) + + num_diags = dace.symbol('num_diags') # number of diagonals stored + + diag = dace.data.Tensor( + dace.float32, + (M, N), + [ + (dace.data.Dense(), num_diags), + (dace.data.Range(), 0), + (dace.data.Offset(), 1), + ], + nnz, + "DIA_Matrix", + ) + + Below you can find examples of common 3rd order tensor storage formats: + + .. 
code-block:: python + + I, J, K, nnz = (dace.symbol(s) for s in ('I', 'J', 'K', 'nnz')) + + coo = dace.data.Tensor( + dace.float32, + (I, J, K), + [ + (dace.data.Compressed(unique=False), 0), + (dace.data.Singleton(unique=False), 1), + (dace.data.Singleton(), 2), + ], + nnz, + "COO_3D_Tensor", + ) + + csf = dace.data.Tensor( + dace.float32, + (I, J, K), + [ + (dace.data.Compressed(), 0), + (dace.data.Compressed(), 1), + (dace.data.Compressed(), 2), + ], + nnz, + "CSF_3D_Tensor", + ) + + :param value_type: data type of the explicitly stored values. + :param tensor_shape: logical shape of tensor (#rows, #cols, etc...) + :param indices: + a list of tuples, each tuple represents a level in the tensor + storage hirachy, specifying the levels tensor index type, and the + corresponding dimension this level encodes (as index of the + tensor_shape tuple above). The order of the dimensions may differ + from the logical shape of the tensor, e.g. as seen in the CSC + format. If an index's dimension is unrelated to the tensor shape + (e.g. in diagonal format where the first index's dimension is the + number of diagonals stored), a symbol can be specified instead. + :param value_count: number of explicitly stored values. + :param name: name of resulting struct. + :param others: See Structure class for remaining arguments + """ + + self.value_dtype = value_dtype + self.tensor_shape = tensor_shape + self.value_count = value_count + + indices, index_ordering = zip(*indices) + self.indices, self.index_ordering = list(indices), list(index_ordering) + + num_dims = len(tensor_shape) + dimension_order = [idx for idx in self.index_ordering if isinstance(idx, int)] + + # all tensor dimensions must occure exactly once in indices + if not sorted(dimension_order) == list(range(num_dims)): + raise TypeError(( + f"All tensor dimensions must be refferenced exactly once in " + f"tensor indices. (referenced dimensions: {dimension_order}; " + f"tensor dimensions: {list(range(num_dims))})" + )) + + # assembling permanent and index specific fields + fields = dict( + order=Scalar(dtypes.int32), + dim_sizes=dtypes.int32[num_dims], + value_count=value_count, + values=dtypes.float32[value_count], + ) + + for (lvl, index) in enumerate(indices): + fields.update(index.fields(lvl, value_count)) + + super(Tensor, self).__init__(fields, name, transient, storage, location, + lifetime, debuginfo) + + def __repr__(self): + return f"{self.name} (dtype: {self.value_dtype}, shape: {list(self.tensor_shape)}, indices: {self.indices})" + + @staticmethod + def from_json(json_obj, context=None): + if json_obj['type'] != 'Tensor': + raise TypeError("Invalid data type") + + # Create dummy object + tensor = Tensor.__new__(Tensor) + serialize.set_properties_from_json(tensor, json_obj, context=context) + + return tensor @make_properties diff --git a/tests/sdfg/data/tensor_test.py b/tests/sdfg/data/tensor_test.py new file mode 100644 index 0000000000..06d3363a8b --- /dev/null +++ b/tests/sdfg/data/tensor_test.py @@ -0,0 +1,131 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. 
+import dace +import numpy as np +import pytest + +from scipy import sparse + + +def test_read_csr_tensor(): + + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + csr_obj = dace.data.Tensor( + dace.float32, + (M, N), + [(dace.data.TensorIndexDense(), 0), (dace.data.TensorIndexCompressed(), 1)], + nnz, + "CSR_Tensor") + + sdfg = dace.SDFG('tensor_csr_to_dense') + + sdfg.add_datadesc('A', csr_obj) + sdfg.add_array('B', [M, N], dace.float32) + + sdfg.add_view('vindptr', csr_obj.members['idx1_pos'].shape, csr_obj.members['idx1_pos'].dtype) + sdfg.add_view('vindices', csr_obj.members['idx1_crd'].shape, csr_obj.members['idx1_crd'].dtype) + sdfg.add_view('vdata', csr_obj.members['values'].shape, csr_obj.members['values'].dtype) + + state = sdfg.add_state() + + A = state.add_access('A') + B = state.add_access('B') + + indptr = state.add_access('vindptr') + indices = state.add_access('vindices') + data = state.add_access('vdata') + + state.add_edge(A, None, indptr, 'views', dace.Memlet.from_array('A.idx1_pos', csr_obj.members['idx1_pos'])) + state.add_edge(A, None, indices, 'views', dace.Memlet.from_array('A.idx1_crd', csr_obj.members['idx1_crd'])) + state.add_edge(A, None, data, 'views', dace.Memlet.from_array('A.values', csr_obj.members['values'])) + + ime, imx = state.add_map('i', dict(i='0:M')) + jme, jmx = state.add_map('idx', dict(idx='start:stop')) + jme.add_in_connector('start') + jme.add_in_connector('stop') + t = state.add_tasklet('indirection', {'j', '__val'}, {'__out'}, '__out[i, j] = __val') + + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i'), dst_conn='start') + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i+1'), dst_conn='stop') + state.add_memlet_path(indices, ime, jme, t, memlet=dace.Memlet(data='vindices', subset='idx'), dst_conn='j') + state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') + state.add_memlet_path(t, jmx, imx, B, memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') + + func = sdfg.compile() + + rng = np.random.default_rng(42) + A = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + B = np.zeros((20, 20), dtype=np.float32) + + inpA = csr_obj.dtype._typeclass.as_ctypes()(idx1_pos=A.indptr.__array_interface__['data'][0], + idx1_crd=A.indices.__array_interface__['data'][0], + values=A.data.__array_interface__['data'][0]) + + func(A=inpA, B=B, M=A.shape[0], N=A.shape[1], nnz=A.nnz) + ref = A.toarray() + + sdfg.save("./tensor.json") + + assert np.allclose(B, ref) + + +def test_csr_fields(): + + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + + csr = dace.data.Tensor( + dace.float32, + (M, N), + [(dace.data.TensorIndexDense(), 0), (dace.data.TensorIndexCompressed(), 1)], + nnz, + "CSR_Matrix", + ) + + expected_fields = ["idx1_pos", "idx1_crd"] + assert all(key in csr.members.keys() for key in expected_fields) + + +def test_dia_fields(): + + M, N, nnz, num_diags = (dace.symbol(s) for s in ('M', 'N', 'nnz', 'num_diags')) + + diag = dace.data.Tensor( + dace.float32, + (M, N), + [ + (dace.data.TensorIndexDense(), num_diags), + (dace.data.TensorIndexRange(), 0), + (dace.data.TensorIndexOffset(), 1), + ], + nnz, + "DIA_Matrix", + ) + + expected_fields = ["idx1_offset", "idx2_offset"] + assert all(key in diag.members.keys() for key in expected_fields) + + +def test_coo_fields(): + + I, J, K, nnz = (dace.symbol(s) for s in ('I', 'J', 'K', 'nnz')) + + coo = dace.data.Tensor( + 
dace.float32, + (I, J, K), + [ + (dace.data.TensorIndexCompressed(unique=False), 0), + (dace.data.TensorIndexSingleton(unique=False), 1), + (dace.data.TensorIndexSingleton(), 2), + ], + nnz, + "COO_3D_Tensor", + ) + + expected_fields = ["idx0_pos", "idx0_crd", "idx1_crd", "idx2_crd"] + assert all(key in coo.members.keys() for key in expected_fields) + + +if __name__ == "__main__": + test_read_csr_tensor() + test_csr_fields() + test_dia_fields() + test_coo_fields() From 3ddd2cccf54e3812c08c3762cd3c4257d312b7e2 Mon Sep 17 00:00:00 2001 From: Jan Kleine Date: Mon, 30 Oct 2023 14:17:30 +0100 Subject: [PATCH 117/163] Remove eroneous file creation (#1411) --- tests/sdfg/data/tensor_test.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/sdfg/data/tensor_test.py b/tests/sdfg/data/tensor_test.py index 06d3363a8b..3057539f70 100644 --- a/tests/sdfg/data/tensor_test.py +++ b/tests/sdfg/data/tensor_test.py @@ -63,8 +63,6 @@ def test_read_csr_tensor(): func(A=inpA, B=B, M=A.shape[0], N=A.shape[1], nnz=A.nnz) ref = A.toarray() - sdfg.save("./tensor.json") - assert np.allclose(B, ref) From 69d4f3d05aa84c77a44add95a19f23320e27c909 Mon Sep 17 00:00:00 2001 From: matteonussbauemer Date: Mon, 30 Oct 2023 21:59:38 +0100 Subject: [PATCH 118/163] create new branch that only contains changes to subsets.py and tests --- dace/subsets.py | 268 ++++++++++++++++++++++++---- tests/subset_covers_precise_test.py | 161 +++++++++++++++++ 2 files changed, 399 insertions(+), 30 deletions(-) create mode 100644 tests/subset_covers_precise_test.py diff --git a/dace/subsets.py b/dace/subsets.py index f8b66a565d..f2a2072343 100644 --- a/dace/subsets.py +++ b/dace/subsets.py @@ -10,21 +10,52 @@ from dace.config import Config +def nng(expr): + # When dealing with set sizes, assume symbols are non-negative + try: + # TODO: Fix in symbol definition, not here + for sym in list(expr.free_symbols): + expr = expr.subs({sym: sp.Symbol(sym.name, nonnegative=True)}) + return expr + except AttributeError: # No free_symbols in expr + return expr + +def bounding_box_cover_exact(subset_a, subset_b) -> bool: + return all([(symbolic.simplify_ext(nng(rb)) <= symbolic.simplify_ext(nng(orb))) == True + and (symbolic.simplify_ext(nng(re)) >= symbolic.simplify_ext(nng(ore))) == True + for rb, re, orb, ore in zip(subset_a.min_element(), subset_a.max_element(), + subset_b.min_element(), subset_b.max_element())]) + +def bounding_box_symbolic_positive(subset_a, subset_b, approximation = False)-> bool: + min_elements_a = subset_a.min_element_approx() if approximation else subset_a.min_element() + max_elements_a = subset_a.max_element_approx() if approximation else subset_a.max_element() + min_elements_b = subset_b.min_element_approx() if approximation else subset_b.min_element() + max_elements_b = subset_b.max_element_approx() if approximation else subset_b.max_element() + + for rb, re, orb, ore in zip(min_elements_a, max_elements_a, + min_elements_b, max_elements_b): + # NOTE: We first test for equality, which always returns True or False. If the equality test returns + # False, then we test for less-equal and greater-equal, which may return an expression, leading to + # TypeError. This is a workaround for the case where two expressions are the same or equal and + # SymPy confirms this but fails to return True when testing less-equal and greater-equal. 
+ + # lower bound: first check whether symbolic positive condition applies + if not (len(rb.free_symbols) == 0 and len(orb.free_symbols) == 1): + if not (symbolic.simplify_ext(nng(rb)) == symbolic.simplify_ext(nng(orb)) or + symbolic.simplify_ext(nng(rb)) <= symbolic.simplify_ext(nng(orb))): + return False + # upper bound: first check whether symbolic positive condition applies + if not (len(re.free_symbols) == 1 and len(ore.free_symbols) == 0): + if not (symbolic.simplify_ext(nng(re)) == symbolic.simplify_ext(nng(ore)) or + symbolic.simplify_ext(nng(re)) >= symbolic.simplify_ext(nng(ore))): + return False + return True + class Subset(object): """ Defines a subset of a data descriptor. """ def covers(self, other): """ Returns True if this subset covers (using a bounding box) another subset. """ - def nng(expr): - # When dealing with set sizes, assume symbols are non-negative - try: - # TODO: Fix in symbol definition, not here - for sym in list(expr.free_symbols): - expr = expr.subs({sym: sp.Symbol(sym.name, nonnegative=True)}) - return expr - except AttributeError: # No free_symbols in expr - return expr - symbolic_positive = Config.get('optimizer', 'symbolic_positive') if not symbolic_positive: @@ -38,28 +69,65 @@ def nng(expr): else: try: - for rb, re, orb, ore in zip(self.min_element_approx(), self.max_element_approx(), - other.min_element_approx(), other.max_element_approx()): - # NOTE: We first test for equality, which always returns True or False. If the equality test returns - # False, then we test for less-equal and greater-equal, which may return an expression, leading to - # TypeError. This is a workaround for the case where two expressions are the same or equal and - # SymPy confirms this but fails to return True when testing less-equal and greater-equal. - - # lower bound: first check whether symbolic positive condition applies - if not (len(rb.free_symbols) == 0 and len(orb.free_symbols) == 1): - if not (symbolic.simplify_ext(nng(rb)) == symbolic.simplify_ext(nng(orb)) or - symbolic.simplify_ext(nng(rb)) <= symbolic.simplify_ext(nng(orb))): - return False - - # upper bound: first check whether symbolic positive condition applies - if not (len(re.free_symbols) == 1 and len(ore.free_symbols) == 0): - if not (symbolic.simplify_ext(nng(re)) == symbolic.simplify_ext(nng(ore)) or - symbolic.simplify_ext(nng(re)) >= symbolic.simplify_ext(nng(ore))): - return False + if not bounding_box_symbolic_positive(self, other, True): + return False except TypeError: return False return True + + def covers_precise(self, other): + """ Returns True if self contains all the elements in other. """ + + # If self does not cover other with a bounding box union, return false. 
+ symbolic_positive = Config.get('optimizer', 'symbolic_positive') + try: + bounding_box_cover = bounding_box_cover_exact(self, other) if symbolic_positive else bounding_box_symbolic_positive(self, other) + if not bounding_box_cover: + return False + except TypeError: + return False + + try: + # if self is an index no further distinction is needed + if isinstance(self, Indices): + return True + + elif isinstance(self, Range): + # other is an index so we need to check if the step of self is such that other is covered + # self.start % self.step == other.index % self.step + if isinstance(other, Indices): + try: + return all( + [(symbolic.simplify_ext(nng(start)) % symbolic.simplify_ext(nng(step)) == + symbolic.simplify_ext(nng(i)) % symbolic.simplify_ext(nng(step))) == True + for (start, _, step), i in zip(self.ranges, other.indices)]) + except: + return False + if isinstance(other, Range): + # other is a range so in every dimension self.step has to divide other.step and + # self.start % self.step = other.start % other.step + try: + self_steps = [r[2] for r in self.ranges] + other_steps = [r[2] for r in other.ranges] + for start, step, ostart, ostep in zip(self.min_element(), self_steps, other.min_element(), + other_steps): + if not (ostep % step == 0 and + ((symbolic.simplify_ext(nng(start)) == symbolic.simplify_ext(nng(ostart))) or + (symbolic.simplify_ext(nng(start)) % symbolic.simplify_ext( + nng(step)) == symbolic.simplify_ext(nng(ostart)) % symbolic.simplify_ext( + nng(ostep))) == True)): + return False + except: + return False + return True + # unknown type + else: + raise TypeError + + except TypeError: + return False + def __repr__(self): return '%s (%s)' % (type(self).__name__, self.__str__()) @@ -973,6 +1041,111 @@ def intersection(self, other: 'Indices'): return self return None +class Subsetlist(Subset): + """ + Wrapper subset type that stores multiple Subsets in a list. + """ + + def __init__(self, subset): + self.subset_list: list[Subset] = [] + if isinstance(subset, Subsetlist): + self.subset_list = subset.subset_list + elif isinstance(subset, list): + for subset in subset: + if not subset: + break + if isinstance(subset, (Range, Indices)): + self.subset_list.append(subset) + else: + raise NotImplementedError + elif isinstance(subset, (Range, Indices)): + self.subset_list = [subset] + + def covers(self, other): + """ + Returns True if this Subsetlist covers another subset (using a bounding box). + If other is another SubsetList then self and other will + only return true if self is other. If other is a different type of subset + true is returned when one of the subsets in self is equal to other. + """ + + if isinstance(other, Subsetlist): + for subset in self.subset_list: + # check if ther is a subset in self that covers every subset in other + if all(subset.covers(s) for s in other.subset_list): + return True + # return False if that's not the case for any of the subsets in self + return False + else: + return any(s.covers(other) for s in self.subset_list) + + def covers_precise(self, other): + """ + Returns True if this Subsetlist covers another + subset. If other is another SubsetList then self and other will + only return true if self is other. 
If other is a different type of subset + true is returned when one of the subsets in self is equal to other + """ + + if isinstance(other, Subsetlist): + for subset in self.subset_list: + # check if ther is a subset in self that covers every subset in other + if all(subset.covers_precise(s) for s in other.subset_list): + return True + # return False if that's not the case for any of the subsets in self + return False + else: + return any(s.covers_precise(other) for s in self.subset_list) + + def __str__(self): + string = '' + for subset in self.subset_list: + if not string == '': + string += " " + string += subset.__str__() + return string + + def dims(self): + if not self.subset_list: + return 0 + return next(iter(self.subset_list)).dims() + + def union(self, other: Subset): + """In place union of self with another Subset""" + try: + if isinstance(other, Subsetlist): + self.subset_list += other.subset_list + elif isinstance(other, Indices) or isinstance(other, Range): + self.subset_list.append(other) + else: + raise TypeError + except TypeError: # cannot determine truth value of Relational + return None + + @property + def free_symbols(self) -> Set[str]: + result = set() + for subset in self.subset_list: + result |= subset.free_symbols + return result + + def replace(self, repl_dict): + for subset in self.subset_list: + subset.replace(repl_dict) + + def num_elements(self): + # TODO: write something more meaningful here + min = 0 + for subset in self.subset_list: + try: + if subset.num_elements() < min or min ==0: + min = subset.num_elements() + except: + continue + + return min + + def _union_special_cases(arb: symbolic.SymbolicType, brb: symbolic.SymbolicType, are: symbolic.SymbolicType, bre: symbolic.SymbolicType): @@ -1038,6 +1211,8 @@ def bounding_box_union(subset_a: Subset, subset_b: Subset) -> Range: return Range(result) + + def union(subset_a: Subset, subset_b: Subset) -> Subset: """ Compute the union of two Subset objects. If the subsets are not of the same type, degenerates to bounding-box @@ -1056,6 +1231,9 @@ def union(subset_a: Subset, subset_b: Subset) -> Subset: return subset_b elif subset_a is None and subset_b is None: raise TypeError('Both subsets cannot be None') + elif isinstance(subset_a, Subsetlist) or isinstance( + subset_b, Subsetlist): + return list_union(subset_a, subset_b) elif type(subset_a) != type(subset_b): return bounding_box_union(subset_a, subset_b) elif isinstance(subset_a, Indices): @@ -1066,13 +1244,43 @@ def union(subset_a: Subset, subset_b: Subset) -> Subset: # TODO(later): More involved Strided-Tiled Range union return bounding_box_union(subset_a, subset_b) else: - warnings.warn('Unrecognized Subset type %s in union, degenerating to' - ' bounding box' % type(subset_a).__name__) + warnings.warn( + 'Unrecognized Subset type %s in union, degenerating to' + ' bounding box' % type(subset_a).__name__) return bounding_box_union(subset_a, subset_b) except TypeError: # cannot determine truth value of Relational return None +def list_union(subset_a: Subset, subset_b: Subset) -> Subset: + """ + Returns the union of two Subset lists. + + :param subset_a: The first subset. + :param subset_b: The second subset. + :return: A Subsetlist object that contains all elements of subset_a and subset_b. 
+ """ + # TODO(later): Merge subsets in both lists if possible + try: + if subset_a is not None and subset_b is None: + return subset_a + elif subset_b is not None and subset_a is None: + return subset_b + elif subset_a is None and subset_b is None: + raise TypeError('Both subsets cannot be None') + elif type(subset_a) != type(subset_b): + if isinstance(subset_b, Subsetlist): + return Subsetlist(subset_b.subset_list.append(subset_a)) + else: + return Subsetlist(subset_a.subset_list.append(subset_b)) + elif isinstance(subset_a, Subsetlist): + return Subsetlist(subset_a.subset_list + subset_b.subset_list) + else: + return Subsetlist([subset_a, subset_b]) + + except TypeError: + return None + def intersects(subset_a: Subset, subset_b: Subset) -> Union[bool, None]: """ Returns True if two subsets intersect, False if they do not, or diff --git a/tests/subset_covers_precise_test.py b/tests/subset_covers_precise_test.py new file mode 100644 index 0000000000..793926ab1c --- /dev/null +++ b/tests/subset_covers_precise_test.py @@ -0,0 +1,161 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. + +import pytest +import dace +from dace.subsets import Indices, Subset, Range +from dace.config import Config + + +def test_integer_overlap_no_cover(): + # two overlapping subsets, neither of them covering the other + subset1 = Range.from_string("0:10:1") + subset2 = Range.from_string("5:11:1") + + assert (subset1.covers_precise(subset2) is False) + assert (subset2.covers_precise(subset1) is False) + + subset1 = Range.from_string("0:10:1, 3:8:1") + subset2 = Range.from_string("5:11:1, 2:9:1") + assert (subset1.covers_precise(subset2) is False) + assert (subset2.covers_precise(subset1) is False) + + +def test_integer_bounding_box_cover_coprime_step(): + # bb of subset1 covers bb of subset2 but step sizes of the subsets are coprime + subset1 = Range.from_string("0:10:3") + subset2 = Range.from_string("0:10:2") + + assert (subset1.covers_precise(subset2) is False) + assert (subset2.covers_precise(subset1) is False) + + subset1 = Range.from_string("0:10:3, 5:10:2") + subset2 = Range.from_string("0:10:2, 5:10:4") + assert (subset1.covers_precise(subset2) is False) + assert (subset2.covers_precise(subset1) is False) + + subset1 = Range.from_string("0:10:3, 6:10:2") + subset2 = Range.from_string("0:10:2, 5:10:4") + assert (subset1.covers_precise(subset2) is False) + assert (subset2.covers_precise(subset1) is False) + + +def test_integer_same_step_different_start(): + subset1 = Range.from_string("0:10:3") + subset2 = Range.from_string("1:10:3") + + assert (subset1.covers_precise(subset2) is False) + + +def test_integer_bounding_box_symbolic_step(): + + subset1 = Range.from_string("0:20:s") + subset2 = Range.from_string("0:10:s") + subset3 = Range.from_string("0:10:2 * s") + + assert (subset1.covers_precise(subset2)) + assert (subset1.covers_precise(subset3)) + assert (subset3.covers_precise(subset1) is False) + assert (subset3.covers_precise(subset2) is False) + + subset1 = Range.from_string("0:20:s, 30:50:k") + subset2 = Range.from_string("0:10:s, 40:50:k") + assert (subset1.covers_precise(subset2) is False) + + +def test_symbolic_boundaries(): + + subset1 = Range.from_string("N:M:1") + subset2 = Range.from_string("N:M:2") + assert (subset1.covers_precise(subset2)) + assert (subset2.covers_precise(subset1) is False) + + subset1 = Range.from_string("N + 1:M:1") + subset2 = Range.from_string("N:M:2") + assert (subset1.covers_precise(subset2) is False) + assert 
(subset2.covers_precise(subset1) is False) + + subset1 = Range.from_string("-N:M:1") + subset2 = Range.from_string("N:M:2") + assert (subset1.covers_precise(subset2) is False) + assert (subset2.covers_precise(subset1) is False) + + +def test_symbolic_boundaries_not_symbolic_positive(): + Config.set('optimizer', 'symbolic_positive', value=False) + + subset1 = Range.from_string("N:M:1") + subset2 = Range.from_string("N:M:2") + assert (subset1.covers_precise(subset2)) + assert (subset2.covers_precise(subset1) is False) + + subset1 = Range.from_string("N + 1:M:1") + subset2 = Range.from_string("N:M:2") + assert (subset1.covers_precise(subset2) is False) + assert (subset2.covers_precise(subset1) is False) + + subset1 = Range.from_string("-N:M:1") + subset2 = Range.from_string("N:M:2") + assert (subset1.covers_precise(subset2) is False) + assert (subset2.covers_precise(subset1) is False) + + +def test_range_indices(): + subset1 = Indices.from_string('0') + subset2 = Range.from_string('0:2:1') + assert (subset2.covers_precise(subset1)) + assert (subset1.covers_precise(subset2) is False) + subset1 = Indices.from_string('0') + subset2 = Range.from_string('0:1:1') + assert (subset2.covers_precise(subset1)) + assert (subset1.covers_precise(subset2)) + subset1 = Indices.from_string('0, 1') + subset2 = Range.from_string('0:2:1, 2:4:1') + assert (subset2.covers_precise(subset1) is False) + assert (subset1.covers_precise(subset2) is False) + +def test_index_index(): + subset1 = Indices.from_string('1') + subset2 = Indices.from_string('1') + assert (subset2.covers_precise(subset1)) + assert (subset1.covers_precise(subset2)) + subset1 = Indices.from_string('1') + subset2 = Indices.from_string('2') + assert (subset2.covers_precise(subset1) is False) + assert (subset1.covers_precise(subset2) is False) + subset1 = Indices.from_string('1, 2') + subset2 = Indices.from_string('1, 2') + assert (subset2.covers_precise(subset1)) + assert (subset1.covers_precise(subset2)) + subset1 = Indices.from_string('2, 1') + subset2 = Indices.from_string('1, 2') + assert (subset2.covers_precise(subset1) is False) + assert (subset1.covers_precise(subset2) is False) + subset1 = Indices.from_string('i') + subset2 = Indices.from_string('j') + assert (subset2.covers_precise(subset1) is False) + assert (subset1.covers_precise(subset2) is False) + subset1 = Indices.from_string('i') + subset2 = Indices.from_string('i') + assert (subset2.covers_precise(subset1)) + assert (subset1.covers_precise(subset2)) + subset1 = Indices.from_string('i, j') + subset2 = Indices.from_string('i, k') + assert (subset2.covers_precise(subset1) is False) + assert (subset1.covers_precise(subset2) is False) + subset1 = Indices.from_string('i, j') + subset2 = Indices.from_string('i, j') + assert (subset2.covers_precise(subset1)) + assert (subset1.covers_precise(subset2)) + + + + +if __name__ == "__main__": + test_integer_overlap_no_cover() + test_integer_bounding_box_cover_coprime_step() + test_integer_same_step_different_start() + test_integer_bounding_box_symbolic_step() + test_symbolic_boundaries() + test_symbolic_boundaries_not_symbolic_positive() + test_range_indices() + test_index_index() From c1935b6b8995404240b82ebd548d958d6ab68502 Mon Sep 17 00:00:00 2001 From: matteonussbauemer Date: Mon, 30 Oct 2023 22:15:43 +0100 Subject: [PATCH 119/163] formatting --- tests/subset_covers_precise_test.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tests/subset_covers_precise_test.py b/tests/subset_covers_precise_test.py 
index 793926ab1c..644cfa20ee 100644 --- a/tests/subset_covers_precise_test.py +++ b/tests/subset_covers_precise_test.py @@ -7,7 +7,9 @@ def test_integer_overlap_no_cover(): - # two overlapping subsets, neither of them covering the other + """ + two overlapping subsets, neither of them covering the other + """ subset1 = Range.from_string("0:10:1") subset2 = Range.from_string("5:11:1") @@ -21,7 +23,9 @@ def test_integer_overlap_no_cover(): def test_integer_bounding_box_cover_coprime_step(): - # bb of subset1 covers bb of subset2 but step sizes of the subsets are coprime + """ + boundingbox of subset1 covers bb of subset2 but step sizes of the subsets are coprime + """ subset1 = Range.from_string("0:10:3") subset2 = Range.from_string("0:10:2") @@ -47,7 +51,6 @@ def test_integer_same_step_different_start(): def test_integer_bounding_box_symbolic_step(): - subset1 = Range.from_string("0:20:s") subset2 = Range.from_string("0:10:s") subset3 = Range.from_string("0:10:2 * s") @@ -63,7 +66,6 @@ def test_integer_bounding_box_symbolic_step(): def test_symbolic_boundaries(): - subset1 = Range.from_string("N:M:1") subset2 = Range.from_string("N:M:2") assert (subset1.covers_precise(subset2)) @@ -104,10 +106,12 @@ def test_range_indices(): subset2 = Range.from_string('0:2:1') assert (subset2.covers_precise(subset1)) assert (subset1.covers_precise(subset2) is False) + subset1 = Indices.from_string('0') subset2 = Range.from_string('0:1:1') assert (subset2.covers_precise(subset1)) assert (subset1.covers_precise(subset2)) + subset1 = Indices.from_string('0, 1') subset2 = Range.from_string('0:2:1, 2:4:1') assert (subset2.covers_precise(subset1) is False) @@ -118,30 +122,37 @@ def test_index_index(): subset2 = Indices.from_string('1') assert (subset2.covers_precise(subset1)) assert (subset1.covers_precise(subset2)) + subset1 = Indices.from_string('1') subset2 = Indices.from_string('2') assert (subset2.covers_precise(subset1) is False) assert (subset1.covers_precise(subset2) is False) + subset1 = Indices.from_string('1, 2') subset2 = Indices.from_string('1, 2') assert (subset2.covers_precise(subset1)) assert (subset1.covers_precise(subset2)) + subset1 = Indices.from_string('2, 1') subset2 = Indices.from_string('1, 2') assert (subset2.covers_precise(subset1) is False) assert (subset1.covers_precise(subset2) is False) + subset1 = Indices.from_string('i') subset2 = Indices.from_string('j') assert (subset2.covers_precise(subset1) is False) assert (subset1.covers_precise(subset2) is False) + subset1 = Indices.from_string('i') subset2 = Indices.from_string('i') assert (subset2.covers_precise(subset1)) assert (subset1.covers_precise(subset2)) + subset1 = Indices.from_string('i, j') subset2 = Indices.from_string('i, k') assert (subset2.covers_precise(subset1) is False) assert (subset1.covers_precise(subset2) is False) + subset1 = Indices.from_string('i, j') subset2 = Indices.from_string('i, j') assert (subset2.covers_precise(subset1)) From ecbca2d990272fce01e8c3d1fdc67a8d2984f462 Mon Sep 17 00:00:00 2001 From: matteonussbauemer Date: Tue, 31 Oct 2023 12:29:08 +0100 Subject: [PATCH 120/163] rename Subsetlist to SubsetUnion (cherry picked from commit e75e782b86fa6476af84ec59d878624e79369a18) --- dace/properties.py | 2 +- dace/subsets.py | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/dace/properties.py b/dace/properties.py index 44f8b4fbcc..e02a54ad1f 100644 --- a/dace/properties.py +++ b/dace/properties.py @@ -1153,7 +1153,7 @@ def allow_none(self): def __set__(self, obj, 
val): if isinstance(val, str): val = self.from_string(val) - if (val is not None and not isinstance(val, sbs.Range) and not isinstance(val, sbs.Indices)): + if (val is not None and not isinstance(val, sbs.Range) and not isinstance(val, sbs.Indices) and not isinstance(val, sbs.SubsetUnion)): raise TypeError("Subset property must be either Range or Indices: got {}".format(type(val).__name__)) super(SubsetProperty, self).__set__(obj, val) diff --git a/dace/subsets.py b/dace/subsets.py index f2a2072343..f53520c5aa 100644 --- a/dace/subsets.py +++ b/dace/subsets.py @@ -1041,14 +1041,14 @@ def intersection(self, other: 'Indices'): return self return None -class Subsetlist(Subset): +class SubsetUnion(Subset): """ Wrapper subset type that stores multiple Subsets in a list. """ def __init__(self, subset): self.subset_list: list[Subset] = [] - if isinstance(subset, Subsetlist): + if isinstance(subset, SubsetUnion): self.subset_list = subset.subset_list elif isinstance(subset, list): for subset in subset: @@ -1069,7 +1069,7 @@ def covers(self, other): true is returned when one of the subsets in self is equal to other. """ - if isinstance(other, Subsetlist): + if isinstance(other, SubsetUnion): for subset in self.subset_list: # check if ther is a subset in self that covers every subset in other if all(subset.covers(s) for s in other.subset_list): @@ -1087,7 +1087,7 @@ def covers_precise(self, other): true is returned when one of the subsets in self is equal to other """ - if isinstance(other, Subsetlist): + if isinstance(other, SubsetUnion): for subset in self.subset_list: # check if ther is a subset in self that covers every subset in other if all(subset.covers_precise(s) for s in other.subset_list): @@ -1113,7 +1113,7 @@ def dims(self): def union(self, other: Subset): """In place union of self with another Subset""" try: - if isinstance(other, Subsetlist): + if isinstance(other, SubsetUnion): self.subset_list += other.subset_list elif isinstance(other, Indices) or isinstance(other, Range): self.subset_list.append(other) @@ -1231,8 +1231,8 @@ def union(subset_a: Subset, subset_b: Subset) -> Subset: return subset_b elif subset_a is None and subset_b is None: raise TypeError('Both subsets cannot be None') - elif isinstance(subset_a, Subsetlist) or isinstance( - subset_b, Subsetlist): + elif isinstance(subset_a, SubsetUnion) or isinstance( + subset_b, SubsetUnion): return list_union(subset_a, subset_b) elif type(subset_a) != type(subset_b): return bounding_box_union(subset_a, subset_b) @@ -1269,14 +1269,14 @@ def list_union(subset_a: Subset, subset_b: Subset) -> Subset: elif subset_a is None and subset_b is None: raise TypeError('Both subsets cannot be None') elif type(subset_a) != type(subset_b): - if isinstance(subset_b, Subsetlist): - return Subsetlist(subset_b.subset_list.append(subset_a)) + if isinstance(subset_b, SubsetUnion): + return SubsetUnion(subset_b.subset_list.append(subset_a)) else: - return Subsetlist(subset_a.subset_list.append(subset_b)) - elif isinstance(subset_a, Subsetlist): - return Subsetlist(subset_a.subset_list + subset_b.subset_list) + return SubsetUnion(subset_a.subset_list.append(subset_b)) + elif isinstance(subset_a, SubsetUnion): + return SubsetUnion(subset_a.subset_list + subset_b.subset_list) else: - return Subsetlist([subset_a, subset_b]) + return SubsetUnion([subset_a, subset_b]) except TypeError: return None From 3c10cb126ee68b2558f636a73aedf71f4863a125 Mon Sep 17 00:00:00 2001 From: matteonussbauemer Date: Tue, 31 Oct 2023 12:32:07 +0100 Subject: [PATCH 121/163] 
rename occurences in comments (cherry picked from commit 1301c3a4ae6d4e634e0bdc38f94bfcf1ff677c88) --- dace/subsets.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dace/subsets.py b/dace/subsets.py index f53520c5aa..068b330a07 100644 --- a/dace/subsets.py +++ b/dace/subsets.py @@ -1063,8 +1063,8 @@ def __init__(self, subset): def covers(self, other): """ - Returns True if this Subsetlist covers another subset (using a bounding box). - If other is another SubsetList then self and other will + Returns True if this SubsetUnion covers another subset (using a bounding box). + If other is another SubsetUnion then self and other will only return true if self is other. If other is a different type of subset true is returned when one of the subsets in self is equal to other. """ @@ -1081,8 +1081,8 @@ def covers(self, other): def covers_precise(self, other): """ - Returns True if this Subsetlist covers another - subset. If other is another SubsetList then self and other will + Returns True if this SubsetUnion covers another + subset. If other is another SubsetUnion then self and other will only return true if self is other. If other is a different type of subset true is returned when one of the subsets in self is equal to other """ @@ -1258,7 +1258,7 @@ def list_union(subset_a: Subset, subset_b: Subset) -> Subset: :param subset_a: The first subset. :param subset_b: The second subset. - :return: A Subsetlist object that contains all elements of subset_a and subset_b. + :return: A SubsetUnion object that contains all elements of subset_a and subset_b. """ # TODO(later): Merge subsets in both lists if possible try: From 6965b96aed93445d15f7a45720a27b3f310bc053 Mon Sep 17 00:00:00 2001 From: matteonussbauemer Date: Tue, 31 Oct 2023 18:57:09 +0100 Subject: [PATCH 122/163] upgrade sympy to 1.12 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a0ac2e2d49..cd5189437e 100644 --- a/setup.py +++ b/setup.py @@ -73,7 +73,7 @@ }, include_package_data=True, install_requires=[ - 'numpy', 'networkx >= 2.5', 'astunparse', 'sympy<=1.9', 'pyyaml', 'ply', 'websockets', 'requests', 'flask', + 'numpy', 'networkx >= 2.5', 'astunparse', 'sympy>=1.12', 'pyyaml', 'ply', 'websockets', 'requests', 'flask', 'fparser >= 0.1.3', 'aenum >= 3.1', 'dataclasses; python_version < "3.7"', 'dill', 'pyreadline;platform_system=="Windows"', 'typing-compat; python_version < "3.8"' ] + cmake_requires, From 9ff33a709b4d90d515b69975802debabc6a9d1ff Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Wed, 1 Nov 2023 19:50:05 +0100 Subject: [PATCH 123/163] Fix for VS Code debug console: view opens sdfg in VS Code and not in browser (#1419) * Fix for VS Code debug console: view opens sdfg in VS Code and not in browser * Fix for VS Code debug console: view opens sdfg in VS Code and not in browser --------- Co-authored-by: Christos Kotsalos --- dace/cli/sdfv.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dace/cli/sdfv.py b/dace/cli/sdfv.py index 3be8e1ca45..c0ff3da36d 100644 --- a/dace/cli/sdfv.py +++ b/dace/cli/sdfv.py @@ -36,7 +36,11 @@ def view(sdfg: dace.SDFG, filename: Optional[Union[str, int]] = None): """ # If vscode is open, try to open it inside vscode if filename is None: - if 'VSCODE_IPC_HOOK_CLI' in os.environ or 'VSCODE_GIT_IPC_HANDLE' in os.environ: + if ( + 'VSCODE_IPC_HOOK' in os.environ + or 'VSCODE_IPC_HOOK_CLI' in os.environ + or 'VSCODE_GIT_IPC_HANDLE' in os.environ + ): filename = tempfile.mktemp(suffix='.sdfg') 
sdfg.save(filename) os.system(f'code {filename}') From bd7a82b9a1f46e139b0a350afa1e40ee71c56c3f Mon Sep 17 00:00:00 2001 From: matteonussbauemer Date: Wed, 1 Nov 2023 21:33:09 +0100 Subject: [PATCH 124/163] Annotate tests with expected outcome --- tests/subset_covers_precise_test.py | 73 ++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 18 deletions(-) diff --git a/tests/subset_covers_precise_test.py b/tests/subset_covers_precise_test.py index 644cfa20ee..185932ab53 100644 --- a/tests/subset_covers_precise_test.py +++ b/tests/subset_covers_precise_test.py @@ -1,14 +1,16 @@ # Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. import pytest + import dace -from dace.subsets import Indices, Subset, Range from dace.config import Config +from dace.subsets import Indices, Range -def test_integer_overlap_no_cover(): +def test_integer_overlap_same_step_no_cover(): """ - two overlapping subsets, neither of them covering the other + Tests ranges with overlapping bounding boxes neither of them covering the other. + The ranges have the same step size. Covers_precise should return false. """ subset1 = Range.from_string("0:10:1") subset2 = Range.from_string("5:11:1") @@ -16,15 +18,16 @@ def test_integer_overlap_no_cover(): assert (subset1.covers_precise(subset2) is False) assert (subset2.covers_precise(subset1) is False) - subset1 = Range.from_string("0:10:1, 3:8:1") - subset2 = Range.from_string("5:11:1, 2:9:1") + subset1 = Range.from_string("0:10:2") + subset2 = Range.from_string("2:11:1") assert (subset1.covers_precise(subset2) is False) assert (subset2.covers_precise(subset1) is False) def test_integer_bounding_box_cover_coprime_step(): """ - boundingbox of subset1 covers bb of subset2 but step sizes of the subsets are coprime + Tests ranges where the boundingbox of subset1 covers the boundingbox of subset2 but + step sizes of the subsets are coprime so subset1 does not cover subset2. """ subset1 = Range.from_string("0:10:3") subset2 = Range.from_string("0:10:2") @@ -44,6 +47,11 @@ def test_integer_bounding_box_cover_coprime_step(): def test_integer_same_step_different_start(): + """ + Tests range where the bounding box of subset1 covers the bounding box of subset2 + but since subset2 starts at an offset that is not a multiple subset1's stepsize it + is not contained in subset1. + """ subset1 = Range.from_string("0:10:3") subset2 = Range.from_string("1:10:3") @@ -51,6 +59,14 @@ def test_integer_same_step_different_start(): def test_integer_bounding_box_symbolic_step(): + """ + Tests ranges where the step is symbolic but the start and end are not. + For 2 subsets s1 and s2 where s1's start is equal to s2's start and both subsets' step + sizes are symbolic s1.covers_precise(s2) should only return true iff s2's step size is + a multiple of s1's step size. + For 2 subsets s1 and s2 where s1's start is not equal to s2's start and both subsets' step + sizes are symbolic, s1.covers_precise(s2) should return false. 
+ """ subset1 = Range.from_string("0:20:s") subset2 = Range.from_string("0:10:s") subset3 = Range.from_string("0:10:2 * s") @@ -60,12 +76,17 @@ def test_integer_bounding_box_symbolic_step(): assert (subset3.covers_precise(subset1) is False) assert (subset3.covers_precise(subset2) is False) - subset1 = Range.from_string("0:20:s, 30:50:k") - subset2 = Range.from_string("0:10:s, 40:50:k") + subset1 = Range.from_string("30:50:k") + subset2 = Range.from_string("40:50:k") assert (subset1.covers_precise(subset2) is False) -def test_symbolic_boundaries(): +def test_ranges_symbolic_boundaries(): + """ + Tests where the boundaries of ranges are symbolic. + The function subset1.covers_precise(subset2) should return true only when the + start, end, and step size of subset1 are multiples of those in subset2 + """ subset1 = Range.from_string("N:M:1") subset2 = Range.from_string("N:M:2") assert (subset1.covers_precise(subset2)) @@ -83,6 +104,9 @@ def test_symbolic_boundaries(): def test_symbolic_boundaries_not_symbolic_positive(): + """ + Tests from test_symbolic_boundaries with symbolic_positive flag deactivated. + """ Config.set('optimizer', 'symbolic_positive', value=False) subset1 = Range.from_string("N:M:1") @@ -102,22 +126,35 @@ def test_symbolic_boundaries_not_symbolic_positive(): def test_range_indices(): - subset1 = Indices.from_string('0') + """ + Tests the handling of indices covering ranges and vice versa. + Given a range r and indices i: + If r's bounding box covers i r.covers_precise(i) should return true iff + i is covered by the step of r. + i.covers_precise(r) should only return true iff r.start == r.end == i. + If i is not in r's bounding box i.covers_precise(r) and r.covers_precise(i) + should return false + """ + subset1 = Indices.from_string('1') subset2 = Range.from_string('0:2:1') assert (subset2.covers_precise(subset1)) assert (subset1.covers_precise(subset2) is False) - subset1 = Indices.from_string('0') - subset2 = Range.from_string('0:1:1') - assert (subset2.covers_precise(subset1)) - assert (subset1.covers_precise(subset2)) + subset1 = Indices.from_string('3') + subset2 = Range.from_string('0:4:2') + assert (subset2.covers_precise(subset1) is False) + assert (subset2.covers_precise(subset1) is False) - subset1 = Indices.from_string('0, 1') - subset2 = Range.from_string('0:2:1, 2:4:1') + subset1 = Indices.from_string('3') + subset2 = Range.from_string('0:2:1') assert (subset2.covers_precise(subset1) is False) assert (subset1.covers_precise(subset2) is False) def test_index_index(): + """ + Tests the handling of indices covering indices. 
+ Given two indices i1 and i2, i1.covers_precise(i2) should return true iff i1 == i2. + """ subset1 = Indices.from_string('1') subset2 = Indices.from_string('1') assert (subset2.covers_precise(subset1)) @@ -162,11 +199,11 @@ def test_index_index(): if __name__ == "__main__": - test_integer_overlap_no_cover() + test_integer_overlap_same_step_no_cover() test_integer_bounding_box_cover_coprime_step() test_integer_same_step_different_start() test_integer_bounding_box_symbolic_step() - test_symbolic_boundaries() + test_ranges_symbolic_boundaries() test_symbolic_boundaries_not_symbolic_positive() test_range_indices() test_index_index() From 1e83e7112d6d79a96bd3637df5ff9d2b0563c6a8 Mon Sep 17 00:00:00 2001 From: matteonussbauemer Date: Wed, 1 Nov 2023 21:51:28 +0100 Subject: [PATCH 125/163] Upgrade sympy version in requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 5f804e1b4c..12c50a2eb5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,7 +19,7 @@ ply==3.11 PyYAML==6.0 requests==2.31.0 six==1.16.0 -sympy==1.9 +sympy==1.12 urllib3==2.0.7 websockets==11.0.3 Werkzeug==2.3.5 From ca75b88c96b2c97592515ca437a9749e1cea080d Mon Sep 17 00:00:00 2001 From: matteonussbauemer Date: Wed, 1 Nov 2023 23:18:50 +0100 Subject: [PATCH 126/163] Fix config in test --- tests/subset_covers_precise_test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/subset_covers_precise_test.py b/tests/subset_covers_precise_test.py index 185932ab53..8c688ea6c1 100644 --- a/tests/subset_covers_precise_test.py +++ b/tests/subset_covers_precise_test.py @@ -107,6 +107,7 @@ def test_symbolic_boundaries_not_symbolic_positive(): """ Tests from test_symbolic_boundaries with the symbolic_positive flag deactivated.
""" + symbolic_positive = Config.get('optimizer', 'symbolic_positive') Config.set('optimizer', 'symbolic_positive', value=False) subset1 = Range.from_string("N:M:1") @@ -124,6 +125,8 @@ def test_symbolic_boundaries_not_symbolic_positive(): assert (subset1.covers_precise(subset2) is False) assert (subset2.covers_precise(subset1) is False) + Config.set('optimizer', 'symbolic_positive', value=symbolic_positive) + def test_range_indices(): """ From d947bf87b1120f7612af5264a9fc690605920e50 Mon Sep 17 00:00:00 2001 From: Philipp Schaad Date: Thu, 2 Nov 2023 17:03:51 +0100 Subject: [PATCH 127/163] Hierarchical Control Flow / Control Flow Regions (#1404) * Adds just the framework for integral loops * Fix duplicate collapsed property on states * Fix inorrect parent class initialization * Add deprecation warning to is_start_state kwarg * Symbols and start block fixes * More symbol fixes * label and state list fix * Remove loop scope for now * Renaming * revert to traditional nodes-based iteration for now * Update docs * Add test for deprecation * Improve iteration function names * Remove obsolete property * Improve type naming * Remove obsolete scope_subgraph method --- dace/codegen/instrumentation/papi.py | 13 +- .../analysis/schedule_tree/sdfg_to_tree.py | 2 +- dace/sdfg/nodes.py | 5 +- dace/sdfg/replace.py | 29 +- dace/sdfg/sdfg.py | 261 +----- dace/sdfg/state.py | 824 +++++++++++++++--- dace/sdfg/utils.py | 56 +- .../dataflow/double_buffering.py | 6 +- dace/transformation/interstate/loop_unroll.py | 3 +- .../interstate/multistate_inline.py | 2 +- doc/sdfg/images/elements.svg | 592 +++++++++++-- doc/sdfg/ir.rst | 21 +- requirements.txt | 4 +- .../sdfg/nested_control_flow_regions_test.py | 18 + tests/sdfg_validate_names_test.py | 2 +- 15 files changed, 1331 insertions(+), 507 deletions(-) create mode 100644 tests/sdfg/nested_control_flow_regions_test.py diff --git a/dace/codegen/instrumentation/papi.py b/dace/codegen/instrumentation/papi.py index c0d3b657a1..4885611408 100644 --- a/dace/codegen/instrumentation/papi.py +++ b/dace/codegen/instrumentation/papi.py @@ -12,7 +12,7 @@ from dace.sdfg.graph import SubgraphView from dace.memlet import Memlet from dace.sdfg import scope_contains_scope -from dace.sdfg.state import StateGraphView +from dace.sdfg.state import DataflowGraphView import sympy as sp import os @@ -392,7 +392,7 @@ def should_instrument_entry(map_entry: EntryNode) -> bool: return cond @staticmethod - def has_surrounding_perfcounters(node, dfg: StateGraphView): + def has_surrounding_perfcounters(node, dfg: DataflowGraphView): """ Returns true if there is a possibility that this node is part of a section that is profiled. """ parent = dfg.entry_node(node) @@ -605,7 +605,7 @@ def get_memlet_byte_size(sdfg: dace.SDFG, memlet: Memlet): return memlet.volume * memdata.dtype.bytes @staticmethod - def get_out_memlet_costs(sdfg: dace.SDFG, state_id: int, node: nodes.Node, dfg: StateGraphView): + def get_out_memlet_costs(sdfg: dace.SDFG, state_id: int, node: nodes.Node, dfg: DataflowGraphView): scope_dict = sdfg.node(state_id).scope_dict() out_costs = 0 @@ -636,7 +636,10 @@ def get_out_memlet_costs(sdfg: dace.SDFG, state_id: int, node: nodes.Node, dfg: return out_costs @staticmethod - def get_tasklet_byte_accesses(tasklet: nodes.CodeNode, dfg: StateGraphView, sdfg: dace.SDFG, state_id: int) -> str: + def get_tasklet_byte_accesses(tasklet: nodes.CodeNode, + dfg: DataflowGraphView, + sdfg: dace.SDFG, + state_id: int) -> str: """ Get the amount of bytes processed by `tasklet`. 
The formula is sum(inedges * size) + sum(outedges * size) """ in_accum = [] @@ -693,7 +696,7 @@ def get_memory_input_size(node, sdfg, state_id) -> str: return sym2cpp(input_size) @staticmethod - def accumulate_byte_movement(outermost_node, node, dfg: StateGraphView, sdfg, state_id): + def accumulate_byte_movement(outermost_node, node, dfg: DataflowGraphView, sdfg, state_id): itvars = dict() # initialize an empty dict diff --git a/dace/sdfg/analysis/schedule_tree/sdfg_to_tree.py b/dace/sdfg/analysis/schedule_tree/sdfg_to_tree.py index 917f748cb8..084d46f47d 100644 --- a/dace/sdfg/analysis/schedule_tree/sdfg_to_tree.py +++ b/dace/sdfg/analysis/schedule_tree/sdfg_to_tree.py @@ -275,7 +275,7 @@ def remove_name_collisions(sdfg: SDFG): # Rename duplicate states for state in nsdfg.nodes(): if state.label in state_names_seen: - state.set_label(data.find_new_name(state.label, state_names_seen)) + state.label = data.find_new_name(state.label, state_names_seen) state_names_seen.add(state.label) replacements: Dict[str, str] = {} diff --git a/dace/sdfg/nodes.py b/dace/sdfg/nodes.py index 32369a19a3..a28e9fce38 100644 --- a/dace/sdfg/nodes.py +++ b/dace/sdfg/nodes.py @@ -262,9 +262,8 @@ def label(self): def __label__(self, sdfg, state): return self.data - def desc(self, sdfg): - from dace.sdfg import SDFGState, ScopeSubgraphView - if isinstance(sdfg, (SDFGState, ScopeSubgraphView)): + def desc(self, sdfg: Union['dace.sdfg.SDFG', 'dace.sdfg.SDFGState', 'dace.sdfg.ScopeSubgraphView']): + if isinstance(sdfg, (dace.sdfg.SDFGState, dace.sdfg.ScopeSubgraphView)): sdfg = sdfg.parent return sdfg.arrays[self.data] diff --git a/dace/sdfg/replace.py b/dace/sdfg/replace.py index 4b36fad4fe..a2c7b9a43c 100644 --- a/dace/sdfg/replace.py +++ b/dace/sdfg/replace.py @@ -175,17 +175,18 @@ def replace_datadesc_names(sdfg, repl: Dict[str, str]): sdfg.constants_prop[repl[aname]] = sdfg.constants_prop[aname] del sdfg.constants_prop[aname] - # Replace in interstate edges - for e in sdfg.edges(): - e.data.replace_dict(repl, replace_keys=False) - - for state in sdfg.nodes(): - # Replace in access nodes - for node in state.data_nodes(): - if node.data in repl: - node.data = repl[node.data] - - # Replace in memlets - for edge in state.edges(): - if edge.data.data in repl: - edge.data.data = repl[edge.data.data] + for cf in sdfg.all_control_flow_regions(): + # Replace in interstate edges + for e in cf.edges(): + e.data.replace_dict(repl, replace_keys=False) + + for block in cf.nodes(): + if isinstance(block, dace.SDFGState): + # Replace in access nodes + for node in block.data_nodes(): + if node.data in repl: + node.data = repl[node.data] + # Replace in memlets + for edge in block.edges(): + if edge.data.data in repl: + edge.data.data = repl[edge.data.data] diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index a85e773337..fdf8835c7e 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -30,7 +30,7 @@ from dace.frontend.python import astutils, wrappers from dace.sdfg import nodes as nd from dace.sdfg.graph import OrderedDiGraph, Edge, SubgraphView -from dace.sdfg.state import SDFGState +from dace.sdfg.state import SDFGState, ControlFlowRegion from dace.sdfg.propagation import propagate_memlets_sdfg from dace.distr_types import ProcessGrid, SubArray, RedistrArray from dace.dtypes import validate_name @@ -402,7 +402,7 @@ def label(self): @make_properties -class SDFG(OrderedDiGraph[SDFGState, InterstateEdge]): +class SDFG(ControlFlowRegion): """ The main intermediate representation of code in DaCe. 
A Stateful DataFlow multiGraph (SDFG) is a directed graph of directed @@ -499,8 +499,6 @@ def __init__(self, self._parent_sdfg = None self._parent_nsdfg_node = None self._sdfg_list = [self] - self._start_state: Optional[int] = None - self._cached_start_state: Optional[SDFGState] = None self._arrays = NestedDict() # type: Dict[str, dt.Array] self._labels: Set[str] = set() self.global_code = {'frame': CodeBlock("", dtypes.Language.CPP)} @@ -531,14 +529,14 @@ def __deepcopy__(self, memo): memo[id(self)] = result for k, v in self.__dict__.items(): # Skip derivative attributes - if k in ('_cached_start_state', '_edges', '_nodes', '_parent', '_parent_sdfg', '_parent_nsdfg_node', + if k in ('_cached_start_block', '_edges', '_nodes', '_parent', '_parent_sdfg', '_parent_nsdfg_node', '_sdfg_list', '_transformation_hist'): continue setattr(result, k, copy.deepcopy(v, memo)) # Copy edges and nodes result._edges = copy.deepcopy(self._edges, memo) result._nodes = copy.deepcopy(self._nodes, memo) - result._cached_start_state = copy.deepcopy(self._cached_start_state, memo) + result._cached_start_block = copy.deepcopy(self._cached_start_block, memo) # Copy parent attributes for k in ('_parent', '_parent_sdfg', '_parent_nsdfg_node'): if id(getattr(self, k)) in memo: @@ -583,7 +581,7 @@ def to_json(self, hash=False): tmp['attributes']['constants_prop'] = json.loads(dace.serialize.dumps(tmp['attributes']['constants_prop'])) tmp['sdfg_list_id'] = int(self.sdfg_id) - tmp['start_state'] = self._start_state + tmp['start_state'] = self._start_block tmp['attributes']['name'] = self.name if hash: @@ -627,7 +625,7 @@ def from_json(cls, json_obj, context_info=None): ret.add_edge(nodelist[int(e.src)], nodelist[int(e.dst)], e.data) if 'start_state' in json_obj: - ret._start_state = json_obj['start_state'] + ret._start_block = json_obj['start_state'] return ret @@ -753,14 +751,7 @@ def replace_dict(self, for array in self.arrays.values(): replace_properties_dict(array, repldict, symrepl) - if replace_in_graph: - # Replace in inter-state edges - for edge in self.edges(): - edge.data.replace_dict(repldict, replace_keys=replace_keys) - - # Replace in states - for state in self.nodes(): - state.replace_dict(repldict, symrepl) + super().replace_dict(repldict, symrepl, replace_in_graph, replace_keys) def add_symbol(self, name, stype): """ Adds a symbol to the SDFG. @@ -787,34 +778,11 @@ def remove_symbol(self, name): @property def start_state(self): - """ Returns the starting state of this SDFG. """ - if self._cached_start_state is not None: - return self._cached_start_state - - source_nodes = self.source_nodes() - if len(source_nodes) == 1: - self._cached_start_state = source_nodes[0] - return source_nodes[0] - # If starting state is ambiguous (i.e., loop to initial state or more - # than one possible start state), allow manually overriding start state - if self._start_state is not None: - self._cached_start_state = self.node(self._start_state) - return self._cached_start_state - raise ValueError('Ambiguous or undefined starting state for SDFG, ' - 'please use "is_start_state=True" when adding the ' - 'starting state with "add_state"') + return self.start_block @start_state.setter def start_state(self, state_id): - """ Manually sets the starting state of this SDFG. - - :param state_id: The node ID (use `node_id(state)`) of the - state to set. 
- """ - if state_id < 0 or state_id >= self.number_of_nodes(): - raise ValueError("Invalid state ID") - self._start_state = state_id - self._cached_start_state = self.node(state_id) + self.start_block = state_id def set_global_code(self, cpp_code: str, location: str = 'frame'): """ @@ -1127,7 +1095,7 @@ def remove_data(self, name, validate=True): # Verify that there are no access nodes that use this data if validate: - for state in self.nodes(): + for state in self.states(): for node in state.nodes(): if isinstance(node, nd.AccessNode) and node.data == name: raise ValueError(f"Cannot remove data descriptor " @@ -1243,75 +1211,14 @@ def parent_sdfg(self, value): def parent_nsdfg_node(self, value): self._parent_nsdfg_node = value - def add_node(self, node, is_start_state=False): - """ Adds a new node to the SDFG. Must be an SDFGState or a subclass - thereof. - - :param node: The node to add. - :param is_start_state: If True, sets this node as the starting - state. - """ - if not isinstance(node, SDFGState): - raise TypeError("Expected SDFGState, got " + str(type(node))) - super(SDFG, self).add_node(node) - self._cached_start_state = None - if is_start_state is True: - self.start_state = len(self.nodes()) - 1 - self._cached_start_state = node - def remove_node(self, node: SDFGState): - if node is self._cached_start_state: - self._cached_start_state = None + if node is self._cached_start_block: + self._cached_start_block = None return super().remove_node(node) - def add_edge(self, u, v, edge): - """ Adds a new edge to the SDFG. Must be an InterstateEdge or a - subclass thereof. - - :param u: Source node. - :param v: Destination node. - :param edge: The edge to add. - """ - if not isinstance(u, SDFGState): - raise TypeError("Expected SDFGState, got: {}".format(type(u).__name__)) - if not isinstance(v, SDFGState): - raise TypeError("Expected SDFGState, got: {}".format(type(v).__name__)) - if not isinstance(edge, InterstateEdge): - raise TypeError("Expected InterstateEdge, got: {}".format(type(edge).__name__)) - if v is self._cached_start_state: - self._cached_start_state = None - return super(SDFG, self).add_edge(u, v, edge) - def states(self): - """ Alias that returns the nodes (states) in this SDFG. """ - return self.nodes() - - def all_nodes_recursive(self) -> Iterator[Tuple[nd.Node, Union['SDFG', 'SDFGState']]]: - """ Iterate over all nodes in this SDFG, including states, nodes in - states, and recursive states and nodes within nested SDFGs, - returning tuples on the form (node, parent), where the parent is - either the SDFG (for states) or a DFG (nodes). """ - for node in self.nodes(): - yield node, self - yield from node.all_nodes_recursive() - - def all_sdfgs_recursive(self): - """ Iterate over this and all nested SDFGs. """ - yield self - for state in self.nodes(): - for node in state.nodes(): - if isinstance(node, nd.NestedSDFG): - yield from node.sdfg.all_sdfgs_recursive() - - def all_edges_recursive(self): - """ Iterate over all edges in this SDFG, including state edges, - inter-state edges, and recursively edges within nested SDFGs, - returning tuples on the form (edge, parent), where the parent is - either the SDFG (for states) or a DFG (nodes). """ - for e in self.edges(): - yield e, self - for node in self.nodes(): - yield from node.all_edges_recursive() + """ Returns the states in this SDFG, recursing into state scope blocks. 
""" + return list(self.all_states()) def arrays_recursive(self): """ Iterate over all arrays in this SDFG, including arrays within @@ -1323,19 +1230,15 @@ def arrays_recursive(self): if isinstance(node, nd.NestedSDFG): yield from node.sdfg.arrays_recursive() - def used_symbols(self, all_symbols: bool, keep_defined_in_mapping: bool=False) -> Set[str]: - """ - Returns a set of symbol names that are used by the SDFG, but not - defined within it. This property is used to determine the symbolic - parameters of the SDFG. - - :param all_symbols: If False, only returns the set of symbols that will be used - in the generated code and are needed as arguments. - :param keep_defined_in_mapping: If True, symbols defined in inter-state edges that are in the symbol mapping - will be removed from the set of defined symbols. - """ - defined_syms = set() - free_syms = set() + def _used_symbols_internal(self, + all_symbols: bool, + defined_syms: Optional[Set]=None, + free_syms: Optional[Set]=None, + used_before_assignment: Optional[Set]=None, + keep_defined_in_mapping: bool=False) -> Tuple[Set[str], Set[str], Set[str]]: + defined_syms = set() if defined_syms is None else defined_syms + free_syms = set() if free_syms is None else free_syms + used_before_assignment = set() if used_before_assignment is None else used_before_assignment # Exclude data descriptor names and constants for name in self.arrays.keys(): @@ -1349,54 +1252,10 @@ def used_symbols(self, all_symbols: bool, keep_defined_in_mapping: bool=False) - for code in self.exit_code.values(): free_syms |= symbolic.symbols_in_code(code.as_string, self.symbols.keys()) - # Add free state symbols - used_before_assignment = set() - - try: - ordered_states = self.topological_sort(self.start_state) - except ValueError: # Failsafe (e.g., for invalid or empty SDFGs) - ordered_states = self.nodes() - - for state in ordered_states: - state_fsyms = state.used_symbols(all_symbols) - free_syms |= state_fsyms - - # Add free inter-state symbols - for e in self.out_edges(state): - # NOTE: First we get the true InterstateEdge free symbols, then we compute the newly defined symbols by - # subracting the (true) free symbols from the edge's assignment keys. This way we can correctly - # compute the symbols that are used before being assigned. - efsyms = e.data.used_symbols(all_symbols) - defined_syms |= set(e.data.assignments.keys()) - (efsyms | state_fsyms) - used_before_assignment.update(efsyms - defined_syms) - free_syms |= efsyms - - # Remove symbols that were used before they were assigned - defined_syms -= used_before_assignment - - # Remove from defined symbols those that are in the symbol mapping - if self.parent_nsdfg_node is not None and keep_defined_in_mapping: - defined_syms -= set(self.parent_nsdfg_node.symbol_mapping.keys()) - - # Add the set of SDFG symbol parameters - # If all_symbols is False, those symbols would only be added in the case of non-Python tasklets - if all_symbols: - free_syms |= set(self.symbols.keys()) - - # Subtract symbols defined in inter-state edges and constants - return free_syms - defined_syms - - @property - def free_symbols(self) -> Set[str]: - """ - Returns a set of symbol names that are used by the SDFG, but not - defined within it. This property is used to determine the symbolic - parameters of the SDFG and verify that ``SDFG.symbols`` is complete. - - :note: Assumes that the graph is valid (i.e., without undefined or - overlapping symbols). 
- """ - return self.used_symbols(all_symbols=True) + return super()._used_symbols_internal( + all_symbols=all_symbols, keep_defined_in_mapping=keep_defined_in_mapping, + defined_syms=defined_syms, free_syms=free_syms, used_before_assignment=used_before_assignment + ) def get_all_toplevel_symbols(self) -> Set[str]: """ @@ -1608,16 +1467,16 @@ def shared_transients(self, check_toplevel=True) -> List[str]: shared = [] # If a transient is present in an inter-state edge, it is shared - for interstate_edge in self.edges(): + for interstate_edge in self.all_interstate_edges(): for sym in interstate_edge.data.free_symbols: if sym in self.arrays and self.arrays[sym].transient: seen[sym] = interstate_edge shared.append(sym) # If transient is accessed in more than one state, it is shared - for state in self.nodes(): - for node in state.nodes(): - if isinstance(node, nd.AccessNode) and node.desc(self).transient: + for state in self.states(): + for node in state.data_nodes(): + if node.desc(self).transient: if (check_toplevel and node.desc(self).toplevel) or (node.data in seen and seen[node.data] != state): shared.append(node.data) @@ -1706,62 +1565,6 @@ def from_file(filename: str) -> 'SDFG': # Dynamic SDFG creation API ############################## - def add_state(self, label=None, is_start_state=False) -> 'SDFGState': - """ Adds a new SDFG state to this graph and returns it. - - :param label: State label. - :param is_start_state: If True, resets SDFG starting state to this - state. - :return: A new SDFGState object. - """ - if self._labels is None or len(self._labels) != self.number_of_nodes(): - self._labels = set(s.label for s in self.nodes()) - label = label or 'state' - existing_labels = self._labels - label = dt.find_new_name(label, existing_labels) - state = SDFGState(label, self) - self._labels.add(label) - - self.add_node(state, is_start_state=is_start_state) - return state - - def add_state_before(self, state: 'SDFGState', label=None, is_start_state=False) -> 'SDFGState': - """ Adds a new SDFG state before an existing state, reconnecting - predecessors to it instead. - - :param state: The state to prepend the new state before. - :param label: State label. - :param is_start_state: If True, resets SDFG starting state to this - state. - :return: A new SDFGState object. - """ - new_state = self.add_state(label, is_start_state) - # Reconnect - for e in self.in_edges(state): - self.remove_edge(e) - self.add_edge(e.src, new_state, e.data) - # Add unconditional connection between the new state and the current - self.add_edge(new_state, state, InterstateEdge()) - return new_state - - def add_state_after(self, state: 'SDFGState', label=None, is_start_state=False) -> 'SDFGState': - """ Adds a new SDFG state after an existing state, reconnecting - it to the successors instead. - - :param state: The state to append the new state after. - :param label: State label. - :param is_start_state: If True, resets SDFG starting state to this - state. - :return: A new SDFGState object. - """ - new_state = self.add_state(label, is_start_state) - # Reconnect - for e in self.out_edges(state): - self.remove_edge(e) - self.add_edge(new_state, e.dst, e.data) - # Add unconditional connection between the current and the new state - self.add_edge(state, new_state, InterstateEdge()) - return new_state def _find_new_name(self, name: str): """ Tries to find a new name by adding an underscore and a number. 
""" @@ -2482,7 +2285,7 @@ def __call__(self, *args, **kwargs): def fill_scope_connectors(self): """ Fills missing scope connectors (i.e., "IN_#"/"OUT_#" on entry/exit nodes) according to data on the memlets. """ - for state in self.nodes(): + for state in self.states(): state.fill_scope_connectors() def predecessor_state_transitions(self, state): diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py index 1ff8fe4cf1..097365fbc3 100644 --- a/dace/sdfg/state.py +++ b/dace/sdfg/state.py @@ -2,6 +2,7 @@ """ Contains classes of a single SDFG state and dataflow subgraphs. """ import ast +import abc import collections import copy import inspect @@ -19,7 +20,7 @@ from dace.properties import (CodeBlock, DictProperty, EnumProperty, Property, SubsetProperty, SymbolicProperty, CodeProperty, make_properties) from dace.sdfg import nodes as nd -from dace.sdfg.graph import MultiConnectorEdge, OrderedMultiDiConnectorGraph, SubgraphView +from dace.sdfg.graph import MultiConnectorEdge, OrderedMultiDiConnectorGraph, SubgraphView, OrderedDiGraph, Edge from dace.sdfg.propagation import propagate_memlet from dace.sdfg.validation import validate_state from dace.subsets import Range, Subset @@ -28,6 +29,11 @@ import dace.sdfg.scope +NodeT = Union[nd.Node, 'ControlFlowBlock'] +EdgeT = Union[MultiConnectorEdge[mm.Memlet], Edge['dace.sdfg.InterstateEdge']] +GraphT = Union['ControlFlowRegion', 'SDFGState'] + + def _getdebuginfo(old_dinfo=None) -> dtypes.DebugInfo: """ Returns a DebugInfo object for the position that called this function. @@ -66,13 +72,248 @@ def _make_iterators(ndrange): return params, map_range -class StateGraphView(object): +class BlockGraphView(object): """ - Read-only view interface of an SDFG state, containing methods for memlet - tracking, traversal, subgraph creation, queries, and replacements. - ``SDFGState`` and ``StateSubgraphView`` inherit from this class to share + Read-only view interface of an SDFG control flow block, containing methods for memlet tracking, traversal, subgraph + creation, queries, and replacements. ``ControlFlowBlock`` and ``StateSubgraphView`` inherit from this class to share methods. """ + + + ################################################################### + # Typing overrides + + @overload + def nodes(self) -> List[NodeT]: + ... + + @overload + def edges(self) -> List[EdgeT]: + ... + + @overload + def in_degree(self, node: NodeT) -> int: + ... + + @overload + def out_degree(self, node: NodeT) -> int: + ... + + ################################################################### + # Traversal methods + + @abc.abstractmethod + def all_nodes_recursive(self) -> Iterator[Tuple[NodeT, GraphT]]: + """ + Iterate over all nodes in this graph or subgraph. + This includes control flow blocks, nodes in those blocks, and recursive control flow blocks and nodes within + nested SDFGs. It returns tuples of the form (node, parent), where the node is either a dataflow node, in which + case the parent is an SDFG state, or a control flow block, in which case the parent is a control flow graph + (i.e., an SDFG or a scope block). + """ + raise NotImplementedError() + + @abc.abstractmethod + def all_edges_recursive(self) -> Iterator[Tuple[EdgeT, GraphT]]: + """ + Iterate over all edges in this graph or subgraph. + This includes dataflow edges, inter-state edges, and recursive edges within nested SDFGs. 
It returns tuples of + the form (edge, parent), where the edge is either a dataflow edge, in which case the parent is an SDFG state, or + an inter-state edge, in which case the parent is a control flow graph (i.e., an SDFG or a scope block). + """ + raise NotImplementedError() + + @abc.abstractmethod + def data_nodes(self) -> List[nd.AccessNode]: + """ + Returns all data nodes (i.e., AccessNodes, arrays) present in this graph or subgraph. + Note: This does not recurse into nested SDFGs. + """ + raise NotImplementedError() + + @abc.abstractmethod + def entry_node(self, node: nd.Node) -> nd.EntryNode: + """ Returns the entry node that wraps the current node, or None if it is top-level in a state. """ + raise NotImplementedError() + + @abc.abstractmethod + def exit_node(self, entry_node: nd.EntryNode) -> nd.ExitNode: + """ Returns the exit node leaving the context opened by the given entry node. """ + raise NotImplementedError() + + ################################################################### + # Memlet-tracking methods + + @abc.abstractmethod + def memlet_path(self, edge: MultiConnectorEdge[mm.Memlet]) -> List[MultiConnectorEdge[mm.Memlet]]: + """ + Given one edge, returns a list of edges representing a path between its source and sink nodes. + Used for memlet tracking. + + :note: Behavior is undefined when there is more than one path involving this edge. + :param edge: An edge within a state (memlet). + :return: A list of edges from a source node to a destination node. + """ + raise NotImplementedError() + + @abc.abstractmethod + def memlet_tree(self, edge: MultiConnectorEdge) -> mm.MemletTree: + """ + Given one edge, returns a tree of edges between its node source(s) and sink(s). + Used for memlet tracking. + + :param edge: An edge within a state (memlet). + :return: A tree of edges whose root is the source/sink node (depending on direction) and associated children + edges. + """ + raise NotImplementedError() + + @abc.abstractmethod + def in_edges_by_connector(self, node: nd.Node, connector: AnyStr) -> Iterable[MultiConnectorEdge[mm.Memlet]]: + """ + Returns a generator over edges entering the given connector of the given node. + + :param node: Destination node of edges. + :param connector: Destination connector of edges. + """ + raise NotImplementedError() + + @abc.abstractmethod + def out_edges_by_connector(self, node: nd.Node, connector: AnyStr) -> Iterable[MultiConnectorEdge[mm.Memlet]]: + """ + Returns a generator over edges exiting the given connector of the given node. + + :param node: Source node of edges. + :param connector: Source connector of edges. + """ + raise NotImplementedError() + + @abc.abstractmethod + def edges_by_connector(self, node: nd.Node, connector: AnyStr) -> Iterable[MultiConnectorEdge[mm.Memlet]]: + """ + Returns a generator over edges entering or exiting the given connector of the given node. + + :param node: Source/destination node of edges. + :param connector: Source/destination connector of edges. + """ + raise NotImplementedError() + + ################################################################### + # Query, subgraph, and replacement methods + + @abc.abstractmethod + def used_symbols(self, all_symbols: bool, keep_defined_in_mapping: bool=False) -> Set[str]: + """ + Returns a set of symbol names that are used in the graph. + + :param all_symbols: If False, only returns symbols that are needed as arguments (only used in generated code).
+ :param keep_defined_in_mapping: If True, symbols defined in inter-state edges that are in the symbol mapping + will be removed from the set of defined symbols. + """ + raise NotImplementedError() + + @property + def free_symbols(self) -> Set[str]: + """ + Returns a set of symbol names that are used, but not defined, in this graph view. + In the case of an SDFG, this property is used to determine the symbolic parameters of the SDFG and + verify that ``SDFG.symbols`` is complete. + + :note: Assumes that the graph is valid (i.e., without undefined or overlapping symbols). + """ + return self.used_symbols(all_symbols=True) + + @abc.abstractmethod + def read_and_write_sets(self) -> Tuple[Set[AnyStr], Set[AnyStr]]: + """ + Determines what data is read and written in this graph. + Does not include reads to subsets of containers that have previously been written within the same state. + + :return: A two-tuple of sets of things denoting ({data read}, {data written}). + """ + raise NotImplementedError() + + @abc.abstractmethod + def unordered_arglist(self, + defined_syms=None, + shared_transients=None) -> Tuple[Dict[str, dt.Data], Dict[str, dt.Data]]: + raise NotImplementedError() + + def arglist(self, defined_syms=None, shared_transients=None) -> Dict[str, dt.Data]: + """ + Returns an ordered dictionary of arguments (names and types) required to invoke this subgraph. + + The arguments differ from SDFG.arglist, but follow the same order, + namely: <data arguments>, <scalar arguments>. + + Data arguments contain: + * All used non-transient data containers in the subgraph + * All used transient data containers that were allocated outside. + This includes data from memlets, transients shared across multiple states, and transients that could not + be allocated within the subgraph (due to their ``AllocationLifetime`` or according to the + ``dtypes.can_allocate`` function). + + Scalar arguments contain: + * Free symbols in this state/subgraph. + * All transient and non-transient scalar data containers used in this subgraph. + + This structure will create a sorted list of pointers followed by a sorted list of PoDs and structs. + + :return: An ordered dictionary of (name, data descriptor type) of all the arguments, sorted as defined here. + """ + data_args, scalar_args = self.unordered_arglist(defined_syms, shared_transients) + + # Fill up ordered dictionary + result = collections.OrderedDict() + for k, v in itertools.chain(sorted(data_args.items()), sorted(scalar_args.items())): + result[k] = v + + return result + + def signature_arglist(self, with_types=True, for_call=False): + """ Returns a list of arguments necessary to call this state or subgraph, formatted as a list of C definitions. + + :param with_types: If True, includes argument types in the result. + :param for_call: If True, returns arguments that can be used when calling the SDFG. + :return: A list of strings. For example: `['float *A', 'int b']`. + """ + return [v.as_arg(name=k, with_types=with_types, for_call=for_call) for k, v in self.arglist().items()] + + @abc.abstractmethod + def top_level_transients(self) -> Set[str]: + """Iterate over top-level transients of this graph.""" + raise NotImplementedError() + + @abc.abstractmethod + def all_transients(self) -> List[str]: + """Iterate over all transients in this graph.""" + raise NotImplementedError() + + @abc.abstractmethod + def replace(self, name: str, new_name: str): + """ + Finds and replaces all occurrences of a symbol or array in this graph. + + :param name: Name to find. + :param new_name: Name to replace.
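+ For example, ``replace('N', 'M')`` renames every occurrence of the symbol
+ or array ``N`` in this graph to ``M``.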
+ """ + raise NotImplementedError() + + @abc.abstractmethod + def replace_dict(self, + repl: Dict[str, str], + symrepl: Optional[Dict[symbolic.SymbolicType, symbolic.SymbolicType]] = None): + """ + Finds and replaces all occurrences of a set of symbols or arrays in this graph. + + :param repl: Mapping from names to replacements. + :param symrepl: Optional symbolic version of ``repl``. + """ + raise NotImplementedError() + + +@make_properties +class DataflowGraphView(BlockGraphView, abc.ABC): def __init__(self, *args, **kwargs): self._clear_scopedict_cache() @@ -91,29 +332,29 @@ def edges(self) -> List[MultiConnectorEdge[mm.Memlet]]: ################################################################### # Traversal methods - def all_nodes_recursive(self): + def all_nodes_recursive(self) -> Iterator[Tuple[NodeT, GraphT]]: for node in self.nodes(): yield node, self if isinstance(node, nd.NestedSDFG): yield from node.sdfg.all_nodes_recursive() - def all_edges_recursive(self): + def all_edges_recursive(self) -> Iterator[Tuple[EdgeT, GraphT]]: for e in self.edges(): yield e, self for node in self.nodes(): if isinstance(node, nd.NestedSDFG): yield from node.sdfg.all_edges_recursive() - def data_nodes(self): + def data_nodes(self) -> List[nd.AccessNode]: """ Returns all data_nodes (arrays) present in this state. """ return [n for n in self.nodes() if isinstance(n, nd.AccessNode)] - def entry_node(self, node: nd.Node) -> nd.EntryNode: + def entry_node(self, node: nd.Node) -> Optional[nd.EntryNode]: """ Returns the entry node that wraps the current node, or None if it is top-level in a state. """ return self.scope_dict()[node] - def exit_node(self, entry_node: nd.EntryNode) -> nd.ExitNode: + def exit_node(self, entry_node: nd.EntryNode) -> Optional[nd.ExitNode]: """ Returns the exit node leaving the context opened by the given entry node. """ node_to_children = self.scope_children() @@ -152,7 +393,7 @@ def memlet_path(self, edge: MultiConnectorEdge[mm.Memlet]) -> List[MultiConnecto result.insert(0, next_edge) curedge = next_edge - # Prepend outgoing edges until reaching the sink node + # Append outgoing edges until reaching the sink node curedge = edge while not isinstance(curedge.dst, (nd.CodeNode, nd.AccessNode)): # Trace through scope entry using IN_# -> OUT_# @@ -168,13 +409,6 @@ def memlet_path(self, edge: MultiConnectorEdge[mm.Memlet]) -> List[MultiConnecto return result def memlet_tree(self, edge: MultiConnectorEdge) -> mm.MemletTree: - """ Given one edge, returns a tree of edges between its node source(s) - and sink(s). Used for memlet tracking. - - :param edge: An edge within this state. - :return: A tree of edges whose root is the source/sink node - (depending on direction) and associated children edges. - """ propagate_forward = False propagate_backward = False if ((isinstance(edge.src, nd.EntryNode) and edge.src_conn is not None) or @@ -246,30 +480,12 @@ def traverse(node): return traverse(tree_root) def in_edges_by_connector(self, node: nd.Node, connector: AnyStr) -> Iterable[MultiConnectorEdge[mm.Memlet]]: - """ Returns a generator over edges entering the given connector of the - given node. - - :param node: Destination node of edges. - :param connector: Destination connector of edges. - """ return (e for e in self.in_edges(node) if e.dst_conn == connector) def out_edges_by_connector(self, node: nd.Node, connector: AnyStr) -> Iterable[MultiConnectorEdge[mm.Memlet]]: - """ Returns a generator over edges exiting the given connector of the - given node. - - :param node: Source node of edges. 
- :param connector: Source connector of edges. - """ return (e for e in self.out_edges(node) if e.src_conn == connector) def edges_by_connector(self, node: nd.Node, connector: AnyStr) -> Iterable[MultiConnectorEdge[mm.Memlet]]: - """ Returns a generator over edges entering or exiting the given - connector of the given node. - - :param node: Source/destination node of edges. - :param connector: Source/destination connector of edges. - """ return itertools.chain(self.in_edges_by_connector(node, connector), self.out_edges_by_connector(node, connector)) @@ -297,8 +513,6 @@ def scope_tree(self) -> 'dace.sdfg.scope.ScopeTree': result = {} - sdfg_symbols = self.parent.symbols.keys() - # Get scopes for node, scopenodes in sdc.items(): if node is None: @@ -325,15 +539,7 @@ def scope_leaves(self) -> List['dace.sdfg.scope.ScopeTree']: self._scope_leaves_cached = [scope for scope in st.values() if len(scope.children) == 0] return copy.copy(self._scope_leaves_cached) - def scope_dict(self, return_ids: bool = False, validate: bool = True) -> Dict[nd.Node, Optional[nd.Node]]: - """ Returns a dictionary that maps each SDFG node to its parent entry - node, or to None if the node is not in any scope. - - :param return_ids: Return node ID numbers instead of node objects. - :param validate: Ensure that the graph is not malformed when - computing dictionary. - :return: The mapping from a node to its parent scope entry node. - """ + def scope_dict(self, return_ids: bool = False, validate: bool = True) -> Dict[nd.Node, Union['SDFGState', nd.Node]]: from dace.sdfg.scope import _scope_dict_inner, _scope_dict_to_ids result = None result = copy.copy(self._scope_dict_toparent_cached) @@ -367,16 +573,7 @@ def scope_dict(self, return_ids: bool = False, validate: bool = True) -> Dict[nd def scope_children(self, return_ids: bool = False, - validate: bool = True) -> Dict[Optional[nd.EntryNode], List[nd.Node]]: - """ Returns a dictionary that maps each SDFG entry node to its children, - not including the children of children entry nodes. The key `None` - contains a list of top-level nodes (i.e., not in any scope). - - :param return_ids: Return node ID numbers instead of node objects. - :param validate: Ensure that the graph is not malformed when - computing dictionary. - :return: The mapping from a node to a list of children nodes. - """ + validate: bool = True) -> Dict[Union[nd.Node, 'SDFGState'], List[nd.Node]]: from dace.sdfg.scope import _scope_dict_inner, _scope_dict_to_ids result = None if self._scope_dict_tochildren_cached is not None: @@ -419,13 +616,7 @@ def is_leaf_memlet(self, e): return False return True - def used_symbols(self, all_symbols: bool) -> Set[str]: - """ - Returns a set of symbol names that are used in the state. - - :param all_symbols: If False, only returns the set of symbols that will be used - in the generated code and are needed as arguments. - """ + def used_symbols(self, all_symbols: bool, keep_defined_in_mapping: bool=False) -> Set[str]: state = self.graph if isinstance(self, SubgraphView) else self sdfg = state.parent new_symbols = set() @@ -579,33 +770,9 @@ def read_and_write_sets(self) -> Tuple[Set[AnyStr], Set[AnyStr]]: read_set, write_set = self._read_and_write_sets() return set(read_set.keys()), set(write_set.keys()) - def arglist(self, defined_syms=None, shared_transients=None) -> Dict[str, dt.Data]: - """ - Returns an ordered dictionary of arguments (names and types) required - to invoke this SDFG state or subgraph thereof. 
- - The arguments differ from SDFG.arglist, but follow the same order, - namely: <data arguments>, <scalar arguments>. - - Data arguments contain: - * All used non-transient data containers in the subgraph - * All used transient data containers that were allocated outside. - This includes data from memlets, transients shared across multiple - states, and transients that could not be allocated within the - subgraph (due to their ``AllocationLifetime`` or according to the - ``dtypes.can_allocate`` function). - - Scalar arguments contain: - * Free symbols in this state/subgraph. - * All transient and non-transient scalar data containers used in - this subgraph. - - This structure will create a sorted list of pointers followed by a - sorted list of PoDs and structs. - - :return: An ordered dictionary of (name, data descriptor type) of all - the arguments, sorted as defined here. - """ + def unordered_arglist(self, + defined_syms=None, + shared_transients=None) -> Tuple[Dict[str, dt.Data], Dict[str, dt.Data]]: sdfg: 'dace.sdfg.SDFG' = self.parent shared_transients = shared_transients or sdfg.shared_transients() sdict = self.scope_dict() @@ -699,12 +866,7 @@ def arglist(self, defined_syms=None, shared_transients=None) -> Dict[str, dt.Dat if not str(k).startswith('__dace') and str(k) not in sdfg.constants }) - # Fill up ordered dictionary - result = collections.OrderedDict() - for k, v in itertools.chain(sorted(data_args.items()), sorted(scalar_args.items())): - result[k] = v - - return result + return data_args, scalar_args def signature_arglist(self, with_types=True, for_call=False): """ Returns a list of arguments necessary to call this state or @@ -749,22 +911,212 @@ def replace(self, name: str, new_name: str): def replace_dict(self, repl: Dict[str, str], symrepl: Optional[Dict[symbolic.SymbolicType, symbolic.SymbolicType]] = None): - """ Finds and replaces all occurrences of a set of symbols or arrays in this state. - - :param repl: Mapping from names to replacements. - :param symrepl: Optional symbolic version of ``repl``. - """ from dace.sdfg.replace import replace_dict replace_dict(self, repl, symrepl) @make_properties class ControlGraphView(BlockGraphView, abc.ABC): + + ################################################################### + # Typing overrides + + @overload + def nodes(self) -> List['ControlFlowBlock']: + ... + + @overload + def edges(self) -> List[Edge['dace.sdfg.InterstateEdge']]: + ...
+ + ################################################################### + # Traversal methods + + def all_nodes_recursive(self) -> Iterator[Tuple[NodeT, GraphT]]: + for node in self.nodes(): + yield node, self + yield from node.all_nodes_recursive() + + def all_edges_recursive(self) -> Iterator[Tuple[EdgeT, GraphT]]: + for e in self.edges(): + yield e, self + for node in self.nodes(): + yield from node.all_edges_recursive() + + def data_nodes(self) -> List[nd.AccessNode]: + data_nodes = [] + for node in self.nodes(): + data_nodes.extend(node.data_nodes()) + return data_nodes + + def entry_node(self, node: nd.Node) -> Optional[nd.EntryNode]: + for block in self.nodes(): + if node in block.nodes(): + return block.entry_node(node) + return None + + def exit_node(self, entry_node: nd.EntryNode) -> Optional[nd.ExitNode]: + for block in self.nodes(): + if entry_node in block.nodes(): + return block.exit_node(entry_node) + return None + + ################################################################### + # Memlet-tracking methods + + def memlet_path(self, edge: MultiConnectorEdge[mm.Memlet]) -> List[MultiConnectorEdge[mm.Memlet]]: + for block in self.nodes(): + if edge in block.edges(): + return block.memlet_path(edge) + return [] + + def memlet_tree(self, edge: MultiConnectorEdge) -> mm.MemletTree: + for block in self.nodes(): + if edge in block.edges(): + return block.memlet_tree(edge) + return mm.MemletTree(edge) + + def in_edges_by_connector(self, node: nd.Node, connector: AnyStr) -> Iterable[MultiConnectorEdge[mm.Memlet]]: + for block in self.nodes(): + if node in block.nodes(): + return block.in_edges_by_connector(node, connector) + return [] + + def out_edges_by_connector(self, node: nd.Node, connector: AnyStr) -> Iterable[MultiConnectorEdge[mm.Memlet]]: + for block in self.nodes(): + if node in block.nodes(): + return block.out_edges_by_connector(node, connector) + return [] + + def edges_by_connector(self, node: nd.Node, connector: AnyStr) -> Iterable[MultiConnectorEdge[mm.Memlet]]: + for block in self.nodes(): + if node in block.nodes(): + return block.edges_by_connector(node, connector) + return [] + + ################################################################### + # Query, subgraph, and replacement methods + + @abc.abstractmethod + def _used_symbols_internal(self, + all_symbols: bool, + defined_syms: Optional[Set] = None, + free_syms: Optional[Set] = None, + used_before_assignment: Optional[Set] = None, + keep_defined_in_mapping: bool = False) -> Tuple[Set[str], Set[str], Set[str]]: + raise NotImplementedError() + + def used_symbols(self, all_symbols: bool, keep_defined_in_mapping: bool=False) -> Set[str]: + return self._used_symbols_internal(all_symbols, keep_defined_in_mapping=keep_defined_in_mapping)[0] + + def read_and_write_sets(self) -> Tuple[Set[AnyStr], Set[AnyStr]]: + read_set = set() + write_set = set() + for block in self.nodes(): + for edge in self.in_edges(block): + read_set |= edge.data.free_symbols & self.sdfg.arrays.keys() + rs, ws = block.read_and_write_sets() + read_set.update(rs) + write_set.update(ws) + return read_set, write_set + + def unordered_arglist(self, + defined_syms=None, + shared_transients=None) -> Tuple[Dict[str, dt.Data], Dict[str, dt.Data]]: + data_args = {} + scalar_args = {} + for block in self.nodes(): + n_data_args, n_scalar_args = block.unordered_arglist(defined_syms, shared_transients) + data_args.update(n_data_args) + scalar_args.update(n_scalar_args) + return data_args, scalar_args + + def top_level_transients(self) -> Set[str]: + res = set() +
for block in self.nodes(): + res.update(block.top_level_transients()) + return res + + def all_transients(self) -> List[str]: + res = [] + for block in self.nodes(): + res.extend(block.all_transients()) + return dtypes.deduplicate(res) + + def replace(self, name: str, new_name: str): + for n in self.nodes(): + n.replace(name, new_name) + + def replace_dict(self, + repl: Dict[str, str], + symrepl: Optional[Dict[symbolic.SymbolicType, symbolic.SymbolicType]] = None, + replace_in_graph: bool = True, replace_keys: bool = False): + symrepl = symrepl or { + symbolic.symbol(k): symbolic.pystr_to_symbolic(v) if isinstance(k, str) else v + for k, v in repl.items() + } + + if replace_in_graph: + # Replace in inter-state edges + for edge in self.edges(): + edge.data.replace_dict(repl, replace_keys=replace_keys) + + # Replace in states + for state in self.nodes(): + state.replace_dict(repl, symrepl) + +@make_properties +class ControlFlowBlock(BlockGraphView, abc.ABC): + + is_collapsed = Property(dtype=bool, desc='Show this block as collapsed', default=False) + + _label: str + + def __init__(self, label: str=''): + super(ControlFlowBlock, self).__init__() + self._label = label + self._default_lineinfo = None + self.is_collapsed = False + + def set_default_lineinfo(self, lineinfo: dace.dtypes.DebugInfo): + """ + Sets the default source line information to be lineinfo, or None to + revert to default mode. + """ + self._default_lineinfo = lineinfo + + def to_json(self, parent=None): + tmp = { + 'type': self.__class__.__name__, + 'collapsed': self.is_collapsed, + 'label': self._label, + 'id': parent.node_id(self) if parent is not None else None, + } + return tmp + + def __str__(self): + return self._label + + def __repr__(self) -> str: + return f'ControlFlowBlock ({self.label})' + + @property + def label(self) -> str: + return self._label + + @label.setter + def label(self, label: str): + self._label = label + + @property + def name(self) -> str: + return self._label + + +@make_properties +class SDFGState(OrderedMultiDiConnectorGraph[nd.Node, mm.Memlet], ControlFlowBlock, DataflowGraphView): """ An acyclic dataflow multigraph in an SDFG, corresponding to a single state in the SDFG state machine. """ - is_collapsed = Property(dtype=bool, desc="Show this node/scope/state as collapsed", default=False) - nosync = Property(dtype=bool, default=False, desc="Do not synchronize at the end of the state") instrument = EnumProperty(dtype=dtypes.InstrumentationType, @@ -803,13 +1155,14 @@ def __init__(self, label=None, sdfg=None, debuginfo=None, location=None): :param debuginfo: Source code locator for debugging. 
""" from dace.sdfg.sdfg import SDFG # Avoid import loop + OrderedMultiDiConnectorGraph.__init__(self) + ControlFlowBlock.__init__(self, label) super(SDFGState, self).__init__() self._label = label self._parent: SDFG = sdfg self._graph = self # Allowing MemletTrackingView mixin to work self._clear_scopedict_cache() self._debuginfo = debuginfo - self.is_collapsed = False self.nosync = False self.location = location if location is not None else {} self._default_lineinfo = None @@ -839,33 +1192,12 @@ def parent(self): def parent(self, value): self._parent = value - def __str__(self): - return self._label - - @property - def label(self): - return self._label - - @property - def name(self): - return self._label - - def set_label(self, label): - self._label = label - def is_empty(self): return self.number_of_nodes() == 0 def validate(self) -> None: validate_state(self) - def set_default_lineinfo(self, lineinfo: dtypes.DebugInfo): - """ - Sets the default source line information to be lineinfo, or None to - revert to default mode. - """ - self._default_lineinfo = lineinfo - def nodes(self) -> List[nd.Node]: # Added for type hints return super().nodes() @@ -1981,8 +2313,244 @@ def fill_scope_connectors(self): node.add_in_connector(edge.dst_conn) -class StateSubgraphView(SubgraphView, StateGraphView): +class StateSubgraphView(SubgraphView, DataflowGraphView): """ A read-only subgraph view of an SDFG state. """ def __init__(self, graph, subgraph_nodes): super().__init__(graph, subgraph_nodes) + + +@make_properties +class ControlFlowRegion(OrderedDiGraph[ControlFlowBlock, 'dace.sdfg.InterstateEdge'], ControlGraphView, + ControlFlowBlock): + + def __init__(self, + label: str=''): + OrderedDiGraph.__init__(self) + ControlGraphView.__init__(self) + ControlFlowBlock.__init__(self, label) + + self._labels: Set[str] = set() + self._start_block: Optional[int] = None + self._cached_start_block: Optional[ControlFlowBlock] = None + + def add_edge(self, src: ControlFlowBlock, dst: ControlFlowBlock, data: 'dace.sdfg.InterstateEdge'): + """ Adds a new edge to the graph. Must be an InterstateEdge or a subclass thereof. + + :param u: Source node. + :param v: Destination node. + :param edge: The edge to add. 
+ """ + if not isinstance(src, ControlFlowBlock): + raise TypeError('Expected ControlFlowBlock, got ' + str(type(src))) + if not isinstance(dst, ControlFlowBlock): + raise TypeError('Expected ControlFlowBlock, got ' + str(type(dst))) + if not isinstance(data, dace.sdfg.InterstateEdge): + raise TypeError('Expected InterstateEdge, got ' + str(type(data))) + if dst is self._cached_start_block: + self._cached_start_block = None + return super().add_edge(src, dst, data) + + def add_node(self, node, is_start_block=False, *, is_start_state: bool=None): + if not isinstance(node, ControlFlowBlock): + raise TypeError('Expected ControlFlowBlock, got ' + str(type(node))) + super().add_node(node) + self._cached_start_block = None + start_block = is_start_block + if is_start_state is not None: + warnings.warn('is_start_state is deprecated, use is_start_block instead', DeprecationWarning) + start_block = is_start_state + + if start_block: + self.start_block = len(self.nodes()) - 1 + self._cached_start_block = node + + def add_state(self, label=None, is_start_block=False, *, is_start_state: bool=None) -> SDFGState: + if self._labels is None or len(self._labels) != self.number_of_nodes(): + self._labels = set(s.label for s in self.nodes()) + label = label or 'state' + existing_labels = self._labels + label = dt.find_new_name(label, existing_labels) + state = SDFGState(label) + state.parent = self + self._labels.add(label) + start_block = is_start_block + if is_start_state is not None: + warnings.warn('is_start_state is deprecated, use is_start_block instead', DeprecationWarning) + start_block = is_start_state + self.add_node(state, is_start_block=start_block) + return state + + def add_state_before(self, state: SDFGState, label=None, is_start_state=False) -> SDFGState: + """ Adds a new SDFG state before an existing state, reconnecting predecessors to it instead. + + :param state: The state to prepend the new state before. + :param label: State label. + :param is_start_state: If True, resets scope block starting state to this state. + :return: A new SDFGState object. + """ + new_state = self.add_state(label, is_start_state) + # Reconnect + for e in self.in_edges(state): + self.remove_edge(e) + self.add_edge(e.src, new_state, e.data) + # Add unconditional connection between the new state and the current + self.add_edge(new_state, state, dace.sdfg.InterstateEdge()) + return new_state + + def add_state_after(self, state: SDFGState, label=None, is_start_state=False) -> SDFGState: + """ Adds a new SDFG state after an existing state, reconnecting it to the successors instead. + + :param state: The state to append the new state after. + :param label: State label. + :param is_start_state: If True, resets SDFG starting state to this state. + :return: A new SDFGState object. 
+ """ + new_state = self.add_state(label, is_start_state) + # Reconnect + for e in self.out_edges(state): + self.remove_edge(e) + self.add_edge(new_state, e.dst, e.data) + # Add unconditional connection between the current and the new state + self.add_edge(state, new_state, dace.sdfg.InterstateEdge()) + return new_state + + @abc.abstractmethod + def _used_symbols_internal(self, + all_symbols: bool, + defined_syms: Optional[Set] = None, + free_syms: Optional[Set] = None, + used_before_assignment: Optional[Set] = None, + keep_defined_in_mapping: bool = False) -> Tuple[Set[str], Set[str], Set[str]]: + defined_syms = set() if defined_syms is None else defined_syms + free_syms = set() if free_syms is None else free_syms + used_before_assignment = set() if used_before_assignment is None else used_before_assignment + + try: + ordered_blocks = self.topological_sort(self.start_block) + except ValueError: # Failsafe (e.g., for invalid or empty SDFGs) + ordered_blocks = self.nodes() + + for block in ordered_blocks: + state_symbols = set() + if isinstance(block, ControlFlowRegion): + b_free_syms, b_defined_syms, b_used_before_syms = block._used_symbols_internal(all_symbols) + free_syms |= b_free_syms + defined_syms |= b_defined_syms + used_before_assignment |= b_used_before_syms + state_symbols = b_free_syms + else: + state_symbols = block.used_symbols(all_symbols) + free_syms |= state_symbols + + # Add free inter-state symbols + for e in self.out_edges(block): + # NOTE: First we get the true InterstateEdge free symbols, then we compute the newly defined symbols by + # subracting the (true) free symbols from the edge's assignment keys. This way we can correctly + # compute the symbols that are used before being assigned. + efsyms = e.data.used_symbols(all_symbols) + defined_syms |= set(e.data.assignments.keys()) - (efsyms | state_symbols) + used_before_assignment.update(efsyms - defined_syms) + free_syms |= efsyms + + # Remove symbols that were used before they were assigned. + defined_syms -= used_before_assignment + + if isinstance(self, dace.SDFG): + # Remove from defined symbols those that are in the symbol mapping + if self.parent_nsdfg_node is not None and keep_defined_in_mapping: + defined_syms -= set(self.parent_nsdfg_node.symbol_mapping.keys()) + + # Add the set of SDFG symbol parameters + # If all_symbols is False, those symbols would only be added in the case of non-Python tasklets + if all_symbols: + free_syms |= set(self.symbols.keys()) + + # Subtract symbols defined in inter-state edges and constants from the list of free symbols. + free_syms -= defined_syms + + return free_syms, defined_syms, used_before_assignment + + def to_json(self, parent=None): + graph_json = OrderedDiGraph.to_json(self) + block_json = ControlFlowBlock.to_json(self, parent) + graph_json.update(block_json) + return graph_json + + ################################################################### + # Traversal methods + + def all_control_flow_regions(self, recursive=False) -> Iterator['ControlFlowRegion']: + """ Iterate over this and all nested control flow regions. """ + yield self + for block in self.nodes(): + if isinstance(block, SDFGState) and recursive: + for node in block.nodes(): + if isinstance(node, nd.NestedSDFG): + yield from node.sdfg.all_control_flow_regions(recursive=recursive) + elif isinstance(block, ControlFlowRegion): + yield from block.all_control_flow_regions(recursive=recursive) + + def all_sdfgs_recursive(self) -> Iterator['dace.SDFG']: + """ Iterate over this and all nested SDFGs. 
""" + for cfg in self.all_control_flow_regions(recursive=True): + if isinstance(cfg, dace.SDFG): + yield cfg + + def all_states(self) -> Iterator[SDFGState]: + """ Iterate over all states in this control flow graph. """ + for block in self.nodes(): + if isinstance(block, SDFGState): + yield block + elif isinstance(block, ControlFlowRegion): + yield from block.all_states() + + def all_control_flow_blocks(self, recursive=False) -> Iterator[ControlFlowBlock]: + """ Iterate over all control flow blocks in this control flow graph. """ + for cfg in self.all_control_flow_regions(recursive=recursive): + for block in cfg.nodes(): + yield block + + def all_interstate_edges(self, recursive=False) -> Iterator[Edge['dace.sdfg.InterstateEdge']]: + """ Iterate over all interstate edges in this control flow graph. """ + for cfg in self.all_control_flow_regions(recursive=recursive): + for edge in cfg.edges(): + yield edge + + ################################################################### + # Getters & setters, overrides + + def __str__(self): + return ControlFlowBlock.__str__(self) + + def __repr__(self) -> str: + return f'{self.__class__.__name__} ({self.label})' + + @property + def start_block(self): + """ Returns the starting block of this ControlFlowGraph. """ + if self._cached_start_block is not None: + return self._cached_start_block + + source_nodes = self.source_nodes() + if len(source_nodes) == 1: + self._cached_start_block = source_nodes[0] + return source_nodes[0] + # If the starting block is ambiguous allow manual override. + if self._start_block is not None: + self._cached_start_block = self.node(self._start_block) + return self._cached_start_block + raise ValueError('Ambiguous or undefined starting block for ControlFlowGraph, ' + 'please use "is_start_block=True" when adding the ' + 'starting block with "add_state" or "add_node"') + + @start_block.setter + def start_block(self, block_id): + """ Manually sets the starting block of this ControlFlowGraph. + + :param block_id: The node ID (use `node_id(block)`) of the block to set. 
+ """ + if block_id < 0 or block_id >= self.number_of_nodes(): + raise ValueError('Invalid state ID') + self._start_block = block_id + self._cached_start_block = self.node(block_id) diff --git a/dace/sdfg/utils.py b/dace/sdfg/utils.py index 1078414161..621f8a9e16 100644 --- a/dace/sdfg/utils.py +++ b/dace/sdfg/utils.py @@ -668,7 +668,7 @@ def consolidate_edges(sdfg: SDFG, starting_scope=None) -> int: from dace.sdfg.propagation import propagate_memlets_scope total_consolidated = 0 - for state in sdfg.nodes(): + for state in sdfg.states(): # Start bottom-up if starting_scope and starting_scope.entry not in state.nodes(): continue @@ -1206,8 +1206,8 @@ def fuse_states(sdfg: SDFG, permissive: bool = False, progress: bool = None) -> counter = 0 if progress is True or progress is None: fusible_states = 0 - for sd in sdfg.all_sdfgs_recursive(): - fusible_states += sd.number_of_edges() + for cfg in sdfg.all_control_flow_regions(): + fusible_states += cfg.number_of_edges() if progress is True: pbar = tqdm(total=fusible_states, desc='Fusing states') @@ -1217,30 +1217,32 @@ def fuse_states(sdfg: SDFG, permissive: bool = False, progress: bool = None) -> for sd in sdfg.all_sdfgs_recursive(): id = sd.sdfg_id - while True: - edges = list(sd.nx.edges) - applied = 0 - skip_nodes = set() - for u, v in edges: - if (progress is None and tqdm is not None and (time.time() - start) > 5): - progress = True - pbar = tqdm(total=fusible_states, desc='Fusing states', initial=counter) - - if u in skip_nodes or v in skip_nodes: - continue - candidate = {StateFusion.first_state: u, StateFusion.second_state: v} - sf = StateFusion() - sf.setup_match(sd, id, -1, candidate, 0, override=True) - if sf.can_be_applied(sd, 0, sd, permissive=permissive): - sf.apply(sd, sd) - applied += 1 - counter += 1 - if progress: - pbar.update(1) - skip_nodes.add(u) - skip_nodes.add(v) - if applied == 0: - break + for cfg in sd.all_control_flow_regions(): + while True: + edges = list(cfg.nx.edges) + applied = 0 + skip_nodes = set() + for u, v in edges: + if (progress is None and tqdm is not None and (time.time() - start) > 5): + progress = True + pbar = tqdm(total=fusible_states, desc='Fusing states', initial=counter) + + if (u in skip_nodes or v in skip_nodes or not isinstance(v, SDFGState) or + not isinstance(u, SDFGState)): + continue + candidate = {StateFusion.first_state: u, StateFusion.second_state: v} + sf = StateFusion() + sf.setup_match(cfg, id, -1, candidate, 0, override=True) + if sf.can_be_applied(cfg, 0, sd, permissive=permissive): + sf.apply(cfg, sd) + applied += 1 + counter += 1 + if progress: + pbar.update(1) + skip_nodes.add(u) + skip_nodes.add(v) + if applied == 0: + break if progress: pbar.close() return counter diff --git a/dace/transformation/dataflow/double_buffering.py b/dace/transformation/dataflow/double_buffering.py index 8ff70a6355..6efe6543ca 100644 --- a/dace/transformation/dataflow/double_buffering.py +++ b/dace/transformation/dataflow/double_buffering.py @@ -128,7 +128,7 @@ def apply(self, graph: sd.SDFGState, sdfg: sd.SDFG): ############################## # Add initial reads to initial nested state initial_state: sd.SDFGState = nsdfg_node.sdfg.start_state - initial_state.set_label('%s_init' % map_entry.map.label) + initial_state.label = '%s_init' % map_entry.map.label for edge in edges_to_replace: initial_state.add_node(edge.src) rnode = edge.src @@ -152,7 +152,7 @@ def apply(self, graph: sd.SDFGState, sdfg: sd.SDFG): # Add the main state's contents to the last state, modifying # memlets appropriately. 
final_state: sd.SDFGState = nsdfg_node.sdfg.sink_nodes()[0]
-        final_state.set_label('%s_final_computation' % map_entry.map.label)
+        final_state.label = '%s_final_computation' % map_entry.map.label
         dup_nstate = copy.deepcopy(nstate)
         final_state.add_nodes_from(dup_nstate.nodes())
         for e in dup_nstate.edges():
@@ -183,7 +183,7 @@ def apply(self, graph: sd.SDFGState, sdfg: sd.SDFG):
             nstate.add_edge(rnode, edge.src_conn, wnode, edge.dst_conn, new_memlet)
 
-        nstate.set_label('%s_double_buffered' % map_entry.map.label)
+        nstate.label = '%s_double_buffered' % map_entry.map.label
         # Divide by loop stride
         new_expr = symbolic.pystr_to_symbolic('((%s / %s) + 1) %% 2' % (map_param, map_rstride))
         sd.replace(nstate, '__dace_db_param', new_expr)
diff --git a/dace/transformation/interstate/loop_unroll.py b/dace/transformation/interstate/loop_unroll.py
index 47d438a2fc..b1dbfdd5c9 100644
--- a/dace/transformation/interstate/loop_unroll.py
+++ b/dace/transformation/interstate/loop_unroll.py
@@ -116,8 +116,7 @@ def instantiate_loop(
 
         # Replace iterate with value in each state
         for state in new_states:
-            state.set_label(state.label + '_' + itervar + '_' +
-                            (state_suffix if state_suffix is not None else str(value)))
+            state.label = state.label + '_' + itervar + '_' + (state_suffix if state_suffix is not None else str(value))
             state.replace(itervar, value)
 
         # Add subgraph to original SDFG
diff --git a/dace/transformation/interstate/multistate_inline.py b/dace/transformation/interstate/multistate_inline.py
index 74dd51a483..4d560ab70a 100644
--- a/dace/transformation/interstate/multistate_inline.py
+++ b/dace/transformation/interstate/multistate_inline.py
@@ -334,7 +334,7 @@ def apply(self, outer_state: SDFGState, sdfg: SDFG):
             if nstate.label in statenames:
                 newname = data.find_new_name(nstate.label, statenames)
                 statenames.add(newname)
-                nstate.set_label(newname)
+                nstate.label = newname
 
     #######################################################
     # Add nested SDFG states into top-level SDFG
diff --git a/doc/sdfg/images/elements.svg b/doc/sdfg/images/elements.svg
index 80d35e39f0..6402de8e1d 100644
--- a/doc/sdfg/images/elements.svg
+++ b/doc/sdfg/images/elements.svg
@@ -1,90 +1,506 @@
[SVG source elided: the regenerated "elements" figure keeps its original labels (Access Nodes:
Transient, Global, Stream, View, Reference; Tasklet; Nested SDFG; Consume; Map; Library Node;
Memlets "A[0], CR: Sum, Volume: 1" and "B[i, j], Volume: 1" with Write-Conflict Resolution;
State and State Transition) and adds a new "Control Flow Region" element.]
diff --git a/doc/sdfg/ir.rst b/doc/sdfg/ir.rst
index 3c651fab19..f7bbb0ff79 100644
--- a/doc/sdfg/ir.rst
+++ b/doc/sdfg/ir.rst
@@ -29,7 +29,7 @@ Some of the main differences between SDFGs and other representations are:
 
 The Language
 ------------
 
-In a nutshell, an SDFG is a state machine of acyclic dataflow multigraphs. Here is an example graph:
+In a nutshell, an SDFG is a hierarchical state machine of acyclic dataflow multigraphs. Here is an example graph:
 
 .. raw:: html
 
@@ -43,7 +43,7 @@ In a nutshell, an SDFG is a state machine of acyclic dataflow multigraphs.
Here The cyan rectangles are called **states** and together they form a state machine, executing the code from the starting state and following the blue edge that matches the conditions. In each state, an acyclic multigraph controls execution -through dataflow. There are four elements in the above state: +through dataflow. There are four elements in the above states: * **Access nodes** (ovals) that give access to data containers * **Memlets** (edges/dotted arrows) that represent units of data movement @@ -58,7 +58,14 @@ The state machine shown in the example is a for-loop (``for _ in range(5)``). Th the guard state controls the loop, and at the end the result is copied to the special ``__return`` data container, which designates the return value of the function. -There are other kinds of elements in an SDFG, as detailed below. +The state machine is analogous to a control flow graph, where states represent basic blocks. Multiple such basic blocks, +such as with the described loop, can be put together to form a **control flow region**. This allows them to be +represented with a single graph node in the SDFG's state machine, which is useful for optimization and analysis. +The SDFG itself can be thought of as one big control flow region. This means that control flow regions are directed +graphs, where nodes are states or other control flow regions, and edges are state transitions. + +In addition to the elements seen in the example above, there are other kinds of elements in an SDFG, which are detailed +below. .. _sdfg-lang: @@ -142,6 +149,12 @@ new value, and specifies how the update is performed. In the summation example, end of each state there is an implicit synchronization point, so it will not finish executing until all the last nodes have been reached (this assumption can be removed in extreme cases, see :class:`~dace.sdfg.state.SDFGState.nosync`). +**Control Flow Region**: Forms a directed graph of states and other control flow regions, where edges are state +transitions. This allows representing complex control flow in a single graph node, which is useful for analysis and +optimization. The SDFG itself is a control flow region, which means that control flow regions are recursive / +hierarchical. Similar to the SDFG, each control flow region has a unique starting state, which is the entry point to +the region and is executed first. + **State Transition**: Transitions, internally referred to as *inter-state edges*, specify how execution proceeds after the end of a State. Inter-state edges optionally contain a symbolic *condition* that is checked at the end of the preceding state. If any of the conditions are true, execution will continue to the destination of this edge (the @@ -783,5 +796,7 @@ file uses the :func:`~dace.sdfg.sdfg.SDFG.from_file` static method. For example, The ``compress`` argument can be used to save a smaller (``gzip`` compressed) file. It can keep the same extension, but it is customary to use ``.sdfg.gz`` or ``.sdfgz`` to let others know it is compressed. +It is recommended to use this option for large SDFGs, as it not only saves space, but also speeds up loading and +editing of the SDFG in visualization tools and the VSCode extension. 
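A minimal sketch of the control flow region API documented above (illustrative only: the SDFG
name, the symbol ``i``, and the file name are made up for this example, and ``states()`` is
assumed to iterate over the SDFG's state blocks as in the utility changes above):

    import dace

    # Build a tiny state machine: an initial state plus a body state, with a
    # conditional back edge that increments the illustrative symbol i.
    sdfg = dace.SDFG('cfr_example')
    sdfg.add_symbol('i', dace.int64)
    init = sdfg.add_state('init', is_start_block=True)
    body = sdfg.add_state_after(init, 'body')
    sdfg.add_edge(body, init, dace.InterstateEdge(condition='i < 5', assignments={'i': 'i + 1'}))

    # Iterate over the states of the control flow graph.
    for state in sdfg.states():
        print(state.label)

    # Save in compressed form, as recommended above, and load it back.
    sdfg.save('cfr_example.sdfg.gz', compress=True)
    restored = dace.SDFG.from_file('cfr_example.sdfg.gz')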
diff --git a/requirements.txt b/requirements.txt index 5f804e1b4c..27560949fb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,9 +14,9 @@ Jinja2==3.1.2 MarkupSafe==2.1.3 mpmath==1.3.0 networkx==3.1 -numpy==1.24.3 +numpy==1.26.1 ply==3.11 -PyYAML==6.0 +PyYAML==6.0.1 requests==2.31.0 six==1.16.0 sympy==1.9 diff --git a/tests/sdfg/nested_control_flow_regions_test.py b/tests/sdfg/nested_control_flow_regions_test.py new file mode 100644 index 0000000000..f29c093dad --- /dev/null +++ b/tests/sdfg/nested_control_flow_regions_test.py @@ -0,0 +1,18 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +import pytest + +import dace + + +def test_is_start_state_deprecation(): + sdfg = dace.SDFG('deprecation_test') + with pytest.deprecated_call(): + sdfg.add_state('state1', is_start_state=True) + sdfg2 = dace.SDFG('deprecation_test2') + state = dace.SDFGState('state2') + with pytest.deprecated_call(): + sdfg2.add_node(state, is_start_state=True) + + +if __name__ == '__main__': + test_is_start_state_deprecation() diff --git a/tests/sdfg_validate_names_test.py b/tests/sdfg_validate_names_test.py index dad79c8950..1650a4e4b1 100644 --- a/tests/sdfg_validate_names_test.py +++ b/tests/sdfg_validate_names_test.py @@ -28,7 +28,7 @@ def test_state_duplication(self): sdfg = dace.SDFG('ok') s1 = sdfg.add_state('also_ok') s2 = sdfg.add_state('also_ok') - s2.set_label('also_ok') + s2.label = 'also_ok' sdfg.add_edge(s1, s2, dace.InterstateEdge()) sdfg.validate() self.fail('Failed to detect duplicate state') From dff301c3d28c4cb3d0a6ba6c017bce22f941f6f6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 2 Nov 2023 17:25:33 +0100 Subject: [PATCH 128/163] Bump werkzeug from 2.3.5 to 3.0.1 (#1409) Bumps [werkzeug](https://github.com/pallets/werkzeug) from 2.3.5 to 3.0.1. - [Release notes](https://github.com/pallets/werkzeug/releases) - [Changelog](https://github.com/pallets/werkzeug/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/werkzeug/compare/2.3.5...3.0.1) --- updated-dependencies: - dependency-name: werkzeug dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 27560949fb..266b3368c8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,5 +22,5 @@ six==1.16.0 sympy==1.9 urllib3==2.0.7 websockets==11.0.3 -Werkzeug==2.3.5 +Werkzeug==3.0.1 zipp==3.15.0 From ab11b20a66e720b5250ab46580787b20c87418e2 Mon Sep 17 00:00:00 2001 From: matteonussbauemer Date: Thu, 2 Nov 2023 18:43:32 +0100 Subject: [PATCH 129/163] set sympy version back to 1.9 --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 12c50a2eb5..5f804e1b4c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,7 +19,7 @@ ply==3.11 PyYAML==6.0 requests==2.31.0 six==1.16.0 -sympy==1.12 +sympy==1.9 urllib3==2.0.7 websockets==11.0.3 Werkzeug==2.3.5 diff --git a/setup.py b/setup.py index cd5189437e..a0ac2e2d49 100644 --- a/setup.py +++ b/setup.py @@ -73,7 +73,7 @@ }, include_package_data=True, install_requires=[ - 'numpy', 'networkx >= 2.5', 'astunparse', 'sympy>=1.12', 'pyyaml', 'ply', 'websockets', 'requests', 'flask', + 'numpy', 'networkx >= 2.5', 'astunparse', 'sympy<=1.9', 'pyyaml', 'ply', 'websockets', 'requests', 'flask', 'fparser >= 0.1.3', 'aenum >= 3.1', 'dataclasses; python_version < "3.7"', 'dill', 'pyreadline;platform_system=="Windows"', 'typing-compat; python_version < "3.8"' ] + cmake_requires, From 5389a3136605f5ad59a0bd610eaca75906e1069c Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Thu, 2 Nov 2023 18:00:21 -0700 Subject: [PATCH 130/163] GPU code generation: User-specified block/thread/warp location (#1358) * Remove persistent GPU kernel write scope heuristics * Allow CUDA device-level tasklets to have user-specified block/thread/warp specialization * Logic fixes for CPU dispatch in GPU code generator --- dace/codegen/targets/cuda.py | 90 ++++++++++++++++++++++++++++++++---- tests/cuda_block_test.py | 38 +++++++++++++++ 2 files changed, 120 insertions(+), 8 deletions(-) diff --git a/dace/codegen/targets/cuda.py b/dace/codegen/targets/cuda.py index a465d2bbc0..fb8ae90187 100644 --- a/dace/codegen/targets/cuda.py +++ b/dace/codegen/targets/cuda.py @@ -445,7 +445,7 @@ def node_dispatch_predicate(self, sdfg, state, node): if hasattr(node, 'schedule'): # NOTE: Works on nodes and scopes if node.schedule in dtypes.GPU_SCHEDULES: return True - if isinstance(node, nodes.NestedSDFG) and CUDACodeGen._in_device_code: + if CUDACodeGen._in_device_code: return True return False @@ -1324,11 +1324,11 @@ def generate_devicelevel_state(self, sdfg, state, function_stream, callsite_stre if write_scope == 'grid': callsite_stream.write("if (blockIdx.x == 0 " - "&& threadIdx.x == 0) " - "{ // sub-graph begin", sdfg, state.node_id) + "&& threadIdx.x == 0) " + "{ // sub-graph begin", sdfg, state.node_id) elif write_scope == 'block': callsite_stream.write("if (threadIdx.x == 0) " - "{ // sub-graph begin", sdfg, state.node_id) + "{ // sub-graph begin", sdfg, state.node_id) else: callsite_stream.write("{ // subgraph begin", sdfg, state.node_id) else: @@ -2519,15 +2519,17 @@ def generate_devicelevel_scope(self, sdfg, dfg_scope, state_id, function_stream, def generate_node(self, sdfg, dfg, state_id, node, function_stream, callsite_stream): if self.node_dispatch_predicate(sdfg, dfg, node): # Dynamically obtain node generator according to class name - gen = 
getattr(self, '_generate_' + type(node).__name__) - gen(sdfg, dfg, state_id, node, function_stream, callsite_stream) - return + gen = getattr(self, '_generate_' + type(node).__name__, False) + if gen is not False: # Not every node type has a code generator here + gen(sdfg, dfg, state_id, node, function_stream, callsite_stream) + return if not CUDACodeGen._in_device_code: self._cpu_codegen.generate_node(sdfg, dfg, state_id, node, function_stream, callsite_stream) return - self._locals.clear_scope(self._code_state.indentation + 1) + if isinstance(node, nodes.ExitNode): + self._locals.clear_scope(self._code_state.indentation + 1) if CUDACodeGen._in_device_code and isinstance(node, nodes.MapExit): return # skip @@ -2591,6 +2593,78 @@ def _generate_MapExit(self, sdfg, dfg, state_id, node, function_stream, callsite self._cpu_codegen._generate_MapExit(sdfg, dfg, state_id, node, function_stream, callsite_stream) + def _get_thread_id(self) -> str: + result = 'threadIdx.x' + if self._block_dims[1] != 1: + result += f' + ({sym2cpp(self._block_dims[0])}) * threadIdx.y' + if self._block_dims[2] != 1: + result += f' + ({sym2cpp(self._block_dims[0] * self._block_dims[1])}) * threadIdx.z' + return result + + def _get_warp_id(self) -> str: + return f'(({self._get_thread_id()}) / warpSize)' + + def _get_block_id(self) -> str: + result = 'blockIdx.x' + if self._block_dims[1] != 1: + result += f' + gridDim.x * blockIdx.y' + if self._block_dims[2] != 1: + result += f' + gridDim.x * gridDim.y * blockIdx.z' + return result + + def _generate_condition_from_location(self, name: str, index_expr: str, node: nodes.Tasklet, + callsite_stream: CodeIOStream) -> str: + if name not in node.location: + return 0 + + location: Union[int, str, subsets.Range] = node.location[name] + if isinstance(location, str) and ':' in location: + location = subsets.Range.from_string(location) + elif symbolic.issymbolic(location): + location = sym2cpp(location) + + if isinstance(location, subsets.Range): + # Range of indices + if len(location) != 1: + raise ValueError(f'Only one-dimensional ranges are allowed for {name} specialization, {location} given') + begin, end, stride = location[0] + rb, re, rs = sym2cpp(begin), sym2cpp(end), sym2cpp(stride) + cond = '' + cond += f'(({index_expr}) >= {rb}) && (({index_expr}) <= {re})' + if stride != 1: + cond += f' && ((({index_expr}) - {rb}) % {rs} == 0)' + + callsite_stream.write(f'if ({cond}) {{') + else: + # Single-element + callsite_stream.write(f'if (({index_expr}) == {location}) {{') + + return 1 + + def _generate_Tasklet(self, sdfg: SDFG, dfg, state_id: int, node: nodes.Tasklet, function_stream: CodeIOStream, + callsite_stream: CodeIOStream): + generated_preamble_scopes = 0 + if self._in_device_code: + # If location dictionary prescribes that the code should run on a certain group of threads/blocks, + # add condition + generated_preamble_scopes += self._generate_condition_from_location('gpu_thread', self._get_thread_id(), + node, callsite_stream) + generated_preamble_scopes += self._generate_condition_from_location('gpu_warp', self._get_warp_id(), node, + callsite_stream) + generated_preamble_scopes += self._generate_condition_from_location('gpu_block', self._get_block_id(), node, + callsite_stream) + + # Call standard tasklet generation + old_codegen = self._cpu_codegen.calling_codegen + self._cpu_codegen.calling_codegen = self + self._cpu_codegen._generate_Tasklet(sdfg, dfg, state_id, node, function_stream, callsite_stream) + self._cpu_codegen.calling_codegen = old_codegen + + if 
generated_preamble_scopes > 0: + # Generate appropriate postamble + for i in range(generated_preamble_scopes): + callsite_stream.write('}', sdfg, state_id, node) + def make_ptr_vector_cast(self, *args, **kwargs): return cpp.make_ptr_vector_cast(*args, **kwargs) diff --git a/tests/cuda_block_test.py b/tests/cuda_block_test.py index f77e80673f..676785e0e5 100644 --- a/tests/cuda_block_test.py +++ b/tests/cuda_block_test.py @@ -10,8 +10,10 @@ @dace.program(dace.float64[N], dace.float64[N]) def cudahello(V, Vout): + @dace.mapscope(_[0:N:32]) def multiplication(i): + @dace.map(_[0:32]) def mult_block(bi): in_V << V[i + bi] @@ -55,6 +57,7 @@ def test_gpu(): @pytest.mark.gpu def test_different_block_sizes_nesting(): + @dace.program def nested(V: dace.float64[34], v1: dace.float64[1]): with dace.tasklet: @@ -105,6 +108,7 @@ def diffblocks(V: dace.float64[130], v1: dace.float64[4], v2: dace.float64[128]) @pytest.mark.gpu def test_custom_block_size_onemap(): + @dace.program def tester(A: dace.float64[400, 300]): for i, j in dace.map[0:400, 0:300]: @@ -132,6 +136,7 @@ def tester(A: dace.float64[400, 300]): @pytest.mark.gpu def test_custom_block_size_twomaps(): + @dace.program def tester(A: dace.float64[400, 300, 2, 32]): for i, j in dace.map[0:400, 0:300]: @@ -154,9 +159,42 @@ def tester(A: dace.float64[400, 300, 2, 32]): sdfg.compile() +@pytest.mark.gpu +def test_block_thread_specialization(): + + @dace.program + def tester(A: dace.float64[200]): + for i in dace.map[0:200:32]: + for bi in dace.map[0:32]: + with dace.tasklet: + a >> A[i + bi] + a = 1 + with dace.tasklet: # Tasklet to be specialized + a >> A[i + bi] + a = 2 + + sdfg = tester.to_sdfg() + sdfg.apply_gpu_transformations(sequential_innermaps=False) + tasklet = next(n for n, _ in sdfg.all_nodes_recursive() + if isinstance(n, dace.nodes.Tasklet) and '2' in n.code.as_string) + tasklet.location['gpu_thread'] = dace.subsets.Range.from_string('2:9:3') + tasklet.location['gpu_block'] = 1 + + code = sdfg.generate_code()[1].clean_code # Get GPU code (second file) + assert '>= 2' in code and '<= 8' in code + assert ' == 1' in code + + a = np.random.rand(200) + ref = np.ones_like(a) + ref[32:64][2:9:3] = 2 + sdfg(a) + assert np.allclose(a, ref) + + if __name__ == "__main__": test_cpu() test_gpu() test_different_block_sizes_nesting() test_custom_block_size_onemap() test_custom_block_size_twomaps() + test_block_thread_specialization() From 9430e874dadcf77e45a03887e66fd6da4a9cc4b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lukas=20Tr=C3=BCmper?= Date: Fri, 3 Nov 2023 09:07:31 +0100 Subject: [PATCH 131/163] AugAssignToWCR: Support for more cases and increased test coverage (#1359) --- .../transformation/dataflow/wcr_conversion.py | 152 ++++++----- tests/transformations/wcr_conversion_test.py | 247 ++++++++++++++++++ 2 files changed, 332 insertions(+), 67 deletions(-) create mode 100644 tests/transformations/wcr_conversion_test.py diff --git a/dace/transformation/dataflow/wcr_conversion.py b/dace/transformation/dataflow/wcr_conversion.py index e95674adc1..7f4fbc654d 100644 --- a/dace/transformation/dataflow/wcr_conversion.py +++ b/dace/transformation/dataflow/wcr_conversion.py @@ -2,10 +2,14 @@ """ Transformations to convert subgraphs to write-conflict resolutions. 
""" import ast import re -from dace import registry, nodes, dtypes +import copy +from dace import registry, nodes, dtypes, Memlet from dace.transformation import transformation, helpers as xfh from dace.sdfg import graph as gr, utils as sdutil from dace import SDFG, SDFGState +from dace.sdfg.state import StateSubgraphView +from dace.transformation import helpers +from dace.sdfg.propagation import propagate_memlets_state class AugAssignToWCR(transformation.SingleStateTransformation): @@ -20,6 +24,7 @@ class AugAssignToWCR(transformation.SingleStateTransformation): map_exit = transformation.PatternNode(nodes.MapExit) _EXPRESSIONS = ['+', '-', '*', '^', '%'] #, '/'] + _FUNCTIONS = ['min', 'max'] _EXPR_MAP = {'-': ('+', '-({expr})'), '/': ('*', '((decltype({expr}))1)/({expr})')} _PYOP_MAP = {ast.Add: '+', ast.Sub: '-', ast.Mult: '*', ast.BitXor: '^', ast.Mod: '%', ast.Div: '/'} @@ -27,6 +32,7 @@ class AugAssignToWCR(transformation.SingleStateTransformation): def expressions(cls): return [ sdutil.node_path_graph(cls.input, cls.tasklet, cls.output), + sdutil.node_path_graph(cls.input, cls.map_entry, cls.tasklet, cls.map_exit, cls.output) ] def can_be_applied(self, graph, expr_index, sdfg, permissive=False): @@ -38,7 +44,6 @@ def can_be_applied(self, graph, expr_index, sdfg, permissive=False): # Free tasklet if expr_index == 0: - # Only free tasklets supported for now if graph.entry_node(tasklet) is not None: return False @@ -49,8 +54,6 @@ def can_be_applied(self, graph, expr_index, sdfg, permissive=False): # Make sure augmented assignment can be fissioned as necessary if any(not isinstance(e.src, nodes.AccessNode) for e in graph.in_edges(tasklet)): return False - if graph.in_degree(inarr) > 0 and graph.out_degree(outarr) > 0: - return False outedge = graph.edges_between(tasklet, outarr)[0] else: # Free map @@ -65,12 +68,10 @@ def can_be_applied(self, graph, expr_index, sdfg, permissive=False): if len(graph.edges_between(tasklet, mx)) > 1: return False - # Currently no fission is supported + # Make sure augmented assignment can be fissioned as necessary if any(e.src is not me and not isinstance(e.src, nodes.AccessNode) for e in graph.in_edges(me) + graph.in_edges(tasklet)): return False - if graph.in_degree(inarr) > 0: - return False outedge = graph.edges_between(tasklet, mx)[0] @@ -78,6 +79,7 @@ def can_be_applied(self, graph, expr_index, sdfg, permissive=False): outconn = outedge.src_conn ops = '[%s]' % ''.join(re.escape(o) for o in AugAssignToWCR._EXPRESSIONS) + funcs = '|'.join(re.escape(o) for o in AugAssignToWCR._FUNCTIONS) if tasklet.language is dtypes.Language.Python: # Match a single assignment with a binary operation as RHS @@ -108,18 +110,33 @@ def can_be_applied(self, graph, expr_index, sdfg, permissive=False): # Try to match a single C assignment that can be converted to WCR inconn = edge.dst_conn lhs = r'^\s*%s\s*=\s*%s\s*%s.*;$' % (re.escape(outconn), re.escape(inconn), ops) - rhs = r'^\s*%s\s*=\s*.*%s\s*%s;$' % (re.escape(outconn), ops, re.escape(inconn)) - if re.match(lhs, cstr) is None: - continue + # rhs: a = (...) 
op b + rhs = r'^\s*%s\s*=\s*\(.*\)\s*%s\s*%s;$' % (re.escape(outconn), ops, re.escape(inconn)) + func_lhs = r'^\s*%s\s*=\s*(%s)\(\s*%s\s*,.*\)\s*;$' % (re.escape(outconn), funcs, re.escape(inconn)) + func_rhs = r'^\s*%s\s*=\s*(%s)\(.*,\s*%s\s*\)\s*;$' % (re.escape(outconn), funcs, re.escape(inconn)) + if re.match(lhs, cstr) is None and re.match(rhs, cstr) is None: + if re.match(func_lhs, cstr) is None and re.match(func_rhs, cstr) is None: + inconns = list(self.tasklet.in_connectors) + if len(inconns) != 2: + continue + + # Special case: a = op b + other_inconn = inconns[0] if inconns[0] != inconn else inconns[1] + rhs2 = r'^\s*%s\s*=\s*%s\s*%s\s*%s;$' % (re.escape(outconn), re.escape(other_inconn), ops, + re.escape(inconn)) + if re.match(rhs2, cstr) is None: + continue + # Same memlet if edge.data.subset != outedge.data.subset: continue # If in map, only match if the subset is independent of any # map indices (otherwise no conflict) - if (expr_index == 1 and len(outedge.data.subset.free_symbols - & set(me.map.params)) == len(me.map.params)): - continue + if expr_index == 1: + if not permissive and len(outedge.data.subset.free_symbols & set(me.map.params)) == len( + me.map.params): + continue return True else: @@ -132,50 +149,22 @@ def apply(self, state: SDFGState, sdfg: SDFG): input: nodes.AccessNode = self.input tasklet: nodes.Tasklet = self.tasklet output: nodes.AccessNode = self.output + if self.expr_index == 1: + me = self.map_entry + mx = self.map_exit # If state fission is necessary to keep semantics, do it first - if (self.expr_index == 0 and state.in_degree(input) > 0 and state.out_degree(output) == 0): - newstate = sdfg.add_state_after(state) - newstate.add_node(tasklet) - new_input, new_output = None, None - - # Keep old edges for after we remove tasklet from the original state - in_edges = list(state.in_edges(tasklet)) - out_edges = list(state.out_edges(tasklet)) - - for e in in_edges: - r = newstate.add_read(e.src.data) - newstate.add_edge(r, e.src_conn, e.dst, e.dst_conn, e.data) - if e.src is input: - new_input = r - for e in out_edges: - w = newstate.add_write(e.dst.data) - newstate.add_edge(e.src, e.src_conn, w, e.dst_conn, e.data) - if e.dst is output: - new_output = w - - # Remove tasklet and resulting isolated nodes - state.remove_node(tasklet) - for e in in_edges: - if state.degree(e.src) == 0: - state.remove_node(e.src) - for e in out_edges: - if state.degree(e.dst) == 0: - state.remove_node(e.dst) - - # Reset state and nodes for rest of transformation - input = new_input - output = new_output - state = newstate - # End of state fission + if state.in_degree(input) > 0: + subgraph_nodes = set([e.src for e in state.bfs_edges(input, reverse=True)]) + subgraph_nodes.add(input) + + subgraph = StateSubgraphView(state, subgraph_nodes) + helpers.state_fission(sdfg, subgraph) if self.expr_index == 0: inedges = state.edges_between(input, tasklet) outedge = state.edges_between(tasklet, output)[0] else: - me = self.map_entry - mx = self.map_exit - inedges = state.edges_between(me, tasklet) outedge = state.edges_between(tasklet, mx)[0] @@ -183,6 +172,7 @@ def apply(self, state: SDFGState, sdfg: SDFG): outconn = outedge.src_conn ops = '[%s]' % ''.join(re.escape(o) for o in AugAssignToWCR._EXPRESSIONS) + funcs = '|'.join(re.escape(o) for o in AugAssignToWCR._FUNCTIONS) # Change tasklet code if tasklet.language is dtypes.Language.Python: @@ -206,13 +196,40 @@ def apply(self, state: SDFGState, sdfg: SDFG): inconn = edge.dst_conn match = re.match(r'^\s*%s\s*=\s*%s\s*(%s)(.*);$' % 
(re.escape(outconn), re.escape(inconn), ops), cstr) if match is None: - # match = re.match( - # r'^\s*%s\s*=\s*(.*)\s*(%s)\s*%s;$' % - # (re.escape(outconn), ops, re.escape(inconn)), cstr) - # if match is None: - continue - # op = match.group(2) - # expr = match.group(1) + match = re.match( + r'^\s*%s\s*=\s*\((.*)\)\s*(%s)\s*%s;$' % (re.escape(outconn), ops, re.escape(inconn)), cstr) + if match is None: + func_rhs = r'^\s*%s\s*=\s*(%s)\((.*),\s*%s\s*\)\s*;$' % (re.escape(outconn), funcs, + re.escape(inconn)) + match = re.match(func_rhs, cstr) + if match is None: + func_lhs = r'^\s*%s\s*=\s*(%s)\(\s*%s\s*,(.*)\)\s*;$' % (re.escape(outconn), funcs, + re.escape(inconn)) + match = re.match(func_lhs, cstr) + if match is None: + inconns = list(self.tasklet.in_connectors) + if len(inconns) != 2: + continue + + # Special case: a = op b + other_inconn = inconns[0] if inconns[0] != inconn else inconns[1] + rhs2 = r'^\s*%s\s*=\s*(%s)\s*(%s)\s*%s;$' % ( + re.escape(outconn), re.escape(other_inconn), ops, re.escape(inconn)) + match = re.match(rhs2, cstr) + if match is None: + continue + else: + op = match.group(2) + expr = match.group(1) + else: + op = match.group(1) + expr = match.group(2) + else: + op = match.group(1) + expr = match.group(2) + else: + op = match.group(2) + expr = match.group(1) else: op = match.group(1) expr = match.group(2) @@ -232,16 +249,14 @@ def apply(self, state: SDFGState, sdfg: SDFG): raise NotImplementedError # Change output edge - outedge.data.wcr = f'lambda a,b: a {op} b' - - if self.expr_index == 0: - # Remove input node and connector - state.remove_edge_and_connectors(inedge) - if state.degree(input) == 0: - state.remove_node(input) + if op in AugAssignToWCR._FUNCTIONS: + outedge.data.wcr = f'lambda a,b: {op}(a, b)' else: - # Remove input edge and dst connector, but not necessarily src - state.remove_memlet_path(inedge) + outedge.data.wcr = f'lambda a,b: a {op} b' + + # Remove input node and connector + state.remove_memlet_path(inedge) + propagate_memlets_state(sdfg, state) # If outedge leads to non-transient, and this is a nested SDFG, # propagate outwards @@ -252,6 +267,9 @@ def apply(self, state: SDFGState, sdfg: SDFG): sd = sd.parent_sdfg outedge = next(iter(nstate.out_edges_by_connector(nsdfg, outedge.data.data))) for outedge in nstate.memlet_path(outedge): - outedge.data.wcr = f'lambda a,b: a {op} b' + if op in AugAssignToWCR._FUNCTIONS: + outedge.data.wcr = f'lambda a,b: {op}(a, b)' + else: + outedge.data.wcr = f'lambda a,b: a {op} b' # At this point we are leading to an access node again and can # traverse further up diff --git a/tests/transformations/wcr_conversion_test.py b/tests/transformations/wcr_conversion_test.py new file mode 100644 index 0000000000..091b2a9db8 --- /dev/null +++ b/tests/transformations/wcr_conversion_test.py @@ -0,0 +1,247 @@ +import dace + +from dace.transformation.dataflow import AugAssignToWCR + + +def test_aug_assign_tasklet_lhs(): + + @dace.program + def sdfg_aug_assign_tasklet_lhs(A: dace.float64[32], B: dace.float64[32]): + for i in range(32): + with dace.tasklet: + a << A[i] + k << B[i] + b >> A[i] + b = a + k + + sdfg = sdfg_aug_assign_tasklet_lhs.to_sdfg() + sdfg.simplify() + + applied = sdfg.apply_transformations_repeated(AugAssignToWCR) + assert applied == 1 + + +def test_aug_assign_tasklet_lhs_brackets(): + + @dace.program + def sdfg_aug_assign_tasklet_lhs_brackets(A: dace.float64[32], B: dace.float64[32]): + for i in range(32): + with dace.tasklet: + a << A[i] + k << B[i] + b >> A[i] + b = a + (k + 1) + + sdfg = 
sdfg_aug_assign_tasklet_lhs_brackets.to_sdfg() + sdfg.simplify() + + applied = sdfg.apply_transformations_repeated(AugAssignToWCR) + assert applied == 1 + + +def test_aug_assign_tasklet_rhs(): + + @dace.program + def sdfg_aug_assign_tasklet_rhs(A: dace.float64[32], B: dace.float64[32]): + for i in range(32): + with dace.tasklet: + a << A[i] + k << B[i] + b >> A[i] + b = k + a + + sdfg = sdfg_aug_assign_tasklet_rhs.to_sdfg() + sdfg.simplify() + + applied = sdfg.apply_transformations_repeated(AugAssignToWCR) + assert applied == 1 + + +def test_aug_assign_tasklet_rhs_brackets(): + + @dace.program + def sdfg_aug_assign_tasklet_rhs_brackets(A: dace.float64[32], B: dace.float64[32]): + for i in range(32): + with dace.tasklet: + a << A[i] + k << B[i] + b >> A[i] + b = (k + 1) + a + + sdfg = sdfg_aug_assign_tasklet_rhs_brackets.to_sdfg() + sdfg.simplify() + + applied = sdfg.apply_transformations_repeated(AugAssignToWCR) + assert applied == 1 + + +def test_aug_assign_tasklet_lhs_cpp(): + + @dace.program + def sdfg_aug_assign_tasklet_lhs_cpp(A: dace.float64[32], B: dace.float64[32]): + for i in range(32): + with dace.tasklet(language=dace.Language.CPP): + a << A[i] + k << B[i] + b >> A[i] + """ + b = a + k; + """ + + sdfg = sdfg_aug_assign_tasklet_lhs_cpp.to_sdfg() + sdfg.simplify() + + applied = sdfg.apply_transformations_repeated(AugAssignToWCR) + assert applied == 1 + + +def test_aug_assign_tasklet_lhs_brackets_cpp(): + + @dace.program + def sdfg_aug_assign_tasklet_lhs_brackets_cpp(A: dace.float64[32], B: dace.float64[32]): + for i in range(32): + with dace.tasklet(language=dace.Language.CPP): + a << A[i] + k << B[i] + b >> A[i] + """ + b = a + (k + 1); + """ + + sdfg = sdfg_aug_assign_tasklet_lhs_brackets_cpp.to_sdfg() + sdfg.simplify() + + applied = sdfg.apply_transformations_repeated(AugAssignToWCR) + assert applied == 1 + + +def test_aug_assign_tasklet_rhs_brackets_cpp(): + + @dace.program + def sdfg_aug_assign_tasklet_rhs_brackets_cpp(A: dace.float64[32], B: dace.float64[32]): + for i in range(32): + with dace.tasklet(language=dace.Language.CPP): + a << A[i] + k << B[i] + b >> A[i] + """ + b = (k + 1) + a; + """ + + sdfg = sdfg_aug_assign_tasklet_rhs_brackets_cpp.to_sdfg() + sdfg.simplify() + + applied = sdfg.apply_transformations_repeated(AugAssignToWCR) + assert applied == 1 + + +def test_aug_assign_tasklet_func_lhs_cpp(): + + @dace.program + def sdfg_aug_assign_tasklet_func_lhs_cpp(A: dace.float64[32], B: dace.float64[32]): + for i in range(32): + with dace.tasklet(language=dace.Language.CPP): + a << A[i] + c << B[i] + b >> A[i] + """ + b = min(a, c); + """ + + sdfg = sdfg_aug_assign_tasklet_func_lhs_cpp.to_sdfg() + sdfg.simplify() + + applied = sdfg.apply_transformations_repeated(AugAssignToWCR) + assert applied == 1 + + +def test_aug_assign_tasklet_func_rhs_cpp(): + + @dace.program + def sdfg_aug_assign_tasklet_func_rhs_cpp(A: dace.float64[32], B: dace.float64[32]): + for i in range(32): + with dace.tasklet(language=dace.Language.CPP): + a << A[i] + c << B[i] + b >> A[i] + """ + b = min(c, a); + """ + + sdfg = sdfg_aug_assign_tasklet_func_rhs_cpp.to_sdfg() + sdfg.simplify() + + applied = sdfg.apply_transformations_repeated(AugAssignToWCR) + assert applied == 1 + + +def test_aug_assign_free_map(): + + @dace.program + def sdfg_aug_assign_free_map(A: dace.float64[32], B: dace.float64[32]): + for i in dace.map[0:32]: + with dace.tasklet(language=dace.Language.CPP): + a << A[0] + k << B[i] + b >> A[0] + """ + b = k * a; + """ + + sdfg = sdfg_aug_assign_free_map.to_sdfg() + sdfg.simplify() + 
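+
+    # The CPP tasklet computes `b = k * a`, reading and writing A[0] inside a
+    # free map. AugAssignToWCR's `a = c op b` special case should match it,
+    # rewriting the output memlet with wcr='lambda a,b: a * b' and removing
+    # the read edge of A.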
+ applied = sdfg.apply_transformations_repeated(AugAssignToWCR) + assert applied == 1 + + +def test_aug_assign_state_fission_map(): + + @dace.program + def sdfg_aug_assign_state_fission(A: dace.float64[32], B: dace.float64[32]): + for i in dace.map[0:32]: + with dace.tasklet: + a << B[i] + b >> A[i] + b = a + + for i in dace.map[0:32]: + with dace.tasklet: + a << A[0] + b >> A[0] + b = a * 2 + + for i in dace.map[0:32]: + with dace.tasklet: + a << A[0] + b >> A[0] + b = a * 2 + + sdfg = sdfg_aug_assign_state_fission.to_sdfg() + sdfg.simplify() + + applied = sdfg.apply_transformations_repeated(AugAssignToWCR) + assert applied == 2 + + +def test_free_map_permissive(): + + @dace.program + def sdfg_free_map_permissive(A: dace.float64[32], B: dace.float64[32]): + for i in dace.map[0:32]: + with dace.tasklet(language=dace.Language.CPP): + a << A[i] + k << B[i] + b >> A[i] + """ + b = k * a; + """ + + sdfg = sdfg_free_map_permissive.to_sdfg() + sdfg.simplify() + + applied = sdfg.apply_transformations_repeated(AugAssignToWCR, permissive=False) + assert applied == 0 + + applied = sdfg.apply_transformations_repeated(AugAssignToWCR, permissive=True) + assert applied == 1 From 1c36f61ab84e8ee52944b14d104a66124b0bfe21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lukas=20Tr=C3=BCmper?= Date: Mon, 6 Nov 2023 15:35:01 +0100 Subject: [PATCH 132/163] OTFMapFusion: Bugfix for tasklets with None connectors (#1415) --- .../transformation/dataflow/otf_map_fusion.py | 17 +++++----- tests/transformations/otf_map_fusion_test.py | 31 +++++++++++++++++++ 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/dace/transformation/dataflow/otf_map_fusion.py b/dace/transformation/dataflow/otf_map_fusion.py index b2e5710942..f41e3b4e0b 100644 --- a/dace/transformation/dataflow/otf_map_fusion.py +++ b/dace/transformation/dataflow/otf_map_fusion.py @@ -289,14 +289,17 @@ def apply(self, graph: SDFGState, sdfg: SDFG): for edge in graph.edges_between(first_map_entry, node): memlet = copy.deepcopy(edge.data) - in_connector = edge.src_conn.replace("OUT", "IN") - if in_connector in connector_mapping: - out_connector = connector_mapping[in_connector].replace("IN", "OUT") + if edge.src_conn is not None: + in_connector = edge.src_conn.replace("OUT", "IN") + if in_connector in connector_mapping: + out_connector = connector_mapping[in_connector].replace("IN", "OUT") + else: + out_connector = edge.src_conn + + if out_connector not in self.second_map_entry.out_connectors: + self.second_map_entry.add_out_connector(out_connector) else: - out_connector = edge.src_conn - - if out_connector not in self.second_map_entry.out_connectors: - self.second_map_entry.add_out_connector(out_connector) + out_connector = None graph.add_edge(self.second_map_entry, out_connector, node, edge.dst_conn, memlet) graph.remove_edge(edge) diff --git a/tests/transformations/otf_map_fusion_test.py b/tests/transformations/otf_map_fusion_test.py index eb871566d1..4786901887 100644 --- a/tests/transformations/otf_map_fusion_test.py +++ b/tests/transformations/otf_map_fusion_test.py @@ -330,6 +330,36 @@ def test_trivial_fusion_nested_sdfg(): assert (res == res_fused).all() +@dace.program +def trivial_fusion_none_connectors(B: dace.float64[10, 20]): + tmp = dace.define_local([10, 20], dtype=B.dtype) + for i, j in dace.map[0:10, 0:20]: + with dace.tasklet: + b >> tmp[i, j] + b = 0 + + for i, j in dace.map[0:10, 0:20]: + with dace.tasklet: + a << tmp[i, j] + b >> B[i, j] + b = a + 2 + + +def test_trivial_fusion_none_connectors(): + sdfg = 
trivial_fusion_none_connectors.to_sdfg()
+    sdfg.simplify()
+    assert count_maps(sdfg) == 2
+
+    sdfg.apply_transformations(OTFMapFusion)
+    assert count_maps(sdfg) == 1
+
+    B = np.zeros((10, 20))
+    ref = np.zeros((10, 20)) + 2
+
+    sdfg(B=B)
+    assert np.allclose(B, ref)
+
+
 @dace.program
 def undefined_subset(A: dace.float64[10], B: dace.float64[10]):
     tmp = dace.define_local([10], dtype=A.dtype)
@@ -703,6 +733,7 @@ def test_hdiff():
     test_trivial_fusion_permute()
     test_trivial_fusion_not_remove_map()
     test_trivial_fusion_nested_sdfg()
+    test_trivial_fusion_none_connectors()
 
     # Defined subsets
     test_undefined_subset()

From f3781b105435bf36219ff5c5283b4c99e8e6cf35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philip=20M=C3=BCller?= <147368808+philip-paul-mueller@users.noreply.github.com>
Date: Tue, 7 Nov 2023 09:17:16 +0100
Subject: [PATCH 133/163] Better mangling of the state struct in the code
 generator (#1413)

This PR addresses issue #1396 by modifying the way the name of the state struct is created.
---
 dace/codegen/compiled_sdfg.py                 |  3 ++-
 .../codegen/instrumentation/data/data_dump.py |  2 +-
 dace/codegen/targets/cpp.py                   | 16 ++++++++++++
 dace/codegen/targets/cpu.py                   |  2 +-
 dace/codegen/targets/cuda.py                  | 23 +++++++---------
 dace/codegen/targets/fpga.py                  |  2 +-
 dace/codegen/targets/framecode.py             | 26 ++++++++++---------
 dace/codegen/targets/intel_fpga.py            |  7 +++--
 dace/codegen/targets/mpi.py                   | 11 ++++----
 dace/codegen/targets/xilinx.py                |  6 +++--
 dace/config_schema.yml                        |  9 +++++++
 11 files changed, 67 insertions(+), 40 deletions(-)

diff --git a/dace/codegen/compiled_sdfg.py b/dace/codegen/compiled_sdfg.py
index 8a132f3df3..7de385cead 100644
--- a/dace/codegen/compiled_sdfg.py
+++ b/dace/codegen/compiled_sdfg.py
@@ -239,6 +239,7 @@ def get_state_struct(self) -> ctypes.Structure:
         return ctypes.cast(self._libhandle, ctypes.POINTER(self._try_parse_state_struct())).contents
 
     def _try_parse_state_struct(self) -> Optional[Type[ctypes.Structure]]:
+        from dace.codegen.targets.cpp import mangle_dace_state_struct_name  # Avoid import cycle
         # the path of the main sdfg file containing the state struct
         main_src_path = os.path.join(os.path.dirname(os.path.dirname(self._lib._library_filename)), "src", "cpu",
                                      self._sdfg.name + ".cpp")
@@ -247,7 +248,7 @@ def _try_parse_state_struct(self) -> Optional[Type[ctypes.Structure]]:
         code_flat = code.replace("\n", " ")
 
         # try to find the first struct definition that matches the name we are looking for in the sdfg file
-        match = re.search(f"struct {self._sdfg.name}_t {{(.*?)}};", code_flat)
+        match = re.search(f"struct {mangle_dace_state_struct_name(self._sdfg)} {{(.*?)}};", code_flat)
         if match is None or len(match.groups()) != 1:
             return None
 
diff --git a/dace/codegen/instrumentation/data/data_dump.py b/dace/codegen/instrumentation/data/data_dump.py
index 859f78bd79..2217524d19 100644
--- a/dace/codegen/instrumentation/data/data_dump.py
+++ b/dace/codegen/instrumentation/data/data_dump.py
@@ -195,7 +195,7 @@ def __init__(self):
 
     def _generate_report_setter(self, sdfg: SDFG) -> str:
         return f'''
-        DACE_EXPORTED void __dace_set_instrumented_data_report({sdfg.name}_t *__state, const char *dirpath) {{
+        DACE_EXPORTED void __dace_set_instrumented_data_report({cpp.mangle_dace_state_struct_name(sdfg)} *__state, const char *dirpath) {{
             __state->serializer->set_folder(dirpath);
         }}
         '''
diff --git a/dace/codegen/targets/cpp.py b/dace/codegen/targets/cpp.py
index 3d26f76214..9687fb1783 100644
--- a/dace/codegen/targets/cpp.py
+++ b/dace/codegen/targets/cpp.py
@@ -34,6 +34,22 @@ from
dace.codegen.dispatcher import TargetDispatcher +def mangle_dace_state_struct_name(sdfg: Union[SDFG, str]) -> str: + """This function creates a unique type name for the `SDFG`'s state `struct`. + + The function uses the `compiler.codegen_state_struct_suffix` + configuration entry for deriving the type name of the state `struct`. + + :param sdfg: The SDFG for which the name should be generated. + """ + name = sdfg if isinstance(sdfg, str) else sdfg.name + state_suffix = Config.get("compiler", "codegen_state_struct_suffix") + type_name = f"{name}{state_suffix}" + if not dtypes.validate_name(type_name): + raise ValueError(f"The mangled type name `{type_name}` of the state struct of SDFG '{name}' is invalid.") + return type_name + + def copy_expr( dispatcher, sdfg, diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 88dda0058f..72ca554a4a 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -1490,7 +1490,7 @@ def generate_nsdfg_header(self, sdfg, state, state_id, node, memlet_references, if state_struct: toplevel_sdfg: SDFG = sdfg.sdfg_list[0] - arguments.append(f'{toplevel_sdfg.name}_t *__state') + arguments.append(f'{cpp.mangle_dace_state_struct_name(toplevel_sdfg)} *__state') # Add "__restrict__" keywords to arguments that do not alias with others in the context of this SDFG restrict_args = [] diff --git a/dace/codegen/targets/cuda.py b/dace/codegen/targets/cuda.py index fb8ae90187..b729b34088 100644 --- a/dace/codegen/targets/cuda.py +++ b/dace/codegen/targets/cuda.py @@ -1,11 +1,8 @@ # Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. -import ast -import copy import ctypes import functools -import os import warnings -from typing import Any, Dict, List, Set, Tuple, Union +from typing import Dict, List, Set, Tuple, Union import networkx as nx import sympy @@ -14,7 +11,6 @@ import dace from dace import data as dt from dace import dtypes, registry -from dace import sdfg as sd from dace import subsets, symbolic from dace.codegen import common, cppunparse from dace.codegen.codeobject import CodeObject @@ -23,7 +19,7 @@ from dace.codegen.targets import cpp from dace.codegen.common import update_persistent_desc from dace.codegen.targets.cpp import (codeblock_to_cpp, cpp_array_expr, memlet_copy_to_absolute_strides, sym2cpp, - synchronize_streams, unparse_cr, unparse_cr_split) + synchronize_streams, unparse_cr, mangle_dace_state_struct_name) from dace.codegen.targets.target import IllegalCopy, TargetCodeGenerator, make_absolute from dace.config import Config from dace.frontend import operations @@ -345,12 +341,12 @@ def get_generated_codeobjects(self): {file_header} -DACE_EXPORTED int __dace_init_cuda({sdfg.name}_t *__state{params}); -DACE_EXPORTED int __dace_exit_cuda({sdfg.name}_t *__state); +DACE_EXPORTED int __dace_init_cuda({sdfg_state_name} *__state{params}); +DACE_EXPORTED int __dace_exit_cuda({sdfg_state_name} *__state); {other_globalcode} -int __dace_init_cuda({sdfg.name}_t *__state{params}) {{ +int __dace_init_cuda({sdfg_state_name} *__state{params}) {{ int count; // Check that we are able to run {backend} code @@ -389,7 +385,7 @@ def get_generated_codeobjects(self): return 0; }} -int __dace_exit_cuda({sdfg.name}_t *__state) {{ +int __dace_exit_cuda({sdfg_state_name} *__state) {{ {exitcode} // Synchronize and check for CUDA errors @@ -409,7 +405,7 @@ def get_generated_codeobjects(self): return __err; }} -DACE_EXPORTED bool __dace_gpu_set_stream({sdfg.name}_t *__state, int streamid, gpuStream_t stream) +DACE_EXPORTED 
bool __dace_gpu_set_stream({sdfg_state_name} *__state, int streamid, gpuStream_t stream) {{ if (streamid < 0 || streamid >= {nstreams}) return false; @@ -419,7 +415,7 @@ def get_generated_codeobjects(self): return true; }} -DACE_EXPORTED void __dace_gpu_set_all_streams({sdfg.name}_t *__state, gpuStream_t stream) +DACE_EXPORTED void __dace_gpu_set_all_streams({sdfg_state_name} *__state, gpuStream_t stream) {{ for (int i = 0; i < {nstreams}; ++i) __state->gpu_context->streams[i] = stream; @@ -427,6 +423,7 @@ def get_generated_codeobjects(self): {localcode} """.format(params=params_comma, + sdfg_state_name=mangle_dace_state_struct_name(self._global_sdfg), initcode=initcode.getvalue(), exitcode=exitcode.getvalue(), other_globalcode=self._globalcode.getvalue(), @@ -1567,7 +1564,7 @@ def generate_scope(self, sdfg, dfg_scope, state_id, function_stream, callsite_st self.scope_entry_stream = old_entry_stream self.scope_exit_stream = old_exit_stream - state_param = [f'{self._global_sdfg.name}_t *__state'] + state_param = [f'{mangle_dace_state_struct_name(self._global_sdfg)} *__state'] # Write callback function definition self._localcode.write( diff --git a/dace/codegen/targets/fpga.py b/dace/codegen/targets/fpga.py index 413cb751d6..8df8fe94fa 100644 --- a/dace/codegen/targets/fpga.py +++ b/dace/codegen/targets/fpga.py @@ -652,7 +652,7 @@ def generate_state(self, sdfg: dace.SDFG, state: dace.SDFGState, function_stream kernel_args_opencl = [] # Include state in args - kernel_args_opencl.append(f"{self._global_sdfg.name}_t *__state") + kernel_args_opencl.append(f"{cpp.mangle_dace_state_struct_name(self._global_sdfg)} *__state") kernel_args_call_host.append(f"__state") for is_output, arg_name, arg, interface_id in state_parameters: diff --git a/dace/codegen/targets/framecode.py b/dace/codegen/targets/framecode.py index b1eb42fe60..0db4062976 100644 --- a/dace/codegen/targets/framecode.py +++ b/dace/codegen/targets/framecode.py @@ -131,6 +131,7 @@ def generate_fileheader(self, sdfg: SDFG, global_stream: CodeIOStream, backend: :param global_stream: Stream to write to (global). :param backend: Whose backend this header belongs to. """ + from dace.codegen.targets.cpp import mangle_dace_state_struct_name # Avoid circular import # Hash file include if backend == 'frame': global_stream.write('#include "../../include/hash.h"\n', sdfg) @@ -181,7 +182,7 @@ def _emit_definitions(dtype: dtypes.typeclass, wrote_something: bool) -> bool: # Write state struct structstr = '\n'.join(self.statestruct) global_stream.write(f''' -struct {sdfg.name}_t {{ +struct {mangle_dace_state_struct_name(sdfg)} {{ {structstr} }}; @@ -226,6 +227,7 @@ def generate_footer(self, sdfg: SDFG, global_stream: CodeIOStream, callsite_stre :param callsite_stream: Stream to write to (at call site). 
""" import dace.library + from dace.codegen.targets.cpp import mangle_dace_state_struct_name # Avoid circular import fname = sdfg.name params = sdfg.signature(arglist=self.arglist) paramnames = sdfg.signature(False, for_call=True, arglist=self.arglist) @@ -255,7 +257,7 @@ def generate_footer(self, sdfg: SDFG, global_stream: CodeIOStream, callsite_stre initparamnames_comma = (', ' + initparamnames) if initparamnames else '' callsite_stream.write( f''' -DACE_EXPORTED void __program_{fname}({fname}_t *__state{params_comma}) +DACE_EXPORTED void __program_{fname}({mangle_dace_state_struct_name(fname)} *__state{params_comma}) {{ __program_{fname}_internal(__state{paramnames_comma}); }}''', sdfg) @@ -263,18 +265,17 @@ def generate_footer(self, sdfg: SDFG, global_stream: CodeIOStream, callsite_stre for target in self._dispatcher.used_targets: if target.has_initializer: callsite_stream.write( - 'DACE_EXPORTED int __dace_init_%s(%s_t *__state%s);\n' % - (target.target_name, sdfg.name, initparams_comma), sdfg) + f'DACE_EXPORTED int __dace_init_{target.target_name}({mangle_dace_state_struct_name(sdfg)} *__state{initparams_comma});\n', sdfg) if target.has_finalizer: callsite_stream.write( - 'DACE_EXPORTED int __dace_exit_%s(%s_t *__state);\n' % (target.target_name, sdfg.name), sdfg) + f'DACE_EXPORTED int __dace_exit_{target.target_name}({mangle_dace_state_struct_name(sdfg)} *__state);\n', sdfg) callsite_stream.write( f""" -DACE_EXPORTED {sdfg.name}_t *__dace_init_{sdfg.name}({initparams}) +DACE_EXPORTED {mangle_dace_state_struct_name(sdfg)} *__dace_init_{sdfg.name}({initparams}) {{ int __result = 0; - {sdfg.name}_t *__state = new {sdfg.name}_t; + {mangle_dace_state_struct_name(sdfg)} *__state = new {mangle_dace_state_struct_name(sdfg)}; """, sdfg) @@ -306,7 +307,7 @@ def generate_footer(self, sdfg: SDFG, global_stream: CodeIOStream, callsite_stre return __state; }} -DACE_EXPORTED int __dace_exit_{sdfg.name}({sdfg.name}_t *__state) +DACE_EXPORTED int __dace_exit_{sdfg.name}({mangle_dace_state_struct_name(sdfg)} *__state) {{ int __err = 0; """, sdfg) @@ -352,6 +353,7 @@ def generate_external_memory_management(self, sdfg: SDFG, callsite_stream: CodeI can be ``CPU_Heap`` or any other ``dtypes.StorageType``); and (2) set the externally-allocated pointer to the generated code's internal state (``__dace_set_external_memory_``). 
""" + from dace.codegen.targets.cpp import mangle_dace_state_struct_name # Avoid circular import # Collect external arrays ext_arrays: Dict[dtypes.StorageType, List[Tuple[SDFG, str, data.Data]]] = collections.defaultdict(list) @@ -374,7 +376,7 @@ def generate_external_memory_management(self, sdfg: SDFG, callsite_stream: CodeI # Size query functions callsite_stream.write( f''' -DACE_EXPORTED size_t __dace_get_external_memory_size_{storage.name}({sdfg.name}_t *__state{initparams_comma}) +DACE_EXPORTED size_t __dace_get_external_memory_size_{storage.name}({mangle_dace_state_struct_name(sdfg)} *__state{initparams_comma}) {{ return {sym2cpp(size)}; }} @@ -383,7 +385,7 @@ def generate_external_memory_management(self, sdfg: SDFG, callsite_stream: CodeI # Pointer set functions callsite_stream.write( f''' -DACE_EXPORTED void __dace_set_external_memory_{storage.name}({sdfg.name}_t *__state, char *ptr{initparams_comma}) +DACE_EXPORTED void __dace_set_external_memory_{storage.name}({mangle_dace_state_struct_name(sdfg)} *__state, char *ptr{initparams_comma}) {{''', sdfg) offset = 0 @@ -828,7 +830,6 @@ def generate_code(self, code, and a set of targets that have been used in the generation of this SDFG. """ - if len(sdfg_id) == 0 and sdfg.sdfg_id != 0: sdfg_id = '_%d' % sdfg.sdfg_id @@ -923,6 +924,7 @@ def generate_code(self, # Get all environments used in the generated code, including # dependent environments import dace.library # Avoid import loops + from dace.codegen.targets.cpp import mangle_dace_state_struct_name self.environments = dace.library.get_environments_and_dependencies(self._dispatcher.used_environments) self.generate_header(sdfg, header_global_stream, header_stream) @@ -931,7 +933,7 @@ def generate_code(self, params = sdfg.signature(arglist=self.arglist) if params: params = ', ' + params - function_signature = ('void __program_%s_internal(%s_t *__state%s)\n{\n' % (sdfg.name, sdfg.name, params)) + function_signature = f'void __program_{sdfg.name}_internal({mangle_dace_state_struct_name(sdfg)}*__state{params})\n{{' self.generate_footer(sdfg, footer_global_stream, footer_stream) self.generate_external_memory_management(sdfg, footer_stream) diff --git a/dace/codegen/targets/intel_fpga.py b/dace/codegen/targets/intel_fpga.py index d3c46b0069..03a04fda41 100644 --- a/dace/codegen/targets/intel_fpga.py +++ b/dace/codegen/targets/intel_fpga.py @@ -3,8 +3,6 @@ import functools import copy import itertools -import os -import re from six import StringIO import numpy as np @@ -143,19 +141,20 @@ def get_generated_codeobjects(self): params_comma = ', ' + params_comma host_code.write(""" -DACE_EXPORTED int __dace_init_intel_fpga({sdfg.name}_t *__state{signature}) {{{emulation_flag} +DACE_EXPORTED int __dace_init_intel_fpga({sdfg_state_name} *__state{signature}) {{{emulation_flag} __state->fpga_context = new dace_fpga_context(); __state->fpga_context->Get().MakeProgram({kernel_file_name}); return 0; }} -DACE_EXPORTED int __dace_exit_intel_fpga({sdfg.name}_t *__state) {{ +DACE_EXPORTED int __dace_exit_intel_fpga({sdfg_state_name} *__state) {{ delete __state->fpga_context; return 0; }} {host_code}""".format(signature=params_comma, sdfg=self._global_sdfg, + sdfg_state_name=cpp.mangle_dace_state_struct_name(self._global_sdfg), emulation_flag=emulation_flag, kernel_file_name=kernel_file_name, host_code="".join([ diff --git a/dace/codegen/targets/mpi.py b/dace/codegen/targets/mpi.py index 419334ba5a..0bb2b67a7e 100644 --- a/dace/codegen/targets/mpi.py +++ b/dace/codegen/targets/mpi.py @@ -4,6 +4,7 @@ from 
dace.codegen.prettycode import CodeIOStream from dace.codegen.codeobject import CodeObject from dace.codegen.targets.target import TargetCodeGenerator, make_absolute +from dace.codegen.targets.cpp import mangle_dace_state_struct_name from dace.sdfg import nodes, SDFG from dace.config import Config @@ -45,10 +46,10 @@ def get_generated_codeobjects(self): {file_header} -DACE_EXPORTED int __dace_init_mpi({sdfg.name}_t *__state{params}); -DACE_EXPORTED int __dace_exit_mpi({sdfg.name}_t *__state); +DACE_EXPORTED int __dace_init_mpi({sdfg_state_name} *__state{params}); +DACE_EXPORTED int __dace_exit_mpi({sdfg_state_name} *__state); -int __dace_init_mpi({sdfg.name}_t *__state{params}) {{ +int __dace_init_mpi({sdfg_state_name} *__state{params}) {{ int isinit = 0; if (MPI_Initialized(&isinit) != MPI_SUCCESS) return 1; @@ -66,7 +67,7 @@ def get_generated_codeobjects(self): return 0; }} -int __dace_exit_mpi({sdfg.name}_t *__state) {{ +int __dace_exit_mpi({sdfg_state_name} *__state) {{ MPI_Comm_free(&__dace_mpi_comm); MPI_Finalize(); @@ -74,7 +75,7 @@ def get_generated_codeobjects(self): __dace_comm_size); return 0; }} -""".format(params=params_comma, sdfg=sdfg, file_header=fileheader.getvalue()), 'cpp', MPICodeGen, 'MPI') +""".format(params=params_comma, sdfg=sdfg, sdfg_state_name=mangle_dace_state_struct_name(sdfg), file_header=fileheader.getvalue()), 'cpp', MPICodeGen, 'MPI') return [codeobj] @staticmethod diff --git a/dace/codegen/targets/xilinx.py b/dace/codegen/targets/xilinx.py index 5d82cfeafc..0c562c59c5 100644 --- a/dace/codegen/targets/xilinx.py +++ b/dace/codegen/targets/xilinx.py @@ -7,6 +7,7 @@ import re import numpy as np import ast + import dace from dace import data as dt, registry, dtypes, subsets from dace.config import Config @@ -141,7 +142,7 @@ def get_generated_codeobjects(self): params_comma = ', ' + params_comma host_code.write(""" -DACE_EXPORTED int __dace_init_xilinx({sdfg.name}_t *__state{signature}) {{ +DACE_EXPORTED int __dace_init_xilinx({sdfg_state_name} *__state{signature}) {{ {environment_variables} __state->fpga_context = new dace_fpga_context(); @@ -149,13 +150,14 @@ def get_generated_codeobjects(self): return 0; }} -DACE_EXPORTED int __dace_exit_xilinx({sdfg.name}_t *__state) {{ +DACE_EXPORTED int __dace_exit_xilinx({sdfg_state_name} *__state) {{ delete __state->fpga_context; return 0; }} {host_code}""".format(signature=params_comma, sdfg=self._global_sdfg, + sdfg_state_name=cpp.mangle_dace_state_struct_name(self._global_sdfg), environment_variables=set_env_vars, kernel_file_name=kernel_file_name, host_code="".join([ diff --git a/dace/config_schema.yml b/dace/config_schema.yml index 08a427aa52..063815e319 100644 --- a/dace/config_schema.yml +++ b/dace/config_schema.yml @@ -164,6 +164,15 @@ required: of the code generator that generated it. Used for debugging code generation. + codegen_state_struct_suffix: + type: str + default: "_state_t" + title: Suffix used by the code generator to mangle the state struct. + description: > + For every SDFG the code generator is processing, a state struct is generated. + The typename of this struct is derived by appending this value to the SDFG's name. + Note that the suffix may only contain letters, digits and underscores.
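For reference, `mangle_dace_state_struct_name` is the helper in `dace/codegen/targets/cpp.py` that all of the hunks above call. A minimal sketch of its assumed behavior, with the option living under the `compiler` section as the hunk context suggests (the actual implementation may differ):

from typing import Union
from dace import SDFG
from dace.config import Config

def mangle_dace_state_struct_name(sdfg: Union[SDFG, str]) -> str:
    # Accept either an SDFG object or a plain name string, as both
    # are passed at the call sites above.
    name = sdfg if isinstance(sdfg, str) else sdfg.name
    # Append the configured suffix: an SDFG named "myprog" with the
    # default "_state_t" yields the struct typename "myprog_state_t".
    return f'{name}{Config.get("compiler", "codegen_state_struct_suffix")}'

This is why the generated signatures change from `{sdfg.name}_t *__state` to `{sdfg_state_name} *__state`: the struct typename becomes configurable instead of being hardcoded to the `_t` suffix.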
+ default_data_types: type : str default: Python From 3e8c74c9b4f92631cddee7a1d73e6c7e68aa941a Mon Sep 17 00:00:00 2001 From: Samuel Martin Date: Wed, 8 Nov 2023 10:46:07 +0100 Subject: [PATCH 134/163] Trivial map elimination init (#1353) The TrivialMapElimination transformation fails to properly connect the contents of the map if the map is write-only and the map to be removed is nested inside another map. This is because the current solution uses memlet_path, which does not work on the empty memlets leaving the map entry. This adds a test case and a solution that works for my specific needs but is not polished and might not work in all cases. --------- Co-authored-by: Samuel Martin Co-authored-by: alexnick83 <31545860+alexnick83@users.noreply.github.com> Co-authored-by: acalotoiu <61420859+acalotoiu@users.noreply.github.com> --- .../dataflow/trivial_map_elimination.py | 13 +- tests/trivial_map_elimination_test.py | 132 ++++++++++++++++++ 2 files changed, 143 insertions(+), 2 deletions(-) diff --git a/dace/transformation/dataflow/trivial_map_elimination.py b/dace/transformation/dataflow/trivial_map_elimination.py index 327d5d8c9a..9387cfce23 100644 --- a/dace/transformation/dataflow/trivial_map_elimination.py +++ b/dace/transformation/dataflow/trivial_map_elimination.py @@ -5,6 +5,7 @@ from dace.sdfg import utils as sdutil from dace.transformation import transformation from dace.properties import make_properties +from dace.memlet import Memlet @make_properties @@ -48,12 +49,15 @@ def apply(self, graph, sdfg): if len(remaining_ranges) == 0: # Redirect map entry's out edges + write_only_map = True for edge in graph.out_edges(map_entry): path = graph.memlet_path(edge) index = path.index(edge) - # Add an edge directly from the previous source connector to the destination - graph.add_edge(path[index - 1].src, path[index - 1].src_conn, edge.dst, edge.dst_conn, edge.data) + if not edge.data.is_empty(): + # Add an edge directly from the previous source connector to the destination + graph.add_edge(path[index - 1].src, path[index - 1].src_conn, edge.dst, edge.dst_conn, edge.data) + write_only_map = False # Redirect map exit's in edges.
for edge in graph.in_edges(map_exit): @@ -63,6 +67,11 @@ def apply(self, graph, sdfg): # Add an edge directly from the source to the next destination connector if len(path) > index + 1: graph.add_edge(edge.src, edge.src_conn, path[index + 1].dst, path[index + 1].dst_conn, edge.data) + if write_only_map: + outer_exit = path[index+1].dst + outer_entry = graph.entry_node(outer_exit) + if outer_entry is not None: + graph.add_edge(outer_entry, None, edge.src, None, Memlet()) # Remove map graph.remove_nodes_from([map_entry, map_exit]) diff --git a/tests/trivial_map_elimination_test.py b/tests/trivial_map_elimination_test.py index 44b1f77652..9600dad640 100644 --- a/tests/trivial_map_elimination_test.py +++ b/tests/trivial_map_elimination_test.py @@ -25,7 +25,69 @@ def trivial_map_sdfg(): return sdfg +def trivial_map_init_sdfg(): + sdfg = dace.SDFG('trivial_map_range_expanded') + sdfg.add_array('B', [5, 1], dace.float64) + state = sdfg.add_state() + + # Nodes + map_entry_outer, map_exit_outer = state.add_map('map_outer', dict(j='0:5')) + map_entry_inner, map_exit_inner = state.add_map('map_inner', dict(i='0:1')) + + tasklet = state.add_tasklet('tasklet', {}, {'b'}, 'b = 1') + write = state.add_write('B') + + # Edges + state.add_memlet_path(map_entry_outer, map_entry_inner, memlet=dace.Memlet()) + state.add_memlet_path(map_entry_inner, tasklet, memlet=dace.Memlet()) + + state.add_memlet_path(tasklet, map_exit_inner, memlet=dace.Memlet.simple('B', 'j, i'), src_conn='b', + dst_conn='IN_B') + state.add_memlet_path(map_exit_inner, map_exit_outer, memlet=dace.Memlet.simple('B', 'j, 0'), src_conn='OUT_B', + dst_conn='IN_B') + state.add_memlet_path(map_exit_outer, write, memlet=dace.Memlet.simple('B', '0:5, 0'), + src_conn='OUT_B') + + sdfg.validate() + return sdfg + + +def trivial_map_pseudo_init_sdfg(): + sdfg = dace.SDFG('trivial_map_range_expanded') + sdfg.add_array('A', [5, 1], dace.float64) + sdfg.add_array('B', [5, 1], dace.float64) + state = sdfg.add_state() + + # Nodes + map_entry_outer, map_exit_outer = state.add_map('map_outer', dict(j='0:5')) + map_entry_inner, map_exit_inner = state.add_map('map_inner', dict(i='0:1')) + + read = state.add_read('A') + tasklet = state.add_tasklet('tasklet', {'a'}, {'b'}, 'b = a') + write = state.add_write('B') + + # Edges + state.add_memlet_path(map_entry_outer, map_entry_inner, memlet=dace.Memlet()) + state.add_memlet_path(read, map_entry_outer, map_entry_inner, memlet=dace.Memlet.simple('A', '0:5, 0'), + dst_conn='IN_A') + state.add_memlet_path(map_entry_inner, tasklet, memlet=dace.Memlet()) + state.add_memlet_path(map_entry_inner, tasklet, memlet=dace.Memlet.simple('A', 'j, 0'), src_conn='OUT_A', dst_conn='a') + + state.add_memlet_path(tasklet, map_exit_inner, memlet=dace.Memlet.simple('B', 'j, i'), src_conn='b', + dst_conn='IN_B') + state.add_memlet_path(map_exit_inner, map_exit_outer, memlet=dace.Memlet.simple('B', 'j, 0'), src_conn='OUT_B', + dst_conn='IN_B') + state.add_memlet_path(map_exit_outer, write, memlet=dace.Memlet.simple('B', '0:5, 0'), + src_conn='OUT_B') + + sdfg.validate() + return sdfg + + class TrivialMapEliminationTest(unittest.TestCase): + """ + Tests the case where the map has an empty input edge + """ def test_can_be_applied(self): graph = trivial_map_sdfg() @@ -56,5 +118,75 @@ def test_raplaces_map_params_in_scope(self): self.assertEqual(out_memlet.data.subset, dace.subsets.Range([(0, 0, 1)])) +class TrivialMapInitEliminationTest(unittest.TestCase): + def test_can_be_applied(self): + graph = trivial_map_init_sdfg() + + count = 
graph.apply_transformations(TrivialMapElimination, validate=False, validate_all=False) + graph.validate() + + self.assertGreater(count, 0) + + def test_removes_map(self): + graph = trivial_map_init_sdfg() + + state = graph.nodes()[0] + map_entries = [n for n in state.nodes() if isinstance(n, dace.sdfg.nodes.MapEntry)] + self.assertEqual(len(map_entries), 2) + + graph.apply_transformations(TrivialMapElimination) + + state = graph.nodes()[0] + map_entries = [n for n in state.nodes() if isinstance(n, dace.sdfg.nodes.MapEntry)] + self.assertEqual(len(map_entries), 1) + + def test_reconnects_edges(self): + graph = trivial_map_init_sdfg() + + graph.apply_transformations(TrivialMapElimination) + state = graph.nodes()[0] + map_entries = [n for n in state.nodes() if isinstance(n, dace.sdfg.nodes.MapEntry)] + self.assertEqual(len(map_entries), 1) + # Check that there is an outgoing edge from the map entry + self.assertEqual(len(state.out_edges(map_entries[0])), 1) + + +class TrivialMapPseudoInitEliminationTest(unittest.TestCase): + """ + Test cases where the map has an empty input and a non-empty input + """ + def test_can_be_applied(self): + graph = trivial_map_pseudo_init_sdfg() + + count = graph.apply_transformations(TrivialMapElimination, validate=False, validate_all=False) + graph.validate() + + self.assertGreater(count, 0) + + def test_removes_map(self): + graph = trivial_map_pseudo_init_sdfg() + + state = graph.nodes()[0] + map_entries = [n for n in state.nodes() if isinstance(n, dace.sdfg.nodes.MapEntry)] + self.assertEqual(len(map_entries), 2) + + graph.apply_transformations(TrivialMapElimination) + + state = graph.nodes()[0] + map_entries = [n for n in state.nodes() if isinstance(n, dace.sdfg.nodes.MapEntry)] + self.assertEqual(len(map_entries), 1) + + def test_reconnects_edges(self): + graph = trivial_map_pseudo_init_sdfg() + + graph.apply_transformations(TrivialMapElimination) + state = graph.nodes()[0] + map_entries = [n for n in state.nodes() if isinstance(n, dace.sdfg.nodes.MapEntry)] + self.assertEqual(len(map_entries), 1) + # Check that there is an outgoing edge from the map entry + self.assertEqual(len(state.out_edges(map_entries[0])), 1) + + if __name__ == '__main__': unittest.main() From 777083ad55426e0273d9251ad0bfac6b930470aa Mon Sep 17 00:00:00 2001 From: Samuel Martin Date: Wed, 8 Nov 2023 14:41:08 +0100 Subject: [PATCH 135/163] Change strides, move assignment outside if (#1402) Adds functions/transformations from my thesis to: - Change strides of existing data arrays - Move constant assignments outside of if/else constructs Adds fixes for: - Codegen getting the struct depending on some scope situations - Map expansion using a hardcoded schedule instead of taking the existing one --------- Co-authored-by: Samuel Martin Co-authored-by: alexnick83 <31545860+alexnick83@users.noreply.github.com> Co-authored-by: Alexandros Nikolaos Ziogas --- dace/transformation/change_strides.py | 210 ++++++++++++++++++ dace/transformation/dataflow/map_expansion.py | 17 +- dace/transformation/helpers.py | 3 +- dace/transformation/interstate/__init__.py | 1 + .../interstate/move_assignment_outside_if.py | 113 ++++++++++ tests/transformations/change_strides_test.py | 48 ++++ .../move_assignment_outside_if_test.py | 161 ++++++++++++++ 7 files changed, 546 insertions(+), 7 deletions(-) create mode 100644 dace/transformation/change_strides.py create mode 100644 dace/transformation/interstate/move_assignment_outside_if.py create mode 100644 tests/transformations/change_strides_test.py create mode
100644 tests/transformations/move_assignment_outside_if_test.py diff --git a/dace/transformation/change_strides.py b/dace/transformation/change_strides.py new file mode 100644 index 0000000000..001cd4aa63 --- /dev/null +++ b/dace/transformation/change_strides.py @@ -0,0 +1,210 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +""" This module provides a function to change the strides in a given SDFG """ +from typing import List, Union, Tuple +import sympy + +import dace +from dace.dtypes import ScheduleType +from dace.sdfg import SDFG, nodes, SDFGState +from dace.data import Array, Scalar +from dace.memlet import Memlet + + +def list_access_nodes( + sdfg: dace.SDFG, + array_name: str) -> List[Tuple[nodes.AccessNode, Union[SDFGState, dace.SDFG]]]: + """ + Find all access nodes in the SDFG with the given array name. Does not recurse into nested SDFGs. + + :param sdfg: The SDFG to search through + :type sdfg: dace.SDFG + :param array_name: The name of the wanted array + :type array_name: str + :return: List of the found access nodes together with their state + :rtype: List[Tuple[nodes.AccessNode, Union[dace.SDFGState, dace.SDFG]]] + """ + found_nodes = [] + for state in sdfg.states(): + for node in state.nodes(): + if isinstance(node, nodes.AccessNode) and node.data == array_name: + found_nodes.append((node, state)) + return found_nodes + + +def change_strides( + sdfg: dace.SDFG, + stride_one_values: List[str], + schedule: ScheduleType) -> SDFG: + """ + Change the strides of the arrays in the given SDFG such that the given dimension has stride 1. Returns a new SDFG. + + :param sdfg: The input SDFG + :type sdfg: dace.SDFG + :param stride_one_values: Lengths of the dimensions whose stride should be set to one. Expects that each array has + only one dimension whose length is in this list.
Expects that the list contains names of symbols + :type stride_one_values: List[str] + :param schedule: Schedule to use to copy the arrays + :type schedule: ScheduleType + :return: SDFG with changed strides + :rtype: SDFG + """ + # Create new SDFG and copy constants and symbols + original_name = sdfg.name + sdfg.name = "changed_strides" + new_sdfg = SDFG(original_name) + for dname, value in sdfg.constants.items(): + new_sdfg.add_constant(dname, value) + for dname, stype in sdfg.symbols.items(): + new_sdfg.add_symbol(dname, stype) + + changed_stride_state = new_sdfg.add_state("with_changed_strides", is_start_state=True) + inputs, outputs = sdfg.read_and_write_sets() + # Get all arrays which are persistent == not transient + persistent_arrays = {name: desc for name, desc in sdfg.arrays.items() if not desc.transient} + + # Get the persistent arrays of all the transient arrays which get copied to GPU + for dname in persistent_arrays: + for access, state in list_access_nodes(sdfg, dname): + if len(state.out_edges(access)) == 1: + edge = state.out_edges(access)[0] + if isinstance(edge.dst, nodes.AccessNode): + if edge.dst.data in inputs: + inputs.remove(edge.dst.data) + inputs.add(dname) + if len(state.in_edges(access)) == 1: + edge = state.in_edges(access)[0] + if isinstance(edge.src, nodes.AccessNode): + if edge.src.data in outputs: + outputs.remove(edge.src.data) + outputs.add(dname) + + # Only keep inputs and outputs which are persistent + inputs.intersection_update(persistent_arrays.keys()) + outputs.intersection_update(persistent_arrays.keys()) + nsdfg = changed_stride_state.add_nested_sdfg(sdfg, new_sdfg, inputs=inputs, outputs=outputs) + transform_state = new_sdfg.add_state_before(changed_stride_state, label="transform_data", is_start_state=True) + transform_state_back = new_sdfg.add_state_after(changed_stride_state, "transform_data_back", is_start_state=False) + + # copy arrays + for dname, desc in sdfg.arrays.items(): + if not desc.transient: + if isinstance(desc, Array): + new_sdfg.add_array(dname, desc.shape, desc.dtype, desc.storage, + desc.location, desc.transient, desc.strides, + desc.offset) + elif isinstance(desc, Scalar): + new_sdfg.add_scalar(dname, desc.dtype, desc.storage, desc.transient, desc.lifetime, desc.debuginfo) + + new_order = {} + new_strides_map = {} + + # Map of array names in the nested sdfg: key: array name in parent sdfg (this sdfg), value: name in the nsdfg + # Assumes that name changes only appear in the first level of nsdfg nesting + array_names_map = {} + for graph in sdfg.sdfg_list: + if graph.parent_nsdfg_node is not None: + if graph.parent_sdfg == sdfg: + for connector in graph.parent_nsdfg_node.in_connectors: + for in_edge in graph.parent.in_edges_by_connector(graph.parent_nsdfg_node, connector): + array_names_map[str(connector)] = in_edge.data.data + + for containing_sdfg, dname, desc in sdfg.arrays_recursive(): + shape_str = [str(s) for s in desc.shape] + # Get index of the dimension we want to have stride 1 + stride_one_idx = None + this_stride_one_value = None + for dim in stride_one_values: + if str(dim) in shape_str: + stride_one_idx = shape_str.index(str(dim)) + this_stride_one_value = dim + break + + if stride_one_idx is not None: + new_order[dname] = [stride_one_idx] + + new_strides = list(desc.strides) + new_strides[stride_one_idx] = sympy.S.One + + previous_size = dace.symbolic.symbol(this_stride_one_value) + previous_stride = sympy.S.One + for i in range(len(new_strides)): + if i != stride_one_idx: + new_order[dname].append(i) + new_strides[i] =
previous_size * previous_stride + previous_size = desc.shape[i] + previous_stride = new_strides[i] + + new_strides_map[dname] = {} + # Create a map entry for this data linking old strides to new strides. This assumes that each entry in + # strides is unique which is given as otherwise there would be two dimension i, j where a[i, j] would point + # to the same address as a[j, i] + for new_stride, old_stride in zip(new_strides, desc.strides): + new_strides_map[dname][old_stride] = new_stride + desc.strides = tuple(new_strides) + else: + parent_name = array_names_map[dname] if dname in array_names_map else dname + if parent_name in new_strides_map: + new_strides = [] + for stride in desc.strides: + new_strides.append(new_strides_map[parent_name][stride]) + desc.strides = new_strides + + # Add new flipped arrays for every non-transient array + flipped_names_map = {} + for dname, desc in sdfg.arrays.items(): + if not desc.transient: + flipped_name = f"{dname}_flipped" + flipped_names_map[dname] = flipped_name + new_sdfg.add_array(flipped_name, desc.shape, desc.dtype, + desc.storage, desc.location, True, + desc.strides, desc.offset) + + # Deal with the inputs: Create tasklet to flip them and connect via memlets + # for input in inputs: + for input in set([*inputs, *outputs]): + if input in new_order: + flipped_data = flipped_names_map[input] + if input in inputs: + changed_stride_state.add_memlet_path(changed_stride_state.add_access(flipped_data), nsdfg, + dst_conn=input, memlet=Memlet(data=flipped_data)) + # Simply need to copy the data, the different strides take care of the transposing + arr = sdfg.arrays[input] + tasklet, map_entry, map_exit = transform_state.add_mapped_tasklet( + name=f"transpose_{input}", + map_ranges={f"_i{i}": f"0:{s}" for i, s in enumerate(arr.shape)}, + inputs={'_in': Memlet(data=input, subset=", ".join(f"_i{i}" for i, _ in enumerate(arr.shape)))}, + code='_out = _in', + outputs={'_out': Memlet(data=flipped_data, + subset=", ".join(f"_i{i}" for i, _ in enumerate(arr.shape)))}, + external_edges=True, + schedule=schedule, + ) + # Do the same for the outputs + for output in outputs: + if output in new_order: + flipped_data = flipped_names_map[output] + changed_stride_state.add_memlet_path(nsdfg, changed_stride_state.add_access(flipped_data), + src_conn=output, memlet=Memlet(data=flipped_data)) + # Simply need to copy the data, the different strides take care of the transposing + arr = sdfg.arrays[output] + tasklet, map_entry, map_exit = transform_state_back.add_mapped_tasklet( + name=f"transpose_{output}", + map_ranges={f"_i{i}": f"0:{s}" for i, s in enumerate(arr.shape)}, + inputs={'_in': Memlet(data=flipped_data, + subset=", ".join(f"_i{i}" for i, _ in enumerate(arr.shape)))}, + code='_out = _in', + outputs={'_out': Memlet(data=output, subset=", ".join(f"_i{i}" for i, _ in enumerate(arr.shape)))}, + external_edges=True, + schedule=schedule, + ) + # Deal with any arrays which have not been flipped (should only be scalars). 
Connect them directly + for dname, desc in sdfg.arrays.items(): + if not desc.transient and dname not in new_order: + if dname in inputs: + changed_stride_state.add_memlet_path(changed_stride_state.add_access(dname), nsdfg, dst_conn=dname, + memlet=Memlet(data=dname)) + if dname in outputs: + changed_stride_state.add_memlet_path(nsdfg, changed_stride_state.add_access(dname), src_conn=dname, + memlet=Memlet(data=dname)) + + return new_sdfg diff --git a/dace/transformation/dataflow/map_expansion.py b/dace/transformation/dataflow/map_expansion.py index 275b99c7e8..60f1f13f32 100644 --- a/dace/transformation/dataflow/map_expansion.py +++ b/dace/transformation/dataflow/map_expansion.py @@ -1,16 +1,18 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. """ Contains classes that implement the map-expansion transformation. """ from dace.sdfg.utils import consolidate_edges from typing import Dict, List import dace from dace import dtypes, subsets, symbolic +from dace.properties import EnumProperty, make_properties from dace.sdfg import nodes from dace.sdfg import utils as sdutil from dace.sdfg.graph import OrderedMultiDiConnectorGraph from dace.transformation import transformation as pm +@make_properties class MapExpansion(pm.SingleStateTransformation): """ Implements the map-expansion pattern. @@ -25,14 +27,16 @@ class MapExpansion(pm.SingleStateTransformation): map_entry = pm.PatternNode(nodes.MapEntry) + inner_schedule = EnumProperty(desc="Schedule for inner maps", + dtype=dtypes.ScheduleType, + default=dtypes.ScheduleType.Sequential, + allow_none=True) + @classmethod def expressions(cls): return [sdutil.node_path_graph(cls.map_entry)] - def can_be_applied(self, graph: dace.SDFGState, - expr_index: int, - sdfg: dace.SDFG, - permissive: bool = False): + def can_be_applied(self, graph: dace.SDFGState, expr_index: int, sdfg: dace.SDFG, permissive: bool = False): # A candidate subgraph matches the map-expansion pattern when it # includes an N-dimensional map, with N greater than one. 
return self.map_entry.map.get_param_num() > 1 @@ -44,10 +48,11 @@ def apply(self, graph: dace.SDFGState, sdfg: dace.SDFG): current_map = map_entry.map # Create new maps + inner_schedule = self.inner_schedule or current_map.schedule new_maps = [ nodes.Map(current_map.label + '_' + str(param), [param], subsets.Range([param_range]), - schedule=dtypes.ScheduleType.Sequential) + schedule=inner_schedule) for param, param_range in zip(current_map.params[1:], current_map.range[1:]) ] current_map.params = [current_map.params[0]] diff --git a/dace/transformation/helpers.py b/dace/transformation/helpers.py index 8986c4e37f..9c41e4dec4 100644 --- a/dace/transformation/helpers.py +++ b/dace/transformation/helpers.py @@ -1137,7 +1137,8 @@ def traverse(state: SDFGState, treenode: ScopeTree): ntree.state = nstate treenode.children.append(ntree) for child in treenode.children: - traverse(getattr(child, 'state', state), child) + if hasattr(child, 'state') and child.state != state: + traverse(getattr(child, 'state', state), child) traverse(state, stree) return stree diff --git a/dace/transformation/interstate/__init__.py b/dace/transformation/interstate/__init__.py index 0bd168751c..b8bcc716e6 100644 --- a/dace/transformation/interstate/__init__.py +++ b/dace/transformation/interstate/__init__.py @@ -15,3 +15,4 @@ from .move_loop_into_map import MoveLoopIntoMap from .trivial_loop_elimination import TrivialLoopElimination from .multistate_inline import InlineMultistateSDFG +from .move_assignment_outside_if import MoveAssignmentOutsideIf diff --git a/dace/transformation/interstate/move_assignment_outside_if.py b/dace/transformation/interstate/move_assignment_outside_if.py new file mode 100644 index 0000000000..3d4db9ae25 --- /dev/null +++ b/dace/transformation/interstate/move_assignment_outside_if.py @@ -0,0 +1,113 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +""" +Transformation to move assignments outside if statements to potentially avoid warp divergence. Speedup gained is +questionable. 
+""" + +import ast +import sympy as sp + +from dace import sdfg as sd +from dace.sdfg import graph as gr +from dace.sdfg.nodes import Tasklet, AccessNode +from dace.transformation import transformation + + +class MoveAssignmentOutsideIf(transformation.MultiStateTransformation): + + if_guard = transformation.PatternNode(sd.SDFGState) + if_stmt = transformation.PatternNode(sd.SDFGState) + else_stmt = transformation.PatternNode(sd.SDFGState) + + @classmethod + def expressions(cls): + sdfg = gr.OrderedDiGraph() + sdfg.add_nodes_from([cls.if_guard, cls.if_stmt, cls.else_stmt]) + sdfg.add_edge(cls.if_guard, cls.if_stmt, sd.InterstateEdge()) + sdfg.add_edge(cls.if_guard, cls.else_stmt, sd.InterstateEdge()) + return [sdfg] + + def can_be_applied(self, graph, expr_index, sdfg, permissive=False): + # The if-guard can only have two outgoing edges: to the if and to the else part + guard_outedges = graph.out_edges(self.if_guard) + if len(guard_outedges) != 2: + return False + + # Outgoing edges must be a negation of each other + if guard_outedges[0].data.condition_sympy() != (sp.Not(guard_outedges[1].data.condition_sympy())): + return False + + # The if guard should either have zero or one incoming edge + if len(sdfg.in_edges(self.if_guard)) > 1: + return False + + # set of the variables which get a const value assigned + assigned_const = set() + # Dict which collects all AccessNodes for each variable together with its state + access_nodes = {} + # set of the variables which are only written to + self.write_only_values = set() + # Dictionary which stores additional information for the variables which are written only + self.assign_context = {} + for state in [self.if_stmt, self.else_stmt]: + for node in state.nodes(): + if isinstance(node, Tasklet): + # If node is a tasklet, check if assigns a constant value + assigns_const = True + for code_stmt in node.code.code: + if not (isinstance(code_stmt, ast.Assign) and isinstance(code_stmt.value, ast.Constant)): + assigns_const = False + if assigns_const: + for edge in state.out_edges(node): + if isinstance(edge.dst, AccessNode): + assigned_const.add(edge.dst.data) + self.assign_context[edge.dst.data] = {"state": state, "tasklet": node} + elif isinstance(node, AccessNode): + if node.data not in access_nodes: + access_nodes[node.data] = [] + access_nodes[node.data].append((node, state)) + + # check that the found access nodes only get written to + for data, nodes in access_nodes.items(): + write_only = True + for node, state in nodes: + if node.has_reads(state): + # The read is only a problem if it is not written before -> the access node has no incoming edge + if state.in_degree(node) == 0: + write_only = False + else: + # There is also a problem if any edge is an update instead of write + for edge in [*state.out_edges(node), *state.out_edges(node)]: + if edge.data.wcr is not None: + write_only = False + + if write_only: + self.write_only_values.add(data) + + # Want only the values which are only written to and one option uses a constant value + self.write_only_values = assigned_const.intersection(self.write_only_values) + + if len(self.write_only_values) == 0: + return False + return True + + def apply(self, _, sdfg: sd.SDFG): + # create a new state before the guard state where the zero assignment happens + new_assign_state = sdfg.add_state_before(self.if_guard, label="const_assignment_state") + + # Move all the Tasklets together with the AccessNode + for value in self.write_only_values: + state = self.assign_context[value]["state"] + tasklet = 
self.assign_context[value]["tasklet"] + new_assign_state.add_node(tasklet) + for edge in state.out_edges(tasklet): + state.remove_edge(edge) + state.remove_node(edge.dst) + new_assign_state.add_node(edge.dst) + new_assign_state.add_edge(tasklet, edge.src_conn, edge.dst, edge.dst_conn, edge.data) + + state.remove_node(tasklet) + # Remove the state if it was emptied + if state.is_empty(): + sdfg.remove_node(state) + return sdfg diff --git a/tests/transformations/change_strides_test.py b/tests/transformations/change_strides_test.py new file mode 100644 index 0000000000..3975761fd5 --- /dev/null +++ b/tests/transformations/change_strides_test.py @@ -0,0 +1,48 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +import dace +from dace import nodes +from dace.dtypes import ScheduleType +from dace.memlet import Memlet +from dace.transformation.change_strides import change_strides + + +def change_strides_test(): + sdfg = dace.SDFG('change_strides_test') + N = dace.symbol('N') + M = dace.symbol('M') + sdfg.add_array('A', [N, M], dace.float64) + sdfg.add_array('B', [N, M, 3], dace.float64) + state = sdfg.add_state() + + task1, mentry1, mexit1 = state.add_mapped_tasklet( + name="map1", + map_ranges={'i': '0:N', 'j': '0:M'}, + inputs={'a': Memlet(data='A', subset='i, j')}, + outputs={'b': Memlet(data='B', subset='i, j, 0')}, + code='b = a + 1', + external_edges=True, + propagate=True) + + # Check that states are as expected + changed_sdfg = change_strides(sdfg, ['N'], ScheduleType.Sequential) + assert len(changed_sdfg.states()) == 3 + assert len(changed_sdfg.out_edges(changed_sdfg.start_state)) == 1 + work_state = changed_sdfg.out_edges(changed_sdfg.start_state)[0].dst + nsdfg = None + for node in work_state.nodes(): + if isinstance(node, nodes.NestedSDFG): + nsdfg = node + # Check shape and strides of data inside nested SDFG + assert nsdfg is not None + assert nsdfg.sdfg.data('A').shape == (N, M) + assert nsdfg.sdfg.data('B').shape == (N, M, 3) + assert nsdfg.sdfg.data('A').strides == (1, N) + assert nsdfg.sdfg.data('B').strides == (1, N, M*N) + + +def main(): + change_strides_test() + + +if __name__ == '__main__': + main() diff --git a/tests/transformations/move_assignment_outside_if_test.py b/tests/transformations/move_assignment_outside_if_test.py new file mode 100644 index 0000000000..323e83cf61 --- /dev/null +++ b/tests/transformations/move_assignment_outside_if_test.py @@ -0,0 +1,161 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +import dace +from dace.transformation.interstate import MoveAssignmentOutsideIf +from dace.sdfg import InterstateEdge +from dace.memlet import Memlet +from dace.sdfg.nodes import Tasklet + + +def one_variable_simple_test(const_value: int = 0): + """ Test with one variable which has formula and const branch. 
Uses the given const value """ + sdfg = dace.SDFG('one_variable_simple_test') + # Create guard state and one state where A is set to 0 and another where it is set using B and some formula + guard = sdfg.add_state('guard', is_start_state=True) + formula_state = sdfg.add_state('formula', is_start_state=False) + const_state = sdfg.add_state('const', is_start_state=False) + sdfg.add_array('A', [1], dace.float64) + sdfg.add_array('B', [1], dace.float64) + + # Add tasklet inside states + formula_tasklet = formula_state.add_tasklet('formula_assign', {'b'}, {'a'}, 'a = 2*b') + formula_state.add_memlet_path(formula_state.add_read('B'), formula_tasklet, memlet=Memlet(data='B', subset='0'), + dst_conn='b') + formula_state.add_memlet_path(formula_tasklet, formula_state.add_write('A'), memlet=Memlet(data='A', subset='0'), + src_conn='a') + const_tasklet = const_state.add_tasklet('const_assign', {}, {'a'}, f"a = {const_value}") + const_state.add_memlet_path(const_tasklet, const_state.add_write('A'), memlet=Memlet(data='A', subset='0'), + src_conn='a') + + # Create if-else condition such that either the formula state or the const state is executed + sdfg.add_edge(guard, formula_state, InterstateEdge(condition='B[0] < 0.5')) + sdfg.add_edge(guard, const_state, InterstateEdge(condition='B[0] >= 0.5')) + sdfg.validate() + + # Assure transformation is applied + assert sdfg.apply_transformations_repeated([MoveAssignmentOutsideIf]) == 1 + # SDFG now starts with a state containing the const_tasklet + assert const_tasklet in sdfg.start_state.nodes() + # The formula state has only one in_edge with the condition + assert len(sdfg.in_edges(formula_state)) == 1 + assert sdfg.in_edges(formula_state)[0].data.condition.as_string == '(B[0] < 0.5)' + # All states have at most one out_edge -> there is no if-else branching anymore + for state in sdfg.states(): + assert len(sdfg.out_edges(state)) <= 1 + + +def multiple_variable_test(): + """ Test with multiple variables where not all appear in the const branch """ + sdfg = dace.SDFG('multiple_variable_test') + # Create guard state and one state where A is set to 0 and another where it is set using B and some formula + guard = sdfg.add_state('guard', is_start_state=True) + formula_state = sdfg.add_state('formula', is_start_state=False) + const_state = sdfg.add_state('const', is_start_state=False) + sdfg.add_array('A', [1], dace.float64) + sdfg.add_array('B', [1], dace.float64) + sdfg.add_array('C', [1], dace.float64) + sdfg.add_array('D', [1], dace.float64) + + A = formula_state.add_access('A') + B = formula_state.add_access('B') + C = formula_state.add_access('C') + D = formula_state.add_access('D') + formula_tasklet_a = formula_state.add_tasklet('formula_assign', {'b'}, {'a'}, 'a = 2*b') + formula_state.add_memlet_path(B, formula_tasklet_a, memlet=Memlet(data='B', subset='0'), dst_conn='b') + formula_state.add_memlet_path(formula_tasklet_a, A, memlet=Memlet(data='A', subset='0'), src_conn='a') + formula_tasklet_b = formula_state.add_tasklet('formula_assign', {'c'}, {'b'}, 'b = 2*c') + formula_state.add_memlet_path(C, formula_tasklet_b, memlet=Memlet(data='C', subset='0'), dst_conn='c') + formula_state.add_memlet_path(formula_tasklet_b, B, memlet=Memlet(data='B', subset='0'), src_conn='b') + formula_tasklet_c = formula_state.add_tasklet('formula_assign', {'d'}, {'c'}, 'c = 2*d') + formula_state.add_memlet_path(D, formula_tasklet_c, memlet=Memlet(data='D', subset='0'), dst_conn='d') + formula_state.add_memlet_path(formula_tasklet_c, C, memlet=Memlet(data='C', subset='0'),
src_conn='c') + + const_tasklet_a = const_state.add_tasklet('const_assign', {}, {'a'}, 'a = 0') + const_state.add_memlet_path(const_tasklet_a, const_state.add_write('A'), memlet=Memlet(data='A', subset='0'), + src_conn='a') + const_tasklet_b = const_state.add_tasklet('const_assign', {}, {'b'}, 'b = 0') + const_state.add_memlet_path(const_tasklet_b, const_state.add_write('B'), memlet=Memlet(data='B', subset='0'), + src_conn='b') + + # Create if-else condition such that either the formula state or the const state is executed + sdfg.add_edge(guard, formula_state, InterstateEdge(condition='D[0] < 0.5')) + sdfg.add_edge(guard, const_state, InterstateEdge(condition='D[0] >= 0.5')) + sdfg.validate() + + # Assure transformation is applied + assert sdfg.apply_transformations_repeated([MoveAssignmentOutsideIf]) == 1 + # There are no other tasklets in the start state besides the const assignment tasklets as there are no other const + # assignments + for node in sdfg.start_state.nodes(): + if isinstance(node, Tasklet): + assert node == const_tasklet_a or node == const_tasklet_b + # The formula state has only one in_edge with the condition + assert len(sdfg.in_edges(formula_state)) == 1 + assert sdfg.in_edges(formula_state)[0].data.condition.as_string == '(D[0] < 0.5)' + # All states have at most one out_edge -> there is no if-else branching anymore + for state in sdfg.states(): + assert len(sdfg.out_edges(state)) <= 1 + + +def multiple_variable_not_all_const_test(): + """ Test with multiple variables where not all get const-assigned in const branch """ + sdfg = dace.SDFG('multiple_variable_not_all_const_test') + # Create guard state and one state where A is set to 0 and another where it is set using B and some formula + guard = sdfg.add_state('guard', is_start_state=True) + formula_state = sdfg.add_state('formula', is_start_state=False) + const_state = sdfg.add_state('const', is_start_state=False) + sdfg.add_array('A', [1], dace.float64) + sdfg.add_array('B', [1], dace.float64) + sdfg.add_array('C', [1], dace.float64) + + A = formula_state.add_access('A') + B = formula_state.add_access('B') + C = formula_state.add_access('C') + formula_tasklet_a = formula_state.add_tasklet('formula_assign', {'b'}, {'a'}, 'a = 2*b') + formula_state.add_memlet_path(B, formula_tasklet_a, memlet=Memlet(data='B', subset='0'), dst_conn='b') + formula_state.add_memlet_path(formula_tasklet_a, A, memlet=Memlet(data='A', subset='0'), src_conn='a') + formula_tasklet_b = formula_state.add_tasklet('formula_assign', {'c'}, {'b'}, 'b = 2*c') + formula_state.add_memlet_path(C, formula_tasklet_b, memlet=Memlet(data='C', subset='0'), dst_conn='c') + formula_state.add_memlet_path(formula_tasklet_b, B, memlet=Memlet(data='B', subset='0'), src_conn='b') + + const_tasklet_a = const_state.add_tasklet('const_assign', {}, {'a'}, 'a = 0') + const_state.add_memlet_path(const_tasklet_a, const_state.add_write('A'), memlet=Memlet(data='A', subset='0'), + src_conn='a') + const_tasklet_b = const_state.add_tasklet('const_assign', {'c'}, {'b'}, 'b = 1.5 * c') + const_state.add_memlet_path(const_state.add_read('C'), const_tasklet_b, memlet=Memlet(data='C', subset='0'), + dst_conn='c') + const_state.add_memlet_path(const_tasklet_b, const_state.add_write('B'), memlet=Memlet(data='B', subset='0'), + src_conn='b') + + # Create if-else condition such that either the formula state or the const state is executed + sdfg.add_edge(guard, formula_state, InterstateEdge(condition='C[0] < 0.5')) + sdfg.add_edge(guard, const_state, InterstateEdge(condition='C[0] >= 0.5'))
sdfg.validate() + + # Assure transformation is applied + assert sdfg.apply_transformations_repeated([MoveAssignmentOutsideIf]) == 1 + # There are no other tasklets in the start state beside the const assignment tasklet as there are no other const + # assignments + for node in sdfg.start_state.nodes(): + if isinstance(node, Tasklet): + assert node == const_tasklet_a or node == const_tasklet_b + # The formula state has only one in_edge with the condition + assert len(sdfg.in_edges(formula_state)) == 1 + assert sdfg.in_edges(formula_state)[0].data.condition.as_string == '(C[0] < 0.5)' + # Guard still has two outgoing edges as if-else pattern still exists + assert len(sdfg.out_edges(guard)) == 2 + # const state now has only const_tasklet_b left plus two access nodes + assert len(const_state.nodes()) == 3 + for node in const_state.nodes(): + if isinstance(node, Tasklet): + assert node == const_tasklet_b + + +def main(): + one_variable_simple_test(0) + one_variable_simple_test(2) + multiple_variable_test() + multiple_variable_not_all_const_test() + + +if __name__ == '__main__': + main() From 97a2e1cac726d10089f8e7fdc3b0493b25a39b95 Mon Sep 17 00:00:00 2001 From: Ataf Fazledin Ahamed Date: Sat, 11 Nov 2023 17:28:16 +0600 Subject: [PATCH 136/163] Fixed Improper Method Call: Replaced `mktemp` (#1428) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Details While triaging your project, our bug fixing tool generated the following message(s)- > In file: [sdfv.py](https://github.com/spcl/dace/blob/master/dace/cli/sdfv.py#L44), there is a method that creates a temporary file using an unsafe API `mktemp`. The use of this method is discouraged in the [Python documentation](https://docs.python.org/3/library/tempfile.html#tempfile.mktemp). iCR suggested that a temporary file should be created using `mkstemp` which is a [safe API](https://docs.python.org/3/library/tempfile.html#tempfile.mkstemp). iCR replaced the usage of mktemp with `mkstemp`. ## Changes Replaced `mktemp()` method with `mkstemp()` ## Previously Found & Fixed - https://www.github.com/invesalius/invesalius3/pull/679 - https://www.github.com/Azure/azure-linux-extensions/pull/1816 - https://www.github.com/celery/billiard/pull/394 ## CLA Requirements *This section is only relevant if your project requires contributors to sign a Contributor License Agreement (CLA) for external contributions.* All contributed commits are already automatically signed off. > The meaning of a signoff depends on the project, but it typically certifies that committer has the rights to submit this work under the same license and agrees to a Developer Certificate of Origin (see [https://developercertificate.org/](https://developercertificate.org/) for more information). \- [Git Commit SignOff documentation](https://developercertificate.org/) ## Sponsorship and Support This work is done by the security researchers from OpenRefactory and is supported by the [Open Source Security Foundation (OpenSSF)](https://openssf.org/): [Project Alpha-Omega](https://alpha-omega.dev/). Alpha-Omega is a project partnering with open source software project maintainers to systematically find new, as-yet-undiscovered vulnerabilities in open source code - and get them fixed – to improve global software supply chain security. The bug is found by running the Intelligent Code Repair (iCR) tool by OpenRefactory and then manually triaging the results. 
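For context, a minimal sketch of the difference between the two tempfile APIs (illustrative, not part of the patch):

import os
import tempfile

# Unsafe: mktemp() only returns an unused name. Another process can create
# a file with that name between name generation and the first open (a
# time-of-check/time-of-use race).
path = tempfile.mktemp(suffix='.sdfg')

# Safe: mkstemp() atomically creates and opens the file with restrictive
# permissions and returns an OS-level file descriptor along with the path.
fd, path = tempfile.mkstemp(suffix='.sdfg')
os.close(fd)
os.remove(path)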
Co-authored-by: alexnick83 <31545860+alexnick83@users.noreply.github.com> --- dace/cli/sdfv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dace/cli/sdfv.py b/dace/cli/sdfv.py index c0ff3da36d..f503775814 100644 --- a/dace/cli/sdfv.py +++ b/dace/cli/sdfv.py @@ -41,9 +41,10 @@ def view(sdfg: dace.SDFG, filename: Optional[Union[str, int]] = None): or 'VSCODE_IPC_HOOK_CLI' in os.environ or 'VSCODE_GIT_IPC_HANDLE' in os.environ ): - filename = tempfile.mktemp(suffix='.sdfg') + fd, filename = tempfile.mkstemp(suffix='.sdfg') sdfg.save(filename) os.system(f'code {filename}') + os.close(fd) return if type(sdfg) is dace.SDFG: From e5b64bfaa91208f12c00464317e591e1aaaf7993 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20M=C3=BCller?= <147368808+philip-paul-mueller@users.noreply.github.com> Date: Tue, 14 Nov 2023 01:36:17 +0100 Subject: [PATCH 137/163] Symbol specialization in `auto_optimize()` never took effect. (#1410) The `dict` that was storing all the symbols, `known_symbols`, was emptied just after its creation. As a result, the specialization never took effect. --------- Co-authored-by: Philipp Schaad --- dace/transformation/auto/auto_optimize.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dace/transformation/auto/auto_optimize.py b/dace/transformation/auto/auto_optimize.py index 644df59e5c..bb384cfd9a 100644 --- a/dace/transformation/auto/auto_optimize.py +++ b/dace/transformation/auto/auto_optimize.py @@ -646,7 +646,6 @@ def auto_optimize(sdfg: SDFG, if symbols: # Specialize for all known symbols - known_symbols = {s: v for (s, v) in symbols.items() if s in sdfg.free_symbols} known_symbols = {} for (s, v) in symbols.items(): if s in sdfg.free_symbols: From 43ca982bd31a9d8b52181613769c4f8cb0b0c03f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20M=C3=BCller?= <147368808+philip-paul-mueller@users.noreply.github.com> Date: Tue, 14 Nov 2023 16:44:32 +0100 Subject: [PATCH 138/163] Issue a warning when `to_sdfg()` ignores the auto_optimize flag (Issue #1380). (#1395) As discussed, ignoring the `auto_optimize` flag in `to_sdfg()` is intentional. To make this clear to the user, the function now issues a warning in this case. --------- Co-authored-by: Philip Mueller, KY Co-authored-by: Tal Ben-Nun --- dace/frontend/python/parser.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dace/frontend/python/parser.py b/dace/frontend/python/parser.py index 991613a9ea..1b6817a7d0 100644 --- a/dace/frontend/python/parser.py +++ b/dace/frontend/python/parser.py @@ -240,6 +240,9 @@ def to_sdfg(self, *args, simplify=None, save=False, validate=False, use_cache=Fa warnings.warn("You are calling to_sdfg() on a dace program that " "has set 'recompile' to False. " "This may not be what you want.") + if self.autoopt == True: + warnings.warn("You are calling to_sdfg() on a dace program that " + "has set `auto_optimize` to True. Automatic optimization will not be applied.") if use_cache: # Update global variables with current closure From 40ed43812a10f3a622572bd8c82baa68d15053a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20M=C3=BCller?= <147368808+philip-paul-mueller@users.noreply.github.com> Date: Fri, 17 Nov 2023 17:21:55 +0100 Subject: [PATCH 139/163] Numpy fill also accepts variables (#1420) This PR addresses issue [#1389](https://github.com/spcl/dace/issues/1389).
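The tests added below exercise this; a minimal usage sketch of the new capability (the program name here is illustrative):

import dace
import numpy as np

M, N = 3, 4

@dace.program
def fill_with_scalar(A: dace.int32[M, N], a: dace.int32):
    # fill() now accepts scalar variables and expressions,
    # not only numeric literals
    A.fill(a + 1)

A = np.zeros((M, N), dtype=np.int32)
fill_with_scalar(A, np.int32(4))
assert np.all(A == 5)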
--------- Co-authored-by: acalotoiu <61420859+acalotoiu@users.noreply.github.com> Co-authored-by: BenWeber42 --- dace/frontend/common/op_repository.py | 7 +-- dace/frontend/python/astutils.py | 25 ++++++----- dace/frontend/python/replacements.py | 45 +++++++++++++++---- .../numpy/ndarray_attributes_methods_test.py | 14 ++++++ 4 files changed, 66 insertions(+), 25 deletions(-) diff --git a/dace/frontend/common/op_repository.py b/dace/frontend/common/op_repository.py index 32e10417dc..067c19ac57 100644 --- a/dace/frontend/common/op_repository.py +++ b/dace/frontend/common/op_repository.py @@ -17,12 +17,7 @@ def _get_all_bases(class_or_name: Union[str, Type]) -> List[str]: """ if isinstance(class_or_name, str): return [class_or_name] - - classes = [class_or_name.__name__] - for base in class_or_name.__bases__: - classes.extend(_get_all_bases(base)) - - return deduplicate(classes) + return [base.__name__ for base in class_or_name.__mro__] class Replacements(object): diff --git a/dace/frontend/python/astutils.py b/dace/frontend/python/astutils.py index 67d8b6aded..c9a400e5f1 100644 --- a/dace/frontend/python/astutils.py +++ b/dace/frontend/python/astutils.py @@ -442,9 +442,10 @@ class ExtNodeTransformer(ast.NodeTransformer): bodies in order to discern DaCe statements from others. """ def visit_TopLevel(self, node): - clsname = type(node).__name__ - if getattr(self, "visit_TopLevel" + clsname, False): - return getattr(self, "visit_TopLevel" + clsname)(node) + visitor_name = "visit_TopLevel" + type(node).__name__ + if hasattr(self, visitor_name): + visitor = getattr(self, visitor_name) + return visitor(node) else: return self.visit(node) @@ -480,21 +481,23 @@ class ExtNodeVisitor(ast.NodeVisitor): top-level expressions in bodies in order to discern DaCe statements from others. 
""" def visit_TopLevel(self, node): - clsname = type(node).__name__ - if getattr(self, "visit_TopLevel" + clsname, False): - getattr(self, "visit_TopLevel" + clsname)(node) + visitor_name = "visit_TopLevel" + type(node).__name__ + if hasattr(self, visitor_name): + visitor = getattr(self, visitor_name) + return visitor(node) else: - self.visit(node) + return self.visit(node) def generic_visit(self, node): for field, old_value in ast.iter_fields(node): if isinstance(old_value, list): for value in old_value: if isinstance(value, ast.AST): - if (field == 'body' or field == 'orelse'): - clsname = type(value).__name__ - if getattr(self, "visit_TopLevel" + clsname, False): - getattr(self, "visit_TopLevel" + clsname)(value) + if field == 'body' or field == 'orelse': + visitor_name = "visit_TopLevel" + type(value).__name__ + if hasattr(self, visitor_name): + visitor = getattr(self, visitor_name) + visitor(value) else: self.visit(value) else: diff --git a/dace/frontend/python/replacements.py b/dace/frontend/python/replacements.py index eace0c8336..f55a65eabb 100644 --- a/dace/frontend/python/replacements.py +++ b/dace/frontend/python/replacements.py @@ -605,11 +605,10 @@ def _elementwise(pv: 'ProgramVisitor', else: state.add_mapped_tasklet( name="_elementwise_", - map_ranges={'__i%d' % i: '0:%s' % n - for i, n in enumerate(inparr.shape)}, - inputs={'__inp': Memlet.simple(in_array, ','.join(['__i%d' % i for i in range(len(inparr.shape))]))}, + map_ranges={f'__i{dim}': f'0:{N}' for dim, N in enumerate(inparr.shape)}, + inputs={'__inp': Memlet.simple(in_array, ','.join([f'__i{dim}' for dim in range(len(inparr.shape))]))}, code=code, - outputs={'__out': Memlet.simple(out_array, ','.join(['__i%d' % i for i in range(len(inparr.shape))]))}, + outputs={'__out': Memlet.simple(out_array, ','.join([f'__i{dim}' for dim in range(len(inparr.shape))]))}, external_edges=True) return out_array @@ -4232,10 +4231,40 @@ def _ndarray_copy(pv: ProgramVisitor, sdfg: SDFG, state: SDFGState, arr: str) -> @oprepo.replaces_method('Array', 'fill') @oprepo.replaces_method('Scalar', 'fill') @oprepo.replaces_method('View', 'fill') -def _ndarray_fill(pv: ProgramVisitor, sdfg: SDFG, state: SDFGState, arr: str, value: Number) -> str: - if not isinstance(value, (Number, np.bool_)): - raise mem_parser.DaceSyntaxError(pv, None, "Fill value {f} must be a number!".format(f=value)) - return _elementwise(pv, sdfg, state, "lambda x: {}".format(value), arr, arr) +def _ndarray_fill(pv: ProgramVisitor, sdfg: SDFG, state: SDFGState, arr: str, value: Union[str, Number, + sp.Expr]) -> str: + assert arr in sdfg.arrays + + if isinstance(value, sp.Expr): + raise NotImplementedError( + f"{arr}.fill is not implemented for symbolic expressions ({value}).") # Look at `full`. 
+ + if isinstance(value, (Number, np.bool_)): + body = value + inputs = {} + elif isinstance(value, str) and value in sdfg.arrays: + value_array = sdfg.arrays[value] + if not isinstance(value_array, data.Scalar): + raise mem_parser.DaceSyntaxError( + pv, None, f"{arr}.fill requires a scalar argument, but {type(value_array)} was given.") + body = '__inp' + inputs = {'__inp': dace.Memlet(data=value, subset='0')} + else: + raise mem_parser.DaceSyntaxError(pv, None, f"Unsupported argument '{value}' for {arr}.fill.") + + shape = sdfg.arrays[arr].shape + state.add_mapped_tasklet( + '_numpy_fill_', + map_ranges={ + f"__i{dim}": f"0:{s}" + for dim, s in enumerate(shape) + }, + inputs=inputs, + code=f"__out = {body}", + outputs={'__out': dace.Memlet.simple(arr, ",".join([f"__i{dim}" for dim in range(len(shape))]))}, + external_edges=True) + + return arr @oprepo.replaces_method('Array', 'reshape') diff --git a/tests/numpy/ndarray_attributes_methods_test.py b/tests/numpy/ndarray_attributes_methods_test.py index 40a6db7a6c..c9c38e245c 100644 --- a/tests/numpy/ndarray_attributes_methods_test.py +++ b/tests/numpy/ndarray_attributes_methods_test.py @@ -38,6 +38,18 @@ def test_fill(A: dace.int32[M, N]): return A # return A.fill(5) doesn't work because A is not copied +@compare_numpy_output() +def test_fill2(A: dace.int32[M, N], a: dace.int32): + A.fill(a) + return A # return A.fill(5) doesn't work because A is not copied + + +@compare_numpy_output() +def test_fill3(A: dace.int32[M, N], a: dace.int32): + A.fill(a + 1) + return A + + @compare_numpy_output() def test_reshape(A: dace.float32[N, N]): return A.reshape([1, N * N]) @@ -124,6 +136,8 @@ def test_any(): test_copy() test_astype() test_fill() + test_fill2() + test_fill3() test_reshape() test_transpose1() test_transpose2() From 12b998193d966ce656384aaba9dfd32395a4d42c Mon Sep 17 00:00:00 2001 From: matteonu <54644158+matteonu@users.noreply.github.com> Date: Sat, 18 Nov 2023 16:12:22 +0100 Subject: [PATCH 140/163] Implement writeset underapproximation (#1425) This PR adds a pass that under-approximates the write-sets of loops and maps based on the propagation pass. Currently it only supports unconditional writes and affine subscript expressions. --------- Co-authored-by: matteonussbauemer Co-authored-by: acalotoiu <61420859+acalotoiu@users.noreply.github.com> --- .../analysis/writeset_underapproximation.py | 1587 +++++++++++++++++ .../writeset_underapproximation_test.py | 1109 ++++++++++++ 2 files changed, 2696 insertions(+) create mode 100644 dace/sdfg/analysis/writeset_underapproximation.py create mode 100644 tests/passes/writeset_underapproximation_test.py diff --git a/dace/sdfg/analysis/writeset_underapproximation.py b/dace/sdfg/analysis/writeset_underapproximation.py new file mode 100644 index 0000000000..bfd5f4cb00 --- /dev/null +++ b/dace/sdfg/analysis/writeset_underapproximation.py @@ -0,0 +1,1587 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +""" +Pass derived from ``propagation.py`` that under-approximates write-sets of for-loops and Maps in +an SDFG. 
+""" + +from collections import defaultdict +import copy +import itertools +import warnings +from typing import Any, Dict, List, Set, Tuple, Type, Union +import sympy + +import dace +from dace.symbolic import issymbolic, pystr_to_symbolic, simplify +from dace.transformation.pass_pipeline import Modifies, Pass +from dace import registry, subsets, symbolic, dtypes, data, SDFG, Memlet +from dace.sdfg.nodes import NestedSDFG, AccessNode +from dace.sdfg import nodes, SDFGState, graph as gr +from dace.sdfg.analysis import cfg +from dace.transformation import pass_pipeline as ppl +from dace.sdfg import graph +from dace.sdfg import scope + +# dictionary mapping each edge to a copy of the memlet of that edge with its write set +# underapproximated +approximation_dict: Dict[graph.Edge, Memlet] = {} +# dictionary that maps loop headers to "border memlets" that are written to in the +# corresponding loop +loop_write_dict: Dict[SDFGState, Dict[str, Memlet]] = {} +# dictionary containing information about the for loops in the SDFG +loop_dict: Dict[SDFGState, Tuple[SDFGState, SDFGState, + List[SDFGState], str, subsets.Range]] = {} +# dictionary mapping each nested SDFG to the iteration variables surrounding it +iteration_variables: Dict[SDFG, Set[str]] = {} +# dictionary mapping each state to the iteration variables surrounding it +# (including the ones from surrounding SDFGs) +ranges_per_state: Dict[SDFGState, + Dict[str, subsets.Range]] = defaultdict(lambda: {}) + + +@registry.make_registry +class UnderapproximationMemletPattern(object): + """ + A pattern match on a memlet subset that can be used for propagation. + """ + + def can_be_applied(self, expressions, variable_context, node_range, orig_edges): + raise NotImplementedError + + def propagate(self, array, expressions, node_range): + raise NotImplementedError + + +@registry.make_registry +class SeparableUnderapproximationMemletPattern(object): + """ Memlet pattern that can be applied to each of the dimensions + separately. """ + + def can_be_applied(self, dim_exprs, variable_context, node_range, orig_edges, dim_index, + total_dims): + raise NotImplementedError + + def propagate(self, array, dim_exprs, node_range): + raise NotImplementedError + + +@registry.autoregister +class SeparableUnderapproximationMemlet(UnderapproximationMemletPattern): + """ Meta-memlet pattern that applies all separable memlet patterns. 
""" + + def can_be_applied(self, expressions, variable_context, node_range, orig_edges): + # Assuming correct dimensionality in each of the expressions + data_dims = len(expressions[0]) + self.patterns_per_dim = [None] * data_dims + + # get iteration variables that should be propagated + params = variable_context[-1] + # get other iteration variables that should not be propagated + other_params = variable_context[-3] + + # Return False if iteration variable appears in multiple dimensions + # or if two iteration variables appear in the same dimension + if not self._iteration_variables_appear_multiple_times(data_dims, expressions, other_params, params): + return False + + node_range = self._make_range(node_range) + + for dim in range(data_dims): + dexprs = [] + for expr in expressions: + if isinstance(expr[dim], symbolic.SymExpr): + dexprs.append(expr[dim].expr) + elif isinstance(expr[dim], tuple): + dexprs.append( + (expr[dim][0].expr if isinstance(expr[dim][0], symbolic.SymExpr) else + expr[dim][0], expr[dim][1].expr if isinstance( + expr[dim][1], symbolic.SymExpr) else expr[dim][1], expr[dim][2].expr + if isinstance(expr[dim][2], symbolic.SymExpr) else expr[dim][2])) + else: + dexprs.append(expr[dim]) + + for pattern_class in SeparableUnderapproximationMemletPattern.extensions().keys(): + smpattern = pattern_class() + if smpattern.can_be_applied(dexprs, variable_context, node_range, orig_edges, dim, + data_dims): + self.patterns_per_dim[dim] = smpattern + break + + return None not in self.patterns_per_dim + + def _iteration_variables_appear_multiple_times(self, data_dims, expressions, other_params, params): + for expr in expressions: + for param in params: + occured_before = False + for dim in range(data_dims): + # collect free_symbols in current dimension + free_symbols = [] + curr_dim_expr = expr[dim] + if isinstance(curr_dim_expr, symbolic.SymExpr): + free_symbols += curr_dim_expr.expr.free_symbols + elif isinstance(curr_dim_expr, tuple): + free_symbols += curr_dim_expr[0].expr.free_symbols if isinstance( + curr_dim_expr[0], symbolic.SymExpr) else list( + pystr_to_symbolic(curr_dim_expr[0]).expand().free_symbols) + free_symbols += curr_dim_expr[1].expr.free_symbols if isinstance( + curr_dim_expr[1], symbolic.SymExpr) else list( + pystr_to_symbolic(curr_dim_expr[1]).expand().free_symbols) + free_symbols += curr_dim_expr[2].expr.free_symbols if isinstance( + curr_dim_expr[2], symbolic.SymExpr) else list( + pystr_to_symbolic(curr_dim_expr[2]).expand().free_symbols) + else: + free_symbols += [curr_dim_expr] + + if param in free_symbols: + if occured_before: + return False + occured_before = True + + for other_param in set(params) | set(other_params): + if other_param is param: + continue + if other_param in free_symbols and param in free_symbols: + return False + return True + + def _make_range(self, node_range): + return subsets.Range([(rb.expr if isinstance(rb, symbolic.SymExpr) else rb, + re.expr if isinstance( + re, symbolic.SymExpr) else re, + rs.expr if isinstance(rs, symbolic.SymExpr) else rs) + for rb, re, rs in node_range]) + + def propagate(self, array, expressions, node_range): + result = [(None, None, None)] * len(self.patterns_per_dim) + + node_range = self._make_range(node_range) + + for i, smpattern in enumerate(self.patterns_per_dim): + + dexprs = [] + for expr in expressions: + if isinstance(expr[i], symbolic.SymExpr): + dexprs.append(expr[i].expr) + elif isinstance(expr[i], tuple): + dexprs.append(( + expr[i][0].expr if isinstance( + expr[i][0], symbolic.SymExpr) else 
expr[i][0],
+                        expr[i][1].expr if isinstance(
+                            expr[i][1], symbolic.SymExpr) else expr[i][1],
+                        expr[i][2].expr if isinstance(
+                            expr[i][2], symbolic.SymExpr) else expr[i][2],
+                        expr.tile_sizes[i]))
+                else:
+                    dexprs.append(expr[i])
+
+            result[i] = smpattern.propagate(array, dexprs, node_range)
+
+        # TODO(later): Not necessarily Range (general integer sets)
+        return subsets.Range(result)
+
+
+@registry.autoregister
+class AffineUnderapproximationSMemlet(SeparableUnderapproximationMemletPattern):
+    """
+    Separable memlet pattern that matches affine expressions, i.e., of the
+    form `a * {index} + b`. Only works for expressions like (a * i + b : a * i + b : 1).
+    """
+
+    def can_be_applied(self, dim_exprs, variable_context, node_range, orig_edges, dim_index,
+                       total_dims):
+
+        params = variable_context[-1]
+        defined_vars = variable_context[-2]
+        # Create wildcards for multiplication and addition
+        a = sympy.Wild('a', exclude=params)
+        b = sympy.Wild('b', exclude=params)
+
+        self.param = None
+        self.paramind = None
+        self.multiplier = None
+
+        # Special case: Get the total internal access range
+        # If this range matches (0, rs), we say that the propagated skip is 1
+        self.internal_range = set()
+
+        for dexpr in dim_exprs:
+            subexprs = None
+            step = None
+            if isinstance(dexpr, sympy.Basic):  # Affine index
+                subexprs = [dexpr, dexpr]
+
+            elif isinstance(dexpr, tuple) and len(dexpr) == 3:  # Affine range
+                subexprs = [dexpr[0], dexpr[1]]
+                step = dexpr[2]
+                # Back off if the range does not represent a single index or
+                # if the step of the subscript expression is not 1
+                if not subexprs[0] == subexprs[1] or step != 1:
+                    return False
+
+            if subexprs is None:  # Something else
+                return False
+
+            for i, subexpr in enumerate(subexprs):
+                if not issymbolic(subexpr):
+                    subexpr = pystr_to_symbolic(subexpr)
+
+                # Try to match an affine expression with a parameter
+                param = None
+                pind = -1
+                for indp, p in enumerate(params):
+                    if p not in subexpr.free_symbols:
+                        continue
+                    matches = subexpr.match(a * p + b)
+                    if param is None and matches is None:
+                        continue
+                    elif param is not None and matches is not None:
+                        return False  # Only one parameter may match
+                    elif matches is not None:
+                        multiplier = matches[a]
+                        addition = matches[b]
+                        param = p
+                        pind = indp
+
+                if param is None:
+                    return False  # A parameter must match
+                if self.param is not None and param != self.param:
+                    return False  # There can only be one parameter
+                if self.multiplier is not None and multiplier != self.multiplier:
+                    return False  # Multiplier must be the same
+
+                self.param = param
+                self.paramind = pind
+                self.multiplier = multiplier
+
+                # If this is one expression
+                if len(subexprs) == 1:
+                    self.internal_range.add(addition)
+                elif i == 0:  # Range begin
+                    brb = addition
+                elif i == 1:  # Range end
+                    bre = addition
+
+            if len(subexprs) > 1:
+                self.internal_range.add((brb, bre))
+
+            if step is not None:
+                if (symbolic.issymbolic(step) and self.param in step.free_symbols):
+                    return False  # Step must be independent of parameter
+
+        node_rb, node_re, node_rs = node_range[self.paramind]
+        if (any(s not in defined_vars for s in node_rb.free_symbols) or
+                any(s not in defined_vars for s in node_re.free_symbols)):
+            # Cannot propagate variables only defined in this scope (e.g.,
+            # dynamic map ranges)
+            return False
+
+        if self.param is None:
+            return False
+
+        return True
+
+    def propagate(self, array, dim_exprs, node_range):
+        # Compute last index in map according to range definition
+        # parameter range
+        node_rb, node_re, node_rs = 
node_range[self.paramind] # node_rs = 1 + + if isinstance(dim_exprs, list): + dim_exprs = dim_exprs[0] + + if isinstance(dim_exprs, tuple): + + if len(dim_exprs) == 3: + rb, re, rs = dim_exprs + rt = '1' + elif len(dim_exprs) == 4: + rb, re, rs, rt = dim_exprs + else: + raise NotImplementedError + + # subscript expression + rb = symbolic.pystr_to_symbolic(rb).expand() + re = symbolic.pystr_to_symbolic(re).expand() + rs = symbolic.pystr_to_symbolic(rs).expand() + rt = symbolic.pystr_to_symbolic(rt).expand() + else: + rb, re = (dim_exprs.expand(), dim_exprs.expand()) + rs = 1 + rt = 1 + + result_begin = rb.subs(self.param, node_rb).expand() + result_end = re.subs(self.param, node_re).expand() + + # Special case: multiplier < 0 + if (self.multiplier < 0) == True: + result_begin, result_end = result_end, result_begin + + result_skip = self.multiplier * node_rs + result_tile = 1 + + result_begin = simplify(result_begin) + result_end = simplify(result_end) + result_skip = simplify(result_skip) + result_tile = simplify(result_tile) + + return (result_begin, result_end, result_skip, result_tile) + + +@registry.autoregister +class ConstantUnderapproximationSMemlet(SeparableUnderapproximationMemletPattern): + """ Separable memlet pattern that matches constant (i.e., unrelated to + current scope) expressions. + """ + + def can_be_applied(self, dim_exprs, variable_context, node_range, orig_edges, dim_index, + total_dims): + # Pattern does not support unions of expressions. TODO: Support + if len(dim_exprs) > 1: + return False + dexpr = dim_exprs[0] + + free_symbols = set() + for expr in dexpr: + if isinstance(expr, sympy.Basic): + free_symbols |= expr.free_symbols + else: + continue + for var in variable_context[-1]: + if var in free_symbols: + return False + + return True + + def propagate(self, array, dim_exprs, node_range): + if isinstance(dim_exprs[0], tuple): + return dim_exprs[0] # Already in range format + # Convert index to range format + return (dim_exprs[0], dim_exprs[0], 1) + + +def _subexpr(dexpr, repldict): + if isinstance(dexpr, tuple): + return tuple(_subexpr(d, repldict) for d in dexpr) + elif isinstance(dexpr, symbolic.SymExpr): + return dexpr.expr.subs(repldict) + else: + return dexpr.subs(repldict) + + +@registry.autoregister +class ConstantRangeUnderapproximationMemlet(UnderapproximationMemletPattern): + """ + Memlet pattern that matches arbitrary expressions with constant range. 
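+    Propagation enumerates each point of the constant iteration range,
+    substitutes it into the subscript expressions, and keeps the elementwise
+    minimum and maximum per dimension (see ``propagate`` below).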
+ """ + + def can_be_applied(self, expressions, variable_context, node_range, orig_edges): + constant_range = True + for dim in node_range: + for rngelem in dim: # For (begin, end, skip) + if not dtypes.isconstant(rngelem) and not isinstance(rngelem, sympy.Number): + constant_range = False + break + if not constant_range: + return False + + self.params = variable_context[-1] + + return True + + def propagate(self, array, expressions, node_range): + rng = [(None, None, 1)] * len(array.shape) + node_range_gen = (range(rb, re, rs) for rb, re, rs in node_range) + for ndind in itertools.product(*tuple(node_range_gen)): + repldict = {p: ndind[i] for i, p in enumerate(self.params)} + for expr in expressions: + for dim, dexpr in enumerate(expr): + evaldexpr = _subexpr(dexpr, repldict) + rb, re, rs = rng[dim] + if rb is None: + rng[dim] = (evaldexpr, evaldexpr, 1) + else: + if evaldexpr < rb: + rng[dim] = (evaldexpr, re, rs) + if evaldexpr > re: # The +1 is because ranges are exclusive + rng[dim] = (rb, evaldexpr, rs) + + return subsets.Range(rng) + + +def _find_unconditionally_executed_states(sdfg: SDFG) -> Set[SDFGState]: + """ + Returns all states that are executed unconditionally in an SDFG + """ + dummy_sink = sdfg.add_state("dummy_state") + for sink_node in sdfg.sink_nodes(): + if sink_node is not dummy_sink: + sdfg.add_edge(sink_node, dummy_sink, dace.sdfg.InterstateEdge()) + # get all the nodes that are executed unconditionally in the state-machine a.k.a nodes + # that dominate the sink states + dominators = cfg.all_dominators(sdfg) + states = dominators[dummy_sink] + # remove dummy state + sdfg.remove_node(dummy_sink) + return states + + +def _unsqueeze_memlet_subsetunion(internal_memlet: Memlet, external_memlet: Memlet, + parent_sdfg: dace.SDFG, nsdfg: NestedSDFG) -> Memlet: + """ + Helper method that tries to unsqueeze a memlet, containing a SubsetUnion as subset, in + a nested SDFG. If it fails it falls back to an empty memlet. + + :param internal_memlet: The internal memlet to unsqueeze. + :param + """ + + from dace.transformation.helpers import unsqueeze_memlet + + if isinstance(external_memlet.subset, subsets.SubsetUnion): + external_memlet.subset = external_memlet.subset.subset_list[0] + if isinstance(external_memlet.dst_subset, subsets.SubsetUnion): + external_memlet.dst_subset = external_memlet.dst_subset.subset_list[0] + if isinstance(external_memlet.src_subset, subsets.SubsetUnion): + external_memlet.src_subset = external_memlet.src_subset.subset_list[0] + if isinstance(internal_memlet.subset, subsets.SubsetUnion): + _subsets = internal_memlet.subset.subset_list + else: + _subsets = [internal_memlet.subset] + + tmp_memlet = Memlet(data=internal_memlet.data, + subset=internal_memlet.subset, + other_subset=internal_memlet.other_subset) + + internal_array = nsdfg.sdfg.arrays[internal_memlet.data] + external_array = parent_sdfg.arrays[external_memlet.data] + + for j, subset in enumerate(_subsets): + if subset is None: + continue + tmp_memlet.subset = subset + try: + unsqueezed_memlet = unsqueeze_memlet(tmp_memlet, + external_memlet, + False, + internal_offset=internal_array.offset, + external_offset=external_array.offset) + subset = unsqueezed_memlet.subset + except (ValueError, NotImplementedError): + # In any case of memlets that cannot be unsqueezed (i.e., + # reshapes), use empty memlets. 
+            subset = None
+        _subsets[j] = subset
+
+    # if all subsets are empty, make the memlet empty
+    if all(s is None for s in _subsets):
+        external_memlet.subset = None
+        external_memlet.other_subset = None
+    else:
+        external_memlet = unsqueezed_memlet
+        external_memlet.subset = subsets.SubsetUnion(_subsets)
+
+    return external_memlet
+
+
+def _freesyms(expr):
+    """
+    Helper function that either returns free symbols for sympy expressions
+    or an empty set if constant.
+    """
+    if isinstance(expr, sympy.Basic):
+        return expr.free_symbols
+    return set()
+
+
+def _collect_iteration_variables(state: SDFGState, node: nodes.NestedSDFG) -> Set[str]:
+    """
+    Helper method which finds all the iteration variables that
+    surround a nested SDFG in a state.
+
+    :param state: The state in which the nested SDFG resides
+    :param node: The nested SDFG that the surrounding iteration
+                 variables need to be found for
+    :return: The set of iteration variables surrounding the nested SDFG
+    """
+    scope_dict = state.scope_dict()
+    current_scope: nodes.EntryNode = scope_dict[node]
+    params = set()
+    while current_scope:
+        mapnode: nodes.Map = current_scope.map
+        params.update(set(mapnode.params))
+        current_scope = scope_dict[current_scope]
+
+    return params
+
+
+def _collect_itvars_scope(scopes: Union[scope.ScopeTree, List[scope.ScopeTree]]) -> Dict[scope.ScopeTree, Set[str]]:
+    """
+    Helper method which finds all surrounding iteration variables for each scope.
+
+    :param scopes: A list of scope trees or a single ScopeTree to analyze
+    :return: A dictionary mapping each ScopeTree object in scopes to the
+             set of iteration variables surrounding it
+    """
+    if isinstance(scopes, scope.ScopeTree):
+        scopes_to_process = [scopes]
+    else:
+        scopes_to_process = scopes
+
+    next_scopes = set()
+    surrounding_map_vars = {}
+    while len(scopes_to_process) > 0:
+        for scope_node in scopes_to_process:
+            if scope_node is None:
+                continue
+            next_scope = scope_node
+            while next_scope:
+                next_scope = next_scope.parent
+                if next_scope is None:
+                    break
+                curr_entry = next_scope.entry
+                if scope_node not in surrounding_map_vars:
+                    surrounding_map_vars[scope_node] = set()
+                if isinstance(curr_entry, nodes.MapEntry):
+                    surrounding_map_vars[scope_node] |= set(
+                        curr_entry.map.params)
+            next_scopes.add(scope_node.parent)
+        scopes_to_process = next_scopes
+        next_scopes = set()
+    return surrounding_map_vars
+
+
+def _map_header_to_parent_headers(
+    loops: Dict[SDFGState, Tuple[SDFGState, SDFGState,
+                                 List[SDFGState], str, subsets.Range]]
+) -> Dict[SDFGState, Set[SDFGState]]:
+    """
+    Given the loops of an SDFG returns a mapping that maps each loop to its parents in the loop
+    nest tree.
+    """
+    mapping = {}
+    for header, loop in loops.items():
+        _, _, loop_states, _, _ = loop
+        for state in loop_states:
+            if state not in mapping:
+                mapping[state] = set()
+            if state in loops:
+                mapping[state].add(header)
+    return mapping
+
+
+def _generate_loop_nest_tree(
+    loops: Dict[SDFGState, Tuple[SDFGState, SDFGState,
+                                 List[SDFGState], str, subsets.Range]]
+) -> Dict[SDFGState, Set[SDFGState]]:
+    """
+    Given the loops of an SDFG returns the loop nest trees in the SDFG represented by a dictionary.
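+
+    For example (illustrative state names): two nested loops with headers
+    ``outer_guard`` and ``inner_guard`` yield
+    ``{outer_guard: {inner_guard}, inner_guard: set()}``.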
+ """ + header_parents_mapping = _map_header_to_parent_headers(loops) + tree_dict: Dict[SDFGState, Set[SDFGState]] = {} + for header, loop in loops.items(): + _, _, loop_states, _, _ = loop + tree_dict[header] = set() + for state in loop_states: + # if the state is a loop header and no parent header is a child of header state is a direct child + if state in loops and len(set(loop_states).intersection( + header_parents_mapping[state])) == 0: + tree_dict[header].add(state) + return tree_dict + + +def _postorder_traversal(root: SDFGState, loop_nest_tree: Dict[SDFGState, + Set[SDFGState]]) -> List[SDFGState]: + """ + Given a loop nest tree in the form of a dictionary and the root of the tree, returns the DFS + traversal order of that tree starting from the root. + """ + post_order_list = [] + if root is None: + return [] + stack = [root] + last = None + + while stack: + root = stack[-1] + if root in loop_nest_tree: + children = loop_nest_tree[root] + else: + children = [] + if not children or last is not None and (last in children): + post_order_list.append(root) + stack.pop() + last = root + # if not, push children in stack + else: + for child in children: + stack.append(child) + return post_order_list + + +def _find_loop_nest_roots(loop_nest_tree: Dict[SDFGState, Set[SDFGState]]) -> Set[SDFGState]: + """ + Given the loop nest trees in an SDFG in the form of a dictionary, returns the root nodes of + all loop nest trees in that SDFG. + """ + all_nodes = set() + child_nodes = set() + + for parent, children in loop_nest_tree.items(): + all_nodes.add(parent) + all_nodes.update(children) + child_nodes.update(children) + roots = all_nodes - child_nodes + return roots + + +def _filter_undefined_symbols(border_memlet: Memlet, + outer_symbols: Dict[str, dtypes.typeclass]): + ''' + Helper method that filters out subsets containing symbols which are not defined + outside a nested SDFG. + + :note: This function operates in-place on the given memlet. 
+    '''
+    if border_memlet.src_subset is not None:
+        if isinstance(border_memlet.src_subset, subsets.SubsetUnion):
+            _subsets = border_memlet.src_subset.subset_list
+        else:
+            _subsets = [border_memlet.src_subset]
+        for i, subset in enumerate(_subsets):
+            for rng in subset:
+                fall_back = False
+                for item in rng:
+                    if any(str(s) not in outer_symbols for s in item.free_symbols):
+                        fall_back = True
+                        break
+                if fall_back:
+                    _subsets[i] = None
+                    break
+        border_memlet.src_subset = subsets.SubsetUnion(_subsets)
+    if border_memlet.dst_subset is not None:
+        if isinstance(border_memlet.dst_subset, subsets.SubsetUnion):
+            _subsets = border_memlet.dst_subset.subset_list
+        else:
+            _subsets = [border_memlet.dst_subset]
+        for i, subset in enumerate(_subsets):
+            for rng in subset:
+                fall_back = False
+                for item in rng:
+                    if any(str(s) not in outer_symbols for s in item.free_symbols):
+                        fall_back = True
+                        break
+                if fall_back:
+                    _subsets[i] = None
+                    break
+        border_memlet.dst_subset = subsets.SubsetUnion(_subsets)
+
+
+def _merge_subsets(subset_a: subsets.Subset, subset_b: subsets.Subset) -> subsets.SubsetUnion:
+    """
+    Helper function that merges two subsets into a SubsetUnion and raises
+    an error if the subsets have different dimensionality.
+    """
+    if subset_a is not None:
+        if subset_a.dims() != subset_b.dims():
+            raise ValueError(
+                'Cannot merge subset ranges of unequal dimension!')
+        return subsets.list_union(subset_a, subset_b)
+    else:
+        return subset_b
+
+
+class UnderapproximateWrites(ppl.Pass):
+
+    def modifies(self) -> Modifies:
+        return ppl.Modifies.Everything
+
+    def should_reapply(self, modified: ppl.Modifies) -> bool:
+        # Reapply if states, edges, symbols or nodes were modified
+        return modified & (ppl.Modifies.States | ppl.Modifies.Edges | ppl.Modifies.Symbols | ppl.Modifies.Nodes)
+
+    def apply_pass(
+        self, sdfg: dace.SDFG, pipeline_results: Dict[str, Any]
+    ) -> Dict[str, Union[
+            Dict[graph.Edge, Memlet],
+            Dict[SDFGState, Dict[str, Memlet]],
+            Dict[SDFGState, Tuple[SDFGState, SDFGState, List[SDFGState], str, subsets.Range]]]]:
+        """
+        Applies the pass to the given SDFG.
+
+        :param sdfg: The SDFG to apply the pass to.
+        :param pipeline_results: If in the context of a ``Pipeline``, a dictionary that is
+                                 populated with prior Pass results as ``{Pass subclass name:
+                                 returned object from pass}``. If not run in a pipeline, an
+                                 empty dictionary is expected.
+        :return: A dictionary containing three dictionaries with analysis data:
+                 - 'approximation': A dictionary mapping each edge to a copy of the memlet of that edge
+                   with its write set underapproximated
+                 - 'loop_approximation': A dictionary mapping each identified for-loop in the SDFG to
+                   its under-approximated write-set
+                 - 'loops': A dictionary containing information about the identified for-loops in the
+                   SDFG. It maps loop guard-states to the first and last states in the loop,
+                   the set of states enclosed by the loop, the iteration variable, and
+                   the range of the iteration variable
+
+        :note: The only modification this pass performs on the SDFG is splitting interstate
+               edges.
+ """ + # clear the global dictionaries + approximation_dict.clear() + loop_write_dict.clear() + loop_dict.clear() + iteration_variables.clear() + ranges_per_state.clear() + + # fill the approximation dictionary with the original edges as keys and the edges with the + # approximated memlets as values + for (edge, parent) in sdfg.all_edges_recursive(): + if isinstance(parent, SDFGState): + approximation_dict[edge] = copy.deepcopy(edge.data) + if not isinstance(approximation_dict[edge].subset, + subsets.SubsetUnion) and approximation_dict[edge].subset: + approximation_dict[edge].subset = subsets.SubsetUnion( + [approximation_dict[edge].subset]) + if not isinstance(approximation_dict[edge].dst_subset, + subsets.SubsetUnion) and approximation_dict[edge].dst_subset: + approximation_dict[edge].dst_subset = subsets.SubsetUnion( + [approximation_dict[edge].dst_subset]) + if not isinstance(approximation_dict[edge].src_subset, + subsets.SubsetUnion) and approximation_dict[edge].src_subset: + approximation_dict[edge].src_subset = subsets.SubsetUnion( + [approximation_dict[edge].src_subset]) + + self._underapproximate_writes_sdfg(sdfg) + + # Replace None with empty SubsetUnion in each Memlet + for entry in approximation_dict.values(): + if entry.subset is None: + entry.subset = subsets.SubsetUnion([]) + return { + "approximation": approximation_dict, + "loop_approximation": loop_write_dict, + "loops": loop_dict + } + + def _underapproximate_writes_sdfg(self, sdfg: SDFG): + """ + Underapproximates write-sets of loops, maps and nested SDFGs in the given SDFG. + """ + from dace.transformation.helpers import split_interstate_edges + + split_interstate_edges(sdfg) + loops = self._find_for_loops(sdfg) + loop_dict.update(loops) + + for state in sdfg.nodes(): + self._underapproximate_writes_state(sdfg, state) + + self._underapproximate_writes_loops(loops, sdfg) + + def _find_for_loops(self, + sdfg: SDFG + ) -> Dict[SDFGState, Tuple[SDFGState, SDFGState, List[SDFGState], str, subsets.Range]]: + """ + Modified version of _annotate_loop_ranges from dace.sdfg.propagation + that returns the identified loops in a dictionary and stores the found iteration variables + in the global ranges_per_state dictionary. + + :param sdfg: The SDFG in which to look. + :return: dictionary mapping loop headers to first state in the loop, + the set of states enclosed by the loop, the itearation variable, + the range of the iteration variable + """ + + # We import here to avoid cyclic imports. + from dace.transformation.interstate.loop_detection import find_for_loop + from dace.sdfg import utils as sdutils + + # dictionary mapping loop headers to beginstate, loopstates, looprange + identified_loops = {} + for cycle in sdfg.find_cycles(): + # In each cycle, try to identify a valid loop guard state. + guard = None + begin = None + itvar = None + for state in cycle: + # Try to identify a valid for-loop guard. + in_edges = sdfg.in_edges(state) + out_edges = sdfg.out_edges(state) + + # A for-loop guard has two or more incoming edges (1 increment and + # n init, all identical), and exactly two outgoing edges (loop and + # exit loop). + if len(in_edges) < 2 or len(out_edges) != 2: + continue + + # All incoming guard edges must set exactly one variable and it must + # be the same for all of them. 
+ itvars = set() + for iedge in in_edges: + if len(iedge.data.assignments) > 0: + if not itvars: + itvars = set(iedge.data.assignments.keys()) + else: + itvars &= set(iedge.data.assignments.keys()) + else: + itvars = None + break + if not itvars or len(itvars) > 1: + continue + itvar = next(iter(itvars)) + itvarsym = pystr_to_symbolic(itvar) + + # The outgoing edges must be negations of one another. + if out_edges[0].data.condition_sympy() != (sympy.Not( + out_edges[1].data.condition_sympy())): + continue + + # Make sure the last state of the loop (i.e. the state leading back + # to the guard via 'increment' edge) is part of this cycle. If not, + # we're looking at the guard for a nested cycle, which we ignore for + # this cycle. + increment_edge = None + for iedge in in_edges: + if itvarsym in _freesyms(pystr_to_symbolic(iedge.data.assignments[itvar])): + increment_edge = iedge + break + if increment_edge is None or increment_edge.src not in cycle: + continue + + # One of the child states must be in the loop (loop begin), and the + # other one must be outside the cycle (loop exit). + loop_state = None + exit_state = None + if out_edges[0].dst in cycle and out_edges[1].dst not in cycle: + loop_state = out_edges[0].dst + exit_state = out_edges[1].dst + elif out_edges[1].dst in cycle and out_edges[0].dst not in cycle: + loop_state = out_edges[1].dst + exit_state = out_edges[0].dst + if loop_state is None or exit_state is None: + continue + + # This is a valid guard state candidate. + guard = state + begin = loop_state + break + + if guard is not None and begin is not None and itvar is not None: + # A guard state was identified, see if it has valid for-loop ranges + # and annotate the loop as such. + + loop_state_list = [] + res = find_for_loop(sdfg, guard, begin, itervar=itvar) + if res is None: + continue + itervar, rng, (_, last_loop_state) = res + # Make sure the range is flipped in a direction such that the + # stride is positive (in order to match subsets.Range). + start, stop, stride = rng + # This inequality needs to be checked exactly like this due to + # constraints in sympy/symbolic expressions, do not simplify!!! + if (stride < 0) == True: + rng = (stop, start, -stride) + loop_states = sdutils.dfs_conditional(sdfg, + sources=[begin], + condition=lambda _, child: child != guard) + + if itvar not in ranges_per_state[begin]: + + for loop_state in loop_states: + ranges_per_state[loop_state][itervar] = subsets.Range([ + rng]) + loop_state_list.append(loop_state) + ranges_per_state[guard][itervar] = subsets.Range([rng]) + identified_loops[guard] = (begin, last_loop_state, loop_state_list, itvar, + subsets.Range([rng])) + + return identified_loops + + def _underapproximate_writes_loops(self, loops: Dict[SDFGState, Tuple[SDFGState, SDFGState, List[SDFGState], + str, subsets.Range]], sdfg: SDFG): + """ + Helper function that calls underapproximate_writes_loops on all the loops in the SDFG in + bottom up order of the loop nests. + """ + loop_nest_tree = _generate_loop_nest_tree(loops) + root_loop_headers = _find_loop_nest_roots(loop_nest_tree) + for root in root_loop_headers: + post_order_traversal = _postorder_traversal(root, loop_nest_tree) + for loop_header in post_order_traversal: + self._underapproximate_writes_loop(sdfg, loops, loop_header) + + def _underapproximate_writes_state(self, sdfg: SDFG, state: SDFGState): + """ Propagates memlets throughout one SDFG state. + + :param sdfg: The SDFG in which the state is situated. + :param state: The state to propagate in. 
+ """ + + # Algorithm: + # 1. Start propagating information from tasklets outwards (their edges + # are hardcoded). + # 2. Traverse the neighboring nodes (topological sort, first forward to + # outputs and then backward to inputs). + # There are four possibilities: + # a. If the neighboring node is a tasklet, skip (such edges are + # immutable) + # b. If the neighboring node is an array, make sure it is the correct + # array. Otherwise, throw a mismatch exception. + # c. If the neighboring node is a scope node, and its other edges are + # not set, set the results per-array, using the union of the + # obtained ranges in the previous depth. + # 3. For each edge in the multigraph, store the results in the global dictionary + # approximation_dict + + # First, propagate nested SDFGs in a bottom-up fashion + for node in state.nodes(): + if isinstance(node, nodes.NestedSDFG): + self._find_live_iteration_variables(node, sdfg, state) + + # Propagate memlets inside the nested SDFG. + self._underapproximate_writes_sdfg(node.sdfg) + + # Propagate memlets out of the nested SDFG. + self._underapproximate_writes_nested_sdfg(sdfg, state, node) + + # Process scopes from the leaves upwards + self._underapproximate_writes_scope(sdfg, state, state.scope_leaves()) + + def _find_live_iteration_variables(self, + nsdfg: nodes.NestedSDFG, + sdfg: SDFG, + state: SDFGState): + """ + Helper method that collects all iteration variables of surrounding maps and loops of a + given nested SDFG and stores them in the global iteration_variables dictionary after + applying the symbol-mapping of the nested SDFG. + """ + + def symbol_map(mapping, symbol): + if symbol in mapping: + return mapping[symbol] + return None + + map_iteration_variables = _collect_iteration_variables(state, nsdfg) + sdfg_iteration_variables = iteration_variables[ + sdfg] if sdfg in iteration_variables else set() + state_iteration_variables = ranges_per_state[state].keys() + iteration_variables_local = (map_iteration_variables | sdfg_iteration_variables | + state_iteration_variables) + mapped_iteration_variables = set( + map(lambda x: symbol_map(nsdfg.symbol_mapping, x), iteration_variables_local)) + if mapped_iteration_variables: + iteration_variables[nsdfg.sdfg] = mapped_iteration_variables + + def _underapproximate_writes_nested_sdfg( + self, + parent_sdfg: SDFG, + parent_state: SDFGState, + nsdfg_node: NestedSDFG, + ): + """ + Propagate writes out of a nested sdfg. Only considers memlets in states that are + executed unconditionally. The results are stored in the global approximation_dict + + :param parent_sdfg: The parent SDFG this nested SDFG is in. + :param parent_state: The state containing this nested SDFG. + :param nsdfg_node: The NSDFG node containing this nested SDFG. + """ + + def _init_border_memlet(template_memlet: Memlet, + node_label: str + ): + ''' + Creates a Memlet with the same data as the template_memlet, stores it in the + border_memlets dictionary and returns it. + ''' + border_memlet = Memlet(data=template_memlet.data) + border_memlet._is_data_src = True + border_memlets[node_label] = border_memlet + return border_memlet + + # Build a map of connectors to associated 'border' memlets inside + # the nested SDFG. This map will be populated with memlets once they + # get propagated in the SDFG. 
+ border_memlets = {} + for connector in nsdfg_node.out_connectors: + border_memlets[connector] = None + + outer_symbols = parent_state.symbols_defined_at(nsdfg_node) + # For each state, go through all access nodes corresponding to any + # out-connector from this SDFG. Given those access nodes, collect + # the corresponding memlets and use them to calculate the + # subset corresponding to the outside memlet attached to that connector. + # This is passed out via `border_memlets` and propagated along from there. + states = _find_unconditionally_executed_states(nsdfg_node.sdfg) + for state in states: + for node in state.data_nodes(): + if node.label not in border_memlets: + continue + # Get the edges to this access node + edges = state.in_edges(node) + border_memlet = border_memlets[node.label] + + # Collect all memlets belonging to this access node + memlets = [] + for edge in edges: + inside_memlet = approximation_dict[edge] + memlets.append(inside_memlet) + # initialize border memlet if it does not exist already + if border_memlet is None: + border_memlet = _init_border_memlet( + inside_memlet, node.label) + + # Given all of this access nodes' memlets union all the subsets to one SubsetUnion + if len(memlets) > 0: + subset = subsets.SubsetUnion([]) + for memlet in memlets: + subset = subsets.list_union(subset, memlet.subset) + # compute the union of the ranges to merge the subsets. + border_memlet.subset = _merge_subsets( + border_memlet.subset, subset) + + # collect the memlets for each loop in the NSDFG + if state in loop_write_dict: + for node_label, loop_memlet in loop_write_dict[state].items(): + if node_label not in border_memlets: + continue + border_memlet = border_memlets[node_label] + # initialize border memlet if it does not exist already + if border_memlet is None: + border_memlet = _init_border_memlet( + loop_memlet, node_label) + # compute the union of the ranges to merge the subsets. + border_memlet.subset = _merge_subsets( + border_memlet.subset, loop_memlet.subset) + + # Make sure any potential NSDFG symbol mapping is correctly reversed + # when propagating out. + for connector in border_memlets: + border_memlet = border_memlets[connector] + if not border_memlet: + continue + border_memlet.replace(nsdfg_node.symbol_mapping) + # filter out subsets that use symbols that are not defined outside of the nsdfg + _filter_undefined_symbols(border_memlet, outer_symbols) + + # Propagate the inside 'border' memlets outside the SDFG by + # offsetting, and unsqueezing if necessary. + for edge in parent_state.out_edges(nsdfg_node): + out_memlet = approximation_dict[edge] + if edge.src_conn in border_memlets: + internal_memlet = border_memlets[edge.src_conn] + if internal_memlet is None: + out_memlet.subset = None + out_memlet.dst_subset = None + approximation_dict[edge] = out_memlet + continue + out_memlet = _unsqueeze_memlet_subsetunion(internal_memlet, out_memlet, parent_sdfg, + nsdfg_node) + approximation_dict[edge] = out_memlet + + def _underapproximate_writes_loop(self, + sdfg: SDFG, + loops: Dict[SDFGState, Tuple[SDFGState, SDFGState, List[SDFGState], + str, subsets.Range]], + loop_header: SDFGState): + """ + Propagate Memlets recursively out of loop constructs with representative border memlets, + similar to propagate_memlets_nested_sdfg. Only states that are executed unconditionally + are considered. Loops containing breaks are ignored. The results are stored in the + global loop_write_dict. + + :param sdfg: The SDFG the loops are contained in. 
+        :param loops: dictionary that maps each for-loop construct to a tuple consisting of the
+                      first state in the loop, the last state in the loop, the set of states
+                      enclosed by the loop, the iteration variable and the range of the
+                      iteration variable
+        :param loop_header: the loop header to start the propagation with
+        """
+
+        def _init_border_memlet(template_memlet: Memlet,
+                                node_label: str
+                                ):
+            '''
+            Creates a Memlet with the same data as the template_memlet, stores it in the
+            border_memlets dictionary and returns it.
+            '''
+            border_memlet = Memlet(data=template_memlet.data)
+            border_memlet._is_data_src = True
+            border_memlets[node_label] = border_memlet
+            return border_memlet
+
+        def filter_subsets(itvar: str, itrange: subsets.Range,
+                           memlet: Memlet) -> List[subsets.Subset]:
+            # helper method that filters out subsets that do not depend on the iteration variable
+            # if the iteration range is symbolic
+
+            # if the loop range is symbolic
+            # -> only propagate subsets that contain the iterator as a symbol
+            # if the loop range is constant (and not empty, which is already verified)
+            # -> always propagate all subsets out
+            if memlet.subset is None:
+                return []
+            result = memlet.subset.subset_list if isinstance(
+                memlet.subset, subsets.SubsetUnion) else [memlet.subset]
+            # range contains symbols
+            if itrange.free_symbols:
+                result = [s for s in result if itvar in s.free_symbols]
+            return result
+
+        current_loop = loops[loop_header]
+        begin, last_loop_state, loop_states, itvar, rng = current_loop
+        if rng.num_elements() == 0:
+            return
+        # make sure there is no break out of the loop
+        dominators = cfg.all_dominators(sdfg)
+        if any(begin not in dominators[s] and begin is not s for s in loop_states):
+            return
+        border_memlets = defaultdict(None)
+        # get all the nodes that are executed unconditionally in the cfg,
+        # i.e., the nodes that dominate the sink states
+        states = dominators[last_loop_state].intersection(set(loop_states))
+        states.update([loop_header, last_loop_state])
+
+        for state in states:
+            # iterate over the access nodes that are in the current state, plus
+            # the data containers that are overwritten in the corresponding
+            # loop body if the state is a nested loop header
+            for node in state.data_nodes():
+                # no writes associated with this access node
+                if state.in_degree(node) == 0:
+                    continue
+                edges = state.in_edges(node)
+                # get the current border memlet for this data node
+                border_memlet = border_memlets.get(node.label)
+                memlets = []
+
+                # collect all the subsets of the incoming memlets for the current access node
+                for edge in edges:
+                    inside_memlet = copy.copy(approximation_dict[edge])
+                    # filter out subsets that could become empty depending on assignments
+                    # of symbols
+                    filtered_subsets = filter_subsets(
+                        itvar, rng, inside_memlet)
+                    if not filtered_subsets:
+                        continue
+
+                    inside_memlet.subset = subsets.SubsetUnion(
+                        filtered_subsets)
+                    memlets.append(inside_memlet)
+                    if border_memlet is None:
+                        border_memlet = _init_border_memlet(
+                            inside_memlet, node.label)
+
+                self._underapproximate_writes_loop_subset(sdfg, memlets, border_memlet, sdfg.arrays[node.label],
+                                                          itvar, rng)
+
+            if state not in loop_write_dict:
+                continue
+            # propagate the border memlets of nested loops
+            for node_label, other_border_memlet in loop_write_dict[state].items():
+                # filter out subsets that could become empty depending on symbol assignments
+                filtered_subsets = 
filter_subsets( + itvar, rng, other_border_memlet) + if not filtered_subsets: + continue + + other_border_memlet.subset = subsets.SubsetUnion( + filtered_subsets) + border_memlet = border_memlets.get(node_label) + if border_memlet is None: + border_memlet = _init_border_memlet( + other_border_memlet, node_label) + + self._underapproximate_writes_loop_subset(sdfg, [other_border_memlet], border_memlet, + sdfg.arrays[node_label], itvar, rng) + + loop_write_dict[loop_header] = border_memlets + + def _underapproximate_writes_loop_subset(self, + sdfg: dace.SDFG, + memlets: List[Memlet], + dst_memlet: Memlet, + arr: dace.data.Array, + itvar: str, + rng: subsets.Subset, + loop_nest_itvars: Union[Set[str], None] = None): + """ + Helper function that takes a list of (border) memlets, propagates them out of a + loop-construct and summarizes them to one Memlet. The result is written back to dst_memlet + + :param sdfg: The SDFG the memlets reside in + :param memlets: A list of memlets to propagate + :param arr: The array the memlets write to + :param itvar: The iteration variable of the loop the memlets are propagated out of + :param rng: The iteration range of the iteration variable + :param loop_nest_itvars: A set of iteration variables of surrounding loops + """ + if not loop_nest_itvars: + loop_nest_itvars = set() + if len(memlets) > 0: + params = [itvar] + # get all the other iteration variables surrounding this memlet + surrounding_itvars = iteration_variables[sdfg] if sdfg in iteration_variables else set( + ) + if loop_nest_itvars: + surrounding_itvars |= loop_nest_itvars + + subset = self._underapproximate_subsets(memlets, + arr, + params, + rng, + use_dst=True, + surrounding_itvars=surrounding_itvars).subset + + if subset is None or len(subset.subset_list) == 0: + return + # compute the union of the ranges to merge the subsets. + dst_memlet.subset = _merge_subsets(dst_memlet.subset, subset) + + def _underapproximate_writes_scope(self, + sdfg: SDFG, + state: SDFGState, + scopes: Union[scope.ScopeTree, List[scope.ScopeTree]]): + """ + Propagate memlets from the given scopes outwards. + + :param sdfg: The SDFG in which the scopes reside. + :param state: The SDFG state in which the scopes reside. + :param scopes: The ScopeTree object or a list thereof to start from. 
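+        :note: Propagation starts at the innermost (leaf) scopes and proceeds
+               outwards, processing each scope's parent after the scope itself.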
+ """ + + # for each map scope find the iteration variables of surrounding maps + surrounding_map_vars: Dict[scope.ScopeTree, + Set[str]] = _collect_itvars_scope(scopes) + if isinstance(scopes, scope.ScopeTree): + scopes_to_process = [scopes] + else: + scopes_to_process = scopes + + # Process scopes from the inputs upwards, propagating edges at the + # entry and exit nodes + next_scopes = set() + while len(scopes_to_process) > 0: + for scope_node in scopes_to_process: + if scope_node.entry is None: + continue + + surrounding_iteration_variables = self._collect_iteration_variables_scope_node(scope_node, + sdfg, + state, + surrounding_map_vars) + self._underapproximate_writes_node( + state, scope_node.exit, surrounding_iteration_variables) + # Add parent to next frontier + next_scopes.add(scope_node.parent) + scopes_to_process = next_scopes + next_scopes = set() + + def _collect_iteration_variables_scope_node(self, + scope_node: scope.ScopeTree, + sdfg: SDFG, + state: SDFGState, + surrounding_map_vars: Dict[scope.ScopeTree, Set[str]]) -> Set[str]: + map_iteration_variables = surrounding_map_vars[ + scope_node] if scope_node in surrounding_map_vars else set() + sdfg_iteration_variables = iteration_variables[ + sdfg] if sdfg in iteration_variables else set() + loop_iteration_variables = ranges_per_state[state].keys() + surrounding_iteration_variables = (map_iteration_variables | + sdfg_iteration_variables | + loop_iteration_variables) + return surrounding_iteration_variables + + def _underapproximate_writes_node(self, + dfg_state: SDFGState, + node: Union[nodes.EntryNode, nodes.ExitNode], + surrounding_itvars: Union[Set[str], None] = None): + """ + Helper method which propagates all memlets attached to a map scope out of the map scope. + Can be used for both propagation directions. The propagated memlets are stored in the + global approximation dictonary. 
+ + :param dfg_state: The state the map resides in + :param node: Either an entry or an exit node of a map scope + :param surrounding_itvars: Iteration variables that surround the map scope + """ + if isinstance(node, nodes.EntryNode): + internal_edges = [ + e for e in dfg_state.out_edges(node) if e.src_conn and e.src_conn.startswith('OUT_') + ] + external_edges = [ + e for e in dfg_state.in_edges(node) if e.dst_conn and e.dst_conn.startswith('IN_') + ] + + def geticonn(e): + return e.src_conn[4:] + + def geteconn(e): + return e.dst_conn[3:] + + use_dst = False + else: + internal_edges = [ + e for e in dfg_state.in_edges(node) if e.dst_conn and e.dst_conn.startswith('IN_') + ] + external_edges = [ + e for e in dfg_state.out_edges(node) if e.src_conn and e.src_conn.startswith('OUT_') + ] + + def geticonn(e): + return e.dst_conn[3:] + + def geteconn(e): + return e.src_conn[4:] + + use_dst = True + + for edge in external_edges: + if approximation_dict[edge].is_empty(): + new_memlet = Memlet() + else: + internal_edge = next( + e for e in internal_edges if geticonn(e) == geteconn(edge)) + aligned_memlet = self._align_memlet( + dfg_state, internal_edge, dst=use_dst) + new_memlet = self._underapproximate_memlets(dfg_state, + aligned_memlet, + node, + True, + connector=geteconn( + edge), + surrounding_itvars=surrounding_itvars) + approximation_dict[edge] = new_memlet + + def _align_memlet(self, + state: SDFGState, + edge: gr.MultiConnectorEdge[Memlet], + dst: bool) -> Memlet: + """ + Takes Multiconnectoredge containing Memlet in DFG and swaps subset and other_subset of + Memlet if it "points" in the wrong direction + + :param state: The state the memlet resides in + :param edge: The edge containing the memlet that needs to be aligned + :param dst: True if Memlet should "point" to destination + + :return: Aligned memlet + """ + + is_src = edge.data._is_data_src + # Memlet is already aligned + if is_src is None or (is_src and not dst) or (not is_src and dst): + res = approximation_dict[edge] + return res + + # Data<->Code memlets always have one data container + mpath = state.memlet_path(edge) + if not isinstance(mpath[0].src, AccessNode) or not isinstance(mpath[-1].dst, AccessNode): + return approximation_dict[edge] + + # Otherwise, find other data container + result = copy.deepcopy(approximation_dict[edge]) + if dst: + node = mpath[-1].dst + else: + node = mpath[0].src + + # Fix memlet fields + result.data = node.data + result.subset = approximation_dict[edge].other_subset + result.other_subset = approximation_dict[edge].subset + result._is_data_src = not is_src + return result + + def _underapproximate_memlets(self, + dfg_state, + memlet: Memlet, + scope_node: Union[nodes.EntryNode, nodes.ExitNode], + union_inner_edges: bool, + arr: Union[dace.data.Array, None] = None, + connector=None, + surrounding_itvars: Union[Set[str], None] = None): + """ Tries to underapproximate a memlet through a scope (computes an underapproximation + of the image of the memlet function applied on an integer set of, e.g., a map range) + and returns a new memlet object. + + :param dfg_state: An SDFGState object representing the graph. + :param memlet: The memlet adjacent to the scope node from the inside. + :param scope_node: A scope entry or exit node. + :param union_inner_edges: True if the propagation should take other + neighboring internal memlets within the same + scope into account. 
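+        :param arr: The data descriptor the memlet refers to; if None, it is
+                    looked up in the SDFG by the memlet's data name.
+        :param connector: If not None, only consider edges attached to this
+                          connector of the scope node.
+        :param surrounding_itvars: Iteration variables surrounding the scope
+                                   that are not propagated in this call.
+        :return: A new memlet with an underapproximated subset.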
+ """ + if isinstance(scope_node, nodes.EntryNode): + use_dst = False + entry_node = scope_node + neighboring_edges = dfg_state.out_edges(scope_node) + if connector is not None: + neighboring_edges = [ + e for e in neighboring_edges if e.src_conn and e.src_conn[4:] == connector + ] + elif isinstance(scope_node, nodes.ExitNode): + use_dst = True + entry_node = dfg_state.entry_node(scope_node) + neighboring_edges = dfg_state.in_edges(scope_node) + if connector is not None: + neighboring_edges = [ + e for e in neighboring_edges if e.dst_conn and e.dst_conn[3:] == connector + ] + else: + raise TypeError('Trying to propagate through a non-scope node') + if memlet.is_empty(): + return Memlet() + + sdfg = dfg_state.parent + scope_node_symbols = set( + conn for conn in entry_node.in_connectors if not conn.startswith('IN_')) + defined_vars = { + symbolic.pystr_to_symbolic(s) + for s in (dfg_state.symbols_defined_at(entry_node).keys() | sdfg.constants.keys()) + if s not in scope_node_symbols + } + + # Find other adjacent edges within the connected to the scope node + # and union their subsets + if union_inner_edges: + aggdata = [ + approximation_dict[e] + for e in neighboring_edges + if approximation_dict[e].data == memlet.data and approximation_dict[e] != memlet + ] + else: + aggdata = [] + + aggdata.append(memlet) + + if arr is None: + if memlet.data not in sdfg.arrays: + raise KeyError('Data descriptor (Array, Stream) "%s" not defined in SDFG.' % + memlet.data) + + # FIXME: A memlet alone (without an edge) cannot figure out whether it is data<->data or data<->code + # so this test cannot be used + arr = sdfg.arrays[memlet.data] + + # Propagate subset + if isinstance(entry_node, nodes.MapEntry): + mapnode = entry_node.map + return self._underapproximate_subsets(aggdata, + arr, + mapnode.params, + mapnode.range, + defined_vars, + use_dst=use_dst, + surrounding_itvars=surrounding_itvars) + + elif isinstance(entry_node, nodes.ConsumeEntry): + # Nothing to analyze/propagate in consume + new_memlet = copy.copy(memlet) + new_memlet.subset = subsets.Range.from_array(arr) + new_memlet.other_subset = None + return new_memlet + else: + raise NotImplementedError( + 'Unimplemented primitive: %s' % type(entry_node)) + + def _underapproximate_subsets(self, + memlets: List[Memlet], + arr: data.Data, + params: List[str], + rng: subsets.Subset, + defined_variables: Union[Set[symbolic.SymbolicType], + None] = None, + use_dst: bool = False, + surrounding_itvars: Union[Set[str], None] = None) -> Memlet: + """ Tries to underapproximate a list of memlets through a range (underapproximates + the image of the memlet function applied on an integer set of, e.g., a + map range) and returns a new memlet object. + + :param memlets: The memlets to propagate. + :param arr: Array descriptor for memlet (used for obtaining extents). + :param params: A list of variable names. + :param rng: A subset with dimensionality len(params) that contains the + range to propagate with. + :param defined_variables: A set of symbols defined that will remain the + same throughout underapproximation. If None, assumes + that all symbols outside of `params` have been + defined. + :param use_dst: Whether to underapproximate the memlets' dst subset or use the + src instead, depending on propagation direction. + :param surrounding_itvars: set of iteration variables that surround the memlet + but are not considered for the underapproximation in + this call + :return: Memlet with underapproximated subset. 
+ """ + if not surrounding_itvars: + surrounding_itvars = set() + # Argument handling + if defined_variables is None: + # Default defined variables is "everything but params" + defined_variables = set() + defined_variables |= rng.free_symbols + for memlet in memlets: + defined_variables |= memlet.free_symbols + defined_variables -= set(params) + defined_variables = set(symbolic.pystr_to_symbolic(p) + for p in defined_variables) + + # Propagate subset + variable_context = [[symbolic.pystr_to_symbolic(p) for p in surrounding_itvars], + defined_variables, [symbolic.pystr_to_symbolic(p) for p in params]] + + new_subset = None + for memlet in memlets: + if memlet.is_empty(): + continue + + _subsets = None + if use_dst and memlet.dst_subset is not None: + _subsets = copy.deepcopy(memlet.dst_subset) + elif not use_dst and memlet.src_subset is not None: + _subsets = copy.deepcopy(memlet.src_subset) + else: + _subsets = copy.deepcopy(memlet.subset) + + if isinstance(_subsets, subsets.SubsetUnion): + _subsets = _subsets.subset_list + else: + _subsets = [_subsets] + + if len(list(set(_subsets) - set([None]))) == 0 or _subsets is None: + continue + + # iterate over all the subsets in the SubsetUnion of the current memlet and + # try to apply a memletpattern. If no pattern matches fall back to the empty set + for i, subset in enumerate(_subsets): + # find a pattern for the current subset + for pclass in UnderapproximationMemletPattern.extensions(): + pattern = pclass() + if pattern.can_be_applied([subset], variable_context, rng, [memlet]): + subset = pattern.propagate(arr, [subset], rng) + break + else: + # No patterns found. Underapproximate the subset with an empty subset (so None) + subset = None + _subsets[i] = subset + + # Union edges as necessary + if new_subset is None: + new_subset = subsets.SubsetUnion(_subsets) + else: + old_subset = new_subset + new_subset = subsets.list_union( + new_subset, subsets.SubsetUnion(_subsets)) + if new_subset is None: + warnings.warn('Subset union failed between %s and %s ' % ( + old_subset, _subsets)) + break + + # Create new memlet + new_memlet = copy.copy(memlets[0]) + new_memlet.subset = new_subset + new_memlet.other_subset = None + return new_memlet diff --git a/tests/passes/writeset_underapproximation_test.py b/tests/passes/writeset_underapproximation_test.py new file mode 100644 index 0000000000..a696c5ba24 --- /dev/null +++ b/tests/passes/writeset_underapproximation_test.py @@ -0,0 +1,1109 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. 
+ +import dace +from dace.sdfg.analysis.writeset_underapproximation import UnderapproximateWrites +from dace.subsets import Range +from dace.transformation.pass_pipeline import Pipeline + +N = dace.symbol("N") +M = dace.symbol("M") +K = dace.symbol("K") + +pipeline = Pipeline([UnderapproximateWrites()]) + + +def test_2D_map_overwrites_2D_array(): + """ + 2-dimensional map that fully overwrites 2-dimensional array + --> Approximated write-set of Map to array equals shape of array + """ + + sdfg = dace.SDFG('twoD_map') + sdfg.add_array('B', (M, N), dace.float64) + map_state = sdfg.add_state('map') + a1 = map_state.add_access('B') + map_state.add_mapped_tasklet('overwrite_1', + map_ranges={ + '_i': '0:N:1', + '_j': '0:M:1' + }, + inputs={}, + code='b = 5', + outputs={'b': dace.Memlet('B[_j,_i]')}, + output_nodes={'B': a1}, + external_edges=True) + + results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] + + result = results['approximation'] + edge = map_state.in_edges(a1)[0] + result_subset_list = result[edge].subset.subset_list + result_subset = result_subset_list[0] + expected_subset = Range.from_string('0:M, 0:N') + assert (str(result_subset) == str(expected_subset)) + + +def test_2D_map_added_indices(): + """ + 2-dimensional array that writes to two-dimensional array with + subscript expression that adds two indices + --> Approximated write-set of Map is empty + """ + + sdfg = dace.SDFG("twoD_map") + sdfg.add_array("B", (M, N), dace.float64) + map_state = sdfg.add_state("map") + a1 = map_state.add_access('B') + map_state.add_mapped_tasklet("overwrite_1", + map_ranges={ + '_i': '0:N:1', + '_j': '0:M:1' + }, + inputs={}, + code="b = 5", + outputs={"b": dace.Memlet("B[_j,_i + _j]")}, + output_nodes={"B": a1}, + external_edges=True) + + results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] + + result = results["approximation"] + edge = map_state.in_edges(a1)[0] + assert (len(result[edge].subset.subset_list) == 0) + + +def test_2D_map_multiplied_indices(): + """ + 2-dimensional array that writes to two-dimensional array with + subscript expression that multiplies two indices + --> Approximated write-set of Map is empty + """ + + sdfg = dace.SDFG("twoD_map") + sdfg.add_array("B", (M, N), dace.float64) + map_state = sdfg.add_state("map") + a1 = map_state.add_access('B') + map_state.add_mapped_tasklet("overwrite_1", + map_ranges={ + '_i': '0:N:1', + '_j': '0:M:1' + }, + inputs={}, + code="b = 5", + outputs={"b": dace.Memlet("B[_j,_i * _j]")}, + output_nodes={"B": a1}, + external_edges=True) + + results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] + + result = results["approximation"] + edge = map_state.in_edges(a1)[0] + assert (len(result[edge].subset.subset_list) == 0) + + +def test_1D_map_one_index_multiple_dims(): + """ + One-dimensional map that has the same index + in two dimensions in a write-access + --> Approximated write-set of Map is empty + """ + + sdfg = dace.SDFG("twoD_map") + + sdfg.add_array("B", (M, N), dace.float64) + map_state = sdfg.add_state("map") + a1 = map_state.add_access('B') + map_state.add_mapped_tasklet("overwrite_1", + map_ranges={'_j': '0:M:1'}, + inputs={}, + code="b = 5", + outputs={"b": dace.Memlet("B[_j, _j]")}, + output_nodes={"B": a1}, + external_edges=True) + + results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] + + result = results["approximation"] + edge = map_state.in_edges(a1)[0] + assert (len(result[edge].subset.subset_list) == 0) + + +def test_1D_map_one_index_squared(): + """ 
+ One-dimensional map that multiplies the index + in the subscript expression + --> Approximated write-set of Map is empty + """ + sdfg = dace.SDFG("twoD_map") + sdfg.add_array("B", (M, ), dace.float64) + map_state = sdfg.add_state("map") + a1 = map_state.add_access('B') + map_state.add_mapped_tasklet("overwrite_1", + map_ranges={'_j': '0:M:1'}, + inputs={}, + code="b = 5", + outputs={"b": dace.Memlet("B[_j * _j]")}, + output_nodes={"B": a1}, + external_edges=True) + + results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] + + result = results["approximation"] + edge = map_state.in_edges(a1)[0] + assert (len(result[edge].subset.subset_list) == 0) + + +def test_map_tree_full_write(): + """ + Two maps nested in a map. Both nested maps overwrite the whole first dimension of the array + together with the outer map the whole array is overwritten + --> Approximated write-set of Map to array equals shape of array + """ + + sdfg = dace.SDFG("twoD_map") + sdfg.add_array("B", (M, N), dace.float64) + map_state = sdfg.add_state("map") + a1 = map_state.add_access('B') + map_entry, map_exit = map_state.add_map("outer_map", {"_i": '0:N:1'}) + map_exit.add_in_connector("IN_B") + map_exit.add_out_connector("OUT_B") + inner_map_entry_0, inner_map_exit_0 = map_state.add_map( + "inner_map_0", {"_j": '0:M:1'}) + inner_map_exit_0.add_in_connector("IN_B") + inner_map_exit_0.add_out_connector("OUT_B") + inner_map_entry_1, inner_map_exit_1 = map_state.add_map( + "inner_map_1", {"_j": '0:M:1'}) + inner_map_exit_1.add_in_connector("IN_B") + inner_map_exit_1.add_out_connector("OUT_B") + map_tasklet_0 = map_state.add_tasklet("map_tasklet_0", {}, {"b"}, "b = 1") + map_tasklet_1 = map_state.add_tasklet("map_tasklet_1", {}, {"b"}, "b = 2") + map_state.add_edge(map_entry, None, inner_map_entry_0, None, dace.Memlet()) + map_state.add_edge(inner_map_entry_0, None, map_tasklet_0, None, + dace.Memlet()) + map_state.add_edge(map_tasklet_0, "b", inner_map_exit_0, "IN_B", + dace.Memlet("B[_j, _i]")) + inner_edge_0 = map_state.add_edge(inner_map_exit_0, "OUT_B", map_exit, + "IN_B", dace.Memlet(data="B")) + map_state.add_edge(map_entry, None, inner_map_entry_1, None, dace.Memlet()) + map_state.add_edge(inner_map_entry_1, None, map_tasklet_1, None, + dace.Memlet()) + map_state.add_edge(map_tasklet_1, "b", inner_map_exit_1, "IN_B", + dace.Memlet("B[_j, _i]")) + inner_edge_1 = map_state.add_edge(inner_map_exit_1, "OUT_B", map_exit, + "IN_B", dace.Memlet(data="B")) + outer_edge = map_state.add_edge(map_exit, "OUT_B", a1, None, + dace.Memlet(data="B")) + + results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] + + result = results["approximation"] + expected_subset_outer_edge = Range.from_string("0:M, 0:N") + expected_subset_inner_edge = Range.from_string("0:M, _i") + result_inner_edge_0 = result[inner_edge_0].subset.subset_list[0] + result_inner_edge_1 = result[inner_edge_1].subset.subset_list[0] + result_outer_edge = result[outer_edge].subset.subset_list[0] + assert (str(result_inner_edge_0) == str(expected_subset_inner_edge)) + assert (str(result_inner_edge_1) == str(expected_subset_inner_edge)) + assert (str(result_outer_edge) == str(expected_subset_outer_edge)) + + +def test_map_tree_no_write_multiple_indices(): + """ + Two maps nested in a map. 
Both nested writes contain an addition of
+    indices in the subscript expression
+    --> Approximated write-sets of all Maps to the array are empty
+    """
+
+    sdfg = dace.SDFG("twoD_map")
+    sdfg.add_array("B", (M, N), dace.float64)
+    map_state = sdfg.add_state("map")
+    a1 = map_state.add_access('B')
+    map_entry, map_exit = map_state.add_map("outer_map", {"_i": '0:N:1'})
+    map_exit.add_in_connector("IN_B")
+    map_exit.add_out_connector("OUT_B")
+    inner_map_entry_0, inner_map_exit_0 = map_state.add_map(
+        "inner_map_0", {"_j": '0:M:1'})
+    inner_map_exit_0.add_in_connector("IN_B")
+    inner_map_exit_0.add_out_connector("OUT_B")
+    inner_map_entry_1, inner_map_exit_1 = map_state.add_map(
+        "inner_map_1", {"_j": '0:M:1'})
+    inner_map_exit_1.add_in_connector("IN_B")
+    inner_map_exit_1.add_out_connector("OUT_B")
+    map_tasklet_0 = map_state.add_tasklet("map_tasklet_0", {}, {"b"}, "b = 1")
+    map_tasklet_1 = map_state.add_tasklet("map_tasklet_1", {}, {"b"}, "b = 2")
+    map_state.add_edge(map_entry, None, inner_map_entry_0, None, dace.Memlet())
+    map_state.add_edge(inner_map_entry_0, None, map_tasklet_0, None,
+                       dace.Memlet())
+    map_state.add_edge(map_tasklet_0, "b", inner_map_exit_0, "IN_B",
+                       dace.Memlet("B[_j + _i, _i]"))
+    inner_edge_0 = map_state.add_edge(inner_map_exit_0, "OUT_B", map_exit,
+                                      "IN_B", dace.Memlet(data="B"))
+    map_state.add_edge(map_entry, None, inner_map_entry_1, None, dace.Memlet())
+    map_state.add_edge(inner_map_entry_1, None, map_tasklet_1, None,
+                       dace.Memlet())
+    map_state.add_edge(map_tasklet_1, "b", inner_map_exit_1, "IN_B",
+                       dace.Memlet("B[_j, _i + _j]"))
+    inner_edge_1 = map_state.add_edge(inner_map_exit_1, "OUT_B", map_exit,
+                                      "IN_B", dace.Memlet(data="B"))
+    outer_edge = map_state.add_edge(map_exit, "OUT_B", a1, None,
+                                    dace.Memlet(data="B"))
+
+    results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__]
+
+    result = results["approximation"]
+    result_inner_edge_0 = result[inner_edge_0].subset.subset_list
+    result_inner_edge_1 = result[inner_edge_1].subset.subset_list
+    result_outer_edge = result[outer_edge].subset.subset_list
+    assert (len(result_inner_edge_0) == 0)
+    assert (len(result_inner_edge_1) == 0)
+    assert (len(result_outer_edge) == 0)
+
+
+def test_map_tree_multiple_indices_per_dimension():
+    """
+    Two maps nested in a map. One inner Map writes to array using multiple indices. 
+    The other inner map writes to the array with affine indices
+    --> Approximated write-set of outer Map to array equals shape of array
+    """
+
+    sdfg = dace.SDFG("twoD_map")
+    sdfg.add_array("B", (M, N), dace.float64)
+    map_state = sdfg.add_state("map")
+    a1 = map_state.add_access('B')
+    map_entry, map_exit = map_state.add_map("outer_map", {"_i": '0:N:1'})
+    map_exit.add_in_connector("IN_B")
+    map_exit.add_out_connector("OUT_B")
+    inner_map_entry_0, inner_map_exit_0 = map_state.add_map(
+        "inner_map_0", {"_j": '0:M:1'})
+    inner_map_exit_0.add_in_connector("IN_B")
+    inner_map_exit_0.add_out_connector("OUT_B")
+    inner_map_entry_1, inner_map_exit_1 = map_state.add_map(
+        "inner_map_1", {"_j": '0:M:1'})
+    inner_map_exit_1.add_in_connector("IN_B")
+    inner_map_exit_1.add_out_connector("OUT_B")
+    map_tasklet_0 = map_state.add_tasklet("map_tasklet_0", {}, {"b"}, "b = 1")
+    map_tasklet_1 = map_state.add_tasklet("map_tasklet_1", {}, {"b"}, "b = 2")
+    map_state.add_edge(map_entry, None, inner_map_entry_0, None, dace.Memlet())
+    map_state.add_edge(inner_map_entry_0, None, map_tasklet_0, None,
+                       dace.Memlet())
+    map_state.add_edge(map_tasklet_0, "b", inner_map_exit_0, "IN_B",
+                       dace.Memlet("B[_j * _j, _i ]"))
+    inner_edge_0 = map_state.add_edge(inner_map_exit_0, "OUT_B", map_exit,
+                                      "IN_B", dace.Memlet(data="B"))
+    map_state.add_edge(map_entry, None, inner_map_entry_1, None, dace.Memlet())
+    map_state.add_edge(inner_map_entry_1, None, map_tasklet_1, None,
+                       dace.Memlet())
+    map_state.add_edge(map_tasklet_1, "b", inner_map_exit_1, "IN_B",
+                       dace.Memlet("B[_j, _i]"))
+    inner_edge_1 = map_state.add_edge(inner_map_exit_1, "OUT_B", map_exit,
+                                      "IN_B", dace.Memlet(data="B"))
+    outer_edge = map_state.add_edge(map_exit, "OUT_B", a1, None,
+                                    dace.Memlet(data="B"))
+
+    results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__]
+
+    result = results["approximation"]
+    expected_subset_outer_edge = Range.from_string("0:M, 0:N")
+    expected_subset_inner_edge_1 = Range.from_string("0:M, _i")
+    result_inner_edge_1 = result[inner_edge_1].subset.subset_list[0]
+    result_outer_edge = result[outer_edge].subset.subset_list[0]
+    assert (len(result[inner_edge_0].subset.subset_list) == 0)
+    assert (str(result_inner_edge_1) == str(expected_subset_inner_edge_1))
+    assert (str(result_outer_edge) == str(expected_subset_outer_edge))
+
+
+def test_loop_in_map_multiplied_indices():
+    """
+    Loop nested in a map that writes to an array. In the subscript expression
+    of the write, indices are multiplied.
+    --> Approximated write-set of Map to array is empty
+    """
+
+    @dace.program
+    def loop(A: dace.float64[N, M]):
+        for i in dace.map[0:N]:
+            for j in range(M):
+                A[i, j * i] = 0
+
+    sdfg = loop.to_sdfg(simplify=True)
+
+    results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__]
+
+    nsdfg = sdfg.sdfg_list[1].parent_nsdfg_node
+    map_state = sdfg.states()[0]
+    result = results["approximation"]
+    edge = map_state.out_edges(nsdfg)[0]
+    assert (len(result[edge].subset.subset_list) == 0)
+
+
+def test_loop_in_map():
+    """
+    Loop nested in a map that writes to an array. The outer map overwrites the array.
+    --> Approximated write-set of Map to array equals shape of array
+    """
+
+    @dace.program
+    def loop(A: dace.float64[N, M]):
+        for i in dace.map[0:N]:
+            for j in range(M):
+                A[i, j] = 0
+
+    sdfg = loop.to_sdfg(simplify=True)
+
+    results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__]
+
+    map_state = sdfg.states()[0]
+    edge = map_state.in_edges(map_state.data_nodes()[0])[0]
+    result = results["approximation"]
+    expected_subset = Range.from_string("0:N, 0:M")
+    assert (str(result[edge].subset.subset_list[0]) == str(expected_subset))
+
+
+def test_map_in_loop():
+    """
+    Map nested in a loop that writes to an array. The outer loop overwrites the array.
+    --> Approximated write-set of Map to array equals shape of array
+    """
+
+    sdfg = dace.SDFG("nested")
+    sdfg.add_array("B", (N, M), dace.float64)
+    init = sdfg.add_state("init")
+    guard = sdfg.add_state("guard")
+    body = sdfg.add_state("body")
+    end = sdfg.add_state("end")
+    sdfg.add_edge(init, guard, dace.InterstateEdge(assignments={"j": "0"}))
+    sdfg.add_edge(guard, body, dace.InterstateEdge(condition="j < N"))
+    sdfg.add_edge(guard, end, dace.InterstateEdge(condition="not(j < N)"))
+    sdfg.add_edge(body, guard, dace.InterstateEdge(assignments={"j": "j + 1"}))
+    a1 = body.add_access("B")
+    body.add_mapped_tasklet("overwrite_1",
+                            map_ranges={'i': '0:M:1'},
+                            inputs={},
+                            code="b = 5",
+                            outputs={"b": dace.Memlet("B[j, i]")},
+                            output_nodes={"B": a1},
+                            external_edges=True)
+
+    results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__]
+
+    result = results["loop_approximation"]
+    expected_subset = Range.from_string("0:N, 0:M")
+    assert (str(
+        result[guard]["B"].subset.subset_list[0]) == str(expected_subset))
+
+
+def test_map_in_loop_multiplied_indices_first_dimension():
+    """
+    Map nested in a loop that writes to an array. The subscript expression
+    of the array access multiplies two indices in the first dimension.
+    --> Approximated write-set of loop to array is empty
+    """
+
+    sdfg = dace.SDFG("nested")
+    sdfg.add_array("B", (N, M), dace.float64)
+    init = sdfg.add_state("init")
+    guard = sdfg.add_state("guard")
+    body = sdfg.add_state("body")
+    end = sdfg.add_state("end")
+    sdfg.add_edge(init, guard, dace.InterstateEdge(assignments={"j": "0"}))
+    sdfg.add_edge(guard, body, dace.InterstateEdge(condition="j < N"))
+    sdfg.add_edge(guard, end, dace.InterstateEdge(condition="not(j < N)"))
+    sdfg.add_edge(body, guard, dace.InterstateEdge(assignments={"j": "j + 1"}))
+    a1 = body.add_access("B")
+    body.add_mapped_tasklet("overwrite_1",
+                            map_ranges={'i': '0:M:1'},
+                            inputs={},
+                            code="b = 5",
+                            outputs={"b": dace.Memlet("B[j * i, i]")},
+                            output_nodes={"B": a1},
+                            external_edges=True)
+
+    results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__]
+
+    result = results["loop_approximation"]
+    assert (guard not in result.keys() or len(result[guard]) == 0)
+
+
+def test_map_in_loop_multiplied_indices_second_dimension():
+    """
+    Map nested in a loop that writes to an array. The subscript expression
+    of the array access multiplies two indices in the second dimension.
+    --> Approximated write-set of loop to array is empty
+    """
+    sdfg = dace.SDFG("nested")
+    sdfg.add_array("B", (N, M), dace.float64)
+    init = sdfg.add_state("init")
+    guard = sdfg.add_state("guard")
+    body = sdfg.add_state("body")
+    end = sdfg.add_state("end")
+    sdfg.add_edge(init, guard, dace.InterstateEdge(assignments={"j": "0"}))
+    sdfg.add_edge(guard, body, dace.InterstateEdge(condition="j < N"))
+    sdfg.add_edge(guard, end, dace.InterstateEdge(condition="not(j < N)"))
+    sdfg.add_edge(body, guard, dace.InterstateEdge(assignments={"j": "j + 1"}))
+    a1 = body.add_access("B")
+    body.add_mapped_tasklet("overwrite_1",
+                            map_ranges={'i': '0:M:1'},
+                            inputs={},
+                            code="b = 5",
+                            outputs={"b": dace.Memlet("B[j, i * j]")},
+                            output_nodes={"B": a1},
+                            external_edges=True)
+
+    results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__]
+
+    result = results["loop_approximation"]
+    assert (guard not in result.keys() or len(result[guard]) == 0)
+
+
+def test_nested_sdfg_in_map_nest():
+    """
+    Write in nested SDFG in two-dimensional map nest.
+    --> should approximate write-set of map nest as shape of array."""
+
+    @dace.program
+    def nested_loop(A: dace.float64[M, N]):
+        for i in dace.map[0:M]:
+            for j in dace.map[0:N]:
+                if A[0]:
+                    A[i, j] = 1
+                else:
+                    A[i, j] = 2
+                A[i, j] = A[i, j] * A[i, j]
+
+    sdfg = nested_loop.to_sdfg(simplify=True)
+
+    result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__]
+    write_approx = result["approximation"]
+    # find write set
+    accessnode = None
+    write_set = None
+    for node, _ in sdfg.all_nodes_recursive():
+        if isinstance(node, dace.nodes.AccessNode):
+            if node.data == "A":
+                accessnode = node
+    for edge, memlet in write_approx.items():
+        if edge.dst is accessnode:
+            write_set = memlet.subset
+
+    assert (str(write_set) == "0:M, 0:N")
+
+
+def test_loop_in_nested_sdfg_in_map_partial_write():
+    """
+    Write in nested SDFG in two-dimensional map nest.
+    The nested loop does not iterate over the full shape of the second array dimension.
+    --> should approximate write-set of map nest precisely."""
+
+    @dace.program
+    def nested_loop(A: dace.float64[M, N]):
+        for i in dace.map[0:M]:
+            for j in range(2, N, 1):
+                if A[0]:
+                    A[i, j] = 1
+                else:
+                    A[i, j] = 2
+                A[i, j] = A[i, j] * A[i, j]
+
+    sdfg = nested_loop.to_sdfg(simplify=True)
+
+    result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__]
+
+    write_approx = result["approximation"]
+    # find write set
+    accessnode = None
+    write_set = None
+    for node, _ in sdfg.all_nodes_recursive():
+        if isinstance(node, dace.nodes.AccessNode):
+            if node.data == "A":
+                accessnode = node
+    for edge, memlet in write_approx.items():
+        if edge.dst is accessnode:
+            write_set = memlet.subset
+    assert (str(write_set) == "0:M, 0:N - 2")
+
+
+def test_map_in_nested_sdfg_in_map():
+    """
+    Write in a Map nested in a nested SDFG nested in a map.
+ --> should approximate write-set of loop nest precisely.""" + + @dace.program + def nested_loop(A: dace.float64[M, N]): + for i in dace.map[0:M]: + if A[0]: + A[i, :] = 1 + else: + A[i, :] = 2 + A[i, :] = 0 + + sdfg = nested_loop.to_sdfg(simplify=True) + + result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] + + write_approx = result["approximation"] + # find write set + accessnode = None + write_set = None + for node, _ in sdfg.all_nodes_recursive(): + if isinstance(node, dace.nodes.AccessNode): + if node.data == "A": + accessnode = node + for edge, memlet in write_approx.items(): + if edge.dst is accessnode: + write_set = memlet.subset + assert (str(write_set) == "0:M, 0:N") + + +def test_nested_sdfg_in_map_branches(): + """ + Nested SDFG that overwrites second dimension of array conditionally. + --> should approximate write-set of map as empty + """ + + @dace.program + def nested_loop(A: dace.float64[M, N]): + for i in dace.map[0:M]: + if A[0]: + A[i, :] = 1 + else: + A[i, :] = 2 + + sdfg = nested_loop.to_sdfg(simplify=True) + + result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] + + write_approx = result["approximation"] + # find write set + accessnode = None + write_set = None + for node, _ in sdfg.all_nodes_recursive(): + if isinstance(node, dace.nodes.AccessNode): + if node.data == "A": + accessnode = node + for edge, memlet in write_approx.items(): + if edge.dst is accessnode: + write_set = memlet.subset.subset_list + assert (not write_set) + + +def test_simple_loop_overwrite(): + """ + simple loop that overwrites a one-dimensional array + --> should approximate write-set of loop as shape of array + """ + + sdfg = dace.SDFG("simple_loop") + sdfg.add_array("A", [N], dace.int64) + init = sdfg.add_state("init") + end = sdfg.add_state("end") + loop_body = sdfg.add_state("loop_body") + _, guard, _ = sdfg.add_loop(init, loop_body, end, "i", "0", "i < N", + "i + 1") + a0 = loop_body.add_access("A") + loop_tasklet = loop_body.add_tasklet("overwrite", {}, {"a"}, "a = 0") + loop_body.add_edge(loop_tasklet, "a", a0, None, dace.Memlet("A[i]")) + + result = pipeline.apply_pass( + sdfg, {})[UnderapproximateWrites.__name__]["loop_approximation"] + + assert (str(result[guard]["A"].subset) == str( + Range.from_array(sdfg.arrays["A"]))) + + +def test_loop_2D_overwrite(): + """ + Two-dimensional loop nest overwrites a two-dimensional array + --> should approximate write-set of loop nest as shape of array + """ + + sdfg = dace.SDFG("loop_2D_overwrite") + sdfg.add_array("A", [M, N], dace.int64) + init = sdfg.add_state("init") + end = sdfg.add_state("end") + loop_body = sdfg.add_state("loop_body") + loop_before_1 = sdfg.add_state("loop_before_1") + loop_after_1 = sdfg.add_state("loop_after_1") + _, guard2, _ = sdfg.add_loop(loop_before_1, loop_body, loop_after_1, "i", + "0", "i < N", "i + 1") + _, guard1, _ = sdfg.add_loop(init, loop_before_1, end, "j", "0", "j < M", + "j + 1", loop_after_1) + a0 = loop_body.add_access("A") + loop_tasklet = loop_body.add_tasklet("overwrite", {}, {"a"}, "a = 0") + loop_body.add_edge(loop_tasklet, "a", a0, None, dace.Memlet("A[j,i]")) + + result = pipeline.apply_pass( + sdfg, {})[UnderapproximateWrites.__name__]["loop_approximation"] + + assert (str(result[guard1]["A"].subset) == str( + Range.from_array(sdfg.arrays["A"]))) + assert (str(result[guard2]["A"].subset) == "j, 0:N") + + +def test_loop_2D_propagation_gap_symbolic(): + """ + Three nested loops that overwrite two dimensional array. 
+ Innermost loop is surrounded by loop that doesn't iterate + over array range and is potentially empty. + --> should approximate write-set to array of outer loop as empty + """ + + sdfg = dace.SDFG("loop_2D_no_overwrite") + sdfg.add_array("A", [M, N], dace.int64) + init = sdfg.add_state("init") + end = sdfg.add_state("end") + loop_body = sdfg.add_state("loop_body") + loop_before_1 = sdfg.add_state("loop_before_1") + loop_after_1 = sdfg.add_state("loop_after_1") + loop_before_2 = sdfg.add_state("loop_before_2") + loop_after_2 = sdfg.add_state("loop_after_2") + _, guard3, _ = sdfg.add_loop(loop_before_1, loop_body, loop_after_1, "i", + "0", "i < N", "i + 1") # inner-most loop + _, guard2, _ = sdfg.add_loop(loop_before_2, loop_before_1, loop_after_2, + "k", "0", "k < K", "k + 1", + loop_after_1) # second-inner-most loop + _, guard1, _ = sdfg.add_loop(init, loop_before_2, end, "j", "0", "j < M", + "j + 1", loop_after_2) # outer-most loop + a0 = loop_body.add_access("A") + loop_tasklet = loop_body.add_tasklet("overwrite", {}, {"a"}, "a = 0") + loop_body.add_edge(loop_tasklet, "a", a0, None, dace.Memlet("A[j,i]")) + + result = pipeline.apply_pass( + sdfg, {})[UnderapproximateWrites.__name__]["loop_approximation"] + + assert ("A" not in result[guard1].keys()) + assert ("A" not in result[guard2].keys()) + assert (str(result[guard3]["A"].subset) == "j, 0:N") + + +def test_2_loops_overwrite(): + """ + 2 loops one after another overwriting an array + --> should approximate write-set to array of both loops as shape of array + """ + + sdfg = dace.SDFG("two_loops_overwrite") + sdfg.add_array("A", [N], dace.int64) + init = sdfg.add_state("init") + end = sdfg.add_state("end") + loop_body_1 = sdfg.add_state("loop_body_1") + loop_body_2 = sdfg.add_state("loop_body_2") + _, guard_1, after_state = sdfg.add_loop(init, loop_body_1, None, "i", "0", + "i < N", "i + 1") + _, guard_2, _ = sdfg.add_loop(after_state, loop_body_2, end, "i", "0", + "i < N", "i + 1") + a0 = loop_body_1.add_access("A") + loop_tasklet_1 = loop_body_1.add_tasklet("overwrite", {}, {"a"}, "a = 0") + loop_body_1.add_edge(loop_tasklet_1, "a", a0, None, dace.Memlet("A[i]")) + a1 = loop_body_2.add_access("A") + loop_tasklet_2 = loop_body_2.add_tasklet("overwrite", {}, {"a"}, "a = 0") + loop_body_2.add_edge(loop_tasklet_2, "a", a1, None, dace.Memlet("A[i]")) + + result = pipeline.apply_pass( + sdfg, {})[UnderapproximateWrites.__name__]["loop_approximation"] + + assert (str(result[guard_1]["A"].subset) == str( + Range.from_array(sdfg.arrays["A"]))) + assert (str(result[guard_2]["A"].subset) == str( + Range.from_array(sdfg.arrays["A"]))) + + +def test_loop_2D_overwrite_propagation_gap_non_empty(): + """ + Three nested loops that overwrite two-dimensional array. + Innermost loop is surrounded by a loop that doesn't iterate + over array range but over a non-empty constant range. 
+    --> should approximate write-set to array of loop nest as shape of array
+    """
+
+    sdfg = dace.SDFG("loop_2D_no_overwrite")
+    sdfg.add_array("A", [M, N], dace.int64)
+    init = sdfg.add_state("init")
+    end = sdfg.add_state("end")
+    loop_body = sdfg.add_state("loop_body")
+    loop_before_1 = sdfg.add_state("loop_before_1")
+    loop_after_1 = sdfg.add_state("loop_after_1")
+    loop_before_2 = sdfg.add_state("loop_before_2")
+    loop_after_2 = sdfg.add_state("loop_after_2")
+    _, guard3, _ = sdfg.add_loop(loop_before_1, loop_body, loop_after_1, "i",
+                                 "0", "i < N", "i + 1")
+    _, guard2, _ = sdfg.add_loop(loop_before_2, loop_before_1, loop_after_2,
+                                 "k", "0", "k < 10", "k + 1", loop_after_1)
+    _, guard1, _ = sdfg.add_loop(init, loop_before_2, end, "j", "0", "j < M",
+                                 "j + 1", loop_after_2)
+    a0 = loop_body.add_access("A")
+    loop_tasklet = loop_body.add_tasklet("overwrite", {}, {"a"}, "a = 0")
+    loop_body.add_edge(loop_tasklet, "a", a0, None, dace.Memlet("A[j,i]"))
+
+    result = pipeline.apply_pass(
+        sdfg, {})[UnderapproximateWrites.__name__]["loop_approximation"]
+
+    assert (str(result[guard1]["A"].subset) == str(
+        Range.from_array(sdfg.arrays["A"])))
+    assert (str(result[guard2]["A"].subset) == "j, 0:N")
+    assert (str(result[guard3]["A"].subset) == "j, 0:N")
+
+
+def test_loop_nest_multiplied_indices():
+    """
+    Three nested loops that write to a two-dimensional array.
+    The subscript expression is a multiplication of two indices
+    --> should approximate write-sets of loops as empty
+    """
+
+    sdfg = dace.SDFG("loop_2D_no_overwrite")
+    sdfg.add_array("A", [N, N], dace.int64)
+    init = sdfg.add_state("init")
+    end = sdfg.add_state("end")
+    loop_body = sdfg.add_state("loop_body")
+    loop_before_1 = sdfg.add_state("loop_before_1")
+    loop_after_1 = sdfg.add_state("loop_after_1")
+    loop_before_2 = sdfg.add_state("loop_before_2")
+    loop_after_2 = sdfg.add_state("loop_after_2")
+    _, guard3, _ = sdfg.add_loop(loop_before_1, loop_body, loop_after_1, "i",
+                                 "0", "i < N", "i + 1")
+    _, guard2, _ = sdfg.add_loop(loop_before_2, loop_before_1, loop_after_2,
+                                 "k", "0", "k < 10", "k + 1", loop_after_1)
+    _, guard1, _ = sdfg.add_loop(init, loop_before_2, end, "j", "0", "j < M",
+                                 "j + 1", loop_after_2)
+    a0 = loop_body.add_access("A")
+    loop_tasklet = loop_body.add_tasklet("overwrite", {}, {"a"}, "a = 0")
+    loop_body.add_edge(loop_tasklet, "a", a0, None, dace.Memlet("A[i,i*j]"))
+
+    result = pipeline.apply_pass(
+        sdfg, {})[UnderapproximateWrites.__name__]["loop_approximation"]
+
+    assert (guard1 not in result.keys() or "A" not in result[guard1].keys())
+    assert (guard2 not in result.keys() or "A" not in result[guard2].keys())
+    assert (guard3 not in result.keys() or "A" not in result[guard3].keys()
+            or not result[guard3]['A'].subset)
+
+
+def test_loop_nest_empty_nested_loop():
+    """
+    Three nested loops that write to a two-dimensional array.
+    The innermost loop is surrounded by a loop that iterates over an empty range.
+    --> Approximated write-set to array of outer loop is empty.
+ Approximated write-set to array of innermost loop is equal to shape of array + """ + + sdfg = dace.SDFG("loop_2D_no_overwrite") + sdfg.add_array("A", [M, N], dace.int64) + init = sdfg.add_state("init") + end = sdfg.add_state("end") + loop_body = sdfg.add_state("loop_body") + loop_before_1 = sdfg.add_state("loop_before_1") + loop_after_1 = sdfg.add_state("loop_after_1") + loop_before_2 = sdfg.add_state("loop_before_2") + loop_after_2 = sdfg.add_state("loop_after_2") + _, guard3, _ = sdfg.add_loop(loop_before_1, loop_body, loop_after_1, "i", + "0", "i < N", "i + 1") + _, guard2, _ = sdfg.add_loop(loop_before_2, loop_before_1, loop_after_2, + "k", "0", "k < 0", "k + 1", loop_after_1) + _, guard1, _ = sdfg.add_loop(init, loop_before_2, end, "j", "0", "j < M", + "j + 1", loop_after_2) + a0 = loop_body.add_access("A") + loop_tasklet = loop_body.add_tasklet("overwrite", {}, {"a"}, "a = 0") + loop_body.add_edge(loop_tasklet, "a", a0, None, dace.Memlet("A[j,i]")) + + result = pipeline.apply_pass( + sdfg, {})[UnderapproximateWrites.__name__]["loop_approximation"] + + assert (guard1 not in result.keys() or "A" not in result[guard1].keys()) + assert (guard2 not in result.keys() or "A" not in result[guard2].keys()) + assert (str(result[guard3]["A"].subset) == "j, 0:N") + + +def test_loop_nest_inner_loop_conditional(): + """ + Loop nested in another loop. Nested loop is in a branch and overwrites the array. + --> should approximate write-set to array of outer loop as empty + and write-set to array of inner loop equal to shape of array + """ + sdfg = dace.SDFG("loop_2D_branch") + sdfg.add_array("A", [N], dace.int64) + init = sdfg.add_state("init") + end = sdfg.add_state("end") + loop_body = sdfg.add_state("loop_body") + if_guard = sdfg.add_state("if_guard") + if_merge = sdfg.add_state("if_merge") + loop_before_2 = sdfg.add_state("loop_before_2") + loop_after_2 = sdfg.add_state("loop_after_2") + _, guard2, _ = sdfg.add_loop(loop_before_2, loop_body, loop_after_2, "k", + "0", "k < N", "k + 1") + _, guard1, _ = sdfg.add_loop(init, if_guard, end, "j", "0", "j < M", + "j + 1", if_merge) + sdfg.add_edge(if_guard, loop_before_2, + dace.InterstateEdge(condition="j % 2 == 0")) + sdfg.add_edge(if_guard, if_merge, + dace.InterstateEdge(condition="j % 2 == 1")) + sdfg.add_edge(loop_after_2, if_merge, dace.InterstateEdge()) + a0 = loop_body.add_access("A") + loop_tasklet = loop_body.add_tasklet("overwrite", {}, {"a"}, "a = 0") + loop_body.add_edge(loop_tasklet, "a", a0, None, dace.Memlet("A[k]")) + + result = pipeline.apply_pass( + sdfg, {})[UnderapproximateWrites.__name__]["loop_approximation"] + + assert (guard1 not in result.keys() or "A" not in result[guard1].keys()) + assert (guard2 in result.keys() and "A" in result[guard2].keys() + and str(result[guard2]['A'].subset) == "0:N") + + +def test_loop_in_nested_sdfg_in_map_multiplied_indices(): + """ + Loop in nested SDFG nested in map. 
The subscript of the write multiplies two indices + --> should approximate write-set of loop as empty + """ + + @dace.program + def nested_loop(A: dace.float64[M, N]): + for i in dace.map[0:M]: + for j in range(N): + A[i + 1, j * i] = 1 + + sdfg = nested_loop.to_sdfg(simplify=True) + + result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] + + write_approx = result["approximation"] + write_set = None + accessnode = None + for node, _ in sdfg.all_nodes_recursive(): + if isinstance(node, dace.nodes.AccessNode): + if node.data == "A": + accessnode = node + for edge, memlet in write_approx.items(): + if edge.dst is accessnode: + write_set = memlet.subset + assert (not write_set.subset_list) + + +def test_loop_in_nested_sdfg_simple(): + """ + Loop nested in a map that overwrites two-dimensional array + --> should approximate write-set of map to full shape of array + """ + + @dace.program + def nested_loop(A: dace.float64[M, N]): + for i in dace.map[0:M]: + for j in range(N): + A[i, j] = 1 + + sdfg = nested_loop.to_sdfg(simplify=True) + + result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] + + # find write set + write_approx = result["approximation"] + accessnode = None + write_set = None + for node, _ in sdfg.all_nodes_recursive(): + if isinstance(node, dace.nodes.AccessNode): + if node.data == "A": + accessnode = node + for edge, memlet in write_approx.items(): + if edge.dst is accessnode: + write_set = memlet.subset + + assert (str(write_set) == "0:M, 0:N") + + +def test_loop_break(): + """ + Loop that has a break statement writing to array. + --> Approximated write-set of loop to array is empty + """ + + sdfg = dace.SDFG("loop_2D_no_overwrite") + sdfg.add_array("A", [N], dace.int64) + init = sdfg.add_state("init", is_start_state=True) + loop_body_0 = sdfg.add_state("loop_body_0") + loop_body_1 = sdfg.add_state("loop_body_1") + loop_after_1 = sdfg.add_state("loop_after_1") + _, guard3, _ = sdfg.add_loop(init, loop_body_0, loop_after_1, "i", "0", + "i < N", "i + 1", loop_body_1) + sdfg.add_edge(loop_body_0, loop_after_1, + dace.InterstateEdge(condition="i > 10")) + sdfg.add_edge(loop_body_0, loop_body_1, + dace.InterstateEdge(condition="not(i > 10)")) + a0 = loop_body_1.add_access("A") + loop_tasklet = loop_body_1.add_tasklet("overwrite", {}, {"a"}, "a = 0") + loop_body_1.add_edge(loop_tasklet, "a", a0, None, dace.Memlet("A[i]")) + + results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] + + result = results["loop_approximation"] + assert (guard3 not in result.keys() or "A" not in result[guard3].keys()) + + +def test_constant_multiplicative_2D(): + """ + Array is accessed via index that is multiplied with a constant. 
+    --> should approximate write-set precisely
+    """
+
+    A = dace.data.Array(dace.int64, (N, M))
+    subset = Range.from_string("i,3*j")
+    i_subset = Range.from_string("0:N:1")
+    j_subset = Range.from_string("0:M:1")
+    memlet = dace.Memlet(None, "A", subset)
+    memlets = [memlet]
+
+    propagated_memlet = UnderapproximateWrites()._underapproximate_subsets(
+        memlets, A, ["j"], j_subset, None, True)
+    propagated_memlet = UnderapproximateWrites()._underapproximate_subsets(
+        [propagated_memlet], A, ["i"], i_subset, None, True)
+
+    propagated_subset = propagated_memlet.subset.subset_list[0]
+    expected_subset = Range.from_string("0:N:1, 0:3*M - 2:3")
+    propagated_string = str(propagated_subset)
+    expected_string = str(expected_subset)
+    assert (propagated_string == expected_string)
+
+
+def test_affine_2D():
+    """
+    Array is accessed via an affine subscript expression.
+    --> should approximate write-set precisely
+    """
+
+    A = dace.data.Array(dace.int64, (N, M))
+    subset = Range.from_string("i,3 * j + 3")
+    i_subset = Range.from_string("0:N:1")
+    j_subset = Range.from_string("0:M:1")
+    memlet = dace.Memlet(None, "A", subset)
+    memlets = [memlet]
+
+    propagated_memlet = UnderapproximateWrites()._underapproximate_subsets(
+        memlets, A, ["j"], j_subset, None, True)
+    propagated_memlet = UnderapproximateWrites()._underapproximate_subsets(
+        [propagated_memlet], A, ["i"], i_subset, None, True)
+
+    propagated_subset = propagated_memlet.subset.subset_list[0]
+    expected_subset = Range.from_string("0:N:1, 3 : 3 * M + 1 : 3")
+    propagated_string = str(propagated_subset)
+    expected_string = str(expected_subset)
+    assert (propagated_string == expected_string)
+
+
+def test_multiplied_variables():
+    """
+    Two indices are multiplied in the subscript expression
+    --> should fall back to empty subset
+    """
+
+    A = dace.data.Array(dace.int64, (M, ))
+    subset = Range.from_string("i * j")
+    i_subset = Range.from_string("0:N:1")
+    j_subset = Range.from_string("0:M:1")
+    memlet = dace.Memlet(None, "A", subset)
+    memlets = [memlet]
+
+    propagated_memlet = UnderapproximateWrites()._underapproximate_subsets(
+        memlets, A, ["j"], j_subset, None, True)
+    propagated_memlet = UnderapproximateWrites()._underapproximate_subsets(
+        [propagated_memlet], A, ["i"], i_subset, None, True)
+
+    assert (not propagated_memlet.subset.subset_list)
+
+
+def test_one_variable_in_2dimensions():
+    """
+    One index occurs in two dimensions
+    --> should fall back to empty subset
+    """
+
+    A = dace.data.Array(dace.int64, (N, M))
+    subset = Range.from_string("i, i")
+    i_subset = Range.from_string("0:N:1")
+    j_subset = Range.from_string("0:M:1")
+    memlet = dace.Memlet(None, "A", subset)
+    memlets = [memlet]
+
+    propagated_memlet = UnderapproximateWrites()._underapproximate_subsets(
+        memlets, A, ["j"], j_subset, None, True)
+    propagated_memlet = UnderapproximateWrites()._underapproximate_subsets(
+        [propagated_memlet], A, ["i"], i_subset, None, True)
+
+    assert (not propagated_memlet.subset.subset_list)
+
+
+def test_negative_step():
+    """
+    Array is accessed via an index defined over a range with negative step.
+    --> should approximate write-set precisely
+    """
+    A = dace.data.Array(dace.int64, (N, M))
+    subset = Range.from_string("i, j")
+    i_subset = Range.from_string("0:N:1")
+    j_subset = Range.from_string("M:0:-1")
+    memlet = dace.Memlet(None, "A", subset)
+    memlets = [memlet]
+
+    propagated_memlet = UnderapproximateWrites()._underapproximate_subsets(
+        memlets, A, ["j"], j_subset, None, True)
+    propagated_memlet = UnderapproximateWrites()._underapproximate_subsets(
+        [propagated_memlet], A, ["i"], i_subset, None, True)
+
+    propagated_subset = propagated_memlet.subset.subset_list[0]
+    expected_subset = Range.from_string("0:N:1,M:0:-1")
+    propagated_string = str(propagated_subset)
+    expected_string = str(expected_subset)
+    assert (propagated_string == expected_string)
+
+
+def test_step_not_one():
+    """
+    Array is accessed via an index that is defined
+    over a Range with step size > 1.
+    --> should approximate write-set precisely
+    """
+
+    A = dace.data.Array(dace.int64, (N, M))
+    subset = Range.from_string("i")
+    i_subset = Range.from_string("0:N:3")
+    memlet = dace.Memlet(None, "A", subset)
+    memlets = [memlet]
+
+    propagated_memlet = UnderapproximateWrites()._underapproximate_subsets(
+        memlets, A, ["i"], i_subset, None, True)
+    propagated_subset = propagated_memlet.subset.subset_list[0]
+
+    expected_subset = Range.from_string("0:N:3")
+    propagated_string = str(propagated_subset)
+    expected_string = str(expected_subset)
+    assert (propagated_string == expected_string)
+
+
+if __name__ == '__main__':
+    test_nested_sdfg_in_map_branches()
+    test_map_in_nested_sdfg_in_map()
+    test_loop_in_nested_sdfg_in_map_partial_write()
+    test_nested_sdfg_in_map_nest()
+    test_map_in_loop_multiplied_indices_first_dimension()
+    test_map_in_loop_multiplied_indices_second_dimension()
+    test_map_in_loop()
+    test_loop_in_map_multiplied_indices()
+    test_loop_in_map()
+    test_map_tree_full_write()
+    test_2D_map_overwrites_2D_array()
+    test_2D_map_added_indices()
+    test_2D_map_multiplied_indices()
+    test_1D_map_one_index_multiple_dims()
+    test_1D_map_one_index_squared()
+    test_map_tree_multiple_indices_per_dimension()
+    test_map_tree_no_write_multiple_indices()
+    test_step_not_one()
+    test_one_variable_in_2dimensions()
+    test_affine_2D()
+    test_constant_multiplicative_2D()
+    test_multiplied_variables()
+    test_loop_in_nested_sdfg_simple()
+    test_loop_nest_inner_loop_conditional()
+    test_loop_nest_empty_nested_loop()
+    test_simple_loop_overwrite()
+    test_loop_2D_overwrite()
+    test_loop_2D_overwrite_propagation_gap_non_empty()
+    test_2_loops_overwrite()
+    test_loop_2D_propagation_gap_symbolic()
+    test_loop_nest_multiplied_indices()
+    test_loop_in_nested_sdfg_in_map_multiplied_indices()
+    test_loop_break()
+    test_negative_step()

From 4139ddf02a2c274d583f1d99604cb6e03d656089 Mon Sep 17 00:00:00 2001
From: Tal Ben-Nun
Date: Tue, 21 Nov 2023 19:38:58 -0700
Subject: [PATCH 141/163] Fix schedule tree conversion for use of arrays in
 conditions (#1440)

If a data container appears as part of `read_symbols` (i.e., as an object,
e.g., `x is not None`), do not replace it with a memlet.
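To illustrate the rule this fix implements, here is a minimal, self-contained
sketch (hypothetical names such as `repl_example` are for illustration only
and are not part of the patch):

```python
import dace

# A free symbol in a condition that names a data container in the (nested)
# SDFG -- e.g., one used as an object, as in `x is not None` -- must be
# replaced by the mapped container's *name*, not by the full memlet string.
sdfg = dace.SDFG('repl_example')
sdfg.add_array('x', [10], dace.float64)  # 'x' is a data container here

input_mapping = {'x': dace.Memlet('A[0:10]')}
repl_dict = {}
for s in input_mapping:
    if s in sdfg.arrays:
        repl_dict[s] = input_mapping[s].data   # object use: 'A'
    else:
        repl_dict[s] = str(input_mapping[s])   # value use: 'A[0:10]'

assert repl_dict['x'] == 'A'
```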
--- dace/sdfg/analysis/schedule_tree/sdfg_to_tree.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dace/sdfg/analysis/schedule_tree/sdfg_to_tree.py b/dace/sdfg/analysis/schedule_tree/sdfg_to_tree.py index 084d46f47d..a519f24596 100644 --- a/dace/sdfg/analysis/schedule_tree/sdfg_to_tree.py +++ b/dace/sdfg/analysis/schedule_tree/sdfg_to_tree.py @@ -249,7 +249,10 @@ def replace_memlets(sdfg: SDFG, input_mapping: Dict[str, Memlet], output_mapping syms.remove(memlet.data) for s in syms: if s in input_mapping: - repl_dict[s] = str(input_mapping[s]) + if s in sdfg.arrays: + repl_dict[s] = input_mapping[s].data + else: + repl_dict[s] = str(input_mapping[s]) # Manual replacement with strings # TODO(later): Would be MUCH better to use MemletReplacer / e.data.replace_dict(repl_dict, replace_keys=False) From 6d53e24fe483dd9f8e6afbff8dbf95e764d8761d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lukas=20Tr=C3=BCmper?= Date: Sat, 25 Nov 2023 20:33:37 +0100 Subject: [PATCH 142/163] Fixes for TaskletFusion, AugAssignToWCR and MapExpansion (#1432) - The PR fixes two minor bugs for corner cases of the AugAssignToWCR and TaskletFusion which are reflected in additional test cases: - TaskletFusion: Should not remove array from SDFG, since it could be used elsewhere - AugAssignToWCR: Handle tasklets where all inputs come from same array - The PR re-writes MapExpansion to create only one memlet path per out connector to be more efficient. I experienced MapExpansion running for literally hours because it uses add_memlet_path for each edge to a tasklet. This is too expensive for >4 dimensional stencils with >50 edges --- dace/transformation/dataflow/map_expansion.py | 31 +++-- .../transformation/dataflow/tasklet_fusion.py | 2 +- .../transformation/dataflow/wcr_conversion.py | 28 ++--- tests/expansion_dynamic_range_test.py | 37 ------ tests/transformations/map_expansion_test.py | 119 ++++++++++++++++++ tests/transformations/tasklet_fusion_test.py | 29 +++++ tests/transformations/wcr_conversion_test.py | 18 +++ 7 files changed, 204 insertions(+), 60 deletions(-) delete mode 100644 tests/expansion_dynamic_range_test.py create mode 100644 tests/transformations/map_expansion_test.py diff --git a/dace/transformation/dataflow/map_expansion.py b/dace/transformation/dataflow/map_expansion.py index 60f1f13f32..9d89ec7c09 100644 --- a/dace/transformation/dataflow/map_expansion.py +++ b/dace/transformation/dataflow/map_expansion.py @@ -3,6 +3,7 @@ from dace.sdfg.utils import consolidate_edges from typing import Dict, List +import copy import dace from dace import dtypes, subsets, symbolic from dace.properties import EnumProperty, make_properties @@ -10,6 +11,7 @@ from dace.sdfg import utils as sdutil from dace.sdfg.graph import OrderedMultiDiConnectorGraph from dace.transformation import transformation as pm +from dace.sdfg.propagation import propagate_memlets_scope @make_properties @@ -66,14 +68,28 @@ def apply(self, graph: dace.SDFGState, sdfg: dace.SDFG): # 1. If there are no edges coming from the outside, use empty memlets # 2. Edges with IN_* connectors replicate along the maps # 3. 
Edges for dynamic map ranges replicate until reaching range(s) - for edge in graph.out_edges(map_entry): + for edge in list(graph.out_edges(map_entry)): + if edge.src_conn is not None and edge.src_conn not in entries[-1].out_connectors: + entries[-1].add_out_connector(edge.src_conn) + + graph.add_edge(entries[-1], edge.src_conn, edge.dst, edge.dst_conn, memlet=copy.deepcopy(edge.data)) graph.remove_edge(edge) - graph.add_memlet_path(map_entry, - *entries, - edge.dst, - src_conn=edge.src_conn, - memlet=edge.data, - dst_conn=edge.dst_conn) + + if graph.in_degree(map_entry) == 0: + graph.add_memlet_path(map_entry, *entries, memlet=dace.Memlet()) + else: + for edge in graph.in_edges(map_entry): + if not edge.dst_conn.startswith("IN_"): + continue + + in_conn = edge.dst_conn + out_conn = "OUT_" + in_conn[3:] + if in_conn not in entries[-1].in_connectors: + graph.add_memlet_path(map_entry, + *entries, + memlet=copy.deepcopy(edge.data), + src_conn=out_conn, + dst_conn=in_conn) # Modify dynamic map ranges dynamic_edges = dace.sdfg.dynamic_map_inputs(graph, map_entry) @@ -116,6 +132,7 @@ def apply(self, graph: dace.SDFGState, sdfg: dace.SDFG): else: raise ValueError('Cannot find scope in state') + propagate_memlets_scope(sdfg, state=graph, scopes=scope) consolidate_edges(sdfg, scope) return [map_entry] + entries diff --git a/dace/transformation/dataflow/tasklet_fusion.py b/dace/transformation/dataflow/tasklet_fusion.py index d6b4a3039b..29bb014263 100644 --- a/dace/transformation/dataflow/tasklet_fusion.py +++ b/dace/transformation/dataflow/tasklet_fusion.py @@ -267,5 +267,5 @@ def apply(self, graph: dace.SDFGState, sdfg: dace.SDFG): graph.remove_node(t1) if data is not None: graph.remove_node(data) - sdfg.remove_data(data.data, True) + graph.remove_node(t2) diff --git a/dace/transformation/dataflow/wcr_conversion.py b/dace/transformation/dataflow/wcr_conversion.py index 7f4fbc654d..09bd8a3741 100644 --- a/dace/transformation/dataflow/wcr_conversion.py +++ b/dace/transformation/dataflow/wcr_conversion.py @@ -75,6 +75,12 @@ def can_be_applied(self, graph, expr_index, sdfg, permissive=False): outedge = graph.edges_between(tasklet, mx)[0] + # If in map, only match if the subset is independent of any + # map indices (otherwise no conflict) + if not permissive and len(outedge.data.subset.free_symbols & set(me.map.params)) == len( + me.map.params): + return False + # Get relevant output connector outconn = outedge.src_conn @@ -131,17 +137,7 @@ def can_be_applied(self, graph, expr_index, sdfg, permissive=False): if edge.data.subset != outedge.data.subset: continue - # If in map, only match if the subset is independent of any - # map indices (otherwise no conflict) - if expr_index == 1: - if not permissive and len(outedge.data.subset.free_symbols & set(me.map.params)) == len( - me.map.params): - continue - return True - else: - # Only Python/C++ tasklets supported - return False return False @@ -182,11 +178,13 @@ def apply(self, state: SDFGState, sdfg: SDFG): rhs: ast.BinOp = ast_node.value op = AugAssignToWCR._PYOP_MAP[type(rhs.op)] inconns = list(edge.dst_conn for edge in inedges) - for n in (rhs.left, rhs.right): - if isinstance(n, ast.Name) and n.id in inconns: - inedge = inedges[inconns.index(n.id)] - else: - new_rhs = n + if isinstance(rhs.left, ast.Name) and rhs.left.id in inconns: + inedge = inedges[inconns.index(rhs.left.id)] + new_rhs = rhs.right + else: + inedge = inedges[inconns.index(rhs.right.id)] + new_rhs = rhs.left + new_node = ast.copy_location(ast.Assign(targets=[lhs], value=new_rhs), 
ast_node) tasklet.code.code = [new_node] diff --git a/tests/expansion_dynamic_range_test.py b/tests/expansion_dynamic_range_test.py deleted file mode 100644 index 2cafe5b6f1..0000000000 --- a/tests/expansion_dynamic_range_test.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. -import dace -from dace.transformation.dataflow import MapExpansion -import numpy as np - - -@dace.program -def expansion(A: dace.float32[20, 30, 5], rng: dace.int32[2]): - @dace.map - def mymap(i: _[0:20], j: _[rng[0]:rng[1]], k: _[0:5]): - a << A[i, j, k] - b >> A[i, j, k] - b = a * 2 - - -def test(): - A = np.random.rand(20, 30, 5).astype(np.float32) - b = np.array([5, 10], dtype=np.int32) - expected = A.copy() - expected[:, 5:10, :] *= 2 - - sdfg = expansion.to_sdfg() - sdfg(A=A, rng=b) - diff = np.linalg.norm(A - expected) - print('Difference (before transformation):', diff) - - sdfg.apply_transformations(MapExpansion) - - sdfg(A=A, rng=b) - expected[:, 5:10, :] *= 2 - diff2 = np.linalg.norm(A - expected) - print('Difference:', diff2) - assert (diff <= 1e-5) and (diff2 <= 1e-5) - - -if __name__ == "__main__": - test() diff --git a/tests/transformations/map_expansion_test.py b/tests/transformations/map_expansion_test.py new file mode 100644 index 0000000000..1f9a97f810 --- /dev/null +++ b/tests/transformations/map_expansion_test.py @@ -0,0 +1,119 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +import dace +import numpy as np +from dace.transformation.dataflow import MapExpansion + +def test_expand_with_inputs(): + @dace.program + def toexpand(A: dace.float64[4, 2], B: dace.float64[2, 2]): + for i, j in dace.map[1:3, 0:2]: + with dace.tasklet: + a1 << A[i, j] + a2 << A[i + 1, j] + a3 << A[i - 1, j] + b >> B[i-1, j] + b = a1 + a2 + a3 + + sdfg = toexpand.to_sdfg() + sdfg.simplify() + + # Init conditions + sdfg.validate() + assert len([node for node in sdfg.start_state.nodes() if isinstance(node, dace.nodes.MapEntry)]) == 1 + assert len([node for node in sdfg.start_state.nodes() if isinstance(node, dace.nodes.MapExit)]) == 1 + + # Expansion + assert sdfg.apply_transformations_repeated(MapExpansion) == 1 + sdfg.validate() + + map_entries = set() + state = sdfg.start_state + for node in state.nodes(): + if not isinstance(node, dace.nodes.MapEntry): + continue + + # (Fast) MapExpansion should not add memlet paths for each memlet to a tasklet + if sdfg.start_state.entry_node(node) is None: + assert state.in_degree(node) == 1 + assert state.out_degree(node) == 1 + assert len(node.out_connectors) == 1 + else: + assert state.in_degree(node) == 1 + assert state.out_degree(node) == 3 + assert len(node.out_connectors) == 1 + + map_entries.add(node) + + assert len(map_entries) == 2 + +def test_expand_without_inputs(): + @dace.program + def toexpand(B: dace.float64[4, 4]): + for i, j in dace.map[0:4, 0:4]: + with dace.tasklet: + b >> B[i, j] + b = 0 + + sdfg = toexpand.to_sdfg() + sdfg.simplify() + + # Init conditions + sdfg.validate() + assert len([node for node in sdfg.start_state.nodes() if isinstance(node, dace.nodes.MapEntry)]) == 1 + assert len([node for node in sdfg.start_state.nodes() if isinstance(node, dace.nodes.MapExit)]) == 1 + + # Expansion + assert sdfg.apply_transformations_repeated(MapExpansion) == 1 + sdfg.validate() + + map_entries = set() + state = sdfg.start_state + for node in state.nodes(): + if not isinstance(node, dace.nodes.MapEntry): + continue + + # (Fast) MapExpansion should not add memlet paths for each 
memlet to a tasklet + if sdfg.start_state.entry_node(node) is None: + assert state.in_degree(node) == 0 + assert state.out_degree(node) == 1 + assert len(node.out_connectors) == 0 + else: + assert state.in_degree(node) == 1 + assert state.out_degree(node) == 1 + assert len(node.out_connectors) == 0 + + map_entries.add(node) + + assert len(map_entries) == 2 + +def test_expand_without_dynamic_inputs(): + @dace.program + def expansion(A: dace.float32[20, 30, 5], rng: dace.int32[2]): + @dace.map + def mymap(i: _[0:20], j: _[rng[0]:rng[1]], k: _[0:5]): + a << A[i, j, k] + b >> A[i, j, k] + b = a * 2 + + A = np.random.rand(20, 30, 5).astype(np.float32) + b = np.array([5, 10], dtype=np.int32) + expected = A.copy() + expected[:, 5:10, :] *= 2 + + sdfg = expansion.to_sdfg() + sdfg(A=A, rng=b) + diff = np.linalg.norm(A - expected) + print('Difference (before transformation):', diff) + + sdfg.apply_transformations(MapExpansion) + + sdfg(A=A, rng=b) + expected[:, 5:10, :] *= 2 + diff2 = np.linalg.norm(A - expected) + print('Difference:', diff2) + assert (diff <= 1e-5) and (diff2 <= 1e-5) + +if __name__ == '__main__': + test_expand_with_inputs() + test_expand_without_inputs() + test_expand_without_dynamic_inputs() diff --git a/tests/transformations/tasklet_fusion_test.py b/tests/transformations/tasklet_fusion_test.py index 743010e8c9..59a7e8b36b 100644 --- a/tests/transformations/tasklet_fusion_test.py +++ b/tests/transformations/tasklet_fusion_test.py @@ -3,6 +3,7 @@ import dace from dace import dtypes from dace.transformation.dataflow import TaskletFusion, MapFusion +from dace.transformation.optimizer import Optimizer import pytest datatype = dace.float32 @@ -257,6 +258,33 @@ def sdfg_none_connector(A: dace.float32[32], B: dace.float32[32]): assert sdfg.start_state.out_degree(map_entry) == 1 assert len([edge.src_conn for edge in sdfg.start_state.out_edges(map_entry) if edge.src_conn is None]) == 0 + +def test_intermediate_transients(): + @dace.program + def sdfg_intermediate_transients(A: dace.float32[10], B: dace.float32[10]): + tmp = dace.define_local_scalar(dace.float32) + + # Use tmp twice to test removal of data + tmp = A[0] + 1 + tmp = tmp * 2 + B[0] = tmp + + + sdfg = sdfg_intermediate_transients.to_sdfg(simplify=True) + assert len([node for node in sdfg.start_state.data_nodes() if node.data == "tmp"]) == 2 + + xforms = Optimizer(sdfg=sdfg).get_pattern_matches(patterns=(TaskletFusion,)) + applied = False + for xform in xforms: + if xform.data.data == "tmp": + xform.apply(sdfg.start_state, sdfg) + applied = True + break + + assert applied + assert len([node for node in sdfg.start_state.data_nodes() if node.data == "tmp"]) == 1 + assert "tmp" in sdfg.arrays + if __name__ == '__main__': test_basic() test_same_name() @@ -268,3 +296,4 @@ def sdfg_none_connector(A: dace.float32[32], B: dace.float32[32]): test_map_with_tasklets(language='CPP', with_data=False) test_map_with_tasklets(language='CPP', with_data=True) test_none_connector() + test_intermediate_transients() diff --git a/tests/transformations/wcr_conversion_test.py b/tests/transformations/wcr_conversion_test.py index 091b2a9db8..25913e8db1 100644 --- a/tests/transformations/wcr_conversion_test.py +++ b/tests/transformations/wcr_conversion_test.py @@ -245,3 +245,21 @@ def sdfg_free_map_permissive(A: dace.float64[32], B: dace.float64[32]): applied = sdfg.apply_transformations_repeated(AugAssignToWCR, permissive=True) assert applied == 1 + +def test_aug_assign_same_inconns(): + + @dace.program + def sdfg_aug_assign_same_inconns(A: 
dace.float64[32]): + for i in dace.map[0:31]: + with dace.tasklet(language=dace.Language.Python): + a << A[i] + b << A[i+1] + c >> A[i] + + c = a * b + + sdfg = sdfg_aug_assign_same_inconns.to_sdfg() + sdfg.simplify() + + applied = sdfg.apply_transformations_repeated(AugAssignToWCR, permissive=True) + assert applied == 1 From 6ed6136c0872b32b65230b504f6dddecadf49207 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lukas=20Tr=C3=BCmper?= Date: Sun, 26 Nov 2023 00:27:28 +0100 Subject: [PATCH 143/163] AugAssignToWCR: Minor fix for node not found error (#1447) This fixes the problem that state_fission might have changed the number of nodes and thus self.tasklet is not pointing to the tasklet anymore --- dace/transformation/dataflow/wcr_conversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/transformation/dataflow/wcr_conversion.py b/dace/transformation/dataflow/wcr_conversion.py index 09bd8a3741..3ef508f7e5 100644 --- a/dace/transformation/dataflow/wcr_conversion.py +++ b/dace/transformation/dataflow/wcr_conversion.py @@ -205,7 +205,7 @@ def apply(self, state: SDFGState, sdfg: SDFG): re.escape(inconn)) match = re.match(func_lhs, cstr) if match is None: - inconns = list(self.tasklet.in_connectors) + inconns = list(tasklet.in_connectors) if len(inconns) != 2: continue From 8f229bc105d9e7e57c4d6883580b749a6d770b2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lukas=20Tr=C3=BCmper?= Date: Sun, 26 Nov 2023 10:59:44 +0100 Subject: [PATCH 144/163] OTFMapFusion: Minor bug fixes (#1448) This PR fixes the problem that data containers were removed from an SDFG, although they might be used in another state. Furthermore, it fixes the problem that self.second_map_entry may not point to the correct node after adding/removing nodes --- .../transformation/dataflow/otf_map_fusion.py | 34 ++++++++----------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/dace/transformation/dataflow/otf_map_fusion.py b/dace/transformation/dataflow/otf_map_fusion.py index f41e3b4e0b..0ff55213d7 100644 --- a/dace/transformation/dataflow/otf_map_fusion.py +++ b/dace/transformation/dataflow/otf_map_fusion.py @@ -132,6 +132,7 @@ def apply(self, graph: SDFGState, sdfg: SDFG): intermediate_access_node = self.array first_map_exit = self.first_map_exit first_map_entry = graph.entry_node(first_map_exit) + second_map_entry = self.second_map_entry # Prepare: Make first and second map parameters disjoint # This avoids mutual matching: i -> j, j -> i @@ -139,7 +140,7 @@ def apply(self, graph: SDFGState, sdfg: SDFG): for param in first_map_entry.map.params: i = 0 new_param = f"_i{i}" - while new_param in self.second_map_entry.map.params or new_param in first_map_entry.map.params: + while new_param in second_map_entry.map.params or new_param in first_map_entry.map.params: i = i + 1 new_param = f"_i{i}" @@ -147,12 +148,12 @@ def apply(self, graph: SDFGState, sdfg: SDFG): # Prepare: Preemptively rename params defined by second map in scope of first # This avoids that local variables (e.g., in nested SDFG) have collisions with new map scope - for param in self.second_map_entry.map.params: + for param in second_map_entry.map.params: new_param = param + "_local" advanced_replace(subgraph, param, new_param) # Add local buffers for array-like OTFs - for edge in graph.out_edges(self.second_map_entry): + for edge in graph.out_edges(second_map_entry): if edge.data is None or edge.data.data != intermediate_access_node.data: continue @@ -208,18 +209,18 @@ def apply(self, graph: SDFGState, sdfg: SDFG): save=False) # Phase 1: 
Add new access nodes to second map - for edge in graph.edges_between(intermediate_access_node, self.second_map_entry): + for edge in graph.edges_between(intermediate_access_node, second_map_entry): graph.remove_edge_and_connectors(edge) connector_mapping = {} for edge in graph.in_edges(first_map_entry): - new_in_connector = self.second_map_entry.next_connector(edge.dst_conn[3:]) + new_in_connector = second_map_entry.next_connector(edge.dst_conn[3:]) new_in_connector = "IN_" + new_in_connector - if not self.second_map_entry.add_in_connector(new_in_connector): + if not second_map_entry.add_in_connector(new_in_connector): raise ValueError("Failed to add new in connector") memlet = copy.deepcopy(edge.data) - graph.add_edge(edge.src, edge.src_conn, self.second_map_entry, new_in_connector, memlet) + graph.add_edge(edge.src, edge.src_conn, second_map_entry, new_in_connector, memlet) connector_mapping[edge.dst_conn] = new_in_connector @@ -231,7 +232,7 @@ def apply(self, graph: SDFGState, sdfg: SDFG): # Group by same access scheme consume_memlets = {} - for edge in graph.out_edges(self.second_map_entry): + for edge in graph.out_edges(second_map_entry): memlet = edge.data if memlet.data not in produce_memlets: continue @@ -246,7 +247,7 @@ def apply(self, graph: SDFGState, sdfg: SDFG): consume_memlets[memlet.data][accesses].append(edge) # And remove from second map - self.second_map_entry.remove_out_connector(edge.src_conn) + second_map_entry.remove_out_connector(edge.src_conn) graph.remove_edge(edge) # Phase 3: OTF - copy content of first map for each memlet of second according to matches @@ -256,7 +257,7 @@ def apply(self, graph: SDFGState, sdfg: SDFG): for second_accesses in consume_memlets[array]: # Step 1: Infer index access of second map to new inputs with respect to original first map mapping = OTFMapFusion.solve(first_map_entry.map.params, first_accesses, - self.second_map_entry.map.params, second_accesses) + second_map_entry.map.params, second_accesses) # Step 2: Add Temporary buffer tmp_name = sdfg.temp_data_name() @@ -296,16 +297,16 @@ def apply(self, graph: SDFGState, sdfg: SDFG): else: out_connector = edge.src_conn - if out_connector not in self.second_map_entry.out_connectors: - self.second_map_entry.add_out_connector(out_connector) + if out_connector not in second_map_entry.out_connectors: + second_map_entry.add_out_connector(out_connector) else: out_connector = None - graph.add_edge(self.second_map_entry, out_connector, node, edge.dst_conn, memlet) + graph.add_edge(second_map_entry, out_connector, node, edge.dst_conn, memlet) graph.remove_edge(edge) # Step 4: Rename all symbols of first map in copied content my matched symbol of second map - otf_nodes.append(self.second_map_entry) + otf_nodes.append(second_map_entry) otf_subgraph = StateSubgraphView(graph, otf_nodes) for param in mapping: if isinstance(param, tuple): @@ -316,14 +317,9 @@ def apply(self, graph: SDFGState, sdfg: SDFG): # Check if first_map is still consumed by some node if graph.out_degree(intermediate_access_node) == 0: - del sdfg.arrays[intermediate_access_node.data] graph.remove_node(intermediate_access_node) subgraph = graph.scope_subgraph(first_map_entry, include_entry=True, include_exit=True) - for dnode in subgraph.data_nodes(): - if dnode.data in sdfg.arrays: - del sdfg.arrays[dnode.data] - obsolete_nodes = graph.all_nodes_between(first_map_entry, first_map_exit) | {first_map_entry, first_map_exit} graph.remove_nodes_from(obsolete_nodes) From d15734661c409ff86cf85f0ead0c870defbe4de3 Mon Sep 17 00:00:00 2001 
From: Philipp Schaad
Date: Mon, 27 Nov 2023 17:18:07 +0100
Subject: [PATCH 145/163] Loop Regions (#1407)

This PR adds `LoopRegion`s to SDFGs. This forms the second core element of the
[plan to make loops first-class citizens of SDFGs](https://github.com/orgs/spcl/projects/10).
`LoopRegion`s are a special class of `ControlFlowRegion`s that represent
different types of loops, meaning the control flow region inside them is
executed a parametric number of times. A `LoopRegion` _must_ have a conditional
expression that determines whether the region is executed or not. It may
additionally have:
- an initialization expression that is run when the region is first executed
  (even if the condition to execute its contents does not hold)
- an update expression, executed each time the execution of the region contents
  is finished
- a flag indicating that it is inverted, meaning the contents are executed once
  before the condition is checked (the condition is checked again after every
  execution)
- a set of control flow blocks in its contents that, when reached, finish the
  execution of the region and do not execute it again even if the condition
  still holds (equivalent to `break` in C/Python)
- a set of control flow blocks that end the execution of its contents and
  execute the update statement if there is one. After this, the condition is
  checked again and the contents may be run again (equivalent to `continue` in
  C/Python)

For more general information on control flow regions, please refer to
[the documentation](https://spcldace.readthedocs.io/en/latest/sdfg/ir.html#elements)
or the [PR that introduced them](https://github.com/spcl/dace/pull/1404).

An example of a triple loop nest of regular for-loops can be seen in the GEMM
program below, showing that a proof-of-concept visualization for the introduced
concepts is already available in the latest release version of the
[VSCode extension](https://marketplace.visualstudio.com/items?itemName=phschaad.sdfv)
(version 1.6.0 and upwards):

![image](https://github.com/spcl/dace/assets/9193712/9955e3f4-3356-4c52-b715-323330a0e4e4)

As outlined by the [project plan](https://github.com/orgs/spcl/projects/10),
these `LoopRegion`s are currently _not_ being used by any frontend. They can,
however, already be manually used through the SDFG builder API. According to
plan, most passes and transformations are not currently able to handle the use
of such `LoopRegion`s yet, so their use is still highly experimental. To allow
traditional transformations and passes to work, as well as to be able to
generate code for SDFGs containing `LoopRegion`s, a compatibility pass
(`InlineLoopRegions`) is available, together with a utility function
`dace.sdfg.utils.inline_loop_blocks` that inlines any `LoopRegion`s into
traditional state machine loops.
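As a rough orientation for such manual use through the builder API, the sketch
below builds a simple counted loop as a `LoopRegion` and then lowers it with
the utility mentioned above. The keyword names (`condition_expr`, `loop_var`,
`initialize_expr`, `update_expr`, `inverted`) mirror the loop properties
described here but are assumptions, not a stable API reference:

```python
import dace
from dace.sdfg.state import LoopRegion

# A regular counted loop (for i = 0; i < 10; i = i + 1) as a LoopRegion.
# NOTE: Keyword names are assumptions based on this PR; the feature is
# experimental and the interface may change.
sdfg = dace.SDFG('regular_for')
sdfg.add_symbol('i', dace.int32)
sdfg.add_array('A', [10], dace.float32)

loop = LoopRegion(label='loop', condition_expr='i < 10', loop_var='i',
                  initialize_expr='i = 0', update_expr='i = i + 1',
                  inverted=False)
sdfg.add_node(loop)

# The loop body is an ordinary state inside the region.
body = loop.add_state('body', is_start_block=True)
tasklet = body.add_tasklet('set', {}, {'a'}, 'a = i')
body.add_edge(tasklet, 'a', body.add_write('A'), None, dace.Memlet('A[i]'))

# Lower to a traditional state machine loop for passes and codegen:
from dace.sdfg.utils import inline_loop_blocks
inline_loop_blocks(sdfg)
```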
In summary, the PR includes:
- [x] Add `LoopRegion`s to SDFGs, a special control flow region to represent loops
- [x] Pass / utility function to remove all loop regions from a graph, turning
  them back into regular state machine loops (a.k.a. inlining)
- [x] Tests for inlining of loops
- [x] Tests for the functional correctness of different types of loops (regular
  for, while, do-while, do-for) and nested loops (GEMM - triple nested loop)

---------

Co-authored-by: Tal Ben-Nun
---
 dace/codegen/codegen.py                       |   5 +-
 dace/sdfg/sdfg.py                             |   5 +
 dace/sdfg/state.py                            | 256 ++++++++++++---
 dace/sdfg/utils.py                            |  30 +-
 dace/sdfg/validation.py                       | 286 +++++++++--------
 dace/transformation/interstate/__init__.py    |   1 +
 .../interstate/control_flow_inline.py         | 115 +++++++
 tests/sdfg/loop_region_test.py                | 172 ++++++++++
 .../control_flow_inline_test.py               | 295 ++++++++++++++++++
 9 files changed, 982 insertions(+), 183 deletions(-)
 create mode 100644 dace/transformation/interstate/control_flow_inline.py
 create mode 100644 tests/sdfg/loop_region_test.py
 create mode 100644 tests/transformations/control_flow_inline_test.py

diff --git a/dace/codegen/codegen.py b/dace/codegen/codegen.py
index c502a47376..b7eed49f17 100644
--- a/dace/codegen/codegen.py
+++ b/dace/codegen/codegen.py
@@ -6,7 +6,7 @@ import dace
 from dace import dtypes
 from dace import data
-from dace.sdfg import SDFG
+from dace.sdfg import SDFG, utils as sdutils
 from dace.codegen.targets import framecode
 from dace.codegen.codeobject import CodeObject
 from dace.config import Config
@@ -178,6 +178,9 @@ def generate_code(sdfg, validate=True) -> List[CodeObject]:
             shutil.move(f"{tmp_dir}/test2.sdfg", "test2.sdfg")
             raise RuntimeError('SDFG serialization failed - files do not match')
 
+    # Convert any loop constructs with hierarchical loop regions into simple 1-level state machine loops.
+    # TODO (later): Adapt codegen to deal with hierarchical CFGs instead.
+    sdutils.inline_loop_blocks(sdfg)
 
     # Before generating the code, run type inference on the SDFG connectors
     infer_types.infer_connector_types(sdfg)

diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py
index fdf8835c7e..07267ec786 100644
--- a/dace/sdfg/sdfg.py
+++ b/dace/sdfg/sdfg.py
@@ -2150,6 +2150,7 @@ def compile(self, output_file=None, validate=True) -> 'CompiledSDFG':
 
         # Importing these outside creates an import loop
         from dace.codegen import codegen, compiler
+        from dace.sdfg import utils as sdutils
 
         # Compute build folder path before running codegen
         build_folder = self.build_folder
@@ -2170,6 +2171,10 @@ def compile(self, output_file=None, validate=True) -> 'CompiledSDFG':
         # if the codegen modifies the SDFG (thereby changing its hash)
         sdfg.build_folder = build_folder
 
+        # Convert any loop constructs with hierarchical loop regions into simple 1-level state machine loops.
+        # TODO (later): Adapt codegen to deal with hierarchical CFGs instead.
+ sdutils.inline_loop_blocks(sdfg) + # Rename SDFG to avoid runtime issues with clashing names index = 0 while sdfg.is_loaded(): diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py index 097365fbc3..ccc30df6ca 100644 --- a/dace/sdfg/state.py +++ b/dace/sdfg/state.py @@ -18,7 +18,7 @@ from dace import subsets as sbs from dace import symbolic from dace.properties import (CodeBlock, DictProperty, EnumProperty, Property, SubsetProperty, SymbolicProperty, - CodeProperty, make_properties) + CodeProperty, make_properties, SetProperty) from dace.sdfg import nodes as nd from dace.sdfg.graph import MultiConnectorEdge, OrderedMultiDiConnectorGraph, SubgraphView, OrderedDiGraph, Edge from dace.sdfg.propagation import propagate_memlet @@ -27,6 +27,7 @@ if TYPE_CHECKING: import dace.sdfg.scope + from dace.sdfg import SDFG NodeT = Union[nd.Node, 'ControlFlowBlock'] @@ -99,6 +100,10 @@ def in_degree(self, node: NodeT) -> int: def out_degree(self, node: NodeT) -> int: ... + @property + def sdfg(self) -> 'SDFG': + ... + ################################################################### # Traversal methods @@ -618,7 +623,7 @@ def is_leaf_memlet(self, e): def used_symbols(self, all_symbols: bool, keep_defined_in_mapping: bool=False) -> Set[str]: state = self.graph if isinstance(self, SubgraphView) else self - sdfg = state.parent + sdfg = state.sdfg new_symbols = set() freesyms = set() @@ -681,7 +686,7 @@ def defined_symbols(self) -> Dict[str, dt.Data]: state or subgraph to their types. """ state = self.graph if isinstance(self, SubgraphView) else self - sdfg = state.parent + sdfg = state.sdfg # Start with SDFG global symbols defined_syms = {k: v for k, v in sdfg.symbols.items()} @@ -773,7 +778,7 @@ def read_and_write_sets(self) -> Tuple[Set[AnyStr], Set[AnyStr]]: def unordered_arglist(self, defined_syms=None, shared_transients=None) -> Tuple[Dict[str, dt.Data], Dict[str, dt.Data]]: - sdfg: 'dace.sdfg.SDFG' = self.parent + sdfg: 'SDFG' = self.sdfg shared_transients = shared_transients or sdfg.shared_transients() sdict = self.scope_dict() @@ -886,7 +891,7 @@ def scope_subgraph(self, entry_node, include_entry=True, include_exit=True): def top_level_transients(self): """Iterate over top-level transients of this state.""" schildren = self.scope_children() - sdfg = self.parent + sdfg = self.sdfg result = set() for node in schildren[None]: if isinstance(node, nd.AccessNode) and node.desc(sdfg).transient: @@ -896,7 +901,7 @@ def top_level_transients(self): def all_transients(self) -> List[str]: """Iterate over all transients in this state.""" return dtypes.deduplicate( - [n.data for n in self.nodes() if isinstance(n, nd.AccessNode) and n.desc(self.parent).transient]) + [n.data for n in self.nodes() if isinstance(n, nd.AccessNode) and n.desc(self.sdfg).transient]) def replace(self, name: str, new_name: str): """ Finds and replaces all occurrences of a symbol or array in this @@ -1071,10 +1076,15 @@ class ControlFlowBlock(BlockGraphView, abc.ABC): _label: str - def __init__(self, label: str=''): + def __init__(self, + label: str='', + sdfg: Optional['SDFG'] = None, + parent: Optional['ControlFlowRegion'] = None): super(ControlFlowBlock, self).__init__() self._label = label self._default_lineinfo = None + self._sdfg = sdfg + self._parent_graph = parent self.is_collapsed = False def set_default_lineinfo(self, lineinfo: dace.dtypes.DebugInfo): @@ -1111,6 +1121,22 @@ def label(self, label: str): def name(self) -> str: return self._label + @property + def sdfg(self) -> 'SDFG': + return self._sdfg + + @sdfg.setter + def 
sdfg(self, sdfg: 'SDFG'): + self._sdfg = sdfg + + @property + def parent_graph(self) -> 'ControlFlowRegion': + return self._parent_graph + + @parent_graph.setter + def parent_graph(self, parent: Optional['ControlFlowRegion']): + self._parent_graph = parent + @make_properties class SDFGState(OrderedMultiDiConnectorGraph[nd.Node, mm.Memlet], ControlFlowBlock, DataflowGraphView): @@ -1156,10 +1182,9 @@ def __init__(self, label=None, sdfg=None, debuginfo=None, location=None): """ from dace.sdfg.sdfg import SDFG # Avoid import loop OrderedMultiDiConnectorGraph.__init__(self) - ControlFlowBlock.__init__(self, label) + ControlFlowBlock.__init__(self, label, sdfg) super(SDFGState, self).__init__() self._label = label - self._parent: SDFG = sdfg self._graph = self # Allowing MemletTrackingView mixin to work self._clear_scopedict_cache() self._debuginfo = debuginfo @@ -1186,11 +1211,11 @@ def __deepcopy__(self, memo): @property def parent(self): """ Returns the parent SDFG of this state. """ - return self._parent + return self.sdfg @parent.setter def parent(self, value): - self._parent = value + self.sdfg = value def is_empty(self): return self.number_of_nodes() == 0 @@ -1218,7 +1243,7 @@ def add_node(self, node): # Correct nested SDFG's parent attributes if isinstance(node, nd.NestedSDFG): node.sdfg.parent = self - node.sdfg.parent_sdfg = self.parent + node.sdfg.parent_sdfg = self.sdfg node.sdfg.parent_nsdfg_node = node self._clear_scopedict_cache() return super(SDFGState, self).add_node(node) @@ -1246,7 +1271,7 @@ def add_edge(self, u, u_connector, v, v_connector, memlet): self._clear_scopedict_cache() result = super(SDFGState, self).add_edge(u, u_connector, v, v_connector, memlet) - memlet.try_initialize(self.parent, self, result) + memlet.try_initialize(self.sdfg, self, result) return result def remove_edge(self, edge): @@ -1270,7 +1295,7 @@ def to_json(self, parent=None): # Try to initialize edges before serialization for edge in self.edges(): - edge.data.try_initialize(self.parent, self, edge) + edge.data.try_initialize(self.sdfg, self, edge) ret = { 'type': type(self).__name__, @@ -1342,7 +1367,7 @@ def _repr_html_(self): from dace.sdfg import SDFG arrays = set(n.data for n in self.data_nodes()) sdfg = SDFG(self.label) - sdfg._arrays = {k: self._parent.arrays[k] for k in arrays} + sdfg._arrays = {k: self.sdfg.arrays[k] for k in arrays} sdfg.add_node(self) return sdfg._repr_html_() @@ -1362,7 +1387,7 @@ def symbols_defined_at(self, node: nd.Node) -> Dict[str, dtypes.typeclass]: if node is None: return collections.OrderedDict() - sdfg: SDFG = self.parent + sdfg: SDFG = self.sdfg # Start with global symbols symbols = collections.OrderedDict(sdfg.symbols) @@ -1488,7 +1513,7 @@ def add_tasklet( def add_nested_sdfg( self, - sdfg: 'dace.sdfg.SDFG', + sdfg: 'SDFG', parent, inputs: Union[Set[str], Dict[str, dtypes.typeclass]], outputs: Union[Set[str], Dict[str, dtypes.typeclass]], @@ -1504,7 +1529,7 @@ def add_nested_sdfg( debuginfo = _getdebuginfo(debuginfo or self._default_lineinfo) sdfg.parent = self - sdfg.parent_sdfg = self.parent + sdfg.parent_sdfg = self.sdfg sdfg.update_sdfg_list([]) @@ -1551,7 +1576,7 @@ def add_nested_sdfg( if sym not in sdfg.symbols: # TODO: Think of a better way to avoid calling # symbols_defined_at in this moment - sdfg.add_symbol(sym, infer_expr_type(symval, self.parent.symbols) or dtypes.typeclass(int)) + sdfg.add_symbol(sym, infer_expr_type(symval, self.sdfg.symbols) or dtypes.typeclass(int)) return s @@ -1704,7 +1729,7 @@ def add_mapped_tasklet(self, else: 
outdict[out] = self.add_write(out) - edges = [] + edges: List[Edge[dace.Memlet]] = [] # Connect inputs from map to tasklet tomemlet = {} @@ -1770,7 +1795,7 @@ def add_mapped_tasklet(self, # Try to initialize memlets for edge in edges: - edge.data.try_initialize(self.parent, self, edge) + edge.data.try_initialize(self.sdfg, self, edge) return tasklet, map_entry, map_exit @@ -1952,8 +1977,8 @@ def add_edge_pair( ) # Try to initialize memlets - iedge.data.try_initialize(self.parent, self, iedge) - eedge.data.try_initialize(self.parent, self, eedge) + iedge.data.try_initialize(self.sdfg, self, iedge) + eedge.data.try_initialize(self.sdfg, self, eedge) return (iedge, eedge) @@ -2056,7 +2081,7 @@ def add_memlet_path(self, *path_nodes, memlet=None, src_conn=None, dst_conn=None cur_memlet = propagate_memlet(self, cur_memlet, snode, True) # Try to initialize memlets for edge in edges: - edge.data.try_initialize(self.parent, self, edge) + edge.data.try_initialize(self.sdfg, self, edge) def remove_memlet_path(self, edge: MultiConnectorEdge, remove_orphans: bool = True) -> None: """ Removes all memlets and associated connectors along a path formed @@ -2153,20 +2178,20 @@ def add_array(self, 'The "SDFGState.add_array" API is deprecated, please ' 'use "SDFG.add_array" and "SDFGState.add_access"', DeprecationWarning) # Workaround to allow this legacy API - if name in self.parent._arrays: - del self.parent._arrays[name] - self.parent.add_array(name, - shape, - dtype, - storage=storage, - transient=transient, - strides=strides, - offset=offset, - lifetime=lifetime, - debuginfo=debuginfo, - find_new_name=find_new_name, - total_size=total_size, - alignment=alignment) + if name in self.sdfg._arrays: + del self.sdfg._arrays[name] + self.sdfg.add_array(name, + shape, + dtype, + storage=storage, + transient=transient, + strides=strides, + offset=offset, + lifetime=lifetime, + debuginfo=debuginfo, + find_new_name=find_new_name, + total_size=total_size, + alignment=alignment) return self.add_access(name, debuginfo) def add_stream( @@ -2186,9 +2211,9 @@ def add_stream( 'The "SDFGState.add_stream" API is deprecated, please ' 'use "SDFG.add_stream" and "SDFGState.add_access"', DeprecationWarning) # Workaround to allow this legacy API - if name in self.parent._arrays: - del self.parent._arrays[name] - self.parent.add_stream( + if name in self.sdfg._arrays: + del self.sdfg._arrays[name] + self.sdfg.add_stream( name, dtype, buffer_size, @@ -2215,9 +2240,9 @@ def add_scalar( 'The "SDFGState.add_scalar" API is deprecated, please ' 'use "SDFG.add_scalar" and "SDFGState.add_access"', DeprecationWarning) # Workaround to allow this legacy API - if name in self.parent._arrays: - del self.parent._arrays[name] - self.parent.add_scalar(name, dtype, storage, transient, lifetime, debuginfo) + if name in self.sdfg._arrays: + del self.sdfg._arrays[name] + self.sdfg.add_scalar(name, dtype, storage, transient, lifetime, debuginfo) return self.add_access(name, debuginfo) def add_transient(self, @@ -2319,16 +2344,20 @@ class StateSubgraphView(SubgraphView, DataflowGraphView): def __init__(self, graph, subgraph_nodes): super().__init__(graph, subgraph_nodes) + @property + def sdfg(self) -> 'SDFG': + state: SDFGState = self.graph + return state.sdfg + @make_properties class ControlFlowRegion(OrderedDiGraph[ControlFlowBlock, 'dace.sdfg.InterstateEdge'], ControlGraphView, ControlFlowBlock): - def __init__(self, - label: str=''): + def __init__(self, label: str='', sdfg: Optional['SDFG'] = None): OrderedDiGraph.__init__(self) 
        ControlGraphView.__init__(self)
-        ControlFlowBlock.__init__(self, label)
+        ControlFlowBlock.__init__(self, label, sdfg)

         self._labels: Set[str] = set()
         self._start_block: Optional[int] = None
@@ -2356,6 +2385,11 @@ def add_node(self, node, is_start_block=False, *, is_start_state: bool=None):
             raise TypeError('Expected ControlFlowBlock, got ' + str(type(node)))
         super().add_node(node)
         self._cached_start_block = None
+        node.parent_graph = self
+        if isinstance(self, dace.SDFG):
+            node.sdfg = self
+        else:
+            node.sdfg = self.sdfg
         start_block = is_start_block
         if is_start_state is not None:
             warnings.warn('is_start_state is deprecated, use is_start_block instead', DeprecationWarning)
@@ -2372,7 +2406,6 @@ def add_state(self, label=None, is_start_block=False, *, is_start_state: bool=No
         existing_labels = self._labels
         label = dt.find_new_name(label, existing_labels)
         state = SDFGState(label)
-        state.parent = self
         self._labels.add(label)
         start_block = is_start_block
         if is_start_state is not None:
@@ -2491,7 +2524,7 @@ def all_control_flow_regions(self, recursive=False) -> Iterator['ControlFlowRegi
             elif isinstance(block, ControlFlowRegion):
                 yield from block.all_control_flow_regions(recursive=recursive)

-    def all_sdfgs_recursive(self) -> Iterator['dace.SDFG']:
+    def all_sdfgs_recursive(self) -> Iterator['SDFG']:
         """ Iterate over this and all nested SDFGs. """
         for cfg in self.all_control_flow_regions(recursive=True):
             if isinstance(cfg, dace.SDFG):
@@ -2554,3 +2587,128 @@ def start_block(self, block_id):
             raise ValueError('Invalid state ID')
         self._start_block = block_id
         self._cached_start_block = self.node(block_id)
+
+
+@make_properties
+class LoopRegion(ControlFlowRegion):
+    """
+    A control flow region that represents a loop.
+
+    Like in traditional programming languages, a loop has a condition that is checked before each iteration.
+    It may have zero or more initialization statements that are executed before the first loop iteration, and zero
+    or more update statements that are executed after each iteration. For example, a loop with only a condition and
+    neither an initialization nor an update statement is equivalent to a while loop, while a loop with
+    initialization and update statements represents a for loop. Loops may additionally be inverted, meaning that
+    the condition is checked after the first iteration instead of before.
+
+    A loop region, like any other control flow region, has a single distinct entry / start block, and one or more
+    exit blocks. Exit blocks are blocks that have no outgoing edges or only conditional outgoing edges. Whenever an
+    exit block finishes executing, one iteration of the loop is completed.
+
+    Loops may have an arbitrary number of break states. Whenever a break state finishes executing, the loop is
+    exited immediately. A loop may additionally have an arbitrary number of continue states. Whenever a continue
+    state finishes executing, the next iteration of the loop is started immediately (with execution of the update
+    statement(s), if present).
+    """
+
+    update_statement = CodeProperty(optional=True, allow_none=True, default=None,
+                                    desc='The loop update statement. May be None if the update happens elsewhere.')
+    init_statement = CodeProperty(optional=True, allow_none=True, default=None,
+                                  desc='The loop init statement. 
May be None if the initialization happens elsewhere.') + loop_condition = CodeProperty(allow_none=True, default=None, desc='The loop condition') + inverted = Property(dtype=bool, default=False, + desc='If True, the loop condition is checked after the first iteration.') + loop_variable = Property(dtype=str, default='', desc='The loop variable, if given') + break_states = SetProperty(element_type=int, desc='States that when reached break out of the loop') + continue_states = SetProperty(element_type=int, desc='States that when reached directly execute the next iteration') + + def __init__(self, + label: str, + condition_expr: str, + loop_var: Optional[str] = None, + initialize_expr: Optional[str] = None, + update_expr: Optional[str] = None, + inverted: bool = False): + super(LoopRegion, self).__init__(label) + + if initialize_expr is not None: + self.init_statement = CodeBlock(initialize_expr) + else: + self.init_statement = None + + if condition_expr: + self.loop_condition = CodeBlock(condition_expr) + else: + self.loop_condition = CodeBlock('True') + + if update_expr is not None: + self.update_statement = CodeBlock(update_expr) + else: + self.update_statement = None + + self.loop_variable = loop_var or '' + self.inverted = inverted + + def _used_symbols_internal(self, + all_symbols: bool, + defined_syms: Optional[Set]=None, + free_syms: Optional[Set]=None, + used_before_assignment: Optional[Set]=None, + keep_defined_in_mapping: bool=False) -> Tuple[Set[str], Set[str], Set[str]]: + defined_syms = set() if defined_syms is None else defined_syms + free_syms = set() if free_syms is None else free_syms + used_before_assignment = set() if used_before_assignment is None else used_before_assignment + + defined_syms.add(self.loop_variable) + if self.init_statement is not None: + free_syms |= self.init_statement.get_free_symbols() + if self.update_statement is not None: + free_syms |= self.update_statement.get_free_symbols() + free_syms |= self.loop_condition.get_free_symbols() + + b_free_symbols, b_defined_symbols, b_used_before_assignment = super()._used_symbols_internal( + all_symbols, keep_defined_in_mapping=keep_defined_in_mapping + ) + free_syms |= b_free_symbols + defined_syms |= b_defined_symbols + used_before_assignment |= b_used_before_assignment + + defined_syms -= used_before_assignment + free_syms -= defined_syms + + return free_syms, defined_syms, used_before_assignment + + def replace_dict(self, repl: Dict[str, str], + symrepl: Optional[Dict[symbolic.SymbolicType, symbolic.SymbolicType]] = None, + replace_in_graph: bool = True, replace_keys: bool = True): + if replace_keys: + from dace.sdfg.replace import replace_properties_dict + replace_properties_dict(self, repl, symrepl) + + if self.loop_variable and self.loop_variable in repl: + self.loop_variable = repl[self.loop_variable] + + super().replace_dict(repl, symrepl, replace_in_graph) + + def to_json(self, parent=None): + return super().to_json(parent) + + def _add_node_internal(self, node, is_continue=False, is_break=False): + if is_continue: + if is_break: + raise ValueError('Cannot set both is_continue and is_break') + self.continue_states.add(self.node_id(node)) + if is_break: + if is_continue: + raise ValueError('Cannot set both is_continue and is_break') + self.break_states.add(self.node_id(node)) + + def add_node(self, node, is_start_block=False, is_continue=False, is_break=False, *, is_start_state: bool = None): + super().add_node(node, is_start_block, is_start_state=is_start_state) + self._add_node_internal(node, 
is_continue, is_break)
+
+    def add_state(self, label=None, is_start_block=False, is_continue=False, is_break=False, *,
+                  is_start_state: bool = None) -> SDFGState:
+        state = super().add_state(label, is_start_block, is_start_state=is_start_state)
+        self._add_node_internal(state, is_continue, is_break)
+        return state
diff --git a/dace/sdfg/utils.py b/dace/sdfg/utils.py
index 621f8a9e16..d0f1a67ab9 100644
--- a/dace/sdfg/utils.py
+++ b/dace/sdfg/utils.py
@@ -13,7 +13,7 @@
 from dace.sdfg.graph import MultiConnectorEdge
 from dace.sdfg.sdfg import SDFG
 from dace.sdfg.nodes import Node, NestedSDFG
-from dace.sdfg.state import SDFGState, StateSubgraphView
+from dace.sdfg.state import SDFGState, StateSubgraphView, LoopRegion, ControlFlowBlock, GraphT
 from dace.sdfg.scope import ScopeSubgraphView
 from dace.sdfg import nodes as nd, graph as gr, propagation
 from dace import config, data as dt, dtypes, memlet as mm, subsets as sbs, symbolic
@@ -1248,6 +1248,34 @@ def fuse_states(sdfg: SDFG, permissive: bool = False, progress: bool = None) ->
     return counter


+def inline_loop_blocks(sdfg: SDFG, permissive: bool = False, progress: bool = None) -> int:
+    """
+    Inlines all loop regions in the given SDFG (and any nested SDFGs) into flat state machine loops.
+
+    :param sdfg: The SDFG in which to inline loop regions.
+    :param permissive: If True, allows inlining in permissive mode.
+    :param progress: If True, shows a progress bar while inlining.
+    :return: The number of loop regions inlined.
+    """
+    # Avoid import loops
+    from dace.transformation.interstate import LoopRegionInline
+
+    counter = 0
+    blocks = [(n, p) for n, p in sdfg.all_nodes_recursive() if isinstance(n, LoopRegion)]
+
+    for _block, _graph in optional_progressbar(reversed(blocks), title='Inlining Loops',
+                                               n=len(blocks), progress=progress):
+        block: ControlFlowBlock = _block
+        graph: GraphT = _graph
+        sdfg_id = block.sdfg.sdfg_id
+
+        # We have to reevaluate every time due to changing IDs
+        block_id = graph.node_id(block)
+
+        candidate = {
+            LoopRegionInline.loop: block,
+        }
+        inliner = LoopRegionInline()
+        inliner.setup_match(graph, sdfg_id, block_id, candidate, 0, override=True)
+        if inliner.can_be_applied(graph, 0, block.sdfg, permissive=permissive):
+            inliner.apply(graph, block.sdfg)
+            counter += 1
+
+    return counter
+
+
 def inline_sdfgs(sdfg: SDFG, permissive: bool = False, progress: bool = None, multistate: bool = True) -> int:
     """
     Inlines all possible nested SDFGs (or sub-SDFGs) using an optimized
diff --git a/dace/sdfg/validation.py b/dace/sdfg/validation.py
index 0bb3e9a64e..45d38e33e2 100644
--- a/dace/sdfg/validation.py
+++ b/dace/sdfg/validation.py
@@ -1,11 +1,11 @@
 # Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
 """ Exception classes and methods for validation of SDFGs. 
""" import copy -from dace.dtypes import DebugInfo, StorageType +from dace.dtypes import DebugInfo import os -from typing import TYPE_CHECKING, Dict, List, Set, Tuple, Union +from typing import TYPE_CHECKING, Dict, List, Set import warnings -from dace import dtypes, data as dt, subsets +from dace import dtypes, subsets from dace import symbolic if TYPE_CHECKING: @@ -19,7 +19,8 @@ def validate(graph: 'dace.sdfg.graph.SubgraphView'): - from dace.sdfg import SDFG, SDFGState, SubgraphView + from dace.sdfg import SDFG, SDFGState + from dace.sdfg.graph import SubgraphView gtype = graph.parent if isinstance(graph, SubgraphView) else graph if isinstance(gtype, SDFG): validate_sdfg(graph) @@ -27,6 +28,148 @@ def validate(graph: 'dace.sdfg.graph.SubgraphView'): validate_state(graph) +def validate_control_flow_region(sdfg: 'dace.sdfg.SDFG', + region: 'dace.sdfg.state.ControlFlowRegion', + initialized_transients: Set[str], + symbols: dict, + references: Set[int] = None, + **context: bool): + from dace.sdfg import SDFGState + from dace.sdfg.scope import is_in_scope + + if len(region.source_nodes()) > 1 and region.start_block is None: + raise InvalidSDFGError("Starting block undefined", sdfg, None) + + in_default_scope = None + + # Check every state separately + start_block = region.start_block + visited = set() + visited_edges = set() + # Run through blocks via DFS, ensuring that only the defined symbols are available for validation + for edge in region.dfs_edges(start_block): + # Source -> inter-state definition -> Destination + ########################################## + visited_edges.add(edge) + + # Reference check + if id(edge) in references: + raise InvalidSDFGInterstateEdgeError( + f'Duplicate inter-state edge object detected: "{edge}". Please ' + 'copy objects rather than using multiple references to the same one', sdfg, region.edge_id(edge)) + references.add(id(edge)) + if id(edge.data) in references: + raise InvalidSDFGInterstateEdgeError( + f'Duplicate inter-state edge object detected: "{edge}". Please ' + 'copy objects rather than using multiple references to the same one', sdfg, region.edge_id(edge)) + references.add(id(edge.data)) + + # Source + if edge.src not in visited: + visited.add(edge.src) + if isinstance(edge.src, SDFGState): + validate_state(edge.src, region.node_id(edge.src), sdfg, symbols, initialized_transients, references, + **context) + else: + validate_control_flow_region(sdfg, edge.src, initialized_transients, symbols, references, **context) + + ########################################## + # Edge + # Check inter-state edge for undefined symbols + undef_syms = set(edge.data.free_symbols) - set(symbols.keys()) + if len(undef_syms) > 0: + eid = region.edge_id(edge) + raise InvalidSDFGInterstateEdgeError( + f'Undefined symbols in edge: {undef_syms}. Add those with ' + '`sdfg.add_symbol()` or define outside with `dace.symbol()`', sdfg, eid) + + # Validate inter-state edge names + issyms = edge.data.new_symbols(sdfg, symbols) + if any(not dtypes.validate_name(s) for s in issyms): + invalid = next(s for s in issyms if not dtypes.validate_name(s)) + eid = region.edge_id(edge) + raise InvalidSDFGInterstateEdgeError("Invalid interstate symbol name %s" % invalid, sdfg, eid) + + # Ensure accessed data containers in assignments and conditions are accessible in this context + ise_memlets = edge.data.get_read_memlets(sdfg.arrays) + for memlet in ise_memlets: + container = memlet.data + if not _accessible(sdfg, container, context): + # Check context w.r.t. 
maps + if in_default_scope is None: # Lazy-evaluate in_default_scope + in_default_scope = False + if sdfg.parent_nsdfg_node is not None: + if is_in_scope(sdfg.parent_sdfg, sdfg.parent, sdfg.parent_nsdfg_node, + [dtypes.ScheduleType.Default]): + in_default_scope = True + if in_default_scope is False: + eid = region.edge_id(edge) + raise InvalidSDFGInterstateEdgeError( + f'Trying to read an inaccessible data container "{container}" ' + f'(Storage: {sdfg.arrays[container].storage}) in host code interstate edge', sdfg, eid) + + # Add edge symbols into defined symbols + symbols.update(issyms) + + ########################################## + # Destination + if edge.dst not in visited: + visited.add(edge.dst) + if isinstance(edge.dst, SDFGState): + validate_state(edge.dst, region.node_id(edge.dst), sdfg, symbols, initialized_transients, references, + **context) + else: + validate_control_flow_region(sdfg, edge.dst, initialized_transients, symbols, references, **context) + # End of block DFS + + # If there is only one block, the DFS will miss it + if start_block not in visited: + if isinstance(start_block, SDFGState): + validate_state(start_block, region.node_id(start_block), sdfg, symbols, initialized_transients, references, + **context) + else: + validate_control_flow_region(sdfg, start_block, initialized_transients, symbols, references, **context) + + # Validate all inter-state edges (including self-loops not found by DFS) + for eid, edge in enumerate(region.edges()): + if edge in visited_edges: + continue + + # Reference check + if id(edge) in references: + raise InvalidSDFGInterstateEdgeError( + f'Duplicate inter-state edge object detected: "{edge}". Please ' + 'copy objects rather than using multiple references to the same one', sdfg, eid) + references.add(id(edge)) + if id(edge.data) in references: + raise InvalidSDFGInterstateEdgeError( + f'Duplicate inter-state edge object detected: "{edge}". Please ' + 'copy objects rather than using multiple references to the same one', sdfg, eid) + references.add(id(edge.data)) + + issyms = edge.data.assignments.keys() + if any(not dtypes.validate_name(s) for s in issyms): + invalid = next(s for s in issyms if not dtypes.validate_name(s)) + raise InvalidSDFGInterstateEdgeError("Invalid interstate symbol name %s" % invalid, sdfg, eid) + + # Ensure accessed data containers in assignments and conditions are accessible in this context + ise_memlets = edge.data.get_read_memlets(sdfg.arrays) + for memlet in ise_memlets: + container = memlet.data + if not _accessible(sdfg, container, context): + # Check context w.r.t. maps + if in_default_scope is None: # Lazy-evaluate in_default_scope + in_default_scope = False + if sdfg.parent_nsdfg_node is not None: + if is_in_scope(sdfg.parent_sdfg, sdfg.parent, sdfg.parent_nsdfg_node, + [dtypes.ScheduleType.Default]): + in_default_scope = True + if in_default_scope is False: + raise InvalidSDFGInterstateEdgeError( + f'Trying to read an inaccessible data container "{container}" ' + f'(Storage: {sdfg.arrays[container].storage}) in host code interstate edge', sdfg, eid) + + def validate_sdfg(sdfg: 'dace.sdfg.SDFG', references: Set[int] = None, **context: bool): """ Verifies the correctness of an SDFG by applying multiple tests. 
@@ -42,7 +185,7 @@ def validate_sdfg(sdfg: 'dace.sdfg.SDFG', references: Set[int] = None, **context """ # Avoid import loop from dace.codegen.targets import fpga - from dace.sdfg.scope import is_devicelevel_gpu, is_devicelevel_fpga, is_in_scope + from dace.sdfg.scope import is_devicelevel_gpu, is_devicelevel_fpga references = references or set() @@ -58,11 +201,9 @@ def validate_sdfg(sdfg: 'dace.sdfg.SDFG', references: Set[int] = None, **context if not dtypes.validate_name(sdfg.name): raise InvalidSDFGError("Invalid name", sdfg, None) - if len(sdfg.source_nodes()) > 1 and sdfg.start_state is None: - raise InvalidSDFGError("Starting state undefined", sdfg, None) - - if len(set([s.label for s in sdfg.nodes()])) != len(sdfg.nodes()): - raise InvalidSDFGError("Found multiple states with the same name", sdfg, None) + all_blocks = set(sdfg.all_control_flow_blocks()) + if len(all_blocks) != len(set([s.label for s in all_blocks])): + raise InvalidSDFGError('Found multiple blocks with the same name', sdfg, None) # Validate data descriptors for name, desc in sdfg._arrays.items(): @@ -111,10 +252,7 @@ def validate_sdfg(sdfg: 'dace.sdfg.SDFG', references: Set[int] = None, **context # Check if SDFG is located within a GPU kernel context['in_gpu'] = is_devicelevel_gpu(sdfg, None, None) context['in_fpga'] = is_devicelevel_fpga(sdfg, None, None) - in_default_scope = None - # Check every state separately - start_state = sdfg.start_state initialized_transients = {'__pystate'} initialized_transients.update(sdfg.constants_prop.keys()) symbols = copy.deepcopy(sdfg.symbols) @@ -123,123 +261,7 @@ def validate_sdfg(sdfg: 'dace.sdfg.SDFG', references: Set[int] = None, **context for desc in sdfg.arrays.values(): for sym in desc.free_symbols: symbols[str(sym)] = sym.dtype - visited = set() - visited_edges = set() - # Run through states via DFS, ensuring that only the defined symbols - # are available for validation - for edge in sdfg.dfs_edges(start_state): - # Source -> inter-state definition -> Destination - ########################################## - visited_edges.add(edge) - - # Reference check - if id(edge) in references: - raise InvalidSDFGInterstateEdgeError( - f'Duplicate inter-state edge object detected: "{edge}". Please ' - 'copy objects rather than using multiple references to the same one', sdfg, sdfg.edge_id(edge)) - references.add(id(edge)) - if id(edge.data) in references: - raise InvalidSDFGInterstateEdgeError( - f'Duplicate inter-state edge object detected: "{edge}". Please ' - 'copy objects rather than using multiple references to the same one', sdfg, sdfg.edge_id(edge)) - references.add(id(edge.data)) - - # Source - if edge.src not in visited: - visited.add(edge.src) - validate_state(edge.src, sdfg.node_id(edge.src), sdfg, symbols, initialized_transients, references, - **context) - - ########################################## - # Edge - # Check inter-state edge for undefined symbols - undef_syms = set(edge.data.free_symbols) - set(symbols.keys()) - if len(undef_syms) > 0: - eid = sdfg.edge_id(edge) - raise InvalidSDFGInterstateEdgeError( - f'Undefined symbols in edge: {undef_syms}. 
Add those with ' - '`sdfg.add_symbol()` or define outside with `dace.symbol()`', sdfg, eid) - - # Validate inter-state edge names - issyms = edge.data.new_symbols(sdfg, symbols) - if any(not dtypes.validate_name(s) for s in issyms): - invalid = next(s for s in issyms if not dtypes.validate_name(s)) - eid = sdfg.edge_id(edge) - raise InvalidSDFGInterstateEdgeError("Invalid interstate symbol name %s" % invalid, sdfg, eid) - - # Ensure accessed data containers in assignments and conditions are accessible in this context - ise_memlets = edge.data.get_read_memlets(sdfg.arrays) - for memlet in ise_memlets: - container = memlet.data - if not _accessible(sdfg, container, context): - # Check context w.r.t. maps - if in_default_scope is None: # Lazy-evaluate in_default_scope - in_default_scope = False - if sdfg.parent_nsdfg_node is not None: - if is_in_scope(sdfg.parent_sdfg, sdfg.parent, sdfg.parent_nsdfg_node, - [dtypes.ScheduleType.Default]): - in_default_scope = True - if in_default_scope is False: - eid = sdfg.edge_id(edge) - raise InvalidSDFGInterstateEdgeError( - f'Trying to read an inaccessible data container "{container}" ' - f'(Storage: {sdfg.arrays[container].storage}) in host code interstate edge', sdfg, eid) - - # Add edge symbols into defined symbols - symbols.update(issyms) - - ########################################## - # Destination - if edge.dst not in visited: - visited.add(edge.dst) - validate_state(edge.dst, sdfg.node_id(edge.dst), sdfg, symbols, initialized_transients, references, - **context) - # End of state DFS - - # If there is only one state, the DFS will miss it - if start_state not in visited: - validate_state(start_state, sdfg.node_id(start_state), sdfg, symbols, initialized_transients, references, - **context) - - # Validate all inter-state edges (including self-loops not found by DFS) - for eid, edge in enumerate(sdfg.edges()): - if edge in visited_edges: - continue - - # Reference check - if id(edge) in references: - raise InvalidSDFGInterstateEdgeError( - f'Duplicate inter-state edge object detected: "{edge}". Please ' - 'copy objects rather than using multiple references to the same one', sdfg, eid) - references.add(id(edge)) - if id(edge.data) in references: - raise InvalidSDFGInterstateEdgeError( - f'Duplicate inter-state edge object detected: "{edge}". Please ' - 'copy objects rather than using multiple references to the same one', sdfg, eid) - references.add(id(edge.data)) - - issyms = edge.data.assignments.keys() - if any(not dtypes.validate_name(s) for s in issyms): - invalid = next(s for s in issyms if not dtypes.validate_name(s)) - raise InvalidSDFGInterstateEdgeError("Invalid interstate symbol name %s" % invalid, sdfg, eid) - - # Ensure accessed data containers in assignments and conditions are accessible in this context - ise_memlets = edge.data.get_read_memlets(sdfg.arrays) - for memlet in ise_memlets: - container = memlet.data - if not _accessible(sdfg, container, context): - # Check context w.r.t. 
maps
-                    if in_default_scope is None:  # Lazy-evaluate in_default_scope
-                        in_default_scope = False
-                        if sdfg.parent_nsdfg_node is not None:
-                            if is_in_scope(sdfg.parent_sdfg, sdfg.parent, sdfg.parent_nsdfg_node,
-                                           [dtypes.ScheduleType.Default]):
-                                in_default_scope = True
-                    if in_default_scope is False:
-                        raise InvalidSDFGInterstateEdgeError(
-                            f'Trying to read an inaccessible data container "{container}" '
-                            f'(Storage: {sdfg.arrays[container].storage}) in host code interstate edge', sdfg, eid)
-
+        validate_control_flow_region(sdfg, sdfg, initialized_transients, symbols, references, **context)
     except InvalidSDFGError as ex:
         # If the SDFG is invalid, save it
         fpath = os.path.join('_dacegraphs', 'invalid.sdfg')
@@ -315,7 +337,7 @@ def validate_state(state: 'dace.sdfg.SDFGState',
     from dace.sdfg.scope import scope_contains_scope, is_devicelevel_gpu, is_devicelevel_fpga

     sdfg = sdfg or state.parent
-    state_id = state_id or sdfg.node_id(state)
+    state_id = state_id if state_id is not None else state.parent_graph.node_id(state)
     symbols = symbols or {}
     initialized_transients = (initialized_transients if initialized_transients is not None else {'__pystate'})
     references = references or set()
@@ -337,7 +359,7 @@
     if not dtypes.validate_name(state._label):
         raise InvalidSDFGError("Invalid state name", sdfg, state_id)

-    if state._parent != sdfg:
+    if state.sdfg != sdfg:
         raise InvalidSDFGError("State does not point to the correct "
                                "parent", sdfg, state_id)
diff --git a/dace/transformation/interstate/__init__.py b/dace/transformation/interstate/__init__.py
index b8bcc716e6..b60b1891b1 100644
--- a/dace/transformation/interstate/__init__.py
+++ b/dace/transformation/interstate/__init__.py
@@ -1,6 +1,7 @@
 # Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
 """ This module initializes the inter-state transformations package."""
+from .control_flow_inline import LoopRegionInline
 from .state_fusion import StateFusion
 from .state_fusion_with_happens_before import StateFusionExtended
 from .state_elimination import (EndStateElimination, StartStateElimination, StateAssignElimination,
diff --git a/dace/transformation/interstate/control_flow_inline.py b/dace/transformation/interstate/control_flow_inline.py
new file mode 100644
index 0000000000..b86317b8ed
--- /dev/null
+++ b/dace/transformation/interstate/control_flow_inline.py
@@ -0,0 +1,115 @@
+# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved.
+""" Inline control flow regions in SDFGs. """
+
+from typing import Set, Optional
+
+from dace.frontend.python import astutils
+from dace.sdfg import SDFG, InterstateEdge, SDFGState
+from dace.sdfg import utils as sdutil
+from dace.sdfg.nodes import CodeBlock
+from dace.sdfg.state import ControlFlowRegion, LoopRegion
+from dace.transformation import transformation
+
+
+class LoopRegionInline(transformation.MultiStateTransformation):
+    """
+    Inlines a loop region into a single, flat state machine.
+    """
+
+    loop = transformation.PatternNode(LoopRegion)
+
+    @staticmethod
+    def annotates_memlets():
+        return False
+
+    @classmethod
+    def expressions(cls):
+        return [sdutil.node_path_graph(cls.loop)]
+
+    def can_be_applied(self, graph: ControlFlowRegion, expr_index: int, sdfg: SDFG, permissive: bool = False) -> bool:
+        # Check that the loop initialization and update statements each only contain assignments, if the loop has any.
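+        # Only plain assignment statements can later be converted into inter-state edge assignments by apply();
+        # any other statement (e.g., a bare comparison) has no equivalent on an inter-state edge.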
+ if self.loop.init_statement is not None: + if isinstance(self.loop.init_statement.code, list): + for stmt in self.loop.init_statement.code: + if not isinstance(stmt, astutils.ast.Assign): + return False + if self.loop.update_statement is not None: + if isinstance(self.loop.update_statement.code, list): + for stmt in self.loop.update_statement.code: + if not isinstance(stmt, astutils.ast.Assign): + return False + return True + + def apply(self, graph: ControlFlowRegion, sdfg: SDFG) -> Optional[int]: + parent: ControlFlowRegion = graph + + internal_start = self.loop.start_block + + # Add all boilerplate loop states necessary for the structure. + init_state = parent.add_state(self.loop.label + '_init') + guard_state = parent.add_state(self.loop.label + '_guard') + end_state = parent.add_state(self.loop.label + '_end') + loop_tail_state = parent.add_state(self.loop.label + '_tail') + + # Add all loop states and make sure to keep track of all the ones that need to be connected in the end. + to_connect: Set[SDFGState] = set() + for node in self.loop.nodes(): + parent.add_node(node) + if self.loop.out_degree(node) == 0: + to_connect.add(node) + + # Handle break and continue. + for continue_state_id in self.loop.continue_states: + continue_state = self.loop.node(continue_state_id) + to_connect.add(continue_state) + for break_state_id in self.loop.break_states: + break_state = self.loop.node(break_state_id) + parent.add_edge(break_state, end_state, InterstateEdge()) + + # Add all internal loop edges. + for edge in self.loop.edges(): + parent.add_edge(edge.src, edge.dst, edge.data) + + # Redirect all edges to the loop to the init state. + for b_edge in parent.in_edges(self.loop): + parent.add_edge(b_edge.src, init_state, b_edge.data) + parent.remove_edge(b_edge) + # Redirect all edges exiting the loop to instead exit the end state. + for a_edge in parent.out_edges(self.loop): + parent.add_edge(end_state, a_edge.dst, a_edge.data) + parent.remove_edge(a_edge) + + # Add an initialization edge that initializes the loop variable if applicable. + init_edge = InterstateEdge() + if self.loop.init_statement is not None: + init_edge.assignments = {} + for stmt in self.loop.init_statement.code: + assign: astutils.ast.Assign = stmt + init_edge.assignments[assign.targets[0].id] = astutils.unparse(assign.value) + if self.loop.inverted: + parent.add_edge(init_state, internal_start, init_edge) + else: + parent.add_edge(init_state, guard_state, init_edge) + + # Connect the loop tail. + update_edge = InterstateEdge() + if self.loop.update_statement is not None: + update_edge.assignments = {} + for stmt in self.loop.update_statement.code: + assign: astutils.ast.Assign = stmt + update_edge.assignments[assign.targets[0].id] = astutils.unparse(assign.value) + parent.add_edge(loop_tail_state, guard_state, update_edge) + + # Add condition checking edges and connect the guard state. + cond_expr = self.loop.loop_condition.code + parent.add_edge(guard_state, end_state, + InterstateEdge(CodeBlock(astutils.negate_expr(cond_expr)).code)) + parent.add_edge(guard_state, internal_start, InterstateEdge(CodeBlock(cond_expr).code)) + + # Connect any end states from the loop's internal state machine to the tail state so they end a + # loop iteration. Do the same for any continue states. + for node in to_connect: + parent.add_edge(node, loop_tail_state, InterstateEdge()) + + # Remove the original loop. 
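+        # (Its nodes and edges have already been re-parented into the containing graph above.)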
+ parent.remove_node(self.loop) diff --git a/tests/sdfg/loop_region_test.py b/tests/sdfg/loop_region_test.py new file mode 100644 index 0000000000..5742fc12ac --- /dev/null +++ b/tests/sdfg/loop_region_test.py @@ -0,0 +1,172 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +import dace +import numpy as np +from dace.sdfg.state import LoopRegion + + +def test_loop_regular_for(): + sdfg = dace.SDFG('regular_for') + state0 = sdfg.add_state('state0', is_start_block=True) + loop1 = LoopRegion(label='loop1', condition_expr='i < 10', loop_var='i', initialize_expr='i = 0', + update_expr='i = i + 1', inverted=False) + sdfg.add_node(loop1) + sdfg.add_symbol('i', dace.int32) + sdfg.add_array('A', [10], dace.float32) + state1 = loop1.add_state('state1', is_start_block=True) + acc_a = state1.add_access('A') + t1 = state1.add_tasklet('t1', None, {'a'}, 'a = i') + state1.add_edge(t1, 'a', acc_a, None, dace.Memlet('A[i]')) + state3 = sdfg.add_state('state3') + sdfg.add_edge(state0, loop1, dace.InterstateEdge()) + sdfg.add_edge(loop1, state3, dace.InterstateEdge()) + + assert sdfg.is_valid() + + a_validation = np.zeros([10], dtype=np.float32) + a_test = np.zeros([10], dtype=np.float32) + sdfg(A=a_test) + for i in range(10): + a_validation[i] = i + assert np.allclose(a_validation, a_test) + + +def test_loop_regular_while(): + sdfg = dace.SDFG('regular_while') + state0 = sdfg.add_state('state0', is_start_block=True) + loop1 = LoopRegion(label='loop1', condition_expr='i < 10') + sdfg.add_array('A', [10], dace.float32) + sdfg.add_node(loop1) + state1 = loop1.add_state('state1', is_start_block=True) + state2 = loop1.add_state('state2') + acc_a = state1.add_access('A') + t1 = state1.add_tasklet('t1', None, {'a'}, 'a = i') + state1.add_edge(t1, 'a', acc_a, None, dace.Memlet('A[i]')) + sdfg.add_symbol('i', dace.int32) + loop1.add_edge(state1, state2, dace.InterstateEdge(assignments={'i': 'i + 1'})) + state3 = sdfg.add_state('state3') + sdfg.add_edge(state0, loop1, dace.InterstateEdge(assignments={'i': '0'})) + sdfg.add_edge(loop1, state3, dace.InterstateEdge()) + + assert sdfg.is_valid() + + a_validation = np.zeros([10], dtype=np.float32) + a_test = np.zeros([10], dtype=np.float32) + sdfg(A=a_test) + for i in range(10): + a_validation[i] = i + assert np.allclose(a_validation, a_test) + + +def test_loop_do_while(): + sdfg = dace.SDFG('do_while') + sdfg.add_symbol('i', dace.int32) + state0 = sdfg.add_state('state0', is_start_block=True) + loop1 = LoopRegion(label='loop1', condition_expr='i < 10', inverted=True) + sdfg.add_node(loop1) + sdfg.add_array('A', [10], dace.float32) + state1 = loop1.add_state('state1', is_start_block=True) + state2 = loop1.add_state('state2') + acc_a = state1.add_access('A') + t1 = state1.add_tasklet('t1', None, {'a'}, 'a = i') + state1.add_edge(t1, 'a', acc_a, None, dace.Memlet('A[i]')) + loop1.add_edge(state1, state2, dace.InterstateEdge(assignments={'i': 'i + 1'})) + state3 = sdfg.add_state('state3') + sdfg.add_edge(state0, loop1, dace.InterstateEdge(assignments={'i': '10'})) + sdfg.add_edge(loop1, state3, dace.InterstateEdge()) + + assert sdfg.is_valid() + + a_validation = np.zeros([11], dtype=np.float32) + a_test = np.zeros([11], dtype=np.float32) + a_validation[10] = 10 + sdfg(A=a_test) + assert np.allclose(a_validation, a_test) + + +def test_loop_do_for(): + sdfg = dace.SDFG('do_for') + sdfg.add_symbol('i', dace.int32) + sdfg.add_array('A', [10], dace.float32) + state0 = sdfg.add_state('state0', is_start_block=True) + loop1 = LoopRegion(label='loop1', 
condition_expr='i < 10', loop_var='i', initialize_expr='i = 0', + update_expr='i = i + 1', inverted=True) + sdfg.add_node(loop1) + state1 = loop1.add_state('state1', is_start_block=True) + acc_a = state1.add_access('A') + t1 = state1.add_tasklet('t1', None, {'a'}, 'a = i') + state1.add_edge(t1, 'a', acc_a, None, dace.Memlet('A[i]')) + state2 = loop1.add_state('state2') + loop1.add_edge(state1, state2, dace.InterstateEdge()) + state3 = sdfg.add_state('state3') + sdfg.add_edge(state0, loop1, dace.InterstateEdge()) + sdfg.add_edge(loop1, state3, dace.InterstateEdge()) + + assert sdfg.is_valid() + + a_validation = np.zeros([10], dtype=np.float32) + a_test = np.zeros([10], dtype=np.float32) + sdfg(A=a_test) + for i in range(10): + a_validation[i] = i + assert np.allclose(a_validation, a_test) + + +def test_triple_nested_for(): + sdfg = dace.SDFG('gemm') + sdfg.add_symbol('i', dace.int32) + sdfg.add_symbol('j', dace.int32) + sdfg.add_symbol('k', dace.int32) + N = dace.symbol('N') + M = dace.symbol('M') + K = dace.symbol('K') + sdfg.add_symbol('N', dace.int32) + sdfg.add_array('A', [N, K], dace.float32) + sdfg.add_array('B', [K, M], dace.float32) + sdfg.add_array('C', [N, M], dace.float32) + sdfg.add_array('tmp', [N, M, K], dace.float32, transient=True) + i_loop = LoopRegion('outer', 'i < N', 'i', 'i = 0', 'i = i + 1') + j_loop = LoopRegion('middle', 'j < M', 'j', 'j = 0', 'j = j + 1') + k_loop = LoopRegion('inner', 'k < K', 'k', 'k = 0', 'k = k + 1') + reduce_state = sdfg.add_state('reduce') + sdfg.add_node(i_loop, is_start_block=True) + sdfg.add_edge(i_loop, reduce_state, dace.InterstateEdge()) + i_loop.add_node(j_loop, is_start_block=True) + j_loop.add_node(k_loop, is_start_block=True) + comp_state = k_loop.add_state('comp', is_start_block=True) + anode = comp_state.add_access('A') + bnode = comp_state.add_access('B') + tmpnode = comp_state.add_access('tmp') + tasklet = comp_state.add_tasklet('comp', {'a', 'b'}, {'t'}, 't = a * b') + comp_state.add_memlet_path(anode, tasklet, dst_conn='a', memlet=dace.Memlet.simple('A', 'i, k')) + comp_state.add_memlet_path(bnode, tasklet, dst_conn='b', memlet=dace.Memlet.simple('B', 'k, j')) + comp_state.add_memlet_path(tasklet, tmpnode, src_conn='t', memlet=dace.Memlet.simple('tmp', 'i, j, k')) + + tmpnode2 = reduce_state.add_access('tmp') + cnode = reduce_state.add_access('C') + red = reduce_state.add_reduce('lambda a, b: a + b', (2,), 0) + reduce_state.add_edge(tmpnode2, None, red, None, dace.Memlet.simple('tmp', '0:N, 0:M, 0:K')) + reduce_state.add_edge(red, None, cnode, None, dace.Memlet.simple('C', '0:N, 0:M')) + + assert sdfg.is_valid() + + N = 5 + M = 10 + K = 8 + A = np.random.rand(N, K).astype(np.float32) + B = np.random.rand(K, M).astype(np.float32) + C_test = np.random.rand(N, M).astype(np.float32) + C_validation = np.random.rand(N, M).astype(np.float32) + + C_validation = A @ B + + sdfg(A=A, B=B, C=C_test, N=N, M=M, K=K) + + assert np.allclose(C_validation, C_test) + + +if __name__ == '__main__': + test_loop_regular_for() + test_loop_regular_while() + test_loop_do_while() + test_loop_do_for() + test_triple_nested_for() diff --git a/tests/transformations/control_flow_inline_test.py b/tests/transformations/control_flow_inline_test.py new file mode 100644 index 0000000000..106a955143 --- /dev/null +++ b/tests/transformations/control_flow_inline_test.py @@ -0,0 +1,295 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. 
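+""" Tests for inlining loop regions into flat state machine loops. """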
+import sympy + +import dace +from dace.sdfg.state import LoopRegion +from dace.sdfg import utils as sdutils + + +def test_loop_inlining_regular_for(): + sdfg = dace.SDFG('inlining') + state0 = sdfg.add_state('state0', is_start_block=True) + loop1 = LoopRegion(label='loop1', condition_expr='i < 10', loop_var='i', initialize_expr='i = 0', + update_expr='i = i + 1', inverted=False) + sdfg.add_node(loop1) + state1 = loop1.add_state('state1', is_start_block=True) + state2 = loop1.add_state('state2') + loop1.add_edge(state1, state2, dace.InterstateEdge()) + state3 = sdfg.add_state('state3') + sdfg.add_edge(state0, loop1, dace.InterstateEdge()) + sdfg.add_edge(loop1, state3, dace.InterstateEdge()) + + sdutils.inline_loop_blocks(sdfg) + + states = sdfg.nodes() # Get top-level states only, not all (.states()), in case something went wrong + assert len(states) == 8 + assert state0 in states + assert state1 in states + assert state2 in states + assert state3 in states + + +def test_loop_inlining_regular_while(): + sdfg = dace.SDFG('inlining') + state0 = sdfg.add_state('state0', is_start_block=True) + loop1 = LoopRegion(label='loop1', condition_expr='i < 10') + sdfg.add_node(loop1) + state1 = loop1.add_state('state1', is_start_block=True) + state2 = loop1.add_state('state2') + loop1.add_edge(state1, state2, dace.InterstateEdge()) + state3 = sdfg.add_state('state3') + sdfg.add_edge(state0, loop1, dace.InterstateEdge()) + sdfg.add_edge(loop1, state3, dace.InterstateEdge()) + + sdutils.inline_loop_blocks(sdfg) + + states = sdfg.nodes() # Get top-level states only, not all (.states()), in case something went wrong + guard = None + for state in states: + if state.label == 'loop1_guard': + guard = state + break + assert guard is not None + cond_edges = sdfg.out_edges(guard) + assert len(cond_edges) == 2 + assert cond_edges[0].data.condition_sympy() == sympy.Not(cond_edges[1].data.condition_sympy()) + assert len(states) == 8 + assign_edges = sdfg.in_edges(guard) + assert len(assign_edges) == 2 + assert not any(e.data.assignments for e in assign_edges) + assert state0 in states + assert state1 in states + assert state2 in states + assert state3 in states + + +def test_loop_inlining_do_while(): + sdfg = dace.SDFG('inlining') + state0 = sdfg.add_state('state0', is_start_block=True) + loop1 = LoopRegion(label='loop1', condition_expr='i < 10', inverted=True) + sdfg.add_node(loop1) + state1 = loop1.add_state('state1', is_start_block=True) + state2 = loop1.add_state('state2') + loop1.add_edge(state1, state2, dace.InterstateEdge()) + state3 = sdfg.add_state('state3') + sdfg.add_edge(state0, loop1, dace.InterstateEdge()) + sdfg.add_edge(loop1, state3, dace.InterstateEdge()) + + sdutils.inline_loop_blocks(sdfg) + + states = sdfg.nodes() # Get top-level states only, not all (.states()), in case something went wrong + guard = None + init_state = None + for state in states: + if state.label == 'loop1_guard': + guard = state + elif state.label == 'loop1_init': + init_state = state + assert guard is not None + cond_edges = sdfg.out_edges(guard) + assert len(cond_edges) == 2 + assert cond_edges[0].data.condition_sympy() == sympy.Not(cond_edges[1].data.condition_sympy()) + assert len(states) == 8 + assign_edges = sdfg.in_edges(guard) + assert len(assign_edges) == 1 + assert not assign_edges[0].data.assignments + init_edges = sdfg.out_edges(init_state) + assert len(init_edges) == 1 + assert not init_edges[0].data.assignments + assert state0 in states + assert state1 in states + assert state2 in states + assert state3 in 
states + + +def test_loop_inlining_do_for(): + sdfg = dace.SDFG('inlining') + state0 = sdfg.add_state('state0', is_start_block=True) + loop1 = LoopRegion(label='loop1', condition_expr='i < 10', loop_var='i', initialize_expr='i = 0', + update_expr='i = i + 1', inverted=True) + sdfg.add_node(loop1) + state1 = loop1.add_state('state1', is_start_block=True) + state2 = loop1.add_state('state2') + loop1.add_edge(state1, state2, dace.InterstateEdge()) + state3 = sdfg.add_state('state3') + sdfg.add_edge(state0, loop1, dace.InterstateEdge()) + sdfg.add_edge(loop1, state3, dace.InterstateEdge()) + + sdutils.inline_loop_blocks(sdfg) + + states = sdfg.nodes() # Get top-level states only, not all (.states()), in case something went wrong + guard = None + init_state = None + for state in states: + if state.label == 'loop1_guard': + guard = state + elif state.label == 'loop1_init': + init_state = state + assert guard is not None + cond_edges = sdfg.out_edges(guard) + assert len(cond_edges) == 2 + assert cond_edges[0].data.condition_sympy() == sympy.Not(cond_edges[1].data.condition_sympy()) + assert len(states) == 8 + assign_edges = sdfg.in_edges(guard) + assert len(assign_edges) == 1 + assert assign_edges[0].data.assignments == {'i': '(i + 1)'} + init_edges = sdfg.out_edges(init_state) + assert len(init_edges) == 1 + assert init_edges[0].data.assignments == {'i': '0'} + assert state0 in states + assert state1 in states + assert state2 in states + assert state3 in states + + +def test_inline_triple_nested_for(): + sdfg = dace.SDFG('gemm') + N = dace.symbol('N') + M = dace.symbol('M') + K = dace.symbol('K') + sdfg.add_symbol('N', dace.int32) + sdfg.add_array('A', [N, K], dace.float32) + sdfg.add_array('B', [K, M], dace.float32) + sdfg.add_array('C', [N, M], dace.float32) + sdfg.add_array('tmp', [N, M, K], dace.float32, transient=True) + i_loop = LoopRegion('outer', 'i < N', 'i', 'i = 0', 'i = i + 1') + j_loop = LoopRegion('middle', 'j < M', 'j', 'j = 0', 'j = j + 1') + k_loop = LoopRegion('inner', 'k < K', 'k', 'k = 0', 'k = k + 1') + reduce_state = sdfg.add_state('reduce') + sdfg.add_node(i_loop, is_start_block=True) + sdfg.add_edge(i_loop, reduce_state, dace.InterstateEdge()) + i_loop.add_node(j_loop, is_start_block=True) + j_loop.add_node(k_loop, is_start_block=True) + comp_state = k_loop.add_state('comp', is_start_block=True) + anode = comp_state.add_access('A') + bnode = comp_state.add_access('B') + tmpnode = comp_state.add_access('tmp') + tasklet = comp_state.add_tasklet('comp', {'a', 'b'}, {'t'}, 't = a * b') + comp_state.add_memlet_path(anode, tasklet, dst_conn='a', memlet=dace.Memlet.simple('A', 'i, k')) + comp_state.add_memlet_path(bnode, tasklet, dst_conn='b', memlet=dace.Memlet.simple('B', 'k, j')) + comp_state.add_memlet_path(tasklet, tmpnode, src_conn='t', memlet=dace.Memlet.simple('tmp', 'i, j, k')) + + tmpnode2 = reduce_state.add_access('tmp') + cnode = reduce_state.add_access('C') + red = reduce_state.add_reduce('lambda a, b: a + b', (2,), 0) + reduce_state.add_edge(tmpnode2, None, red, None, dace.Memlet.simple('tmp', '0:N, 0:M, 0:K')) + reduce_state.add_edge(red, None, cnode, None, dace.Memlet.simple('C', '0:N, 0:M')) + + sdutils.inline_loop_blocks(sdfg) + + assert len(sdfg.nodes()) == 14 + assert not any(isinstance(s, LoopRegion) for s in sdfg.nodes()) + assert sdfg.is_valid() + + +def test_loop_inlining_for_continue_break(): + sdfg = dace.SDFG('inlining') + state0 = sdfg.add_state('state0', is_start_block=True) + loop1 = LoopRegion(label='loop1', condition_expr='i < 10', loop_var='i', 
initialize_expr='i = 0', + update_expr='i = i + 1', inverted=False) + sdfg.add_node(loop1) + state1 = loop1.add_state('state1', is_start_block=True) + state2 = loop1.add_state('state2') + state3 = loop1.add_state('state3') + state4 = loop1.add_state('state4') + state5 = loop1.add_state('state5') + state6 = loop1.add_state('state6') + loop1.add_edge(state1, state2, dace.InterstateEdge(condition='i < 5')) + loop1.add_edge(state1, state3, dace.InterstateEdge(condition='i >= 5')) + loop1.add_edge(state3, state4, dace.InterstateEdge(condition='i < 6')) + loop1.add_edge(state3, state5, dace.InterstateEdge(condition='i >= 6')) + loop1.add_edge(state5, state6, dace.InterstateEdge()) + loop1.continue_states = {loop1.node_id(state2)} + loop1.break_states = {loop1.node_id(state4)} + sdfg.add_edge(state0, loop1, dace.InterstateEdge()) + state7 = sdfg.add_state('state7') + sdfg.add_edge(loop1, state7, dace.InterstateEdge()) + + sdutils.inline_loop_blocks(sdfg) + + states = sdfg.nodes() # Get top-level states only, not all (.states()), in case something went wrong + assert len(states) == 12 + assert not any(isinstance(s, LoopRegion) for s in states) + end_state = None + tail_state = None + for state in states: + if state.label == 'loop1_end': + end_state = state + elif state.label == 'loop1_tail': + tail_state = state + assert end_state is not None + assert len(sdfg.edges_between(state4, end_state)) == 1 + assert len(sdfg.edges_between(state2, tail_state)) == 1 + + +def test_loop_inlining_multi_assignments(): + sdfg = dace.SDFG('inlining') + sdfg.add_symbol('j', dace.int32) + state0 = sdfg.add_state('state0', is_start_block=True) + loop1 = LoopRegion(label='loop1', condition_expr='i < 10', loop_var='i', initialize_expr='i = 0; j = 10 + 200 - 1', + update_expr='i = i + 1; j = j + i', inverted=False) + sdfg.add_node(loop1) + state1 = loop1.add_state('state1', is_start_block=True) + state2 = loop1.add_state('state2') + loop1.add_edge(state1, state2, dace.InterstateEdge()) + state3 = sdfg.add_state('state3') + sdfg.add_edge(state0, loop1, dace.InterstateEdge()) + sdfg.add_edge(loop1, state3, dace.InterstateEdge()) + + sdutils.inline_loop_blocks(sdfg) + + states = sdfg.nodes() # Get top-level states only, not all (.states()), in case something went wrong + assert len(states) == 8 + assert state0 in states + assert state1 in states + assert state2 in states + assert state3 in states + + guard_state = None + init_state = None + tail_state = None + for state in sdfg.states(): + if state.label == 'loop1_guard': + guard_state = state + elif state.label == 'loop1_init': + init_state = state + elif state.label == 'loop1_tail': + tail_state = state + init_edge = sdfg.edges_between(init_state, guard_state)[0] + assert 'i' in init_edge.data.assignments + assert 'j' in init_edge.data.assignments + update_edge = sdfg.edges_between(tail_state, guard_state)[0] + assert 'i' in update_edge.data.assignments + assert 'j' in update_edge.data.assignments + + +def test_loop_inlining_invalid_update_statement(): + # Inlining should not be applied here. 
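+    # ('j < i' in the update expression below is not an assignment, so can_be_applied must reject the loop.)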
+ sdfg = dace.SDFG('inlining') + sdfg.add_symbol('j', dace.int32) + state0 = sdfg.add_state('state0', is_start_block=True) + loop1 = LoopRegion(label='loop1', condition_expr='i < 10', loop_var='i', initialize_expr='i = 0', + update_expr='i = i + 1; j < i', inverted=False) + sdfg.add_node(loop1) + state1 = loop1.add_state('state1', is_start_block=True) + state2 = loop1.add_state('state2') + loop1.add_edge(state1, state2, dace.InterstateEdge()) + state3 = sdfg.add_state('state3') + sdfg.add_edge(state0, loop1, dace.InterstateEdge()) + sdfg.add_edge(loop1, state3, dace.InterstateEdge()) + + sdutils.inline_loop_blocks(sdfg) + + nodes = sdfg.nodes() + assert len(nodes) == 3 + + +if __name__ == '__main__': + test_loop_inlining_regular_for() + test_loop_inlining_regular_while() + test_loop_inlining_do_while() + test_loop_inlining_do_for() + test_inline_triple_nested_for() + test_loop_inlining_for_continue_break() + test_loop_inlining_multi_assignments() + test_loop_inlining_invalid_update_statement() From 4b5e2c255becefadf07e70f558952f03a8924fe2 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Mon, 27 Nov 2023 09:21:31 -0700 Subject: [PATCH 146/163] Fix three issues related to deepcopying elements (#1446) This PR fixes #1439 and #1443 by adapting fields and the deepcopy operation for states: 1. Skips derived field `parent` being set if a state is deepcopied on its own 2. Does not add a new field to AST nodes during preprocessing. That parent-pointing field outlives preprocessing and ends up copying the entire original AST for short codeblocks. 3. Does not add a new field to states during state propagation. --- dace/frontend/python/preprocessing.py | 11 ++++++----- dace/sdfg/propagation.py | 11 +++++++---- dace/sdfg/state.py | 9 +++++++++ dace/transformation/helpers.py | 1 + tests/sdfg/state_test.py | 16 ++++++++++++++++ tests/transformations/loop_to_map_test.py | 1 + 6 files changed, 40 insertions(+), 9 deletions(-) diff --git a/dace/frontend/python/preprocessing.py b/dace/frontend/python/preprocessing.py index 1636e57ad0..90ef506bcd 100644 --- a/dace/frontend/python/preprocessing.py +++ b/dace/frontend/python/preprocessing.py @@ -1522,14 +1522,15 @@ def __init__(self, globals: Dict[str, Any]): from mpi4py import MPI self.globals = globals self.MPI = MPI + self.parents = {} self.parent = None def visit(self, node): - node.parent = self.parent + self.parents[node] = self.parent self.parent = node node = super().visit(node) if isinstance(node, ast.AST): - self.parent = node.parent + self.parent = self.parents[node] return node def visit_Name(self, node: ast.Name) -> Union[ast.Name, ast.Attribute]: @@ -1540,7 +1541,7 @@ def visit_Name(self, node: ast.Name) -> Union[ast.Name, ast.Attribute]: lattr = ast.Attribute(ast.Name(id='mpi4py', ctx=ast.Load), attr='MPI') if obj is self.MPI.COMM_NULL: newnode = ast.copy_location(ast.Attribute(value=lattr, attr='COMM_NULL'), node) - newnode.parent = node.parent + self.parents[newnode] = self.parents[node] return newnode return node @@ -1549,10 +1550,10 @@ def visit_Attribute(self, node: ast.Attribute) -> ast.Attribute: if isinstance(node.attr, str) and node.attr == 'Request': try: val = astutils.evalnode(node, self.globals) - if val is self.MPI.Request and not isinstance(node.parent, ast.Attribute): + if val is self.MPI.Request and not isinstance(self.parents[node], ast.Attribute): newnode = ast.copy_location( ast.Attribute(value=ast.Name(id='dace', ctx=ast.Load), attr='MPI_Request'), node) - newnode.parent = node.parent + self.parents[newnode] = 
self.parents[node] return newnode except SyntaxError: pass diff --git a/dace/sdfg/propagation.py b/dace/sdfg/propagation.py index 0554775dcd..18c4d7a192 100644 --- a/dace/sdfg/propagation.py +++ b/dace/sdfg/propagation.py @@ -565,12 +565,15 @@ def _annotate_loop_ranges(sdfg, unannotated_cycle_states): :param sdfg: The SDFG in which to look. :param unannotated_cycle_states: List of lists. Each sub-list contains the states of one unannotated cycle. + :return: A dictionary mapping guard states to their condition edges, if applicable """ # We import here to avoid cyclic imports. from dace.transformation.interstate.loop_detection import find_for_loop from dace.sdfg import utils as sdutils + condition_edges = {} + for cycle in sdfg.find_cycles(): # In each cycle, try to identify a valid loop guard state. guard = None @@ -667,7 +670,7 @@ def _annotate_loop_ranges(sdfg, unannotated_cycle_states): for v in loop_states: v.ranges[itervar] = subsets.Range([rng]) guard.ranges[itervar] = subsets.Range([rng]) - guard.condition_edge = sdfg.edges_between(guard, begin)[0] + condition_edges[guard] = sdfg.edges_between(guard, begin)[0] guard.is_loop_guard = True guard.itvar = itervar else: @@ -675,6 +678,7 @@ def _annotate_loop_ranges(sdfg, unannotated_cycle_states): # dynamically unbounded. unannotated_cycle_states.append(cycle) + return condition_edges def propagate_states(sdfg, concretize_dynamic_unbounded=False) -> None: """ @@ -760,7 +764,7 @@ def propagate_states(sdfg, concretize_dynamic_unbounded=False) -> None: # Find any valid for loop constructs and annotate the loop ranges. Any other # cycle should be marked as unannotated. unannotated_cycle_states = [] - _annotate_loop_ranges(sdfg, unannotated_cycle_states) + condition_edges = _annotate_loop_ranges(sdfg, unannotated_cycle_states) if not concretize_dynamic_unbounded: # Flatten the list. This keeps the old behavior of propagate_states. 
unannotated_cycle_states = [state for cycle in unannotated_cycle_states for state in cycle] @@ -869,7 +873,7 @@ def propagate_states(sdfg, concretize_dynamic_unbounded=False) -> None: (outer_itvar, 0, ceiling((outer_stop - outer_start) / outer_stride))) loop_executions = loop_executions.doit() - loop_state = state.condition_edge.dst + loop_state = condition_edges[state].dst end_state = (out_edges[0].dst if out_edges[1].dst == loop_state else out_edges[1].dst) traversal_q.append((end_state, state.executions, proposed_dynamic, itvar_stack)) @@ -1142,7 +1146,6 @@ def reset_state_annotations(sdfg): state.executions = 0 state.dynamic_executions = True state.ranges = {} - state.condition_edge = None state.is_loop_guard = False state.itervar = None diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py index ccc30df6ca..7f816abdbb 100644 --- a/dace/sdfg/state.py +++ b/dace/sdfg/state.py @@ -1197,7 +1197,16 @@ def __deepcopy__(self, memo): result = cls.__new__(cls) memo[id(self)] = result for k, v in self.__dict__.items(): + if k == '_parent': # Skip derivative attributes + continue setattr(result, k, copy.deepcopy(v, memo)) + + for k in ('_parent',): + if id(getattr(self, k)) in memo: + setattr(result, k, memo[id(getattr(self, k))]) + else: + setattr(result, k, None) + for node in result.nodes(): if isinstance(node, nd.NestedSDFG): try: diff --git a/dace/transformation/helpers.py b/dace/transformation/helpers.py index 9c41e4dec4..b6e7d80b3d 100644 --- a/dace/transformation/helpers.py +++ b/dace/transformation/helpers.py @@ -205,6 +205,7 @@ def _copy_state(sdfg: SDFG, state_copy = copy.deepcopy(state) state_copy._label += '_copy' + state_copy.parent = sdfg sdfg.add_node(state_copy) in_conditions = [] diff --git a/tests/sdfg/state_test.py b/tests/sdfg/state_test.py index 48dea04d0b..eb4e97ba66 100644 --- a/tests/sdfg/state_test.py +++ b/tests/sdfg/state_test.py @@ -1,5 +1,6 @@ # Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. 
import dace +from dace.transformation.helpers import find_sdfg_control_flow def test_read_write_set(): @@ -42,7 +43,22 @@ def test_read_write_set_y_formation(): assert 'B' not in state.read_and_write_sets()[0] +def test_deepcopy_state(): + N = dace.symbol('N') + + @dace.program + def double_loop(arr: dace.float32[N]): + for i in range(N): + arr[i] *= 2 + for i in range(N): + arr[i] *= 2 + + sdfg = double_loop.to_sdfg() + find_sdfg_control_flow(sdfg) + sdfg.validate() + if __name__ == '__main__': test_read_write_set() test_read_write_set_y_formation() + test_deepcopy_state() diff --git a/tests/transformations/loop_to_map_test.py b/tests/transformations/loop_to_map_test.py index 73a0bfc4cc..13abe83434 100644 --- a/tests/transformations/loop_to_map_test.py +++ b/tests/transformations/loop_to_map_test.py @@ -757,3 +757,4 @@ def internal_write(inp0: dace.int32[10], inp1: dace.int32[10], out: dace.int32[1 test_thread_local_transient_multi_state() test_nested_loops() test_internal_write() + test_specialize() From ec456e66e5851cf89258b7729c9921c8f105f135 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Mon, 27 Nov 2023 08:25:36 -0800 Subject: [PATCH 147/163] Merge fix --- dace/sdfg/state.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py index 7f816abdbb..64f47b14bf 100644 --- a/dace/sdfg/state.py +++ b/dace/sdfg/state.py @@ -1197,11 +1197,11 @@ def __deepcopy__(self, memo): result = cls.__new__(cls) memo[id(self)] = result for k, v in self.__dict__.items(): - if k == '_parent': # Skip derivative attributes + if k in ('_parent_graph', '_sdfg'): # Skip derivative attributes continue setattr(result, k, copy.deepcopy(v, memo)) - for k in ('_parent',): + for k in ('_parent_graph', '_sdfg'): if id(getattr(self, k)) in memo: setattr(result, k, memo[id(getattr(self, k))]) else: From cfa0871238f8a364f14ef014ea82653b9f132df4 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Mon, 27 Nov 2023 23:37:39 -0700 Subject: [PATCH 148/163] Fix CUDA high-dimensional test (#1441) Fixes invalid ranges used in a test. 
Opened following #1337 --- tests/cuda_highdim_kernel_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/cuda_highdim_kernel_test.py b/tests/cuda_highdim_kernel_test.py index 8a3dade4e5..b9dd36877e 100644 --- a/tests/cuda_highdim_kernel_test.py +++ b/tests/cuda_highdim_kernel_test.py @@ -22,7 +22,7 @@ def highdim(A: dace.uint64[N, M, K, L, X, Y, Z, W, U], B: dace.uint64[N, M, K, L @dace.mapscope def kernel(i: _[5:N - 5], j: _[0:M], k: _[7:K - 1], l: _[0:L]): @dace.map - def block(a: _[0:X], b: _[0:Y], c: _[1:Z], d: _[2:W - 2], e: _[0:U]): + def block(a: _[0:X], b: _[0:Y], c: _[1:Z], d: _[2:W - 1], e: _[0:U]): input << A[i, j, k, l, a, b, c, d, e] output >> B(1, lambda a, b: a + b)[i, j, k, l] output = input @@ -31,7 +31,7 @@ def block(a: _[0:X], b: _[0:Y], c: _[1:Z], d: _[2:W - 2], e: _[0:U]): def makendrange(*args): result = [] for i in range(0, len(args), 2): - result.append(slice(args[i], args[i + 1] - 1, 1)) + result.append(slice(args[i], args[i + 1], 1)) return result @@ -58,7 +58,7 @@ def _test(sdfg): # Equivalent python code for i, j, k, l in dace.ndrange(makendrange(5, N - 5, 0, M, 7, K - 1, 0, L)): - for a, b, c, d, e in dace.ndrange(makendrange(0, X, 0, Y, 1, Z, 2, W - 2, 0, U)): + for a, b, c, d, e in dace.ndrange(makendrange(0, X, 0, Y, 1, Z, 2, W - 1, 0, U)): B_regression[i, j, k, l] += A[i, j, k, l, a, b, c, d, e] sdfg(A=A, B=B, N=N, M=M, K=K, L=L, X=X, Y=Y, Z=Z, W=W, U=U) From 54e6860b0920c6b670d70d49621753dd2b1dbb27 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 28 Nov 2023 13:47:42 -0500 Subject: [PATCH 149/163] Compress the SDFG generated when failing/invalid for larger codebase (#1456) Relates to #1453 --- dace/sdfg/sdfg.py | 4 ++-- dace/sdfg/validation.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index 07267ec786..e89c167184 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -2191,8 +2191,8 @@ def compile(self, output_file=None, validate=True) -> 'CompiledSDFG': # Generate code for the program by traversing the SDFG state by state program_objects = codegen.generate_code(sdfg, validate=validate) except Exception: - fpath = os.path.join('_dacegraphs', 'failing.sdfg') - self.save(fpath) + fpath = os.path.join('_dacegraphs', 'failing.sdfgz') + self.save(fpath, compress=True) print(f'Failing SDFG saved for inspection in {os.path.abspath(fpath)}') raise diff --git a/dace/sdfg/validation.py b/dace/sdfg/validation.py index 45d38e33e2..a3914494c3 100644 --- a/dace/sdfg/validation.py +++ b/dace/sdfg/validation.py @@ -264,8 +264,8 @@ def validate_sdfg(sdfg: 'dace.sdfg.SDFG', references: Set[int] = None, **context validate_control_flow_region(sdfg, sdfg, initialized_transients, symbols, references, **context) except InvalidSDFGError as ex: # If the SDFG is invalid, save it - fpath = os.path.join('_dacegraphs', 'invalid.sdfg') - sdfg.save(fpath, exception=ex) + fpath = os.path.join('_dacegraphs', 'invalid.sdfgz') + sdfg.save(fpath, exception=ex, compress=True) ex.path = fpath raise From 16c569b8bb71b0e3dfbe87206c975dc83427beca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20M=C3=BCller?= <147368808+philip-paul-mueller@users.noreply.github.com> Date: Wed, 29 Nov 2023 16:35:00 +0100 Subject: [PATCH 150/163] `SDFG.arg_names` was not a member but a class variable. (#1457) The member variable `arg_names` was not created in the constructor. Thus a class variable was created instead which was shared among all instances. 
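For illustration, here is a minimal sketch of the pitfall being fixed (a
hypothetical class, not the actual SDFG code):

    class Graph:
        arg_names = []  # class attribute: one shared list for all instances

    a, b = Graph(), Graph()
    a.arg_names.append('x')
    print(b.arg_names)  # prints ['x'] -- the mutation leaks into every instance

    class FixedGraph:
        def __init__(self):
            self.arg_names = []  # instance attribute, as this patch now does

Mutable defaults therefore belong in `__init__`, not in the class body.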
---
 dace/sdfg/sdfg.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py
index e89c167184..020fb9dbab 100644
--- a/dace/sdfg/sdfg.py
+++ b/dace/sdfg/sdfg.py
@@ -500,6 +500,7 @@ def __init__(self,
 self._parent_nsdfg_node = None
 self._sdfg_list = [self]
 self._arrays = NestedDict() # type: Dict[str, dt.Array]
+ self.arg_names = []
 self._labels: Set[str] = set()
 self.global_code = {'frame': CodeBlock("", dtypes.Language.CPP)}
 self.init_code = {'frame': CodeBlock("", dtypes.Language.CPP)}

From edbf49f2339e487d2cabac112bce011dce580dc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lukas=20Tr=C3=BCmper?= Date: Wed, 29 Nov 2023 20:27:20 +0100
Subject: [PATCH 151/163] PruneConnectors: Fission into separate states before
 pruning (#1451)

The PruneConnectors transformation currently avoids pruning connectors on
access nodes that are also connected to other nodes. Fissioning the subgraph
into a separate state first, then pruning, and finally fusing the states again
simplifies the problem, because we can simply reuse the analysis implemented in
StateFusion.
---
 .../dataflow/prune_connectors.py | 87 +++++++++++--------
 .../npbench/polybench/floyd_warshall_test.py | 14 ++-
 .../transformations/prune_connectors_test.py | 82 +++++++++++++++++
 3 files changed, 146 insertions(+), 37 deletions(-)

diff --git a/dace/transformation/dataflow/prune_connectors.py b/dace/transformation/dataflow/prune_connectors.py
index ecc89bc753..865f28f7d9 100644
--- a/dace/transformation/dataflow/prune_connectors.py
+++ b/dace/transformation/dataflow/prune_connectors.py
@@ -1,12 +1,12 @@
 # Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
-from os import stat
-from typing import Any, AnyStr, Dict, Optional, Set, Tuple, Union
+from typing import Set, Tuple
 import re

-from dace import dtypes, registry, SDFG, SDFGState, symbolic, properties, data as dt
+from dace import dtypes, SDFG, SDFGState, symbolic, properties, data as dt
 from dace.transformation import transformation as pm, helpers
 from dace.sdfg import nodes, utils
 from dace.sdfg.analysis import cfg
+from dace.sdfg.state import StateSubgraphView


 @properties.make_properties
@@ -46,23 +46,52 @@ def can_be_applied(self, graph: SDFGState, expr_index: int, sdfg: SDFG, permissi

 # Add WCR outputs to "do not prune" input list
 for e in graph.out_edges(nsdfg):
 if e.data.wcr is not None and e.src_conn in prune_in:
- if (graph.in_degree(next(iter(graph.in_edges_by_connector(nsdfg, e.src_conn))).src) > 0):
- prune_in.remove(e.src_conn)
- has_before = all(
- graph.in_degree(graph.memlet_path(e)[0].src) > 0 for e in graph.in_edges(nsdfg) if e.dst_conn in prune_in)
- has_after = all(
- graph.out_degree(graph.memlet_path(e)[-1].dst) > 0 for e in graph.out_edges(nsdfg)
- if e.src_conn in prune_out)
- if has_before and has_after:
+ prune_in.remove(e.src_conn)
+
+ if not prune_in and not prune_out:
 return False
- if len(prune_in) > 0 or len(prune_out) > 0:
- return True
- return False
+ return True

 def apply(self, state: SDFGState, sdfg: SDFG):
 nsdfg = self.nsdfg

+ # Fission subgraph around nsdfg into its own state to avoid data races
+ predecessors = set()
+ for inedge in state.in_edges(nsdfg):
+ if inedge.data is None:
+ continue
+
+ pred = state.memlet_path(inedge)[0].src
+ if state.in_degree(pred) == 0:
+ continue
+
+ predecessors.add(pred)
+ for e in state.bfs_edges(pred, reverse=True):
+ predecessors.add(e.src)
+
+ subgraph = StateSubgraphView(state, predecessors)
+ pred_state = helpers.state_fission(sdfg, subgraph)
+
+ subgraph_nodes = set()
+ subgraph_nodes.add(nsdfg)
+ for inedge
in state.in_edges(nsdfg): + if inedge.data is None: + continue + path = state.memlet_path(inedge) + for edge in path: + subgraph_nodes.add(edge.src) + + for oedge in state.out_edges(nsdfg): + if oedge.data is None: + continue + path = state.memlet_path(oedge) + for edge in path: + subgraph_nodes.add(edge.dst) + + subgraph = StateSubgraphView(state, subgraph_nodes) + nsdfg_state = helpers.state_fission(sdfg, subgraph) + read_set, write_set = nsdfg.sdfg.read_and_write_sets() prune_in = nsdfg.in_connectors.keys() - read_set prune_out = nsdfg.out_connectors.keys() - write_set @@ -70,36 +99,26 @@ def apply(self, state: SDFGState, sdfg: SDFG): # Detect which nodes are used, so we can delete unused nodes after the # connectors have been pruned all_data_used = read_set | write_set + # Add WCR outputs to "do not prune" input list - for e in state.out_edges(nsdfg): + for e in nsdfg_state.out_edges(nsdfg): if e.data.wcr is not None and e.src_conn in prune_in: - if (state.in_degree(next(iter(state.in_edges_by_connector(nsdfg, e.src_conn))).src) > 0): - prune_in.remove(e.src_conn) - do_not_prune = set() + prune_in.remove(e.src_conn) + for conn in prune_in: - if any( - state.in_degree(state.memlet_path(e)[0].src) > 0 for e in state.in_edges(nsdfg) - if e.dst_conn == conn): - do_not_prune.add(conn) - continue - for e in state.in_edges_by_connector(nsdfg, conn): - state.remove_memlet_path(e, remove_orphans=True) + for e in nsdfg_state.in_edges_by_connector(nsdfg, conn): + nsdfg_state.remove_memlet_path(e, remove_orphans=True) for conn in prune_out: - if any( - state.out_degree(state.memlet_path(e)[-1].dst) > 0 for e in state.out_edges(nsdfg) - if e.src_conn == conn): - do_not_prune.add(conn) - continue - for e in state.out_edges_by_connector(nsdfg, conn): - state.remove_memlet_path(e, remove_orphans=True) + for e in nsdfg_state.out_edges_by_connector(nsdfg, conn): + nsdfg_state.remove_memlet_path(e, remove_orphans=True) for conn in prune_in: - if conn in nsdfg.sdfg.arrays and conn not in all_data_used and conn not in do_not_prune: + if conn in nsdfg.sdfg.arrays and conn not in all_data_used: # If the data is now unused, we can purge it from the SDFG nsdfg.sdfg.remove_data(conn) for conn in prune_out: - if conn in nsdfg.sdfg.arrays and conn not in all_data_used and conn not in do_not_prune: + if conn in nsdfg.sdfg.arrays and conn not in all_data_used: # If the data is now unused, we can purge it from the SDFG nsdfg.sdfg.remove_data(conn) diff --git a/tests/npbench/polybench/floyd_warshall_test.py b/tests/npbench/polybench/floyd_warshall_test.py index a95a417a19..7bd1e3d91d 100644 --- a/tests/npbench/polybench/floyd_warshall_test.py +++ b/tests/npbench/polybench/floyd_warshall_test.py @@ -7,7 +7,7 @@ import pytest import argparse from dace.fpga_testing import fpga_test -from dace.transformation.interstate import FPGATransformSDFG, InlineSDFG +from dace.transformation.interstate import FPGATransformSDFG, InlineSDFG, StateFusion from dace.transformation.dataflow import StreamingMemory, MapFusion, StreamingComposition, PruneConnectors from dace.transformation.auto.auto_optimize import auto_optimize, fpga_auto_opt @@ -91,15 +91,23 @@ def run_floyd_warshall(device_type: dace.dtypes.DeviceType): }]) assert pruned_conns == 1 + sdfg.apply_transformations_repeated(StateFusion) fpga_auto_opt.fpga_rr_interleave_containers_to_banks(sdfg) # In this case, we want to generate the top-level state as an host-based state, # not an FPGA kernel. 
We need to explicitly indicate that - sdfg.states()[0].location["is_FPGA_kernel"] = False + for state in sdfg.states(): + if any([isinstance(node, dace.nodes.NestedSDFG) for node in state.nodes()]): + state.location["is_FPGA_kernel"] = False + # we need to specialize both the top-level SDFG and the nested SDFG sdfg.specialize(dict(N=N)) - sdfg.states()[0].nodes()[0].sdfg.specialize(dict(N=N)) + for state in sdfg.states(): + for node in state.nodes(): + if isinstance(node, dace.nodes.NestedSDFG): + node.sdfg.specialize(dict(N=N)) + # run program sdfg(path=path) diff --git a/tests/transformations/prune_connectors_test.py b/tests/transformations/prune_connectors_test.py index 1b9ee4369d..e9c7e34a83 100644 --- a/tests/transformations/prune_connectors_test.py +++ b/tests/transformations/prune_connectors_test.py @@ -2,9 +2,12 @@ import argparse import numpy as np import os +import copy import pytest import dace from dace.transformation.dataflow import PruneConnectors +from dace.transformation.helpers import nest_state_subgraph +from dace.sdfg.state import StateSubgraphView def make_sdfg(): @@ -237,6 +240,84 @@ def test_unused_retval_2(): assert np.allclose(a, 1) +def test_prune_connectors_with_dependencies(): + sdfg = dace.SDFG('tester') + A, A_desc = sdfg.add_array('A', [4], dace.float64) + B, B_desc = sdfg.add_array('B', [4], dace.float64) + C, C_desc = sdfg.add_array('C', [4], dace.float64) + D, D_desc = sdfg.add_array('D', [4], dace.float64) + + state = sdfg.add_state() + a = state.add_access("A") + b1 = state.add_access("B") + b2 = state.add_access("B") + c1 = state.add_access("C") + c2 = state.add_access("C") + d = state.add_access("D") + + _, map_entry_a, map_exit_a = state.add_mapped_tasklet("a", + map_ranges={"i": "0:4"}, + inputs={"_in": dace.Memlet(data="A", subset='i')}, + outputs={"_out": dace.Memlet(data="B", subset='i')}, + code="_out = _in + 1") + state.add_edge(a, None, map_entry_a, None, dace.Memlet(data="A", subset="0:4")) + state.add_edge(map_exit_a, None, b1, None, dace.Memlet(data="B", subset="0:4")) + + tasklet_c, map_entry_c, map_exit_c = state.add_mapped_tasklet("c", + map_ranges={"i": "0:4"}, + inputs={"_in": dace.Memlet(data="C", subset='i')}, + outputs={"_out": dace.Memlet(data="C", subset='i')}, + code="_out = _in + 1") + state.add_edge(c1, None, map_entry_c, None, dace.Memlet(data="C", subset="0:4")) + state.add_edge(map_exit_c, None, c2, None, dace.Memlet(data="C", subset="0:4")) + + _, map_entry_d, map_exit_d = state.add_mapped_tasklet("d", + map_ranges={"i": "0:4"}, + inputs={"_in": dace.Memlet(data="B", subset='i')}, + outputs={"_out": dace.Memlet(data="D", subset='i')}, + code="_out = _in + 1") + state.add_edge(b2, None, map_entry_d, None, dace.Memlet(data="B", subset="0:4")) + state.add_edge(map_exit_d, None, d, None, dace.Memlet(data="D", subset="0:4")) + + sdfg.fill_scope_connectors() + + subgraph = StateSubgraphView(state, subgraph_nodes=[map_entry_c, map_exit_c, tasklet_c]) + nsdfg_node = nest_state_subgraph(sdfg, state, subgraph=subgraph) + + nsdfg_node.sdfg.add_datadesc("B1", datadesc=copy.deepcopy(B_desc)) + nsdfg_node.sdfg.arrays["B1"].transient = False + nsdfg_node.sdfg.add_datadesc("B2", datadesc=copy.deepcopy(B_desc)) + nsdfg_node.sdfg.arrays["B2"].transient = False + + nsdfg_node.add_in_connector("B1") + state.add_edge(b1, None, nsdfg_node, "B1", dace.Memlet.from_array(dataname="B", datadesc=B_desc)) + nsdfg_node.add_out_connector("B2") + state.add_edge(nsdfg_node, "B2", b2, None, dace.Memlet.from_array(dataname="B", datadesc=B_desc)) + + np_a = 
np.random.random(4) + np_a_ = np.copy(np_a) + np_b = np.random.random(4) + np_b_ = np.copy(np_b) + np_c = np.random.random(4) + np_c_ = np.copy(np_c) + np_d = np.random.random(4) + np_d_ = np.copy(np_d) + + sdfg(A=np_a, B=np_b, C=np_c, D=np_d) + + applied = sdfg.apply_transformations_repeated(PruneConnectors) + assert applied == 1 + assert len(sdfg.states()) == 3 + assert "B1" not in nsdfg_node.in_connectors + assert "B2" not in nsdfg_node.out_connectors + + sdfg(A=np_a_, B=np_b_, C=np_c_, D=np_d_) + assert np.allclose(np_a, np_a_) + assert np.allclose(np_b, np_b_) + assert np.allclose(np_c, np_c_) + assert np.allclose(np_d, np_d_) + + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--N", default=64) @@ -248,3 +329,4 @@ def test_unused_retval_2(): test_prune_connectors(True, n=n) test_unused_retval() test_unused_retval_2() + test_prune_connectors_with_dependencies() From 79cf2ff96d58766327587eddcc9bb7e10fb702a6 Mon Sep 17 00:00:00 2001 From: alexnick83 <31545860+alexnick83@users.noreply.github.com> Date: Thu, 30 Nov 2023 16:45:44 +0100 Subject: [PATCH 152/163] In-out connector's global source when connector becomes out-only at outer SDFG scopes. (#1463) Adds utility-method support for the case of an in-out nested SDFG connector that is out-only at outer SDFG scopes. --- dace/sdfg/utils.py | 5 +++ tests/sdfg/validation/nested_sdfg_test.py | 48 +++++++++++++++++++++-- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/dace/sdfg/utils.py b/dace/sdfg/utils.py index d0f1a67ab9..1405901802 100644 --- a/dace/sdfg/utils.py +++ b/dace/sdfg/utils.py @@ -1847,6 +1847,11 @@ def get_global_memlet_path_src(sdfg: SDFG, state: SDFGState, edge: MultiConnecto if len(pedges) > 0: pedge = pedges[0] return get_global_memlet_path_src(psdfg, pstate, pedge) + else: + pedges = list(pstate.out_edges_by_connector(pnode, src.data)) + if len(pedges) > 0: + pedge = pedges[0] + return get_global_memlet_path_dst(psdfg, pstate, pedge) return src diff --git a/tests/sdfg/validation/nested_sdfg_test.py b/tests/sdfg/validation/nested_sdfg_test.py index 100568507e..67ed8ab2a8 100644 --- a/tests/sdfg/validation/nested_sdfg_test.py +++ b/tests/sdfg/validation/nested_sdfg_test.py @@ -1,5 +1,6 @@ # Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. 
import dace +import numpy as np def test_inout_connector_validation_success(): @@ -33,6 +34,48 @@ def test_inout_connector_validation_success(): return +def test_inout_connector_validation_success_2(): + + sdfg = dace.SDFG("test_inout_connector_validation_success_2") + sdfg.add_array("A", [1], dace.int32) + + nsdfg_0 = dace.SDFG("nested_sdfg_0") + nsdfg_0.add_array("B", [1], dace.int32) + + nsdfg_1 = dace.SDFG("nested_sdfg_1") + nsdfg_1.add_array("C", [1], dace.int32) + + nstate = nsdfg_1.add_state() + read_c = nstate.add_access("C") + write_c = nstate.add_access("C") + tasklet = nstate.add_tasklet("tasklet", {"__inp"}, {"__out"}, "__out = __inp + 5") + nstate.add_edge(read_c, None, tasklet, '__inp', dace.Memlet.from_array('C', nsdfg_1.arrays['C'])) + nstate.add_edge(tasklet, '__out', write_c, None, dace.Memlet.from_array('C', nsdfg_1.arrays['C'])) + + nstate = nsdfg_0.add_state() + tasklet_0 = nstate.add_tasklet("tasklet_00", {}, {"__out"}, "__out = 3") + write_b_0 = nstate.add_access("B") + tasklet_1 = nstate.add_nested_sdfg(nsdfg_1, nsdfg_0, {"C"}, {"C"}) + write_b_1 = nstate.add_access("B") + nstate.add_edge(tasklet_0, '__out', write_b_0, None, dace.Memlet.from_array('B', nsdfg_0.arrays['B'])) + nstate.add_edge(write_b_0, None, tasklet_1, 'C', dace.Memlet.from_array('B', nsdfg_0.arrays['B'])) + nstate.add_edge(tasklet_1, 'C', write_b_1, None, dace.Memlet.from_array('B', nsdfg_0.arrays['B'])) + + state = sdfg.add_state() + tasklet = state.add_nested_sdfg(nsdfg_0, sdfg, {}, {"B"}) + write_a = state.add_access("A") + state.add_edge(tasklet, 'B', write_a, None, dace.Memlet.from_array('A', sdfg.arrays['A'])) + + try: + sdfg.validate() + except dace.sdfg.InvalidSDFGError: + assert False, "SDFG should validate" + + A = np.array([1], dtype=np.int32) + sdfg(A=A) + assert A[0] == 8 + + def test_inout_connector_validation_fail(): sdfg = dace.SDFG("test_inout_connector_validation_fail") @@ -79,7 +122,6 @@ def mystate(state, src, dst): # output path (tasklet[b]->dst) state.add_memlet_path(tasklet, dst_node, src_conn='b', memlet=dace.Memlet(data=dst, subset='0')) - sub_sdfg = dace.SDFG('nested_sub') sub_sdfg.add_scalar('sA', dace.float32) sub_sdfg.add_scalar('sB', dace.float32, transient=True) @@ -92,7 +134,6 @@ def mystate(state, src, dst): sub_sdfg.add_edge(state0, state1, dace.InterstateEdge()) - state = sdfg.add_state('s0') me, mx = state.add_map('mymap', dict(k='0:2')) nsdfg = state.add_nested_sdfg(sub_sdfg, sdfg, {'sA'}, {'sC'}) @@ -101,7 +142,7 @@ def mystate(state, src, dst): state.add_memlet_path(Ain, me, nsdfg, memlet=dace.Memlet(data='A', subset='k'), dst_conn='sA') state.add_memlet_path(nsdfg, mx, Aout, memlet=dace.Memlet(data='A', subset='k'), src_conn='sC') - + try: sdfg.validate() except dace.sdfg.InvalidSDFGError: @@ -112,5 +153,6 @@ def mystate(state, src, dst): if __name__ == "__main__": test_inout_connector_validation_success() + test_inout_connector_validation_success_2() test_inout_connector_validation_fail() test_nested_sdfg_with_transient_connector() From b0cd25b9263a3c615ee2f3325167944628fbfde5 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Mon, 4 Dec 2023 00:40:07 -0800 Subject: [PATCH 153/163] Fix two regressions in v0.15 (#1465) * Schedule tree: Fix support for empty memlets and array use in interstate edges * Move clearing local scope to tasklet processing due to shift in call stacks in v0.15 that may skip said clearing --- dace/codegen/targets/cpu.py | 1 + .../analysis/schedule_tree/sdfg_to_tree.py | 7 +++- tests/codegen/unparse_tasklet_test.py | 40 +++++++++++++++++++ 
3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 72ca554a4a..3944d05b09 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -1460,6 +1460,7 @@ def _generate_Tasklet(self, sdfg, dfg, state_id, node, function_stream, callsite callsite_stream.write('}', sdfg, state_id, node) callsite_stream.write(outer_stream_end.getvalue(), sdfg, state_id, node) + self._locals.clear_scope(self._ldepth + 1) self._dispatcher.defined_vars.exit_scope(node) def unparse_tasklet(self, sdfg, state_id, dfg, node, function_stream, inner_stream, locals, ldepth, diff --git a/dace/sdfg/analysis/schedule_tree/sdfg_to_tree.py b/dace/sdfg/analysis/schedule_tree/sdfg_to_tree.py index a519f24596..78b2280902 100644 --- a/dace/sdfg/analysis/schedule_tree/sdfg_to_tree.py +++ b/dace/sdfg/analysis/schedule_tree/sdfg_to_tree.py @@ -88,6 +88,8 @@ def dealias_sdfg(sdfg: SDFG): nsdfg.arrays[name] = child_arr for state in nsdfg.states(): for e in state.edges(): + if e.data.is_empty(): + continue if not state.is_leaf_memlet(e): continue @@ -129,7 +131,10 @@ def dealias_sdfg(sdfg: SDFG): syms.remove(memlet.data) for s in syms: if s in parent_edges: - repl_dict[s] = str(parent_edges[s].data) + if s in nsdfg.arrays: + repl_dict[s] = parent_edges[s].data.data + else: + repl_dict[s] = str(parent_edges[s].data) e.data.replace_dict(repl_dict) for name in child_names: edge = parent_edges[name] diff --git a/tests/codegen/unparse_tasklet_test.py b/tests/codegen/unparse_tasklet_test.py index 5281c109ba..2ed2bd494b 100644 --- a/tests/codegen/unparse_tasklet_test.py +++ b/tests/codegen/unparse_tasklet_test.py @@ -1,9 +1,11 @@ # Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. import dace import numpy as np +import pytest def test_integer_power(): + @dace.program def powint(A: dace.float64[20], B: dace.float64[20]): for i in dace.map[0:20]: @@ -20,6 +22,7 @@ def powint(A: dace.float64[20], B: dace.float64[20]): def test_integer_power_constant(): + @dace.program def powint(A: dace.float64[20]): for i in dace.map[0:20]: @@ -35,6 +38,7 @@ def powint(A: dace.float64[20]): def test_equality(): + @dace.program def nested(a, b, c): pass @@ -61,8 +65,44 @@ def f32_pow_failure(array): assert ref.dtype == val.dtype +@pytest.mark.gpu +def test_tasklets_with_same_local_name(): + sdfg = dace.SDFG('tester') + sdfg.add_array('A', [4], dace.float32, dace.StorageType.GPU_Global) + state = sdfg.add_state() + me, mx = state.add_map('kernel', dict(i='0:1'), schedule=dace.ScheduleType.GPU_Device) + t1 = state.add_tasklet( + 'sgn', {'a'}, {'b'}, ''' +mylocal: dace.float32 +if a > 0: + mylocal = 1 +else: + mylocal = -1 +b = mylocal + ''') + t2 = state.add_tasklet( + 'sgn', {'a'}, {'b'}, ''' +mylocal: dace.float32 +if a > 0: + mylocal = 1 +else: + mylocal = -1 +b = mylocal + ''') + + a = state.add_read('A') + b = state.add_write('A') + state.add_memlet_path(a, me, t1, dst_conn='a', memlet=dace.Memlet('A[0]')) + state.add_memlet_path(a, me, t2, dst_conn='a', memlet=dace.Memlet('A[1]')) + state.add_memlet_path(t1, mx, b, src_conn='b', memlet=dace.Memlet('A[2]')) + state.add_memlet_path(t2, mx, b, src_conn='b', memlet=dace.Memlet('A[3]')) + + sdfg.compile() + + if __name__ == '__main__': test_integer_power() test_integer_power_constant() test_equality() test_pow_with_implicit_casting() + test_tasklets_with_same_local_name() From 63748436ab8328a326ee7a9f935139228e695534 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Mon, 4 Dec 2023 08:08:27 -0800 
Subject: [PATCH 154/163] Do not serialize non-default fields by default (#1452) Added a configuration entry (enabled by default) that serializes only the modified fields in an SDFG. This leads to a reduction in size. Merging this PR is contingent on updating the SDFG renderer to use the defaults/metadata for properties. --------- Co-authored-by: Philipp Schaad --- dace/codegen/codegen.py | 14 +++++++++++--- dace/codegen/targets/cpu.py | 4 ++-- dace/config_schema.yml | 8 ++++++++ dace/properties.py | 10 ++++++++++ dace/sdfg/nodes.py | 8 +++++--- dace/sdfg/sdfg.py | 10 ++++++++-- dace/sdfg/state.py | 4 +++- dace/serialize.py | 3 +++ dace/transformation/transformation.py | 16 +++++++++------- tests/openmp_test.py | 10 ++++++---- tests/transformations/local_storage_test.py | 6 +++--- 11 files changed, 68 insertions(+), 25 deletions(-) diff --git a/dace/codegen/codegen.py b/dace/codegen/codegen.py index b7eed49f17..6e2786660f 100644 --- a/dace/codegen/codegen.py +++ b/dace/codegen/codegen.py @@ -165,6 +165,7 @@ def generate_code(sdfg, validate=True) -> List[CodeObject]: if Config.get_bool('testing', 'serialization'): from dace.sdfg import SDFG + import difflib import filecmp import shutil import tempfile @@ -174,9 +175,16 @@ def generate_code(sdfg, validate=True) -> List[CodeObject]: sdfg2.save(f'{tmp_dir}/test2.sdfg', hash=False) print('Testing SDFG serialization...') if not filecmp.cmp(f'{tmp_dir}/test.sdfg', f'{tmp_dir}/test2.sdfg'): - shutil.move(f"{tmp_dir}/test.sdfg", "test.sdfg") - shutil.move(f"{tmp_dir}/test2.sdfg", "test2.sdfg") - raise RuntimeError('SDFG serialization failed - files do not match') + with open(f'{tmp_dir}/test.sdfg', 'r') as f1: + with open(f'{tmp_dir}/test2.sdfg', 'r') as f2: + diff = difflib.unified_diff(f1.readlines(), + f2.readlines(), + fromfile='test.sdfg (first save)', + tofile='test2.sdfg (after roundtrip)') + diff = ''.join(diff) + shutil.move(f'{tmp_dir}/test.sdfg', 'test.sdfg') + shutil.move(f'{tmp_dir}/test2.sdfg', 'test2.sdfg') + raise RuntimeError(f'SDFG serialization failed - files do not match:\n{diff}') # Convert any loop constructs with hierarchical loop regions into simple 1-level state machine loops. # TODO (later): Adapt codegen to deal with hierarchical CFGs instead. diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 3944d05b09..7ed8a48cd7 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -1914,7 +1914,7 @@ def _generate_ConsumeEntry( 'size_t') # Take quiescence condition into account - if node.consume.condition.code is not None: + if node.consume.condition is not None: condition_string = "[&]() { return %s; }, " % cppunparse.cppunparse(node.consume.condition.code, False) else: condition_string = "" @@ -1933,7 +1933,7 @@ def _generate_ConsumeEntry( "{num_pes}, {condition}" "[&](int {pe_index}, {element_or_chunk}) {{".format( chunksz=node.consume.chunksize, - cond="" if node.consume.condition.code is None else "_cond", + cond="" if node.consume.condition is None else "_cond", condition=condition_string, stream_in=input_stream.data, # TODO: stream arrays element_or_chunk=chunk, diff --git a/dace/config_schema.yml b/dace/config_schema.yml index 063815e319..87bef94ee4 100644 --- a/dace/config_schema.yml +++ b/dace/config_schema.yml @@ -943,6 +943,14 @@ required: When an exception is raised in a deserialization process (e.g., due to missing library node), by default a warning is issued. If this setting is True, the exception will be raised as-is. 
+ serialize_all_fields: + type: bool + default: false + title: Serialize all unmodified fields in SDFG files + description: > + If False (default), saving an SDFG keeps only the modified non-default properties. If True, + saves all fields. + ############################################# # DaCe library settings diff --git a/dace/properties.py b/dace/properties.py index e02a54ad1f..5fc9b8dcbe 100644 --- a/dace/properties.py +++ b/dace/properties.py @@ -1023,6 +1023,14 @@ def as_string(self, code): else: self.code = code + def __eq__(self, other): + if isinstance(other, str) or other is None: + return self.as_string == other + elif isinstance(other, CodeBlock): + return self.as_string == other.as_string and self.language == other.language + else: + return super().__eq__(other) + def to_json(self): # Two roundtrips to avoid issues in AST parsing/unparsing of negative # numbers, i.e., "(-1)" becomes "(- 1)" @@ -1382,6 +1390,8 @@ def to_json(self, obj): def from_json(obj, context=None): if obj is None: return None + elif isinstance(obj, typeclass): + return obj elif isinstance(obj, str): return TypeClassProperty.from_string(obj) elif isinstance(obj, dict): diff --git a/dace/sdfg/nodes.py b/dace/sdfg/nodes.py index a28e9fce38..a21974a899 100644 --- a/dace/sdfg/nodes.py +++ b/dace/sdfg/nodes.py @@ -1005,8 +1005,10 @@ def __str__(self): @property def free_symbols(self) -> Set[str]: dyn_inputs = set(c for c in self.in_connectors if not c.startswith('IN_')) - return ((set(self._consume.num_pes.free_symbols) - | set(self._consume.condition.get_free_symbols())) - dyn_inputs) + result = set(self._consume.num_pes.free_symbols) + if self._consume.condition is not None: + result |= set(self._consume.condition.get_free_symbols()) + return result - dyn_inputs def new_symbols(self, sdfg, state, symbols) -> Dict[str, dtypes.typeclass]: from dace.codegen.tools.type_inference import infer_expr_type @@ -1094,7 +1096,7 @@ class Consume(object): label = Property(dtype=str, desc="Name of the consume node") pe_index = Property(dtype=str, desc="Processing element identifier") num_pes = SymbolicProperty(desc="Number of processing elements", default=1) - condition = CodeProperty(desc="Quiescence condition", allow_none=True) + condition = CodeProperty(desc="Quiescence condition", allow_none=True, default=None) schedule = EnumProperty(dtype=dtypes.ScheduleType, desc="Consume schedule", default=dtypes.ScheduleType.Default) chunksize = Property(dtype=int, desc="Maximal size of elements to consume at a time", default=1) debuginfo = DebugInfoProperty() diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index 020fb9dbab..2e35218a3d 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -579,7 +579,8 @@ def to_json(self, hash=False): tmp = super().to_json() # Ensure properties are serialized correctly - tmp['attributes']['constants_prop'] = json.loads(dace.serialize.dumps(tmp['attributes']['constants_prop'])) + if 'constants_prop' in tmp['attributes']: + tmp['attributes']['constants_prop'] = json.loads(dace.serialize.dumps(tmp['attributes']['constants_prop'])) tmp['sdfg_list_id'] = int(self.sdfg_id) tmp['start_state'] = self._start_block @@ -604,8 +605,13 @@ def from_json(cls, json_obj, context_info=None): nodes = json_obj['nodes'] edges = json_obj['edges'] + if 'constants_prop' in attrs: + constants_prop = dace.serialize.loads(dace.serialize.dumps(attrs['constants_prop'])) + else: + constants_prop = None + ret = SDFG(name=attrs['name'], - constants=dace.serialize.loads(dace.serialize.dumps(attrs['constants_prop'])), + 
constants=constants_prop, parent=context_info['sdfg']) dace.serialize.set_properties_from_json(ret, diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py index 64f47b14bf..461b18b1a9 100644 --- a/dace/sdfg/state.py +++ b/dace/sdfg/state.py @@ -1644,7 +1644,9 @@ def add_consume(self, pe_tuple = (elements[0], SymbolicProperty.from_string(elements[1])) debuginfo = _getdebuginfo(debuginfo or self._default_lineinfo) - consume = nd.Consume(name, pe_tuple, CodeBlock(condition, language), schedule, chunksize, debuginfo=debuginfo) + if condition is not None: + condition = CodeBlock(condition, language) + consume = nd.Consume(name, pe_tuple, condition, schedule, chunksize, debuginfo=debuginfo) entry = nd.ConsumeEntry(consume) exit = nd.ConsumeExit(consume) diff --git a/dace/serialize.py b/dace/serialize.py index cada479d0f..ef07530905 100644 --- a/dace/serialize.py +++ b/dace/serialize.py @@ -175,8 +175,11 @@ def dump(*args, **kwargs): def all_properties_to_json(object_with_properties): + save_all_fields = config.Config.get_bool('testing', 'serialize_all_fields') retdict = {} for x, v in object_with_properties.properties(): + if not save_all_fields and v == x.default: # Skip default fields + continue if x.optional and not x.optional_condition(object_with_properties): continue retdict[x.attr_name] = x.to_json(v) diff --git a/dace/transformation/transformation.py b/dace/transformation/transformation.py index 75e591cb1e..b4cbccdac3 100644 --- a/dace/transformation/transformation.py +++ b/dace/transformation/transformation.py @@ -391,12 +391,13 @@ def from_json(json_obj: Dict[str, Any], context: Dict[str, Any] = None) -> 'Patt if ext.__name__ == json_obj['transformation']) # Recreate subgraph - expr = xform.expressions()[json_obj['expr_index']] - subgraph = {expr.node(int(k)): int(v) for k, v in json_obj['_subgraph'].items()} + expr = xform.expressions()[json_obj.get('expr_index', 0)] + subgraph = {expr.node(int(k)): int(v) for k, v in json_obj.get('_subgraph', {}).items()} # Reconstruct transformation ret = xform() - ret.setup_match(None, json_obj['sdfg_id'], json_obj['state_id'], subgraph, json_obj['expr_index']) + ret.setup_match(None, json_obj.get('sdfg_id', 0), json_obj.get('state_id', 0), subgraph, + json_obj.get('expr_index', 0)) context = context or {} context['transformation'] = ret serialize.set_properties_from_json(ret, json_obj, context=context, ignore_properties={'transformation', 'type'}) @@ -652,12 +653,13 @@ def from_json(json_obj: Dict[str, Any], context: Dict[str, Any] = None) -> 'Expa xform = pydoc.locate(json_obj['classpath']) # Recreate subgraph - expr = xform.expressions()[json_obj['expr_index']] - subgraph = {expr.node(int(k)): int(v) for k, v in json_obj['_subgraph'].items()} + expr = xform.expressions()[json_obj.get('expr_index', 0)] + subgraph = {expr.node(int(k)): int(v) for k, v in json_obj.get('_subgraph', {}).items()} # Reconstruct transformation ret = xform() - ret.setup_match(None, json_obj['sdfg_id'], json_obj['state_id'], subgraph, json_obj['expr_index']) + ret.setup_match(None, json_obj.get('sdfg_id', 0), json_obj.get('state_id', 0), subgraph, + json_obj.get('expr_index', 0)) context = context or {} context['transformation'] = ret serialize.set_properties_from_json(ret, @@ -864,7 +866,7 @@ def from_json(json_obj: Dict[str, Any], context: Dict[str, Any] = None) -> 'Subg # Reconstruct transformation ret = xform() - ret.setup_match(json_obj['subgraph'], json_obj['sdfg_id'], json_obj['state_id']) + ret.setup_match(json_obj.get('subgraph', {}), json_obj.get('sdfg_id', 0), 
json_obj.get('state_id', 0))
 context = context or {}
 context['transformation'] = ret
 serialize.set_properties_from_json(ret, json_obj, context=context, ignore_properties={'transformation', 'type'})
diff --git a/tests/openmp_test.py b/tests/openmp_test.py
index d842b407fb..6d7cfa355f 100644
--- a/tests/openmp_test.py
+++ b/tests/openmp_test.py
@@ -54,10 +54,7 @@ def test_omp_props():
 break

 mapnode.schedule = dtypes.ScheduleType.CPU_Multicore
- json = sdfg.to_json()
- assert (key_exists(json, 'omp_num_threads'))
- assert (key_exists(json, 'omp_schedule'))
- assert (key_exists(json, 'omp_chunk_size'))
+
 code = sdfg.generate_code()[0].clean_code
 assert ("#pragma omp parallel for" in code)

@@ -73,6 +70,11 @@ def test_omp_props():
 code = sdfg.generate_code()[0].clean_code
 assert ("#pragma omp parallel for schedule(guided, 5) num_threads(10)" in code)

+ json = sdfg.to_json()
+ assert (key_exists(json, 'omp_num_threads'))
+ assert (key_exists(json, 'omp_schedule'))
+ assert (key_exists(json, 'omp_chunk_size'))
+

 def test_omp_parallel():

diff --git a/tests/transformations/local_storage_test.py b/tests/transformations/local_storage_test.py
index c39fd9b807..0c1d3a98b0 100644
--- a/tests/transformations/local_storage_test.py
+++ b/tests/transformations/local_storage_test.py
@@ -124,7 +124,7 @@ def test_in_local_storage_implicit():

 # Check array was set correctly
 serialized = sdfg.transformation_hist[0].to_json()
- assert serialized["array"] == None
+ assert "array" not in serialized or serialized["array"] is None


 def test_out_local_storage_explicit():
@@ -217,7 +217,7 @@ def test_out_local_storage_implicit():

 # Check array was set correctly
 serialized = sdfg.transformation_hist[0].to_json()
- assert serialized["array"] == None
+ assert "array" not in serialized or serialized["array"] is None


 @dace.program
@@ -250,8 +250,8 @@ def test_uneven(self):


 if __name__ == '__main__':
- unittest.main()
 test_in_local_storage_explicit()
 test_in_local_storage_implicit()
 test_out_local_storage_explicit()
 test_out_local_storage_implicit()
+ unittest.main()

From 1c0b0f6b810fa2792951d6a61bd3044915caee36 Mon Sep 17 00:00:00 2001
From: edopao
Date: Tue, 5 Dec 2023 11:05:45 +0100
Subject: [PATCH 155/163] Fix codegen with data access on inter-state edge
 (#1434)

After the uplift to DaCe v0.15, an SDFG that previously worked started to show
compilation errors: the latest DaCe moves a data access onto an inter-state
edge, and for that data access the symbols that define the array strides are
needed for code generation. The SDFG validated, both before and after the
simplify pass, but it did not compile for CPU; when the simplify pass was
skipped, compilation worked. The problem was narrowed down to the
scalar-to-symbol promotion, which moves the data access to the inter-state
edge. Therefore, the method `_used_symbols_internal` needs to be updated to
account for data containers, including their symbolic shapes and strides.

This issue was reported in #1433. This PR contains a unit test to reproduce
the issue and verify the proposed fix.
---
 dace/sdfg/state.py | 4 ++
 tests/codegen/codegen_used_symbols_test.py | 47 ++++++++++++++++++++++
 2 files changed, 51 insertions(+)

diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py
index 461b18b1a9..becebd1c28 100644
--- a/dace/sdfg/state.py
+++ b/dace/sdfg/state.py
@@ -2493,6 +2493,10 @@ def _used_symbols_internal(self,
 # subracting the (true) free symbols from the edge's assignment keys. This way we can correctly
 # compute the symbols that are used before being assigned.
 efsyms = e.data.used_symbols(all_symbols)
efsyms = e.data.used_symbols(all_symbols) + # collect symbols representing data containers + dsyms = {sym for sym in efsyms if sym in self.arrays} + for d in dsyms: + efsyms |= {str(sym) for sym in self.arrays[d].used_symbols(all_symbols)} defined_syms |= set(e.data.assignments.keys()) - (efsyms | state_symbols) used_before_assignment.update(efsyms - defined_syms) free_syms |= efsyms diff --git a/tests/codegen/codegen_used_symbols_test.py b/tests/codegen/codegen_used_symbols_test.py index afa0ca0a05..1e216e9508 100644 --- a/tests/codegen/codegen_used_symbols_test.py +++ b/tests/codegen/codegen_used_symbols_test.py @@ -88,8 +88,55 @@ def test_codegen_used_symbols_gpu(): pass +def test_codegen_edge_assignment_with_indirection(): + rng = numpy.random.default_rng(42) + (M, N, K) = (dace.symbol(x, dace.int32) for x in ['M', 'N', 'K']) + + sdfg = dace.SDFG('edge_assignment_with_indirection') + [sdfg.add_symbol(x, dace.int32) for x in {'__indirect_idx', '__neighbor_idx'}] + sdfg.add_array('_field', (M,), dace.float64) + sdfg.add_array('_table', (N,K), dace.int32) + sdfg.add_array('_out', (N,), dace.float64) + + state0 = sdfg.add_state(is_start_block=True) + state1 = sdfg.add_state() + sdfg.add_edge(state0, state1, dace.InterstateEdge( + assignments={'_field_idx': '_table[__indirect_idx, __neighbor_idx]'} + )) + state1.add_memlet_path( + state1.add_access('_field'), + state1.add_access('_out'), + memlet=dace.Memlet(data='_out', subset='__indirect_idx', other_subset='_field_idx', wcr='lambda x, y: x + y') + ) + + M, N, K = (5, 4, 2) + field = rng.random((M,)) + out = rng.random((N,)) + table = numpy.random.randint(0, M, (N, K), numpy.int32) + + TEST_INDIRECT_IDX = numpy.random.randint(0, N) + TEST_NEIGHBOR_IDX = numpy.random.randint(0, K) + + reference = numpy.asarray( + [ + out[i] + field[table[i, TEST_NEIGHBOR_IDX]] if i == TEST_INDIRECT_IDX else out[i] + for i in range(N) + ] + ) + + sdfg( + _field=field, _table=table, _out=out, M=M, N=N, K=K, + __indirect_idx=TEST_INDIRECT_IDX, + __neighbor_idx=TEST_NEIGHBOR_IDX + ) + + assert numpy.allclose(out, reference) + + if __name__ == "__main__": test_codegen_used_symbols_cpu() test_codegen_used_symbols_cpu_2() test_codegen_used_symbols_gpu() + test_codegen_edge_assignment_with_indirection() + From 7f8e51356557bc7d9fb70be072d40531101f37f1 Mon Sep 17 00:00:00 2001 From: BenWeber42 Date: Tue, 5 Dec 2023 17:29:53 +0100 Subject: [PATCH 156/163] Changed default of serialize_all_fields to True (#1470) This is in preparation of the `0.15.1` release to avoid introducing potentially breaking changes. --- dace/config_schema.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dace/config_schema.yml b/dace/config_schema.yml index 87bef94ee4..e6e2d568cc 100644 --- a/dace/config_schema.yml +++ b/dace/config_schema.yml @@ -945,10 +945,10 @@ required: serialize_all_fields: type: bool - default: false + default: true title: Serialize all unmodified fields in SDFG files description: > - If False (default), saving an SDFG keeps only the modified non-default properties. If True, + If False, saving an SDFG keeps only the modified non-default properties. If True, saves all fields. 
############################################# From 70566755555882c2a1ad3275a9c6a2ec93cf1416 Mon Sep 17 00:00:00 2001 From: BenWeber42 Date: Thu, 7 Dec 2023 18:39:50 +0100 Subject: [PATCH 157/163] Bump version to 0.15.1 (#1473) --- dace/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/version.py b/dace/version.py index a3e6290df8..6fccdee466 100644 --- a/dace/version.py +++ b/dace/version.py @@ -1 +1 @@ -__version__ = '0.15' +__version__ = '0.15.1' From 38c8f07fb95784b58ee8537ac636cc6603feefb3 Mon Sep 17 00:00:00 2001 From: Jack McIvor Date: Fri, 8 Dec 2023 17:13:20 +0000 Subject: [PATCH 158/163] Remove unused dependencies (#1459) Fixes #1458 --------- Co-authored-by: Jack McIvor --- requirements.txt | 5 ----- setup.py | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index 266b3368c8..e37b2636f3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,12 @@ aenum==3.1.12 astunparse==1.6.3 -blinker==1.6.2 certifi==2023.7.22 charset-normalizer==3.1.0 click==8.1.3 dill==0.3.6 -Flask==2.3.2 fparser==0.1.3 idna==3.4 importlib-metadata==6.6.0 -itsdangerous==2.1.2 Jinja2==3.1.2 MarkupSafe==2.1.3 mpmath==1.3.0 @@ -17,10 +14,8 @@ networkx==3.1 numpy==1.26.1 ply==3.11 PyYAML==6.0.1 -requests==2.31.0 six==1.16.0 sympy==1.9 urllib3==2.0.7 websockets==11.0.3 -Werkzeug==3.0.1 zipp==3.15.0 diff --git a/setup.py b/setup.py index a0ac2e2d49..bd635fb3b7 100644 --- a/setup.py +++ b/setup.py @@ -73,7 +73,7 @@ }, include_package_data=True, install_requires=[ - 'numpy', 'networkx >= 2.5', 'astunparse', 'sympy<=1.9', 'pyyaml', 'ply', 'websockets', 'requests', 'flask', + 'numpy', 'networkx >= 2.5', 'astunparse', 'sympy<=1.9', 'pyyaml', 'ply', 'websockets', 'jinja2', 'fparser >= 0.1.3', 'aenum >= 3.1', 'dataclasses; python_version < "3.7"', 'dill', 'pyreadline;platform_system=="Windows"', 'typing-compat; python_version < "3.8"' ] + cmake_requires, From cfafe0ff8647023a0d859a8820a31a6521404196 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Mon, 11 Dec 2023 16:24:10 +0100 Subject: [PATCH 159/163] Small fix for debuginfo that can be None (#1469) --- dace/sourcemap.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dace/sourcemap.py b/dace/sourcemap.py index e13252ec01..dcac2b6b73 100644 --- a/dace/sourcemap.py +++ b/dace/sourcemap.py @@ -4,6 +4,7 @@ import json import os import socket +from typing import Optional from dace import Config, dtypes from dace.sdfg import state from dace.sdfg import nodes @@ -293,8 +294,8 @@ def mapper(self, sdfg) -> bool: for nested_sdfg in sdfg.all_sdfgs_recursive(): # NOTE: SDFGs created with the API may not have debuginfo - debuginfo: dtypes.DebugInfo = nested_sdfg.debuginfo - if debuginfo.filename: + debuginfo: Optional[dtypes.DebugInfo] = nested_sdfg.debuginfo + if debuginfo and debuginfo.filename: range_dict[debuginfo.filename].append((debuginfo.start_line, debuginfo.end_line)) self.create_mapping(range_dict) From c5cc89fab76b65e77020acc48c4f1b4ba01c4126 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Mon, 11 Dec 2023 07:29:38 -0800 Subject: [PATCH 160/163] Make dynamic map range docs more explicit (#1474) --- doc/sdfg/ir.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sdfg/ir.rst b/doc/sdfg/ir.rst index f7bbb0ff79..9eb37153d5 100644 --- a/doc/sdfg/ir.rst +++ b/doc/sdfg/ir.rst @@ -481,8 +481,8 @@ current entry node, use the :func:`~dace.sdfg.state.StateGraphView.exit_node` me **Dynamic Map Ranges**: Such ranges can use memlets 
to define the map ranges directly from data containers, while still retaining the dataflow of a single state.
 As they are fed into a view connector on the map entry node, their value
-(described by the connector name) can be used in the symbolic expressions of the map range. Only scalar connectors are
-allowed.
+(described by the connector name) can be used in the symbolic expressions of the map range, and anywhere inside the map
+scope as a symbol (same as the iteration variables). Only scalar connectors are allowed.

 In the following example, we use dynamic map ranges to compute a sparse matrix-vector multiplication, where the
 vector is dense. Every output row has a defined range (standard, symbolic map), whereas the corresponding rows

From b6e1c9d3f3b341e639585ba05abfcecf2bb862e3 Mon Sep 17 00:00:00 2001
From: Florian Deconinck
Date: Tue, 12 Dec 2023 07:30:50 -0500
Subject: [PATCH 161/163] Pace build optional CI (#1460)

As climate models developed at NOAA and NASA are leveraging DaCe more and more
for their performance backend, we have seen multiple occurrences of
major/minor versions breaking downstream. This optional GitHub action is an
attempt to reduce that breakage by allowing the DaCe ecosystem to pull a
vetted version of the Pace climate model and run a subset of its regression
tests, which should exercise enough of DaCe to catch a good number of errors.

NASA takes responsibility for keeping this CI clean and working. All non-DaCe
issues should be directed to @FlorianDeconinck. All data and models are under
open-source licenses.

---------

Co-authored-by: Philipp Schaad
---
 .github/workflows/pace-build-ci.yml | 75 +++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 .github/workflows/pace-build-ci.yml

diff --git a/.github/workflows/pace-build-ci.yml b/.github/workflows/pace-build-ci.yml
new file mode 100644
index 0000000000..672c891a55
--- /dev/null
+++ b/.github/workflows/pace-build-ci.yml
@@ -0,0 +1,75 @@
+name: NASA/NOAA Pace repository build test
+
+on:
+ workflow_dispatch:
+
+defaults:
+ run:
+ shell: bash
+
+jobs:
+ build_pace:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: [3.8.10]
+
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ repository: 'git@github.com:GEOS-ESM/pace.git'
+ ref: 'ci/DaCe'
+ submodules: 'recursive'
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v2
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install dependencies & pull correct DaCe
+ run: |
+ cd pace
+ python -m pip install --upgrade pip wheel setuptools
+ cd external/dace
+ git checkout ${{ github.sha }}
+ cd ../..
+ pip install -e external/gt4py
+ pip install -e external/dace
+ pip install -r requirements_dev.txt
+ - name: Download data
+ run: |
+ cd pace
+ mkdir -p test_data
+ cd test_data
+ wget https://portal.nccs.nasa.gov/datashare/astg/smt/pace-regression-data/8.1.3_c12_6_ranks_standard.D_SW.tar.gz
+ tar -xzvf 8.1.3_c12_6_ranks_standard.D_SW.tar.gz
+ wget https://portal.nccs.nasa.gov/datashare/astg/smt/pace-regression-data/8.1.3_c12_6_ranks_standard.RiemSolverC.tar.gz
+ tar -xzvf 8.1.3_c12_6_ranks_standard.RiemSolverC.tar.gz
+ wget https://portal.nccs.nasa.gov/datashare/astg/smt/pace-regression-data/8.1.3_c12_6_ranks_standard.Remapping.tar.gz
+ tar -xzvf 8.1.3_c12_6_ranks_standard.Remapping.tar.gz
+ cd ../..
+ - name: "Regression test: Riemann Solver on C-grid"
+ run: |
+ export FV3_DACEMODE=BuildAndRun
+ export PACE_CONSTANTS=GFS
+ cd pace
+ pytest -v -s --data_path=./test_data/8.1.3/c12_6ranks_standard/dycore \
+ --backend=dace:cpu --which_modules=Riem_Solver_C \
+ --threshold_overrides_file=./fv3core/tests/savepoint/translate/overrides/standard.yaml \
+ ./fv3core/tests/savepoint
+ - name: "Regression test: D-grid shallow water Lagrangian dynamics (D_SW)"
+ run: |
+ export FV3_DACEMODE=BuildAndRun
+ export PACE_CONSTANTS=GFS
+ cd pace
+ pytest -v -s --data_path=./test_data/8.1.3/c12_6ranks_standard/dycore \
+ --backend=dace:cpu --which_modules=D_SW \
+ --threshold_overrides_file=./fv3core/tests/savepoint/translate/overrides/standard.yaml \
+ ./fv3core/tests/savepoint
+ - name: "Regression test: Remapping (on rank 0 only)"
+ run: |
+ export FV3_DACEMODE=BuildAndRun
+ export PACE_CONSTANTS=GFS
+ cd pace
+ pytest -v -s --data_path=./test_data/8.1.3/c12_6ranks_standard/dycore \
+ --backend=dace:cpu --which_modules=Remapping --which_rank=0 \
+ --threshold_overrides_file=./fv3core/tests/savepoint/translate/overrides/standard.yaml \
+ ./fv3core/tests/savepoint

From ae378e1fa7cb7470b8cfab60fd2f8d8e4f29f2a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philip=20M=C3=BCller?= <147368808+philip-paul-mueller@users.noreply.github.com>
Date: Tue, 12 Dec 2023 18:35:22 +0100
Subject: [PATCH 162/163] Added `nan` to the DaCe `math` namespace (#1437)

Before, this was generating an error, because there was no object `nan` inside
the `dace::math` namespace. This commit adds a `nan` object to the namespace;
the implementation is based on `typeless_pi`.
---
 dace/runtime/include/dace/math.h | 8 ++-
 dace/runtime/include/dace/nan.h | 113 +++++++++++++++++++++++++++++++
 2 files changed, 118 insertions(+), 3 deletions(-)
 create mode 100644 dace/runtime/include/dace/nan.h

diff --git a/dace/runtime/include/dace/math.h b/dace/runtime/include/dace/math.h
index aa4dcb358d..e743f1410f 100644
--- a/dace/runtime/include/dace/math.h
+++ b/dace/runtime/include/dace/math.h
@@ -2,15 +2,16 @@
 #ifndef __DACE_MATH_H
 #define __DACE_MATH_H

-#include "pi.h"
-#include "types.h"
-
 #include
 #include
 #include
 #include
 #include

+#include "pi.h"
+#include "nan.h"
+#include "types.h"
+
 #ifdef __CUDACC__
 #include
 #endif
@@ -457,6 +458,7 @@ namespace dace
 namespace math
 {
 static DACE_CONSTEXPR typeless_pi pi{};
+ static DACE_CONSTEXPR typeless_nan nan{};
 //////////////////////////////////////////////////////
 template<typename T> DACE_CONSTEXPR DACE_HDFI T exp(const T& a)
diff --git a/dace/runtime/include/dace/nan.h b/dace/runtime/include/dace/nan.h
new file mode 100644
index 0000000000..a8d1eb4c52
--- /dev/null
+++ b/dace/runtime/include/dace/nan.h
@@ -0,0 +1,113 @@
+// Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
+#ifndef __DACE_NAN_H
+#define __DACE_NAN_H
+
+// Class to define a stateless NaN and related operators.
+#include <limits>
+
+namespace dace
+{
+ namespace math
+ {
+ //////////////////////////////////////////////////////
+ // Defines a typeless NaN
+ struct typeless_nan
+ {
+ operator int() const = delete;
+ operator float() const
+ {
+ return std::numeric_limits<float>::quiet_NaN();
+ }
+ operator double() const
+ {
+ return std::numeric_limits<double>::quiet_NaN();
+ }
+ operator long double() const
+ {
+ return std::numeric_limits<long double>::quiet_NaN();
+ }
+ typeless_nan operator+() const
+ {
+ return typeless_nan{};
+ }
+ typeless_nan operator-() const
+ {
+ return typeless_nan{};
+ }
+ };
+
+ template<typename T>
+ DACE_CONSTEXPR typename std::enable_if<std::is_floating_point<T>::value, typeless_nan>::type
+ operator*(const T&, const typeless_nan&) { return typeless_nan{}; }
+
+ template<typename T>
+ DACE_CONSTEXPR typename std::enable_if<std::is_floating_point<T>::value, typeless_nan>::type
+ operator*(const typeless_nan&, const T&) { return typeless_nan{}; }
+
+ inline typeless_nan
+ operator*(const typeless_nan&, const typeless_nan&) { return typeless_nan{}; }
+
+
+ template<typename T>
+ DACE_CONSTEXPR typename std::enable_if<std::is_floating_point<T>::value, typeless_nan>::type
+ operator+(const T&, const typeless_nan&) { return typeless_nan{}; }
+
+ template<typename T>
+ DACE_CONSTEXPR typename std::enable_if<std::is_floating_point<T>::value, typeless_nan>::type
+ operator+(const typeless_nan&, const T&) { return typeless_nan{}; }
+
+ inline typeless_nan
+ operator+(const typeless_nan&, const typeless_nan&) { return typeless_nan{}; }
+
+
+ template<typename T>
+ DACE_CONSTEXPR typename std::enable_if<std::is_floating_point<T>::value, typeless_nan>::type
+ operator-(const T&, const typeless_nan&) { return typeless_nan{}; }
+
+ template<typename T>
+ DACE_CONSTEXPR typename std::enable_if<std::is_floating_point<T>::value, typeless_nan>::type
+ operator-(const typeless_nan&, const T&) { return typeless_nan{}; }
+
+ inline typeless_nan
+ operator-(const typeless_nan&, const typeless_nan&) { return typeless_nan{}; }
+
+
+ template<typename T>
+ DACE_CONSTEXPR typename std::enable_if<std::is_floating_point<T>::value, typeless_nan>::type
+ operator/(const T&, const typeless_nan&) { return typeless_nan{}; }
+
+ template<typename T>
+ DACE_CONSTEXPR typename std::enable_if<std::is_floating_point<T>::value, typeless_nan>::type
+ operator/(const typeless_nan&, const T&) { return typeless_nan{}; }
+
+ inline typeless_nan
+ operator/(const typeless_nan&, const typeless_nan&) { return typeless_nan{}; }
+
+
+ template<typename T>
+ DACE_CONSTEXPR typename std::enable_if<std::is_floating_point<T>::value, typeless_nan>::type
+ operator%(const T&, const typeless_nan&) { return typeless_nan{}; }
+
+ template<typename T>
+ DACE_CONSTEXPR typename std::enable_if<std::is_floating_point<T>::value, typeless_nan>::type
+ operator%(const typeless_nan&, const T&) { return typeless_nan{}; }
+
+ inline typeless_nan
+ operator%(const typeless_nan&, const typeless_nan&) { return typeless_nan{}; }
+
+ }
+}
+
+ //These functions allow performing operations with `typeless_nan` instances.
+# define FADAPT(F) DACE_CONSTEXPR ::dace::math::typeless_nan F (::dace::math::typeless_nan) { return ::dace::math::typeless_nan{}; }
+# define FADAPT2(F) template<typename T1> DACE_CONSTEXPR dace::math::typeless_nan F (T1&&, dace::math::typeless_nan) { return ::dace::math::typeless_nan{}; }; \
+                    template<typename T2> DACE_CONSTEXPR dace::math::typeless_nan F (dace::math::typeless_nan, T2&&) { return ::dace::math::typeless_nan{}; }; \
+                    DACE_CONSTEXPR ::dace::math::typeless_nan F (dace::math::typeless_nan, dace::math::typeless_nan) { return ::dace::math::typeless_nan{}; }
+    FADAPT(tanh); FADAPT(cos); FADAPT(sin); FADAPT(sqrt); FADAPT(tan);
+    FADAPT(acos); FADAPT(asin); FADAPT(atan); FADAPT(log); FADAPT(exp);
+    FADAPT(floor); FADAPT(ceil); FADAPT(round); FADAPT(abs);
+    FADAPT2(max); FADAPT2(min);
+# undef FADAPT2
+# undef FADAPT
+
+#endif  // __DACE_NAN_H

From 2dcd74af8f7141dbcb194163ff1c74149a3035db Mon Sep 17 00:00:00 2001
From: edopao
Date: Tue, 12 Dec 2023 18:36:12 +0100
Subject: [PATCH 163/163] Fix for floordiv on GPU target (#1471)

This PR addresses a `floordiv` runtime error detected on the GPU target:
`dace::math::ifloor` returned zero when given an integer argument (for
example, the result of an `int` division).

---------

Co-authored-by: Tal Ben-Nun
---
 dace/runtime/include/dace/math.h |  8 +++++++-
 tests/numpy/common.py            | 14 +++++++++++---
 tests/numpy/gpu_test.py          | 26 ++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 4 deletions(-)
 create mode 100644 tests/numpy/gpu_test.py

diff --git a/dace/runtime/include/dace/math.h b/dace/runtime/include/dace/math.h
index e743f1410f..afc08a64d3 100644
--- a/dace/runtime/include/dace/math.h
+++ b/dace/runtime/include/dace/math.h
@@ -525,7 +525,13 @@ namespace dace
         return (T)std::pow(a, (T)b);
     }

-    template <typename T>
+    template <typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
+    DACE_CONSTEXPR DACE_HDFI T ifloor(const T& a)
+    {
+        return a;
+    }
+
+    template <typename T, typename std::enable_if<std::is_floating_point<T>::value>::type* = nullptr>
     DACE_CONSTEXPR DACE_HDFI int ifloor(const T& a)
     {
         return (int)std::floor(a);

diff --git a/tests/numpy/common.py b/tests/numpy/common.py
index 5e84062dec..2784c8a0eb 100644
--- a/tests/numpy/common.py
+++ b/tests/numpy/common.py
@@ -11,7 +11,8 @@
 rng = default_rng(42)

-def compare_numpy_output(non_zero=False,
+def compare_numpy_output(device=dace.dtypes.DeviceType.CPU,
+                         non_zero=False,
                          positive=False,
                          check_dtype=False,
                          validation_func=None,
@@ -27,6 +28,7 @@ def compare_numpy_output(non_zero=False,
     Note that this should be used *instead* of the `@dace.program` annotation,
     not along with it!

+    :param device: Selects the target device for test execution.
     :param non_zero: if `True`, replace `0` inputs with `1`.
     :param positive: if `False`, floats sample from [-10.0, 10.0], and ints
                      sample from [-3, 3). Else, floats sample from
@@ -41,7 +43,7 @@
     """
     def decorator(func):
         def test():
-            dp = dace.program(func)
+            dp = dace.program(device=device)(func)

             def get_rand_arr(ddesc):
                 if type(ddesc) is dace.dtypes.typeclass:
@@ -115,7 +117,13 @@ def get_rand_arr(ddesc):
             numpy_thrown = e

         try:
-            dace_result = dp(**dace_input)
+            if device == dace.dtypes.DeviceType.GPU:
+                sdfg = dp.to_sdfg()
+                sdfg.apply_gpu_transformations()
+                dace_result = sdfg(**dace_input)
+            else:
+                dace_result = dp(**dace_input)
+
         except Exception as e:
             dace_thrown = e

diff --git a/tests/numpy/gpu_test.py b/tests/numpy/gpu_test.py
new file mode 100644
index 0000000000..9225145b86
--- /dev/null
+++ b/tests/numpy/gpu_test.py
@@ -0,0 +1,26 @@
+# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved.
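+# Background: floordiv lowers to `dace::math::ifloor`, which previously
+# returned zero for integer arguments on the GPU target; the new integral
+# overload in dace/runtime/include/dace/math.h returns such arguments
+# unchanged.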
+import dace
+import pytest
+
+from common import compare_numpy_output
+
+"""
+Test CUDA code generation for a subset of numpy-like functions on the GPU
+target.
+
+Only a subset of the numpy tests is executed on the GPU target to keep test
+execution time within a reasonable limit, which is of particular interest
+for CI regression tests. These test cases mainly cover GPU-related issues
+reported to the DaCe project and special cases of GPU code generation.
+"""
+gpu_device = dace.dtypes.DeviceType.GPU
+
+
+# special case where the `dace::math::ifloor` argument is integral
+@pytest.mark.gpu
+@compare_numpy_output(device=gpu_device, non_zero=True, positive=True)
+def test_floordiv(A: dace.int64[5, 5], B: dace.int64[5, 5]):
+    return A // B
+
+
+if __name__ == '__main__':
+    test_floordiv()
\ No newline at end of file
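
As a minimal usage sketch for patch 162 (not taken from the patches above;
it assumes the Python frontend maps Python's `math.nan` to the new
`dace::math::nan` object, as the commit message implies), a program of this
shape previously failed to compile and should now build:

    import math

    import dace
    import numpy as np

    @dace.program
    def fill_nan(A: dace.float64[10]):
        for i in dace.map[0:10]:
            A[i] = math.nan  # assumed to be emitted as dace::math::nan

    # Every element should become a quiet NaN after the call
    A = np.zeros(10)
    fill_nan(A)
    assert np.all(np.isnan(A))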