Skip to content

Commit

Permalink
Add support for deferred allocation on GPU global arrays
Browse files Browse the repository at this point in the history
  • Loading branch information
ThrudPrimrose committed Nov 5, 2024
1 parent 93eae37 commit 5b55425
Show file tree
Hide file tree
Showing 3 changed files with 176 additions and 31 deletions.
16 changes: 10 additions & 6 deletions dace/codegen/targets/cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,8 @@ def ndcopy_to_strided_copy(
return None


def cpp_offset_expr(d: data.Data, subset_in: subsets.Subset, offset=None, packed_veclen=1, indices=None):
def cpp_offset_expr(d: data.Data, subset_in: subsets.Subset, offset=None, packed_veclen=1, indices=None,
deferred_size_names=None):
""" Creates a C++ expression that can be added to a pointer in order
to offset it to the beginning of the given subset and offset.
Expand Down Expand Up @@ -569,11 +570,13 @@ def cpp_offset_expr(d: data.Data, subset_in: subsets.Subset, offset=None, packed
if packed_veclen > 1:
index /= packed_veclen

size_desc_name = d.size_desc_name
if not (size_desc_name is None):
if not (deferred_size_names is None):
access_str_with_deferred_vars = sym2cpp(index)
def replace_pattern(match):
    """Substitution callback for ``re.sub``.

    Takes the dimension index captured by group 1 of the match and
    returns the entry of ``deferred_size_names`` at that index.
    """
    return deferred_size_names[int(match.group(1))]
pattern = r'__dace_defer_dim(\d+)'
access_str = re.sub(pattern, size_desc_name + r'[\1]', access_str_with_deferred_vars)
access_str = re.sub(pattern, replace_pattern, access_str_with_deferred_vars)
return access_str
else:
return sym2cpp(index)
Expand All @@ -588,14 +591,15 @@ def cpp_array_expr(sdfg,
use_other_subset=False,
indices=None,
referenced_array=None,
codegen=None):
codegen=None,
deferred_size_names=None):
""" Converts an Indices/Range object to a C++ array access string. """
subset = memlet.subset if not use_other_subset else memlet.other_subset
s = subset if relative_offset else subsets.Indices(offset)
o = offset if relative_offset else None
desc : dace.Data = (sdfg.arrays[memlet.data] if referenced_array is None else referenced_array)
desc_name = memlet.data
offset_cppstr = cpp_offset_expr(desc, s, o, packed_veclen, indices=indices)
offset_cppstr = cpp_offset_expr(desc, s, o, packed_veclen, indices=indices, deferred_size_names=deferred_size_names)

# NOTE: Are there any cases where a mix of '.' and '->' is needed when traversing nested structs?
# TODO: Study this when changing Structures to be (optionally?) non-pointers.
Expand Down
21 changes: 20 additions & 1 deletion dace/codegen/targets/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -1164,7 +1164,26 @@ def process_out_memlets(self,
write_expr = f"*({ptr_str} + {array_expr}) = {in_local_name};"
else:
desc_dtype = desc.dtype
expr = cpp.cpp_array_expr(sdfg, memlet, codegen=self._frame)
# If the storage type is CPU_Heap or GPU_Global, the array might require deferred allocation.
# We can check whether the array requires special access using A_size[0] (CPU) or __A_dim0_size (GPU)
# by going through the shape and checking for symbols starting with __dace_defer
def check_dace_defer(elements):
    """Return True if any element is a deferred-size symbol.

    An element counts when it is a ``symbolic.symbol`` instance whose
    string form starts with ``"__dace_defer"``. An empty iterable
    yields False.
    """
    return any(
        isinstance(elem, symbolic.symbol) and str(elem).startswith("__dace_defer")
        for elem in elements
    )
deferred_size_names = None
if check_dace_defer(desc.shape):
if desc.storage == dtypes.StorageType.GPU_Global or desc.storage == dtypes.StorageType.CPU_Heap:
deferred_size_names = []
for i, elem in enumerate(desc.shape):
if str(elem).startswith("__dace_defer"):
deferred_size_names.append(f"__{memlet.data}_dim{i}_size" if desc.storage == dtypes.StorageType.GPU_Global else f"{desc.size_desc_name}[{i}]")
else:
deferred_size_names.append(elem)
else:
raise Exception("Deferred Allocation only supported on array storages of type GPU_Global or CPU_Heap")
expr = cpp.cpp_array_expr(sdfg, memlet, codegen=self._frame, deferred_size_names=deferred_size_names)
write_expr = codegen.make_ptr_assignment(in_local_name, conntype, expr, desc_dtype)

# Write out
Expand Down
Loading

0 comments on commit 5b55425

Please sign in to comment.