Skip to content

Commit

Permalink
WIP: Launch kernel with SYCL
Browse files Browse the repository at this point in the history
Signed-off-by: Lukas Sommer <[email protected]>
  • Loading branch information
sommerlukas committed Dec 13, 2024
1 parent 45fae59 commit 38cd9ee
Showing 1 changed file with 14 additions and 0 deletions.
14 changes: 14 additions & 0 deletions python/cutlass/backend/operation.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@

from cuda import __version__, cuda

import dpctl

from cutlass.backend.utils.device import device_cc

# Integer components of the cuda-python version string: everything from the
# first "rc" marker onward, and then everything from the first ".post" marker
# onward, is cut off before the remainder is split on "." and converted.
_version_splits = list(map(int, __version__.split("rc")[0].split(".post")[0].split(".")))
Expand Down Expand Up @@ -122,10 +124,22 @@ def run_without_clusters(self, launch_config, kernel_params, stream=cuda.CUstrea

return err

def run_with_sycl(self, launch_config, kernel_params, param_size, stream):
    """Submit ``self.kernel`` to a SYCL queue through dpctl.

    The per-dimension global size is computed as ``grid[i] * block[i]`` and
    the local size is taken from ``block``. Both sequences are reversed
    before submission.
    NOTE(review): the reversal presumably maps CUDA's (x, y, z) ordering onto
    SYCL's range ordering -- confirm against the dpctl/SYCL documentation.

    Args:
        launch_config: Object exposing ``grid``, ``block`` and
            ``shared_memory_capacity``. It is only read, never modified.
        kernel_params: Kernel parameter payload, forwarded together with
            ``param_size`` to ``dpctl.experimental.RawKernelArg``.
        param_size: Size passed as the first argument of ``RawKernelArg``.
        stream: Queue object whose ``submit`` method launches the kernel.

    Returns:
        None. Whatever ``stream.submit`` returns is discarded.
    """
    local_mem = dpctl.experimental.WorkGroupMemory(launch_config.shared_memory_capacity)
    raw_arg = dpctl.experimental.RawKernelArg(param_size, kernel_params)

    # Build new lists instead of reversing ``launch_config.block`` in place:
    # the configuration belongs to the caller, and an in-place reverse would
    # flip its block dimensions on every launch that reuses it.
    global_size = [
        groups * items
        for groups, items in zip(launch_config.grid, launch_config.block)
    ]
    global_size.reverse()
    local_size = list(reversed(launch_config.block))

    stream.submit(self.kernel, [raw_arg, local_mem], global_size, local_size)

def run(self, host_workspace, device_workspace, launch_config, stream=cuda.CUstream(0)):
cArg = (ctypes.c_char * len(host_workspace)).from_buffer(host_workspace)
packed = (ctypes.c_void_p * 1)()
packed[0] = ctypes.addressof(cArg)
if isinstance(stream, dpctl.SyclQueue):
self.run_with_sycl(launch_config, packed[0], len(host_workspace), stream)
return cuda.CUresult.CUDA_SUCCESS

if supports_cluster_launch():
return self.run_with_clusters(launch_config, packed, stream)
Expand Down

0 comments on commit 38cd9ee

Please sign in to comment.