Skip to content

Commit

Permalink
[SYCL] Remove build options from fast kernel cache key (#16101)
Browse files Browse the repository at this point in the history
Build options were removed from the kernel cache key in
#11351 to reduce the kernel lookup
overhead.
This PR removes build options from the fast kernel cache key as well.

Quoting #11351
> This can be done because they are either empty or set by the
environment variable so they stay the same for the entire program
lifecycle for the purposes of in-memory caching.
  • Loading branch information
uditagarwal97 authored Nov 18, 2024
1 parent 69572a2 commit 1219972
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 13 deletions.
19 changes: 14 additions & 5 deletions sycl/source/detail/kernel_program_cache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,10 +163,19 @@ class KernelProgramCache {
::boost::unordered_map<ur_program_handle_t, KernelByNameT>;

using KernelFastCacheKeyT =
std::tuple<SerializedObj, ur_device_handle_t, std::string, std::string>;
std::tuple<SerializedObj, /* Serialized spec constants. */
ur_device_handle_t, /* UR device handle pointer */
std::string /* Kernel Name */
>;

using KernelFastCacheValT =
std::tuple<ur_kernel_handle_t, std::mutex *, const KernelArgMask *,
ur_program_handle_t>;
std::tuple<ur_kernel_handle_t, /* UR kernel handle pointer. */
std::mutex *, /* Mutex guarding this kernel. */
const KernelArgMask *, /* Eliminated kernel argument mask. */
ur_program_handle_t /* UR program handle corresponding to this
kernel. */
>;

// This container is used as a fast path for retrieving cached kernels.
// unordered_flat_map is used here to reduce lookup overhead.
// The slow path is used only once for each newly created kernel, so the
Expand Down Expand Up @@ -283,7 +292,7 @@ class KernelProgramCache {
std::unique_lock<std::mutex> Lock(MKernelFastCacheMutex);
auto It = MKernelFastCache.find(CacheKey);
if (It != MKernelFastCache.end()) {
traceKernel("Kernel fetched.", std::get<3>(CacheKey), true);
traceKernel("Kernel fetched.", std::get<2>(CacheKey), true);
return It->second;
}
return std::make_tuple(nullptr, nullptr, nullptr, nullptr);
Expand All @@ -294,7 +303,7 @@ class KernelProgramCache {
std::unique_lock<std::mutex> Lock(MKernelFastCacheMutex);
// If no insertion takes place, some other thread has already inserted
// an entry into the cache.
traceKernel("Kernel inserted.", std::get<3>(CacheKey), true);
traceKernel("Kernel inserted.", std::get<2>(CacheKey), true);
MKernelFastCache.emplace(CacheKey, CacheVal);
}

Expand Down
10 changes: 2 additions & 8 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -969,17 +969,11 @@ ProgramManager::getOrCreateKernel(const ContextImplPtr &ContextImpl,
using KernelArgMaskPairT = KernelProgramCache::KernelArgMaskPairT;

KernelProgramCache &Cache = ContextImpl->getKernelProgramCache();

std::string CompileOpts, LinkOpts;
SerializedObj SpecConsts;
applyOptionsFromEnvironment(CompileOpts, LinkOpts);
// Should always come last!
appendCompileEnvironmentVariablesThatAppend(CompileOpts);
appendLinkEnvironmentVariablesThatAppend(LinkOpts);

ur_device_handle_t UrDevice = DeviceImpl->getHandleRef();

auto key = std::make_tuple(std::move(SpecConsts), UrDevice,
CompileOpts + LinkOpts, KernelName);
auto key = std::make_tuple(std::move(SpecConsts), UrDevice, KernelName);
if (SYCLConfig<SYCL_CACHE_IN_MEM>::get()) {
auto ret_tuple = Cache.tryToGetKernelFast(key);
constexpr size_t Kernel = 0; // see KernelFastCacheValT tuple
Expand Down

0 comments on commit 1219972

Please sign in to comment.