Skip to content

Commit

Permalink
Revert "Raise MSVC warning level from /W3 to /W4"
Browse files
This reverts commit 8e56347.
  • Loading branch information
callumfare committed Nov 8, 2024
1 parent 2aaa261 commit de8aea1
Show file tree
Hide file tree
Showing 68 changed files with 214 additions and 262 deletions.
15 changes: 4 additions & 11 deletions cmake/helpers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -99,25 +99,18 @@ function(add_ur_target_compile_options name)
elseif(MSVC)
target_compile_options(${name} PRIVATE
$<$<CXX_COMPILER_ID:MSVC>:/MP> # clang-cl.exe does not support /MP
/W4
/wd4456 # Disable: declaration of 'identifier' hides previous local declaration
/wd4457 # Disable: declaration of 'identifier' hides function parameter
/wd4458 # Disable: declaration of 'identifier' hides class member
/wd4459 # Disable: declaration of 'identifier' hides global declaration
/W3
/MD$<$<CONFIG:Debug>:d>
/GS
/DWIN32_LEAN_AND_MEAN
/DNOMINMAX
)

target_compile_definitions(${name} PRIVATE
# _CRT_SECURE_NO_WARNINGS used mainly because of getenv
_CRT_SECURE_NO_WARNINGS
)

if(UR_DEVELOPER_MODE)
# _CRT_SECURE_NO_WARNINGS used mainly because of getenv
# C4267: The compiler detected a conversion from size_t to a smaller type.
target_compile_options(${name} PRIVATE
/WX /GS
/WX /GS /D_CRT_SECURE_NO_WARNINGS /wd4267
)
endif()
endif()
Expand Down
7 changes: 0 additions & 7 deletions examples/collector/collector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,7 @@
#include <string_view>

#include "ur_api.h"

#ifdef _MSC_VER
#pragma warning(disable : 4245)
#endif
#include "xpti/xpti_trace_framework.h"
#ifdef _MSC_VER
#pragma warning(default : 4245)
#endif

constexpr uint16_t TRACE_FN_BEGIN =
static_cast<uint16_t>(xpti::trace_point_type_t::function_with_args_begin);
Expand Down
2 changes: 1 addition & 1 deletion include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ typedef struct ur_physical_mem_handle_t_ *ur_physical_mem_handle_t;
///////////////////////////////////////////////////////////////////////////////
#ifndef UR_BIT
/// @brief Generic macro for enumerator bit masks
#define UR_BIT(_i) (1U << _i)
#define UR_BIT(_i) (1 << _i)
#endif // UR_BIT

///////////////////////////////////////////////////////////////////////////////
Expand Down
2 changes: 1 addition & 1 deletion scripts/core/common.yml
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ name: "$x_physical_mem_handle_t"
type: macro
desc: "Generic macro for enumerator bit masks"
name: "$X_BIT( _i )"
value: "( 1U << _i )"
value: "( 1 << _i )"
--- #--------------------------------------------------------------------------
type: enum
desc: "Defines Return/Error codes"
Expand Down
7 changes: 3 additions & 4 deletions source/adapters/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,16 +97,15 @@ if (UR_ENABLE_TRACING)
get_target_property(XPTI_SRC_DIR xpti SOURCE_DIR)
set(XPTI_PROXY_SRC "${XPTI_SRC_DIR}/xpti_proxy.cpp")
endif()
add_library(cuda-xpti-proxy STATIC ${XPTI_PROXY_SRC})
target_compile_definitions(cuda-xpti-proxy PRIVATE
target_compile_definitions(${TARGET_NAME} PRIVATE
XPTI_ENABLE_INSTRUMENTATION
XPTI_STATIC_LIBRARY
)
target_include_directories(cuda-xpti-proxy PRIVATE
target_include_directories(${TARGET_NAME} PRIVATE
${XPTI_INCLUDES}
${CUDA_CUPTI_INCLUDE_DIR}
)
target_link_libraries(${TARGET_NAME} PRIVATE cuda-xpti-proxy)
target_sources(${TARGET_NAME} PRIVATE ${XPTI_PROXY_SRC})
endif()

if (CUDA_cupti_LIBRARY)
Expand Down
26 changes: 13 additions & 13 deletions source/adapters/cuda/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ static ur_result_t enqueueCommandBufferFillHelper(
if ((PatternSize == 1) || (PatternSize == 2) || (PatternSize == 4)) {
CUDA_MEMSET_NODE_PARAMS NodeParams = {};
NodeParams.dst = DstPtr;
NodeParams.elementSize = static_cast<unsigned int>(PatternSize);
NodeParams.elementSize = PatternSize;
NodeParams.height = N;
NodeParams.pitch = PatternSize;
NodeParams.width = 1;
Expand Down Expand Up @@ -508,12 +508,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
auto &ArgIndices = hKernel->getArgIndices();
CUDA_KERNEL_NODE_PARAMS NodeParams = {};
NodeParams.func = CuFunc;
NodeParams.gridDimX = static_cast<unsigned int>(BlocksPerGrid[0]);
NodeParams.gridDimY = static_cast<unsigned int>(BlocksPerGrid[1]);
NodeParams.gridDimZ = static_cast<unsigned int>(BlocksPerGrid[2]);
NodeParams.blockDimX = static_cast<unsigned int>(ThreadsPerBlock[0]);
NodeParams.blockDimY = static_cast<unsigned int>(ThreadsPerBlock[1]);
NodeParams.blockDimZ = static_cast<unsigned int>(ThreadsPerBlock[2]);
NodeParams.gridDimX = BlocksPerGrid[0];
NodeParams.gridDimY = BlocksPerGrid[1];
NodeParams.gridDimZ = BlocksPerGrid[2];
NodeParams.blockDimX = ThreadsPerBlock[0];
NodeParams.blockDimY = ThreadsPerBlock[1];
NodeParams.blockDimZ = ThreadsPerBlock[2];
NodeParams.sharedMemBytes = LocalSize;
NodeParams.kernelParams = const_cast<void **>(ArgIndices.data());

Expand Down Expand Up @@ -1397,12 +1397,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
CUDA_KERNEL_NODE_PARAMS &Params = KernelCommandHandle->Params;

Params.func = CuFunc;
Params.gridDimX = static_cast<unsigned int>(BlocksPerGrid[0]);
Params.gridDimY = static_cast<unsigned int>(BlocksPerGrid[1]);
Params.gridDimZ = static_cast<unsigned int>(BlocksPerGrid[2]);
Params.blockDimX = static_cast<unsigned int>(ThreadsPerBlock[0]);
Params.blockDimY = static_cast<unsigned int>(ThreadsPerBlock[1]);
Params.blockDimZ = static_cast<unsigned int>(ThreadsPerBlock[2]);
Params.gridDimX = BlocksPerGrid[0];
Params.gridDimY = BlocksPerGrid[1];
Params.gridDimZ = BlocksPerGrid[2];
Params.blockDimX = ThreadsPerBlock[0];
Params.blockDimY = ThreadsPerBlock[1];
Params.blockDimZ = ThreadsPerBlock[2];
Params.sharedMemBytes = KernelCommandHandle->Kernel->getLocalSize();
Params.kernelParams =
const_cast<void **>(KernelCommandHandle->Kernel->getArgIndices().data());
Expand Down
4 changes: 2 additions & 2 deletions source/adapters/cuda/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1152,7 +1152,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform,

try {
if (pNumDevices) {
*pNumDevices = static_cast<uint32_t>(NumDevices);
*pNumDevices = NumDevices;
}

if (ReturnDevices && phDevices) {
Expand Down Expand Up @@ -1235,7 +1235,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle(
ur_result_t UR_APICALL urDeviceGetGlobalTimestamps(ur_device_handle_t hDevice,
uint64_t *pDeviceTimestamp,
uint64_t *pHostTimestamp) {
CUevent Event{};
CUevent Event;
ScopedContext Active(hDevice);

if (pDeviceTimestamp) {
Expand Down
50 changes: 22 additions & 28 deletions source/adapters/cuda/enqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ void guessLocalWorkSize(ur_device_handle_t Device, size_t *ThreadsPerBlock,
int MinGrid, MaxBlockSize;
UR_CHECK_ERROR(cuOccupancyMaxPotentialBlockSize(
&MinGrid, &MaxBlockSize, Kernel->get(), NULL, Kernel->getLocalSize(),
static_cast<int>(MaxBlockDim[0])));
MaxBlockDim[0]));

roundToHighestFactorOfGlobalSizeIn3d(ThreadsPerBlock, GlobalSizeNormalized,
MaxBlockDim, MaxBlockSize);
Expand Down Expand Up @@ -208,7 +208,7 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context,
MaxWorkGroupSize = Device->getMaxWorkGroupSize();

if (ProvidedLocalWorkGroupSize) {
auto IsValid = [&](size_t Dim) {
auto IsValid = [&](int Dim) {
if (ReqdThreadsPerBlock[Dim] != 0 &&
LocalWorkSize[Dim] != ReqdThreadsPerBlock[Dim])
return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
Expand All @@ -217,8 +217,7 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context,
LocalWorkSize[Dim] > MaxThreadsPerBlock[Dim])
return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;

if (LocalWorkSize[Dim] >
Device->getMaxWorkItemSizes(static_cast<int>(Dim)))
if (LocalWorkSize[Dim] > Device->getMaxWorkItemSizes(Dim))
return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
// Checks that local work sizes are a divisor of the global work sizes
// which includes that the local work sizes are neither larger than
Expand Down Expand Up @@ -482,13 +481,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(

auto &ArgIndices = hKernel->getArgIndices();
UR_CHECK_ERROR(cuLaunchKernel(
CuFunc, static_cast<unsigned int>(BlocksPerGrid[0]),
static_cast<unsigned int>(BlocksPerGrid[1]),
static_cast<unsigned int>(BlocksPerGrid[2]),
static_cast<unsigned int>(ThreadsPerBlock[0]),
static_cast<unsigned int>(ThreadsPerBlock[1]),
static_cast<unsigned int>(ThreadsPerBlock[2]), LocalSize, CuStream,
const_cast<void **>(ArgIndices.data()), nullptr));
CuFunc, BlocksPerGrid[0], BlocksPerGrid[1], BlocksPerGrid[2],
ThreadsPerBlock[0], ThreadsPerBlock[1], ThreadsPerBlock[2], LocalSize,
CuStream, const_cast<void **>(ArgIndices.data()), nullptr));

if (LocalSize != 0)
hKernel->clearLocalSize();
Expand Down Expand Up @@ -654,12 +649,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
auto &ArgIndices = hKernel->getArgIndices();

CUlaunchConfig launch_config;
launch_config.gridDimX = static_cast<unsigned int>(BlocksPerGrid[0]);
launch_config.gridDimY = static_cast<unsigned int>(BlocksPerGrid[1]);
launch_config.gridDimZ = static_cast<unsigned int>(BlocksPerGrid[2]);
launch_config.blockDimX = static_cast<unsigned int>(ThreadsPerBlock[0]);
launch_config.blockDimY = static_cast<unsigned int>(ThreadsPerBlock[1]);
launch_config.blockDimZ = static_cast<unsigned int>(ThreadsPerBlock[2]);
launch_config.gridDimX = BlocksPerGrid[0];
launch_config.gridDimY = BlocksPerGrid[1];
launch_config.gridDimZ = BlocksPerGrid[2];
launch_config.blockDimX = ThreadsPerBlock[0];
launch_config.blockDimY = ThreadsPerBlock[1];
launch_config.blockDimZ = ThreadsPerBlock[2];

launch_config.sharedMemBytes = LocalSize;
launch_config.hStream = CuStream;
Expand Down Expand Up @@ -984,9 +979,8 @@ ur_result_t commonMemSetLargePattern(CUstream Stream, uint32_t PatternSize,
auto OffsetPtr = Ptr + (step * sizeof(uint8_t));

// set all of the pattern chunks
UR_CHECK_ERROR(cuMemsetD2D8Async(OffsetPtr, Pitch,
static_cast<unsigned char>(Value),
sizeof(uint8_t), Height, Stream));
UR_CHECK_ERROR(cuMemsetD2D8Async(OffsetPtr, Pitch, Value, sizeof(uint8_t),
Height, Stream));
}
return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -1037,9 +1031,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill(
break;
}
default: {
UR_CHECK_ERROR(
commonMemSetLargePattern(Stream, static_cast<uint32_t>(patternSize),
size, pPattern, DstDevice));
UR_CHECK_ERROR(commonMemSetLargePattern(Stream, patternSize, size,
pPattern, DstDevice));
break;
}
}
Expand Down Expand Up @@ -1071,6 +1064,7 @@ static size_t imageElementByteSize(CUDA_ARRAY_DESCRIPTOR ArrayDesc) {
return 4;
default:
detail::ur::die("Invalid image format.");
return 0;
}
}

Expand Down Expand Up @@ -1174,7 +1168,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead(
CUDA_ARRAY_DESCRIPTOR ArrayDesc;
UR_CHECK_ERROR(cuArrayGetDescriptor(&ArrayDesc, Array));

int ElementByteSize = static_cast<int>(imageElementByteSize(ArrayDesc));
int ElementByteSize = imageElementByteSize(ArrayDesc);

size_t ByteOffsetX = origin.x * ElementByteSize * ArrayDesc.NumChannels;
size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width;
Expand Down Expand Up @@ -1247,7 +1241,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite(
CUDA_ARRAY_DESCRIPTOR ArrayDesc;
UR_CHECK_ERROR(cuArrayGetDescriptor(&ArrayDesc, Array));

int ElementByteSize = static_cast<int>(imageElementByteSize(ArrayDesc));
int ElementByteSize = imageElementByteSize(ArrayDesc);

size_t ByteOffsetX = origin.x * ElementByteSize * ArrayDesc.NumChannels;
size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width;
Expand Down Expand Up @@ -1326,7 +1320,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy(
UR_ASSERT(SrcArrayDesc.NumChannels == DstArrayDesc.NumChannels,
UR_RESULT_ERROR_INVALID_MEM_OBJECT);

int ElementByteSize = static_cast<int>(imageElementByteSize(SrcArrayDesc));
int ElementByteSize = imageElementByteSize(SrcArrayDesc);

size_t DstByteOffsetX =
dstOrigin.x * ElementByteSize * SrcArrayDesc.NumChannels;
Expand Down Expand Up @@ -1511,8 +1505,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
CuStream));
break;
default:
commonMemSetLargePattern(CuStream, static_cast<uint32_t>(patternSize),
size, pPattern, (CUdeviceptr)ptr);
commonMemSetLargePattern(CuStream, patternSize, size, pPattern,
(CUdeviceptr)ptr);
break;
}
if (phEvent) {
Expand Down
5 changes: 2 additions & 3 deletions source/adapters/cuda/image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,9 +284,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp(
ur_result_t Result = UR_RESULT_SUCCESS;
try {
ScopedContext Active(hDevice);
UR_CHECK_ERROR(
cuMemAllocPitch((CUdeviceptr *)ppMem, pResultPitch, widthInBytes,
height, static_cast<unsigned int>(elementSizeBytes)));
UR_CHECK_ERROR(cuMemAllocPitch((CUdeviceptr *)ppMem, pResultPitch,
widthInBytes, height, elementSizeBytes));
} catch (ur_result_t error) {
Result = error;
} catch (...) {
Expand Down
4 changes: 2 additions & 2 deletions source/adapters/cuda/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(

int MaxNumActiveGroupsPerCU{0};
UR_CHECK_ERROR(cuOccupancyMaxActiveBlocksPerMultiprocessor(
&MaxNumActiveGroupsPerCU, hKernel->get(),
static_cast<int>(localWorkSize), dynamicSharedMemorySize));
&MaxNumActiveGroupsPerCU, hKernel->get(), localWorkSize,
dynamicSharedMemorySize));
detail::ur::assertion(MaxNumActiveGroupsPerCU >= 0);
// Handle the case where we can't have all SMs active with at least 1 group
// per SM. In that case, the device is still able to run 1 work-group, hence
Expand Down
8 changes: 4 additions & 4 deletions source/adapters/cuda/kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ struct ur_kernel_handle_t_ {
}
ParamSizes[Index] = Size;
// calculate the insertion point on the array
size_t InsertPos = std::accumulate(
std::begin(ParamSizes), std::begin(ParamSizes) + Index, size_t{0});
size_t InsertPos = std::accumulate(std::begin(ParamSizes),
std::begin(ParamSizes) + Index, 0);
// Update the stored value for the argument
std::memcpy(&Storage[InsertPos], Arg, Size);
Indices[Index] = &Storage[InsertPos];
Expand Down Expand Up @@ -152,8 +152,8 @@ struct ur_kernel_handle_t_ {
const args_index_t &getIndices() const noexcept { return Indices; }

uint32_t getLocalSize() const {
return static_cast<uint32_t>(std::accumulate(
std::begin(OffsetPerIndex), std::end(OffsetPerIndex), size_t{0}));
return std::accumulate(std::begin(OffsetPerIndex),
std::end(OffsetPerIndex), 0);
}
} Args;

Expand Down
4 changes: 2 additions & 2 deletions source/adapters/cuda/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ ur_result_t ur_program_handle_t_::buildProgram(const char *BuildOptions) {
}

UR_CHECK_ERROR(cuModuleLoadDataEx(&Module, static_cast<const void *>(Binary),
static_cast<unsigned int>(Options.size()),
Options.data(), OptionVals.data()));
Options.size(), Options.data(),
OptionVals.data()));

BuildStatus = UR_PROGRAM_BUILD_STATUS_SUCCESS;

Expand Down
2 changes: 1 addition & 1 deletion source/adapters/cuda/usm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ umf_result_t USMMemoryProvider::initialize(ur_context_handle_t Ctx,

enum umf_result_t USMMemoryProvider::alloc(size_t Size, size_t Align,
void **Ptr) {
auto Res = allocateImpl(Ptr, Size, static_cast<uint32_t>(Align));
auto Res = allocateImpl(Ptr, Size, Align);
if (Res != UR_RESULT_SUCCESS) {
getLastStatusRef() = Res;
return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC;
Expand Down
18 changes: 8 additions & 10 deletions source/adapters/level_zero/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,19 +89,18 @@ if(UR_BUILD_ADAPTER_L0)
endif()

# TODO: fix level_zero adapter conversion warnings
# C4267: The compiler detected a conversion from size_t to a smaller type.
target_compile_options(ur_adapter_level_zero PRIVATE
$<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244 /wd4267>
$<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244>
)

set_target_properties(ur_adapter_level_zero PROPERTIES
VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}"
SOVERSION "${PROJECT_VERSION_MAJOR}"
)

if(CMAKE_CXX_COMPILER_LINKER_ID MATCHES MSVC)
# 0x800: Search for the DLL only in the System32 folder
target_link_options(ur_adapter_level_zero PRIVATE LINKER:/DEPENDENTLOADFLAG:0x800)
if (WIN32)
# 0x800: Search for the DLL only in the System32 folder
target_link_options(ur_adapter_level_zero PRIVATE /DEPENDENTLOADFLAG:0x800)
endif()

target_link_libraries(ur_adapter_level_zero PRIVATE
Expand Down Expand Up @@ -184,19 +183,18 @@ if(UR_BUILD_ADAPTER_L0_V2)
target_compile_definitions(ur_adapter_level_zero_v2 PUBLIC UR_ADAPTER_LEVEL_ZERO_V2)

# TODO: fix level_zero adapter conversion warnings
# C4267: The compiler detected a conversion from size_t to a smaller type.
target_compile_options(ur_adapter_level_zero_v2 PRIVATE
$<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244 /wd4100 /wd4267>
$<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244>
)

set_target_properties(ur_adapter_level_zero_v2 PROPERTIES
VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}"
SOVERSION "${PROJECT_VERSION_MAJOR}"
)

if(CMAKE_CXX_COMPILER_LINKER_ID MATCHES MSVC)
# 0x800: Search for the DLL only in the System32 folder
target_link_options(ur_adapter_level_zero_v2 PUBLIC LINKER:/DEPENDENTLOADFLAG:0x800)
if (WIN32)
# 0x800: Search for the DLL only in the System32 folder
target_link_options(ur_adapter_level_zero_v2 PUBLIC /DEPENDENTLOADFLAG:0x800)
endif()

target_link_libraries(ur_adapter_level_zero_v2 PRIVATE
Expand Down
Loading

0 comments on commit de8aea1

Please sign in to comment.