Revert "Raise MSVC warning level from /W3 to /W4"

This reverts commit 8e56347.
callumfare · Nov 8, 2024 · de8aea1 · de8aea1
1 parent 2aaa261
commit de8aea1
Show file tree

Hide file tree

Showing 68 changed files with 214 additions and 262 deletions.
diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake
@@ -99,25 +99,18 @@ function(add_ur_target_compile_options name)
     elseif(MSVC)
         target_compile_options(${name} PRIVATE
             $<$<CXX_COMPILER_ID:MSVC>:/MP>  # clang-cl.exe does not support /MP
-            /W4
-            /wd4456  # Disable: declaration of 'identifier' hides previous local declaration
-            /wd4457  # Disable: declaration of 'identifier' hides function parameter
-            /wd4458  # Disable: declaration of 'identifier' hides class member
-            /wd4459  # Disable: declaration of 'identifier' hides global declaration
+            /W3
             /MD$<$<CONFIG:Debug>:d>
             /GS
             /DWIN32_LEAN_AND_MEAN
             /DNOMINMAX
         )
 
-        target_compile_definitions(${name} PRIVATE
-            # _CRT_SECURE_NO_WARNINGS used mainly because of getenv
-            _CRT_SECURE_NO_WARNINGS
-        )
-
         if(UR_DEVELOPER_MODE)
+            # _CRT_SECURE_NO_WARNINGS is defined mainly to silence the MSVC deprecation warning for getenv.
+            # /wd4267 disables C4267: conversion from size_t to a smaller type (possible loss of data).
             target_compile_options(${name} PRIVATE
-                /WX /GS
+                /WX /GS /D_CRT_SECURE_NO_WARNINGS /wd4267
             )
         endif()
     endif()

diff --git a/examples/collector/collector.cpp b/examples/collector/collector.cpp
@@ -25,14 +25,7 @@
 #include <string_view>
 
 #include "ur_api.h"
-
-#ifdef _MSC_VER
-#pragma warning(disable : 4245)
-#endif
 #include "xpti/xpti_trace_framework.h"
-#ifdef _MSC_VER
-#pragma warning(default : 4245)
-#endif
 
 constexpr uint16_t TRACE_FN_BEGIN =
     static_cast<uint16_t>(xpti::trace_point_type_t::function_with_args_begin);

diff --git a/include/ur_api.h b/include/ur_api.h
@@ -424,7 +424,7 @@ typedef struct ur_physical_mem_handle_t_ *ur_physical_mem_handle_t;
 ///////////////////////////////////////////////////////////////////////////////
 #ifndef UR_BIT
 /// @brief Generic macro for enumerator bit masks
-#define UR_BIT(_i) (1U << _i)
+#define UR_BIT(_i) (1 << _i)
 #endif // UR_BIT
 
 ///////////////////////////////////////////////////////////////////////////////

diff --git a/scripts/core/common.yml b/scripts/core/common.yml
@@ -134,7 +134,7 @@ name: "$x_physical_mem_handle_t"
 type: macro
 desc: "Generic macro for enumerator bit masks"
 name: "$X_BIT( _i )"
-value: "( 1U << _i )"
+value: "( 1 << _i )"
 --- #--------------------------------------------------------------------------
 type: enum
 desc: "Defines Return/Error codes"

diff --git a/source/adapters/cuda/CMakeLists.txt b/source/adapters/cuda/CMakeLists.txt
@@ -97,16 +97,15 @@ if (UR_ENABLE_TRACING)
     get_target_property(XPTI_SRC_DIR xpti SOURCE_DIR)
     set(XPTI_PROXY_SRC "${XPTI_SRC_DIR}/xpti_proxy.cpp")
   endif()
-  add_library(cuda-xpti-proxy STATIC ${XPTI_PROXY_SRC})
-  target_compile_definitions(cuda-xpti-proxy PRIVATE
+  target_compile_definitions(${TARGET_NAME} PRIVATE
     XPTI_ENABLE_INSTRUMENTATION
     XPTI_STATIC_LIBRARY
     )
-  target_include_directories(cuda-xpti-proxy PRIVATE
+  target_include_directories(${TARGET_NAME} PRIVATE
     ${XPTI_INCLUDES}
     ${CUDA_CUPTI_INCLUDE_DIR}
   )
-  target_link_libraries(${TARGET_NAME} PRIVATE cuda-xpti-proxy)
+  target_sources(${TARGET_NAME} PRIVATE ${XPTI_PROXY_SRC})
 endif()
 
 if (CUDA_cupti_LIBRARY)

diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp
@@ -242,7 +242,7 @@ static ur_result_t enqueueCommandBufferFillHelper(
     if ((PatternSize == 1) || (PatternSize == 2) || (PatternSize == 4)) {
       CUDA_MEMSET_NODE_PARAMS NodeParams = {};
       NodeParams.dst = DstPtr;
-      NodeParams.elementSize = static_cast<unsigned int>(PatternSize);
+      NodeParams.elementSize = PatternSize;
       NodeParams.height = N;
       NodeParams.pitch = PatternSize;
       NodeParams.width = 1;
@@ -508,12 +508,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
     auto &ArgIndices = hKernel->getArgIndices();
     CUDA_KERNEL_NODE_PARAMS NodeParams = {};
     NodeParams.func = CuFunc;
-    NodeParams.gridDimX = static_cast<unsigned int>(BlocksPerGrid[0]);
-    NodeParams.gridDimY = static_cast<unsigned int>(BlocksPerGrid[1]);
-    NodeParams.gridDimZ = static_cast<unsigned int>(BlocksPerGrid[2]);
-    NodeParams.blockDimX = static_cast<unsigned int>(ThreadsPerBlock[0]);
-    NodeParams.blockDimY = static_cast<unsigned int>(ThreadsPerBlock[1]);
-    NodeParams.blockDimZ = static_cast<unsigned int>(ThreadsPerBlock[2]);
+    NodeParams.gridDimX = BlocksPerGrid[0];
+    NodeParams.gridDimY = BlocksPerGrid[1];
+    NodeParams.gridDimZ = BlocksPerGrid[2];
+    NodeParams.blockDimX = ThreadsPerBlock[0];
+    NodeParams.blockDimY = ThreadsPerBlock[1];
+    NodeParams.blockDimZ = ThreadsPerBlock[2];
     NodeParams.sharedMemBytes = LocalSize;
     NodeParams.kernelParams = const_cast<void **>(ArgIndices.data());
 
@@ -1397,12 +1397,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
   CUDA_KERNEL_NODE_PARAMS &Params = KernelCommandHandle->Params;
 
   Params.func = CuFunc;
-  Params.gridDimX = static_cast<unsigned int>(BlocksPerGrid[0]);
-  Params.gridDimY = static_cast<unsigned int>(BlocksPerGrid[1]);
-  Params.gridDimZ = static_cast<unsigned int>(BlocksPerGrid[2]);
-  Params.blockDimX = static_cast<unsigned int>(ThreadsPerBlock[0]);
-  Params.blockDimY = static_cast<unsigned int>(ThreadsPerBlock[1]);
-  Params.blockDimZ = static_cast<unsigned int>(ThreadsPerBlock[2]);
+  Params.gridDimX = BlocksPerGrid[0];
+  Params.gridDimY = BlocksPerGrid[1];
+  Params.gridDimZ = BlocksPerGrid[2];
+  Params.blockDimX = ThreadsPerBlock[0];
+  Params.blockDimY = ThreadsPerBlock[1];
+  Params.blockDimZ = ThreadsPerBlock[2];
   Params.sharedMemBytes = KernelCommandHandle->Kernel->getLocalSize();
   Params.kernelParams =
       const_cast<void **>(KernelCommandHandle->Kernel->getArgIndices().data());

diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp
@@ -1152,7 +1152,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform,
 
   try {
     if (pNumDevices) {
-      *pNumDevices = static_cast<uint32_t>(NumDevices);
+      *pNumDevices = NumDevices;
     }
 
     if (ReturnDevices && phDevices) {
@@ -1235,7 +1235,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle(
 ur_result_t UR_APICALL urDeviceGetGlobalTimestamps(ur_device_handle_t hDevice,
                                                    uint64_t *pDeviceTimestamp,
                                                    uint64_t *pHostTimestamp) {
-  CUevent Event{};
+  CUevent Event;
   ScopedContext Active(hDevice);
 
   if (pDeviceTimestamp) {

diff --git a/source/adapters/cuda/enqueue.cpp b/source/adapters/cuda/enqueue.cpp
@@ -160,7 +160,7 @@ void guessLocalWorkSize(ur_device_handle_t Device, size_t *ThreadsPerBlock,
   int MinGrid, MaxBlockSize;
   UR_CHECK_ERROR(cuOccupancyMaxPotentialBlockSize(
       &MinGrid, &MaxBlockSize, Kernel->get(), NULL, Kernel->getLocalSize(),
-      static_cast<int>(MaxBlockDim[0])));
+      MaxBlockDim[0]));
 
   roundToHighestFactorOfGlobalSizeIn3d(ThreadsPerBlock, GlobalSizeNormalized,
                                        MaxBlockDim, MaxBlockSize);
@@ -208,7 +208,7 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context,
       MaxWorkGroupSize = Device->getMaxWorkGroupSize();
 
       if (ProvidedLocalWorkGroupSize) {
-        auto IsValid = [&](size_t Dim) {
+        auto IsValid = [&](int Dim) {
           if (ReqdThreadsPerBlock[Dim] != 0 &&
               LocalWorkSize[Dim] != ReqdThreadsPerBlock[Dim])
             return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
@@ -217,8 +217,7 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context,
               LocalWorkSize[Dim] > MaxThreadsPerBlock[Dim])
             return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
 
-          if (LocalWorkSize[Dim] >
-              Device->getMaxWorkItemSizes(static_cast<int>(Dim)))
+          if (LocalWorkSize[Dim] > Device->getMaxWorkItemSizes(Dim))
             return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
           // Checks that local work sizes are a divisor of the global work sizes
           // which includes that the local work sizes are neither larger than
@@ -482,13 +481,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
 
     auto &ArgIndices = hKernel->getArgIndices();
     UR_CHECK_ERROR(cuLaunchKernel(
-        CuFunc, static_cast<unsigned int>(BlocksPerGrid[0]),
-        static_cast<unsigned int>(BlocksPerGrid[1]),
-        static_cast<unsigned int>(BlocksPerGrid[2]),
-        static_cast<unsigned int>(ThreadsPerBlock[0]),
-        static_cast<unsigned int>(ThreadsPerBlock[1]),
-        static_cast<unsigned int>(ThreadsPerBlock[2]), LocalSize, CuStream,
-        const_cast<void **>(ArgIndices.data()), nullptr));
+        CuFunc, BlocksPerGrid[0], BlocksPerGrid[1], BlocksPerGrid[2],
+        ThreadsPerBlock[0], ThreadsPerBlock[1], ThreadsPerBlock[2], LocalSize,
+        CuStream, const_cast<void **>(ArgIndices.data()), nullptr));
 
     if (LocalSize != 0)
       hKernel->clearLocalSize();
@@ -654,12 +649,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
     auto &ArgIndices = hKernel->getArgIndices();
 
     CUlaunchConfig launch_config;
-    launch_config.gridDimX = static_cast<unsigned int>(BlocksPerGrid[0]);
-    launch_config.gridDimY = static_cast<unsigned int>(BlocksPerGrid[1]);
-    launch_config.gridDimZ = static_cast<unsigned int>(BlocksPerGrid[2]);
-    launch_config.blockDimX = static_cast<unsigned int>(ThreadsPerBlock[0]);
-    launch_config.blockDimY = static_cast<unsigned int>(ThreadsPerBlock[1]);
-    launch_config.blockDimZ = static_cast<unsigned int>(ThreadsPerBlock[2]);
+    launch_config.gridDimX = BlocksPerGrid[0];
+    launch_config.gridDimY = BlocksPerGrid[1];
+    launch_config.gridDimZ = BlocksPerGrid[2];
+    launch_config.blockDimX = ThreadsPerBlock[0];
+    launch_config.blockDimY = ThreadsPerBlock[1];
+    launch_config.blockDimZ = ThreadsPerBlock[2];
 
     launch_config.sharedMemBytes = LocalSize;
     launch_config.hStream = CuStream;
@@ -984,9 +979,8 @@ ur_result_t commonMemSetLargePattern(CUstream Stream, uint32_t PatternSize,
     auto OffsetPtr = Ptr + (step * sizeof(uint8_t));
 
     // set all of the pattern chunks
-    UR_CHECK_ERROR(cuMemsetD2D8Async(OffsetPtr, Pitch,
-                                     static_cast<unsigned char>(Value),
-                                     sizeof(uint8_t), Height, Stream));
+    UR_CHECK_ERROR(cuMemsetD2D8Async(OffsetPtr, Pitch, Value, sizeof(uint8_t),
+                                     Height, Stream));
   }
   return UR_RESULT_SUCCESS;
 }
@@ -1037,9 +1031,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill(
       break;
     }
     default: {
-      UR_CHECK_ERROR(
-          commonMemSetLargePattern(Stream, static_cast<uint32_t>(patternSize),
-                                   size, pPattern, DstDevice));
+      UR_CHECK_ERROR(commonMemSetLargePattern(Stream, patternSize, size,
+                                              pPattern, DstDevice));
       break;
     }
     }
@@ -1071,6 +1064,7 @@ static size_t imageElementByteSize(CUDA_ARRAY_DESCRIPTOR ArrayDesc) {
     return 4;
   default:
     detail::ur::die("Invalid image format.");
+    return 0;
   }
 }
 
@@ -1174,7 +1168,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead(
     CUDA_ARRAY_DESCRIPTOR ArrayDesc;
     UR_CHECK_ERROR(cuArrayGetDescriptor(&ArrayDesc, Array));
 
-    int ElementByteSize = static_cast<int>(imageElementByteSize(ArrayDesc));
+    int ElementByteSize = imageElementByteSize(ArrayDesc);
 
     size_t ByteOffsetX = origin.x * ElementByteSize * ArrayDesc.NumChannels;
     size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width;
@@ -1247,7 +1241,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite(
     CUDA_ARRAY_DESCRIPTOR ArrayDesc;
     UR_CHECK_ERROR(cuArrayGetDescriptor(&ArrayDesc, Array));
 
-    int ElementByteSize = static_cast<int>(imageElementByteSize(ArrayDesc));
+    int ElementByteSize = imageElementByteSize(ArrayDesc);
 
     size_t ByteOffsetX = origin.x * ElementByteSize * ArrayDesc.NumChannels;
     size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width;
@@ -1326,7 +1320,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy(
     UR_ASSERT(SrcArrayDesc.NumChannels == DstArrayDesc.NumChannels,
               UR_RESULT_ERROR_INVALID_MEM_OBJECT);
 
-    int ElementByteSize = static_cast<int>(imageElementByteSize(SrcArrayDesc));
+    int ElementByteSize = imageElementByteSize(SrcArrayDesc);
 
     size_t DstByteOffsetX =
         dstOrigin.x * ElementByteSize * SrcArrayDesc.NumChannels;
@@ -1511,8 +1505,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
           CuStream));
       break;
     default:
-      commonMemSetLargePattern(CuStream, static_cast<uint32_t>(patternSize),
-                               size, pPattern, (CUdeviceptr)ptr);
+      commonMemSetLargePattern(CuStream, patternSize, size, pPattern,
+                               (CUdeviceptr)ptr);
       break;
     }
     if (phEvent) {

diff --git a/source/adapters/cuda/image.cpp b/source/adapters/cuda/image.cpp
@@ -284,9 +284,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp(
   ur_result_t Result = UR_RESULT_SUCCESS;
   try {
     ScopedContext Active(hDevice);
-    UR_CHECK_ERROR(
-        cuMemAllocPitch((CUdeviceptr *)ppMem, pResultPitch, widthInBytes,
-                        height, static_cast<unsigned int>(elementSizeBytes)));
+    UR_CHECK_ERROR(cuMemAllocPitch((CUdeviceptr *)ppMem, pResultPitch,
+                                   widthInBytes, height, elementSizeBytes));
   } catch (ur_result_t error) {
     Result = error;
   } catch (...) {

diff --git a/source/adapters/cuda/kernel.cpp b/source/adapters/cuda/kernel.cpp
@@ -203,8 +203,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
 
     int MaxNumActiveGroupsPerCU{0};
     UR_CHECK_ERROR(cuOccupancyMaxActiveBlocksPerMultiprocessor(
-        &MaxNumActiveGroupsPerCU, hKernel->get(),
-        static_cast<int>(localWorkSize), dynamicSharedMemorySize));
+        &MaxNumActiveGroupsPerCU, hKernel->get(), localWorkSize,
+        dynamicSharedMemorySize));
     detail::ur::assertion(MaxNumActiveGroupsPerCU >= 0);
     // Handle the case where we can't have all SMs active with at least 1 group
     // per SM. In that case, the device is still able to run 1 work-group, hence

diff --git a/source/adapters/cuda/kernel.hpp b/source/adapters/cuda/kernel.hpp
@@ -97,8 +97,8 @@ struct ur_kernel_handle_t_ {
       }
       ParamSizes[Index] = Size;
       // calculate the insertion point on the array
-      size_t InsertPos = std::accumulate(
-          std::begin(ParamSizes), std::begin(ParamSizes) + Index, size_t{0});
+      size_t InsertPos = std::accumulate(std::begin(ParamSizes),
+                                         std::begin(ParamSizes) + Index, 0);
       // Update the stored value for the argument
       std::memcpy(&Storage[InsertPos], Arg, Size);
       Indices[Index] = &Storage[InsertPos];
@@ -152,8 +152,8 @@ struct ur_kernel_handle_t_ {
     const args_index_t &getIndices() const noexcept { return Indices; }
 
     uint32_t getLocalSize() const {
-      return static_cast<uint32_t>(std::accumulate(
-          std::begin(OffsetPerIndex), std::end(OffsetPerIndex), size_t{0}));
+      return std::accumulate(std::begin(OffsetPerIndex),
+                             std::end(OffsetPerIndex), 0);
     }
   } Args;
 

diff --git a/source/adapters/cuda/program.cpp b/source/adapters/cuda/program.cpp
@@ -148,8 +148,8 @@ ur_result_t ur_program_handle_t_::buildProgram(const char *BuildOptions) {
   }
 
   UR_CHECK_ERROR(cuModuleLoadDataEx(&Module, static_cast<const void *>(Binary),
-                                    static_cast<unsigned int>(Options.size()),
-                                    Options.data(), OptionVals.data()));
+                                    Options.size(), Options.data(),
+                                    OptionVals.data()));
 
   BuildStatus = UR_PROGRAM_BUILD_STATUS_SUCCESS;
 

diff --git a/source/adapters/cuda/usm.cpp b/source/adapters/cuda/usm.cpp
@@ -325,7 +325,7 @@ umf_result_t USMMemoryProvider::initialize(ur_context_handle_t Ctx,
 
 enum umf_result_t USMMemoryProvider::alloc(size_t Size, size_t Align,
                                            void **Ptr) {
-  auto Res = allocateImpl(Ptr, Size, static_cast<uint32_t>(Align));
+  auto Res = allocateImpl(Ptr, Size, Align);
   if (Res != UR_RESULT_SUCCESS) {
     getLastStatusRef() = Res;
     return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC;

diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt
@@ -89,19 +89,18 @@ if(UR_BUILD_ADAPTER_L0)
     endif()
 
     # TODO: fix level_zero adapter conversion warnings
-    # C4267: The compiler detected a conversion from size_t to a smaller type.
     target_compile_options(ur_adapter_level_zero PRIVATE
-        $<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244 /wd4267>
+        $<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244>
     )
 
     set_target_properties(ur_adapter_level_zero PROPERTIES
         VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}"
         SOVERSION "${PROJECT_VERSION_MAJOR}"
     )
 
-    if(CMAKE_CXX_COMPILER_LINKER_ID MATCHES MSVC)
-        # 0x800: Search for the DLL only in the System32 folder
-        target_link_options(ur_adapter_level_zero PRIVATE LINKER:/DEPENDENTLOADFLAG:0x800)
+    if (WIN32)
+    # 0x800 (LOAD_LIBRARY_SEARCH_SYSTEM32): resolve dependent DLLs only from the System32 folder
+    target_link_options(ur_adapter_level_zero PRIVATE /DEPENDENTLOADFLAG:0x800)
     endif()
 
     target_link_libraries(ur_adapter_level_zero PRIVATE
@@ -184,19 +183,18 @@ if(UR_BUILD_ADAPTER_L0_V2)
     target_compile_definitions(ur_adapter_level_zero_v2 PUBLIC UR_ADAPTER_LEVEL_ZERO_V2)
 
     # TODO: fix level_zero adapter conversion warnings
-    # C4267: The compiler detected a conversion from size_t to a smaller type.
     target_compile_options(ur_adapter_level_zero_v2 PRIVATE
-        $<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244 /wd4100 /wd4267>
+        $<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244>
     )
 
     set_target_properties(ur_adapter_level_zero_v2 PROPERTIES
         VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}"
         SOVERSION "${PROJECT_VERSION_MAJOR}"
     )
 
-    if(CMAKE_CXX_COMPILER_LINKER_ID MATCHES MSVC)
-        # 0x800: Search for the DLL only in the System32 folder
-        target_link_options(ur_adapter_level_zero_v2 PUBLIC LINKER:/DEPENDENTLOADFLAG:0x800)
+    if (WIN32)
+    # 0x800 (LOAD_LIBRARY_SEARCH_SYSTEM32): resolve dependent DLLs only from the System32 folder
+    target_link_options(ur_adapter_level_zero_v2 PUBLIC /DEPENDENTLOADFLAG:0x800)
     endif()
 
     target_link_libraries(ur_adapter_level_zero_v2 PRIVATE