Merge branch 'main' into staging/1.8.0

NVIDIA · Mar 17, 2022 · 05d48aa · 05d48aa
2 parents 9aebbdf + 5d42077
commit 05d48aa
Show file tree

Hide file tree

Showing 4 changed files with 46 additions and 54 deletions.
diff --git a/docs/extended_api/shapes/aligned_size_t.md b/docs/extended_api/shapes/aligned_size_t.md
@@ -20,6 +20,11 @@ struct cuda::aligned_size_t {
 The class template `cuda::aligned_size_t` is a _shape_ representing an extent
   of bytes with a statically defined (address and size) alignment.
 
+*Preconditions*: 
+
+- The _address_ of the extent of bytes must be aligned to an `Alignment`-byte boundary.
+- The _size_ of the extent of bytes must be a multiple of `Alignment`.
+
 ## Template Parameters
 
 | `Alignment` | The address and size alignment of the byte extent. |
@@ -52,8 +57,8 @@ __global__ void example_kernel(void* dst, void* src, size_t size) {
   // Implementation cannot make assumptions about alignment.
   cuda::memcpy_async(dst, src, size, bar);
 
-  // Implementation can assume that dst, src and size are 16-bytes aligned and
-  // may optimize accordingly.
+  // Implementation can assume that dst and src are 16-byte aligned,
+  // and that size is a multiple of 16, and may optimize accordingly.
   cuda::memcpy_async(dst, src, cuda::aligned_size_t<16>(size), bar);
 
   bar.arrive_and_wait();

diff --git a/docs/overview.md b/docs/overview.md
@@ -2,7 +2,7 @@
 
 <table><tr>
 <th><b><a href="https://github.com/nvidia/libcudacxx/tree/main/examples">Examples</a></b></th>
-<th><b><a href="https://godbolt.org/z/shc8sG">Godbolt</a></b></th>
+<th><b><a href="https://godbolt.org/z/Kns9vhPEr">Godbolt</a></b></th>
 <th><b><a href="https://nvidia.github.io/libcudacxx">Documentation</a></b></th>
 </tr></table>
 

diff --git a/docs/releases/changelog.md b/docs/releases/changelog.md
@@ -34,6 +34,10 @@ Supported ABI Versions: 4 (default), 3, and 2.
 - #250: Fix pragma typo on MSVC.
 - #251: Add several new compilers versions to our docker suite.
 - #252: Fix several deprecations in Clang 13.
+- #253: Fix truncations and warnings in numerics.
+- #254: Fix warnings in `<array>` tests and move `__cuda_std__` escapes in `<algorithm>`.
+- #255: Fix deprecated copy ctor warnings in `__annotated_ptr` for Clang 13.
+- #256: Fix SM detection in the `perform_tests` script.
 
 ## libcu++ 1.7.0
 

diff --git a/docs/setup/building_and_testing.md b/docs/setup/building_and_testing.md
@@ -35,13 +35,13 @@ In a Bash shell:
 
 ```bash
 cd ${LIBCUDACXX_ROOT}
-mkdir -p build
-cd build
-cmake .. \
-  -DLLVM_CONFIG_PATH=$(which llvm-config) \
-  -DCMAKE_CXX_COMPILER=nvcc \
-  -DLIBCXX_NVCC_HOST_COMPILER=g++ \
-  -DLIBCXX_TEST_STANDARD_VER=c++11
+cmake \
+    -S ./ \
+    -B build \
+    -DCMAKE_CXX_COMPILER=$CXX \
+    -DCMAKE_CUDA_COMPILER=$TOOLKIT/bin/nvcc \
+    -DLIBCUDACXX_ENABLE_LIBCUDACXX_TESTS=ON \
+    -DLIBCUDACXX_ENABLE_LIBCXX_TESTS=OFF
 ```
 
 ### Step 2: Build & Run the Tests
@@ -72,13 +72,13 @@ export HOST=executor.nvidia.com
 export USERNAME=ubuntu
 
 cd ${LIBCUDACXX_ROOT}
-mkdir -p build
-cd build
-cmake .. \
-  -DLLVM_CONFIG_PATH=$(which llvm-config) \
-  -DCMAKE_CXX_COMPILER=nvcc \
-  -DLIBCXX_NVCC_HOST_COMPILER=aarch64-linux-gnu-g++ \
-  -DLIBCXX_TEST_STANDARD_VER=c++14 \
+cmake \
+  -S ./ \
+  -B build \
+  -DCMAKE_CUDA_COMPILER=$TOOLKIT/bin/nvcc \
+  -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++ \
+  -DLIBCUDACXX_ENABLE_LIBCUDACXX_TESTS=ON \
+  -DLIBCUDACXX_ENABLE_LIBCXX_TESTS=OFF \
   -DLIBCXX_EXECUTOR="SSHExecutor(host='${HOST}', username='${USERNAME}')"
 ```
 
@@ -103,17 +103,15 @@ Follow Step 0 for \*nix native builds/tests.
 In a Bash shell:
 
 ```bash
-export CXX="${LIBCUDACXX_ROOT}/utils/nvidia/nvrtc/nvrtc.sh nvcc"
-
 cd ${LIBCUDACXX_ROOT}
-mkdir -p build
-cd build
-cmake .. \
-  -DCMAKE_C_COMPILER_WORKS=ON \
-  -DLLVM_CONFIG_PATH=$(which llvm-config) \
-  -DLIBCXX_NVCC_HOST_COMPILER=g++ \
-  -DLIBCXX_TEST_STANDARD_VER=c++11 \
-  -DLIBCXX_TEST_WITH_NVRTC=ON
+cmake \
+  -S ./ \
+  -B build \
+  -DCMAKE_CXX_COMPILER=$CXX \
+  -DCMAKE_CUDA_COMPILER=$TOOLKIT/bin/nvcc \
+  -DLIBCUDACXX_ENABLE_LIBCUDACXX_TESTS=ON \
+  -DLIBCUDACXX_ENABLE_LIBCXX_TESTS=OFF \
+  -DLIBCUDACXX_TEST_WITH_NVRTC=ON
 ```
 
 ### Step 2: Build & Run the Tests
@@ -124,17 +122,6 @@ Follow Step 2 for \*nix native builds/tests.
 
 ### Step 0: Install Build Requirements
 
-Install [Git for Windows](https://git-scm.com/download/win):
-
-Checkout [the LLVM Git mono repo](https://github.com/llvm/llvm-project) using a
-Git Bash shell:
-
-```bat
-export LLVM_ROOT=/path/to/llvm
-
-git clone https://github.com/llvm/llvm-project.git ${LLVM_ROOT}
-```
-
 [Install Python](https://www.python.org/downloads/windows).
 
 Download [the get-pip.py bootstrap script](https://bootstrap.pypa.io/get-pip.py) and run it.
@@ -158,29 +145,25 @@ If Powershell is desired, it would be best to launch it from within the native t
 In a Visual Studio command prompt:
 
 ```bat
-set LLVM_ROOT=\path\to\llvm
 set LIBCUDACXX_ROOT=\path\to\libcudacxx # Helpful env var pointing to the git repo root.
-
 cd %LIBCUDACXX_ROOT%
-mkdir build
-cd build
-cmake .. ^
+
+cmake ^
+  -S ./ ^
+  -B build ^
   -G "Ninja" ^
-  -DLLVM_PATH=%LLVM_ROOT%\llvm ^
-  -DCMAKE_CXX_COMPILER=nvcc ^
-  -DLIBCXX_NVCC_HOST_COMPILER=cl ^
-  -DCMAKE_CXX_COMPILER_FORCED=ON ^
-  -DCMAKE_C_COMPILER_FORCED=ON
+  -DCMAKE_CXX_COMPILER=cl ^
+  -DCMAKE_CUDA_COMPILER=nvcc ^
+  -DCMAKE_CUDA_COMPILER_FORCED=ON ^
+  -DLIBCUDACXX_ENABLE_LIBCUDACXX_TESTS=ON ^
+  -DLIBCUDACXX_ENABLE_LIBCXX_TESTS=OFF
 ```
 
 ### Step 2: Build & Run the Tests
 
-In a Visual Studio command prompt:
+`SM_ARCH` can be set to any integer value (e.g., "80", "86").
 
 ```bat
-set SM_ARCH=70
-
-cd %LIBCUDACXX_ROOT%\build
-set LIBCXX_SITE_CONFIG=libcxx\test\lit.site.cfg
-lit ..\test -Dcompute_archs=%SM_ARCH% -sv --no-progress-bar
+set LIBCUDACXX_SITE_CONFIG=%LIBCUDACXX_ROOT%\build\test\lit.site.cfg
+lit %LIBCUDACXX_ROOT%\test -Dcompute_archs=%SM_ARCH% -sv --no-progress-bar
 ```