diff --git a/.travis.yml b/.travis.yml index 9963216..3cfb191 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,9 +8,27 @@ cache: matrix: include: - # Weave only works with Nim devel - # Build and test using both gcc and clang - # Build and test on both x86-64 and ARM64 + - os: linux + arch: amd64 + env: + - ARCH=amd64 + - CHANNEL=stable + compiler: gcc + + - os: linux + arch: arm64 + env: + - ARCH=arm64 + - CHANNEL=stable + compiler: gcc + + - os: linux + arch: amd64 + env: + - ARCH=amd64 + - CHANNEL=stable + compiler: clang + - os: linux arch: amd64 env: @@ -33,6 +51,13 @@ matrix: compiler: clang # On OSX we only test against clang (gcc is mapped to clang by default) + - os: osx + arch: amd64 + env: + - ARCH=amd64 + - CHANNEL=stable + compiler: clang + - os: osx arch: amd64 env: diff --git a/README.md b/README.md index 38cf8a8..169d035 100644 --- a/README.md +++ b/README.md @@ -43,29 +43,17 @@ instead of being based on traditional work-stealing with shared-memory deques. ## Installation -As of January 2020, Weave depends on features present in Nim `devel` branch. -Some of those features were fixed after the 0.1~0.3 releases of Weave. -It is recommended to install the `master` version of Weave instead via +Weave can be simply installed with ```bash nimble install weave@#master ``` -Weave `master` must be compiled with a Nim version post-[@abea8037](https://github.com/nim-lang/Nim/commit/abea80376a113fb218c22b6474727c279e694cd3) - -Installing nim `devel` is the easiest way. -Refer to [choosenim](https://github.com/dom96/choosenim) or [Nim repository](https://github.com/nim-lang/Nim) to compile the latest `devel`. - -### C++ compilation - -At the moment C++ compilation is not available on latest Nim + latest Weave. - -The new "dataflow parallelism" feature that -allows delaying parallel tasks depending on arbitrary conditions -requires a data structure (`Pledge`) that is valid in C but invalid in C++. 
+or for the latest tagged release +```bash +nimble install weave +``` -C++ compilation works with the following combination: -- Weave v0.3.0 -- Nim devel [@bf2e052e](https://github.com/nim-lang/Nim/commit/bf2e052e6d97c1117603480547804dd98d1ada71) +Weave requires at least Nim v1.2.0 ## API @@ -178,14 +166,18 @@ Weave uses Nim's `countProcessors()` in `std/cpuinfo` - [Weave, a state-of-the-art multithreading runtime](#weave-a-state-of-the-art-multithreading-runtime) - [Installation](#installation) - - [C++ compilation](#c-compilation) - [API](#api) - [Task parallelism](#task-parallelism) - [Data parallelism](#data-parallelism) - [Strided loops](#strided-loops) - [Complete list](#complete-list) - [Table of Contents](#table-of-contents) + - [Platforms supported](#platforms-supported) + - [C++ compilation](#c-compilation) + - [Windows 32-bit](#windows-32-bit) + - [Resource-restricted devices](#resource-restricted-devices) - [Backoff mechanism](#backoff-mechanism) + - [Weave using all CPUs](#weave-using-all-cpus) - [Experimental features](#experimental-features) - [Data parallelism (experimental features)](#data-parallelism-experimental-features) - [Awaitable loop](#awaitable-loop) @@ -203,12 +195,53 @@ Weave uses Nim's `countProcessors()` in `std/cpuinfo` - [Research](#research) - [License](#license) +## Platforms supported + +Weave supports all platforms with `pthread` and Windows. +Missing pthread functionality may be emulated or unused. +For example on macOS, the `pthread` implementation does not expose barrier functionality or affinity settings. + +### C++ compilation + +At the moment C++ compilation is not available on latest Nim + latest Weave. + +The new "dataflow parallelism" feature that +allows delaying parallel tasks depending on arbitrary conditions +requires a data structure (`Pledge`) that is valid in C but invalid in C++. 
+ +C++ compilation works with the following combination: +- Weave v0.3.0 +- Nim devel [@bf2e052e](https://github.com/nim-lang/Nim/commit/bf2e052e6d97c1117603480547804dd98d1ada71) + +### Windows 32-bit + +Windows 32-bit targets cannot use the MinGW compiler as it is missing support +for `EnterSynchronizationBarrier`. MSVC should work instead. + +### Resource-restricted devices + +Weave uses a flexible and efficient memory subsystem that has been optimized for a wide range of hardware: low power Raspberry Pi, phones, laptops, desktops and 30+ core workstations. +It currently assumes by default that at least 16KB is available on your hardware for a memory pool and that this memory pool can grow as needed. +This can be tuned with `-d:WV_MemArenaSize=2048` to have the base pool use 2KB for example. +The pool size should be a multiple of 256 bytes. +PRs to improve support of very restricted devices are welcome. + ## Backoff mechanism -A Backoff mechanism is enabled by default. It allows workers with no tasks to sleep instead of spining aimlessly and burning CPU. +A Backoff mechanism is enabled by default. It allows workers with no tasks to sleep instead of spinning aimlessly and burning CPU cycles. It can be disabled with `-d:WV_Backoff=off`. +### Weave using all CPUs + +Weave multithreading is cooperative: idle threads send steal requests instead of actively stealing from other workers' queues. + +This means that a thread sleeping or stuck in a long computation may starve other threads and they will spin burning CPU cycles. + +- Don't sleep or block a thread as this blocks the Weave scheduler. This is similar to `async`/`await` libraries. +- If you really need to sleep or block the main thread, make sure to empty all the tasks beforehand with `syncRoot(Weave)` in the main thread. The child threads will be put to sleep until new tasks are spawned. +- The `loadBalance(Weave)` call can be used in the middle of heavy computations to force the worker to answer steal requests. 
This is automatically done in `parallelFor` loops. + ## Experimental features Experimental features might see API and/or implementation changes. @@ -456,7 +489,6 @@ proc main() = main() ``` - ### Lazy Allocation of Flowvars Flowvars can be lazily allocated, this reduces overhead by at least 2x on very fine-grained tasks like Fibonacci or Depth-First-Search that may spawn trillions of tasks in less than diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 4ba8473..a885e41 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -7,48 +7,54 @@ strategy: # Windows_devel_32bit: # VM: 'windows-latest' - # ARCH: x86 # UCPU: i686 - # PLATFORM: x86 # CHANNEL: devel # TEST_LANG: c + Windows_stable_64bit: + VM: 'windows-latest' + UCPU: amd64 + CHANNEL: stable + TEST_LANG: c Windows_devel_64bit: VM: 'windows-latest' UCPU: amd64 - PLATFORM: x64 CHANNEL: devel TEST_LANG: c # Windows_cpp_devel_64bit: # VM: 'windows-latest' # UCPU: amd64 - # PLATFORM: x64 # CHANNEL: devel # TEST_LANG: cpp + Linux_stable_64bit: + VM: 'ubuntu-16.04' + UCPU: amd64 + CHANNEL: stable + TEST_LANG: c Linux_devel_64bit: VM: 'ubuntu-16.04' UCPU: amd64 - PLATFORM: x64 CHANNEL: devel TEST_LANG: c # Linux_cpp_devel_64bit: # VM: 'ubuntu-16.04' # UCPU: amd64 - # PLATFORM: x64 # CHANNEL: devel - # WEAVE_TEST_LANG: cpp + # TEST_LANG: cpp # Linux_devel_32bit: # VM: 'ubuntu-16.04' # ARCH: x86 - # PLATFORM: x86 # UCPU: i686 # CHANNEL: devel # TEST_LANG: c - + MacOS_stable_64bit: + VM: 'macOS-10.14' + UCPU: amd64 + CHANNEL: stable + TEST_LANG: c MacOS_devel_64bit: VM: 'macOS-10.14' UCPU: amd64 - PLATFORM: x64 CHANNEL: devel TEST_LANG: c pool: @@ -58,13 +64,13 @@ steps: - task: CacheBeta@1 displayName: 'cache Nim binaries' inputs: - key: NimBinaries | $(Agent.OS) | $(CHANNEL) | $(PLATFORM) + key: NimBinaries | $(Agent.OS) | $(CHANNEL) | $(UCPU) path: NimBinaries - task: CacheBeta@1 displayName: 'cache MinGW-w64' inputs: - key: mingwCache | 8_1_0 | $(PLATFORM) + key: mingwCache | 8_1_0 | $(UCPU) path: 
mingwCache condition: eq(variables['Agent.OS'], 'Windows_NT') @@ -76,7 +82,7 @@ steps: echo "PATH=${PATH}" set -e echo "Installing MinGW-w64" - if [[ $PLATFORM == "x86" ]]; then + if [[ $UCPU == "i686" ]]; then MINGW_FILE="i686-8.1.0-release-posix-dwarf-rt_v6-rev0.7z" MINGW_URL="https://sourceforge.net/projects/mingw-w64/files/Toolchains%20targetting%20Win32/Personal%20Builds/mingw-builds/8.1.0/threads-posix/dwarf/${MINGW_FILE}" MINGW_DIR="mingw32" @@ -95,9 +101,19 @@ steps: mkdir -p /c/custom mv "$MINGW_DIR" /c/custom/ popd - echo "##vso[task.prependpath]/c/custom/${MINGW_DIR}/bin" + + # Workaround https://developercommunity.visualstudio.com/content/problem/891929/windows-2019-cygheap-base-mismatch-detected-git-ba.html + echo "##vso[task.prependpath]/usr/bin" + echo "##vso[task.prependpath]/mingw64/bin" + echo "##vso[task.setvariable variable=MINGW_DIR;]$MINGW_DIR" + displayName: 'Install dependencies (Windows)' condition: eq(variables['Agent.OS'], 'Windows_NT') + - powershell: | + # export custom mingw PATH to other tasks + echo "##vso[task.prependpath]c:\custom\$(MINGW_DIR)\bin" + displayName: 'Mingw PATH (Windows)' + condition: eq(variables['Agent.OS'], 'Windows_NT') - bash: | echo "PATH=${PATH}" @@ -121,7 +137,7 @@ steps: - bash: | echo "PATH=${PATH}" gcc -v - echo "UCPU=${UCPU}" + export ucpu=${UCPU} if [ "${CHANNEL}" = stable ]; then BRANCH="v$(curl https://nim-lang.org/channels/stable)" @@ -151,11 +167,19 @@ steps: ./koch tools fi fi - popd # exit nim-CHANNEL + popd # exit nim-${CHANNEL} popd # exit NimBinaries - echo "##vso[task.prependpath]$PWD/NimBinaries/nim-${CHANNEL}/bin" displayName: 'Building Nim' + - powershell: | + echo "##vso[task.prependpath]$pwd\NimBinaries\nim-$(CHANNEL)\bin" + displayName: 'Set env variable (Windows)' + condition: eq(variables['Agent.OS'], 'Windows_NT') + - bash: | + echo "##vso[task.prependpath]$PWD/NimBinaries/nim-${CHANNEL}/bin" + displayName: 'Set env variable (Posix)' + condition: ne(variables['Agent.OS'], 'Windows_NT') 
+ - bash: | echo "PATH=${PATH}" nimble refresh diff --git a/weave.nimble b/weave.nimble index a075cf7..40a4e28 100644 --- a/weave.nimble +++ b/weave.nimble @@ -1,6 +1,6 @@ # Package -version = "0.1.0" +version = "0.4.0" author = "Mamy André-Ratsimbazafy" description = "a state-of-the-art ùultithreading runtime" license = "MIT or Apache License 2.0" @@ -8,7 +8,7 @@ license = "MIT or Apache License 2.0" # Dependencies # requires Nim post abea80376a113fb218c22b6474727c279e694cd3 -requires "nim >= 1.1.1", "synthesis" +requires "nim >= 1.2.0", "synthesis" proc test(flags, path: string) = if not dirExists "build": diff --git a/weave/victims.nim b/weave/victims.nim index cc13a28..eabca48 100644 --- a/weave/victims.nim +++ b/weave/victims.nim @@ -203,7 +203,7 @@ proc evalSplit(task: Task, req: StealRequest, workSharing: bool): int {.inline.} if workSharing: # The real splitting will be done by the child worker # We need to send it enough work for its own children and all the steal requests pending - ascertain: req.thiefID in {myWorker().left, myWorker().right} + ascertain: req.thiefID == myWorker().left or req.thiefID == myWorker().right var left, right = 0'i32 if myWorker().leftIsWaiting: left = approxNumThievesProxy(myWorker().left)