Skip to content

Commit

Permalink
build: NIF binary targeting CUDA (#16)
Browse files Browse the repository at this point in the history
  • Loading branch information
grzuy authored Nov 6, 2023
1 parent b30faf2 commit e6723f6
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 57 deletions.
59 changes: 53 additions & 6 deletions .github/workflows/binaries.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,15 @@ on:
- ".github/workflows/binaries.yml"
workflow_dispatch:

env:
PROJECT_NAME: "candlex"
PROJECT_DIR: "native/candlex"
PROJECT_VERSION: "0.1.2"
NIF_VERSION: "2.16"

jobs:
build_binary:
name: ${{ matrix.target }} / ${{ matrix.os }}
build_cpu:
name: cpu / ${{ matrix.target }} / ${{ matrix.os }}
runs-on: ${{ matrix.os }}
permissions:
contents: write
Expand All @@ -40,12 +46,53 @@ jobs:
- uses: philss/rustler-precompiled-action@main
id: precompile
with:
project-dir: "native/candlex"
project-name: candlex
project-version: "0.1.2"
project-dir: ${{ env.PROJECT_DIR }}
project-name: ${{ env.PROJECT_NAME }}
project-version: ${{ env.PROJECT_VERSION }}
target: ${{ matrix.target }}
use-cross: ${{ matrix.use-cross }}
nif-version: "2.16"
nif-version: ${{ env.NIF_VERSION }}

- uses: softprops/action-gh-release@v1
with:
draft: true
files: ${{ steps.precompile.outputs.file-path }}
if: startsWith(github.ref, 'refs/tags/')

build_cuda:
name: cuda / ${{ matrix.target }} / ${{ matrix.os }}
runs-on: ubuntu-22.04
permissions:
contents: write
strategy:
fail-fast: false
matrix:
include:
- target: x86_64-unknown-linux-gnu
os: ubuntu-22.04

container:
image: nvidia/cuda:12.2.2-devel-ubuntu22.04

steps:
- run: apt update && apt install -y curl git
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- run: rustup target add ${{ matrix.target }}

- uses: philss/rustler-precompiled-action@main
id: precompile
env:
CUDA_COMPUTE_CAP: "70"
with:
project-dir: ${{ env.PROJECT_DIR }}
project-name: ${{ env.PROJECT_NAME }}
project-version: ${{ env.PROJECT_VERSION }}
target: ${{ matrix.target }}
use-cross: null
nif-version: ${{ env.NIF_VERSION }}
variant: cuda
cargo-args: "--features cuda"

- uses: softprops/action-gh-release@v1
with:
Expand Down
20 changes: 19 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,19 @@ if no precompiled binary is available for your target environment. Once set, you
must run `mix deps.clean candlex --build` explicitly to force to recompile.
Building has a number of dependencies, see *Building from source* below.

#### `CANDLEX_NIF_TARGET`

The default value is `cpu`, which means the final binary supports targeting
only the host CPU.

| Value | Target environment |
| --- | --- |
| cpu | |
| cuda | CUDA 12.x |

To use Candlex with an NVIDIA GPU you need [CUDA](https://developer.nvidia.com/cuda-downloads) compatible with your
GPU drivers.

## Building from source

To build the native binary locally you need to set `CANDLEX_NIF_BUILD=true`.
Expand All @@ -58,11 +71,16 @@ You will need the following installed in your system for the compilation:
* [Git](https://git-scm.com) for fetching candle-core source
* [Rust](https://www.rust-lang.org) with cargo to compile rustler NIFs

### GPU support

To build the native binary with GPU support, you need to run in an environment that has CUDA installed;
you can then build with `CANDLEX_NIF_TARGET=cuda`. See the `CANDLEX_NIF_TARGET` section above for more details.

## Releasing

To publish a new version of this package:

1. Update `@version` in `mix.exs` and `project-version` in `.github/workflows/binaries.yml`.
1. Update `@version` in `mix.exs` and `PROJECT_VERSION` in `.github/workflows/binaries.yml`.
1. `git tag -s <tag-version>` to create new signed tag.
1. `git push origin <tag-version>` to push the tag.
1. Wait for the `binaries.yml` GitHub workflow to build all the NIF binaries.
Expand Down
16 changes: 1 addition & 15 deletions config/config.exs
Original file line number Diff line number Diff line change
@@ -1,17 +1,3 @@
import Config

enable_cuda =
case System.get_env("CUDA") do
nil -> System.find_executable("nvcc") && System.find_executable("nvidia-smi")
"false" -> false
_ -> true
end

crate_features =
if enable_cuda do
[:cuda]
else
[]
end

config :candlex, crate_features: crate_features
config :candlex, use_cuda: System.get_env("CANDLEX_NIF_TARGET") == "cuda"
7 changes: 5 additions & 2 deletions lib/candlex/native.ex
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ defmodule Candlex.Native do

use RustlerPrecompiled,
otp_app: :candlex,
features: Application.compile_env(:candlex, :crate_features, []),
features: if(Application.compile_env(:candlex, :use_cuda), do: [:cuda], else: []),
base_url: "#{source_url}/releases/download/v#{version}",
force_build: System.get_env("CANDLEX_NIF_BUILD") in ["1", "true"],
mode: mode,
Expand All @@ -19,7 +19,10 @@ defmodule Candlex.Native do
"aarch64-unknown-linux-gnu",
"x86_64-apple-darwin",
"x86_64-unknown-linux-gnu"
]
],
variants: %{
"x86_64-unknown-linux-gnu" => [cuda: fn -> Application.compile_env(:candlex, :use_cuda) end]
}

# Rustler will override all the below stub functions with real NIFs
def from_binary(_binary, _dtype, _shape, _device), do: error()
Expand Down
67 changes: 34 additions & 33 deletions native/candlex/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,13 +180,21 @@ fn set_cuda_include_dir() -> Result<()> {

#[allow(unused)]
fn compute_cap() -> Result<usize> {
// Grab compute code from nvidia-smi
let mut compute_cap = {
println!("cargo:rerun-if-env-changed=CUDA_COMPUTE_CAP");

// Try to parse compute caps from env
let mut compute_cap = if let Ok(compute_cap_str) = std::env::var("CUDA_COMPUTE_CAP") {
println!("cargo:rustc-env=CUDA_COMPUTE_CAP={compute_cap_str}");
compute_cap_str
.parse::<usize>()
.context("Could not parse code")?
} else {
// Use nvidia-smi to get the current compute cap
let out = std::process::Command::new("nvidia-smi")
.arg("--query-gpu=compute_cap")
.arg("--format=csv")
.output()
.context("`nvidia-smi` failed. Ensure that you have CUDA installed and that `nvidia-smi` is in your PATH.")?;
.arg("--query-gpu=compute_cap")
.arg("--format=csv")
.output()
.context("`nvidia-smi` failed. Ensure that you have CUDA installed and that `nvidia-smi` is in your PATH.")?;
let out = std::str::from_utf8(&out.stdout).context("stdout is not a utf8 string")?;
let mut lines = out.lines();
assert_eq!(
Expand All @@ -197,16 +205,19 @@ fn compute_cap() -> Result<usize> {
.next()
.context("missing line in stdout")?
.replace('.', "");
cap.parse::<usize>()
.with_context(|| format!("cannot parse as int {cap}"))?
let cap = cap
.parse::<usize>()
.with_context(|| format!("cannot parse as int {cap}"))?;
println!("cargo:rustc-env=CUDA_COMPUTE_CAP={cap}");
cap
};

// Grab available GPU codes from nvcc and select the highest one
let max_nvcc_code = {
let (supported_nvcc_codes, max_nvcc_code) = {
let out = std::process::Command::new("nvcc")
.arg("--list-gpu-code")
.output()
.expect("`nvcc` failed. Ensure that you have CUDA installed and that `nvcc` is in your PATH.");
.arg("--list-gpu-code")
.output()
.expect("`nvcc` failed. Ensure that you have CUDA installed and that `nvcc` is in your PATH.");
let out = std::str::from_utf8(&out.stdout).unwrap();

let out = out.lines().collect::<Vec<&str>>();
Expand All @@ -220,31 +231,21 @@ fn compute_cap() -> Result<usize> {
}
}
codes.sort();
if !codes.contains(&compute_cap) {
anyhow::bail!(
"nvcc cannot target gpu arch {compute_cap}. Available nvcc targets are {codes:?}."
);
}
*codes.last().unwrap()
let max_nvcc_code = *codes.last().context("no gpu codes parsed from nvcc")?;
(codes, max_nvcc_code)
};

// If nvidia-smi compute_cap is higher than the highest gpu code from nvcc,
// then choose the highest gpu code in nvcc
// Check that nvcc supports the asked compute caps
if !supported_nvcc_codes.contains(&compute_cap) {
anyhow::bail!(
"nvcc cannot target gpu arch {compute_cap}. Available nvcc targets are {supported_nvcc_codes:?}."
);
}
if compute_cap > max_nvcc_code {
println!(
"cargo:warning=Lowering gpu arch {compute_cap} to max nvcc target {max_nvcc_code}."
);
compute_cap = max_nvcc_code;
anyhow::bail!(
"CUDA compute cap {compute_cap} is higher than the highest gpu code from nvcc {max_nvcc_code}"
);
}

println!("cargo:rerun-if-env-changed=CUDA_COMPUTE_CAP");

if let Ok(compute_cap_str) = std::env::var("CUDA_COMPUTE_CAP") {
compute_cap = compute_cap_str
.parse::<usize>()
.with_context(|| format!("cannot parse as usize '{compute_cap_str}'"))?;
println!("cargo:warning=Using gpu arch {compute_cap} from $CUDA_COMPUTE_CAP");
}
println!("cargo:rustc-env=CUDA_COMPUTE_CAP=sm_{compute_cap}");
Ok(compute_cap)
}

0 comments on commit e6723f6

Please sign in to comment.