diff --git a/.github/workflows/binaries.yml b/.github/workflows/binaries.yml index 210508c..bdaddc6 100644 --- a/.github/workflows/binaries.yml +++ b/.github/workflows/binaries.yml @@ -14,9 +14,15 @@ on: - ".github/workflows/binaries.yml" workflow_dispatch: +env: + PROJECT_NAME: "candlex" + PROJECT_DIR: "native/candlex" + PROJECT_VERSION: "0.1.2" + NIF_VERSION: "2.16" + jobs: - build_binary: - name: ${{ matrix.target }} / ${{ matrix.os }} + build_cpu: + name: cpu / ${{ matrix.target }} / ${{ matrix.os }} runs-on: ${{ matrix.os }} permissions: contents: write @@ -40,12 +46,53 @@ jobs: - uses: philss/rustler-precompiled-action@main id: precompile with: - project-dir: "native/candlex" - project-name: candlex - project-version: "0.1.2" + project-dir: ${{ env.PROJECT_DIR }} + project-name: ${{ env.PROJECT_NAME }} + project-version: ${{ env.PROJECT_VERSION }} target: ${{ matrix.target }} use-cross: ${{ matrix.use-cross }} - nif-version: "2.16" + nif-version: ${{ env.NIF_VERSION }} + + - uses: softprops/action-gh-release@v1 + with: + draft: true + files: ${{ steps.precompile.outputs.file-path }} + if: startsWith(github.ref, 'refs/tags/') + + build_cuda: + name: cuda / ${{ matrix.target }} / ${{ matrix.os }} + runs-on: ubuntu-22.04 + permissions: + contents: write + strategy: + fail-fast: false + matrix: + include: + - target: x86_64-unknown-linux-gnu + os: ubuntu-22.04 + + container: + image: nvidia/cuda:12.2.2-devel-ubuntu22.04 + + steps: + - run: apt update && apt install -y curl git + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + - run: rustup target add ${{ matrix.target }} + + - uses: philss/rustler-precompiled-action@main + id: precompile + env: + CUDA_COMPUTE_CAP: "70" + with: + project-dir: ${{ env.PROJECT_DIR }} + project-name: ${{ env.PROJECT_NAME }} + project-version: ${{ env.PROJECT_VERSION }} + target: ${{ matrix.target }} + use-cross: null + nif-version: ${{ env.NIF_VERSION }} + variant: cuda + cargo-args: "--features cuda" - uses: 
softprops/action-gh-release@v1 with: diff --git a/README.md b/README.md index 4805cf5..710c6cb 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,19 @@ if no precompiled binary is available for your target environment. Once set, you must run `mix deps.clean candlex --build` explicitly to force to recompile. Building has a number of dependencies, see *Building from source* below. +#### `CANDLEX_NIF_TARGET` + +The default value is `cpu`, which implies the final binary supports targeting +only the host CPU. + +| Value | Target environment | +| --- | --- | +| cpu | | +| cuda | CUDA 12.x | + +To use Candlex with an NVIDIA GPU you need [CUDA](https://developer.nvidia.com/cuda-downloads) compatible with your +GPU drivers. + ## Building from source To build the native binary locally you need to set `CANDLEX_NIF_BUILD=true`. @@ -58,11 +71,16 @@ You will need the following installed in your system for the compilation: * [Git](https://git-scm.com) for fetching candle-core source * [Rust](https://www.rust-lang.org) with cargo to compile rustler NIFs +### GPU support + +To build the native binary with GPU support, you need to run in an environment that has CUDA installed, +then you can build with `CANDLEX_NIF_TARGET=cuda`. See the `CANDLEX_NIF_TARGET` section for more details. + ## Releasing To publish a new version of this package: -1. Update `@version` in `mix.exs` and `project-version` in `.github/workflows/binaries.yml`. +1. Update `@version` in `mix.exs` and `PROJECT_VERSION` in `.github/workflows/binaries.yml`. 1. `git tag -s <tag>` to create new signed tag. 1. `git push origin <tag>` to push the tag. 1. Wait for the `binaries.yml` GitHub workflow to build all the NIF binaries. 
diff --git a/config/config.exs b/config/config.exs index aecd8ea..aff71b9 100644 --- a/config/config.exs +++ b/config/config.exs @@ -1,17 +1,3 @@ import Config -enable_cuda = - case System.get_env("CUDA") do - nil -> System.find_executable("nvcc") && System.find_executable("nvidia-smi") - "false" -> false - _ -> true - end - -crate_features = - if enable_cuda do - [:cuda] - else - [] - end - -config :candlex, crate_features: crate_features +config :candlex, use_cuda: System.get_env("CANDLEX_NIF_TARGET") == "cuda" diff --git a/lib/candlex/native.ex b/lib/candlex/native.ex index 902bc89..b85f0b6 100644 --- a/lib/candlex/native.ex +++ b/lib/candlex/native.ex @@ -8,7 +8,7 @@ defmodule Candlex.Native do use RustlerPrecompiled, otp_app: :candlex, - features: Application.compile_env(:candlex, :crate_features, []), + features: if(Application.compile_env(:candlex, :use_cuda), do: [:cuda], else: []), base_url: "#{source_url}/releases/download/v#{version}", force_build: System.get_env("CANDLEX_NIF_BUILD") in ["1", "true"], mode: mode, @@ -19,7 +19,10 @@ defmodule Candlex.Native do "aarch64-unknown-linux-gnu", "x86_64-apple-darwin", "x86_64-unknown-linux-gnu" - ] + ], + variants: %{ + "x86_64-unknown-linux-gnu" => [cuda: fn -> Application.compile_env(:candlex, :use_cuda) end] + } # Rustler will override all the below stub functions with real NIFs def from_binary(_binary, _dtype, _shape, _device), do: error() diff --git a/native/candlex/build.rs b/native/candlex/build.rs index 86a9c61..33c4b9d 100644 --- a/native/candlex/build.rs +++ b/native/candlex/build.rs @@ -180,13 +180,21 @@ fn set_cuda_include_dir() -> Result<()> { #[allow(unused)] fn compute_cap() -> Result<usize> { - // Grab compute code from nvidia-smi - let mut compute_cap = { + println!("cargo:rerun-if-env-changed=CUDA_COMPUTE_CAP"); + + // Try to parse compute caps from env + let mut compute_cap = if let Ok(compute_cap_str) = std::env::var("CUDA_COMPUTE_CAP") { + 
println!("cargo:rustc-env=CUDA_COMPUTE_CAP={compute_cap_str}"); + compute_cap_str + .parse::<usize>() + .context("Could not parse code")? + } else { + // Use nvidia-smi to get the current compute cap let out = std::process::Command::new("nvidia-smi") - .arg("--query-gpu=compute_cap") - .arg("--format=csv") - .output() - .context("`nvidia-smi` failed. Ensure that you have CUDA installed and that `nvidia-smi` is in your PATH.")?; + .arg("--query-gpu=compute_cap") + .arg("--format=csv") + .output() + .context("`nvidia-smi` failed. Ensure that you have CUDA installed and that `nvidia-smi` is in your PATH.")?; let out = std::str::from_utf8(&out.stdout).context("stdout is not a utf8 string")?; let mut lines = out.lines(); assert_eq!( @@ -197,16 +205,19 @@ fn compute_cap() -> Result<usize> { .next() .context("missing line in stdout")? .replace('.', ""); - cap.parse::<usize>() - .with_context(|| format!("cannot parse as int {cap}"))? + let cap = cap + .parse::<usize>() + .with_context(|| format!("cannot parse as int {cap}"))?; + println!("cargo:rustc-env=CUDA_COMPUTE_CAP={cap}"); + cap }; // Grab available GPU codes from nvcc and select the highest one - let max_nvcc_code = { + let (supported_nvcc_codes, max_nvcc_code) = { let out = std::process::Command::new("nvcc") - .arg("--list-gpu-code") - .output() - .expect("`nvcc` failed. Ensure that you have CUDA installed and that `nvcc` is in your PATH."); + .arg("--list-gpu-code") + .output() + .expect("`nvcc` failed. Ensure that you have CUDA installed and that `nvcc` is in your PATH."); let out = std::str::from_utf8(&out.stdout).unwrap(); let out = out.lines().collect::<Vec<&str>>(); @@ -220,31 +231,21 @@ fn compute_cap() -> Result<usize> { } } codes.sort(); - if !codes.contains(&compute_cap) { - anyhow::bail!( - "nvcc cannot target gpu arch {compute_cap}. Available nvcc targets are {codes:?}." 
- ); - } - *codes.last().unwrap() + let max_nvcc_code = *codes.last().context("no gpu codes parsed from nvcc")?; + (codes, max_nvcc_code) }; - // If nvidia-smi compute_cap is higher than the highest gpu code from nvcc, - // then choose the highest gpu code in nvcc + // Check that nvcc supports the asked compute caps + if !supported_nvcc_codes.contains(&compute_cap) { + anyhow::bail!( + "nvcc cannot target gpu arch {compute_cap}. Available nvcc targets are {supported_nvcc_codes:?}." + ); + } if compute_cap > max_nvcc_code { - println!( - "cargo:warning=Lowering gpu arch {compute_cap} to max nvcc target {max_nvcc_code}." - ); - compute_cap = max_nvcc_code; + anyhow::bail!( + "CUDA compute cap {compute_cap} is higher than the highest gpu code from nvcc {max_nvcc_code}" + ); } - println!("cargo:rerun-if-env-changed=CUDA_COMPUTE_CAP"); - - if let Ok(compute_cap_str) = std::env::var("CUDA_COMPUTE_CAP") { - compute_cap = compute_cap_str - .parse::<usize>() - .with_context(|| format!("cannot parse as usize '{compute_cap_str}'"))?; - println!("cargo:warning=Using gpu arch {compute_cap} from $CUDA_COMPUTE_CAP"); - } - println!("cargo:rustc-env=CUDA_COMPUTE_CAP=sm_{compute_cap}"); Ok(compute_cap) }