9 changes: 9 additions & 0 deletions CMakeLists.txt
@@ -800,6 +800,15 @@ if(EXECUTORCH_BUILD_PYBIND)
       torch
   )
 
+  # Build common AOTI functionality if needed by CUDA or Metal backends
+  if(EXECUTORCH_BUILD_CUDA OR EXECUTORCH_BUILD_METAL)
+    list(APPEND _dep_libs aoti_common)
+  endif()
+
+  if(EXECUTORCH_BUILD_CUDA)
+    list(APPEND _dep_libs aoti_cuda)
+  endif()
+
   if(EXECUTORCH_BUILD_EXTENSION_MODULE)
     # Always use static linking for pybindings to avoid runtime symbol
     # resolution issues
7 changes: 7 additions & 0 deletions extension/pybindings/portable_lib.py
@@ -51,6 +51,12 @@
 # wouldn't preserve the static type annotations.
 #
 # Note that all of these are experimental, and subject to change without notice.
+
+# Set dlopen flags to RTLD_GLOBAL to ensure that the symbols in _portable_lib can
+# be found by another shared library (for example, in AOTI where we want to load
+# an AOTI compiled .so file with needed symbols defined in _portable_lib).
+prev = sys.getdlopenflags()
+sys.setdlopenflags(prev | os.RTLD_GLOBAL)
 from executorch.extension.pybindings._portable_lib import (  # noqa: F401
     # Disable "imported but unused" (F401) checks.
     _create_profile_block,  # noqa: F401
@@ -73,6 +79,7 @@
     MethodMeta,  # noqa: F401
     Verification,  # noqa: F401
 )
+sys.setdlopenflags(prev)
 
 # Clean up so that `dir(portable_lib)` is the same as `dir(_portable_lib)`
 # (apart from some __dunder__ names).
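Note on the pattern above: saving the dlopen flags, OR-ing in RTLD_GLOBAL, importing, and then restoring them is the standard POSIX recipe for making an extension module's symbols visible to shared libraries loaded afterwards. A minimal standalone sketch of the same idea (the module name _some_extension is hypothetical):

    import os
    import sys

    # Save the current flags, then OR in RTLD_GLOBAL so that symbols exported
    # by the next extension module loaded are added to the process-global
    # symbol table, where later dlopen'd libraries can resolve them.
    prev = sys.getdlopenflags()
    sys.setdlopenflags(prev | os.RTLD_GLOBAL)
    try:
        import _some_extension  # hypothetical C extension module
    finally:
        # Restore the original flags so later imports are unaffected.
        sys.setdlopenflags(prev)

One caveat: the change in this diff restores the flags only after a successful import; a try/finally as in the sketch would also restore them if the import raised.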
12 changes: 2 additions & 10 deletions install_requirements.py
@@ -18,14 +18,6 @@
 # This will be dynamically set based on CUDA availability and CUDA backend enabled/disabled.
 TORCH_NIGHTLY_URL_BASE = "https://download.pytorch.org/whl/nightly"
 
-# Supported CUDA versions - modify this to add/remove supported versions
-# Format: tuple of (major, minor) version numbers
-SUPPORTED_CUDA_VERSIONS = (
-    (12, 6),
-    (12, 8),
-    (13, 0),
-)
-
 # Since ExecuTorch often uses main-branch features of pytorch, only the nightly
 # pip versions will have the required features.
 #
@@ -51,7 +43,7 @@ def install_requirements(use_pytorch_nightly):
         sys.exit(1)
 
     # Determine the appropriate PyTorch URL based on CUDA delegate status
-    torch_url = determine_torch_url(TORCH_NIGHTLY_URL_BASE, SUPPORTED_CUDA_VERSIONS)
+    torch_url = determine_torch_url(TORCH_NIGHTLY_URL_BASE)
 
     # pip packages needed by exir.
     TORCH_PACKAGE = [
@@ -121,7 +113,7 @@
 
 def install_optional_example_requirements(use_pytorch_nightly):
     # Determine the appropriate PyTorch URL based on CUDA delegate status
-    torch_url = determine_torch_url(TORCH_NIGHTLY_URL_BASE, SUPPORTED_CUDA_VERSIONS)
+    torch_url = determine_torch_url(TORCH_NIGHTLY_URL_BASE)
 
     print("Installing torch domain libraries")
     DOMAIN_LIBRARIES = [
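With SUPPORTED_CUDA_VERSIONS now owned by install_utils, callers pass only the base URL. A rough sketch of the resulting behavior at this call site (the printed URLs are illustrative):

    from install_utils import determine_torch_url

    TORCH_NIGHTLY_URL_BASE = "https://download.pytorch.org/whl/nightly"
    torch_url = determine_torch_url(TORCH_NIGHTLY_URL_BASE)
    # -> ".../cpu" when nvcc is absent or the detected CUDA version is unsupported,
    # -> ".../cu126" (for example) when CUDA 12.6 is detected
    print(torch_url)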
88 changes: 73 additions & 15 deletions install_utils.py
@@ -10,31 +10,57 @@
 import re
 import subprocess
 import sys
+from typing import List, Optional
+
+# Supported CUDA versions - modify this to add/remove supported versions
+# Format: tuple of (major, minor) version numbers
+SUPPORTED_CUDA_VERSIONS = (
+    (12, 6),
+    (12, 8),
+    (13, 0),
+)
 
 
-def _cuda_version_to_pytorch_suffix(major, minor):
+def is_cmake_option_on(
+    cmake_configuration_args: List[str], var_name: str, default: bool
+) -> bool:
     """
-    Generate PyTorch CUDA wheel suffix from CUDA version numbers.
+    Get a boolean CMake variable, from a list of CMake configuration arguments.
+    The var_name should not include the "-D" prefix.
 
     Args:
-        major: CUDA major version (e.g., 12)
-        minor: CUDA minor version (e.g., 6)
+        cmake_configuration_args: List of CMake configuration arguments.
+        var_name: Name of the CMake variable.
+        default: Default boolean value if the variable is not set.
 
     Returns:
-        PyTorch wheel suffix string (e.g., "cu126")
+        Boolean value of the CMake variable.
     """
-    return f"cu{major}{minor}"
+    cmake_define = _extract_cmake_define(cmake_configuration_args, var_name)
+
+    return _normalize_cmake_bool(cmake_define, default)
+
+
+def is_cuda_available() -> bool:
+    """
+    Check if CUDA is available on the system by attempting to get the CUDA version.
+
+    Returns:
+        True if CUDA is available and supported, False otherwise.
+    """
+    try:
+        _get_cuda_version()
+        return True
+    except Exception:
+        return False
 
 
 @functools.lru_cache(maxsize=1)
-def _get_cuda_version(supported_cuda_versions):
+def _get_cuda_version():
     """
     Get the CUDA version installed on the system using nvcc command.
     Returns a tuple (major, minor).
 
-    Args:
-        supported_cuda_versions: List of supported CUDA versions as tuples
-
     Raises:
         RuntimeError: if nvcc is not found or version cannot be parsed
     """
@@ -50,9 +76,9 @@ def _get_cuda_version(supported_cuda_versions):
     major, minor = int(match.group(1)), int(match.group(2))
 
     # Check if the detected version is supported
-    if (major, minor) not in supported_cuda_versions:
+    if (major, minor) not in SUPPORTED_CUDA_VERSIONS:
         available_versions = ", ".join(
-            [f"{maj}.{min}" for maj, min in supported_cuda_versions]
+            [f"{maj}.{min}" for maj, min in SUPPORTED_CUDA_VERSIONS]
         )
         raise RuntimeError(
             f"Detected CUDA version {major}.{minor} is not supported. "
@@ -76,6 +102,39 @@ def _get_cuda_version(supported_cuda_versions):
         )
 
 
+def _extract_cmake_define(args: List[str], name: str) -> Optional[str]:
+    prefix = f"-D{name}="
+    for arg in args:
+        if arg.startswith(prefix):
+            return arg[len(prefix) :]
+    return None
+
+
+def _normalize_cmake_bool(value: Optional[str], default: bool = False) -> bool:
+    if value is None:
+        return default
+    normalized = value.strip().upper()
+    if normalized in {"ON", "1", "TRUE", "YES"}:
+        return True
+    if normalized in {"OFF", "0", "FALSE", "NO"}:
+        return False
+    return default
+
+
+def _cuda_version_to_pytorch_suffix(major, minor):
+    """
+    Generate PyTorch CUDA wheel suffix from CUDA version numbers.
+
+    Args:
+        major: CUDA major version (e.g., 12)
+        minor: CUDA minor version (e.g., 6)
+
+    Returns:
+        PyTorch wheel suffix string (e.g., "cu126")
+    """
+    return f"cu{major}{minor}"
+
+
 def _get_pytorch_cuda_url(cuda_version, torch_nightly_url_base):
     """
     Get the appropriate PyTorch CUDA URL for the given CUDA version.
@@ -95,14 +154,13 @@ def _get_pytorch_cuda_url(cuda_version, torch_nightly_url_base):
 
 
 @functools.lru_cache(maxsize=1)
-def determine_torch_url(torch_nightly_url_base, supported_cuda_versions):
+def determine_torch_url(torch_nightly_url_base):
     """
     Determine the appropriate PyTorch installation URL based on CUDA availability.
     Uses @functools.lru_cache to avoid redundant CUDA detection and print statements.
 
     Args:
         torch_nightly_url_base: Base URL for PyTorch nightly packages
-        supported_cuda_versions: List of supported CUDA versions as tuples
 
     Returns:
         URL string for PyTorch packages
@@ -116,7 +174,7 @@ def determine_torch_url(torch_nightly_url_base):
     print("Attempting to detect CUDA via nvcc...")
 
     try:
-        cuda_version = _get_cuda_version(supported_cuda_versions)
+        cuda_version = _get_cuda_version()
     except Exception as err:
         print(f"CUDA detection failed ({err}), using CPU-only PyTorch")
         return f"{torch_nightly_url_base}/cpu"
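For reference, a hypothetical session showing how the new helpers compose (all argument values are made up for the example):

    from install_utils import _normalize_cmake_bool, is_cmake_option_on

    args = ["-DEXECUTORCH_BUILD_CUDA=ON", "-DCMAKE_BUILD_TYPE=Release"]

    assert is_cmake_option_on(args, "EXECUTORCH_BUILD_CUDA", default=False) is True
    assert is_cmake_option_on(args, "EXECUTORCH_BUILD_METAL", default=False) is False  # unset -> default
    assert _normalize_cmake_bool("yes") is True      # ON/1/TRUE/YES, case-insensitive
    assert _normalize_cmake_bool("off") is False     # OFF/0/FALSE/NO
    assert _normalize_cmake_bool("maybe") is False   # unrecognized -> default (False)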
24 changes: 24 additions & 0 deletions setup.py
@@ -65,6 +65,20 @@
 from pathlib import Path
 from typing import List, Optional
 
+# Add the current directory to sys.path to import install_utils
+CWD = Path(__file__).absolute().parent
+# Add the current directory to the Python path so that we can import `install_utils`.
+# This is required when running this script with a PEP-517-enabled build backend.
+#
+# From the PEP-517 documentation: https://peps.python.org/pep-0517
+#
+# > When importing the module path, we do *not* look in the directory containing
+# > the source tree, unless that would be on `sys.path` anyway (e.g. because it
+# > is specified in `PYTHONPATH`).
+#
+sys.path.insert(0, str(CWD))  # this only affects the current process
+import install_utils
+
 from setuptools import Extension, setup
 from setuptools.command.build import build
 from setuptools.command.build_ext import build_ext
@@ -769,6 +783,12 @@ def run(self): # noqa C901
             item for item in re.split(r"\s+", os.environ.get("CMAKE_ARGS", "")) if item
         ]
 
+        # Check if CUDA is available, and if so, enable building the CUDA
+        # backend by default.
+        if install_utils.is_cuda_available() and install_utils.is_cmake_option_on(
+            cmake_configuration_args, "EXECUTORCH_BUILD_CUDA", default=True
+        ):
+            cmake_configuration_args += ["-DEXECUTORCH_BUILD_CUDA=ON"]
         with Buck2EnvironmentFixer():
             # Generate the cmake cache from scratch to ensure that the cache state
             # is predictable.
@@ -821,6 +841,10 @@ def run(self): # noqa C901
             cmake_build_args += ["--target", "portable_lib"]
             cmake_build_args += ["--target", "selective_build"]
 
+        if cmake_cache.is_enabled("EXECUTORCH_BUILD_CUDA"):
+            cmake_build_args += ["--target", "aoti_cuda"]
+            cmake_build_args += ["--target", "aoti_common"]
+
         if cmake_cache.is_enabled("EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER"):
             cmake_build_args += ["--target", "_llm_runner"]
 
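A subtlety in the auto-enable check added to setup.py: default=True means the CUDA backend is switched on whenever a supported toolkit is detected, unless the user explicitly passes -DEXECUTORCH_BUILD_CUDA=OFF through CMAKE_ARGS. A small sketch of the decision, assuming install_utils is importable:

    import install_utils

    for user_args in ([], ["-DEXECUTORCH_BUILD_CUDA=OFF"]):
        enable = install_utils.is_cuda_available() and install_utils.is_cmake_option_on(
            user_args, "EXECUTORCH_BUILD_CUDA", default=True
        )
        # On a machine with a supported CUDA toolkit: True for the empty list,
        # False once the user opts out with OFF. Without CUDA: always False.
        print(user_args, "->", enable)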