Skip to content

Commit

Permalink
state before gpu rebranch
Browse files Browse the repository at this point in the history
  • Loading branch information
cjac committed Jan 9, 2025
1 parent c6c09db commit 88f9f7f
Show file tree
Hide file tree
Showing 12 changed files with 1,220 additions and 1,154 deletions.
5 changes: 5 additions & 0 deletions cloudbuild/presubmit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ determine_tests_to_run() {
# Infer the files that changed
mapfile -t DELETED_BUILD_FILES < <(git diff origin/master --name-only --diff-filter=D | grep BUILD)
mapfile -t CHANGED_FILES < <(git diff origin/master --name-only | grep -v template)
for tt in $(git diff origin/master --name-only | grep 'templates/.*/.*\.sh\.in'); do
local genfile=`perl -e "print( q{${tt}} =~ m:templates/(.*?.sh).in: )"`
perl templates/generate-action.pl "${genfile}" > "${genfile}"
CHANGED_FILES+=("${genfile}")
done
echo "Deleted BUILD files: ${DELETED_BUILD_FILES[*]}"
echo "Changed files: ${CHANGED_FILES[*]}"

Expand Down
53 changes: 53 additions & 0 deletions templates/common/install_functions
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#
# Write an apt source list for a repository, then refresh the package index.
#
# Arguments:
#   $1 - repository name; used to build the default keyring and list paths
#   $2 - not read here (os_add_repo passes the signing key URL in this slot)
#   $3 - repository data: "http(s)://host/path/uri argument0 .. argumentN"
#   $4 - "yes" (default) to also emit a deb-src entry
#   $5 - keyring path; defaults to /usr/share/keyrings/<name>.gpg
#   $6 - list file path; defaults to /etc/apt/sources.list.d/<name>.list
# Returns: the exit status of `apt-get update -qq`
#
function apt_add_repo() {
  local -r repo_name="$1"
  local -r repo_data="$3"
  local -r with_src="${4:-yes}"
  local -r keyring="${5:-/usr/share/keyrings/${repo_name}.gpg}"
  local -r list_file="${6:-/etc/apt/sources.list.d/${repo_name}.list}"

  # Everything after the "deb"/"deb-src" keyword is shared by both entries.
  local -r entry_tail="[signed-by=${keyring}] ${repo_data}"

  {
    echo "deb ${entry_tail}"
    if [[ "${with_src}" == "yes" ]] ; then
      echo "deb-src ${entry_tail}"
    fi
  } > "${list_file}"

  apt-get update -qq
}

#
# Generate repo file under /etc/yum.repos.d/
#
# Arguments:
#   $1 - repository name; used to build the default keyring and repo paths
#   $2 - not read here (os_add_repo passes the signing key URL in this slot)
#   $3 - URL of the .repo file: "http(s)://host/path/filename.repo"
#   $4 - not read here (os_add_repo passes the include-source flag)
#   $5 - keyring path; defaults to /etc/pki/rpm-gpg/<name>.gpg
#   $6 - destination path; defaults to /etc/yum.repos.d/<name>.repo
# Returns: 0 once the repo file is written; non-zero if the download fails
#          or yields no content, in which case the destination is untouched.
#
function dnf_add_repo() {
  local -r repo_name="$1"
  local -r repo_url="$3" # "http(s)://host/path/filename.repo"
  # Only needed by the disabled gpgkey rewrite noted below.
  # shellcheck disable=SC2034
  local -r kr_path="${5:-/etc/pki/rpm-gpg/${repo_name}.gpg}"
  local -r repo_path="${6:-/etc/yum.repos.d/${repo_name}.repo}"

  # Download before touching ${repo_path}: -f makes HTTP errors fail instead
  # of saving the server's error page as a repo definition, and a failed
  # transfer must not truncate a repo file that is already in place.
  local repo_body
  if ! repo_body="$(curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 "${repo_url}")" \
     || [[ -z "${repo_body}" ]] ; then
    echo "dnf_add_repo: unable to download repo definition from ${repo_url}" >&2
    return 1
  fi

  # Disabled upstream: rewriting gpgkey= to point at the local keyring
  #   | perl -p -e "s{^gpgkey=.*$}{gpgkey=file://${kr_path}}"
  printf '%s\n' "${repo_body}" > "${repo_path}"
}

#
# Import a repository signing key and register the repository with the
# platform's package manager.
#
# Keyrings default to
# /usr/share/keyrings/${repo_name}.gpg (debian/ubuntu) or
# /etc/pki/rpm-gpg/${repo_name}.gpg (rocky/RHEL)
#
# Arguments:
#   $1 - repository name
#   $2 - URL of the signing key
#   $3 - repository data: "http(s)://host/path/uri argument0 .. argumentN"
#   $4 - include-source flag forwarded to the helper (default "yes")
#   $5 - keyring path (platform default when empty or omitted)
#   $6 - repo file path forwarded to the helper (helper default when empty)
#
function os_add_repo() {
  local -r repo_name="$1"
  local -r signing_key_url="$2"
  local -r repo_data="$3" # "http(s)://host/path/uri argument0 .. argumentN"

  # Choose the platform's default keyring location; an explicit $5 wins.
  local default_keyring="/etc/pki/rpm-gpg/${repo_name}.gpg"
  if is_debuntu ; then
    default_keyring="/usr/share/keyrings/${repo_name}.gpg"
  fi
  local kr_path="${5:-${default_keyring}}"

  mkdir -p "$(dirname "${kr_path}")"

  curl -fsS --retry-connrefused --retry 10 --retry-max-time 30 "${signing_key_url}" \
    | gpg --import --no-default-keyring --keyring "${kr_path}"

  # Both helpers accept the same positional arguments.
  local -a helper_args=(
    "${repo_name}" "${signing_key_url}" "${repo_data}"
    "${4:-yes}" "${kr_path}" "${6:-}"
  )
  if is_debuntu ; then
    apt_add_repo "${helper_args[@]}"
  else
    dnf_add_repo "${helper_args[@]}"
  fi
}
33 changes: 0 additions & 33 deletions templates/common/yarn_functions
Original file line number Diff line number Diff line change
@@ -1,17 +1,3 @@
# Declare yarn.io/gpu as a YARN resource type and select the dominant
# resource calculator for the capacity scheduler.
# Globals: HADOOP_CONF_DIR (read)
# Returns: 0 when HADOOP_CONF_DIR is not a directory; otherwise the status
#          of the last set_hadoop_property call.
function configure_yarn_resources() {
  # pre-init scripts run before the Hadoop configuration directory exists
  [[ -d "${HADOOP_CONF_DIR}" ]] || return 0

  local -r resource_types_xml="${HADOOP_CONF_DIR}/resource-types.xml"
  if ! [[ -f "${resource_types_xml}" ]]; then
    # Seed an empty configuration document for the property to be set in.
    printf '<?xml version="1.0" ?>\n<configuration/>' >"${resource_types_xml}"
  fi

  set_hadoop_property 'resource-types.xml' 'yarn.resource-types' 'yarn.io/gpu'
  set_hadoop_property 'capacity-scheduler.xml' \
    'yarn.scheduler.capacity.resource-calculator' \
    'org.apache.hadoop.yarn.util.resource.DominantResourceCalculator'
  set_hadoop_property 'yarn-site.xml' 'yarn.resource-types' 'yarn.io/gpu'
}

# This configuration should be applied only if GPU is attached to the node
function configure_yarn_nodemanager() {
set_hadoop_property 'yarn-site.xml' \
Expand All @@ -37,25 +23,6 @@ function configure_yarn_nodemanager() {
fi
}

# Apply the YARN GPU configuration appropriate for this node.
# Globals: gpu_count (read), ROLE (read)
# Returns: 0 when gpu_count is "0"; otherwise the status of
#          configure_gpu_isolation.
function setup_gpu_yarn() {
  # Resource-type configuration is applied on every node, whether or not
  # it has attached GPUs.
  configure_yarn_resources

  if [[ "${gpu_count}" != "0" ]] ; then
    install_nvidia_container_toolkit
    configure_yarn_nodemanager_gpu
    configure_gpu_script
    configure_gpu_isolation
    return
  fi

  # No GPU here; a master node still gets the nodemanager configuration.
  if [[ "${ROLE}" == "Master" ]]; then
    configure_yarn_nodemanager
  fi
  return 0
}

function yarn_exit_handler() {
# Restart YARN services if they are running already
for svc in resourcemanager nodemanager; do
Expand Down
2 changes: 1 addition & 1 deletion templates/dask/dask.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ function main() {

configure_knox_for_dask

local DASK_CLOUD_LOGGING="$(get_metadata_attribute dask-cloud-logging || echo 'false')"
local DASK_CLOUD_LOGGING="$(get_metadata_attribute dask-cloud-logging 'false')"
if [[ "${DASK_CLOUD_LOGGING}" == "true" ]]; then
configure_fluentd_for_dask
fi
Expand Down
18 changes: 10 additions & 8 deletions templates/dask/util_functions
Original file line number Diff line number Diff line change
Expand Up @@ -423,15 +423,16 @@ function install_dask_rapids() {
local dask_version="2024.7"
local dask_spec="dask>=${dask_version}"

if is_cuda12 ; then
local python_spec="python>=3.11"
local cuda_spec="cuda-version>=12,<13"
elif is_cuda11 ; then
local python_spec="python>=3.9"
local cuda_spec="cuda-version>=11,<12.0a0"
local python_spec="python>=3.11"
local cuda_spec="cuda-version>=12,<13"
local cudart_spec="cuda-cudart"
if is_cuda11 ; then
python_spec="python>=3.9"
cuda_spec="cuda-version>=11,<12.0a0"
cudart_spec="cudatoolkit"
fi

rapids_spec="rapids>=${RAPIDS_VERSION}"
local rapids_spec="rapids>=${RAPIDS_VERSION}"
CONDA_PACKAGES=()
local cache_key_name="dask-rapids-${RAPIDS_VERSION}"
if [[ "${DASK_RUNTIME}" == 'yarn' ]]; then
Expand All @@ -443,7 +444,7 @@ function install_dask_rapids() {
# https://github.com/dask/dask-yarn/issues/155

dask_spec="dask<2022.2"
python_spec="python>=3.7,<3.8.0a0"
python_spec="python>=3.9"
rapids_spec="rapids<=${rapids_version}"
if is_ubuntu18 ; then
# the libuuid.so.1 distributed with fiona 1.8.22 dumps core when calling uuid_generate_time_generic
Expand All @@ -454,6 +455,7 @@ function install_dask_rapids() {

CONDA_PACKAGES+=(
"${cuda_spec}"
"${cudart_spec}"
"${rapids_spec}"
"${dask_spec}"
"dask-bigquery"
Expand Down
Loading

0 comments on commit 88f9f7f

Please sign in to comment.