Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Run tests and build Python wheels for aarch64 architecture #3948

Merged
merged 6 commits into from
Feb 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .ci/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,12 @@ else # Linux
cmake
fi
if [[ $SETUP_CONDA != "false" ]]; then
curl -sL -o conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
ARCH=$(uname -m)
if [[ $ARCH == "x86_64" ]]; then
curl -sL -o conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
else
curl -sL -o conda.sh https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-${ARCH}.sh
fi
fi
fi

Expand Down
10 changes: 8 additions & 2 deletions .ci/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,15 @@ elif [[ $TASK == "bdist" ]]; then
cp dist/lightgbm-$LGB_VER-py3-none-macosx*.whl $BUILD_ARTIFACTSTAGINGDIRECTORY
fi
else
cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --plat-name=manylinux1_x86_64 --python-tag py3 || exit -1
ARCH=$(uname -m)
if [[ $ARCH == "x86_64" ]]; then
PLATFORM="manylinux1_x86_64"
else
PLATFORM="manylinux2014_$ARCH"
fi
cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --plat-name=$PLATFORM --python-tag py3 || exit -1
if [[ $PRODUCES_ARTIFACTS == "true" ]]; then
cp dist/lightgbm-$LGB_VER-py3-none-manylinux1_x86_64.whl $BUILD_ARTIFACTSTAGINGDIRECTORY
cp dist/lightgbm-$LGB_VER-py3-none-$PLATFORM.whl $BUILD_ARTIFACTSTAGINGDIRECTORY
fi
fi
pip install --user $BUILD_DIRECTORY/python-package/dist/*.whl || exit -1
Expand Down
63 changes: 63 additions & 0 deletions .vsts-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,68 @@ jobs:
- bash: $(Build.SourcesDirectory)/.ci/test.sh
displayName: Test
###########################################
- job: QEMU_multiarch
###########################################
variables:
COMPILER: gcc
OS_NAME: 'linux'
PRODUCES_ARTIFACTS: 'true'
pool:
vmImage: ubuntu-latest
timeoutInMinutes: 120
strategy:
matrix:
bdist:
TASK: bdist
ARCH: aarch64
steps:
- script: |
sudo apt-get update
sudo apt-get install --no-install-recommends -y \
binfmt-support \
qemu \
qemu-user \
qemu-user-static
displayName: 'Install QEMU'
- script: |
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
displayName: 'Enable Docker multi-architecture support'
- script: |
export ROOT_DOCKER_FOLDER=/LightGBM
cat > docker.env <<EOF
AZURE=$AZURE
OS_NAME=$OS_NAME
COMPILER=$COMPILER
TASK=$TASK
METHOD=$METHOD
CONDA_ENV=$CONDA_ENV
PYTHON_VERSION=$PYTHON_VERSION
BUILD_DIRECTORY=$ROOT_DOCKER_FOLDER
LGB_VER=$(head -n 1 VERSION.txt)
PRODUCES_ARTIFACTS=$PRODUCES_ARTIFACTS
BUILD_ARTIFACTSTAGINGDIRECTORY=$BUILD_ARTIFACTSTAGINGDIRECTORY
EOF
cat > docker-script.sh <<EOF
export CONDA=\$HOME/miniconda
export PATH=\$CONDA/bin:\$PATH
$ROOT_DOCKER_FOLDER/.ci/setup.sh || exit -1
$ROOT_DOCKER_FOLDER/.ci/test.sh || exit -1
EOF
docker run \
--rm \
--env-file docker.env \
-v "$(Build.SourcesDirectory)":"$ROOT_DOCKER_FOLDER" \
-v "$(Build.ArtifactStagingDirectory)":"$(Build.ArtifactStagingDirectory)" \
"quay.io/pypa/manylinux2014_$ARCH" \
/bin/bash $ROOT_DOCKER_FOLDER/docker-script.sh
displayName: 'Setup and run tests'
- task: PublishBuildArtifacts@1
condition: and(succeeded(), in(variables['TASK'], 'bdist'), not(startsWith(variables['Build.SourceBranch'], 'refs/pull/')))
inputs:
pathtoPublish: '$(Build.ArtifactStagingDirectory)'
artifactName: PackageAssets
artifactType: container
###########################################
- job: MacOS
###########################################
variables:
Expand Down Expand Up @@ -219,6 +281,7 @@ jobs:
dependsOn:
- Linux
- Linux_latest
- QEMU_multiarch
- MacOS
- Windows
condition: and(succeeded(), not(startsWith(variables['Build.SourceBranch'], 'refs/pull/')))
Expand Down
4 changes: 3 additions & 1 deletion tests/python_package_test/test_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pickle
import socket
from itertools import groupby
from platform import machine
from os import getenv
from sys import platform

Expand Down Expand Up @@ -43,7 +44,8 @@

pytestmark = [
pytest.mark.skipif(getenv('TASK', '') == 'mpi', reason='Fails to run with MPI interface'),
pytest.mark.skipif(getenv('TASK', '') == 'gpu', reason='Fails to run with GPU interface')
pytest.mark.skipif(getenv('TASK', '') == 'gpu', reason='Fails to run with GPU interface'),
pytest.mark.skipif(machine() != 'x86_64', reason='Fails to run with non-x86_64 architecture')
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In what way did the Dask tests fail?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't remember, sorry! I marked the Dask tests to be skipped at the very beginning of enabling QEMU and cannot find logs for the runs where they were enabled. I think we can investigate Dask support for aarch64 in a follow-up PR. WDYT?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep it's ok, can be a follow-up

]


Expand Down
21 changes: 17 additions & 4 deletions tests/python_package_test/test_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import math
import os
import pickle
import platform
import random

import numpy as np
Expand Down Expand Up @@ -1047,15 +1048,21 @@ def test_contribs_sparse():
# convert data to dense and get back same contribs
contribs_dense = gbm.predict(X_test.toarray(), pred_contrib=True)
# validate the values are the same
np.testing.assert_allclose(contribs_csr.toarray(), contribs_dense)
if platform.machine() == 'aarch64':
np.testing.assert_allclose(contribs_csr.toarray(), contribs_dense, rtol=1, atol=1e-12)
else:
np.testing.assert_allclose(contribs_csr.toarray(), contribs_dense)
Comment on lines +1051 to +1054
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The contribs_sparse tests sometimes fail on aarch64, so the assertion tolerance needs to be relaxed:

>       np.testing.assert_allclose(contribs_csr_arr_re, contribs_dense)
E       AssertionError: 
E       Not equal to tolerance rtol=1e-07, atol=0
E       
E       Mismatched elements: 20 / 840 (2.38%)
E       Max absolute difference: 2.22044605e-16
E       Max relative difference: 1.
E        x: array([[ 0.000000e+00,  0.000000e+00,  0.000000e+00,  0.000000e+00,
E               -3.840704e-02, -1.261461e-01, -3.519505e-02,  1.271551e-04,
E                0.000000e+00, -2.690382e-04, -1.218832e-01,  1.221108e-04,...
E        y: array([[ 0.000000e+00,  0.000000e+00, -4.847735e-17,  0.000000e+00,
E               -3.840704e-02, -1.261461e-01, -3.519505e-02,  1.271551e-04,
E                0.000000e+00, -2.690382e-04, -1.218832e-01,  1.221108e-04,...

assert (np.linalg.norm(gbm.predict(X_test, raw_score=True)
- np.sum(contribs_dense, axis=1)) < 1e-4)
# validate using CSC matrix
X_test_csc = X_test.tocsc()
contribs_csc = gbm.predict(X_test_csc, pred_contrib=True)
assert isspmatrix_csc(contribs_csc)
# validate the values are the same
np.testing.assert_allclose(contribs_csc.toarray(), contribs_dense)
if platform.machine() == 'aarch64':
np.testing.assert_allclose(contribs_csc.toarray(), contribs_dense, rtol=1, atol=1e-12)
else:
np.testing.assert_allclose(contribs_csc.toarray(), contribs_dense)


def test_contribs_sparse_multiclass():
Expand Down Expand Up @@ -1087,7 +1094,10 @@ def test_contribs_sparse_multiclass():
contribs_csr_array = np.swapaxes(np.array([sparse_array.todense() for sparse_array in contribs_csr]), 0, 1)
contribs_csr_arr_re = contribs_csr_array.reshape((contribs_csr_array.shape[0],
contribs_csr_array.shape[1] * contribs_csr_array.shape[2]))
np.testing.assert_allclose(contribs_csr_arr_re, contribs_dense)
if platform.machine() == 'aarch64':
np.testing.assert_allclose(contribs_csr_arr_re, contribs_dense, rtol=1, atol=1e-12)
else:
np.testing.assert_allclose(contribs_csr_arr_re, contribs_dense)
contribs_dense_re = contribs_dense.reshape(contribs_csr_array.shape)
assert np.linalg.norm(gbm.predict(X_test, raw_score=True) - np.sum(contribs_dense_re, axis=2)) < 1e-4
# validate using CSC matrix
Expand All @@ -1100,7 +1110,10 @@ def test_contribs_sparse_multiclass():
contribs_csc_array = np.swapaxes(np.array([sparse_array.todense() for sparse_array in contribs_csc]), 0, 1)
contribs_csc_array = contribs_csc_array.reshape((contribs_csc_array.shape[0],
contribs_csc_array.shape[1] * contribs_csc_array.shape[2]))
np.testing.assert_allclose(contribs_csc_array, contribs_dense)
if platform.machine() == 'aarch64':
np.testing.assert_allclose(contribs_csc_array, contribs_dense, rtol=1, atol=1e-12)
else:
np.testing.assert_allclose(contribs_csc_array, contribs_dense)


@pytest.mark.skipif(psutil.virtual_memory().available / 1024 / 1024 / 1024 < 3, reason='not enough RAM')
Expand Down