diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bf3ac53..7589ce4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -49,9 +49,26 @@ jobs: - name: Build and test run: mvn test -B + # Some tests are skipped because h2o-xgboost isn't ready to use with musl + - name: Test with musl + run: | + docker run --rm -t -v ~/.m2:/root/.m2 -v $(pwd):/feedzai-openml-java -e FDZ_OPENML_JAVA_LIBC="musl" alpine:3.18.4 \ + /bin/sh -c 'apk add clang openjdk8 maven bash git && \ + git config --global --add safe.directory /feedzai-openml-java && \ + git config --global --add safe.directory /feedzai-openml-java/openml-lightgbm/lightgbm-builder/make-lightgbm && \ + cd /feedzai-openml-java && \ + cp openml-lightgbm/lightgbm-builder/make-lightgbm/build/amd64/musl/lib_lightgbm.so /lib/lib_lightgbm.so && \ + mvn test -B -Dsurefire.failIfNoSpecifiedTests=false -Dtest=!ClassifyUnknownCategoryTest#test,!H2OModelProviderTrainTest#trainModelsForAllAlgorithms' + # The skipped tests is because h2o-xgboost isn't ready to use on arm64 - name: Test on arm64 - run: docker run --rm --platform=arm64 -t -v ~/.m2:/root/.m2 -v $(pwd):/feedzai-openml-java maven:3.8-openjdk-8-slim /bin/bash -c 'apt update && apt install -y --no-install-recommends git && git config --global --add safe.directory /feedzai-openml-java && git config --global --add safe.directory /feedzai-openml-java/openml-lightgbm/lightgbm-builder/make-lightgbm && cd /feedzai-openml-java && mvn test -B -DfailIfNoTests=false -Dtest=!ClassifyUnknownCategoryTest#test,!H2OModelProviderTrainTest#trainModelsForAllAlgorithms' + run: | + docker run --rm --platform=arm64 -t -v ~/.m2:/root/.m2 -v $(pwd):/feedzai-openml-java maven:3.8-openjdk-8-slim \ + /bin/bash -c 'apt update && apt install -y --no-install-recommends git && \ + git config --global --add safe.directory /feedzai-openml-java && \ + git config --global --add safe.directory /feedzai-openml-java/openml-lightgbm/lightgbm-builder/make-lightgbm 
&& \ + cd /feedzai-openml-java && \ + mvn test -B -Dsurefire.failIfNoSpecifiedTests=false -Dtest=!ClassifyUnknownCategoryTest#test,!H2OModelProviderTrainTest#trainModelsForAllAlgorithms' - uses: codecov/codecov-action@v1 with: diff --git a/README.md b/README.md index 337226e..6addcb6 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,9 @@ If the build fails compiling for ARM64 you may need to run: docker run --rm --privileged multiarch/qemu-user-static --reset -p yes ``` +To use the models on operating systems that use musl as their libc implementation (supported only on the AMD64 architecture), the environment variable `FDZ_OPENML_JAVA_LIBC` +must be set to `musl`. This variable accepts the values `glibc` and `musl`; if it is unset or empty, `glibc` is assumed. H2O-xgboost can't be used with musl. + ## Releasing For all releases, as the hotfix branch is ready all that's needed to actually release is to create an annotated tag pointing to the hotfix branch head. diff --git a/openml-lightgbm/lightgbm-builder/make-lightgbm b/openml-lightgbm/lightgbm-builder/make-lightgbm index 86f4579..fdb7fde 160000 --- a/openml-lightgbm/lightgbm-builder/make-lightgbm +++ b/openml-lightgbm/lightgbm-builder/make-lightgbm @@ -1 +1 @@ -Subproject commit 86f4579ed4a3596b66c1a34dc77321a1fdb44899 +Subproject commit fdb7fde9814c4f55b0cb5a2add7d0a1bdb3ed0c0 diff --git a/openml-lightgbm/lightgbm-builder/pom.xml b/openml-lightgbm/lightgbm-builder/pom.xml index 76eacf0..d1c686e 100644 --- a/openml-lightgbm/lightgbm-builder/pom.xml +++ b/openml-lightgbm/lightgbm-builder/pom.xml @@ -15,7 +15,7 @@ com.feedzai.openml.lightgbm lightgbm-lib - v0.1.0 + v0.9.14 jar Openml LightGBM lib @@ -33,8 +33,8 @@ https://github.com/feedzai/fairgbm.git - v0.1.0 - v0.1.0 + v0.9.14 + v0.9.14 diff --git a/openml-lightgbm/lightgbm-provider/pom.xml b/openml-lightgbm/lightgbm-provider/pom.xml index 998c84b..2ba9482 100644 --- a/openml-lightgbm/lightgbm-provider/pom.xml +++ b/openml-lightgbm/lightgbm-provider/pom.xml @@ -25,7 +25,7 @@ OpenML LightGBM Machine Learning Model and Classifier provider - 
v0.1.0 + v0.9.14 diff --git a/openml-lightgbm/lightgbm-provider/src/main/java/com/feedzai/openml/provider/lightgbm/Infrastructure.java b/openml-lightgbm/lightgbm-provider/src/main/java/com/feedzai/openml/provider/lightgbm/Infrastructure.java new file mode 100644 index 0000000..6b78737 --- /dev/null +++ b/openml-lightgbm/lightgbm-provider/src/main/java/com/feedzai/openml/provider/lightgbm/Infrastructure.java @@ -0,0 +1,53 @@ +package com.feedzai.openml.provider.lightgbm; + +import java.io.IOException; + + +/** + * Class that represents the infrastructure where the code is running and the corresponding lgbm native libs locations. + */ +public class Infrastructure { + + /** + * The CPU architecture used. + */ + private final CpuArchitecture cpuArchitecture; + + /** + * The libc implementation available. + */ + private final LibcImplementation libcImpl; + + public Infrastructure(final CpuArchitecture cpuArchitecture, final LibcImplementation libcImpl) { + this.cpuArchitecture = cpuArchitecture; + this.libcImpl = libcImpl; + } + + @Override + public String toString() { + return "Infrastructure{" + + "cpuArchitecture=" + cpuArchitecture + + ", libcImpl=" + libcImpl + + '}'; + } + + /** + * Gets the native libraries folder name according to the cpu architecture and libc implementation. + * + * @return the native libraries folder name. 
+ */ + public String getLgbmNativeLibsFolder() throws IOException { + + switch (cpuArchitecture) { + case AARCH64: + if (libcImpl == LibcImplementation.MUSL) { + throw new IOException("Trying to use LightGBM on a musl-based OS with unsupported arm64 architecture."); + } + return cpuArchitecture.getLgbmNativeLibsFolder() + "/"; + case AMD64: + return cpuArchitecture.getLgbmNativeLibsFolder() + "/" + libcImpl.getLibcImpl() + "/"; + default: + throw new IllegalStateException("Unexpected value: " + cpuArchitecture); + } + } +} diff --git a/openml-lightgbm/lightgbm-provider/src/main/java/com/feedzai/openml/provider/lightgbm/LibcImplementation.java b/openml-lightgbm/lightgbm-provider/src/main/java/com/feedzai/openml/provider/lightgbm/LibcImplementation.java new file mode 100644 index 0000000..873b745 --- /dev/null +++ b/openml-lightgbm/lightgbm-provider/src/main/java/com/feedzai/openml/provider/lightgbm/LibcImplementation.java @@ -0,0 +1,27 @@ +package com.feedzai.openml.provider.lightgbm; + +/** + * Enum that represents the libc implementation available on the machine and consequent lgbm native libs locations. + */ +public enum LibcImplementation { + MUSL("musl"), + GLIBC("glibc"); + + /** + * This is the name of available libc implementation and indicates the folder where the lightgbm native libraries are. + */ + private final String libcImpl; + + LibcImplementation(final String libcImpl){ + this.libcImpl = libcImpl; + } + + /** + * Gets the native libraries folder name according to the libc implementation. + * + * @return the native libraries folder name according to the libc implementation. 
+ */ + public String getLibcImpl() { + return libcImpl; + } +} diff --git a/openml-lightgbm/lightgbm-provider/src/main/java/com/feedzai/openml/provider/lightgbm/LightGBMUtils.java b/openml-lightgbm/lightgbm-provider/src/main/java/com/feedzai/openml/provider/lightgbm/LightGBMUtils.java index 9636768..d6e8bb8 100644 --- a/openml-lightgbm/lightgbm-provider/src/main/java/com/feedzai/openml/provider/lightgbm/LightGBMUtils.java +++ b/openml-lightgbm/lightgbm-provider/src/main/java/com/feedzai/openml/provider/lightgbm/LightGBMUtils.java @@ -46,6 +46,11 @@ public class LightGBMUtils { */ static final int BINARY_LGBM_NUM_CLASSES = 1; + /** + * Environment variable with the implementation of libc available on the system. + */ + private static final String FDZ_OPENML_JAVA_LIBC = "FDZ_OPENML_JAVA_LIBC"; + /** * State variable to know if it loadLibs was ever called. */ @@ -60,10 +65,13 @@ static public void loadLibs() { if (!libsLoaded) { final CpuArchitecture cpuArchitecture = getCpuArchitecture(System.getProperty("os.arch")); + final LibcImplementation libcImpl = getLibcImplementation(System.getenv(FDZ_OPENML_JAVA_LIBC)); + final Infrastructure infrastructure = new Infrastructure(cpuArchitecture, libcImpl); + try { - loadSharedLibraryFromJar("libgomp.so.1.0.0", cpuArchitecture); - loadSharedLibraryFromJar("lib_lightgbm.so", cpuArchitecture); - loadSharedLibraryFromJar("lib_lightgbm_swig.so", cpuArchitecture); + loadSharedLibraryFromJar("libgomp.so.1.0.0", infrastructure); + loadSharedLibraryFromJar("lib_lightgbm.so", infrastructure); + loadSharedLibraryFromJar("lib_lightgbm_swig.so", infrastructure); } catch (final IOException ex) { throw new RuntimeException("Failed to load LightGBM shared libraries from jar.", ex); } @@ -82,22 +90,37 @@ static CpuArchitecture getCpuArchitecture(final String cpuArchName) { } } + protected static LibcImplementation getLibcImplementation(final String libcImpl) { + if (libcImpl == null || libcImpl.isEmpty()) { + logger.debug("{} not set, 
assuming glibc as libc implementation.", FDZ_OPENML_JAVA_LIBC); + return LibcImplementation.GLIBC; + } + + try { + return LibcImplementation.valueOf(libcImpl.toUpperCase().trim()); + } catch (final IllegalArgumentException ex) { + logger.error("Trying to use LightGBM with an unsupported libc implementation {}.", libcImpl, ex); + throw ex; + } + } + /** * Loads a single shared library from the Jar. * * @param sharedLibResourceName library "filename" inside the jar. - * @param cpuArchitecture cpu architecture. + * @param infrastructure infrastructure used composed of CPU architecture and libc implementation. * @throws IOException if any error happens loading the library. */ static private void loadSharedLibraryFromJar( final String sharedLibResourceName, - final CpuArchitecture cpuArchitecture + final Infrastructure infrastructure ) throws IOException { - logger.debug("Loading LightGBM shared lib: {} for {}.", sharedLibResourceName, cpuArchitecture); + logger.debug("Loading LightGBM shared lib: {} for {}.", sharedLibResourceName, infrastructure); + final String libraryPath = infrastructure.getLgbmNativeLibsFolder() + sharedLibResourceName; final InputStream inputStream = LightGBMUtils.class.getClassLoader() - .getResourceAsStream(cpuArchitecture.getLgbmNativeLibsFolder() + "/" + sharedLibResourceName); + .getResourceAsStream(libraryPath); final File tempFile = File.createTempFile("lib", ".so"); final OutputStream outputStream = new FileOutputStream(tempFile); diff --git a/openml-lightgbm/lightgbm-provider/src/test/java/com/feedzai/openml/provider/lightgbm/TestInfrastructure.java b/openml-lightgbm/lightgbm-provider/src/test/java/com/feedzai/openml/provider/lightgbm/TestInfrastructure.java new file mode 100644 index 0000000..0bc867b --- /dev/null +++ b/openml-lightgbm/lightgbm-provider/src/test/java/com/feedzai/openml/provider/lightgbm/TestInfrastructure.java @@ -0,0 +1,45 @@ +/* + * The copyright of this file belongs to Feedzai. 
The file cannot be + * reproduced in whole or in part, stored in a retrieval system, + * transmitted in any form, or by any means electronic, mechanical, + * photocopying, or otherwise, without the prior permission of the owner. + * + * © 2023 Feedzai, Strictly Confidential + */ +package com.feedzai.openml.provider.lightgbm; + +import org.assertj.core.api.Assertions; +import org.junit.Test; + +import java.io.IOException; + +/** + * Tests the retrieval of native libs folder path for Infrastructure. + * + * @author Renato Azevedo (renato.azevedo@feedzai.com) + */ +public class TestInfrastructure { + @Test + public void unknownInfrastructureCombination() { + Infrastructure infrastructure = new Infrastructure(CpuArchitecture.AARCH64, LibcImplementation.MUSL); + + Assertions.assertThatThrownBy(infrastructure::getLgbmNativeLibsFolder) + .isInstanceOf(IOException.class); + } + + @Test + public void knowsCorrectInfrastructureCombination() throws IOException { + + Infrastructure infra_arm64_glibc = new Infrastructure(CpuArchitecture.AARCH64, LibcImplementation.GLIBC); + Assertions.assertThat(infra_arm64_glibc.getLgbmNativeLibsFolder()) + .isEqualTo("arm64/"); + + Infrastructure infra_amd64_glibc = new Infrastructure(CpuArchitecture.AMD64, LibcImplementation.GLIBC); + Assertions.assertThat(infra_amd64_glibc.getLgbmNativeLibsFolder()) + .isEqualTo("amd64/glibc/"); + + Infrastructure infra_amd64_musl = new Infrastructure(CpuArchitecture.AMD64, LibcImplementation.MUSL); + Assertions.assertThat(infra_amd64_musl.getLgbmNativeLibsFolder()) + .isEqualTo("amd64/musl/"); + } +} diff --git a/openml-lightgbm/lightgbm-provider/src/test/java/com/feedzai/openml/provider/lightgbm/TestLibcImplementation.java b/openml-lightgbm/lightgbm-provider/src/test/java/com/feedzai/openml/provider/lightgbm/TestLibcImplementation.java new file mode 100644 index 0000000..22b8718 --- /dev/null +++ 
b/openml-lightgbm/lightgbm-provider/src/test/java/com/feedzai/openml/provider/lightgbm/TestLibcImplementation.java @@ -0,0 +1,43 @@ +/* + * The copyright of this file belongs to Feedzai. The file cannot be + * reproduced in whole or in part, stored in a retrieval system, + * transmitted in any form, or by any means electronic, mechanical, + * photocopying, or otherwise, without the prior permission of the owner. + * + * © 2023 Feedzai, Strictly Confidential + */ +package com.feedzai.openml.provider.lightgbm; + +import org.assertj.core.api.Assertions; +import org.junit.Test; + +/** + * Tests the retrieval of libc implementation. + * + * @author Renato Azevedo (renato.azevedo@feedzai.com) + */ +public class TestLibcImplementation { + @Test + public void unknownLibcImplementationThrowsException() { + Assertions.assertThatThrownBy(() -> LightGBMUtils.getLibcImplementation("klibc")) + .isInstanceOf(IllegalArgumentException.class); + } + + @Test + public void defaultLibcImplementationIsGlibc() { + Assertions.assertThat(LightGBMUtils.getLibcImplementation("")) + .isEqualTo(LibcImplementation.GLIBC); + + Assertions.assertThat(LightGBMUtils.getLibcImplementation(null)) + .isEqualTo(LibcImplementation.GLIBC); + } + + @Test + public void canReadKnownLibcImplementation() { + Assertions.assertThat(LightGBMUtils.getLibcImplementation("glibc")) + .isEqualTo(LibcImplementation.GLIBC); + + Assertions.assertThat(LightGBMUtils.getLibcImplementation("musl")) + .isEqualTo(LibcImplementation.MUSL); + } +} diff --git a/pom.xml b/pom.xml index 7c118e7..56ba97e 100644 --- a/pom.xml +++ b/pom.xml @@ -319,6 +319,10 @@ + + org.apache.maven.plugins + maven-surefire-plugin +