diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6f582db8b..43f97c2e2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,30 +29,22 @@ concurrency: cancel-in-progress: true jobs: - GraphAr-on-ubuntu: - runs-on: ubuntu-20.04 + GraphAr-ubuntu-arrow-installed: + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 with: submodules: true - - name: Cache for ccache - uses: actions/cache@v3 - with: - path: ~/.ccache - key: ${{ matrix.os }}-build-ccache-${{ hashFiles('**/git-modules.txt') }} - restore-keys: | - ${{ matrix.os }}-build-ccache- - - name: Install dependencies run: | # install the latest arrow deb to test arrow wget -c https://apache.jfrog.io/artifactory/arrow/"$(lsb_release --id --short | tr 'A-Z' 'a-z')"/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb \ -P /tmp/ - sudo apt-get install -y -V /tmp/apache-arrow-apt-source-latest-"$(lsb_release --codename --short)".deb + sudo apt-get install -y /tmp/apache-arrow-apt-source-latest-"$(lsb_release --codename --short)".deb sudo apt-get update -y - sudo apt-get install -y libarrow-dev + sudo apt-get install -y libarrow-dev libarrow-dataset-dev libarrow-acero-dev libparquet-dev sudo apt-get install -y libboost-graph-dev ccache libcurl4-openssl-dev - name: CMake @@ -111,6 +103,46 @@ jobs: popd + - name: Build GraphAr + run: | + pushd build + make -j$(nproc) + popd + + - name: Test + run: | + cd build + export GAR_TEST_DATA=$PWD/../testing/ + make test + + GraphAr-ubuntu-arrow-from-source: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: true + + - name: Cache for ccache + uses: actions/cache@v3 + with: + path: ~/.ccache + key: ${{ runner.os }}-build-ccache-${{ hashFiles('**/git-modules.txt') }} + restore-keys: | + ${{ runner.os }}-build-ccache- + + - name: Install dependencies + run: | + + sudo apt-get update -y + sudo apt-get install -y libboost-graph-dev ccache libcurl4-openssl-dev + + - name: CMake + run: | + 
mkdir build + pushd build + cmake ../cpp -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTS=ON -DBUILD_EXAMPLES=ON -DBUILD_ARROW_FROM_SOURCE=ON + popd + - name: Build GraphAr run: | pushd build @@ -124,8 +156,8 @@ jobs: export GAR_TEST_DATA=$PWD/../testing/ make test - GraphAr-on-centos8: - runs-on: ubuntu-22.04 + GraphAr-centos8-arrow-from-source: + runs-on: ubuntu-latest container: image: centos:latest steps: @@ -145,6 +177,6 @@ jobs: run: | mkdir build pushd build - cmake ../cpp + cmake ../cpp -DBUILD_ARROW_FROM_SOURCE=ON make -j$(nproc) popd diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 37e0e2835..cb2f1b357 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -26,6 +26,7 @@ project(graph-archive LANGUAGES C CXX VERSION ${GAR_VERSION}) option(NAMESPACE "User specific namespace, default if GraphArchive" OFF) option(BUILD_TESTS "Build unit tests" OFF) option(BUILD_EXAMPLES "Build examples" OFF) +option(BUILD_ARROW_FROM_SOURCE "Build Arrow from source (ON) or use system-installed Arrow (OFF)" OFF) if (NAMESPACE) add_definitions(-DGAR_NAMESPACE=${NAMESPACE}) @@ -159,8 +160,16 @@ if(OPENSSL_FOUND) endif() endif() -include(apache-arrow) -build_arrow() +if(BUILD_ARROW_FROM_SOURCE) + include(apache-arrow) + build_arrow() +else() + find_package(Arrow REQUIRED) + find_package(ArrowDataset REQUIRED) + find_package(ArrowAcero REQUIRED) + find_package(Parquet REQUIRED) +endif() + macro(get_target_location var target) if(TARGET ${target}) @@ -185,21 +194,37 @@ macro(build_gar) $ $ ) - target_include_directories(gar SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR}) + if(BUILD_ARROW_FROM_SOURCE) + target_include_directories(gar SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR}) + endif() target_link_libraries(gar PRIVATE Threads::Threads ${CMAKE_DL_LIBS}) if(APPLE) - target_link_libraries(gar PRIVATE -Wl,-force_load gar_arrow_static - "${GAR_PARQUET_STATIC_LIB}" - "${GAR_DATASET_STATIC_LIB}" - "${GAR_ACERO_STATIC_LIB}" - "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}") + 
if(BUILD_ARROW_FROM_SOURCE) + target_link_libraries(gar PRIVATE -Wl,-force_load gar_arrow_static + "${GAR_PARQUET_STATIC_LIB}" + "${GAR_DATASET_STATIC_LIB}" + "${GAR_ACERO_STATIC_LIB}" + "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}") + else() + target_link_libraries(gar PRIVATE -Wl,-force_load Arrow::arrow_static + Parquet::parquet_static + ArrowDataset::arrow_dataset_static + ArrowAcero::arrow_acero_static) + endif() else() - target_link_libraries(gar PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static - "${GAR_PARQUET_STATIC_LIB}" - "${GAR_DATASET_STATIC_LIB}" - "${GAR_ARROW_ACERO_STATIC_LIB}" - "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive) + if(BUILD_ARROW_FROM_SOURCE) + target_link_libraries(gar PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static + "${GAR_PARQUET_STATIC_LIB}" + "${GAR_DATASET_STATIC_LIB}" + "${GAR_ARROW_ACERO_STATIC_LIB}" + "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive) + else() + target_link_libraries(gar PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive Arrow::arrow_static + Parquet::parquet_static + ArrowDataset::arrow_dataset_static + ArrowAcero::arrow_acero_static -Wl,--no-whole-archive) + endif() endif() # if OpenSSL library exists, link the OpenSSL library. 
@@ -231,16 +256,28 @@ if (BUILD_EXAMPLES) add_executable(${E_NAME} examples/${E_NAME}.cc) target_include_directories(${E_NAME} PRIVATE examples ${PROJECT_SOURCE_DIR}/include $) target_include_directories(${E_NAME} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}) - target_include_directories(${E_NAME} SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR}) + if(BUILD_ARROW_FROM_SOURCE) + target_include_directories(${E_NAME} SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR}) + endif() target_link_libraries(${E_NAME} PRIVATE gar ${Boost_LIBRARIES} Threads::Threads ${CMAKE_DL_LIBS}) if(APPLE) - target_link_libraries(${E_NAME} PRIVATE -Wl,-force_load gar_arrow_static - "${GAR_PARQUET_STATIC_LIB}" - "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}") + if(BUILD_ARROW_FROM_SOURCE) + target_link_libraries(${E_NAME} PRIVATE gar_arrow_static + "${GAR_PARQUET_STATIC_LIB}" + "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}") + else() + target_link_libraries(${E_NAME} PRIVATE Arrow::arrow_static + Parquet::parquet_static) + endif() else() - target_link_libraries(${E_NAME} PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static - "${GAR_PARQUET_STATIC_LIB}" - "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive) + if(BUILD_ARROW_FROM_SOURCE) + target_link_libraries(${E_NAME} PRIVATE gar_arrow_static + "${GAR_PARQUET_STATIC_LIB}" + "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}") + else() + target_link_libraries(${E_NAME} PRIVATE Arrow::arrow_static + Parquet::parquet_static) + endif() endif() # if OpenSSL library exists, link the OpenSSL library. 
@@ -300,15 +337,28 @@ if (BUILD_TESTS) cmake_parse_arguments(add_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_executable(${target} ${add_test_SRCS}) target_compile_features(${target} PRIVATE cxx_std_17) + if(BUILD_ARROW_FROM_SOURCE) + target_include_directories(${target} SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR}) + endif() target_link_libraries(${target} PRIVATE Catch2::Catch2 gar Threads::Threads ${CMAKE_DL_LIBS}) if(APPLE) - target_link_libraries(${target} PRIVATE -Wl,-force_load gar_arrow_static - "${GAR_PARQUET_STATIC_LIB}" - "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}") + if(BUILD_ARROW_FROM_SOURCE) + target_link_libraries(${target} PRIVATE gar_arrow_static + "${GAR_PARQUET_STATIC_LIB}" + "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}") + else() + target_link_libraries(${target} PRIVATE Arrow::arrow_static + Parquet::parquet_static) + endif() else() - target_link_libraries(${target} PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static - "${GAR_PARQUET_STATIC_LIB}" - "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive) + if(BUILD_ARROW_FROM_SOURCE) + target_link_libraries(${target} PRIVATE gar_arrow_static + "${GAR_PARQUET_STATIC_LIB}" + "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}") + else() + target_link_libraries(${target} PRIVATE Arrow::arrow_static + Parquet::parquet_static) + endif() endif() target_include_directories(${target} PRIVATE ${PROJECT_SOURCE_DIR}/include $) target_include_directories(${target} SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR}) diff --git a/cpp/README.md b/cpp/README.md index ce20ee491..d732a2b5a 100644 --- a/cpp/README.md +++ b/cpp/README.md @@ -25,6 +25,7 @@ Building requires: - CMake 3.5 or higher - On Linux and macOS, ``make`` build utilities - curl-devel with SSL (Linux) or curl (macOS), for s3 filesystem support +- Apache Arrow C++ (>= 12.0.0, requires `arrow-dev`, `arrow-dataset`, `arrow-acero` and `parquet` modules) for Arrow filesystem support and can use `BUILD_ARROW_FROM_SOURCE` option to build 
with GraphAr automatically. You can also refer to [Apache Arrow Installation](https://arrow.apache.org/install/) to install it directly. Dependencies for optional features: @@ -68,6 +69,17 @@ setting `NAMESPACE` option with cmake: $ make -j8 # if you have 8 CPU cores, otherwise adjust, use -j`nproc` for all cores ``` +Build the Apache Arrow dependency from source: + +By default, GraphAr tries to find Apache Arrow on the system. It can instead be configured to build the Arrow dependency automatically from source: + +```bash + $ mkdir build + $ cd build + $ cmake -DBUILD_ARROW_FROM_SOURCE=ON .. + $ make -j8 +``` + Debug build with unit tests: ```bash diff --git a/cpp/cmake/apache-arrow.cmake b/cpp/cmake/apache-arrow.cmake index 4a37486c6..9e8f60001 100644 --- a/cpp/cmake/apache-arrow.cmake +++ b/cpp/cmake/apache-arrow.cmake @@ -90,7 +90,7 @@ function(build_arrow) find_package(Threads) find_package(Arrow QUIET) - set(ARROW_VERSION_TO_BUILD "10.0.1" CACHE INTERNAL "arrow version") + set(ARROW_VERSION_TO_BUILD "12.0.0" CACHE INTERNAL "arrow version") if (Arrow_FOUND) # arrow is installed, build the same version as the installed one message(STATUS "Found Arrow installed, align to version: ${Arrow_VERSION}") set(ARROW_VERSION_TO_BUILD "${Arrow_VERSION}" CACHE INTERNAL "arrow version") diff --git a/cpp/src/filesystem.cc b/cpp/src/filesystem.cc index f4f7c3eb2..2579ba714 100644 --- a/cpp/src/filesystem.cc +++ b/cpp/src/filesystem.cc @@ -267,11 +267,13 @@ Result FileSystem::GetFileNumOfDir(const std::string& dir_path, Result> FileSystemFromUriOrPath( const std::string& uri_string, std::string* out_path) { - if (arrow::fs::internal::DetectAbsolutePath(uri_string)) { + if (uri_string.length() >= 1 && uri_string[0] == '/') { // if the uri_string is an absolute path, we need to create a local file GAR_RETURN_ON_ARROW_ERROR_AND_ASSIGN( auto arrow_fs, arrow::fs::FileSystemFromUriOrPath(uri_string, out_path)); + // arrow would delete the last slash, so use uri string (out_path may be null) + if (out_path != nullptr) { *out_path = uri_string; } 
return std::make_shared(arrow_fs); } diff --git a/cpp/test/test_info.cc b/cpp/test/test_info.cc index 941e34714..bf90c5630 100644 --- a/cpp/test/test_info.cc +++ b/cpp/test/test_info.cc @@ -368,6 +368,8 @@ TEST_CASE("test_graph_info_load_from_file") { REQUIRE(edge_infos.size() == 1); } +// ISSUE-187 +#if defined(ARROW_VERSION) && ARROW_VERSION < 12000000 TEST_CASE("test_graph_info_load_from_s3") { std::string path = "s3://graphar/ldbc/ldbc.graph.yml" @@ -381,3 +383,4 @@ TEST_CASE("test_graph_info_load_from_s3") { REQUIRE(vertex_infos.size() == 8); REQUIRE(edge_infos.size() == 23); } +#endif