Skip to content

Commit

Permalink
Support build GraphAr with system installed arrow
Browse files Browse the repository at this point in the history
Signed-off-by: acezen <[email protected]>

Update

Update

Fix

update

Update

update

Update
  • Loading branch information
acezen committed Aug 29, 2023
1 parent 320f868 commit ad1a55e
Show file tree
Hide file tree
Showing 6 changed files with 142 additions and 43 deletions.
62 changes: 47 additions & 15 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,30 +29,22 @@ concurrency:
cancel-in-progress: true

jobs:
GraphAr-on-ubuntu:
runs-on: ubuntu-20.04
GraphAr-ubuntu-arrow-installed:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true

- name: Cache for ccache
uses: actions/cache@v3
with:
path: ~/.ccache
key: ${{ matrix.os }}-build-ccache-${{ hashFiles('**/git-modules.txt') }}
restore-keys: |
${{ matrix.os }}-build-ccache-
- name: Install dependencies
run: |
# install the latest arrow deb to test arrow
wget -c https://apache.jfrog.io/artifactory/arrow/"$(lsb_release --id --short | tr 'A-Z' 'a-z')"/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb \
-P /tmp/
sudo apt-get install -y -V /tmp/apache-arrow-apt-source-latest-"$(lsb_release --codename --short)".deb
sudo apt-get install -y /tmp/apache-arrow-apt-source-latest-"$(lsb_release --codename --short)".deb
sudo apt-get update -y
sudo apt-get install -y libarrow-dev
sudo apt install -y libarrow-dev libarrow-dataset-dev libarrow-acero-dev libparquet-dev
sudo apt-get install -y libboost-graph-dev ccache libcurl4-openssl-dev
- name: CMake
Expand Down Expand Up @@ -111,6 +103,46 @@ jobs:
popd
- name: Build GraphAr
run: |
pushd build
make -j$(nproc)
popd
- name: Test
run: |
cd build
export GAR_TEST_DATA=$PWD/../testing/
make test
GraphAr-ubuntu-arrow-from-source:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true

- name: Cache for ccache
uses: actions/cache@v3
with:
path: ~/.ccache
key: ${{ matrix.os }}-build-ccache-${{ hashFiles('**/git-modules.txt') }}
restore-keys: |
${{ matrix.os }}-build-ccache-
- name: Install dependencies
run: |
sudo apt-get update -y
sudo apt-get install -y libboost-graph-dev ccache libcurl4-openssl-dev
- name: CMake
run: |
mkdir build
pushd build
cmake ../cpp -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTS=ON -DBUILD_EXAMPLES=ON -DBUILD_ARROW_FROM_SOURCE=ON
popd
- name: Build GraphAr
run: |
pushd build
Expand All @@ -124,8 +156,8 @@ jobs:
export GAR_TEST_DATA=$PWD/../testing/
make test
GraphAr-on-centos8:
runs-on: ubuntu-22.04
GraphAr-centos8-arrow-from-source:
runs-on: ubuntu-latest
container:
image: centos:latest
steps:
Expand All @@ -145,6 +177,6 @@ jobs:
run: |
mkdir build
pushd build
cmake ../cpp
cmake ../cpp -DBUILD_ARROW_FROM_SOURCE=ON
make -j$(nproc)
popd
102 changes: 76 additions & 26 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ project(graph-archive LANGUAGES C CXX VERSION ${GAR_VERSION})
option(NAMESPACE "User specific namespace, default if GraphArchive" OFF)
option(BUILD_TESTS "Build unit tests" OFF)
option(BUILD_EXAMPLES "Build examples" OFF)
option(BUILD_ARROW_FROM_SOURCE "Build Arrow from source (ON) or use system-installed Arrow (OFF)" OFF)

if (NAMESPACE)
add_definitions(-DGAR_NAMESPACE=${NAMESPACE})
Expand Down Expand Up @@ -159,8 +160,16 @@ if(OPENSSL_FOUND)
endif()
endif()

include(apache-arrow)
build_arrow()
if(BUILD_ARROW_FROM_SOURCE)
include(apache-arrow)
build_arrow()
else()
find_package(Arrow REQUIRED)
find_package(ArrowDataset REQUIRED)
find_package(ArrowAcero REQUIRED)
find_package(Parquet REQUIRED)
endif()


macro(get_target_location var target)
if(TARGET ${target})
Expand All @@ -185,21 +194,37 @@ macro(build_gar)
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/mini-yaml>
)
target_include_directories(gar SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR})
if(BUILD_ARROW_FROM_SOURCE)
target_include_directories(gar SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR})
endif()
target_link_libraries(gar PRIVATE Threads::Threads ${CMAKE_DL_LIBS})

if(APPLE)
target_link_libraries(gar PRIVATE -Wl,-force_load gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_DATASET_STATIC_LIB}"
"${GAR_ACERO_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
if(BUILD_ARROW_FROM_SOURCE)
target_link_libraries(gar PRIVATE -Wl,-force_load gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_DATASET_STATIC_LIB}"
"${GAR_ACERO_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
else()
target_link_libraries(gar PRIVATE -Wl,-force_load Arrow::arrow_static
Parquet::parquet_static
ArrowDataset::arrow_dataset_static
ArrowAcero::arrow_acero_static)
endif()
else()
target_link_libraries(gar PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_DATASET_STATIC_LIB}"
"${GAR_ARROW_ACERO_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive)
if(BUILD_ARROW_FROM_SOURCE)
target_link_libraries(gar PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_DATASET_STATIC_LIB}"
"${GAR_ARROW_ACERO_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive)
else()
target_link_libraries(gar PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive Arrow::arrow_static
Parquet::parquet_static
ArrowDataset::arrow_dataset_static
ArrowAcero::arrow_acero_static -Wl,--no-whole-archive)
endif()
endif()

# if OpenSSL library exists, link the OpenSSL library.
Expand Down Expand Up @@ -231,16 +256,28 @@ if (BUILD_EXAMPLES)
add_executable(${E_NAME} examples/${E_NAME}.cc)
target_include_directories(${E_NAME} PRIVATE examples ${PROJECT_SOURCE_DIR}/include $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/Catch2/single_include>)
target_include_directories(${E_NAME} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS})
target_include_directories(${E_NAME} SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR})
if(BUILD_ARROW_FROM_SOURCE)
target_include_directories(${E_NAME} SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR})
endif()
target_link_libraries(${E_NAME} PRIVATE gar ${Boost_LIBRARIES} Threads::Threads ${CMAKE_DL_LIBS})
if(APPLE)
target_link_libraries(${E_NAME} PRIVATE -Wl,-force_load gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
if(BUILD_ARROW_FROM_SOURCE)
target_link_libraries(${E_NAME} PRIVATE gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
else()
target_link_libraries(${E_NAME} PRIVATE Arrow::arrow_static
Parquet::parquet_static)
endif()
else()
target_link_libraries(${E_NAME} PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive)
if(BUILD_ARROW_FROM_SOURCE)
target_link_libraries(${E_NAME} PRIVATE gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
else()
target_link_libraries(${E_NAME} PRIVATE Arrow::arrow_static
Parquet::parquet_static)
endif()
endif()

# if OpenSSL library exists, link the OpenSSL library.
Expand Down Expand Up @@ -300,15 +337,28 @@ if (BUILD_TESTS)
cmake_parse_arguments(add_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${target} ${add_test_SRCS})
target_compile_features(${target} PRIVATE cxx_std_17)
if(BUILD_ARROW_FROM_SOURCE)
target_include_directories(${target} SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR})
endif()
target_link_libraries(${target} PRIVATE Catch2::Catch2 gar Threads::Threads ${CMAKE_DL_LIBS})
if(APPLE)
target_link_libraries(${target} PRIVATE -Wl,-force_load gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
if(BUILD_ARROW_FROM_SOURCE)
target_link_libraries(${target} PRIVATE gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
else()
target_link_libraries(${target} Arrow::arrow_static
Parquet::parquet_static)
endif()
else()
target_link_libraries(${target} PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive)
if(BUILD_ARROW_FROM_SOURCE)
target_link_libraries(${target} PRIVATE gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
else()
target_link_libraries(${target} PRIVATE Arrow::arrow_static
Parquet::parquet_static)
endif()
endif()
target_include_directories(${target} PRIVATE ${PROJECT_SOURCE_DIR}/include $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/Catch2/single_include>)
target_include_directories(${target} SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR})
Expand Down
12 changes: 12 additions & 0 deletions cpp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Building requires:
- CMake 3.5 or higher
- On Linux and macOS, ``make`` build utilities
- curl-devel with SSL (Linux) or curl (macOS), for s3 filesystem support
- Apache Arrow C++ (>= 12.0.0, requires `arrow-dev`, `arrow-dataset`, `arrow-acero` and `parquet` modules) for Arrow filesystem support and can use `BUILD_ARROW_FROM_SOURCE` option to build with GraphAr automatically. You can refer to [Apache Arrow Installation](https://arrow.apache.org/install/) to install directly too.

Dependencies for optional features:

Expand Down Expand Up @@ -68,6 +69,17 @@ setting `NAMESPACE` option with cmake:
$ make -j8 # if you have 8 CPU cores, otherwise adjust, use -j`nproc` for all cores
```

Build the Apache Arrow dependency from source:

By default, GraphAr try to find Apache arrow in the system. This can be configured to build arrow dependency automatically from source:

```bash
$ mkdir build
$ cd build
$ cmake -DBUILD_ARROW_FROM_SOURCE=ON ..
$ make -j8
```

Debug build with unit tests:

```bash
Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake/apache-arrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ function(build_arrow)

find_package(Threads)
find_package(Arrow QUIET)
set(ARROW_VERSION_TO_BUILD "10.0.1" CACHE INTERNAL "arrow version")
set(ARROW_VERSION_TO_BUILD "12.0.0" CACHE INTERNAL "arrow version")
if (Arrow_FOUND) # arrow is installed, build the same version as the installed one
message(STATUS "Found Arrow installed, align to version: ${Arrow_VERSION}")
set(ARROW_VERSION_TO_BUILD "${Arrow_VERSION}" CACHE INTERNAL "arrow version")
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/filesystem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -267,11 +267,13 @@ Result<IdType> FileSystem::GetFileNumOfDir(const std::string& dir_path,

Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(
const std::string& uri_string, std::string* out_path) {
if (arrow::fs::internal::DetectAbsolutePath(uri_string)) {
if (uri_string.length() >= 1 && uri_string[0] == '/') {
// if the uri_string is an absolute path, we need to create a local file
GAR_RETURN_ON_ARROW_ERROR_AND_ASSIGN(
auto arrow_fs,
arrow::fs::FileSystemFromUriOrPath(uri_string, out_path));
// arrow would delete the last slash, so use uri string
*out_path = uri_string;
return std::make_shared<FileSystem>(arrow_fs);
}

Expand Down
3 changes: 3 additions & 0 deletions cpp/test/test_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,8 @@ TEST_CASE("test_graph_info_load_from_file") {
REQUIRE(edge_infos.size() == 1);
}

// ISSUE-187
#if defined(ARROW_VERSION) && ARROW_VERSION < 12000000
TEST_CASE("test_graph_info_load_from_s3") {
std::string path =
"s3://graphar/ldbc/ldbc.graph.yml"
Expand All @@ -381,3 +383,4 @@ TEST_CASE("test_graph_info_load_from_s3") {
REQUIRE(vertex_infos.size() == 8);
REQUIRE(edge_infos.size() == 23);
}
#endif

0 comments on commit ad1a55e

Please sign in to comment.