Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feat][C++] Support building GraphAr with system installed arrow #230

Merged
merged 9 commits into from
Aug 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions .github/workflows/ci-nightly.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Nightly workflow: builds GraphAr's C++ library with Apache Arrow built from
# source (-DBUILD_ARROW_FROM_SOURCE=ON) and runs the unit tests.
name: GraphAr C++ CI Nightly

on:
  schedule:
    # The notifications for scheduled workflows are sent to the user who
    # last modified the cron syntax in the workflow file.
    # Trigger the workflow at 03:00 (CST, UTC+8) every day, i.e. 19:00 UTC.
    - cron: '00 19 * * *'

jobs:
  GraphAr-ubuntu-arrow-from-source:
    # Only run for the main branch ref.
    if: ${{ github.ref == 'refs/heads/main' }}
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: true

      - name: Cache for ccache
        uses: actions/cache@v3
        with:
          path: ~/.ccache
          # This job defines no strategy matrix, so `matrix.os` would expand
          # to an empty string; use the runner's OS for the key prefix.
          key: ${{ runner.os }}-build-ccache-${{ hashFiles('**/git-modules.txt') }}
          restore-keys: |
            ${{ runner.os }}-build-ccache-

      - name: Install dependencies
        run: |
          sudo apt-get update -y
          sudo apt-get install -y libboost-graph-dev ccache libcurl4-openssl-dev

      - name: CMake
        run: |
          mkdir build
          pushd build
          cmake ../cpp -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTS=ON -DBUILD_EXAMPLES=ON -DBUILD_ARROW_FROM_SOURCE=ON
          popd

      - name: Build GraphAr
        run: |
          pushd build
          make -j$(nproc)
          # NOTE(review): presumably prints ccache hit/miss statistics; the
          # target is defined in the CMake project, not in this workflow.
          make gar-ccache-stats
          popd

      - name: Test
        run: |
          cd build
          # Tests read their fixtures from the directory named by
          # GAR_TEST_DATA (the repository's top-level testing/ directory).
          export GAR_TEST_DATA=$PWD/../testing/
          make test
53 changes: 30 additions & 23 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,30 +29,22 @@ concurrency:
cancel-in-progress: true

jobs:
GraphAr-on-ubuntu:
runs-on: ubuntu-20.04
GraphAr-ubuntu-arrow-installed:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true

- name: Cache for ccache
uses: actions/cache@v3
with:
path: ~/.ccache
key: ${{ matrix.os }}-build-ccache-${{ hashFiles('**/git-modules.txt') }}
restore-keys: |
${{ matrix.os }}-build-ccache-

- name: Install dependencies
run: |

# install the latest arrow deb to test arrow
wget -c https://apache.jfrog.io/artifactory/arrow/"$(lsb_release --id --short | tr 'A-Z' 'a-z')"/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb \
-P /tmp/
sudo apt-get install -y -V /tmp/apache-arrow-apt-source-latest-"$(lsb_release --codename --short)".deb
sudo apt-get install -y /tmp/apache-arrow-apt-source-latest-"$(lsb_release --codename --short)".deb
sudo apt-get update -y
sudo apt-get install -y libarrow-dev
sudo apt install -y libarrow-dev libarrow-dataset-dev libarrow-acero-dev libparquet-dev
sudo apt-get install -y libboost-graph-dev ccache libcurl4-openssl-dev

- name: CMake
Expand Down Expand Up @@ -115,7 +107,6 @@ jobs:
run: |
pushd build
make -j$(nproc)
make gar-ccache-stats
popd

- name: Test
Expand All @@ -124,24 +115,40 @@ jobs:
export GAR_TEST_DATA=$PWD/../testing/
make test

GraphAr-on-centos8:
runs-on: ubuntu-22.04
GraphAr-centos8-arrow-installed:
runs-on: ubuntu-latest
container:
image: centos:latest
image: centos:7
steps:
- uses: actions/checkout@v3

- name: Set up devtoolset-8
run: |
# install gcc and g++ 8
yum install -y centos-release-scl
yum install -y devtoolset-8

- name: Install dependencies
shell: scl enable devtoolset-8 -- bash --noprofile --norc -eo pipefail {0}
run: |
pushd /etc/yum.repos.d/
sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-*
sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-*
popd
yum update -y
dnf groupinstall -y "Development Tools"
yum install -y boost-devel libcurl-devel openssl-devel cmake
# install cmake
yum install -y wget
wget https://cmake.org/files/v3.12/cmake-3.12.3.tar.gz -P /tmp/ && \
tar -zxf /tmp/cmake-3.12.3.tar.gz -C /tmp/ && \
pushd /tmp/cmake-3.12.3 && \
./bootstrap --prefix=/usr/local && \
make -j$(nproc) && \
make install && \
popd
echo "cmake version: $(cmake --version)"

#install arrow
yum install -y epel-release || yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-$(cut -d: -f5 /etc/system-release-cpe | cut -d. -f1).noarch.rpm
yum install -y https://apache.jfrog.io/artifactory/arrow/centos/$(cut -d: -f5 /etc/system-release-cpe | cut -d. -f1)/apache-arrow-release-latest.rpm
yum install -y --enablerepo=epel arrow-devel arrow-dataset-devel arrow-acero-devel parquet-devel

- name: Build GraphAr
shell: scl enable devtoolset-8 -- bash --noprofile --norc -eo pipefail {0}
run: |
mkdir build
pushd build
Expand Down
18 changes: 7 additions & 11 deletions .github/workflows/java.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,25 +24,21 @@ concurrency:

jobs:
GraphAr-java:
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true

# install GraphAr C++ library first
- name: Cache for ccache
uses: actions/cache@v3
with:
path: ~/.ccache
key: ${{ matrix.os }}-build-ccache-${{ hashFiles('**/git-modules.txt') }}
restore-keys: |
${{ matrix.os }}-build-ccache-

- name: Install dependencies
run: |
# install the latest arrow deb to test arrow
wget -c https://apache.jfrog.io/artifactory/arrow/"$(lsb_release --id --short | tr 'A-Z' 'a-z')"/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb \
-P /tmp/
sudo apt-get install -y /tmp/apache-arrow-apt-source-latest-"$(lsb_release --codename --short)".deb
sudo apt-get update -y
sudo apt-get install ccache libcurl4-openssl-dev -y
sudo apt install -y libarrow-dev libarrow-dataset-dev libarrow-acero-dev libparquet-dev
sudo apt-get install libcurl4-openssl-dev -y
sudo apt-get install llvm-11 clang-11 lld-11 libclang-11-dev libz-dev -y

- name: Build and Install cpp
Expand Down
104 changes: 77 additions & 27 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ project(graph-archive LANGUAGES C CXX VERSION ${GAR_VERSION})
option(NAMESPACE "User specific namespace, default if GraphArchive" OFF)
option(BUILD_TESTS "Build unit tests" OFF)
option(BUILD_EXAMPLES "Build examples" OFF)
option(BUILD_ARROW_FROM_SOURCE "Build Arrow from source (ON) or use system-installed Arrow (OFF)" OFF)

if (NAMESPACE)
add_definitions(-DGAR_NAMESPACE=${NAMESPACE})
Expand Down Expand Up @@ -159,8 +160,16 @@ if(OPENSSL_FOUND)
endif()
endif()

include(apache-arrow)
build_arrow()
if(BUILD_ARROW_FROM_SOURCE)
include(apache-arrow)
build_arrow()
else()
find_package(Arrow REQUIRED)
find_package(ArrowDataset REQUIRED)
find_package(ArrowAcero REQUIRED)
find_package(Parquet REQUIRED)
endif()


macro(get_target_location var target)
if(TARGET ${target})
Expand All @@ -185,21 +194,37 @@ macro(build_gar)
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/mini-yaml>
)
target_include_directories(gar SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR})
if(BUILD_ARROW_FROM_SOURCE)
target_include_directories(gar SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR})
endif()
target_link_libraries(gar PRIVATE Threads::Threads ${CMAKE_DL_LIBS})

if(APPLE)
target_link_libraries(gar PRIVATE -Wl,-force_load gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_DATASET_STATIC_LIB}"
"${GAR_ACERO_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
if(BUILD_ARROW_FROM_SOURCE)
target_link_libraries(gar PRIVATE -Wl,-force_load gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_DATASET_STATIC_LIB}"
"${GAR_ACERO_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
else()
target_link_libraries(gar PRIVATE -Wl,-force_load Arrow::arrow_static
Parquet::parquet_static
ArrowDataset::arrow_dataset_static
ArrowAcero::arrow_acero_static)
endif()
else()
target_link_libraries(gar PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_DATASET_STATIC_LIB}"
"${GAR_ARROW_ACERO_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive)
if(BUILD_ARROW_FROM_SOURCE)
target_link_libraries(gar PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_DATASET_STATIC_LIB}"
"${GAR_ARROW_ACERO_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive)
else()
target_link_libraries(gar PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive Arrow::arrow_static
Parquet::parquet_static
ArrowDataset::arrow_dataset_static
ArrowAcero::arrow_acero_static -Wl,--no-whole-archive)
endif()
endif()

# if OpenSSL library exists, link the OpenSSL library.
Expand All @@ -208,7 +233,7 @@ macro(build_gar)
target_link_libraries(gar PRIVATE OpenSSL::SSL)
endif()
if (CURL_FOUND)
target_link_libraries(gar PRIVATE CURL::libcurl)
target_link_libraries(gar PRIVATE ${CURL_LIBRARIES})
endif()
if (APPLE)
target_link_libraries(gar "-framework CoreFoundation")
Expand All @@ -231,16 +256,28 @@ if (BUILD_EXAMPLES)
add_executable(${E_NAME} examples/${E_NAME}.cc)
target_include_directories(${E_NAME} PRIVATE examples ${PROJECT_SOURCE_DIR}/include $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/Catch2/single_include>)
target_include_directories(${E_NAME} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS})
target_include_directories(${E_NAME} SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR})
if(BUILD_ARROW_FROM_SOURCE)
target_include_directories(${E_NAME} SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR})
endif()
target_link_libraries(${E_NAME} PRIVATE gar ${Boost_LIBRARIES} Threads::Threads ${CMAKE_DL_LIBS})
if(APPLE)
target_link_libraries(${E_NAME} PRIVATE -Wl,-force_load gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
if(BUILD_ARROW_FROM_SOURCE)
target_link_libraries(${E_NAME} PRIVATE -Wl,-force_load gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
else()
target_link_libraries(${E_NAME} PRIVATE Arrow::arrow_static
Parquet::parquet_static)
endif()
else()
target_link_libraries(${E_NAME} PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive)
if(BUILD_ARROW_FROM_SOURCE)
target_link_libraries(${E_NAME} PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive)
else()
target_link_libraries(${E_NAME} PRIVATE Arrow::arrow_static
Parquet::parquet_static)
endif()
endif()

# if OpenSSL library exists, link the OpenSSL library.
Expand Down Expand Up @@ -300,15 +337,28 @@ if (BUILD_TESTS)
cmake_parse_arguments(add_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${target} ${add_test_SRCS})
target_compile_features(${target} PRIVATE cxx_std_17)
if(BUILD_ARROW_FROM_SOURCE)
target_include_directories(${target} SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR})
endif()
target_link_libraries(${target} PRIVATE Catch2::Catch2 gar Threads::Threads ${CMAKE_DL_LIBS})
if(APPLE)
target_link_libraries(${target} PRIVATE -Wl,-force_load gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
if(BUILD_ARROW_FROM_SOURCE)
target_link_libraries(${target} PRIVATE -Wl,-force_load gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
else()
target_link_libraries(${target} Arrow::arrow_static
Parquet::parquet_static)
endif()
else()
target_link_libraries(${target} PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive)
if(BUILD_ARROW_FROM_SOURCE)
target_link_libraries(${target} PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive)
else()
target_link_libraries(${target} PRIVATE Arrow::arrow_static
Parquet::parquet_static)
endif()
endif()
target_include_directories(${target} PRIVATE ${PROJECT_SOURCE_DIR}/include $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/Catch2/single_include>)
target_include_directories(${target} SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR})
Expand Down
12 changes: 12 additions & 0 deletions cpp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Building requires:
- CMake 3.5 or higher
- On Linux and macOS, ``make`` build utilities
- curl-devel with SSL (Linux) or curl (macOS), for s3 filesystem support
- Apache Arrow C++ (>= 12.0.0, requires `arrow-dev`, `arrow-dataset`, `arrow-acero` and `parquet` modules) for Arrow filesystem support. You can set the `BUILD_ARROW_FROM_SOURCE` option to have GraphAr build Arrow automatically, or refer to [Apache Arrow Installation](https://arrow.apache.org/install/) to install Arrow directly.

Dependencies for optional features:

Expand Down Expand Up @@ -68,6 +69,17 @@ setting `NAMESPACE` option with cmake:
$ make -j8 # if you have 8 CPU cores, otherwise adjust, use -j`nproc` for all cores
```

Build the Apache Arrow dependency from source:

By default, GraphAr tries to find Apache Arrow in the system. It can instead be configured to build the Arrow dependency automatically from source:

```bash
$ mkdir build
$ cd build
$ cmake -DBUILD_ARROW_FROM_SOURCE=ON ..
$ make -j8
```

Debug build with unit tests:

```bash
Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake/apache-arrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ function(build_arrow)

find_package(Threads)
find_package(Arrow QUIET)
set(ARROW_VERSION_TO_BUILD "10.0.1" CACHE INTERNAL "arrow version")
set(ARROW_VERSION_TO_BUILD "12.0.0" CACHE INTERNAL "arrow version")
if (Arrow_FOUND) # arrow is installed, build the same version as the installed one
message(STATUS "Found Arrow installed, align to version: ${Arrow_VERSION}")
set(ARROW_VERSION_TO_BUILD "${Arrow_VERSION}" CACHE INTERNAL "arrow version")
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/filesystem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -267,11 +267,13 @@ Result<IdType> FileSystem::GetFileNumOfDir(const std::string& dir_path,

Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(
const std::string& uri_string, std::string* out_path) {
if (arrow::fs::internal::DetectAbsolutePath(uri_string)) {
if (uri_string.length() >= 1 && uri_string[0] == '/') {
// if the uri_string is an absolute path, we need to create a local file
GAR_RETURN_ON_ARROW_ERROR_AND_ASSIGN(
auto arrow_fs,
arrow::fs::FileSystemFromUriOrPath(uri_string, out_path));
// arrow would delete the last slash, so use uri string
*out_path = uri_string;
return std::make_shared<FileSystem>(arrow_fs);
}

Expand Down
3 changes: 3 additions & 0 deletions cpp/test/test_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,8 @@ TEST_CASE("test_graph_info_load_from_file") {
REQUIRE(edge_infos.size() == 1);
}

// ISSUE-187
#if defined(ARROW_VERSION) && ARROW_VERSION < 12000000
TEST_CASE("test_graph_info_load_from_s3") {
std::string path =
"s3://graphar/ldbc/ldbc.graph.yml"
Expand All @@ -381,3 +383,4 @@ TEST_CASE("test_graph_info_load_from_s3") {
REQUIRE(vertex_infos.size() == 8);
REQUIRE(edge_infos.size() == 23);
}
#endif