Skip to content

Commit

Permalink
[Feat][C++] Support building GraphAr with system installed arrow (#230)
Browse files Browse the repository at this point in the history
Signed-off-by: acezen <qiaozi.zwb@alibaba-inc.com>
  • Loading branch information
acezen committed Aug 29, 2023
1 parent 320f868 commit dacd613
Show file tree
Hide file tree
Showing 8 changed files with 183 additions and 63 deletions.
50 changes: 50 additions & 0 deletions .github/workflows/ci-nightly.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Nightly workflow: configures GraphAr C++ with -DBUILD_ARROW_FROM_SOURCE=ON
# (Debug build, tests and examples enabled), builds it and runs `make test`.
name: GraphAr C++ CI Nightly

on:
  schedule:
    # The notifications for scheduled workflows are sent to the user who
    # last modified the cron syntax in the workflow file.
    # Trigger the workflow at 03:00(CST) every day.
    - cron: '00 19 * * *'
jobs:
  GraphAr-ubuntu-arrow-from-source:
    # Skip the job unless the triggering ref is the main branch.
    if: ${{ github.ref == 'refs/heads/main' }}
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: true

      - name: Cache for ccache
        uses: actions/cache@v3
        with:
          # NOTE(review): recent ccache releases may default to ~/.cache/ccache
          # when ~/.ccache does not already exist -- confirm this path is the
          # one ccache actually writes to on the runner image.
          path: ~/.ccache
          # This job defines no build matrix, so the `matrix` context has no
          # `os` entry; use the runner's OS to prefix the cache key instead.
          key: ${{ runner.os }}-build-ccache-${{ hashFiles('**/git-modules.txt') }}
          restore-keys: |
            ${{ runner.os }}-build-ccache-
      - name: Install dependencies
        run: |
          sudo apt-get update -y
          sudo apt-get install -y libboost-graph-dev ccache libcurl4-openssl-dev
      - name: CMake
        run: |
          mkdir build
          pushd build
          cmake ../cpp -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTS=ON -DBUILD_EXAMPLES=ON -DBUILD_ARROW_FROM_SOURCE=ON
          popd
      - name: Build GraphAr
        run: |
          pushd build
          make -j$(nproc)
          make gar-ccache-stats
          popd
      - name: Test
        run: |
          cd build
          # The tests read their input data from the directory named by GAR_TEST_DATA.
          export GAR_TEST_DATA=$PWD/../testing/
          make test
53 changes: 30 additions & 23 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,30 +29,22 @@ concurrency:
cancel-in-progress: true

jobs:
GraphAr-on-ubuntu:
runs-on: ubuntu-20.04
GraphAr-ubuntu-arrow-installed:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true

- name: Cache for ccache
uses: actions/cache@v3
with:
path: ~/.ccache
key: ${{ matrix.os }}-build-ccache-${{ hashFiles('**/git-modules.txt') }}
restore-keys: |
${{ matrix.os }}-build-ccache-
- name: Install dependencies
run: |
# install the latest arrow deb to test arrow
wget -c https://apache.jfrog.io/artifactory/arrow/"$(lsb_release --id --short | tr 'A-Z' 'a-z')"/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb \
-P /tmp/
sudo apt-get install -y -V /tmp/apache-arrow-apt-source-latest-"$(lsb_release --codename --short)".deb
sudo apt-get install -y /tmp/apache-arrow-apt-source-latest-"$(lsb_release --codename --short)".deb
sudo apt-get update -y
sudo apt-get install -y libarrow-dev
sudo apt install -y libarrow-dev libarrow-dataset-dev libarrow-acero-dev libparquet-dev
sudo apt-get install -y libboost-graph-dev ccache libcurl4-openssl-dev
- name: CMake
Expand Down Expand Up @@ -115,7 +107,6 @@ jobs:
run: |
pushd build
make -j$(nproc)
make gar-ccache-stats
popd
- name: Test
Expand All @@ -124,24 +115,40 @@ jobs:
export GAR_TEST_DATA=$PWD/../testing/
make test
GraphAr-on-centos8:
runs-on: ubuntu-22.04
GraphAr-centos8-arrow-installed:
runs-on: ubuntu-latest
container:
image: centos:latest
image: centos:7
steps:
- uses: actions/checkout@v3

- name: Set up devtoolset-8
run: |
# install gcc and g++ 8
yum install -y centos-release-scl
yum install -y devtoolset-8
- name: Install dependencies
shell: scl enable devtoolset-8 -- bash --noprofile --norc -eo pipefail {0}
run: |
pushd /etc/yum.repos.d/
sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-*
sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-*
popd
yum update -y
dnf groupinstall -y "Development Tools"
yum install -y boost-devel libcurl-devel openssl-devel cmake
# install cmake
yum install -y wget
wget https://cmake.org/files/v3.12/cmake-3.12.3.tar.gz -P /tmp/ && \
tar -zxf /tmp/cmake-3.12.3.tar.gz -C /tmp/ && \
pushd /tmp/cmake-3.12.3 && \
./bootstrap --prefix=/usr/local && \
make -j$(nproc) && \
make install && \
popd
echo "cmake version: $(cmake --version)"
#install arrow
yum install -y epel-release || yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-$(cut -d: -f5 /etc/system-release-cpe | cut -d. -f1).noarch.rpm
yum install -y https://apache.jfrog.io/artifactory/arrow/centos/$(cut -d: -f5 /etc/system-release-cpe | cut -d. -f1)/apache-arrow-release-latest.rpm
yum install -y --enablerepo=epel arrow-devel arrow-dataset-devel arrow-acero-devel parquet-devel
- name: Build GraphAr
shell: scl enable devtoolset-8 -- bash --noprofile --norc -eo pipefail {0}
run: |
mkdir build
pushd build
Expand Down
18 changes: 7 additions & 11 deletions .github/workflows/java.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,25 +24,21 @@ concurrency:

jobs:
GraphAr-java:
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true

# install GraphAr C++ library first
- name: Cache for ccache
uses: actions/cache@v3
with:
path: ~/.ccache
key: ${{ matrix.os }}-build-ccache-${{ hashFiles('**/git-modules.txt') }}
restore-keys: |
${{ matrix.os }}-build-ccache-
- name: Install dependencies
run: |
# install the latest arrow deb to test arrow
wget -c https://apache.jfrog.io/artifactory/arrow/"$(lsb_release --id --short | tr 'A-Z' 'a-z')"/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb \
-P /tmp/
sudo apt-get install -y /tmp/apache-arrow-apt-source-latest-"$(lsb_release --codename --short)".deb
sudo apt-get update -y
sudo apt-get install ccache libcurl4-openssl-dev -y
sudo apt install -y libarrow-dev libarrow-dataset-dev libarrow-acero-dev libparquet-dev
sudo apt-get install libcurl4-openssl-dev -y
sudo apt-get install llvm-11 clang-11 lld-11 libclang-11-dev libz-dev -y
- name: Build and Install cpp
Expand Down
104 changes: 77 additions & 27 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ project(graph-archive LANGUAGES C CXX VERSION ${GAR_VERSION})
option(NAMESPACE "User specific namespace, default if GraphArchive" OFF)
option(BUILD_TESTS "Build unit tests" OFF)
option(BUILD_EXAMPLES "Build examples" OFF)
option(BUILD_ARROW_FROM_SOURCE "Build Arrow from source (ON) or use system-installed Arrow (OFF)" OFF)

if (NAMESPACE)
add_definitions(-DGAR_NAMESPACE=${NAMESPACE})
Expand Down Expand Up @@ -159,8 +160,16 @@ if(OPENSSL_FOUND)
endif()
endif()

include(apache-arrow)
build_arrow()
if(BUILD_ARROW_FROM_SOURCE)
include(apache-arrow)
build_arrow()
else()
find_package(Arrow REQUIRED)
find_package(ArrowDataset REQUIRED)
find_package(ArrowAcero REQUIRED)
find_package(Parquet REQUIRED)
endif()


macro(get_target_location var target)
if(TARGET ${target})
Expand All @@ -185,21 +194,37 @@ macro(build_gar)
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/mini-yaml>
)
target_include_directories(gar SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR})
if(BUILD_ARROW_FROM_SOURCE)
target_include_directories(gar SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR})
endif()
target_link_libraries(gar PRIVATE Threads::Threads ${CMAKE_DL_LIBS})

if(APPLE)
target_link_libraries(gar PRIVATE -Wl,-force_load gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_DATASET_STATIC_LIB}"
"${GAR_ACERO_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
if(BUILD_ARROW_FROM_SOURCE)
target_link_libraries(gar PRIVATE -Wl,-force_load gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_DATASET_STATIC_LIB}"
"${GAR_ACERO_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
else()
target_link_libraries(gar PRIVATE -Wl,-force_load Arrow::arrow_static
Parquet::parquet_static
ArrowDataset::arrow_dataset_static
ArrowAcero::arrow_acero_static)
endif()
else()
target_link_libraries(gar PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_DATASET_STATIC_LIB}"
"${GAR_ARROW_ACERO_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive)
if(BUILD_ARROW_FROM_SOURCE)
target_link_libraries(gar PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_DATASET_STATIC_LIB}"
"${GAR_ARROW_ACERO_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive)
else()
target_link_libraries(gar PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive Arrow::arrow_static
Parquet::parquet_static
ArrowDataset::arrow_dataset_static
ArrowAcero::arrow_acero_static -Wl,--no-whole-archive)
endif()
endif()

# if OpenSSL library exists, link the OpenSSL library.
Expand All @@ -208,7 +233,7 @@ macro(build_gar)
target_link_libraries(gar PRIVATE OpenSSL::SSL)
endif()
if (CURL_FOUND)
target_link_libraries(gar PRIVATE CURL::libcurl)
target_link_libraries(gar PRIVATE ${CURL_LIBRARIES})
endif()
if (APPLE)
target_link_libraries(gar "-framework CoreFoundation")
Expand All @@ -231,16 +256,28 @@ if (BUILD_EXAMPLES)
add_executable(${E_NAME} examples/${E_NAME}.cc)
target_include_directories(${E_NAME} PRIVATE examples ${PROJECT_SOURCE_DIR}/include $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/Catch2/single_include>)
target_include_directories(${E_NAME} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS})
target_include_directories(${E_NAME} SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR})
if(BUILD_ARROW_FROM_SOURCE)
target_include_directories(${E_NAME} SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR})
endif()
target_link_libraries(${E_NAME} PRIVATE gar ${Boost_LIBRARIES} Threads::Threads ${CMAKE_DL_LIBS})
if(APPLE)
target_link_libraries(${E_NAME} PRIVATE -Wl,-force_load gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
if(BUILD_ARROW_FROM_SOURCE)
target_link_libraries(${E_NAME} PRIVATE -Wl,-force_load gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
else()
target_link_libraries(${E_NAME} PRIVATE Arrow::arrow_static
Parquet::parquet_static)
endif()
else()
target_link_libraries(${E_NAME} PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive)
if(BUILD_ARROW_FROM_SOURCE)
target_link_libraries(${E_NAME} PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive)
else()
target_link_libraries(${E_NAME} PRIVATE Arrow::arrow_static
Parquet::parquet_static)
endif()
endif()

# if OpenSSL library exists, link the OpenSSL library.
Expand Down Expand Up @@ -300,15 +337,28 @@ if (BUILD_TESTS)
cmake_parse_arguments(add_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${target} ${add_test_SRCS})
target_compile_features(${target} PRIVATE cxx_std_17)
if(BUILD_ARROW_FROM_SOURCE)
target_include_directories(${target} SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR})
endif()
target_link_libraries(${target} PRIVATE Catch2::Catch2 gar Threads::Threads ${CMAKE_DL_LIBS})
if(APPLE)
target_link_libraries(${target} PRIVATE -Wl,-force_load gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
if(BUILD_ARROW_FROM_SOURCE)
target_link_libraries(${target} PRIVATE -Wl,-force_load gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
else()
target_link_libraries(${target} Arrow::arrow_static
Parquet::parquet_static)
endif()
else()
target_link_libraries(${target} PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive)
if(BUILD_ARROW_FROM_SOURCE)
target_link_libraries(${target} PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static
"${GAR_PARQUET_STATIC_LIB}"
"${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive)
else()
target_link_libraries(${target} PRIVATE Arrow::arrow_static
Parquet::parquet_static)
endif()
endif()
target_include_directories(${target} PRIVATE ${PROJECT_SOURCE_DIR}/include $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/Catch2/single_include>)
target_include_directories(${target} SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR})
Expand Down
12 changes: 12 additions & 0 deletions cpp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Building requires:
- CMake 3.5 or higher
- On Linux and macOS, ``make`` build utilities
- curl-devel with SSL (Linux) or curl (macOS), for s3 filesystem support
- Apache Arrow C++ (>= 12.0.0, requires the `arrow-dev`, `arrow-dataset`, `arrow-acero` and `parquet` modules) for Arrow filesystem support. Either install Arrow on the system by following [Apache Arrow Installation](https://arrow.apache.org/install/), or set the `BUILD_ARROW_FROM_SOURCE` option to have GraphAr build Arrow from source automatically.

Dependencies for optional features:

Expand Down Expand Up @@ -68,6 +69,17 @@ setting `NAMESPACE` option with cmake:
$ make -j8 # if you have 8 CPU cores, otherwise adjust, use -j`nproc` for all cores
```

Build the Apache Arrow dependency from source:

By default, GraphAr tries to find Apache Arrow installed in the system. It can instead be configured to build the Arrow dependency automatically from source:

```bash
$ mkdir build
$ cd build
$ cmake -DBUILD_ARROW_FROM_SOURCE=ON ..
$ make -j8
```

Debug build with unit tests:

```bash
Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake/apache-arrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ function(build_arrow)

find_package(Threads)
find_package(Arrow QUIET)
set(ARROW_VERSION_TO_BUILD "10.0.1" CACHE INTERNAL "arrow version")
set(ARROW_VERSION_TO_BUILD "12.0.0" CACHE INTERNAL "arrow version")
if (Arrow_FOUND) # arrow is installed, build the same version as the installed one
message(STATUS "Found Arrow installed, align to version: ${Arrow_VERSION}")
set(ARROW_VERSION_TO_BUILD "${Arrow_VERSION}" CACHE INTERNAL "arrow version")
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/filesystem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -267,11 +267,13 @@ Result<IdType> FileSystem::GetFileNumOfDir(const std::string& dir_path,

Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(
const std::string& uri_string, std::string* out_path) {
if (arrow::fs::internal::DetectAbsolutePath(uri_string)) {
if (uri_string.length() >= 1 && uri_string[0] == '/') {
// if the uri_string is an absolute path, we need to create a local file system
GAR_RETURN_ON_ARROW_ERROR_AND_ASSIGN(
auto arrow_fs,
arrow::fs::FileSystemFromUriOrPath(uri_string, out_path));
// arrow would delete the last slash, so use uri string
*out_path = uri_string;
return std::make_shared<FileSystem>(arrow_fs);
}

Expand Down
3 changes: 3 additions & 0 deletions cpp/test/test_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,8 @@ TEST_CASE("test_graph_info_load_from_file") {
REQUIRE(edge_infos.size() == 1);
}

// ISSUE-187
#if defined(ARROW_VERSION) && ARROW_VERSION < 12000000
TEST_CASE("test_graph_info_load_from_s3") {
std::string path =
"s3://graphar/ldbc/ldbc.graph.yml"
Expand All @@ -381,3 +383,4 @@ TEST_CASE("test_graph_info_load_from_s3") {
REQUIRE(vertex_infos.size() == 8);
REQUIRE(edge_infos.size() == 23);
}
#endif

0 comments on commit dacd613

Please sign in to comment.