Skip to content

Commit

Permalink
Enable builds with scikit-build (#10919)
Browse files Browse the repository at this point in the history
This PR changes the Python build system for cudf to use scikit-build and leverage CMake under the hood.

This PR depends on rapidsai/rapids-cmake#198. Once that PR is merged, I can update the pull of rapids-cmake into the cudf Python CMake build.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Robert Maynard (https://github.com/robertmaynard)
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Ashwin Srinath (https://github.com/shwina)

URL: #10919
  • Loading branch information
vyasr authored May 27, 2022
1 parent 1e0f4e9 commit eeec6a0
Show file tree
Hide file tree
Showing 19 changed files with 348 additions and 132 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ parts/
sdist/
var/
wheels/
_skbuild/
*.egg-info/
.installed.cfg
*.egg
Expand Down Expand Up @@ -162,4 +163,4 @@ dask-worker-space/

# Sphinx docs & build artifacts
docs/cudf/source/api_docs/generated/*
docs/cudf/source/api_docs/api/*
docs/cudf/source/api_docs/api/*
5 changes: 2 additions & 3 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -324,10 +324,9 @@ fi
if buildAll || hasArg cudf; then

cd ${REPODIR}/python/cudf
python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
if [[ ${INSTALL_TARGET} != "" ]]; then
PARALLEL_LEVEL=${PARALLEL_LEVEL} python setup.py build_ext -j${PARALLEL_LEVEL} install --single-version-externally-managed --record=record.txt
else
PARALLEL_LEVEL=${PARALLEL_LEVEL} python setup.py build_ext --inplace -j${PARALLEL_LEVEL} --library-dir=${LIBCUDF_BUILD_DIR}
python setup.py install --single-version-externally-managed --record=record.txt -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
fi
fi

Expand Down
2 changes: 1 addition & 1 deletion ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
################################################################################

gpuci_logger "Build from source"
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests --ptds
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests --ptds --cmake-args=\"-DFIND_CUDF_CPP=ON\"

################################################################################
# TEST - Run GoogleTest
Expand Down
3 changes: 3 additions & 0 deletions ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ function sed_runner() {
# cpp update
sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/CMakeLists.txt

# Python update
sed_runner 's/'"cudf_version .*)"'/'"cudf_version ${NEXT_FULL_TAG})"'/g' python/cudf/CMakeLists.txt

# cpp libcudf_kafka update
sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/libcudf_kafka/CMakeLists.txt

Expand Down
1 change: 1 addition & 0 deletions conda/environments/cudf_dev_cuda11.5.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ dependencies:
- rmm=22.08.*
- cmake>=3.20.1,!=3.23.0
- cmake_setuptools>=0.1.3
- scikit-build>=0.13.1
- python>=3.7,<3.9
- numba>=0.54
- numpy
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/cudf/build.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018-2019, NVIDIA CORPORATION.
# Copyright (c) 2018-2022, NVIDIA CORPORATION.

# This assumes the script is executed from the root of the repo directory
./build.sh cudf
./build.sh cudf --cmake-args=\"-DFIND_CUDF_CPP=ON\"
6 changes: 6 additions & 0 deletions conda/recipes/cudf/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,9 @@ cxx_compiler_version:

sysroot_version:
- "2.17"

cmake_version:
- ">=3.20.1,!=3.23.0"

cuda_compiler:
- nvcc
4 changes: 4 additions & 0 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,17 @@ build:

requirements:
build:
- cmake {{ cmake_version }}
- {{ compiler('c') }}
- {{ compiler('cxx') }}
- {{ compiler('cuda') }} {{ cuda_version }}
- sysroot_{{ target_platform }} {{ sysroot_version }}
host:
- protobuf
- python
- cython >=0.29,<0.30
- cmake>=3.20.1,!=3.23.0
- scikit-build>=0.13.1
- setuptools
- numba >=0.54
- dlpack>=0.5,<0.6.0a0
Expand Down
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -752,7 +752,7 @@ install(
)

install(DIRECTORY ${CUDF_SOURCE_DIR}/include/cudf ${CUDF_SOURCE_DIR}/include/cudf_test
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
${CUDF_SOURCE_DIR}/include/nvtext DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)

install(
Expand Down
65 changes: 65 additions & 0 deletions python/cudf/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# =============================================================================
# Copyright (c) 2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# Oldest CMake release this build script accepts.
cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR)

# Package version, reused for project() below. ci/release/update-version.sh rewrites this line
# with a `cudf_version .*)` pattern, so keep the name, value and closing parenthesis on one line.
set(cudf_version 22.08.00)

# Shared bootstrap script two directories up; the rapids-* modules included later in this file
# are presumably made available by it (its contents are not shown here).
include(../../fetch_rapids.cmake)

# TODO: Building Python extension modules via the python_extension_module requires the C language
# to be enabled here. The test project that is built in scikit-build to verify various linking
# options for the python library is hardcoded to build with C, so until that is fixed we need to
# keep C.
project(
  cudf-python
  VERSION ${cudf_version}
  LANGUAGES C CXX
)

# FIND_CUDF_CPP: when ON, look for an existing libcudf installation first; when OFF (the default)
# no search is performed and cudf_FOUND is forced to OFF.
option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files"
       OFF
)

# If the user requested it we attempt to find CUDF. The lookup is intentionally not REQUIRED: a
# failed search must simply leave cudf_FOUND false so that the `if(NOT cudf_FOUND)` branch that
# follows can fall back to the local sources instead of aborting the configure step.
if(FIND_CUDF_CPP)
  find_package(cudf ${cudf_version})
else()
  set(cudf_FOUND OFF)
endif()

# Fallback path: taken when cudf_FOUND is false, i.e. either no search was requested or the search
# did not find a package. The C++ sources in ../../cpp are then added to this build.
if(NOT cudf_FOUND)
# TODO: This will not be necessary once we upgrade to CMake 3.22, which will pull in the required
# languages for the C++ project even if this project does not require those languages.
include(rapids-cuda)
rapids_cuda_init_architectures(cudf-python)
enable_language(CUDA)
# Since cudf only enables CUDA optionally we need to manually include the file that
# rapids_cuda_init_architectures relies on `project` including.
include("${CMAKE_PROJECT_cudf-python_INCLUDE}")

# Set before entering the C++ directory; presumably read there to skip tests and benchmarks
# (the consuming code is not visible in this file).
set(BUILD_TESTS OFF)
set(BUILD_BENCHMARKS OFF)
# ../../cpp lies outside this source tree, so an explicit binary directory (cudf-cpp) is given.
add_subdirectory(../../cpp cudf-cpp)

# Since there are multiple subpackages of cudf._lib that require access to libcudf, we place the
# library in the _lib/cpp directory as a single source of truth and modify the other rpaths
# appropriately.
install(TARGETS cudf DESTINATION cudf/_lib/cpp)
endif()

# Load the rapids-cython module and run its initialisation before any Cython modules are declared.
include(rapids-cython)
rapids_cython_init()

# The extension modules themselves are declared in cudf/_lib and its subdirectories.
add_subdirectory(cudf/_lib)
85 changes: 85 additions & 0 deletions python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# =============================================================================
# Copyright (c) 2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# Cython sources that live directly in cudf/_lib. Each entry is handed to
# rapids_cython_create_modules below; keep the list explicit and alphabetised.
set(cython_sources
    aggregation.pyx
    avro.pyx
    binaryop.pyx
    column.pyx
    concat.pyx
    copying.pyx
    csv.pyx
    datetime.pyx
    expressions.pyx
    filling.pyx
    gpuarrow.pyx
    groupby.pyx
    hash.pyx
    interop.pyx
    join.pyx
    json.pyx
    labeling.pyx
    lists.pyx
    merge.pyx
    null_mask.pyx
    orc.pyx
    parquet.pyx
    partitioning.pyx
    quantiles.pyx
    reduce.pyx
    replace.pyx
    reshape.pyx
    rolling.pyx
    round.pyx
    scalar.pyx
    search.pyx
    sort.pyx
    stream_compaction.pyx
    string_casting.pyx
    text.pyx
    transform.pyx
    transpose.pyx
    types.pyx
    unary.pyx
    utils.pyx
)

# Every module in this directory links against libcudf.
set(linked_libraries cudf::cudf)

# No MODULE_PREFIX is passed here, unlike in the subdirectories.
rapids_cython_create_modules(
  CXX
  SOURCE_FILES "${cython_sources}"
  LINKED_LIBRARIES "${linked_libraries}"
)

# TODO: Finding NumPy requires finding Development as well. Once this is fixed in CMake (no date
# yet) we can remove the extra component spec.
find_package(Python REQUIRED COMPONENTS Development NumPy)

# Modules in this directory that get the NumPy include directories added to their include path.
set(targets_using_numpy
    gpuarrow
    interop
    avro
    csv
    orc
    json
    parquet
)
foreach(numpy_target IN LISTS targets_using_numpy)
  target_include_directories(${numpy_target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}")
endforeach()

# PyArrow relies on the C++ Arrow library already being installed, so we can just find the C++
# library directly and link to the same one. We rely on libcudf's exports to provide the
# arrow_shared_lib and arrow_cuda_shared_lib libraries. That just leaves us to find the ArrowPython
# library on our own.
#
# NOTE(review): the link line below uses the keyword-less target_link_libraries signature. CMake
# rejects mixing the plain and keyword signatures on a single target, so check how
# rapids_cython_create_modules links gpuarrow before adding PRIVATE/PUBLIC here.
find_library(arrow_python_shared_library arrow_python REQUIRED)
target_link_libraries(gpuarrow ${arrow_python_shared_library})

# Give every target listed in RAPIDS_CYTHON_CREATED_TARGETS an install RPATH covering its own
# directory and the sibling cpp/ directory.
foreach(lib_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
  set_property(TARGET ${lib_module} PROPERTY INSTALL_RPATH "\$ORIGIN;\$ORIGIN/cpp")
endforeach()

# Sub-packages of cudf._lib, each with its own CMakeLists.txt.
add_subdirectory(io)
add_subdirectory(nvtext)
add_subdirectory(strings)
30 changes: 30 additions & 0 deletions python/cudf/cudf/_lib/io/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# =============================================================================
# Copyright (c) 2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# Cython sources of the io sub-package.
set(cython_sources
    datasource.pyx
    utils.pyx
)
set(linked_libraries cudf::cudf)

# MODULE_PREFIX io_ yields the target names io_datasource and io_utils used below.
rapids_cython_create_modules(
  CXX
  SOURCE_FILES "${cython_sources}"
  LINKED_LIBRARIES "${linked_libraries}"
  MODULE_PREFIX io_
)

# Both io modules get the NumPy include directories. Python_NumPy_INCLUDE_DIRS is not set in this
# file; it is expected to be defined by an enclosing directory.
set(targets_using_numpy io_datasource io_utils)
foreach(numpy_target IN LISTS targets_using_numpy)
  target_include_directories(${numpy_target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}")
endforeach()

# Install RPATH: the module's own directory plus cpp/ one level up.
foreach(io_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
  set_property(TARGET ${io_module} PROPERTY INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../cpp")
endforeach()
27 changes: 27 additions & 0 deletions python/cudf/cudf/_lib/nvtext/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# =============================================================================
# Copyright (c) 2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# Cython sources of the nvtext sub-package.
set(cython_sources
    edit_distance.pyx
    generate_ngrams.pyx
    ngrams_tokenize.pyx
    normalize.pyx
    replace.pyx
    stemmer.pyx
    subword_tokenize.pyx
    tokenize.pyx
)
set(linked_libraries cudf::cudf)

# Modules are created with the nvtext_ prefix.
rapids_cython_create_modules(
  CXX
  SOURCE_FILES "${cython_sources}"
  LINKED_LIBRARIES "${linked_libraries}"
  MODULE_PREFIX nvtext_
)

# Install RPATH: the module's own directory plus cpp/ one level up.
foreach(nvtext_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
  set_property(TARGET ${nvtext_module} PROPERTY INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../cpp")
endforeach()
49 changes: 49 additions & 0 deletions python/cudf/cudf/_lib/strings/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# =============================================================================
# Copyright (c) 2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# Cython sources that live directly in the strings sub-package.
set(cython_sources
    attributes.pyx
    capitalize.pyx
    case.pyx
    char_types.pyx
    combine.pyx
    contains.pyx
    extract.pyx
    find.pyx
    find_multiple.pyx
    findall.pyx
    json.pyx
    padding.pyx
    repeat.pyx
    replace.pyx
    replace_re.pyx
    strip.pyx
    substring.pyx
    translate.pyx
    wrap.pyx
)
set(linked_libraries cudf::cudf)

# Modules are created with the strings_ prefix.
rapids_cython_create_modules(
  CXX
  SOURCE_FILES "${cython_sources}"
  LINKED_LIBRARIES "${linked_libraries}"
  MODULE_PREFIX strings_
)

# Install RPATH: the module's own directory plus cpp/ one level up.
foreach(strings_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
  set_property(TARGET ${strings_module} PROPERTY INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../cpp")
endforeach()

# Nested sub-packages, each with its own CMakeLists.txt.
add_subdirectory(convert)
add_subdirectory(split)
28 changes: 28 additions & 0 deletions python/cudf/cudf/_lib/strings/convert/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# =============================================================================
# Copyright (c) 2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# Cython sources of the strings.convert sub-package.
set(cython_sources
    convert_fixed_point.pyx
    convert_floats.pyx
    convert_integers.pyx
    convert_lists.pyx
    convert_urls.pyx
)
set(linked_libraries cudf::cudf)

# Same strings_ prefix as the parent strings directory.
rapids_cython_create_modules(
  CXX
  SOURCE_FILES "${cython_sources}"
  LINKED_LIBRARIES "${linked_libraries}"
  MODULE_PREFIX strings_
)

# Install RPATH: the module's own directory plus cpp/ two levels up, since this directory is
# nested one level deeper than strings/.
foreach(convert_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
  set_property(TARGET ${convert_module} PROPERTY INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../../cpp")
endforeach()
Loading

0 comments on commit eeec6a0

Please sign in to comment.