From 956620f26086341322967810e33d56613ea6f775 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 18 May 2022 16:03:26 -0700 Subject: [PATCH 01/31] Update gitignore. --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index aee3d072de2..df266677373 100644 --- a/.gitignore +++ b/.gitignore @@ -47,6 +47,7 @@ parts/ sdist/ var/ wheels/ +_skbuild/ *.egg-info/ .installed.cfg *.egg @@ -162,4 +163,4 @@ dask-worker-space/ # Sphinx docs & build artifacts docs/cudf/source/api_docs/generated/* -docs/cudf/source/api_docs/api/* \ No newline at end of file +docs/cudf/source/api_docs/api/* From 0674d2c2b2670c61019027cc066205c449d1cea2 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 19 May 2022 15:06:11 -0700 Subject: [PATCH 02/31] Initial version with scikit-build, compiles but does not load (haven't added Arrow yet). --- python/cudf/CMakeLists.txt | 64 ++++ python/cudf/cudf/_lib/CMakeLists.txt | 70 ++++ python/cudf/cudf/_lib/io/CMakeLists.txt | 27 ++ python/cudf/cudf/_lib/nvtext/CMakeLists.txt | 30 ++ python/cudf/cudf/_lib/strings/CMakeLists.txt | 42 +++ python/cudf/pyproject.toml | 5 +- python/cudf/setup.py | 330 ++++++++++--------- 7 files changed, 403 insertions(+), 165 deletions(-) create mode 100644 python/cudf/CMakeLists.txt create mode 100644 python/cudf/cudf/_lib/CMakeLists.txt create mode 100644 python/cudf/cudf/_lib/io/CMakeLists.txt create mode 100644 python/cudf/cudf/_lib/nvtext/CMakeLists.txt create mode 100644 python/cudf/cudf/_lib/strings/CMakeLists.txt diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt new file mode 100644 index 00000000000..86d07b05cef --- /dev/null +++ b/python/cudf/CMakeLists.txt @@ -0,0 +1,64 @@ +# ============================================================================= +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR) + +set(cudf_version 22.06.00) + +file(DOWNLOAD https://raw.githubusercontent.com/vyasr/rapids-cmake/feature/rapids_cython_prefix/RAPIDS.cmake + ${CMAKE_BINARY_DIR}/RAPIDS.cmake) +include(${CMAKE_BINARY_DIR}/RAPIDS.cmake) + +project( + cudf-python + VERSION ${cudf_version} + LANGUAGES # TODO: Building Python extension modules via the python_extension_module requires the C + # language to be enabled here. The test project that is built in scikit-build to verify + # various linking options for the python library is hardcoded to build with C, so until + # that is fixed we need to keep C. + C + CXX) + +option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files" + OFF) + +# If the user requested it we attempt to find CUDF. +if(FIND_CUDF_CPP) + find_package(cudf ${cudf_version} REQUIRED) +else() + set(cudf_FOUND OFF) +endif() + +if(NOT cudf_FOUND) + # TODO: This will not be necessary once we upgrade to CMake 3.22, which will + # pull in the required languages for the C++ project even if this project + # does not require those languges. + include(rapids-cuda) + rapids_cuda_init_architectures(cudf-python) + enable_language(CUDA) + # Since cudf only enables CUDA optionally we need to manually include the file that + # rapids_cuda_init_architectures relies on `project` including. + include("${CMAKE_PROJECT_cudf-python_INCLUDE}") + + set(BUILD_TESTS OFF) + set(BUILD_BENCHMARKS OFF) + add_subdirectory(../../cpp cudf-cpp) + + # TODO: Figure out where to install +endif() + +include(rapids-cython) +rapids_cython_init() + +add_subdirectory(cudf/_lib) diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt new file mode 100644 index 00000000000..5d8dcbc2179 --- /dev/null +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -0,0 +1,70 @@ +# ============================================================================= +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +set(cython_sources + aggregation.pyx + avro.pyx + binaryop.pyx + column.pyx + concat.pyx + csv.pyx + datetime.pyx + expressions.pyx + filling.pyx + gpuarrow.pyx + groupby.pyx + hash.pyx + interop.pyx + join.pyx + json.pyx + labeling.pyx + lists.pyx + merge.pyx + null_mask.pyx + orc.pyx + parquet.pyx + partitioning.pyx + quantiles.pyx + reduce.pyx + replace.pyx + reshape.pyx + rolling.pyx + round.pyx + scalar.pyx + search.pyx + sort.pyx + stream_compaction.pyx + string_casting.pyx + text.pyx + transform.pyx + transpose.pyx + types.pyx + unary.pyx + utils.pyx +) +set(linked_libraries cudf::cudf) +rapids_cython_create_modules( + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" + CXX) + +add_subdirectory(io) +add_subdirectory(nvtext) +add_subdirectory(strings) + +find_package(NumPy REQUIRED) +set(targets_using_numpy gpuarrow interop avro csv orc json parquet) +foreach(target IN LISTS targets_using_numpy) + target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") +endforeach() diff --git a/python/cudf/cudf/_lib/io/CMakeLists.txt b/python/cudf/cudf/_lib/io/CMakeLists.txt new file mode 100644 index 00000000000..96da5dfb03c --- /dev/null +++ b/python/cudf/cudf/_lib/io/CMakeLists.txt @@ -0,0 +1,27 @@ +# ============================================================================= +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +set(cython_sources datasource.pyx utils.pyx) +set(linked_libraries cudf::cudf) +rapids_cython_create_modules( + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" + MODULE_PREFIX io_ + CXX) + +find_package(NumPy REQUIRED) +set(targets_using_numpy io_datasource io_utils) +foreach(target IN LISTS targets_using_numpy) + target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") +endforeach() diff --git a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt new file mode 100644 index 00000000000..083d1ba01ed --- /dev/null +++ b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt @@ -0,0 +1,30 @@ +# ============================================================================= +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +set(cython_sources + edit_distance.pyx + generate_ngrams.pyx + ngrams_tokenize.pyx + normalize.pyx + replace.pyx + stemmer.pyx + subword_tokenize.pyx + tokenize.pyx +) +set(linked_libraries cudf::cudf) +rapids_cython_create_modules( + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" + MODULE_PREFIX nvtext_ + CXX) diff --git a/python/cudf/cudf/_lib/strings/CMakeLists.txt b/python/cudf/cudf/_lib/strings/CMakeLists.txt new file mode 100644 index 00000000000..5d7b71531a5 --- /dev/null +++ b/python/cudf/cudf/_lib/strings/CMakeLists.txt @@ -0,0 +1,42 @@ +# ============================================================================= +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +set(cython_sources + attributes.pyx + capitalize.pyx + case.pyx + char_types.pyx + combine.pyx + contains.pyx + extract.pyx + find.pyx + find_multiple.pyx + findall.pyx + json.pyx + padding.pyx + repeat.pyx + replace.pyx + replace_re.pyx + strip.pyx + substring.pyx + translate.pyx + wrap.pyx +) + +set(linked_libraries cudf::cudf) +rapids_cython_create_modules( + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" + MODULE_PREFIX strings_ + CXX) diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 630efd5b9ec..7eaa5ae2f8d 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2022, NVIDIA CORPORATION. [build-system] @@ -6,6 +6,9 @@ requires = [ "wheel", "setuptools", "cython>=0.29,<0.30", + "scikit-build>=0.13.1", + "cmake>=3.20.1,!=3.23.0", + "ninja", ] [tool.black] diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 4a5a0d2186f..56cdeea6392 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -3,29 +3,28 @@ import os import re import shutil -import subprocess -import sys -import sysconfig - -# Must import in this order: -# setuptools -> Cython.Distutils.build_ext -> setuptools.command.build_ext -# Otherwise, setuptools.command.build_ext ends up inheriting from -# Cython.Distutils.old_build_ext which we do not want -import setuptools - -try: - from Cython.Distutils.build_ext import new_build_ext as _build_ext -except ImportError: - from setuptools.command.build_ext import build_ext as _build_ext - -from distutils.spawn import find_executable -from distutils.sysconfig import get_python_lib - -import numpy as np -import pyarrow as pa -import setuptools.command.build_ext -from setuptools import find_packages, setup -from setuptools.extension import Extension +# import subprocess +# import sys +# import sysconfig +# +# # Must import in this order: +# # setuptools -> Cython.Distutils.build_ext -> setuptools.command.build_ext +# # Otherwise, setuptools.command.build_ext ends up inheriting from +# # Cython.Distutils.old_build_ext which we do not want +# import setuptools +# +# try: +# from Cython.Distutils.build_ext import new_build_ext as _build_ext +# except ImportError: +# from setuptools.command.build_ext import build_ext as _build_ext +# +# from distutils.spawn import find_executable +# from distutils.sysconfig import get_python_lib + +# import numpy as np +# import pyarrow as pa +from setuptools import find_packages +from skbuild import setup import versioneer @@ -56,8 +55,8 @@ "transformers<=4.10.3", ] } - -cython_files = ["cudf/**/*.pyx"] +# +# cython_files = ["cudf/**/*.pyx"] def get_cuda_version_from_header(cuda_include_dir, delimeter=""): @@ -100,137 +99,143 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): install_requires.append( "cupy-cuda" + get_cuda_version_from_header(cuda_include_dir) ) - -CUDF_HOME = os.environ.get( - "CUDF_HOME", - os.path.abspath( - os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../") - ), -) -CUDF_ROOT = os.environ.get( - "CUDF_ROOT", - os.path.abspath( - os.path.join( - os.path.dirname(os.path.abspath(__file__)), "../../cpp/build/" - ) - ), -) - - -class build_ext_and_proto_no_debug(_build_ext): - def build_extensions(self): - def remove_flags(compiler, *flags): - for flag in flags: - try: - compiler.compiler_so = list( - filter((flag).__ne__, compiler.compiler_so) - ) - except Exception: - pass - - # Full optimization - self.compiler.compiler_so.append("-O3") - # Silence '-Wunknown-pragmas' warning - self.compiler.compiler_so.append("-Wno-unknown-pragmas") - # No debug symbols, full optimization, no '-Wstrict-prototypes' warning - remove_flags( - self.compiler, "-g", "-G", "-O1", "-O2", "-Wstrict-prototypes" - ) - super().build_extensions() - - def finalize_options(self): - if self.distribution.ext_modules: - # Delay import this to allow for Cython-less installs - from Cython.Build.Dependencies import cythonize - - nthreads = getattr(self, "parallel", None) # -j option in Py3.5+ - nthreads = int(nthreads) if nthreads else None - self.distribution.ext_modules = cythonize( - self.distribution.ext_modules, - nthreads=nthreads, - force=self.force, - gdb_debug=False, - compiler_directives=dict( - profile=False, language_level=3, embedsignature=True - ), - ) - # Skip calling super() and jump straight to setuptools - setuptools.command.build_ext.build_ext.finalize_options(self) - - def run(self): - # Get protoc - protoc = None - if "PROTOC" in os.environ and os.path.exists(os.environ["PROTOC"]): - protoc = os.environ["PROTOC"] - else: - protoc = find_executable("protoc") - if protoc is None: - sys.stderr.write("protoc not found") - sys.exit(1) - - # Build .proto file - for source in ["cudf/utils/metadata/orc_column_statistics.proto"]: - output = source.replace(".proto", "_pb2.py") - - if not os.path.exists(output) or ( - os.path.getmtime(source) > os.path.getmtime(output) - ): - with open(output, "a") as src: - src.write("# flake8: noqa" + os.linesep) - src.write("# fmt: off" + os.linesep) - subprocess.check_call([protoc, "--python_out=.", source]) - with open(output, "r+") as src: - new_src_content = ( - "# flake8: noqa" - + os.linesep - + "# fmt: off" - + os.linesep - + src.read() - + "# fmt: on" - + os.linesep - ) - src.seek(0) - src.write(new_src_content) - - # Run original Cython build_ext command - _build_ext.run(self) - - -extensions = [ - Extension( - "*", - sources=cython_files, - include_dirs=[ - os.path.abspath(os.path.join(CUDF_HOME, "cpp/include/cudf")), - os.path.abspath(os.path.join(CUDF_HOME, "cpp/include")), - os.path.abspath(os.path.join(CUDF_ROOT, "include")), - os.path.join(CUDF_ROOT, "_deps/libcudacxx-src/include"), - os.path.join(CUDF_ROOT, "_deps/dlpack-src/include"), - os.path.join( - os.path.dirname(sysconfig.get_path("include")), - "rapids/libcudacxx", - ), - os.path.dirname(sysconfig.get_path("include")), - np.get_include(), - pa.get_include(), - cuda_include_dir, - ], - library_dirs=( - pa.get_library_dirs() - + [ - get_python_lib(), - os.path.join(os.sys.prefix, "lib"), - cuda_lib_dir, - ] - ), - libraries=["cudart", "cudf"] + pa.get_libraries() + ["arrow_cuda"], - language="c++", - extra_compile_args=["-std=c++17"], - ) -] - -cmdclass = versioneer.get_cmdclass() -cmdclass["build_ext"] = build_ext_and_proto_no_debug +# +# CUDF_HOME = os.environ.get( +# "CUDF_HOME", +# os.path.abspath( +# os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../") +# ), +# ) +# CUDF_ROOT = os.environ.get( +# "CUDF_ROOT", +# os.path.abspath( +# os.path.join( +# os.path.dirname(os.path.abspath(__file__)), "../../cpp/build/" +# ) +# ), +# ) +# +# +# class build_ext_and_proto_no_debug(_build_ext): +# def build_extensions(self): +# def remove_flags(compiler, *flags): +# for flag in flags: +# try: +# compiler.compiler_so = list( +# filter((flag).__ne__, compiler.compiler_so) +# ) +# except Exception: +# pass +# +# # Full optimization +# self.compiler.compiler_so.append("-O3") +# # Silence '-Wunknown-pragmas' warning +# self.compiler.compiler_so.append("-Wno-unknown-pragmas") +# # No debug symbols, full optimization, no '-Wstrict-prototypes' warning +# remove_flags( +# self.compiler, "-g", "-G", "-O1", "-O2", "-Wstrict-prototypes" +# ) +# super().build_extensions() +# +# def finalize_options(self): +# if self.distribution.ext_modules: +# # Delay import this to allow for Cython-less installs +# from Cython.Build.Dependencies import cythonize +# +# nthreads = getattr(self, "parallel", None) # -j option in Py3.5+ +# nthreads = int(nthreads) if nthreads else None +# self.distribution.ext_modules = cythonize( +# self.distribution.ext_modules, +# nthreads=nthreads, +# force=self.force, +# gdb_debug=False, +# compiler_directives=dict( +# profile=False, language_level=3, embedsignature=True +# ), +# ) +# # Skip calling super() and jump straight to setuptools +# setuptools.command.build_ext.build_ext.finalize_options(self) +# +# def run(self): +# # Get protoc +# protoc = None +# if "PROTOC" in os.environ and os.path.exists(os.environ["PROTOC"]): +# protoc = os.environ["PROTOC"] +# else: +# protoc = find_executable("protoc") +# if protoc is None: +# sys.stderr.write("protoc not found") +# sys.exit(1) +# +# # Build .proto file +# for source in ["cudf/utils/metadata/orc_column_statistics.proto"]: +# output = source.replace(".proto", "_pb2.py") +# +# if not os.path.exists(output) or ( +# os.path.getmtime(source) > os.path.getmtime(output) +# ): +# with open(output, "a") as src: +# src.write("# flake8: noqa" + os.linesep) +# src.write("# fmt: off" + os.linesep) +# subprocess.check_call([protoc, "--python_out=.", source]) +# with open(output, "r+") as src: +# new_src_content = ( +# "# flake8: noqa" +# + os.linesep +# + "# fmt: off" +# + os.linesep +# + src.read() +# + "# fmt: on" +# + os.linesep +# ) +# src.seek(0) +# src.write(new_src_content) +# +# # Run original Cython build_ext command +# _build_ext.run(self) +# +# +# extensions = [ +# Extension( +# "*", +# sources=cython_files, +# include_dirs=[ +# os.path.abspath(os.path.join(CUDF_HOME, "cpp/include/cudf")), +# os.path.abspath(os.path.join(CUDF_HOME, "cpp/include")), +# os.path.abspath(os.path.join(CUDF_ROOT, "include")), +# os.path.join(CUDF_ROOT, "_deps/libcudacxx-src/include"), +# os.path.join(CUDF_ROOT, "_deps/dlpack-src/include"), +# os.path.join( +# os.path.dirname(sysconfig.get_path("include")), +# "rapids/libcudacxx", +# ), +# os.path.dirname(sysconfig.get_path("include")), +# np.get_include(), +# pa.get_include(), +# cuda_include_dir, +# ], +# library_dirs=( +# pa.get_library_dirs() +# + [ +# get_python_lib(), +# os.path.join(os.sys.prefix, "lib"), +# cuda_lib_dir, +# ] +# ), +# libraries=["cudart", "cudf"] + pa.get_libraries() + ["arrow_cuda"], +# language="c++", +# extra_compile_args=["-std=c++17"], +# ) +# ] +# +# cmdclass = versioneer.get_cmdclass() +# cmdclass["build_ext"] = build_ext_and_proto_no_debug + +# import sys +# import numpy as np +# import pyarrow as pa +# print(sys.argv) +# exit() setup( @@ -249,15 +254,12 @@ def run(self): "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", ], - # Include the separately-compiled shared library - setup_requires=["cython", "protobuf"], - ext_modules=extensions, packages=find_packages(include=["cudf", "cudf.*"]), - package_data=dict.fromkeys( - find_packages(include=["cudf._lib*"]), - ["*.pxd"], - ), - cmdclass=cmdclass, + package_data={ + key: ["*.pxd"] + for key in find_packages(include=["cudf._lib"]) + }, + cmdclass=versioneer.get_cmdclass(), install_requires=install_requires, extras_require=extras_require, zip_safe=False, From f8f89968df5bc23b0b93c84c630462896efecdfc Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 19 May 2022 16:26:42 -0700 Subject: [PATCH 03/31] Find the Arrow Python library. --- python/cudf/cudf/_lib/CMakeLists.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 5d8dcbc2179..0717e6653cf 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -68,3 +68,10 @@ set(targets_using_numpy gpuarrow interop avro csv orc json parquet) foreach(target IN LISTS targets_using_numpy) target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") endforeach() + +# PyArrow relies on the C++ Arrow library already being installed, so we can +# just find the C++ library directly and link to the same one. We rely on +# libcudf's exports to provide the ARROW_SHARED_LIB and ARROW_CUDA_SHARED_LIB +# libraries. That just leaves us to find the ArrowPython library on our own. +find_library(arrow_python_shared_library arrow_python REQUIRED) +target_link_libraries(gpuarrow ${arrow_python_shared_library} ${ARROW_SHARED_LIB} ${ARROW_CUDA_SHARED_LIB}) From 03546d95b39fd89cd8f5363f24b035d2e8c3255d Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 19 May 2022 16:56:26 -0700 Subject: [PATCH 04/31] Install library and update RPATHs. --- python/cudf/CMakeLists.txt | 5 ++++- python/cudf/cudf/_lib/CMakeLists.txt | 5 +++++ python/cudf/cudf/_lib/io/CMakeLists.txt | 5 +++++ python/cudf/cudf/_lib/nvtext/CMakeLists.txt | 5 +++++ python/cudf/cudf/_lib/strings/CMakeLists.txt | 5 +++++ 5 files changed, 24 insertions(+), 1 deletion(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 86d07b05cef..ae92add26af 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -55,7 +55,10 @@ if(NOT cudf_FOUND) set(BUILD_BENCHMARKS OFF) add_subdirectory(../../cpp cudf-cpp) - # TODO: Figure out where to install + # Since there are multiple subpackages of cudf._lib that require access to + # libcudf, we place the library in the _lib/cpp directory as a single source + # of truth and modify the other rpaths appropriately. + install(TARGETS cuspatial DESTINATION cuspatial/_lib/cpp) endif() include(rapids-cython) diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 0717e6653cf..8b9fcd464ab 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -75,3 +75,8 @@ endforeach() # libraries. That just leaves us to find the ArrowPython library on our own. find_library(arrow_python_shared_library arrow_python REQUIRED) target_link_libraries(gpuarrow ${arrow_python_shared_library} ${ARROW_SHARED_LIB} ${ARROW_CUDA_SHARED_LIB}) + +# Update the RPATHs to also look in the cpp/ directory for the library. +foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/cpp") +endforeach() diff --git a/python/cudf/cudf/_lib/io/CMakeLists.txt b/python/cudf/cudf/_lib/io/CMakeLists.txt index 96da5dfb03c..157583bad00 100644 --- a/python/cudf/cudf/_lib/io/CMakeLists.txt +++ b/python/cudf/cudf/_lib/io/CMakeLists.txt @@ -25,3 +25,8 @@ set(targets_using_numpy io_datasource io_utils) foreach(target IN LISTS targets_using_numpy) target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") endforeach() + +# Update the RPATHs to also look in the ../cpp/ directory for the library. +foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../cpp") +endforeach() diff --git a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt index 083d1ba01ed..846be94140d 100644 --- a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt +++ b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt @@ -28,3 +28,8 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX nvtext_ CXX) + +# Update the RPATHs to also look in the ../cpp/ directory for the library. +foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../cpp") +endforeach() diff --git a/python/cudf/cudf/_lib/strings/CMakeLists.txt b/python/cudf/cudf/_lib/strings/CMakeLists.txt index 5d7b71531a5..cdb32b37d1a 100644 --- a/python/cudf/cudf/_lib/strings/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/CMakeLists.txt @@ -40,3 +40,8 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ CXX) + +# Update the RPATHs to also look in the ../cpp/ directory for the library. +foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../cpp") +endforeach() From e7e410ffdb592cde5e855717ce0d05c89135aaf4 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 19 May 2022 17:55:05 -0700 Subject: [PATCH 05/31] Add missing modules, cudf is now importable. --- python/cudf/cudf/_lib/CMakeLists.txt | 1 + python/cudf/cudf/_lib/strings/CMakeLists.txt | 3 ++ .../cudf/_lib/strings/convert/CMakeLists.txt | 33 +++++++++++++++++++ .../cudf/_lib/strings/split/CMakeLists.txt | 30 +++++++++++++++++ 4 files changed, 67 insertions(+) create mode 100644 python/cudf/cudf/_lib/strings/convert/CMakeLists.txt create mode 100644 python/cudf/cudf/_lib/strings/split/CMakeLists.txt diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 8b9fcd464ab..e13c98d14f7 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -18,6 +18,7 @@ set(cython_sources binaryop.pyx column.pyx concat.pyx + copying.pyx csv.pyx datetime.pyx expressions.pyx diff --git a/python/cudf/cudf/_lib/strings/CMakeLists.txt b/python/cudf/cudf/_lib/strings/CMakeLists.txt index cdb32b37d1a..edd16252fda 100644 --- a/python/cudf/cudf/_lib/strings/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/CMakeLists.txt @@ -45,3 +45,6 @@ rapids_cython_create_modules( foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../cpp") endforeach() + +add_subdirectory(convert) +add_subdirectory(split) diff --git a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt new file mode 100644 index 00000000000..fe41fb19fad --- /dev/null +++ b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt @@ -0,0 +1,33 @@ +# ============================================================================= +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +set(cython_sources + convert_fixed_point.pyx + convert_floats.pyx + convert_integers.pyx + convert_lists.pyx + convert_urls.pyx +) + +set(linked_libraries cudf::cudf) +rapids_cython_create_modules( + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" + MODULE_PREFIX strings_ + CXX) + +# Update the RPATHs to also look in the ../../cpp/ directory for the library. +foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../../cpp") +endforeach() diff --git a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt new file mode 100644 index 00000000000..a8711b6f8f2 --- /dev/null +++ b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt @@ -0,0 +1,30 @@ +# ============================================================================= +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +set(cython_sources + partition.pyx + split.pyx +) + +set(linked_libraries cudf::cudf) +rapids_cython_create_modules( + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" + MODULE_PREFIX strings_ + CXX) + +# Update the RPATHs to also look in the ../../cpp/ directory for the library. +foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../../cpp") +endforeach() From ac68c2b208ac88dd281feb3115598e61e2d1a6e4 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 19 May 2022 19:02:47 -0700 Subject: [PATCH 06/31] Temporarily disable binding. --- python/cudf/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index ae92add26af..6620e1fd1c9 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -61,6 +61,12 @@ if(NOT cudf_FOUND) install(TARGETS cuspatial DESTINATION cuspatial/_lib/cpp) endif() +# TODO: Disable binding for now, but we probably want it back eventually. +# Currently this breaks the behavior of our join code because that sets a +# Cython method as a class attribute, which gets bound and therefore passes +# self as an extra argument. +set(CYTHON_FLAGS "--directive embedsignature=True,always_allow_keywords=True") + include(rapids-cython) rapids_cython_init() From a4e96da0ca1057137fd3c6cc5ad8ced435ea0119 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 20 May 2022 16:04:35 -0700 Subject: [PATCH 07/31] Clean up arrow libraries. --- python/cudf/CMakeLists.txt | 2 +- python/cudf/cudf/_lib/CMakeLists.txt | 5 ++--- python/cudf/cudf/_lib/io/CMakeLists.txt | 1 - python/cudf/cudf/_lib/nvtext/CMakeLists.txt | 1 - python/cudf/cudf/_lib/strings/CMakeLists.txt | 1 - python/cudf/cudf/_lib/strings/convert/CMakeLists.txt | 1 - python/cudf/cudf/_lib/strings/split/CMakeLists.txt | 1 - 7 files changed, 3 insertions(+), 9 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 6620e1fd1c9..e9e42145632 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -58,7 +58,7 @@ if(NOT cudf_FOUND) # Since there are multiple subpackages of cudf._lib that require access to # libcudf, we place the library in the _lib/cpp directory as a single source # of truth and modify the other rpaths appropriately. - install(TARGETS cuspatial DESTINATION cuspatial/_lib/cpp) + install(TARGETS cudf DESTINATION cudf/_lib/cpp) endif() # TODO: Disable binding for now, but we probably want it back eventually. diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index e13c98d14f7..3d92240167a 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -72,12 +72,11 @@ endforeach() # PyArrow relies on the C++ Arrow library already being installed, so we can # just find the C++ library directly and link to the same one. We rely on -# libcudf's exports to provide the ARROW_SHARED_LIB and ARROW_CUDA_SHARED_LIB +# libcudf's exports to provide the arrow_shared_lib and arrow_cuda_shared_lib # libraries. That just leaves us to find the ArrowPython library on our own. find_library(arrow_python_shared_library arrow_python REQUIRED) -target_link_libraries(gpuarrow ${arrow_python_shared_library} ${ARROW_SHARED_LIB} ${ARROW_CUDA_SHARED_LIB}) +target_link_libraries(gpuarrow ${arrow_python_shared_library}) -# Update the RPATHs to also look in the cpp/ directory for the library. foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/cpp") endforeach() diff --git a/python/cudf/cudf/_lib/io/CMakeLists.txt b/python/cudf/cudf/_lib/io/CMakeLists.txt index 157583bad00..7b172d30c13 100644 --- a/python/cudf/cudf/_lib/io/CMakeLists.txt +++ b/python/cudf/cudf/_lib/io/CMakeLists.txt @@ -26,7 +26,6 @@ foreach(target IN LISTS targets_using_numpy) target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") endforeach() -# Update the RPATHs to also look in the ../cpp/ directory for the library. foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../cpp") endforeach() diff --git a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt index 846be94140d..3e862ce76ed 100644 --- a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt +++ b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt @@ -29,7 +29,6 @@ rapids_cython_create_modules( MODULE_PREFIX nvtext_ CXX) -# Update the RPATHs to also look in the ../cpp/ directory for the library. foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../cpp") endforeach() diff --git a/python/cudf/cudf/_lib/strings/CMakeLists.txt b/python/cudf/cudf/_lib/strings/CMakeLists.txt index edd16252fda..c18885ec4ad 100644 --- a/python/cudf/cudf/_lib/strings/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/CMakeLists.txt @@ -41,7 +41,6 @@ rapids_cython_create_modules( MODULE_PREFIX strings_ CXX) -# Update the RPATHs to also look in the ../cpp/ directory for the library. foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../cpp") endforeach() diff --git a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt index fe41fb19fad..705fe23fff6 100644 --- a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt @@ -27,7 +27,6 @@ rapids_cython_create_modules( MODULE_PREFIX strings_ CXX) -# Update the RPATHs to also look in the ../../cpp/ directory for the library. foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../../cpp") endforeach() diff --git a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt index a8711b6f8f2..07012706d98 100644 --- a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt @@ -24,7 +24,6 @@ rapids_cython_create_modules( MODULE_PREFIX strings_ CXX) -# Update the RPATHs to also look in the ../../cpp/ directory for the library. foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../../cpp") endforeach() From 85ff86d92bc2489190ba5c5e0ab92e0801fa2749 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 20 May 2022 16:09:13 -0700 Subject: [PATCH 08/31] Clean out most cruft from setup.py. --- python/cudf/setup.py | 159 ------------------------------------------- 1 file changed, 159 deletions(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 56cdeea6392..82c1ef678b1 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -3,26 +3,7 @@ import os import re import shutil -# import subprocess -# import sys -# import sysconfig -# -# # Must import in this order: -# # setuptools -> Cython.Distutils.build_ext -> setuptools.command.build_ext -# # Otherwise, setuptools.command.build_ext ends up inheriting from -# # Cython.Distutils.old_build_ext which we do not want -# import setuptools -# -# try: -# from Cython.Distutils.build_ext import new_build_ext as _build_ext -# except ImportError: -# from setuptools.command.build_ext import build_ext as _build_ext -# -# from distutils.spawn import find_executable -# from distutils.sysconfig import get_python_lib -# import numpy as np -# import pyarrow as pa from setuptools import find_packages from skbuild import setup @@ -55,8 +36,6 @@ "transformers<=4.10.3", ] } -# -# cython_files = ["cudf/**/*.pyx"] def get_cuda_version_from_header(cuda_include_dir, delimeter=""): @@ -95,147 +74,9 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): raise OSError(f"Invalid CUDA_HOME: directory does not exist: {CUDA_HOME}") cuda_include_dir = os.path.join(CUDA_HOME, "include") -cuda_lib_dir = os.path.join(CUDA_HOME, "lib64") install_requires.append( "cupy-cuda" + get_cuda_version_from_header(cuda_include_dir) ) -# -# CUDF_HOME = os.environ.get( -# "CUDF_HOME", -# os.path.abspath( -# os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../") -# ), -# ) -# CUDF_ROOT = os.environ.get( -# "CUDF_ROOT", -# os.path.abspath( -# os.path.join( -# os.path.dirname(os.path.abspath(__file__)), "../../cpp/build/" -# ) -# ), -# ) -# -# -# class build_ext_and_proto_no_debug(_build_ext): -# def build_extensions(self): -# def remove_flags(compiler, *flags): -# for flag in flags: -# try: -# compiler.compiler_so = list( -# filter((flag).__ne__, compiler.compiler_so) -# ) -# except Exception: -# pass -# -# # Full optimization -# self.compiler.compiler_so.append("-O3") -# # Silence '-Wunknown-pragmas' warning -# self.compiler.compiler_so.append("-Wno-unknown-pragmas") -# # No debug symbols, full optimization, no '-Wstrict-prototypes' warning -# remove_flags( -# self.compiler, "-g", "-G", "-O1", "-O2", "-Wstrict-prototypes" -# ) -# super().build_extensions() -# -# def finalize_options(self): -# if self.distribution.ext_modules: -# # Delay import this to allow for Cython-less installs -# from Cython.Build.Dependencies import cythonize -# -# nthreads = getattr(self, "parallel", None) # -j option in Py3.5+ -# nthreads = int(nthreads) if nthreads else None -# self.distribution.ext_modules = cythonize( -# self.distribution.ext_modules, -# nthreads=nthreads, -# force=self.force, -# gdb_debug=False, -# compiler_directives=dict( -# profile=False, language_level=3, embedsignature=True -# ), -# ) -# # Skip calling super() and jump straight to setuptools -# setuptools.command.build_ext.build_ext.finalize_options(self) -# -# def run(self): -# # Get protoc -# protoc = None -# if "PROTOC" in os.environ and os.path.exists(os.environ["PROTOC"]): -# protoc = os.environ["PROTOC"] -# else: -# protoc = find_executable("protoc") -# if protoc is None: -# sys.stderr.write("protoc not found") -# sys.exit(1) -# -# # Build .proto file -# for source in ["cudf/utils/metadata/orc_column_statistics.proto"]: -# output = source.replace(".proto", "_pb2.py") -# -# if not os.path.exists(output) or ( -# os.path.getmtime(source) > os.path.getmtime(output) -# ): -# with open(output, "a") as src: -# src.write("# flake8: noqa" + os.linesep) -# src.write("# fmt: off" + os.linesep) -# subprocess.check_call([protoc, "--python_out=.", source]) -# with open(output, "r+") as src: -# new_src_content = ( -# "# flake8: noqa" -# + os.linesep -# + "# fmt: off" -# + os.linesep -# + src.read() -# + "# fmt: on" -# + os.linesep -# ) -# src.seek(0) -# src.write(new_src_content) -# -# # Run original Cython build_ext command -# _build_ext.run(self) -# -# -# extensions = [ -# Extension( -# "*", -# sources=cython_files, -# include_dirs=[ -# os.path.abspath(os.path.join(CUDF_HOME, "cpp/include/cudf")), -# os.path.abspath(os.path.join(CUDF_HOME, "cpp/include")), -# os.path.abspath(os.path.join(CUDF_ROOT, "include")), -# os.path.join(CUDF_ROOT, "_deps/libcudacxx-src/include"), -# os.path.join(CUDF_ROOT, "_deps/dlpack-src/include"), -# os.path.join( -# os.path.dirname(sysconfig.get_path("include")), -# "rapids/libcudacxx", -# ), -# os.path.dirname(sysconfig.get_path("include")), -# np.get_include(), -# pa.get_include(), -# cuda_include_dir, -# ], -# library_dirs=( -# pa.get_library_dirs() -# + [ -# get_python_lib(), -# os.path.join(os.sys.prefix, "lib"), -# cuda_lib_dir, -# ] -# ), -# libraries=["cudart", "cudf"] + pa.get_libraries() + ["arrow_cuda"], -# language="c++", -# extra_compile_args=["-std=c++17"], -# ) -# ] -# -# cmdclass = versioneer.get_cmdclass() -# cmdclass["build_ext"] = build_ext_and_proto_no_debug - -# import sys -# import numpy as np -# import pyarrow as pa -# print(sys.argv) -# exit() setup( From 9e39f9a232b74cfe00e7604bdc823d94d48cc168 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 20 May 2022 16:29:31 -0700 Subject: [PATCH 09/31] Update all ancillary and packaging-related files. --- build.sh | 4 ++-- ci/gpu/build.sh | 2 +- ci/release/update-version.sh | 3 +++ conda/environments/cudf_dev_cuda11.5.yml | 1 + conda/recipes/cudf/build.sh | 2 +- conda/recipes/cudf/meta.yaml | 2 ++ python/cudf/CMakeLists.txt | 4 +--- 7 files changed, 11 insertions(+), 7 deletions(-) diff --git a/build.sh b/build.sh index 6a43674c642..b6c4edd270f 100755 --- a/build.sh +++ b/build.sh @@ -324,10 +324,10 @@ fi if buildAll || hasArg cudf; then cd ${REPODIR}/python/cudf + python setup.py build_ext --inplace -j${PARALLEL_LEVEL:-1} -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${CMAKE_ARGS} if [[ ${INSTALL_TARGET} != "" ]]; then - PARALLEL_LEVEL=${PARALLEL_LEVEL} python setup.py build_ext -j${PARALLEL_LEVEL} install --single-version-externally-managed --record=record.txt + python setup.py install --single-version-externally-managed --record=record.txt -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} ${CMAKE_ARGS} else - PARALLEL_LEVEL=${PARALLEL_LEVEL} python setup.py build_ext --inplace -j${PARALLEL_LEVEL} --library-dir=${LIBCUDF_BUILD_DIR} fi fi diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 79678fe7203..cbf5016c785 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -127,7 +127,7 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then ################################################################################ gpuci_logger "Build from source" - "$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests --ptds + "$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests --ptds --cmake-args=\"-DFIND_CUDF_CPP=ON\" ################################################################################ # TEST - Run GoogleTest diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index f07d7984cd1..24fd5b025e6 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -34,6 +34,9 @@ function sed_runner() { # cpp update sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/CMakeLists.txt +# Python update +sed_runner 's/'"cudf_version .*)"'/'"cudf_version ${NEXT_FULL_TAG})"'/g' python/cudf/CMakeLists.txt + # cpp libcudf_kafka update sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/libcudf_kafka/CMakeLists.txt diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml index 92c1345ac0c..372be6799d4 100644 --- a/conda/environments/cudf_dev_cuda11.5.yml +++ b/conda/environments/cudf_dev_cuda11.5.yml @@ -16,6 +16,7 @@ dependencies: - rmm=22.08.* - cmake>=3.20.1,!=3.23.0 - cmake_setuptools>=0.1.3 + - scikit-build>=0.13.1 - python>=3.7,<3.9 - numba>=0.54 - numpy diff --git a/conda/recipes/cudf/build.sh b/conda/recipes/cudf/build.sh index 9cc4e1f9ac4..e1fc9219e90 100644 --- a/conda/recipes/cudf/build.sh +++ b/conda/recipes/cudf/build.sh @@ -1,4 +1,4 @@ # Copyright (c) 2018-2019, NVIDIA CORPORATION. # This assumes the script is executed from the root of the repo directory -./build.sh cudf +./build.sh cudf --cmake-args=\"-DFIND_CUDF_CPP=ON\" diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index b890c34f06a..05e5254aaf4 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -32,6 +32,8 @@ requirements: - protobuf - python - cython >=0.29,<0.30 + - cmake>=3.20.1,!=3.23.0 + - scikit-build>=0.13.1 - setuptools - numba >=0.54 - dlpack>=0.5,<0.6.0a0 diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index e9e42145632..cd93a10d6d5 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -16,9 +16,7 @@ cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR) set(cudf_version 22.06.00) -file(DOWNLOAD https://raw.githubusercontent.com/vyasr/rapids-cmake/feature/rapids_cython_prefix/RAPIDS.cmake - ${CMAKE_BINARY_DIR}/RAPIDS.cmake) -include(${CMAKE_BINARY_DIR}/RAPIDS.cmake) +include(../fetch_rapids.cmake) project( cudf-python From 381bad46a9cfe819793f1b0c1fd703cc6446960c Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 20 May 2022 16:52:14 -0700 Subject: [PATCH 10/31] Apply cmake-format. --- python/cudf/CMakeLists.txt | 27 +++--- python/cudf/cudf/_lib/CMakeLists.txt | 93 +++++++++---------- python/cudf/cudf/_lib/io/CMakeLists.txt | 6 +- python/cudf/cudf/_lib/nvtext/CMakeLists.txt | 17 +--- python/cudf/cudf/_lib/strings/CMakeLists.txt | 45 +++++---- .../cudf/_lib/strings/convert/CMakeLists.txt | 15 +-- .../cudf/_lib/strings/split/CMakeLists.txt | 12 +-- 7 files changed, 96 insertions(+), 119 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index cd93a10d6d5..ea16f165334 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -25,23 +25,23 @@ project( # language to be enabled here. The test project that is built in scikit-build to verify # various linking options for the python library is hardcoded to build with C, so until # that is fixed we need to keep C. - C - CXX) + C CXX +) option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files" - OFF) + OFF +) # If the user requested it we attempt to find CUDF. if(FIND_CUDF_CPP) - find_package(cudf ${cudf_version} REQUIRED) + find_package(cudf ${cudf_version} REQUIRED) else() set(cudf_FOUND OFF) endif() if(NOT cudf_FOUND) - # TODO: This will not be necessary once we upgrade to CMake 3.22, which will - # pull in the required languages for the C++ project even if this project - # does not require those languges. + # TODO: This will not be necessary once we upgrade to CMake 3.22, which will pull in the required + # languages for the C++ project even if this project does not require those languges. include(rapids-cuda) rapids_cuda_init_architectures(cudf-python) enable_language(CUDA) @@ -53,16 +53,15 @@ if(NOT cudf_FOUND) set(BUILD_BENCHMARKS OFF) add_subdirectory(../../cpp cudf-cpp) - # Since there are multiple subpackages of cudf._lib that require access to - # libcudf, we place the library in the _lib/cpp directory as a single source - # of truth and modify the other rpaths appropriately. + # Since there are multiple subpackages of cudf._lib that require access to libcudf, we place the + # library in the _lib/cpp directory as a single source of truth and modify the other rpaths + # appropriately. install(TARGETS cudf DESTINATION cudf/_lib/cpp) endif() -# TODO: Disable binding for now, but we probably want it back eventually. -# Currently this breaks the behavior of our join code because that sets a -# Cython method as a class attribute, which gets bound and therefore passes -# self as an extra argument. +# TODO: Disable binding for now, but we probably want it back eventually. Currently this breaks the +# behavior of our join code because that sets a Cython method as a class attribute, which gets bound +# and therefore passes self as an extra argument. set(CYTHON_FLAGS "--directive embedsignature=True,always_allow_keywords=True") include(rapids-cython) diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 3d92240167a..b7db0ba6aac 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -13,52 +13,51 @@ # ============================================================================= set(cython_sources - aggregation.pyx - avro.pyx - binaryop.pyx - column.pyx - concat.pyx - copying.pyx - csv.pyx - datetime.pyx - expressions.pyx - filling.pyx - gpuarrow.pyx - groupby.pyx - hash.pyx - interop.pyx - join.pyx - json.pyx - labeling.pyx - lists.pyx - merge.pyx - null_mask.pyx - orc.pyx - parquet.pyx - partitioning.pyx - quantiles.pyx - reduce.pyx - replace.pyx - reshape.pyx - rolling.pyx - round.pyx - scalar.pyx - search.pyx - sort.pyx - stream_compaction.pyx - string_casting.pyx - text.pyx - transform.pyx - transpose.pyx - types.pyx - unary.pyx - utils.pyx + aggregation.pyx + avro.pyx + binaryop.pyx + column.pyx + concat.pyx + copying.pyx + csv.pyx + datetime.pyx + expressions.pyx + filling.pyx + gpuarrow.pyx + groupby.pyx + hash.pyx + interop.pyx + join.pyx + json.pyx + labeling.pyx + lists.pyx + merge.pyx + null_mask.pyx + orc.pyx + parquet.pyx + partitioning.pyx + quantiles.pyx + reduce.pyx + replace.pyx + reshape.pyx + rolling.pyx + round.pyx + scalar.pyx + search.pyx + sort.pyx + stream_compaction.pyx + string_casting.pyx + text.pyx + transform.pyx + transpose.pyx + types.pyx + unary.pyx + utils.pyx ) set(linked_libraries cudf::cudf) rapids_cython_create_modules( - SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" - CXX) + SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" CXX +) add_subdirectory(io) add_subdirectory(nvtext) @@ -70,10 +69,10 @@ foreach(target IN LISTS targets_using_numpy) target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") endforeach() -# PyArrow relies on the C++ Arrow library already being installed, so we can -# just find the C++ library directly and link to the same one. We rely on -# libcudf's exports to provide the arrow_shared_lib and arrow_cuda_shared_lib -# libraries. That just leaves us to find the ArrowPython library on our own. +# PyArrow relies on the C++ Arrow library already being installed, so we can just find the C++ +# library directly and link to the same one. We rely on libcudf's exports to provide the +# arrow_shared_lib and arrow_cuda_shared_lib libraries. That just leaves us to find the ArrowPython +# library on our own. find_library(arrow_python_shared_library arrow_python REQUIRED) target_link_libraries(gpuarrow ${arrow_python_shared_library}) diff --git a/python/cudf/cudf/_lib/io/CMakeLists.txt b/python/cudf/cudf/_lib/io/CMakeLists.txt index 7b172d30c13..16d4d65d78a 100644 --- a/python/cudf/cudf/_lib/io/CMakeLists.txt +++ b/python/cudf/cudf/_lib/io/CMakeLists.txt @@ -15,10 +15,8 @@ set(cython_sources datasource.pyx utils.pyx) set(linked_libraries cudf::cudf) rapids_cython_create_modules( - SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" - MODULE_PREFIX io_ - CXX) + SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX io_ CXX +) find_package(NumPy REQUIRED) set(targets_using_numpy io_datasource io_utils) diff --git a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt index 3e862ce76ed..27c8eb305e6 100644 --- a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt +++ b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt @@ -12,22 +12,13 @@ # the License. # ============================================================================= -set(cython_sources - edit_distance.pyx - generate_ngrams.pyx - ngrams_tokenize.pyx - normalize.pyx - replace.pyx - stemmer.pyx - subword_tokenize.pyx - tokenize.pyx +set(cython_sources edit_distance.pyx generate_ngrams.pyx ngrams_tokenize.pyx normalize.pyx + replace.pyx stemmer.pyx subword_tokenize.pyx tokenize.pyx ) set(linked_libraries cudf::cudf) rapids_cython_create_modules( - SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" - MODULE_PREFIX nvtext_ - CXX) + SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX nvtext_ CXX +) foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../cpp") diff --git a/python/cudf/cudf/_lib/strings/CMakeLists.txt b/python/cudf/cudf/_lib/strings/CMakeLists.txt index c18885ec4ad..26f504e10da 100644 --- a/python/cudf/cudf/_lib/strings/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/CMakeLists.txt @@ -13,33 +13,32 @@ # ============================================================================= set(cython_sources - attributes.pyx - capitalize.pyx - case.pyx - char_types.pyx - combine.pyx - contains.pyx - extract.pyx - find.pyx - find_multiple.pyx - findall.pyx - json.pyx - padding.pyx - repeat.pyx - replace.pyx - replace_re.pyx - strip.pyx - substring.pyx - translate.pyx - wrap.pyx + attributes.pyx + capitalize.pyx + case.pyx + char_types.pyx + combine.pyx + contains.pyx + extract.pyx + find.pyx + find_multiple.pyx + findall.pyx + json.pyx + padding.pyx + repeat.pyx + replace.pyx + replace_re.pyx + strip.pyx + substring.pyx + translate.pyx + wrap.pyx ) set(linked_libraries cudf::cudf) rapids_cython_create_modules( - SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" - MODULE_PREFIX strings_ - CXX) + SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ + CXX +) foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../cpp") diff --git a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt index 705fe23fff6..8ce2691295b 100644 --- a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt @@ -12,20 +12,15 @@ # the License. # ============================================================================= -set(cython_sources - convert_fixed_point.pyx - convert_floats.pyx - convert_integers.pyx - convert_lists.pyx - convert_urls.pyx +set(cython_sources convert_fixed_point.pyx convert_floats.pyx convert_integers.pyx + convert_lists.pyx convert_urls.pyx ) set(linked_libraries cudf::cudf) rapids_cython_create_modules( - SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" - MODULE_PREFIX strings_ - CXX) + SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ + CXX +) foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../../cpp") diff --git a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt index 07012706d98..57350a5b715 100644 --- a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt @@ -12,17 +12,13 @@ # the License. # ============================================================================= -set(cython_sources - partition.pyx - split.pyx -) +set(cython_sources partition.pyx split.pyx) set(linked_libraries cudf::cudf) rapids_cython_create_modules( - SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" - MODULE_PREFIX strings_ - CXX) + SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ + CXX +) foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../../cpp") From 1846a249ff3db7e460729bd1e52d65ac5aecb317 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 20 May 2022 16:52:49 -0700 Subject: [PATCH 11/31] Update copyright. --- conda/recipes/cudf/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/recipes/cudf/build.sh b/conda/recipes/cudf/build.sh index e1fc9219e90..216a566a121 100644 --- a/conda/recipes/cudf/build.sh +++ b/conda/recipes/cudf/build.sh @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2019, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. # This assumes the script is executed from the root of the repo directory ./build.sh cudf --cmake-args=\"-DFIND_CUDF_CPP=ON\" From 7f53c7e5e10b1bd785f4aba05007efe0ceade973 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Sat, 21 May 2022 11:44:58 -0700 Subject: [PATCH 12/31] Switch back to the branch of rapids-cmake for now. --- python/cudf/CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index ea16f165334..5b70d6b06df 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -16,7 +16,11 @@ cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR) set(cudf_version 22.06.00) -include(../fetch_rapids.cmake) +# TODO: Once the rapids-cmake PR is merged, we can use the same fetch_rapids.cmake script here. +# include(../../fetch_rapids.cmake) +file(DOWNLOAD https://raw.githubusercontent.com/vyasr/rapids-cmake/feature/rapids_cython_prefix/RAPIDS.cmake + ${CMAKE_BINARY_DIR}/RAPIDS.cmake) +include(${CMAKE_BINARY_DIR}/RAPIDS.cmake) project( cudf-python From fdc2953622c166917781067ae16180972012579a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 23 May 2022 16:56:43 -0700 Subject: [PATCH 13/31] Fix cmake-format. --- python/cudf/CMakeLists.txt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 5b70d6b06df..bbd98c59cda 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -18,8 +18,11 @@ set(cudf_version 22.06.00) # TODO: Once the rapids-cmake PR is merged, we can use the same fetch_rapids.cmake script here. # include(../../fetch_rapids.cmake) -file(DOWNLOAD https://raw.githubusercontent.com/vyasr/rapids-cmake/feature/rapids_cython_prefix/RAPIDS.cmake - ${CMAKE_BINARY_DIR}/RAPIDS.cmake) +file( + DOWNLOAD + https://raw.githubusercontent.com/vyasr/rapids-cmake/feature/rapids_cython_prefix/RAPIDS.cmake + ${CMAKE_BINARY_DIR}/RAPIDS.cmake +) include(${CMAKE_BINARY_DIR}/RAPIDS.cmake) project( From 2b8c20fe7d05dbecf6e87f5a8a4bfc23c1dcd24a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 23 May 2022 17:06:09 -0700 Subject: [PATCH 14/31] Apply black. --- python/cudf/setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 82c1ef678b1..606918a1183 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -97,8 +97,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): ], packages=find_packages(include=["cudf", "cudf.*"]), package_data={ - key: ["*.pxd"] - for key in find_packages(include=["cudf._lib"]) + key: ["*.pxd"] for key in find_packages(include=["cudf._lib"]) }, cmdclass=versioneer.get_cmdclass(), install_requires=install_requires, From 360c2c387cd32cc55faddce67072a246241739e0 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 23 May 2022 17:36:52 -0700 Subject: [PATCH 15/31] Fix syntax error in build.sh. --- build.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/build.sh b/build.sh index b6c4edd270f..9272cf3256d 100755 --- a/build.sh +++ b/build.sh @@ -327,7 +327,6 @@ if buildAll || hasArg cudf; then python setup.py build_ext --inplace -j${PARALLEL_LEVEL:-1} -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${CMAKE_ARGS} if [[ ${INSTALL_TARGET} != "" ]]; then python setup.py install --single-version-externally-managed --record=record.txt -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} ${CMAKE_ARGS} - else fi fi From 472afcd50b081253bd5959997a24442bf994bc80 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 23 May 2022 18:53:01 -0700 Subject: [PATCH 16/31] Update cudf version. --- python/cudf/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index bbd98c59cda..6b313f07913 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -14,7 +14,7 @@ cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR) -set(cudf_version 22.06.00) +set(cudf_version 22.08.00) # TODO: Once the rapids-cmake PR is merged, we can use the same fetch_rapids.cmake script here. # include(../../fetch_rapids.cmake) From f798712fdba0a7ca50d4332033a59b69618f3029 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 23 May 2022 20:00:33 -0700 Subject: [PATCH 17/31] Update conda recipe to include the CUDA compiler. --- conda/recipes/cudf/conda_build_config.yaml | 3 +++ conda/recipes/cudf/meta.yaml | 2 ++ 2 files changed, 5 insertions(+) diff --git a/conda/recipes/cudf/conda_build_config.yaml b/conda/recipes/cudf/conda_build_config.yaml index c049d21fd91..8de6aa0cad8 100644 --- a/conda/recipes/cudf/conda_build_config.yaml +++ b/conda/recipes/cudf/conda_build_config.yaml @@ -6,3 +6,6 @@ cxx_compiler_version: sysroot_version: - "2.17" + +cuda_compiler: + - nvcc diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 05e5254aaf4..84c24d691e0 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -25,8 +25,10 @@ build: requirements: build: + - cmake {{ cmake_version }} - {{ compiler('c') }} - {{ compiler('cxx') }} + - {{ compiler('cuda') }} {{ cuda_version }} - sysroot_{{ target_platform }} {{ sysroot_version }} host: - protobuf From cad9c79e22eb3059d584c6e785d889700a53e3ff Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 23 May 2022 21:30:28 -0700 Subject: [PATCH 18/31] Update cmake version. --- conda/recipes/cudf/conda_build_config.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/conda/recipes/cudf/conda_build_config.yaml b/conda/recipes/cudf/conda_build_config.yaml index 8de6aa0cad8..d9c3f21448f 100644 --- a/conda/recipes/cudf/conda_build_config.yaml +++ b/conda/recipes/cudf/conda_build_config.yaml @@ -7,5 +7,8 @@ cxx_compiler_version: sysroot_version: - "2.17" +cmake_version: + - ">=3.20.1,!=3.23.0" + cuda_compiler: - nvcc From f0680f06cb10b9df754514501cd07b265f34cfd0 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 24 May 2022 07:48:50 -0700 Subject: [PATCH 19/31] Include nvtext headers in installation. --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ef611e275be..63747875fe0 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -751,7 +751,7 @@ install( EXPORT cudf-exports ) -install(DIRECTORY ${CUDF_SOURCE_DIR}/include/cudf ${CUDF_SOURCE_DIR}/include/cudf_test +install(DIRECTORY ${CUDF_SOURCE_DIR}/include/cudf ${CUDF_SOURCE_DIR}/include/cudf_test ${CUDF_SOURCE_DIR}/include/nvtext DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} ) From 04514995579f3c1ce5b95c0d10a27eb601674c6d Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 24 May 2022 08:01:30 -0700 Subject: [PATCH 20/31] Apply cmake-format. --- cpp/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 63747875fe0..4ddfc0f0bb9 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -751,8 +751,8 @@ install( EXPORT cudf-exports ) -install(DIRECTORY ${CUDF_SOURCE_DIR}/include/cudf ${CUDF_SOURCE_DIR}/include/cudf_test ${CUDF_SOURCE_DIR}/include/nvtext - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} +install(DIRECTORY ${CUDF_SOURCE_DIR}/include/cudf ${CUDF_SOURCE_DIR}/include/cudf_test + ${CUDF_SOURCE_DIR}/include/nvtext DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} ) install( From 597e7676000f869153834d89be5960a51c57427b Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 24 May 2022 10:21:26 -0700 Subject: [PATCH 21/31] Ensure that all subpackage pxd files are also included in installed package. --- python/cudf/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 606918a1183..a784c8744af 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -97,7 +97,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): ], packages=find_packages(include=["cudf", "cudf.*"]), package_data={ - key: ["*.pxd"] for key in find_packages(include=["cudf._lib"]) + key: ["*.pxd"] for key in find_packages(include=["cudf._lib*"]) }, cmdclass=versioneer.get_cmdclass(), install_requires=install_requires, From 77d1886dadc19f7c7f5907164ce30d2c335418a8 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 24 May 2022 17:39:36 -0700 Subject: [PATCH 22/31] Reinstate protobuf file creation. --- python/cudf/setup.py | 49 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index a784c8744af..26cff668da5 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -3,9 +3,13 @@ import os import re import shutil +import subprocess +import sys from setuptools import find_packages from skbuild import setup +from skbuild.command.build_ext import build_ext +from distutils.spawn import find_executable import versioneer @@ -79,6 +83,49 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): ) +class build_ext_and_proto(build_ext): + def run(self): + # Get protoc + protoc = None + if "PROTOC" in os.environ and os.path.exists(os.environ["PROTOC"]): + protoc = os.environ["PROTOC"] + else: + protoc = find_executable("protoc") + if protoc is None: + sys.stderr.write("protoc not found") + sys.exit(1) + + # Build .proto file + for source in ["cudf/utils/metadata/orc_column_statistics.proto"]: + output = source.replace(".proto", "_pb2.py") + + if not os.path.exists(output) or ( + os.path.getmtime(source) > os.path.getmtime(output) + ): + with open(output, "a") as src: + src.write("# flake8: noqa" + os.linesep) + src.write("# fmt: off" + os.linesep) + subprocess.check_call([protoc, "--python_out=.", source]) + with open(output, "r+") as src: + new_src_content = ( + "# flake8: noqa" + + os.linesep + + "# fmt: off" + + os.linesep + + src.read() + + "# fmt: on" + + os.linesep + ) + src.seek(0) + src.write(new_src_content) + + # Run original Cython build_ext command + super().run() + + +cmdclass = versioneer.get_cmdclass() +cmdclass["build_ext"] = build_ext_and_proto + setup( name="cudf", version=versioneer.get_version(), @@ -99,7 +146,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): package_data={ key: ["*.pxd"] for key in find_packages(include=["cudf._lib*"]) }, - cmdclass=versioneer.get_cmdclass(), + cmdclass=cmdclass, install_requires=install_requires, extras_require=extras_require, zip_safe=False, From 311984d2c0f1d5b0955ebcca1ebec6567dc1f752 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 24 May 2022 17:41:36 -0700 Subject: [PATCH 23/31] Fix call to installation commands. --- build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.sh b/build.sh index 9272cf3256d..300cb7836bb 100755 --- a/build.sh +++ b/build.sh @@ -324,9 +324,9 @@ fi if buildAll || hasArg cudf; then cd ${REPODIR}/python/cudf - python setup.py build_ext --inplace -j${PARALLEL_LEVEL:-1} -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${CMAKE_ARGS} + python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1} if [[ ${INSTALL_TARGET} != "" ]]; then - python setup.py install --single-version-externally-managed --record=record.txt -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} ${CMAKE_ARGS} + python setup.py install --single-version-externally-managed --record=record.txt -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${CMAKE_ARGS} -j${PARALLEL_LEVEL:-1} fi fi From 19fa1e6501ec3054d20b0b8e03ea8497da6d4eec Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 24 May 2022 17:48:11 -0700 Subject: [PATCH 24/31] Apply linters. --- python/cudf/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 26cff668da5..6c791b5688b 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -5,11 +5,11 @@ import shutil import subprocess import sys +from distutils.spawn import find_executable from setuptools import find_packages from skbuild import setup from skbuild.command.build_ext import build_ext -from distutils.spawn import find_executable import versioneer From 0bd2c9f5d2fede7cffdf18dc30e863e886e980cd Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 25 May 2022 16:36:06 -0700 Subject: [PATCH 25/31] Fix typo. --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index 300cb7836bb..29dd63d4548 100755 --- a/build.sh +++ b/build.sh @@ -326,7 +326,7 @@ if buildAll || hasArg cudf; then cd ${REPODIR}/python/cudf python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1} if [[ ${INSTALL_TARGET} != "" ]]; then - python setup.py install --single-version-externally-managed --record=record.txt -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${CMAKE_ARGS} -j${PARALLEL_LEVEL:-1} + python setup.py install --single-version-externally-managed --record=record.txt -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1} fi fi From 8836cce64143d8080dd6efad499546cc89372633 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 25 May 2022 16:37:56 -0700 Subject: [PATCH 26/31] Reenable binding and make the joiners static methods explicitly. --- python/cudf/CMakeLists.txt | 5 ----- python/cudf/cudf/core/join/join.py | 6 +++--- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 6b313f07913..4573745abe6 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -66,11 +66,6 @@ if(NOT cudf_FOUND) install(TARGETS cudf DESTINATION cudf/_lib/cpp) endif() -# TODO: Disable binding for now, but we probably want it back eventually. Currently this breaks the -# behavior of our join code because that sets a Cython method as a class attribute, which gets bound -# and therefore passes self as an extra argument. -set(CYTHON_FLAGS "--directive embedsignature=True,always_allow_keywords=True") - include(rapids-cython) rapids_cython_init() diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py index 97f28656d1d..0bf43969b72 100644 --- a/python/cudf/cudf/core/join/join.py +++ b/python/cudf/cudf/core/join/join.py @@ -1,7 +1,7 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. from __future__ import annotations -from typing import TYPE_CHECKING, Any, Callable, List, cast +from typing import TYPE_CHECKING, Any, ClassVar, List, cast import cudf from cudf import _lib as libcudf @@ -29,7 +29,7 @@ class Merge: # join key. The `joiner` returns a tuple of two Columns # representing the rows to gather from the left- and right- side # tables respectively. - _joiner: Callable = libcudf.join.join + _joiner: ClassVar[staticmethod] = staticmethod(libcudf.join.join) def __init__( self, @@ -445,7 +445,7 @@ def _validate_merge_params( class MergeSemi(Merge): - _joiner: Callable = libcudf.join.semi_join + _joiner: ClassVar[staticmethod] = staticmethod(libcudf.join.semi_join) def _merge_results(self, lhs: Frame, rhs: Frame): # semi-join result includes only lhs columns From b60d380687c2e14d3732671ce504b52edc9f5818 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 27 May 2022 08:30:30 -0700 Subject: [PATCH 27/31] Address reviews from @robertmaynard. --- python/cudf/cudf/_lib/CMakeLists.txt | 11 ++++++----- python/cudf/cudf/_lib/io/CMakeLists.txt | 3 +-- python/cudf/cudf/_lib/nvtext/CMakeLists.txt | 2 +- python/cudf/cudf/_lib/strings/CMakeLists.txt | 3 +-- python/cudf/cudf/_lib/strings/convert/CMakeLists.txt | 3 +-- python/cudf/cudf/_lib/strings/split/CMakeLists.txt | 3 +-- 6 files changed, 11 insertions(+), 14 deletions(-) diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index b7db0ba6aac..ad55bc0e0ec 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -56,13 +56,9 @@ set(cython_sources ) set(linked_libraries cudf::cudf) rapids_cython_create_modules( - SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" CXX + CXX SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ) -add_subdirectory(io) -add_subdirectory(nvtext) -add_subdirectory(strings) - find_package(NumPy REQUIRED) set(targets_using_numpy gpuarrow interop avro csv orc json parquet) foreach(target IN LISTS targets_using_numpy) @@ -79,3 +75,8 @@ target_link_libraries(gpuarrow ${arrow_python_shared_library}) foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/cpp") endforeach() + +add_subdirectory(io) +add_subdirectory(nvtext) +add_subdirectory(strings) + diff --git a/python/cudf/cudf/_lib/io/CMakeLists.txt b/python/cudf/cudf/_lib/io/CMakeLists.txt index 16d4d65d78a..1fbda6b16ba 100644 --- a/python/cudf/cudf/_lib/io/CMakeLists.txt +++ b/python/cudf/cudf/_lib/io/CMakeLists.txt @@ -15,10 +15,9 @@ set(cython_sources datasource.pyx utils.pyx) set(linked_libraries cudf::cudf) rapids_cython_create_modules( - SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX io_ CXX + CXX SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX io_ ) -find_package(NumPy REQUIRED) set(targets_using_numpy io_datasource io_utils) foreach(target IN LISTS targets_using_numpy) target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") diff --git a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt index 27c8eb305e6..efce5cb804a 100644 --- a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt +++ b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt @@ -17,7 +17,7 @@ set(cython_sources edit_distance.pyx generate_ngrams.pyx ngrams_tokenize.pyx nor ) set(linked_libraries cudf::cudf) rapids_cython_create_modules( - SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX nvtext_ CXX + CXX SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX nvtext_ ) foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cudf/cudf/_lib/strings/CMakeLists.txt b/python/cudf/cudf/_lib/strings/CMakeLists.txt index 26f504e10da..c235b736344 100644 --- a/python/cudf/cudf/_lib/strings/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/CMakeLists.txt @@ -36,8 +36,7 @@ set(cython_sources set(linked_libraries cudf::cudf) rapids_cython_create_modules( - SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ - CXX + CXX SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ) foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt index 8ce2691295b..bd1292279e1 100644 --- a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt @@ -18,8 +18,7 @@ set(cython_sources convert_fixed_point.pyx convert_floats.pyx convert_integers.p set(linked_libraries cudf::cudf) rapids_cython_create_modules( - SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ - CXX + CXX SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ) foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt index 57350a5b715..fde117d58d1 100644 --- a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt @@ -16,8 +16,7 @@ set(cython_sources partition.pyx split.pyx) set(linked_libraries cudf::cudf) rapids_cython_create_modules( - SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ - CXX + CXX SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ) foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) From 1f5120ed9fa8cec942b2bad3220b3f4d5d300847 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 27 May 2022 08:44:00 -0700 Subject: [PATCH 28/31] Switch back to main branch of rapids-cmake. --- python/cudf/CMakeLists.txt | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 4573745abe6..927bdca22c5 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -16,14 +16,7 @@ cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR) set(cudf_version 22.08.00) -# TODO: Once the rapids-cmake PR is merged, we can use the same fetch_rapids.cmake script here. -# include(../../fetch_rapids.cmake) -file( - DOWNLOAD - https://raw.githubusercontent.com/vyasr/rapids-cmake/feature/rapids_cython_prefix/RAPIDS.cmake - ${CMAKE_BINARY_DIR}/RAPIDS.cmake -) -include(${CMAKE_BINARY_DIR}/RAPIDS.cmake) +include(../../fetch_rapids.cmake) project( cudf-python From 711886abd26a3651aabe5030ef2425d2fe0a7d22 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 27 May 2022 08:51:59 -0700 Subject: [PATCH 29/31] Run cmake-format. --- python/cudf/cudf/_lib/CMakeLists.txt | 5 +++-- python/cudf/cudf/_lib/io/CMakeLists.txt | 4 +++- python/cudf/cudf/_lib/nvtext/CMakeLists.txt | 4 +++- python/cudf/cudf/_lib/strings/CMakeLists.txt | 4 +++- python/cudf/cudf/_lib/strings/convert/CMakeLists.txt | 4 +++- python/cudf/cudf/_lib/strings/split/CMakeLists.txt | 4 +++- 6 files changed, 18 insertions(+), 7 deletions(-) diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index ad55bc0e0ec..1f743943ef6 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -56,7 +56,9 @@ set(cython_sources ) set(linked_libraries cudf::cudf) rapids_cython_create_modules( - CXX SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" ) find_package(NumPy REQUIRED) @@ -79,4 +81,3 @@ endforeach() add_subdirectory(io) add_subdirectory(nvtext) add_subdirectory(strings) - diff --git a/python/cudf/cudf/_lib/io/CMakeLists.txt b/python/cudf/cudf/_lib/io/CMakeLists.txt index 1fbda6b16ba..d7a23bceefe 100644 --- a/python/cudf/cudf/_lib/io/CMakeLists.txt +++ b/python/cudf/cudf/_lib/io/CMakeLists.txt @@ -15,7 +15,9 @@ set(cython_sources datasource.pyx utils.pyx) set(linked_libraries cudf::cudf) rapids_cython_create_modules( - CXX SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX io_ + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX io_ ) set(targets_using_numpy io_datasource io_utils) diff --git a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt index efce5cb804a..d96999a077e 100644 --- a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt +++ b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt @@ -17,7 +17,9 @@ set(cython_sources edit_distance.pyx generate_ngrams.pyx ngrams_tokenize.pyx nor ) set(linked_libraries cudf::cudf) rapids_cython_create_modules( - CXX SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX nvtext_ + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX nvtext_ ) foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cudf/cudf/_lib/strings/CMakeLists.txt b/python/cudf/cudf/_lib/strings/CMakeLists.txt index c235b736344..8ed5c5e03c1 100644 --- a/python/cudf/cudf/_lib/strings/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/CMakeLists.txt @@ -36,7 +36,9 @@ set(cython_sources set(linked_libraries cudf::cudf) rapids_cython_create_modules( - CXX SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ) foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt index bd1292279e1..ea2e3943b5a 100644 --- a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt @@ -18,7 +18,9 @@ set(cython_sources convert_fixed_point.pyx convert_floats.pyx convert_integers.p set(linked_libraries cudf::cudf) rapids_cython_create_modules( - CXX SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ) foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) diff --git a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt index fde117d58d1..2d23c0d21cb 100644 --- a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt @@ -16,7 +16,9 @@ set(cython_sources partition.pyx split.pyx) set(linked_libraries cudf::cudf) rapids_cython_create_modules( - CXX SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ) foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) From efcd8ab7c0b5641988c6fad413bf82e180d45d07 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 27 May 2022 10:50:30 -0700 Subject: [PATCH 30/31] Switch from scikit-build's NumPy module to CMake's built-in module. --- python/cudf/cudf/_lib/CMakeLists.txt | 6 ++++-- python/cudf/cudf/_lib/io/CMakeLists.txt | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 1f743943ef6..37febf2b782 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -61,10 +61,12 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" ) -find_package(NumPy REQUIRED) +# TODO: Finding NumPy requires finding Development as well. Once this is fixed +# in CMake (no date yet) we can remove the extra component spec. +find_package(Python REQUIRED COMPONENTS Development NumPy) set(targets_using_numpy gpuarrow interop avro csv orc json parquet) foreach(target IN LISTS targets_using_numpy) - target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") + target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}") endforeach() # PyArrow relies on the C++ Arrow library already being installed, so we can just find the C++ diff --git a/python/cudf/cudf/_lib/io/CMakeLists.txt b/python/cudf/cudf/_lib/io/CMakeLists.txt index d7a23bceefe..b12b085fc76 100644 --- a/python/cudf/cudf/_lib/io/CMakeLists.txt +++ b/python/cudf/cudf/_lib/io/CMakeLists.txt @@ -22,7 +22,7 @@ rapids_cython_create_modules( set(targets_using_numpy io_datasource io_utils) foreach(target IN LISTS targets_using_numpy) - target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") + target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}") endforeach() foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) From 8de6eb949e2ae0cbb274bef2013271903dc41532 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 27 May 2022 11:18:07 -0700 Subject: [PATCH 31/31] cmake-format. --- python/cudf/cudf/_lib/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 37febf2b782..e806decbf12 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -61,8 +61,8 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" ) -# TODO: Finding NumPy requires finding Development as well. Once this is fixed -# in CMake (no date yet) we can remove the extra component spec. +# TODO: Finding NumPy requires finding Development as well. Once this is fixed in CMake (no date +# yet) we can remove the extra component spec. find_package(Python REQUIRED COMPONENTS Development NumPy) set(targets_using_numpy gpuarrow interop avro csv orc json parquet) foreach(target IN LISTS targets_using_numpy)