From 9543e101ac102ffd50a48136d265741398d55646 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 27 Jan 2023 15:25:50 +0900 Subject: [PATCH 1/3] GH-33701: [C++] Add support for LTO (link time optimization) build (#33847) ### Rationale for this change Some base type classes don't have hidden method implementations. It may define these classes in each translation unit. It may cause one-definition-rule violation with `-flto`. ### What changes are included in this PR? Define at least one hidden method to prevent defining these base type classes in each translation unit. ### Are these changes tested? How to reproduce: ```bash CFLAGS="-flto" CXXFLAGS="-flto" cmake ... cmake --build ... ``` This reports link errors without this change. ### Are there any user-facing changes? No. * Closes: #33701 Authored-by: Sutou Kouhei Signed-off-by: Yibo Cai --- cpp/src/arrow/type.cc | 18 ++++++++++++++++++ cpp/src/arrow/type.h | 27 +++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index cc31735512bad..2091bfbeb8c57 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -454,6 +454,16 @@ std::string TypeHolder::ToString(const std::vector& types) { // ---------------------------------------------------------------------- +FixedWidthType::~FixedWidthType() {} + +PrimitiveCType::~PrimitiveCType() {} + +NumberType::~NumberType() {} + +IntegerType::~IntegerType() {} + +FloatingPointType::~FloatingPointType() {} + FloatingPointType::Precision HalfFloatType::precision() const { return FloatingPointType::HALF; } @@ -478,6 +488,12 @@ std::ostream& operator<<(std::ostream& os, return os; } +NestedType::~NestedType() {} + +BaseBinaryType::~BaseBinaryType() {} + +BaseListType::~BaseListType() {} + std::string ListType::ToString() const { std::stringstream s; s << "list<" << value_field()->ToString() << ">"; @@ -589,6 +605,8 @@ std::string FixedSizeBinaryType::ToString() const { return ss.str(); } +TemporalType::~TemporalType() {} + // ---------------------------------------------------------------------- // Date types diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index 05fcb3d615b13..536874392c9d1 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -288,24 +288,36 @@ std::shared_ptr GetPhysicalType(const std::shared_ptr& type) class ARROW_EXPORT FixedWidthType : public DataType { public: using DataType::DataType; + // This is only for preventing defining this class in each + // translation unit to avoid one-definition-rule violation. + ~FixedWidthType() override; }; /// \brief Base class for all data types representing primitive values class ARROW_EXPORT PrimitiveCType : public FixedWidthType { public: using FixedWidthType::FixedWidthType; + // This is only for preventing defining this class in each + // translation unit to avoid one-definition-rule violation. + ~PrimitiveCType() override; }; /// \brief Base class for all numeric data types class ARROW_EXPORT NumberType : public PrimitiveCType { public: using PrimitiveCType::PrimitiveCType; + // This is only for preventing defining this class in each + // translation unit to avoid one-definition-rule violation. + ~NumberType() override; }; /// \brief Base class for all integral data types class ARROW_EXPORT IntegerType : public NumberType { public: using NumberType::NumberType; + // This is only for preventing defining this class in each + // translation unit to avoid one-definition-rule violation. + ~IntegerType() override; virtual bool is_signed() const = 0; }; @@ -313,6 +325,9 @@ class ARROW_EXPORT IntegerType : public NumberType { class ARROW_EXPORT FloatingPointType : public NumberType { public: using NumberType::NumberType; + // This is only for preventing defining this class in each + // translation unit to avoid one-definition-rule violation. + ~FloatingPointType() override; enum Precision { HALF, SINGLE, DOUBLE }; virtual Precision precision() const = 0; }; @@ -323,6 +338,9 @@ class ParametricType {}; class ARROW_EXPORT NestedType : public DataType, public ParametricType { public: using DataType::DataType; + // This is only for preventing defining this class in each + // translation unit to avoid one-definition-rule violation. + ~NestedType() override; }; /// \brief The combination of a field name and data type, with optional metadata @@ -650,6 +668,9 @@ class ARROW_EXPORT DoubleType class ARROW_EXPORT BaseBinaryType : public DataType { public: using DataType::DataType; + // This is only for preventing defining this class in each + // translation unit to avoid one-definition-rule violation. + ~BaseBinaryType() override; }; constexpr int64_t kBinaryMemoryLimit = std::numeric_limits::max() - 1; @@ -893,6 +914,9 @@ class ARROW_EXPORT Decimal256Type : public DecimalType { class ARROW_EXPORT BaseListType : public NestedType { public: using NestedType::NestedType; + // This is only for preventing defining this class in each + // translation unit to avoid one-definition-rule violation. + ~BaseListType() override; const std::shared_ptr& value_field() const { return children_[0]; } std::shared_ptr value_type() const { return children_[0]->type(); } @@ -1209,6 +1233,9 @@ class ARROW_EXPORT DenseUnionType : public UnionType { class ARROW_EXPORT TemporalType : public FixedWidthType { public: using FixedWidthType::FixedWidthType; + // This is only for preventing defining this class in each + // translation unit to avoid one-definition-rule violation. + ~TemporalType() override; DataTypeLayout layout() const override { return DataTypeLayout( From 58f84a4a05a866c3b73f91bb6b74215f77643bc2 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 7 Feb 2023 04:22:24 -0400 Subject: [PATCH 2/3] GH-33851: [C++] Update bundled boost version (#33890) This PR updates the bundled version of Boost, as it was suggested by a maintainer of CRAN (R packaging) that the older version of boost might be responsible for an Arrow build failure on Fedora/clang (#33819). Closes #33851. The resulting tarball also has to be uploaded to https://apache.jfrog.io/ui/native/arrow/thirdparty/7.0.0/ (right?) to kick in and almost certainly needs to be tested through a round of CI. I'm not sure how to do either of those things but perhaps @ assignUser does? * Closes: #33851 Authored-by: Dewey Dunnington Signed-off-by: Sutou Kouhei --- cpp/build-support/trim-boost.sh | 11 +++++++---- cpp/cmake_modules/ThirdpartyToolchain.cmake | 12 ++++++++++++ cpp/thirdparty/versions.txt | 6 +++--- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/cpp/build-support/trim-boost.sh b/cpp/build-support/trim-boost.sh index 5618c66f28c80..535283ebe16e1 100755 --- a/cpp/build-support/trim-boost.sh +++ b/cpp/build-support/trim-boost.sh @@ -24,22 +24,22 @@ # # To test building Arrow locally with the boost bundle this creates, add: # -# set(BOOST_SOURCE_URL /path/to/arrow/cpp/build-support/boost_1_75_0/boost_1_75_0.tar.gz) +# set(BOOST_SOURCE_URL /path/to/arrow/cpp/build-support/boost_1_81_0/boost_1_81_0.tar.gz) # # to the beginning of the build_boost() macro in ThirdpartyToolchain.cmake, # # or set the env var ARROW_BOOST_URL before calling cmake, like: # -# ARROW_BOOST_URL=/path/to/arrow/cpp/build-support/boost_1_75_0/boost_1_75_0.tar.gz cmake ... +# ARROW_BOOST_URL=/path/to/arrow/cpp/build-support/boost_1_81_0/boost_1_81_0.tar.gz cmake ... # # After running this script, upload the bundle to -# https://github.com/ursa-labs/thirdparty/releases/edit/latest +# https://apache.jfrog.io/artifactory/arrow/thirdparty/ # TODO(ARROW-6407) automate uploading to github set -eu # if version is not defined by the caller, set a default. -: ${BOOST_VERSION:=1.75.0} +: ${BOOST_VERSION:=1.81.0} : ${BOOST_FILE:=boost_${BOOST_VERSION//./_}} : ${BOOST_URL:=https://sourceforge.net/projects/boost/files/boost/${BOOST_VERSION}/${BOOST_FILE}.tar.gz} @@ -66,6 +66,9 @@ fi mkdir -p ${BOOST_FILE} ./dist/bin/bcp ${BOOST_LIBS} ${BOOST_FILE} +# These files are assumed by the thirdparty toolchain but are not copied by bcp +cp bootstrap.sh bootstrap.bat boostcpp.jam boost-build.jam Jamroot LICENSE_1_0.txt INSTALL ${BOOST_FILE}/ + tar -czf ${BOOST_FILE}.tar.gz ${BOOST_FILE}/ # Resulting tarball is in ${BOOST_FILE}/${BOOST_FILE}.tar.gz diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 3eda538fb2e7e..d5fdd1279f221 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -987,6 +987,18 @@ if(MSVC AND ARROW_USE_STATIC_CRT) set(Boost_USE_STATIC_RUNTIME ON) endif() set(Boost_ADDITIONAL_VERSIONS + "1.81.0" + "1.81" + "1.80.0" + "1.80" + "1.79.0" + "1.79" + "1.78.0" + "1.78" + "1.77.0" + "1.77" + "1.76.0" + "1.76" "1.75.0" "1.75" "1.74.0" diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 2611944cf260e..986f9d03c59ad 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -33,8 +33,8 @@ ARROW_AWS_C_COMMON_BUILD_VERSION=v0.6.9 ARROW_AWS_C_COMMON_BUILD_SHA256_CHECKSUM=928a3e36f24d1ee46f9eec360ec5cebfe8b9b8994fe39d4fa74ff51aebb12717 ARROW_AWS_C_EVENT_STREAM_BUILD_VERSION=v0.1.5 ARROW_AWS_C_EVENT_STREAM_BUILD_SHA256_CHECKSUM=f1b423a487b5d6dca118bfc0d0c6cc596dc476b282258a3228e73a8f730422d4 -ARROW_BOOST_BUILD_VERSION=1.75.0 -ARROW_BOOST_BUILD_SHA256_CHECKSUM=267e04a7c0bfe85daf796dedc789c3a27a76707e1c968f0a2a87bb96331e2b61 +ARROW_BOOST_BUILD_VERSION=1.81.0 +ARROW_BOOST_BUILD_SHA256_CHECKSUM=9e0ffae35528c35f90468997bc8d99500bf179cbae355415a89a600c38e13574 ARROW_BROTLI_BUILD_VERSION=v1.0.9 ARROW_BROTLI_BUILD_SHA256_CHECKSUM=f9e8d81d0405ba66d181529af42a3354f838c939095ff99930da6aa9cdf6fe46 ARROW_BZIP2_BUILD_VERSION=1.0.8 @@ -107,7 +107,7 @@ DEPENDENCIES=( "ARROW_AWS_CHECKSUMS_URL aws-checksums-${ARROW_AWS_CHECKSUMS_BUILD_VERSION}.tar.gz https://github.com/awslabs/aws-checksums/archive/${ARROW_AWS_CHECKSUMS_BUILD_VERSION}.tar.gz" "ARROW_AWS_C_COMMON_URL aws-c-common-${ARROW_AWS_C_COMMON_BUILD_VERSION}.tar.gz https://github.com/awslabs/aws-c-common/archive/${ARROW_AWS_C_COMMON_BUILD_VERSION}.tar.gz" "ARROW_AWS_C_EVENT_STREAM_URL aws-c-event-stream-${ARROW_AWS_C_EVENT_STREAM_BUILD_VERSION}.tar.gz https://github.com/awslabs/aws-c-event-stream/archive/${ARROW_AWS_C_EVENT_STREAM_BUILD_VERSION}.tar.gz" - "ARROW_BOOST_URL boost-${ARROW_BOOST_BUILD_VERSION}.tar.gz https://github.com/ursa-labs/thirdparty/releases/download/apache-arrow-7.0.0/boost_${ARROW_BOOST_BUILD_VERSION//./_}.tar.gz" + "ARROW_BOOST_URL boost-${ARROW_BOOST_BUILD_VERSION}.tar.gz https://apache.jfrog.io/artifactory/arrow/thirdparty/7.0.0/boost_${ARROW_BOOST_BUILD_VERSION//./_}.tar.gz" "ARROW_BROTLI_URL brotli-${ARROW_BROTLI_BUILD_VERSION}.tar.gz https://github.com/google/brotli/archive/${ARROW_BROTLI_BUILD_VERSION}.tar.gz" "ARROW_BZIP2_URL bzip2-${ARROW_BZIP2_BUILD_VERSION}.tar.gz https://sourceware.org/pub/bzip2/bzip2-${ARROW_BZIP2_BUILD_VERSION}.tar.gz" "ARROW_CARES_URL cares-${ARROW_CARES_BUILD_VERSION}.tar.gz https://c-ares.haxx.se/download/c-ares-${ARROW_CARES_BUILD_VERSION}.tar.gz" From dda50257cd06148c2bfc9c4809b63a3467d3a2b6 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Tue, 7 Feb 2023 19:26:17 +0000 Subject: [PATCH 3/3] Regenerate docs, remove README badge, bump version --- r/DESCRIPTION | 2 +- r/R/dplyr-funcs-doc.R | 22 +++++++++++----------- r/README.md | 4 ---- r/man/acero.Rd | 27 ++++++++++++++------------- r/man/arrow-package.Rd | 4 ++-- 5 files changed, 28 insertions(+), 31 deletions(-) diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 9aef5daca742f..736da9be6982e 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -1,6 +1,6 @@ Package: arrow Title: Integration to 'Apache' 'Arrow' -Version: 11.0.0 +Version: 11.0.0.1 Authors@R: c( person("Neal", "Richardson", email = "neal.p.richardson@gmail.com", role = c("aut")), person("Ian", "Cook", email = "ianmcook@gmail.com", role = c("aut")), diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R index 4858f392ea01b..73dfd9dca41bf 100644 --- a/r/R/dplyr-funcs-doc.R +++ b/r/R/dplyr-funcs-doc.R @@ -99,30 +99,31 @@ #' #' ## base #' -#' * [`-`][-()] #' * [`!`][!()] #' * [`!=`][!=()] -#' * [`*`][*()] -#' * [`/`][/()] -#' * [`&`][&()] -#' * [`%/%`][%/%()] #' * [`%%`][%%()] +#' * [`%/%`][%/%()] #' * [`%in%`][%in%()] -#' * [`^`][^()] +#' * [`&`][&()] +#' * [`*`][*()] #' * [`+`][+()] +#' * [`-`][-()] +#' * [`/`][/()] #' * [`<`][<()] #' * [`<=`][<=()] #' * [`==`][==()] #' * [`>`][>()] #' * [`>=`][>=()] -#' * [`|`][|()] +#' * [`ISOdate()`][base::ISOdate()] +#' * [`ISOdatetime()`][base::ISOdatetime()] +#' * [`^`][^()] #' * [`abs()`][base::abs()] #' * [`acos()`][base::acos()] #' * [`all()`][base::all()] #' * [`any()`][base::any()] -#' * [`as.character()`][base::as.character()] #' * [`as.Date()`][base::as.Date()]: Multiple `tryFormats` not supported in Arrow. #' Consider using the lubridate specialised parsing functions `ymd()`, `ymd()`, etc. +#' * [`as.character()`][base::as.character()] #' * [`as.difftime()`][base::as.difftime()]: only supports `units = "secs"` (the default) #' * [`as.double()`][base::as.double()] #' * [`as.integer()`][base::as.integer()] @@ -153,8 +154,6 @@ #' * [`is.na()`][base::is.na()] #' * [`is.nan()`][base::is.nan()] #' * [`is.numeric()`][base::is.numeric()] -#' * [`ISOdate()`][base::ISOdate()] -#' * [`ISOdatetime()`][base::ISOdatetime()] #' * [`log()`][base::log()] #' * [`log10()`][base::log10()] #' * [`log1p()`][base::log1p()] @@ -186,6 +185,7 @@ #' * [`tolower()`][base::tolower()] #' * [`toupper()`][base::toupper()] #' * [`trunc()`][base::trunc()] +#' * [`|`][|()] #' #' ## bit64 #' @@ -242,8 +242,8 @@ #' * [`format_ISO8601()`][lubridate::format_ISO8601()] #' * [`hour()`][lubridate::hour()] #' * [`is.Date()`][lubridate::is.Date()] -#' * [`is.instant()`][lubridate::is.instant()] #' * [`is.POSIXct()`][lubridate::is.POSIXct()] +#' * [`is.instant()`][lubridate::is.instant()] #' * [`is.timepoint()`][lubridate::is.timepoint()] #' * [`isoweek()`][lubridate::isoweek()] #' * [`isoyear()`][lubridate::isoyear()] diff --git a/r/README.md b/r/README.md index 3551e92bffbf1..ee4036d48f381 100644 --- a/r/README.md +++ b/r/README.md @@ -1,9 +1,5 @@ # arrow -[![cran](https://www.r-pkg.org/badges/version-last-release/arrow)](https://cran.r-project.org/package=arrow) -[![CI](https://github.com/apache/arrow/workflows/R/badge.svg?event=push)](https://github.com/apache/arrow/actions?query=workflow%3AR+branch%3Amaster+event%3Apush) -[![conda-forge](https://img.shields.io/conda/vn/conda-forge/r-arrow.svg)](https://anaconda.org/conda-forge/r-arrow) - [Apache Arrow](https://arrow.apache.org/) is a cross-language development platform for in-memory and larger-than-memory data. It specifies a standardized language-independent columnar memory format for flat and hierarchical diff --git a/r/man/acero.Rd b/r/man/acero.Rd index b8aed28825f80..f4e7d672806a7 100644 --- a/r/man/acero.Rd +++ b/r/man/acero.Rd @@ -6,7 +6,7 @@ \description{ The \code{arrow} package contains methods for 37 \code{dplyr} table functions, many of which are "verbs" that do transformations to one or more tables. -The package also has mappings of 209 R functions to the corresponding +The package also has mappings of 211 R functions to the corresponding functions in the Arrow compute library. These allow you to write code inside of \code{dplyr} methods that call R functions, including many in packages like \code{stringr} and \code{lubridate}, and they will get translated to Arrow and run @@ -85,30 +85,31 @@ as \code{arrow_ascii_is_decimal}. \subsection{base}{ \itemize{ -\item \code{\link[=-]{-}} \item \code{\link[=!]{!}} \item \code{\link[=!=]{!=}} -\item \code{\link[=*]{*}} -\item \code{\link[=/]{/}} -\item \code{\link[=&]{&}} -\item \code{\link[=\%/\%]{\%/\%}} \item \code{\link[=\%\%]{\%\%}} +\item \code{\link[=\%/\%]{\%/\%}} \item \code{\link[=\%in\%]{\%in\%}} -\item \code{\link[=^]{^}} +\item \code{\link[=&]{&}} +\item \code{\link[=*]{*}} \item \code{\link[=+]{+}} +\item \code{\link[=-]{-}} +\item \code{\link[=/]{/}} \item \code{\link[=<]{<}} \item \code{\link[=<=]{<=}} \item \code{\link[===]{==}} \item \code{\link[=>]{>}} \item \code{\link[=>=]{>=}} -\item \code{\link[=|]{|}} +\item \code{\link[base:ISOdatetime]{ISOdate()}} +\item \code{\link[base:ISOdatetime]{ISOdatetime()}} +\item \code{\link[=^]{^}} \item \code{\link[base:MathFun]{abs()}} \item \code{\link[base:Trig]{acos()}} \item \code{\link[base:all]{all()}} \item \code{\link[base:any]{any()}} -\item \code{\link[base:character]{as.character()}} \item \code{\link[base:as.Date]{as.Date()}}: Multiple \code{tryFormats} not supported in Arrow. Consider using the lubridate specialised parsing functions \code{ymd()}, \code{ymd()}, etc. +\item \code{\link[base:character]{as.character()}} \item \code{\link[base:difftime]{as.difftime()}}: only supports \code{units = "secs"} (the default) \item \code{\link[base:double]{as.double()}} \item \code{\link[base:integer]{as.integer()}} @@ -139,8 +140,6 @@ Consider using the lubridate specialised parsing functions \code{ymd()}, \code{y \item \code{\link[base:NA]{is.na()}} \item \code{\link[base:is.finite]{is.nan()}} \item \code{\link[base:numeric]{is.numeric()}} -\item \code{\link[base:ISOdatetime]{ISOdate()}} -\item \code{\link[base:ISOdatetime]{ISOdatetime()}} \item \code{\link[base:Log]{log()}} \item \code{\link[base:Log]{log10()}} \item \code{\link[base:Log]{log1p()}} @@ -172,6 +171,7 @@ Valid values are "s", "ms" (default), "us", "ns". \item \code{\link[base:chartr]{tolower()}} \item \code{\link[base:chartr]{toupper()}} \item \code{\link[base:Round]{trunc()}} +\item \code{\link[=|]{|}} } } @@ -229,12 +229,13 @@ Valid values are "s", "ms" (default), "us", "ns". \item \code{\link[lubridate:parse_date_time]{fast_strptime()}}: non-default values of \code{lt} and \code{cutoff_2000} not supported \item \code{\link[lubridate:round_date]{floor_date()}} \item \code{\link[lubridate:force_tz]{force_tz()}}: Timezone conversion from non-UTC timezone not supported; -When \code{roll = FALSE} and hit a non-existent time, raise an error +\code{roll_dst} values of 'error' and 'boundary' are supported for nonexistent times, +\code{roll_dst} values of 'error', 'pre', and 'post' are supported for ambiguous times. \item \code{\link[lubridate:format_ISO8601]{format_ISO8601()}} \item \code{\link[lubridate:hour]{hour()}} \item \code{\link[lubridate:date_utils]{is.Date()}} -\item \code{\link[lubridate:is.instant]{is.instant()}} \item \code{\link[lubridate:posix_utils]{is.POSIXct()}} +\item \code{\link[lubridate:is.instant]{is.instant()}} \item \code{\link[lubridate:is.instant]{is.timepoint()}} \item \code{\link[lubridate:week]{isoweek()}} \item \code{\link[lubridate:year]{isoyear()}} diff --git a/r/man/arrow-package.Rd b/r/man/arrow-package.Rd index e1b6808f6bf0d..b6cf0509e3edf 100644 --- a/r/man/arrow-package.Rd +++ b/r/man/arrow-package.Rd @@ -18,12 +18,12 @@ Useful links: } \author{ -\strong{Maintainer}: Neal Richardson \email{neal@ursalabs.org} +\strong{Maintainer}: Nic Crane \email{thisisnic@gmail.com} Authors: \itemize{ + \item Neal Richardson \email{neal.p.richardson@gmail.com} \item Ian Cook \email{ianmcook@gmail.com} - \item Nic Crane \email{thisisnic@gmail.com} \item Dewey Dunnington \email{dewey@fishandwhistle.net} (\href{https://orcid.org/0000-0002-9415-4582}{ORCID}) \item Romain François \email{romain@rstudio.com} (\href{https://orcid.org/0000-0002-2444-4226}{ORCID}) \item Jonathan Keane \email{jkeane@gmail.com}