Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/apache/spark into LEGACY_…
Browse files Browse the repository at this point in the history
…1097
  • Loading branch information
itholic committed Oct 17, 2024
2 parents 2a09347 + 175d563 commit 2587706
Show file tree
Hide file tree
Showing 89 changed files with 1,363 additions and 879 deletions.
18 changes: 9 additions & 9 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -264,20 +264,20 @@ jobs:
with:
distribution: zulu
java-version: ${{ matrix.java }}
- name: Install Python 3.9
- name: Install Python 3.11
uses: actions/setup-python@v5
# We need to install a Python 3 interpreter for SQL and Yarn because:
# - SQL component also has Python related tests, for example, IntegratedUDFTestUtils.
# - Yarn has a Python specific test too, for example, YarnClusterSuite.
if: contains(matrix.modules, 'yarn') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect')
with:
python-version: '3.9'
python-version: '3.11'
architecture: x64
- name: Install Python packages (Python 3.9)
- name: Install Python packages (Python 3.11)
if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect')
run: |
python3.9 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'protobuf==4.25.1'
python3.9 -m pip list
python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'protobuf==4.25.1'
python3.11 -m pip list
# Run the tests.
- name: Run tests
env: ${{ fromJSON(inputs.envs) }}
Expand Down Expand Up @@ -608,14 +608,14 @@ jobs:
with:
input: sql/connect/common/src/main
against: 'https://github.com/apache/spark.git#branch=branch-3.5,subdir=connector/connect/common/src/main'
- name: Install Python 3.9
- name: Install Python 3.11
uses: actions/setup-python@v5
with:
python-version: '3.9'
python-version: '3.11'
- name: Install dependencies for Python CodeGen check
run: |
python3.9 -m pip install 'black==23.9.1' 'protobuf==4.25.1' 'mypy==1.8.0' 'mypy-protobuf==3.3.0'
python3.9 -m pip list
python3.11 -m pip install 'black==23.9.1' 'protobuf==4.25.1' 'mypy==1.8.0' 'mypy-protobuf==3.3.0'
python3.11 -m pip list
- name: Python CodeGen check
run: ./dev/connect-check-protos.py

Expand Down
1 change: 1 addition & 0 deletions R/pkg/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ Collate:
'types.R'
'utils.R'
'window.R'
'zzz.R'
RoxygenNote: 7.1.2
VignetteBuilder: knitr
NeedsCompilation: no
Expand Down
6 changes: 0 additions & 6 deletions R/pkg/R/sparkR.R
Original file line number Diff line number Diff line change
Expand Up @@ -403,12 +403,6 @@ sparkR.session <- function(
sparkPackages = "",
enableHiveSupport = TRUE,
...) {

if (Sys.getenv("SPARKR_SUPPRESS_DEPRECATION_WARNING") == "") {
warning(
"SparkR is deprecated from Apache Spark 4.0.0 and will be removed in a future version.")
}

sparkConfigMap <- convertNamedListToEnv(sparkConfig)
namedParams <- list(...)
if (length(namedParams) > 0) {
Expand Down
30 changes: 30 additions & 0 deletions R/pkg/R/zzz.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# zzz.R - package startup message

# Package attach hook: emits the SparkR deprecation notice as a package
# startup message. The notice is skipped when the environment variable
# SPARKR_SUPPRESS_DEPRECATION_WARNING is set to any non-empty value.
# Arguments passed by R to the hook are accepted via `...` and ignored.
.onAttach <- function(...) {
  suppressed <- nzchar(Sys.getenv("SPARKR_SUPPRESS_DEPRECATION_WARNING"))
  if (suppressed) {
    return(invisible(NULL))
  }

  notice <- paste0(
    "Warning: ",
    "SparkR is deprecated in Apache Spark 4.0.0 and will be removed in a future release. ",
    "To continue using Spark in R, we recommend using sparklyr instead: ",
    "https://spark.posit.co/get-started/"
  )
  # Use a startup message (not warning()) so callers can silence it with
  # suppressPackageStartupMessages().
  packageStartupMessage(notice)
}
Original file line number Diff line number Diff line change
Expand Up @@ -1363,9 +1363,9 @@ public static UTF8String trimRight(

public static UTF8String[] splitSQL(final UTF8String input, final UTF8String delim,
final int limit, final int collationId) {
if (CollationFactory.fetchCollation(collationId).supportsBinaryEquality) {
if (CollationFactory.fetchCollation(collationId).isUtf8BinaryType) {
return input.split(delim, limit);
} else if (CollationFactory.fetchCollation(collationId).supportsLowercaseEquality) {
} else if (CollationFactory.fetchCollation(collationId).isUtf8LcaseType) {
return lowercaseSplitSQL(input, delim, limit);
} else {
return icuSplitSQL(input, delim, limit, collationId);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,18 @@ public static class Collation {
*/
public final boolean supportsSpaceTrimming;

/**
* Indicates whether the collation's base type is UTF8 binary. Note that currently only the
* collations Utf8_Binary and Utf8_Binary_RTRIM are considered to be of the Utf8 binary type.
*/
public final boolean isUtf8BinaryType;

/**
* Indicates whether the collation's base type is UTF8 lcase. Note that currently only the
* collations Utf8_Lcase and Utf8_Lcase_RTRIM are considered to be of the Utf8 lcase type.
*/
public final boolean isUtf8LcaseType;

public Collation(
String collationName,
String provider,
Expand All @@ -168,24 +180,22 @@ public Collation(
String version,
ToLongFunction<UTF8String> hashFunction,
BiFunction<UTF8String, UTF8String, Boolean> equalsFunction,
boolean supportsBinaryEquality,
boolean supportsBinaryOrdering,
boolean supportsLowercaseEquality,
boolean isUtf8BinaryType,
boolean isUtf8LcaseType,
boolean supportsSpaceTrimming) {
this.collationName = collationName;
this.provider = provider;
this.collator = collator;
this.comparator = comparator;
this.version = version;
this.hashFunction = hashFunction;
this.supportsBinaryEquality = supportsBinaryEquality;
this.supportsBinaryOrdering = supportsBinaryOrdering;
this.supportsLowercaseEquality = supportsLowercaseEquality;
this.isUtf8BinaryType = isUtf8BinaryType;
this.isUtf8LcaseType = isUtf8LcaseType;
this.equalsFunction = equalsFunction;
this.supportsSpaceTrimming = supportsSpaceTrimming;

// De Morgan's Law to check supportsBinaryOrdering => supportsBinaryEquality
assert(!supportsBinaryOrdering || supportsBinaryEquality);
this.supportsBinaryEquality = !supportsSpaceTrimming && isUtf8BinaryType;
this.supportsBinaryOrdering = !supportsSpaceTrimming && isUtf8BinaryType;
this.supportsLowercaseEquality = !supportsSpaceTrimming && isUtf8LcaseType;
// No Collation can simultaneously support binary equality and lowercase equality
assert(!supportsBinaryEquality || !supportsLowercaseEquality);

Expand Down Expand Up @@ -567,9 +577,8 @@ protected Collation buildCollation() {
"1.0",
hashFunction,
equalsFunction,
/* supportsBinaryEquality = */ true,
/* supportsBinaryOrdering = */ true,
/* supportsLowercaseEquality = */ false,
/* isUtf8BinaryType = */ true,
/* isUtf8LcaseType = */ false,
spaceTrimming != SpaceTrimming.NONE);
} else {
Comparator<UTF8String> comparator;
Expand All @@ -595,9 +604,8 @@ protected Collation buildCollation() {
"1.0",
hashFunction,
(s1, s2) -> comparator.compare(s1, s2) == 0,
/* supportsBinaryEquality = */ false,
/* supportsBinaryOrdering = */ false,
/* supportsLowercaseEquality = */ true,
/* isUtf8BinaryType = */ false,
/* isUtf8LcaseType = */ true,
spaceTrimming != SpaceTrimming.NONE);
}
}
Expand Down Expand Up @@ -982,9 +990,8 @@ protected Collation buildCollation() {
ICU_COLLATOR_VERSION,
hashFunction,
(s1, s2) -> comparator.compare(s1, s2) == 0,
/* supportsBinaryEquality = */ false,
/* supportsBinaryOrdering = */ false,
/* supportsLowercaseEquality = */ false,
/* isUtf8BinaryType = */ false,
/* isUtf8LcaseType = */ false,
spaceTrimming != SpaceTrimming.NONE);
}

Expand Down Expand Up @@ -1191,9 +1198,9 @@ public static UTF8String getCollationKey(UTF8String input, int collationId) {
if (collation.supportsSpaceTrimming) {
input = Collation.CollationSpec.applyTrimmingPolicy(input, collationId);
}
if (collation.supportsBinaryEquality) {
if (collation.isUtf8BinaryType) {
return input;
} else if (collation.supportsLowercaseEquality) {
} else if (collation.isUtf8LcaseType) {
return CollationAwareUTF8String.lowerCaseCodePoints(input);
} else {
CollationKey collationKey = collation.collator.getCollationKey(
Expand All @@ -1207,9 +1214,9 @@ public static byte[] getCollationKeyBytes(UTF8String input, int collationId) {
if (collation.supportsSpaceTrimming) {
input = Collation.CollationSpec.applyTrimmingPolicy(input, collationId);
}
if (collation.supportsBinaryEquality) {
if (collation.isUtf8BinaryType) {
return input.getBytes();
} else if (collation.supportsLowercaseEquality) {
} else if (collation.isUtf8LcaseType) {
return CollationAwareUTF8String.lowerCaseCodePoints(input).getBytes();
} else {
return collation.collator.getCollationKey(
Expand Down
Loading

0 comments on commit 2587706

Please sign in to comment.