Skip to content

Commit

Permalink
resolve file conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
panbingkun committed Oct 16, 2024
2 parents cddf80c + 39112e4 commit d237628
Show file tree
Hide file tree
Showing 57 changed files with 1,268 additions and 726 deletions.
18 changes: 9 additions & 9 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -264,20 +264,20 @@ jobs:
with:
distribution: zulu
java-version: ${{ matrix.java }}
- name: Install Python 3.9
- name: Install Python 3.11
uses: actions/setup-python@v5
# We should install a Python 3 interpreter for SQL and Yarn because:
# - SQL component also has Python related tests, for example, IntegratedUDFTestUtils.
# - Yarn has a Python specific test too, for example, YarnClusterSuite.
if: contains(matrix.modules, 'yarn') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect')
with:
python-version: '3.9'
python-version: '3.11'
architecture: x64
- name: Install Python packages (Python 3.9)
- name: Install Python packages (Python 3.11)
if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect')
run: |
python3.9 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'protobuf==4.25.1'
python3.9 -m pip list
python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'protobuf==4.25.1'
python3.11 -m pip list
# Run the tests.
- name: Run tests
env: ${{ fromJSON(inputs.envs) }}
Expand Down Expand Up @@ -608,14 +608,14 @@ jobs:
with:
input: sql/connect/common/src/main
against: 'https://github.com/apache/spark.git#branch=branch-3.5,subdir=connector/connect/common/src/main'
- name: Install Python 3.9
- name: Install Python 3.11
uses: actions/setup-python@v5
with:
python-version: '3.9'
python-version: '3.11'
- name: Install dependencies for Python CodeGen check
run: |
python3.9 -m pip install 'black==23.9.1' 'protobuf==4.25.1' 'mypy==1.8.0' 'mypy-protobuf==3.3.0'
python3.9 -m pip list
python3.11 -m pip install 'black==23.9.1' 'protobuf==4.25.1' 'mypy==1.8.0' 'mypy-protobuf==3.3.0'
python3.11 -m pip list
- name: Python CodeGen check
run: ./dev/connect-check-protos.py

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1363,9 +1363,9 @@ public static UTF8String trimRight(

public static UTF8String[] splitSQL(final UTF8String input, final UTF8String delim,
final int limit, final int collationId) {
if (CollationFactory.fetchCollation(collationId).supportsBinaryEquality) {
if (CollationFactory.fetchCollation(collationId).isUtf8BinaryType) {
return input.split(delim, limit);
} else if (CollationFactory.fetchCollation(collationId).supportsLowercaseEquality) {
} else if (CollationFactory.fetchCollation(collationId).isUtf8LcaseType) {
return lowercaseSplitSQL(input, delim, limit);
} else {
return icuSplitSQL(input, delim, limit, collationId);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,18 @@ public static class Collation {
*/
public final boolean supportsSpaceTrimming;

/**
* Indicates whether the collation's base type is UTF8 binary. Note that currently only the
* collations Utf8_Binary and Utf8_Binary_RTRIM are considered to be of UTF8 binary type.
*/
public final boolean isUtf8BinaryType;

/**
* Indicates whether the collation's base type is UTF8 lcase. Note that currently only the
* collations Utf8_Lcase and Utf8_Lcase_RTRIM are considered to be of UTF8 lcase type.
*/
public final boolean isUtf8LcaseType;

public Collation(
String collationName,
String provider,
Expand All @@ -168,24 +180,22 @@ public Collation(
String version,
ToLongFunction<UTF8String> hashFunction,
BiFunction<UTF8String, UTF8String, Boolean> equalsFunction,
boolean supportsBinaryEquality,
boolean supportsBinaryOrdering,
boolean supportsLowercaseEquality,
boolean isUtf8BinaryType,
boolean isUtf8LcaseType,
boolean supportsSpaceTrimming) {
this.collationName = collationName;
this.provider = provider;
this.collator = collator;
this.comparator = comparator;
this.version = version;
this.hashFunction = hashFunction;
this.supportsBinaryEquality = supportsBinaryEquality;
this.supportsBinaryOrdering = supportsBinaryOrdering;
this.supportsLowercaseEquality = supportsLowercaseEquality;
this.isUtf8BinaryType = isUtf8BinaryType;
this.isUtf8LcaseType = isUtf8LcaseType;
this.equalsFunction = equalsFunction;
this.supportsSpaceTrimming = supportsSpaceTrimming;

// De Morgan's Law to check supportsBinaryOrdering => supportsBinaryEquality
assert(!supportsBinaryOrdering || supportsBinaryEquality);
this.supportsBinaryEquality = !supportsSpaceTrimming && isUtf8BinaryType;
this.supportsBinaryOrdering = !supportsSpaceTrimming && isUtf8BinaryType;
this.supportsLowercaseEquality = !supportsSpaceTrimming && isUtf8LcaseType;
// No Collation can simultaneously support binary equality and lowercase equality
assert(!supportsBinaryEquality || !supportsLowercaseEquality);

Expand Down Expand Up @@ -567,9 +577,8 @@ protected Collation buildCollation() {
"1.0",
hashFunction,
equalsFunction,
/* supportsBinaryEquality = */ true,
/* supportsBinaryOrdering = */ true,
/* supportsLowercaseEquality = */ false,
/* isUtf8BinaryType = */ true,
/* isUtf8LcaseType = */ false,
spaceTrimming != SpaceTrimming.NONE);
} else {
Comparator<UTF8String> comparator;
Expand All @@ -595,9 +604,8 @@ protected Collation buildCollation() {
"1.0",
hashFunction,
(s1, s2) -> comparator.compare(s1, s2) == 0,
/* supportsBinaryEquality = */ false,
/* supportsBinaryOrdering = */ false,
/* supportsLowercaseEquality = */ true,
/* isUtf8BinaryType = */ false,
/* isUtf8LcaseType = */ true,
spaceTrimming != SpaceTrimming.NONE);
}
}
Expand Down Expand Up @@ -982,9 +990,8 @@ protected Collation buildCollation() {
ICU_COLLATOR_VERSION,
hashFunction,
(s1, s2) -> comparator.compare(s1, s2) == 0,
/* supportsBinaryEquality = */ false,
/* supportsBinaryOrdering = */ false,
/* supportsLowercaseEquality = */ false,
/* isUtf8BinaryType = */ false,
/* isUtf8LcaseType = */ false,
spaceTrimming != SpaceTrimming.NONE);
}

Expand Down Expand Up @@ -1191,9 +1198,9 @@ public static UTF8String getCollationKey(UTF8String input, int collationId) {
if (collation.supportsSpaceTrimming) {
input = Collation.CollationSpec.applyTrimmingPolicy(input, collationId);
}
if (collation.supportsBinaryEquality) {
if (collation.isUtf8BinaryType) {
return input;
} else if (collation.supportsLowercaseEquality) {
} else if (collation.isUtf8LcaseType) {
return CollationAwareUTF8String.lowerCaseCodePoints(input);
} else {
CollationKey collationKey = collation.collator.getCollationKey(
Expand All @@ -1207,9 +1214,9 @@ public static byte[] getCollationKeyBytes(UTF8String input, int collationId) {
if (collation.supportsSpaceTrimming) {
input = Collation.CollationSpec.applyTrimmingPolicy(input, collationId);
}
if (collation.supportsBinaryEquality) {
if (collation.isUtf8BinaryType) {
return input.getBytes();
} else if (collation.supportsLowercaseEquality) {
} else if (collation.isUtf8LcaseType) {
return CollationAwareUTF8String.lowerCaseCodePoints(input).getBytes();
} else {
return collation.collator.getCollationKey(
Expand Down
Loading

0 comments on commit d237628

Please sign in to comment.