Merge pull request #1549 from apache/master
GulajavaMinistudio authored Sep 4, 2023
2 parents 3348846 + 60d8fc4 commit 2fd732a
Showing 141 changed files with 4,741 additions and 1,207 deletions.
1 change: 1 addition & 0 deletions .asf.yaml
@@ -36,3 +36,4 @@ notifications:
pullrequests: reviews@spark.apache.org
issues: reviews@spark.apache.org
commits: commits@spark.apache.org
jira_options: link label
12 changes: 6 additions & 6 deletions .github/workflows/build_and_test.yml
@@ -283,7 +283,7 @@ jobs:
name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
if: ${{ !success() }}
uses: actions/upload-artifact@v3
with:
name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
@@ -425,7 +425,7 @@ jobs:
run: |
if [[ "$MODULES_TO_TEST" != *"pyspark-ml"* ]] && [[ "$BRANCH" != "branch-3.5" ]]; then
# uninstall libraries dedicated for ML testing
python3.9 -m pip uninstall -y torch torchvision torcheval torchtnt tensorboard mlflow
python3.9 -m pip uninstall -y torch torchvision torcheval torchtnt tensorboard mlflow deepspeed
fi
if [ -f ./dev/free_disk_space_container ]; then
./dev/free_disk_space_container
@@ -470,7 +470,7 @@ jobs:
name: test-results-${{ matrix.modules }}--8-${{ inputs.hadoop }}-hive2.3
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
if: ${{ !success() }}
uses: actions/upload-artifact@v3
with:
name: unit-tests-log-${{ matrix.modules }}--8-${{ inputs.hadoop }}-hive2.3
@@ -961,7 +961,7 @@ jobs:
name: test-results-tpcds--8-${{ inputs.hadoop }}-hive2.3
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
if: ${{ !success() }}
uses: actions/upload-artifact@v3
with:
name: unit-tests-log-tpcds--8-${{ inputs.hadoop }}-hive2.3
@@ -1028,7 +1028,7 @@ jobs:
name: test-results-docker-integration--8-${{ inputs.hadoop }}-hive2.3
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
if: ${{ !success() }}
uses: actions/upload-artifact@v3
with:
name: unit-tests-log-docker-integration--8-${{ inputs.hadoop }}-hive2.3
@@ -1103,7 +1103,7 @@ jobs:
eval $(minikube docker-env)
build/sbt -Psparkr -Pkubernetes -Pvolcano -Pkubernetes-integration-tests -Dspark.kubernetes.test.driverRequestCores=0.5 -Dspark.kubernetes.test.executorRequestCores=0.2 -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.exclude.tags=local "kubernetes-integration-tests/test"
- name: Upload Spark on K8S integration tests log files
if: failure()
if: ${{ !success() }}
uses: actions/upload-artifact@v3
with:
name: spark-on-kubernetes-it-log
@@ -178,6 +178,13 @@ public static BloomFilter readFrom(InputStream in) throws IOException {
return BloomFilterImpl.readFrom(in);
}

/**
* Reads in a {@link BloomFilter} from a byte array.
*/
public static BloomFilter readFrom(byte[] bytes) throws IOException {
return BloomFilterImpl.readFrom(bytes);
}

/**
* Computes the optimal k (number of hashes per item inserted in Bloom filter), given the
* expected insertions and total number of bits in the Bloom filter.
@@ -266,6 +266,12 @@ public static BloomFilterImpl readFrom(InputStream in) throws IOException {
return filter;
}

public static BloomFilterImpl readFrom(byte[] bytes) throws IOException {
  // Wrap the array in a stream and delegate to the InputStream overload.
  try (ByteArrayInputStream bis = new ByteArrayInputStream(bytes)) {
    return readFrom(bis);
  }
}

private void writeObject(ObjectOutputStream out) throws IOException {
writeTo(out);
}
@@ -17,7 +17,7 @@

package org.apache.spark.util.sketch

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.io.ByteArrayOutputStream

import scala.reflect.ClassTag
import scala.util.Random
@@ -34,9 +34,7 @@ class BloomFilterSuite extends AnyFunSuite { // scalastyle:ignore funsuite
filter.writeTo(out)
out.close()

val in = new ByteArrayInputStream(out.toByteArray)
val deserialized = BloomFilter.readFrom(in)
in.close()
val deserialized = BloomFilter.readFrom(out.toByteArray)

assert(filter == deserialized)
}
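For reference, a minimal round-trip sketch using the new byte-array overload; BloomFilter.create, putLong, writeTo, and mightContainLong are pre-existing API in org.apache.spark.util.sketch, and the sizes here are arbitrary:

import java.io.ByteArrayOutputStream

import org.apache.spark.util.sketch.BloomFilter

// Build a small filter and serialize it to a byte array.
val filter = BloomFilter.create(1000)
(0L until 100L).foreach(filter.putLong)
val out = new ByteArrayOutputStream()
filter.writeTo(out)

// The new overload reads straight from the bytes, so callers no longer
// create and close a ByteArrayInputStream themselves.
val restored = BloomFilter.readFrom(out.toByteArray)
assert(restored.mightContainLong(42L))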
28 changes: 23 additions & 5 deletions common/utils/src/main/resources/error/error-classes.json
@@ -1035,6 +1035,11 @@
"Cannot safely cast <colName> <srcType> to <targetType>."
]
},
"EXTRA_COLUMNS" : {
"message" : [
"Cannot write extra columns <extraColumns>."
]
},
"EXTRA_STRUCT_FIELDS" : {
"message" : [
"Cannot write extra fields <extraFields> to the struct <colName>."
@@ -2215,6 +2220,12 @@
],
"sqlState" : "42607"
},
"NON_FOLDABLE_ARGUMENT" : {
"message" : [
"The function <funcName> requires the parameter <paramName> to be a foldable expression of the type <paramType>, but the actual argument is a non-foldable."
],
"sqlState" : "22024"
},
"NON_LAST_MATCHED_CLAUSE_OMIT_CONDITION" : {
"message" : [
"When there are more than one MATCHED clauses in a MERGE statement, only the last MATCHED clause can omit the condition."
@@ -2687,6 +2698,18 @@
"Failed to analyze the Python user defined table function: <msg>"
]
},
"TABLE_VALUED_FUNCTION_REQUIRED_METADATA_INCOMPATIBLE_WITH_CALL" : {
"message" : [
"Failed to evaluate the table function <functionName> because its table metadata <requestedMetadata>, but the function call <invalidFunctionCallProperty>."
],
"sqlState" : "22023"
},
"TABLE_VALUED_FUNCTION_REQUIRED_METADATA_INVALID" : {
"message" : [
"Failed to evaluate the table function <functionName> because its table metadata was invalid; <reason>."
],
"sqlState" : "22023"
},
"TABLE_VALUED_FUNCTION_TOO_MANY_TABLE_ARGUMENTS" : {
"message" : [
"There are too many table arguments for table-valued function. It allows one table argument, but got: <num>. If you want to allow it, please set \"spark.sql.allowMultipleTableArguments.enabled\" to \"true\""
@@ -4029,11 +4052,6 @@
"<funcName>() doesn't support the <mode> mode. Acceptable modes are <permissiveMode> and <failFastMode>."
]
},
"_LEGACY_ERROR_TEMP_1100" : {
"message" : [
"The '<argName>' parameter of function '<funcName>' needs to be a <requiredType> literal."
]
},
"_LEGACY_ERROR_TEMP_1103" : {
"message" : [
"Unsupported component type <clz> in arrays."
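As a loose illustration of the new NON_FOLDABLE_ARGUMENT class (this diff does not show which functions raise it, so the function and table below are hypothetical), the call shape it covers is a parameter that must be a constant receiving a per-row value:

// Hypothetical repro: fmt varies per row, so it is not foldable, and a
// function that requires a literal format string now reports
// NON_FOLDABLE_ARGUMENT instead of the removed _LEGACY_ERROR_TEMP_1100.
val df = spark.table("t") // assumed table with columns x and fmt
df.selectExpr("to_char(x, fmt)")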
@@ -987,7 +987,7 @@ object functions {
* @group agg_funcs
* @since 3.5.0
*/
def std(e: Column): Column = stddev(e)
def std(e: Column): Column = Column.fn("std", e)

/**
* Aggregate function: alias for `stddev_samp`.
@@ -2337,15 +2337,15 @@ object functions {
* @group math_funcs
* @since 3.5.0
*/
def ceiling(e: Column, scale: Column): Column = ceil(e, scale)
def ceiling(e: Column, scale: Column): Column = Column.fn("ceiling", e, scale)

/**
* Computes the ceiling of the given value of `e` to 0 decimal places.
*
* @group math_funcs
* @since 3.5.0
*/
def ceiling(e: Column): Column = ceil(e)
def ceiling(e: Column): Column = Column.fn("ceiling", e)

/**
* Convert a number in a string column from one base to another.
@@ -2800,7 +2800,7 @@ object functions {
* @group math_funcs
* @since 3.5.0
*/
def power(l: Column, r: Column): Column = pow(l, r)
def power(l: Column, r: Column): Column = Column.fn("power", l, r)

/**
* Returns the positive value of dividend mod divisor.
@@ -2937,7 +2937,7 @@ object functions {
* @group math_funcs
* @since 3.5.0
*/
def sign(e: Column): Column = signum(e)
def sign(e: Column): Column = Column.fn("sign", e)

/**
* Computes the signum of the given value.
@@ -4428,7 +4428,7 @@
* @since 3.5.0
*/
def printf(format: Column, arguments: Column*): Column =
Column.fn("format_string", lit(format) +: arguments: _*)
Column.fn("printf", (format +: arguments): _*)

/**
* Decodes a `str` in 'application/x-www-form-urlencoded' format using a specific encoding
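The practical effect of switching these helpers from aliases to Column.fn, visible in the updated golden files below, is that each function now keeps its own name in the resolved plan and in the generated column name. A minimal sketch, assuming a running SparkSession named spark:

import org.apache.spark.sql.functions._

val df = spark.range(3).selectExpr("cast(id as double) as b")

// Previously ceiling(e) delegated to ceil(e), so the output column was
// named "CEIL(b)"; with Column.fn("ceiling", ...) it stays "ceiling(b)".
df.select(ceiling(col("b"))).columns // Array("ceiling(b)")
df.select(sign(col("b"))).columns // Array("sign(b)"), not "SIGNUM(b)"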
@@ -3235,16 +3235,19 @@ class PlanGenerationTestSuite
private val testDescFilePath: String = s"${IntegrationTestUtils.sparkHome}/connector/" +
"connect/common/src/test/resources/protobuf-tests/common.desc"

test("from_protobuf messageClassName") {
binary.select(
pbFn.from_protobuf(fn.col("bytes"), "org.apache.spark.sql.protobuf.protos.TestProtoObj"))
// TODO(SPARK-45030): Re-enable this test when all Maven test scenarios succeed and there
// are no other negative impacts. For the problem description, please refer to SPARK-45029
ignore("from_protobuf messageClassName") {
binary.select(pbFn.from_protobuf(fn.col("bytes"), classOf[StorageLevel].getName))
}

test("from_protobuf messageClassName options") {
// TODO(SPARK-45030): Re-enable this test when all Maven test scenarios succeed and there
// are no other negative impacts. For the problem description, please refer to SPARK-45029
ignore("from_protobuf messageClassName options") {
binary.select(
pbFn.from_protobuf(
fn.col("bytes"),
"org.apache.spark.sql.protobuf.protos.TestProtoObj",
classOf[StorageLevel].getName,
Map("recursive.fields.max.depth" -> "2").asJava))
}

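For context, the messageClassName variant exercised above takes the fully qualified name of a compiled protobuf class on the classpath (StorageLevel is only a stand-in). A hedged sketch of the alternative descriptor-file form, reusing testDescFilePath from the top of this hunk with a hypothetical message name:

// Descriptor-file form; "TestProto" is a hypothetical message name
// inside common.desc.
binary.select(pbFn.from_protobuf(fn.col("bytes"), "TestProto", testDescFilePath))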
@@ -208,6 +208,8 @@ object CheckConnectJvmClientCompatibility {
// functions
ProblemFilters.exclude[Problem]("org.apache.spark.sql.functions.unwrap_udt"),
ProblemFilters.exclude[Problem]("org.apache.spark.sql.functions.udaf"),
ProblemFilters.exclude[DirectMissingMethodProblem](
"org.apache.spark.sql.functions.try_reflect"),

// KeyValueGroupedDataset
ProblemFilters.exclude[Problem](
@@ -1,6 +1,6 @@
Project [summary#0, element_at(id#0, summary#0, None, false) AS id#0, element_at(b#0, summary#0, None, false) AS b#0]
+- Project [id#0, b#0, summary#0]
+- Generate explode([count,mean,stddev,min,max]), false, [summary#0]
+- Aggregate [map(cast(count as string), cast(count(id#0L) as string), cast(mean as string), cast(avg(id#0L) as string), cast(stddev as string), cast(stddev_samp(cast(id#0L as double)) as string), cast(min as string), cast(min(id#0L) as string), cast(max as string), cast(max(id#0L) as string)) AS id#0, map(cast(count as string), cast(count(b#0) as string), cast(mean as string), cast(avg(b#0) as string), cast(stddev as string), cast(stddev_samp(b#0) as string), cast(min as string), cast(min(b#0) as string), cast(max as string), cast(max(b#0) as string)) AS b#0]
+- Aggregate [map(cast(count as string), cast(count(id#0L) as string), cast(mean as string), cast(avg(id#0L) as string), cast(stddev as string), cast(stddev(cast(id#0L as double)) as string), cast(min as string), cast(min(id#0L) as string), cast(max as string), cast(max(id#0L) as string)) AS id#0, map(cast(count as string), cast(count(b#0) as string), cast(mean as string), cast(avg(b#0) as string), cast(stddev as string), cast(stddev(b#0) as string), cast(min as string), cast(min(b#0) as string), cast(max as string), cast(max(b#0) as string)) AS b#0]
+- Project [id#0L, b#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0]

This file was deleted.

This file was deleted.

@@ -1,2 +1,2 @@
Project [CEIL(b#0) AS CEIL(b)#0L]
Project [ceiling(b#0) AS ceiling(b)#0L]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
@@ -1,2 +1,2 @@
Project [ceil(cast(b#0 as decimal(30,15)), 2) AS ceil(b, 2)#0]
Project [ceiling(cast(b#0 as decimal(30,15)), 2) AS ceiling(b, 2)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
@@ -1,2 +1,2 @@
Project [java_method(java.util.UUID, fromString, g#0) AS java_method(java.util.UUID, fromString, g)#0]
Project [java_method(java.util.UUID, fromString, g#0, true) AS java_method(java.util.UUID, fromString, g)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
@@ -1,2 +1,2 @@
Project [format_string(g#0, a#0, g#0) AS format_string(g, a, g)#0]
Project [printf(g#0, a#0, g#0) AS printf(g, a, g)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
@@ -1,2 +1,2 @@
Project [reflect(java.util.UUID, fromString, g#0) AS reflect(java.util.UUID, fromString, g)#0]
Project [reflect(java.util.UUID, fromString, g#0, true) AS reflect(java.util.UUID, fromString, g)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
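The trailing true added to the reflect and java_method plans above presumably corresponds to a fail-on-error flag that separates them from the new try_reflect, which is excluded from the compatibility check earlier in this diff. A hedged sketch of the difference, assuming a SparkSession named spark:

// reflect raises an error when the reflective call throws;
// try_reflect returns NULL for that row instead.
spark.sql("SELECT reflect('java.util.UUID', 'fromString', 'oops')").show() // fails
spark.sql("SELECT try_reflect('java.util.UUID', 'fromString', 'oops')").show() // NULL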
@@ -1,2 +1,2 @@
Project [SIGNUM(b#0) AS SIGNUM(b)#0]
Project [sign(b#0) AS sign(b)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
@@ -1,2 +1,2 @@
Aggregate [stddev(cast(a#0 as double)) AS stddev(a)#0]
Aggregate [std(cast(a#0 as double)) AS std(a)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]

This file was deleted.

Binary file not shown.

This file was deleted.

Binary file not shown.
@@ -13,7 +13,7 @@
},
"expressions": [{
"unresolvedFunction": {
"functionName": "ceil",
"functionName": "ceiling",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "b"
Binary file not shown.
@@ -13,7 +13,7 @@
},
"expressions": [{
"unresolvedFunction": {
"functionName": "ceil",
"functionName": "ceiling",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "b"
Binary file not shown.
@@ -13,7 +13,7 @@
},
"expressions": [{
"unresolvedFunction": {
"functionName": "format_string",
"functionName": "printf",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "g"
Binary file not shown.
