Merge pull request #1555 from apache/master
GulajavaMinistudio authored Sep 12, 2023
2 parents 46fe32d + 5d2d915 commit c84eac4
Showing 34 changed files with 699 additions and 260 deletions.
5 changes: 0 additions & 5 deletions common/utils/src/main/resources/error/error-classes.json
@@ -4944,11 +4944,6 @@
"Negative values found in <frequencyExpression>"
]
},
"_LEGACY_ERROR_TEMP_2015" : {
"message" : [
"Cannot generate <codeType> code for incomparable type: <dataType>."
]
},
"_LEGACY_ERROR_TEMP_2016" : {
"message" : [
"Can not interpolate <arg> into code block."
@@ -134,8 +134,6 @@ class SparkSession private[sql] (
} else {
val hash = client.cacheLocalRelation(arrowData, encoder.schema.json)
builder.getCachedLocalRelationBuilder
.setUserId(client.userId)
.setSessionId(client.sessionId)
.setHash(hash)
}
} else {
@@ -2632,7 +2632,7 @@ object functions {
* @group math_funcs
* @since 3.4.0
*/
def log(e: Column): Column = Column.fn("log", e)
def log(e: Column): Column = ln(e)

/**
* Computes the natural logarithm of the given column.
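The one-argument `log` in the Scala client now simply delegates to `ln`, so both compile to the same natural-log expression (the golden plan files below change from `LOG(E(), b)` to `ln(b)` accordingly). A minimal PySpark sketch of the same equivalence on the Python side, assuming an active `SparkSession`:

```python
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.range(1, 4).toDF("b")

# With a single argument, log is the natural logarithm, i.e. the same as ln;
# both columns below hold identical values.
df.select(F.log("b").alias("log_b"), F.ln("b").alias("ln_b")).show()
```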
@@ -400,11 +400,11 @@ message LocalRelation {

// A local relation that has been cached already.
message CachedLocalRelation {
// (Required) An identifier of the user which created the local relation
string userId = 1;

// (Required) An identifier of the Spark SQL session in which the user created the local relation.
string sessionId = 2;
// `userId` and `sessionId` fields are deleted since the server must always use the active
// session/user rather than arbitrary values provided by the client. It is never valid to access
// a local relation from a different session/user.
reserved 1, 2;
reserved "userId", "sessionId";

// (Required) A sha-256 hash of the serialized local relation in proto, see LocalRelation.
string hash = 3;
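With `userId` and `sessionId` reserved, a cached local relation is identified on the wire by its hash alone, and the server resolves the user and session from the active connection (see the planner change below). A small sketch of what the client-side message now looks like, assuming the generated `pyspark.sql.connect.proto` bindings are importable; the hash value is a placeholder:

```python
from pyspark.sql.connect import proto

rel = proto.Relation()
# Only the sha-256 hash of the serialized LocalRelation is sent now;
# userId/sessionId are reserved field numbers/names and can no longer be set.
rel.cached_local_relation.hash = "0" * 64  # placeholder hash, for illustration only

print(rel)
```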
@@ -1,2 +1,2 @@
Project [LOG(E(), b#0) AS LOG(E(), b)#0]
Project [ln(b#0) AS ln(b)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
@@ -13,7 +13,7 @@
},
"expressions": [{
"unresolvedFunction": {
"functionName": "log",
"functionName": "ln",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "b"
Binary file not shown.
@@ -970,7 +970,7 @@ class SparkConnectPlanner(val sessionHolder: SessionHolder) extends Logging {

private def transformCachedLocalRelation(rel: proto.CachedLocalRelation): LogicalPlan = {
val blockManager = session.sparkContext.env.blockManager
val blockId = CacheId(rel.getUserId, rel.getSessionId, rel.getHash)
val blockId = CacheId(sessionHolder.userId, sessionHolder.sessionId, rel.getHash)
val bytes = blockManager.getLocalBytes(blockId)
bytes
.map { blockData =>
2 changes: 1 addition & 1 deletion dev/appveyor-install-dependencies.ps1
@@ -81,7 +81,7 @@ if (!(Test-Path $tools)) {
# ========================== Maven
# Push-Location $tools
#
# $mavenVer = "3.8.8"
# $mavenVer = "3.9.4"
# Start-FileDownload "https://archive.apache.org/dist/maven/maven-3/$mavenVer/binaries/apache-maven-$mavenVer-bin.zip" "maven.zip"
#
# # extract
2 changes: 1 addition & 1 deletion docs/building-spark.md
@@ -27,7 +27,7 @@ license: |
## Apache Maven

The Maven-based build is the build of reference for Apache Spark.
Building Spark using Maven requires Maven 3.8.8 and Java 8/11/17.
Building Spark using Maven requires Maven 3.9.4 and Java 8/11/17.
Spark requires Scala 2.12/2.13; support for Scala 2.11 was removed in Spark 3.0.0.

### Setting up Maven's Memory Usage
2 changes: 1 addition & 1 deletion pom.xml
@@ -115,7 +115,7 @@
<java.version>1.8</java.version>
<maven.compiler.source>${java.version}</maven.compiler.source>
<maven.compiler.target>${java.version}</maven.compiler.target>
<maven.version>3.8.8</maven.version>
<maven.version>3.9.4</maven.version>
<exec-maven-plugin.version>3.1.0</exec-maven-plugin.version>
<sbt.project.name>spark</sbt.project.name>
<asm.version>9.5</asm.version>
2 changes: 1 addition & 1 deletion python/pyspark/pandas/base.py
@@ -505,7 +505,7 @@ def empty(self) -> bool:
>>> ps.DataFrame({}, index=list('abc')).index.empty
False
"""
return self._internal.resolved_copy.spark_frame.rdd.isEmpty()
return self._internal.resolved_copy.spark_frame.isEmpty()

@property
def hasnans(self) -> bool:
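`empty` now calls `isEmpty()` on the underlying Spark DataFrame directly instead of going through `rdd.isEmpty()`, avoiding the detour into the RDD API; the observable behaviour is unchanged. A quick sketch:

```python
import pyspark.pandas as ps

# Same results as before; only the underlying check changed from
# spark_frame.rdd.isEmpty() to spark_frame.isEmpty().
print(ps.DataFrame({}, index=list("abc")).index.empty)  # False (example from the docstring above)
print(ps.Series([], dtype="float64").empty)             # True (assumed example: an empty Series)
```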
5 changes: 5 additions & 0 deletions python/pyspark/pandas/frame.py
@@ -6097,6 +6097,11 @@ def interpolate(
if isinstance(psser.spark.data_type, (NumericType, BooleanType)):
numeric_col_names.append(psser.name)

if len(numeric_col_names) == 0:
raise TypeError(
"Cannot interpolate with all object-dtype columns in the DataFrame. "
"Try setting at least one column to a numeric dtype."
)
psdf = self[numeric_col_names]
return psdf._apply_series_op(
lambda psser: psser._interpolate(
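The new guard makes `interpolate` fail fast with a `TypeError` when the frame has no numeric or boolean columns (the message mirrors pandas), instead of silently operating on an empty column selection. A short sketch of the behaviour this adds, matching the test further down:

```python
import pyspark.pandas as ps

psdf = ps.DataFrame({"A": ["a", "b", "c"], "B": ["a", "b", "c"]})
try:
    psdf.interpolate()
except TypeError as e:
    # Cannot interpolate with all object-dtype columns in the DataFrame.
    # Try setting at least one column to a numeric dtype.
    print(e)

# A frame with at least one numeric column still interpolates that column.
print(ps.DataFrame({"A": [1.0, None, 3.0], "B": ["x", "y", "z"]}).interpolate())
```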
2 changes: 2 additions & 0 deletions python/pyspark/pandas/tests/series/test_series.py
@@ -113,6 +113,8 @@ def test_empty_series(self):
self.assert_eq(ps.from_pandas(pser_a), pser_a)
self.assert_eq(ps.from_pandas(pser_b), pser_b)

self.assertTrue(pser_a.empty)

def test_all_null_series(self):
pser_a = pd.Series([None, None, None], dtype="float64")
pser_b = pd.Series([None, None, None], dtype="str")
5 changes: 5 additions & 0 deletions python/pyspark/pandas/tests/test_frame_interpolate.py
@@ -53,6 +53,11 @@ def test_interpolate_error(self):
with self.assertRaisesRegex(ValueError, "invalid limit_area"):
psdf.id.interpolate(limit_area="jump")

with self.assertRaisesRegex(
TypeError, "Cannot interpolate with all object-dtype columns in the DataFrame."
):
ps.DataFrame({"A": ["a", "b", "c"], "B": ["a", "b", "c"]}).interpolate()

def _test_interpolate(self, pobj):
psobj = ps.from_pandas(pobj)
self.assert_eq(psobj.interpolate(), pobj.interpolate())
12 changes: 4 additions & 8 deletions python/pyspark/sql/catalog.py
@@ -129,8 +129,7 @@ def listCatalogs(self, pattern: Optional[str] = None) -> List[CatalogMetadata]:
pattern : str
The pattern that the catalog name needs to match.
.. versionchanged: 3.5.0
Added ``pattern`` argument.
.. versionadded: 3.5.0
Returns
-------
@@ -201,8 +200,7 @@ def listDatabases(self, pattern: Optional[str] = None) -> List[Database]:
pattern : str
The pattern that the database name needs to match.
.. versionchanged: 3.5.0
Adds ``pattern`` argument.
.. versionadded: 3.5.0
Returns
-------
@@ -325,8 +323,7 @@ def listTables(
pattern : str
The pattern that the database name needs to match.
.. versionchanged: 3.5.0
Adds ``pattern`` argument.
.. versionadded: 3.5.0
Returns
-------
@@ -455,8 +452,7 @@ def listFunctions(
pattern : str
The pattern that the function name needs to match.
.. versionchanged: 3.5.0
Adds ``pattern`` argument.
.. versionadded: 3.5.0
Returns
-------
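These docstring fixes concern the `pattern` argument that the catalog listing APIs accept as of 3.5.0, which filters results by name. A hedged usage sketch, assuming an active session and the default catalog, with `'*'` as the usual `SHOW TABLES LIKE`-style wildcard:

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Filter the listings by name; '*' matches any sequence of characters.
print(spark.catalog.listDatabases(pattern="def*"))   # e.g. the 'default' database
print(spark.catalog.listTables(pattern="tmp_*"))     # tables whose names start with 'tmp_'
print(spark.catalog.listFunctions(pattern="to_*"))   # e.g. to_date, to_timestamp
```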
3 changes: 0 additions & 3 deletions python/pyspark/sql/connect/plan.py
@@ -398,9 +398,6 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation:
plan = self._create_proto_relation()
clr = plan.cached_local_relation

if session._user_id:
clr.userId = session._user_id
clr.sessionId = session._session_id
clr.hash = self._hash

return plan
160 changes: 80 additions & 80 deletions python/pyspark/sql/connect/proto/relations_pb2.py

Large diffs are not rendered by default.

15 changes: 1 addition & 14 deletions python/pyspark/sql/connect/proto/relations_pb2.pyi
@@ -1647,28 +1647,15 @@ class CachedLocalRelation(google.protobuf.message.Message):

DESCRIPTOR: google.protobuf.descriptor.Descriptor

USERID_FIELD_NUMBER: builtins.int
SESSIONID_FIELD_NUMBER: builtins.int
HASH_FIELD_NUMBER: builtins.int
userId: builtins.str
"""(Required) An identifier of the user which created the local relation"""
sessionId: builtins.str
"""(Required) An identifier of the Spark SQL session in which the user created the local relation."""
hash: builtins.str
"""(Required) A sha-256 hash of the serialized local relation in proto, see LocalRelation."""
def __init__(
self,
*,
userId: builtins.str = ...,
sessionId: builtins.str = ...,
hash: builtins.str = ...,
) -> None: ...
def ClearField(
self,
field_name: typing_extensions.Literal[
"hash", b"hash", "sessionId", b"sessionId", "userId", b"userId"
],
) -> None: ...
def ClearField(self, field_name: typing_extensions.Literal["hash", b"hash"]) -> None: ...

global___CachedLocalRelation = CachedLocalRelation

39 changes: 19 additions & 20 deletions python/pyspark/sql/dataframe.py
@@ -597,8 +597,7 @@ def printSchema(self, level: Optional[int] = None) -> None:
level : int, optional, default None
How many levels to print for nested schemas.
.. versionchanged:: 3.5.0
Added Level parameter.
.. versionadded:: 3.5.0
Examples
--------
@@ -2864,14 +2863,14 @@ def sortWithinPartitions(
.. versionchanged:: 3.4.0
Supports Spark Connect.
.. versionchanged:: 4.0.0
Supports column ordinal.
Parameters
----------
cols : int, str, list or :class:`Column`, optional
list of :class:`Column` or column names or column ordinals to sort by.
.. versionchanged:: 4.0.0
Supports column ordinal.
Other Parameters
----------------
ascending : bool or list, optional, default True
@@ -2928,14 +2927,14 @@ def sort(
.. versionchanged:: 3.4.0
Supports Spark Connect.
.. versionchanged:: 4.0.0
Supports column ordinal.
Parameters
----------
cols : int, str, list, or :class:`Column`, optional
list of :class:`Column` or column names or column ordinals to sort by.
.. versionchanged:: 4.0.0
Supports column ordinal.
Other Parameters
----------------
ascending : bool or list, optional, default True
@@ -3826,16 +3825,16 @@ def groupBy(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData":  # type: ign
.. versionchanged:: 3.4.0
Supports Spark Connect.
.. versionchanged:: 4.0.0
Supports column ordinal.
Parameters
----------
cols : list, str or :class:`Column`
cols : list, str, int or :class:`Column`
The columns to group by.
Each element can be a column name (string) or an expression (:class:`Column`)
or a column ordinal (int, 1-based) or list of them.
.. versionchanged:: 4.0.0
Supports column ordinal.
Returns
-------
:class:`GroupedData`
@@ -3935,16 +3934,16 @@ def rollup(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData":  # type: igno
.. versionchanged:: 3.4.0
Supports Spark Connect.
.. versionchanged:: 4.0.0
Supports column ordinal.
Parameters
----------
cols : list, str or :class:`Column`
cols : list, str, int or :class:`Column`
The columns to roll-up by.
Each element should be a column name (string) or an expression (:class:`Column`)
or a column ordinal (int, 1-based) or list of them.
.. versionchanged:: 4.0.0
Supports column ordinal.
Returns
-------
:class:`GroupedData`
@@ -4020,16 +4019,16 @@ def cube(self, *cols: "ColumnOrName") -> "GroupedData":  # type: ignore[misc]
.. versionchanged:: 3.4.0
Supports Spark Connect.
.. versionchanged:: 4.0.0
Supports column ordinal.
Parameters
----------
cols : list, str or :class:`Column`
cols : list, str, int or :class:`Column`
The columns to cube by.
Each element should be a column name (string) or an expression (:class:`Column`)
or a column ordinal (int, 1-based) or list of them.
.. versionchanged:: 4.0.0
Supports column ordinal.
Returns
-------
:class:`GroupedData`
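The docstring updates above record that, starting with 4.0.0, `sort`, `sortWithinPartitions`, `groupBy`, `rollup`, and `cube` also accept 1-based column ordinals in addition to names and `Column` expressions. A brief sketch of the intended usage on a hypothetical DataFrame (4.0.0+ behaviour):

```python
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("a", 1), ("a", 2), ("b", 3)], ["name", "value"])

# Ordinals are 1-based: groupBy(1) groups by 'name', and sort(2) sorts by the
# second column of the aggregated result ('total').
df.groupBy(1).agg(F.sum("value").alias("total")).sort(2).show()

# Equivalent to the name-based form:
df.groupBy("name").agg(F.sum("value").alias("total")).sort("total").show()
```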
