Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spark3: Support for SORT BY Clause #2651

Merged
merged 7 commits into from
Feb 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 55 additions & 4 deletions src/sqlfluff/dialects/dialect_spark3.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,8 @@
Sequence("ORDER", "BY"),
Sequence("CLUSTER", "BY"),
Sequence("DISTRIBUTE", "BY"),
# TODO Add PIVOT, LATERAL VIEW, SORT BY and DISTRIBUTE BY clauses
Sequence("SORT", "BY"),
# TODO Add PIVOT and LATERAL VIEW clauses
"HAVING",
"WINDOW",
Ref("SetOperatorSegment"),
Expand Down Expand Up @@ -280,7 +281,7 @@
# Add Spark Grammar
BucketSpecGrammar=Sequence(
Ref("ClusteredBySpecGrammar"),
Ref("SortSpecGrammar", optional=True),
Ref("SortedBySpecGrammar", optional=True),
"INTO",
Ref("NumericLiteralSegment"),
"BUCKETS",
Expand Down Expand Up @@ -358,7 +359,7 @@
Ref("ResourceFileGrammar"),
Ref("QuotedLiteralSegment"),
),
SortSpecGrammar=Sequence(
SortedBySpecGrammar=Sequence(
"SORTED",
"BY",
Bracketed(
Expand Down Expand Up @@ -1301,10 +1302,10 @@ class SelectStatementSegment(BaseSegment):
).parse_grammar.copy(
# TODO New Rule: Warn of mutual exclusion of following clauses
# DISTRIBUTE, SORT, CLUSTER and ORDER BY if multiple specified
# TODO Insert: SORT BY clauses
insert=[
Ref("ClusterByClauseSegment", optional=True),
Ref("DistributeByClauseSegment", optional=True),
Ref("SortByClauseSegment", optional=True),
],
before=Ref("LimitClauseSegment"),
)
Expand Down Expand Up @@ -1434,6 +1435,56 @@ class GroupingExpressionList(BaseSegment):
)


@spark3_dialect.segment()
class SortByClauseSegment(BaseSegment):
"""A `SORT BY` clause like in `SELECT`.

Per the Spark documentation linked below, this clause is mutually
exclusive with ORDER BY and CLUSTER BY.
https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-sortby.html

The clause is matched from the `SORT BY` keywords up to one of the
terminators listed in `match_grammar`, then parsed as a comma-delimited
list of sort items. Each item is a column reference, a numeric literal
or an expression, optionally followed by `ASC`/`DESC` and by
`NULLS FIRST`/`NULLS LAST`.
"""

type = "sort_by_clause"

# Coarse match: everything from `SORT BY` up to the first terminator.
match_grammar = StartsWith(
Sequence("SORT", "BY"),
terminator=OneOf(
"LIMIT",
"HAVING",
"QUALIFY",
# For window functions
"WINDOW",
Ref("FrameClauseUnitGrammar"),
"SEPARATOR",
),
)
# Detailed parse of the matched span.
parse_grammar = Sequence(
"SORT",
"BY",
Indent,
Delimited(
Sequence(
OneOf(
Ref("ColumnReferenceSegment"),
# Can sort by column position, e.g. `SORT BY 1`
Ref("NumericLiteralSegment"),
# Can sort by an expression
Ref("ExpressionSegment"),
),
# Optional sort direction.
OneOf("ASC", "DESC", optional=True),
# Optional null ordering, e.g. `SORT BY age NULLS LAST` or
# `SORT BY age DESC NULLS FIRST`.
Sequence("NULLS", OneOf("FIRST", "LAST"), optional=True),
),
terminator=OneOf(
"LIMIT",
Ref("FrameClauseUnitGrammar"),
),
),
Dedent,
)


# Auxiliary Statements
@spark3_dialect.segment()
class AddExecutablePackage(BaseSegment):
Expand Down
112 changes: 112 additions & 0 deletions test/fixtures/dialects/spark3/select_sort_by.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
-- Sort rows within each partition in ascending manner.
SELECT /*+ REPARTITION(zip_code) */
name,
age,
zip_code
FROM person
SORT BY
name;

-- Same query without the REPARTITION hint.
SELECT
name,
age,
zip_code
FROM person
SORT BY
name;

-- Sort rows within each partition using column position.
SELECT /*+ REPARTITION(zip_code) */
name,
age,
zip_code
FROM person
SORT BY 1;

-- Same query without the REPARTITION hint.
SELECT
name,
age,
zip_code
FROM person
SORT BY 1;

-- Sort rows within each partition in ascending
-- manner keeping null values to be last.
SELECT /*+ REPARTITION(zip_code) */
age,
name,
zip_code
FROM person
SORT BY
age NULLS LAST;

-- Same query without the REPARTITION hint.
SELECT
age,
name,
zip_code
FROM person
SORT BY
age NULLS LAST;

-- Sort rows by age within each partition in
-- descending manner, which defaults to NULLS LAST.
SELECT /*+ REPARTITION(zip_code) */
age,
name,
zip_code
FROM person
SORT BY
age DESC;

-- Same query without the REPARTITION hint.
SELECT
age,
name,
zip_code
FROM person
SORT BY
age DESC;

-- Sort rows by age within each partition in
-- descending manner keeping null values to be first.
SELECT /*+ REPARTITION(zip_code) */
age,
name,
zip_code
FROM person
SORT BY
age DESC NULLS FIRST;

-- Same query without the REPARTITION hint.
SELECT
age,
name,
zip_code
FROM person
SORT BY
age DESC NULLS FIRST;

-- Sort rows within each partition based on more
-- than one column with each column having different
-- sort direction.
SELECT /*+ REPARTITION(zip_code) */
name,
age,
zip_code
FROM person
SORT BY
name ASC, age DESC;

-- Same query without the REPARTITION hint.
SELECT
name,
age,
zip_code
FROM person
SORT BY
name ASC, age DESC;

-- Sort rows within each partition based on result of a function.
SELECT
age,
name
FROM person
SORT BY
LEFT(SUBSTRING_INDEX(name, ' ', -1), 1);
Loading