Skip to content

Commit

Permalink
[SPARK-44778][SQL] Add the alias TIMEDIFF for TIMESTAMPDIFF
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
In the PR, I propose to extend the rules of `primaryExpression` in `SqlBaseParser.g4`, and add one more function `TIMEDIFF` which accepts 3 arguments in the same way as the existing expression `TIMESTAMPDIFF`.

### Why are the changes needed?
To achieve feature parity with other systems and make the migration to Spark SQL from such systems easier:
1. Snowflake: https://docs.snowflake.com/en/sql-reference/functions/timediff
2. MySQL/MariaDB: https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_timediff

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
By running the existing test suites:
```
$ PYSPARK_PYTHON=python3 build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite"
```

Closes apache#42435 from MaxGekk/timediff.

Authored-by: Max Gekk <max.gekk@gmail.com>
Signed-off-by: Max Gekk <max.gekk@gmail.com>
  • Loading branch information
MaxGekk committed Aug 12, 2023
1 parent 7baf9da commit b9fc5c0
Show file tree
Hide file tree
Showing 17 changed files with 760 additions and 2 deletions.
1 change: 1 addition & 0 deletions docs/sql-ref-ansi-compliance.md
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,7 @@ Below is a list of all the keywords in Spark SQL.
|TERMINATED|non-reserved|non-reserved|non-reserved|
|THEN|reserved|non-reserved|reserved|
|TIME|reserved|non-reserved|reserved|
|TIMEDIFF|non-reserved|non-reserved|non-reserved|
|TIMESTAMP|non-reserved|non-reserved|non-reserved|
|TIMESTAMP_LTZ|non-reserved|non-reserved|non-reserved|
|TIMESTAMP_NTZ|non-reserved|non-reserved|non-reserved|
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,7 @@ TEMPORARY: 'TEMPORARY' | 'TEMP';
TERMINATED: 'TERMINATED';
THEN: 'THEN';
TIME: 'TIME';
TIMEDIFF: 'TIMEDIFF';
TIMESTAMP: 'TIMESTAMP';
TIMESTAMP_LTZ: 'TIMESTAMP_LTZ';
TIMESTAMP_NTZ: 'TIMESTAMP_NTZ';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -953,7 +953,7 @@ datetimeUnit
primaryExpression
: name=(CURRENT_DATE | CURRENT_TIMESTAMP | CURRENT_USER | USER) #currentLike
| name=(TIMESTAMPADD | DATEADD | DATE_ADD) LEFT_PAREN (unit=datetimeUnit | invalidUnit=stringLit) COMMA unitsAmount=valueExpression COMMA timestamp=valueExpression RIGHT_PAREN #timestampadd
| name=(TIMESTAMPDIFF | DATEDIFF | DATE_DIFF) LEFT_PAREN (unit=datetimeUnit | invalidUnit=stringLit) COMMA startTimestamp=valueExpression COMMA endTimestamp=valueExpression RIGHT_PAREN #timestampdiff
| name=(TIMESTAMPDIFF | DATEDIFF | DATE_DIFF | TIMEDIFF) LEFT_PAREN (unit=datetimeUnit | invalidUnit=stringLit) COMMA startTimestamp=valueExpression COMMA endTimestamp=valueExpression RIGHT_PAREN #timestampdiff
| CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase
| CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase
| name=(CAST | TRY_CAST) LEFT_PAREN expression AS dataType RIGHT_PAREN #cast
Expand Down Expand Up @@ -1511,6 +1511,7 @@ ansiNonReserved
| TBLPROPERTIES
| TEMPORARY
| TERMINATED
| TIMEDIFF
| TIMESTAMP
| TIMESTAMP_LTZ
| TIMESTAMP_NTZ
Expand Down Expand Up @@ -1853,6 +1854,7 @@ nonReserved
| TERMINATED
| THEN
| TIME
| TIMEDIFF
| TIMESTAMP
| TIMESTAMP_LTZ
| TIMESTAMP_NTZ
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -875,3 +875,71 @@ org.apache.spark.sql.catalyst.parser.ParseException
"fragment" : "timestampdiff('YEAR', date'2022-02-15', date'2023-02-15')"
} ]
}


-- !query
select timediff(QUARTER, timestamp'2023-08-10 01:02:03', timestamp'2022-01-14 01:02:03')
-- !query analysis
[Analyzer test output redacted due to nondeterminism]


-- !query
select timediff(HOUR, timestamp'2022-02-14 01:02:03', timestamp'2022-02-14 12:00:03')
-- !query analysis
[Analyzer test output redacted due to nondeterminism]


-- !query
select timediff(DAY, date'2022-02-15', date'2023-02-15')
-- !query analysis
[Analyzer test output redacted due to nondeterminism]


-- !query
select timediff(SECOND, date'2022-02-15', timestamp'2022-02-14 23:59:59')
-- !query analysis
[Analyzer test output redacted due to nondeterminism]


-- !query
select timediff('MINUTE', timestamp'2023-02-14 01:02:03', timestamp'2023-02-14 02:00:03')
-- !query analysis
org.apache.spark.sql.catalyst.parser.ParseException
{
"errorClass" : "INVALID_PARAMETER_VALUE.DATETIME_UNIT",
"sqlState" : "22023",
"messageParameters" : {
"functionName" : "`timediff`",
"invalidValue" : "'MINUTE'",
"parameter" : "`unit`"
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 8,
"stopIndex" : 89,
"fragment" : "timediff('MINUTE', timestamp'2023-02-14 01:02:03', timestamp'2023-02-14 02:00:03')"
} ]
}


-- !query
select timediff('YEAR', date'2020-02-15', date'2023-02-15')
-- !query analysis
org.apache.spark.sql.catalyst.parser.ParseException
{
"errorClass" : "INVALID_PARAMETER_VALUE.DATETIME_UNIT",
"sqlState" : "22023",
"messageParameters" : {
"functionName" : "`timediff`",
"invalidValue" : "'YEAR'",
"parameter" : "`unit`"
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 8,
"stopIndex" : 59,
"fragment" : "timediff('YEAR', date'2020-02-15', date'2023-02-15')"
} ]
}
Original file line number Diff line number Diff line change
Expand Up @@ -1978,3 +1978,71 @@ org.apache.spark.sql.catalyst.parser.ParseException
"fragment" : "timestampdiff('YEAR', date'2022-02-15', date'2023-02-15')"
} ]
}


-- !query
select timediff(QUARTER, timestamp'2023-08-10 01:02:03', timestamp'2022-01-14 01:02:03')
-- !query analysis
[Analyzer test output redacted due to nondeterminism]


-- !query
select timediff(HOUR, timestamp'2022-02-14 01:02:03', timestamp'2022-02-14 12:00:03')
-- !query analysis
[Analyzer test output redacted due to nondeterminism]


-- !query
select timediff(DAY, date'2022-02-15', date'2023-02-15')
-- !query analysis
[Analyzer test output redacted due to nondeterminism]


-- !query
select timediff(SECOND, date'2022-02-15', timestamp'2022-02-14 23:59:59')
-- !query analysis
[Analyzer test output redacted due to nondeterminism]


-- !query
select timediff('MINUTE', timestamp'2023-02-14 01:02:03', timestamp'2023-02-14 02:00:03')
-- !query analysis
org.apache.spark.sql.catalyst.parser.ParseException
{
"errorClass" : "INVALID_PARAMETER_VALUE.DATETIME_UNIT",
"sqlState" : "22023",
"messageParameters" : {
"functionName" : "`timediff`",
"invalidValue" : "'MINUTE'",
"parameter" : "`unit`"
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 8,
"stopIndex" : 89,
"fragment" : "timediff('MINUTE', timestamp'2023-02-14 01:02:03', timestamp'2023-02-14 02:00:03')"
} ]
}


-- !query
select timediff('YEAR', date'2020-02-15', date'2023-02-15')
-- !query analysis
org.apache.spark.sql.catalyst.parser.ParseException
{
"errorClass" : "INVALID_PARAMETER_VALUE.DATETIME_UNIT",
"sqlState" : "22023",
"messageParameters" : {
"functionName" : "`timediff`",
"invalidValue" : "'YEAR'",
"parameter" : "`unit`"
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 8,
"stopIndex" : 59,
"fragment" : "timediff('YEAR', date'2020-02-15', date'2023-02-15')"
} ]
}
Original file line number Diff line number Diff line change
Expand Up @@ -947,3 +947,71 @@ org.apache.spark.sql.catalyst.parser.ParseException
"fragment" : "timestampdiff('YEAR', date'2022-02-15', date'2023-02-15')"
} ]
}


-- !query
select timediff(QUARTER, timestamp'2023-08-10 01:02:03', timestamp'2022-01-14 01:02:03')
-- !query analysis
[Analyzer test output redacted due to nondeterminism]


-- !query
select timediff(HOUR, timestamp'2022-02-14 01:02:03', timestamp'2022-02-14 12:00:03')
-- !query analysis
[Analyzer test output redacted due to nondeterminism]


-- !query
select timediff(DAY, date'2022-02-15', date'2023-02-15')
-- !query analysis
[Analyzer test output redacted due to nondeterminism]


-- !query
select timediff(SECOND, date'2022-02-15', timestamp'2022-02-14 23:59:59')
-- !query analysis
[Analyzer test output redacted due to nondeterminism]


-- !query
select timediff('MINUTE', timestamp'2023-02-14 01:02:03', timestamp'2023-02-14 02:00:03')
-- !query analysis
org.apache.spark.sql.catalyst.parser.ParseException
{
"errorClass" : "INVALID_PARAMETER_VALUE.DATETIME_UNIT",
"sqlState" : "22023",
"messageParameters" : {
"functionName" : "`timediff`",
"invalidValue" : "'MINUTE'",
"parameter" : "`unit`"
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 8,
"stopIndex" : 89,
"fragment" : "timediff('MINUTE', timestamp'2023-02-14 01:02:03', timestamp'2023-02-14 02:00:03')"
} ]
}


-- !query
select timediff('YEAR', date'2020-02-15', date'2023-02-15')
-- !query analysis
org.apache.spark.sql.catalyst.parser.ParseException
{
"errorClass" : "INVALID_PARAMETER_VALUE.DATETIME_UNIT",
"sqlState" : "22023",
"messageParameters" : {
"functionName" : "`timediff`",
"invalidValue" : "'YEAR'",
"parameter" : "`unit`"
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 8,
"stopIndex" : 59,
"fragment" : "timediff('YEAR', date'2020-02-15', date'2023-02-15')"
} ]
}
Original file line number Diff line number Diff line change
Expand Up @@ -894,3 +894,73 @@ org.apache.spark.sql.catalyst.parser.ParseException
"fragment" : "timestampdiff('YEAR', date'2022-02-15', date'2023-02-15')"
} ]
}


-- !query
select timediff(QUARTER, timestamp'2023-08-10 01:02:03', timestamp'2022-01-14 01:02:03')
-- !query analysis
Project [timestampdiff(QUARTER, cast(2023-08-10 01:02:03 as timestamp), cast(2022-01-14 01:02:03 as timestamp), Some(America/Los_Angeles)) AS timestampdiff(QUARTER, TIMESTAMP_NTZ '2023-08-10 01:02:03', TIMESTAMP_NTZ '2022-01-14 01:02:03')#xL]
+- OneRowRelation


-- !query
select timediff(HOUR, timestamp'2022-02-14 01:02:03', timestamp'2022-02-14 12:00:03')
-- !query analysis
Project [timestampdiff(HOUR, cast(2022-02-14 01:02:03 as timestamp), cast(2022-02-14 12:00:03 as timestamp), Some(America/Los_Angeles)) AS timestampdiff(HOUR, TIMESTAMP_NTZ '2022-02-14 01:02:03', TIMESTAMP_NTZ '2022-02-14 12:00:03')#xL]
+- OneRowRelation


-- !query
select timediff(DAY, date'2022-02-15', date'2023-02-15')
-- !query analysis
[Analyzer test output redacted due to nondeterminism]


-- !query
select timediff(SECOND, date'2022-02-15', timestamp'2022-02-14 23:59:59')
-- !query analysis
[Analyzer test output redacted due to nondeterminism]


-- !query
select timediff('MINUTE', timestamp'2023-02-14 01:02:03', timestamp'2023-02-14 02:00:03')
-- !query analysis
org.apache.spark.sql.catalyst.parser.ParseException
{
"errorClass" : "INVALID_PARAMETER_VALUE.DATETIME_UNIT",
"sqlState" : "22023",
"messageParameters" : {
"functionName" : "`timediff`",
"invalidValue" : "'MINUTE'",
"parameter" : "`unit`"
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 8,
"stopIndex" : 89,
"fragment" : "timediff('MINUTE', timestamp'2023-02-14 01:02:03', timestamp'2023-02-14 02:00:03')"
} ]
}


-- !query
select timediff('YEAR', date'2020-02-15', date'2023-02-15')
-- !query analysis
org.apache.spark.sql.catalyst.parser.ParseException
{
"errorClass" : "INVALID_PARAMETER_VALUE.DATETIME_UNIT",
"sqlState" : "22023",
"messageParameters" : {
"functionName" : "`timediff`",
"invalidValue" : "'YEAR'",
"parameter" : "`unit`"
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 8,
"stopIndex" : 59,
"fragment" : "timediff('YEAR', date'2020-02-15', date'2023-02-15')"
} ]
}
Loading

0 comments on commit b9fc5c0

Please sign in to comment.