Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support EXTRACT on intervals and durations #12514

Merged
merged 1 commit into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 41 additions & 6 deletions datafusion/functions/src/datetime/date_part.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ use arrow::array::{Array, ArrayRef, Float64Array};
use arrow::compute::kernels::cast_utils::IntervalUnit;
use arrow::compute::{binary, cast, date_part, DatePart};
use arrow::datatypes::DataType::{
Date32, Date64, Float64, Time32, Time64, Timestamp, Utf8, Utf8View,
Date32, Date64, Duration, Float64, Interval, Time32, Time64, Timestamp, Utf8,
Utf8View,
};
use arrow::datatypes::IntervalUnit::{DayTime, MonthDayNano, YearMonth};
use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
use arrow::datatypes::{DataType, TimeUnit};

Expand Down Expand Up @@ -109,6 +111,20 @@ impl DatePartFunc {
Exact(vec![Utf8View, Time64(Microsecond)]),
Exact(vec![Utf8, Time64(Nanosecond)]),
Exact(vec![Utf8View, Time64(Nanosecond)]),
Exact(vec![Utf8, Interval(YearMonth)]),
Exact(vec![Utf8View, Interval(YearMonth)]),
Exact(vec![Utf8, Interval(DayTime)]),
Exact(vec![Utf8View, Interval(DayTime)]),
Exact(vec![Utf8, Interval(MonthDayNano)]),
Exact(vec![Utf8View, Interval(MonthDayNano)]),
Exact(vec![Utf8, Duration(Second)]),
Exact(vec![Utf8View, Duration(Second)]),
Exact(vec![Utf8, Duration(Millisecond)]),
Exact(vec![Utf8View, Duration(Millisecond)]),
Exact(vec![Utf8, Duration(Microsecond)]),
Exact(vec![Utf8View, Duration(Microsecond)]),
Exact(vec![Utf8, Duration(Nanosecond)]),
Exact(vec![Utf8View, Duration(Nanosecond)]),
],
Volatility::Immutable,
),
Expand Down Expand Up @@ -224,10 +240,28 @@ fn seconds(array: &dyn Array, unit: TimeUnit) -> Result<ArrayRef> {
let subsecs = date_part(array, DatePart::Nanosecond)?;
let subsecs = as_int32_array(subsecs.as_ref())?;

let r: Float64Array = binary(secs, subsecs, |secs, subsecs| {
(secs as f64 + (subsecs as f64 / 1_000_000_000_f64)) * sf
})?;
Ok(Arc::new(r))
// Special case where there are no nulls.
if subsecs.null_count() == 0 {
let r: Float64Array = binary(secs, subsecs, |secs, subsecs| {
(secs as f64 + ((subsecs % 1_000_000_000) as f64 / 1_000_000_000_f64)) * sf
})?;
Ok(Arc::new(r))
} else {
// Nulls in secs are preserved, nulls in subsecs are treated as zero to account for the case
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

// where the number of nanoseconds overflows.
let r: Float64Array = secs
.iter()
.zip(subsecs)
.map(|(secs, subsecs)| {
secs.map(|secs| {
let subsecs = subsecs.unwrap_or(0);
(secs as f64 + ((subsecs % 1_000_000_000) as f64 / 1_000_000_000_f64))
* sf
})
})
.collect();
Ok(Arc::new(r))
}
}

fn epoch(array: &dyn Array) -> Result<ArrayRef> {
Expand Down Expand Up @@ -256,7 +290,8 @@ fn epoch(array: &dyn Array) -> Result<ArrayRef> {
Time64(Nanosecond) => {
as_time64_nanosecond_array(array)?.unary(|x| x as f64 / 1_000_000_000_f64)
}
d => return exec_err!("Can not convert {d:?} to epoch"),
Interval(_) | Duration(_) => return seconds(array, Second),
d => return exec_err!("Cannot convert {d:?} to epoch"),
};
Ok(Arc::new(f))
}
183 changes: 183 additions & 0 deletions datafusion/sqllogictest/test_files/expr.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1472,6 +1472,189 @@ SELECT extract(epoch from arrow_cast('1969-12-31', 'Date64'))
----
-86400

# test_extract_interval
nrc marked this conversation as resolved.
Show resolved Hide resolved

query R
SELECT extract(year from arrow_cast('10 years', 'Interval(YearMonth)'))
----
10

query R
SELECT extract(month from arrow_cast('10 years', 'Interval(YearMonth)'))
----
0

query R
SELECT extract(year from arrow_cast('10 months', 'Interval(YearMonth)'))
----
0

query R
SELECT extract(month from arrow_cast('10 months', 'Interval(YearMonth)'))
----
10

query R
SELECT extract(year from arrow_cast('20 months', 'Interval(YearMonth)'))
----
1

query R
SELECT extract(month from arrow_cast('20 months', 'Interval(YearMonth)'))
----
8

query error DataFusion error: Arrow error: Compute error: Year does not support: Interval\(DayTime\)
SELECT extract(year from arrow_cast('10 days', 'Interval(DayTime)'))

query error DataFusion error: Arrow error: Compute error: Month does not support: Interval\(DayTime\)
SELECT extract(month from arrow_cast('10 days', 'Interval(DayTime)'))

query R
SELECT extract(day from arrow_cast('10 days', 'Interval(DayTime)'))
----
10

query R
SELECT extract(day from arrow_cast('14400 minutes', 'Interval(DayTime)'))
----
0

query R
SELECT extract(minute from arrow_cast('14400 minutes', 'Interval(DayTime)'))
----
14400

query R
SELECT extract(second from arrow_cast('5.1 seconds', 'Interval(DayTime)'))
----
5

query R
SELECT extract(second from arrow_cast('14400 minutes', 'Interval(DayTime)'))
----
864000

query R
SELECT extract(second from arrow_cast('2 months', 'Interval(MonthDayNano)'))
----
0

query R
SELECT extract(second from arrow_cast('2 days', 'Interval(MonthDayNano)'))
----
0

query R
SELECT extract(second from arrow_cast('2 seconds', 'Interval(MonthDayNano)'))
----
2

query R
SELECT extract(seconds from arrow_cast('2 seconds', 'Interval(MonthDayNano)'))
----
2

query R
SELECT extract(epoch from arrow_cast('2 seconds', 'Interval(MonthDayNano)'))
----
2

query R
SELECT extract(milliseconds from arrow_cast('2 seconds', 'Interval(MonthDayNano)'))
----
2000

query R
SELECT extract(second from arrow_cast('2030 milliseconds', 'Interval(MonthDayNano)'))
----
2.03

query R
SELECT extract(second from arrow_cast(NULL, 'Interval(MonthDayNano)'))
----
NULL

statement ok
create table t (id int, i interval) as values
(0, interval '5 months 1 day 10 nanoseconds'),
(1, interval '1 year 3 months'),
(2, interval '3 days 2 milliseconds'),
(3, interval '2 seconds'),
(4, interval '8 months'),
(5, NULL);

query IRR rowsort
select
id,
extract(second from i),
extract(month from i)
from t
order by id;
----
0 0.00000001 5
1 0 15
2 0.002 0
3 2 0
4 0 8
5 NULL NULL

statement ok
drop table t;

# test_extract_duration

query R
SELECT extract(second from arrow_cast(2, 'Duration(Second)'))
nrc marked this conversation as resolved.
Show resolved Hide resolved
----
2

query R
SELECT extract(seconds from arrow_cast(2, 'Duration(Second)'))
----
2

query R
SELECT extract(epoch from arrow_cast(2, 'Duration(Second)'))
----
2

query R
SELECT extract(millisecond from arrow_cast(2, 'Duration(Second)'))
----
2000

query R
SELECT extract(second from arrow_cast(2, 'Duration(Millisecond)'))
----
0.002

query R
SELECT extract(second from arrow_cast(2002, 'Duration(Millisecond)'))
----
2.002

query R
SELECT extract(millisecond from arrow_cast(2002, 'Duration(Millisecond)'))
----
2002

query R
SELECT extract(day from arrow_cast(864000, 'Duration(Second)'))
----
10

query error DataFusion error: Arrow error: Compute error: Month does not support: Duration\(Second\)
SELECT extract(month from arrow_cast(864000, 'Duration(Second)'))

query error DataFusion error: Arrow error: Compute error: Year does not support: Duration\(Second\)
SELECT extract(year from arrow_cast(864000, 'Duration(Second)'))

query R
SELECT extract(day from arrow_cast(NULL, 'Duration(Second)'))
----
NULL

# test_extract_date_part_func

query B
Expand Down