From 768a9a096535c6504c65a029fb1cddd75072b6e8 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Thu, 14 Nov 2024 11:53:06 -0600 Subject: [PATCH 1/4] Update java datetime APIs to match CUDF. Signed-off-by: Robert (Bobby) Evans --- .../main/java/ai/rapids/cudf/ColumnView.java | 104 ++++++++--- .../ai/rapids/cudf/DateTimeComponent.java | 74 ++++++++ .../cudf/DateTimeRoundingFrequency.java | 38 ++++ java/src/main/native/src/ColumnViewJni.cpp | 131 ++++++++----- .../cudf/TimestampColumnVectorTest.java | 176 +++++++++++++++++- 5 files changed, 445 insertions(+), 78 deletions(-) create mode 100644 java/src/main/java/ai/rapids/cudf/DateTimeComponent.java create mode 100644 java/src/main/java/ai/rapids/cudf/DateTimeRoundingFrequency.java diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 6bd4e06c47e..410b4d17ec3 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -917,6 +917,16 @@ public final ColumnVector mergeAndSetValidity(BinaryOp mergeOp, ColumnView... co // DATE/TIME ///////////////////////////////////////////////////////////////////////////// + /** + * Extract a particular date time component from a timestamp. + * @param component what should be extracted + * @return a column with the extracted information in it. + */ + public final ColumnVector extractDateTimeComponent(DateTimeComponent component) { + assert type.isTimestampType(); + return new ColumnVector(extractDateTimeComponent(getNativeView(), component.getNativeId())); + } + /** * Get year from a timestamp. *

@@ -925,8 +935,7 @@ public final ColumnVector mergeAndSetValidity(BinaryOp mergeOp, ColumnView... co * @return - A new INT16 vector allocated on the GPU. */ public final ColumnVector year() { - assert type.isTimestampType(); - return new ColumnVector(year(getNativeView())); + return extractDateTimeComponent(DateTimeComponent.YEAR); } /** @@ -937,8 +946,7 @@ public final ColumnVector year() { * @return - A new INT16 vector allocated on the GPU. */ public final ColumnVector month() { - assert type.isTimestampType(); - return new ColumnVector(month(getNativeView())); + return extractDateTimeComponent(DateTimeComponent.MONTH); } /** @@ -949,8 +957,7 @@ public final ColumnVector month() { * @return - A new INT16 vector allocated on the GPU. */ public final ColumnVector day() { - assert type.isTimestampType(); - return new ColumnVector(day(getNativeView())); + return extractDateTimeComponent(DateTimeComponent.DAY); } /** @@ -961,8 +968,7 @@ public final ColumnVector day() { * @return - A new INT16 vector allocated on the GPU. */ public final ColumnVector hour() { - assert type.hasTimeResolution(); - return new ColumnVector(hour(getNativeView())); + return extractDateTimeComponent(DateTimeComponent.HOUR); } /** @@ -973,8 +979,7 @@ public final ColumnVector hour() { * @return - A new INT16 vector allocated on the GPU. */ public final ColumnVector minute() { - assert type.hasTimeResolution(); - return new ColumnVector(minute(getNativeView())); + return extractDateTimeComponent(DateTimeComponent.MINUTE); } /** @@ -985,8 +990,7 @@ public final ColumnVector minute() { * @return A new INT16 vector allocated on the GPU. */ public final ColumnVector second() { - assert type.hasTimeResolution(); - return new ColumnVector(second(getNativeView())); + return extractDateTimeComponent(DateTimeComponent.SECOND); } /** @@ -997,8 +1001,7 @@ public final ColumnVector second() { * @return A new INT16 vector allocated on the GPU. 
Monday=1, ..., Sunday=7 */ public final ColumnVector weekDay() { - assert type.isTimestampType(); - return new ColumnVector(weekDay(getNativeView())); + return extractDateTimeComponent(DateTimeComponent.WEEKDAY); } /** @@ -1045,6 +1048,16 @@ public final ColumnVector addCalendricalMonths(ColumnView months) { return new ColumnVector(addCalendricalMonths(getNativeView(), months.getNativeView())); } + /** + * Add the specified number of months to the timestamp. + * @param months must be a INT16 column indicating the number of months to add. A negative number + * of months works too. + * @return the updated timestamp + */ + public final ColumnVector addCalendricalMonths(Scalar months) { + return new ColumnVector(addScalarCalendricalMonths(getNativeView(), months.getScalarHandle())); + } + /** * Check to see if the year for this timestamp is a leap year or not. * @return BOOL8 vector of results @@ -1053,6 +1066,45 @@ public final ColumnVector isLeapYear() { return new ColumnVector(isLeapYear(getNativeView())); } + /** + * Extract the number of days in the month + * @return INT16 column of the number of days in the correspond month + */ + public final ColumnVector daysInMonth() { + assert type.isTimestampType(); + return new ColumnVector(daysInMonth(getNativeView())); + } + + /** + * Round the timestamp up to the given frequency keeping the type the same. + * @param freq what part of the timestamp to round. + * @return a timestamp with the same type, but rounded up. + */ + public final ColumnVector dateTimeCeil(DateTimeRoundingFrequency freq) { + assert type.isTimestampType(); + return new ColumnVector(dateTimeCeil(getNativeView(), freq.getNativeId())); + } + + /** + * Round the timestamp down to the given frequency keeping the type the same. + * @param freq what part of the timestamp to round. + * @return a timestamp with the same type, but rounded down. 
+ */ + public final ColumnVector dateTimeFloor(DateTimeRoundingFrequency freq) { + assert type.isTimestampType(); + return new ColumnVector(dateTimeFloor(getNativeView(), freq.getNativeId())); + } + + /** + * Round the timestamp (half up) to the given frequency keeping the type the same. + * @param freq what part of the timestamp to round. + * @return a timestamp with the same type, but rounded (half up). + */ + public final ColumnVector dateTimeRound(DateTimeRoundingFrequency freq) { + assert type.isTimestampType(); + return new ColumnVector(dateTimeRound(getNativeView(), freq.getNativeId())); + } + /** * Rounds all the values in a column to the specified number of decimal places. * @@ -4684,19 +4736,7 @@ private static native long segmentedGather(long sourceColumnHandle, long gatherM private static native long unaryOperation(long viewHandle, int op); - private static native long year(long viewHandle) throws CudfException; - - private static native long month(long viewHandle) throws CudfException; - - private static native long day(long viewHandle) throws CudfException; - - private static native long hour(long viewHandle) throws CudfException; - - private static native long minute(long viewHandle) throws CudfException; - - private static native long second(long viewHandle) throws CudfException; - - private static native long weekDay(long viewHandle) throws CudfException; + private static native long extractDateTimeComponent(long viewHandle, int component); private static native long lastDayOfMonth(long viewHandle) throws CudfException; @@ -4706,8 +4746,18 @@ private static native long segmentedGather(long sourceColumnHandle, long gatherM private static native long addCalendricalMonths(long tsViewHandle, long monthsViewHandle); + private static native long addScalarCalendricalMonths(long tsViewHandle, long scalarHandle); + private static native long isLeapYear(long viewHandle) throws CudfException; + private static native long daysInMonth(long viewHandle) throws 
CudfException; + + private static native long dateTimeCeil(long viewHandle, int freq); + + private static native long dateTimeFloor(long viewHandle, int freq); + + private static native long dateTimeRound(long viewHandle, int freq); + private static native boolean containsScalar(long columnViewHaystack, long scalarHandle) throws CudfException; private static native long containsVector(long valuesHandle, long searchSpaceHandle) throws CudfException; diff --git a/java/src/main/java/ai/rapids/cudf/DateTimeComponent.java b/java/src/main/java/ai/rapids/cudf/DateTimeComponent.java new file mode 100644 index 00000000000..0f1618e29fb --- /dev/null +++ b/java/src/main/java/ai/rapids/cudf/DateTimeComponent.java @@ -0,0 +1,74 @@ +/* + * + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package ai.rapids.cudf; + +/** + * Types of datetime components that may be extracted. 
+ */ +public enum DateTimeComponent { + /** + * year as an INT16 + */ + YEAR(0), + /** + * month 1 - jan, as an INT16 + */ + MONTH(1), + /** + * Day of the month as an INT16 + */ + DAY(2), + /** + * day of the week, Monday=1, ..., Sunday=7 as an INT16 + */ + WEEKDAY(3), + /** + * hour of the day 24-hour clock as an INT16 + */ + HOUR(4), + /** + * minutes past the hour as an INT16 + */ + MINUTE(5), + /** + * seconds past the minute as an INT16 + */ + SECOND(6), + /** + * milliseconds past the seconds as an INT16 + */ + MILLISECOND(7), + /** + * microseconds past the millisecond as an INT16 + */ + MICROSECOND(8), + /** + * nanoseconds past the microsecond as an INT16 + */ + NANOSECOND(9); + + final int id; + DateTimeComponent(int id) { + this.id = id; + } + + public int getNativeId() { + return id; + } +} diff --git a/java/src/main/java/ai/rapids/cudf/DateTimeRoundingFrequency.java b/java/src/main/java/ai/rapids/cudf/DateTimeRoundingFrequency.java new file mode 100644 index 00000000000..44a7a2f279d --- /dev/null +++ b/java/src/main/java/ai/rapids/cudf/DateTimeRoundingFrequency.java @@ -0,0 +1,38 @@ +/* + * + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package ai.rapids.cudf; + +public enum DateTimeRoundingFrequency { + DAY(0), + HOUR(1), + MINUTE(2), + SECOND(3), + MILLISECOND(4), + MICROSECOND(5), + NANOSECOND(6); + + final int id; + DateTimeRoundingFrequency(int id) { + this.id = id; + } + + public int getNativeId() { + return id; + } +} diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 72f0ad19912..74aeafb01b7 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -127,6 +127,19 @@ std::size_t calc_device_memory_size(cudf::column_view const& view, bool const pa }); } +cudf::datetime::rounding_frequency as_rounding_freq(jint freq) { + switch (freq) { + case 0: return cudf::datetime::rounding_frequency::DAY; + case 1: return cudf::datetime::rounding_frequency::HOUR; + case 2: return cudf::datetime::rounding_frequency::MINUTE; + case 3: return cudf::datetime::rounding_frequency::SECOND; + case 4: return cudf::datetime::rounding_frequency::MILLISECOND; + case 5: return cudf::datetime::rounding_frequency::MICROSECOND; + case 6: return cudf::datetime::rounding_frequency::NANOSECOND; + default: throw std::invalid_argument("Invalid rounding_frequency"); + } +} + } // anonymous namespace extern "C" { @@ -1099,147 +1112,165 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_round( CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_year(JNIEnv* env, jclass, jlong input_ptr) +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_extractDateTimeComponent( + JNIEnv* env, jclass, jlong input_ptr, jint component) { JNI_NULL_CHECK(env, input_ptr, "input is null", 0); try { cudf::jni::auto_set_device(env); cudf::column_view const* input = reinterpret_cast(input_ptr); - return release_as_jlong(cudf::datetime::extract_year(*input)); + cudf::datetime::datetime_component comp; + switch (component) { + case 0: comp = cudf::datetime::datetime_component::YEAR; break; + case 1: 
comp = cudf::datetime::datetime_component::MONTH; break; + case 2: comp = cudf::datetime::datetime_component::DAY; break; + case 3: comp = cudf::datetime::datetime_component::WEEKDAY; break; + case 4: comp = cudf::datetime::datetime_component::HOUR; break; + case 5: comp = cudf::datetime::datetime_component::MINUTE; break; + case 6: comp = cudf::datetime::datetime_component::SECOND; break; + case 7: comp = cudf::datetime::datetime_component::MILLISECOND; break; + case 8: comp = cudf::datetime::datetime_component::MICROSECOND; break; + case 9: comp = cudf::datetime::datetime_component::NANOSECOND; break; + default: + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "Invalid component", 0); + } + return release_as_jlong(cudf::datetime::extract_datetime_component(*input, comp)); } CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_month(JNIEnv* env, jclass, jlong input_ptr) +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_lastDayOfMonth(JNIEnv* env, + jclass, + jlong input_ptr) { JNI_NULL_CHECK(env, input_ptr, "input is null", 0); try { cudf::jni::auto_set_device(env); cudf::column_view const* input = reinterpret_cast(input_ptr); - return release_as_jlong(cudf::datetime::extract_month(*input)); + return release_as_jlong(cudf::datetime::last_day_of_month(*input)); } CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_day(JNIEnv* env, jclass, jlong input_ptr) +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_dayOfYear(JNIEnv* env, + jclass, + jlong input_ptr) { JNI_NULL_CHECK(env, input_ptr, "input is null", 0); try { cudf::jni::auto_set_device(env); cudf::column_view const* input = reinterpret_cast(input_ptr); - return release_as_jlong(cudf::datetime::extract_day(*input)); + return release_as_jlong(cudf::datetime::day_of_year(*input)); } CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_hour(JNIEnv* env, jclass, jlong input_ptr) +JNIEXPORT jlong JNICALL 
Java_ai_rapids_cudf_ColumnView_quarterOfYear(JNIEnv* env, + jclass, + jlong input_ptr) { JNI_NULL_CHECK(env, input_ptr, "input is null", 0); try { cudf::jni::auto_set_device(env); cudf::column_view const* input = reinterpret_cast(input_ptr); - return release_as_jlong(cudf::datetime::extract_hour(*input)); + return release_as_jlong(cudf::datetime::extract_quarter(*input)); } CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_minute(JNIEnv* env, jclass, jlong input_ptr) +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_addCalendricalMonths(JNIEnv* env, + jclass, + jlong ts_ptr, + jlong months_ptr) { - JNI_NULL_CHECK(env, input_ptr, "input is null", 0); + JNI_NULL_CHECK(env, ts_ptr, "ts is null", 0); + JNI_NULL_CHECK(env, months_ptr, "months is null", 0); try { cudf::jni::auto_set_device(env); - cudf::column_view const* input = reinterpret_cast(input_ptr); - return release_as_jlong(cudf::datetime::extract_minute(*input)); + cudf::column_view const* ts = reinterpret_cast(ts_ptr); + cudf::column_view const* months = reinterpret_cast(months_ptr); + return release_as_jlong(cudf::datetime::add_calendrical_months(*ts, *months)); } CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_second(JNIEnv* env, jclass, jlong input_ptr) +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_addScalarCalendricalMonths(JNIEnv* env, + jclass, + jlong ts_ptr, + jlong months_ptr) { - JNI_NULL_CHECK(env, input_ptr, "input is null", 0); + JNI_NULL_CHECK(env, ts_ptr, "ts is null", 0); + JNI_NULL_CHECK(env, months_ptr, "months is null", 0); try { cudf::jni::auto_set_device(env); - cudf::column_view const* input = reinterpret_cast(input_ptr); - return release_as_jlong(cudf::datetime::extract_second(*input)); + cudf::column_view const* ts = reinterpret_cast(ts_ptr); + cudf::scalar const* months = reinterpret_cast(months_ptr); + return release_as_jlong(cudf::datetime::add_calendrical_months(*ts, *months)); } CATCH_STD(env, 0); } -JNIEXPORT 
jlong JNICALL Java_ai_rapids_cudf_ColumnView_weekDay(JNIEnv* env, jclass, jlong input_ptr) +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_isLeapYear(JNIEnv* env, + jclass, + jlong input_ptr) { JNI_NULL_CHECK(env, input_ptr, "input is null", 0); try { cudf::jni::auto_set_device(env); cudf::column_view const* input = reinterpret_cast(input_ptr); - return release_as_jlong(cudf::datetime::extract_weekday(*input)); + return release_as_jlong(cudf::datetime::is_leap_year(*input)); } CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_lastDayOfMonth(JNIEnv* env, - jclass, - jlong input_ptr) +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_daysInMonth(JNIEnv* env, + jclass, + jlong input_ptr) { JNI_NULL_CHECK(env, input_ptr, "input is null", 0); try { cudf::jni::auto_set_device(env); cudf::column_view const* input = reinterpret_cast(input_ptr); - return release_as_jlong(cudf::datetime::last_day_of_month(*input)); + return release_as_jlong(cudf::datetime::days_in_month(*input)); } CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_dayOfYear(JNIEnv* env, - jclass, - jlong input_ptr) +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_dateTimeCeil( + JNIEnv* env, jclass, jlong input_ptr, jint freq) { JNI_NULL_CHECK(env, input_ptr, "input is null", 0); try { cudf::jni::auto_set_device(env); cudf::column_view const* input = reinterpret_cast(input_ptr); - return release_as_jlong(cudf::datetime::day_of_year(*input)); + cudf::datetime::rounding_frequency n_freq = as_rounding_freq(freq); + return release_as_jlong(cudf::datetime::ceil_datetimes(*input, n_freq)); } CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_quarterOfYear(JNIEnv* env, - jclass, - jlong input_ptr) +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_dateTimeFloor( + JNIEnv* env, jclass, jlong input_ptr, jint freq) { JNI_NULL_CHECK(env, input_ptr, "input is null", 0); try { cudf::jni::auto_set_device(env); 
cudf::column_view const* input = reinterpret_cast(input_ptr); - return release_as_jlong(cudf::datetime::extract_quarter(*input)); + cudf::datetime::rounding_frequency n_freq = as_rounding_freq(freq); + return release_as_jlong(cudf::datetime::floor_datetimes(*input, n_freq)); } CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_addCalendricalMonths(JNIEnv* env, - jclass, - jlong ts_ptr, - jlong months_ptr) -{ - JNI_NULL_CHECK(env, ts_ptr, "ts is null", 0); - JNI_NULL_CHECK(env, months_ptr, "months is null", 0); - try { - cudf::jni::auto_set_device(env); - cudf::column_view const* ts = reinterpret_cast(ts_ptr); - cudf::column_view const* months = reinterpret_cast(months_ptr); - return release_as_jlong(cudf::datetime::add_calendrical_months(*ts, *months)); - } - CATCH_STD(env, 0); -} - -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_isLeapYear(JNIEnv* env, - jclass, - jlong input_ptr) +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_dateTimeRound( + JNIEnv* env, jclass, jlong input_ptr, jint freq) { JNI_NULL_CHECK(env, input_ptr, "input is null", 0); try { cudf::jni::auto_set_device(env); cudf::column_view const* input = reinterpret_cast(input_ptr); - return release_as_jlong(cudf::datetime::is_leap_year(*input)); + cudf::datetime::rounding_frequency n_freq = as_rounding_freq(freq); + return release_as_jlong(cudf::datetime::round_datetimes(*input, n_freq)); } CATCH_STD(env, 0); } diff --git a/java/src/test/java/ai/rapids/cudf/TimestampColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/TimestampColumnVectorTest.java index c22acac747e..84631abd281 100644 --- a/java/src/test/java/ai/rapids/cudf/TimestampColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/TimestampColumnVectorTest.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -237,6 +237,69 @@ public void getSecond() { } } + @Test + public void getExtractMillis() { + try (ColumnVector timestampColumnVector = ColumnVector.timestampMilliSecondsFromLongs(TIMES_MS)) { + assert timestampColumnVector.getType().equals(DType.TIMESTAMP_MILLISECONDS); + try (ColumnVector tmp = timestampColumnVector.extractDateTimeComponent(DateTimeComponent.MILLISECOND); + HostColumnVector result = tmp.copyToHost()) { + assertEquals(238, result.getShort(0)); + assertEquals(115, result.getShort(1)); + assertEquals(929, result.getShort(2)); + } + } + + try (ColumnVector timestampColumnVector = ColumnVector.timestampSecondsFromLongs(TIMES_S); + ColumnVector tmp = timestampColumnVector.extractDateTimeComponent(DateTimeComponent.MILLISECOND); + HostColumnVector result = tmp.copyToHost()) { + assertEquals(0, result.getShort(0)); + assertEquals(0, result.getShort(1)); + assertEquals(0, result.getShort(2)); + } + } + + @Test + public void getExtractMicro() { + try (ColumnVector timestampColumnVector = ColumnVector.timestampMicroSecondsFromLongs(TIMES_US)) { + assert timestampColumnVector.getType().equals(DType.TIMESTAMP_MICROSECONDS); + try (ColumnVector tmp = timestampColumnVector.extractDateTimeComponent(DateTimeComponent.MICROSECOND); + HostColumnVector result = tmp.copyToHost()) { + assertEquals(297, result.getShort(0)); + assertEquals(254, result.getShort(1)); + assertEquals(861, result.getShort(2)); + } + } + + try (ColumnVector timestampColumnVector = ColumnVector.timestampSecondsFromLongs(TIMES_S); + ColumnVector tmp = timestampColumnVector.extractDateTimeComponent(DateTimeComponent.MICROSECOND); + HostColumnVector result = tmp.copyToHost()) { + assertEquals(0, result.getShort(0)); + assertEquals(0, result.getShort(1)); + assertEquals(0, result.getShort(2)); + } + } + + @Test + public void getExtractNano() { + try (ColumnVector 
timestampColumnVector = ColumnVector.timestampNanoSecondsFromLongs(TIMES_NS)) { + assert timestampColumnVector.getType().equals(DType.TIMESTAMP_NANOSECONDS); + try (ColumnVector tmp = timestampColumnVector.extractDateTimeComponent(DateTimeComponent.NANOSECOND); + HostColumnVector result = tmp.copyToHost()) { + assertEquals(531, result.getShort(0)); + assertEquals(330, result.getShort(1)); + assertEquals(604, result.getShort(2)); + } + } + + try (ColumnVector timestampColumnVector = ColumnVector.timestampSecondsFromLongs(TIMES_S); + ColumnVector tmp = timestampColumnVector.extractDateTimeComponent(DateTimeComponent.NANOSECOND); + HostColumnVector result = tmp.copyToHost()) { + assertEquals(0, result.getShort(0)); + assertEquals(0, result.getShort(1)); + assertEquals(0, result.getShort(2)); + } + } + @Test public void testWeekDay() { try (ColumnVector timestampColumnVector = ColumnVector.timestampMilliSecondsFromLongs(TIMES_MS); @@ -350,6 +413,37 @@ public void testAddMonths() { } } + @Test + public void testAddMonthsScalar() { + long[] EXPECTED = new long[]{ + -129290327762L, //'1965-11-26 14:01:12.238' Friday + 1533384000115L, //'2018-08-04 12:00:00.115' Saturday + 1677310332929L, //'2023-02-25 07:32:12.929' Saturday + -129290327762L, //'1965-11-26 14:01:12.238' Friday + 1533384000115L}; //'2018-08-04 12:00:00.115' Saturday + try (ColumnVector timestampColumnVector = ColumnVector.timestampMilliSecondsFromLongs(TIMES_MS); + Scalar months = Scalar.fromShort((short)1); + ColumnVector result = timestampColumnVector.addCalendricalMonths(months); + ColumnVector expected = ColumnVector.timestampMilliSecondsFromLongs(EXPECTED)) { + assertColumnsAreEqual(expected, result); + } + } + + @Test + public void testDaysInMonth() { + short[] EXPECTED = new short[]{ + 31, //'1965-10-26 14:01:12.238' Tuesday + 31, //'2018-07-04 12:00:00.115' Wednesday + 31, //'2023-01-25 07:32:12.929' Wednesday + 31, //'1965-10-26 14:01:12.238' Tuesday + 31}; //'2018-07-04 12:00:00.115' Wednesday + 
try (ColumnVector timestampColumnVector = ColumnVector.timestampMilliSecondsFromLongs(TIMES_MS); + ColumnVector result = timestampColumnVector.daysInMonth(); + ColumnVector expected = ColumnVector.fromShorts(EXPECTED)) { + assertColumnsAreEqual(expected, result); + } + } + @Test public void testIsLeapYear() { Boolean[] EXPECTED = new Boolean[]{false, false, false, false, false}; @@ -383,6 +477,86 @@ public void testIsLeapYear() { } } + @Test + public void testCeilDays() { + long[] EXPECTED_NS = new long[]{ + -131932800000000000L, //'1965-10-27 00:00:00.000000000' + 1530748800000000000L, //'2018-07-05 00:00:00.000000000' + 1674691200000000000L, //'2023-01-26 00:00:00.000000000' + -131932800000000000L, //'1965-10-27 00:00:00.000000000' + 1530748800000000000L}; //'2018-07-05 00:00:00.000000000' + try (ColumnVector timestampColumnVector = ColumnVector.timestampNanoSecondsFromLongs(TIMES_NS); + ColumnVector result = timestampColumnVector.dateTimeCeil(DateTimeRoundingFrequency.DAY); + ColumnVector expected = ColumnVector.timestampNanoSecondsFromLongs(EXPECTED_NS)) { + assertColumnsAreEqual(expected, result); + } + long[] EXPECTED_US = new long[]{ + -131932800000000L, //'1965-10-27 00:00:00.000000' + 1530748800000000L, //'2018-07-05 00:00:00.000000' + 1674691200000000L, //'2023-01-26 00:00:00.000000' + -131932800000000L, //'1965-10-27 00:00:00.000000' + 1530748800000000L}; //'2018-07-05 00:00:00.000000' + try (ColumnVector timestampColumnVector = ColumnVector.timestampMicroSecondsFromLongs(TIMES_US); + ColumnVector result = timestampColumnVector.dateTimeCeil(DateTimeRoundingFrequency.DAY); + ColumnVector expected = ColumnVector.timestampMicroSecondsFromLongs(EXPECTED_US)) { + assertColumnsAreEqual(expected, result); + } + } + + @Test + public void testFloorDays() { + long[] EXPECTED_NS = new long[]{ + -132019200000000000L, //'1965-10-26 00:00:00.000000000' + 1530662400000000000L, //'2018-07-04 00:00:00.000000000' + 1674604800000000000L, //'2023-01-25 00:00:00.000000000' + 
-132019200000000000L, //'1965-10-26 00:00:00.000000000' + 1530662400000000000L}; //'2018-07-04 00:00:00.000000000' + try (ColumnVector timestampColumnVector = ColumnVector.timestampNanoSecondsFromLongs(TIMES_NS); + ColumnVector result = timestampColumnVector.dateTimeFloor(DateTimeRoundingFrequency.DAY); + ColumnVector expected = ColumnVector.timestampNanoSecondsFromLongs(EXPECTED_NS)) { + assertColumnsAreEqual(expected, result); + } + + long[] EXPECTED_US = new long[]{ + -132019200000000L, //'1965-10-26 00:00:00.000000' + 1530662400000000L, //'2018-07-04 00:00:00.000000' + 1674604800000000L, //'2023-01-25 00:00:00.000000' + -132019200000000L, //'1965-10-26 00:00:00.000000' + 1530662400000000L}; //'2018-07-04 00:00:00.000000' + try (ColumnVector timestampColumnVector = ColumnVector.timestampMicroSecondsFromLongs(TIMES_US); + ColumnVector result = timestampColumnVector.dateTimeFloor(DateTimeRoundingFrequency.DAY); + ColumnVector expected = ColumnVector.timestampMicroSecondsFromLongs(EXPECTED_US)) { + assertColumnsAreEqual(expected, result); + } + } + + @Test + public void testRoundDays() { + long[] EXPECTED_NS = new long[]{ + -131932800000000000L, //'1965-10-27 00:00:00.000000000' + 1530748800000000000L, //'2018-07-05 00:00:00.000000000' + 1674604800000000000L, //'2023-01-25 00:00:00.000000000' + -131932800000000000L, //'1965-10-27 00:00:00.000000000' + 1530748800000000000L}; //'2018-07-05 00:00:00.000000000' + try (ColumnVector timestampColumnVector = ColumnVector.timestampNanoSecondsFromLongs(TIMES_NS); + ColumnVector result = timestampColumnVector.dateTimeRound(DateTimeRoundingFrequency.DAY); + ColumnVector expected = ColumnVector.timestampNanoSecondsFromLongs(EXPECTED_NS)) { + assertColumnsAreEqual(expected, result); + } + + long[] EXPECTED_US = new long[]{ + -131932800000000L, //'1965-10-27 00:00:00.000000' + 1530748800000000L, //'2018-07-05 00:00:00.000000' + 1674604800000000L, //'2023-01-25 00:00:00.000000' + -131932800000000L, //'1965-10-27 00:00:00.000000' + 
1530748800000000L}; //'2018-07-05 00:00:00.000000' + try (ColumnVector timestampColumnVector = ColumnVector.timestampMicroSecondsFromLongs(TIMES_US); + ColumnVector result = timestampColumnVector.dateTimeRound(DateTimeRoundingFrequency.DAY); + ColumnVector expected = ColumnVector.timestampMicroSecondsFromLongs(EXPECTED_US)) { + assertColumnsAreEqual(expected, result); + } + } + @Test public void testCastToTimestamp() { try (ColumnVector timestampMillis = ColumnVector.timestampMilliSecondsFromLongs(TIMES_MS); From 6b61f1d341741b138eb0b33d08888e862b631bcc Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Thu, 14 Nov 2024 15:55:01 -0600 Subject: [PATCH 2/4] Formatting --- java/src/main/native/src/ColumnViewJni.cpp | 50 +++++++++++++--------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 74aeafb01b7..906aad84ec3 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -127,7 +127,8 @@ std::size_t calc_device_memory_size(cudf::column_view const& view, bool const pa }); } -cudf::datetime::rounding_frequency as_rounding_freq(jint freq) { +cudf::datetime::rounding_frequency as_rounding_freq(jint freq) +{ switch (freq) { case 0: return cudf::datetime::rounding_frequency::DAY; case 1: return cudf::datetime::rounding_frequency::HOUR; @@ -1112,8 +1113,10 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_round( CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_extractDateTimeComponent( - JNIEnv* env, jclass, jlong input_ptr, jint component) +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_extractDateTimeComponent(JNIEnv* env, + jclass, + jlong input_ptr, + jint component) { JNI_NULL_CHECK(env, input_ptr, "input is null", 0); try { @@ -1131,8 +1134,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_extractDateTimeComponent( case 7: comp = 
cudf::datetime::datetime_component::MILLISECOND; break; case 8: comp = cudf::datetime::datetime_component::MICROSECOND; break; case 9: comp = cudf::datetime::datetime_component::NANOSECOND; break; - default: - JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "Invalid component", 0); + default: JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "Invalid component", 0); } return release_as_jlong(cudf::datetime::extract_datetime_component(*input, comp)); } @@ -1195,16 +1197,16 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_addCalendricalMonths(JNIE } JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_addScalarCalendricalMonths(JNIEnv* env, - jclass, - jlong ts_ptr, - jlong months_ptr) + jclass, + jlong ts_ptr, + jlong months_ptr) { JNI_NULL_CHECK(env, ts_ptr, "ts is null", 0); JNI_NULL_CHECK(env, months_ptr, "months is null", 0); try { cudf::jni::auto_set_device(env); - cudf::column_view const* ts = reinterpret_cast(ts_ptr); - cudf::scalar const* months = reinterpret_cast(months_ptr); + cudf::column_view const* ts = reinterpret_cast(ts_ptr); + cudf::scalar const* months = reinterpret_cast(months_ptr); return release_as_jlong(cudf::datetime::add_calendrical_months(*ts, *months)); } CATCH_STD(env, 0); @@ -1224,8 +1226,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_isLeapYear(JNIEnv* env, } JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_daysInMonth(JNIEnv* env, - jclass, - jlong input_ptr) + jclass, + jlong input_ptr) { JNI_NULL_CHECK(env, input_ptr, "input is null", 0); try { @@ -1236,39 +1238,45 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_daysInMonth(JNIEnv* env, CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_dateTimeCeil( - JNIEnv* env, jclass, jlong input_ptr, jint freq) +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_dateTimeCeil(JNIEnv* env, + jclass, + jlong input_ptr, + jint freq) { JNI_NULL_CHECK(env, input_ptr, "input is null", 0); try { 
cudf::jni::auto_set_device(env); - cudf::column_view const* input = reinterpret_cast(input_ptr); + cudf::column_view const* input = reinterpret_cast(input_ptr); cudf::datetime::rounding_frequency n_freq = as_rounding_freq(freq); return release_as_jlong(cudf::datetime::ceil_datetimes(*input, n_freq)); } CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_dateTimeFloor( - JNIEnv* env, jclass, jlong input_ptr, jint freq) +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_dateTimeFloor(JNIEnv* env, + jclass, + jlong input_ptr, + jint freq) { JNI_NULL_CHECK(env, input_ptr, "input is null", 0); try { cudf::jni::auto_set_device(env); - cudf::column_view const* input = reinterpret_cast(input_ptr); + cudf::column_view const* input = reinterpret_cast(input_ptr); cudf::datetime::rounding_frequency n_freq = as_rounding_freq(freq); return release_as_jlong(cudf::datetime::floor_datetimes(*input, n_freq)); } CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_dateTimeRound( - JNIEnv* env, jclass, jlong input_ptr, jint freq) +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_dateTimeRound(JNIEnv* env, + jclass, + jlong input_ptr, + jint freq) { JNI_NULL_CHECK(env, input_ptr, "input is null", 0); try { cudf::jni::auto_set_device(env); - cudf::column_view const* input = reinterpret_cast(input_ptr); + cudf::column_view const* input = reinterpret_cast(input_ptr); cudf::datetime::rounding_frequency n_freq = as_rounding_freq(freq); return release_as_jlong(cudf::datetime::round_datetimes(*input, n_freq)); } From bd9eff0d640524f16f6818f2140e4690f5dbe615 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Thu, 14 Nov 2024 16:00:12 -0600 Subject: [PATCH 3/4] Review comments --- java/src/main/java/ai/rapids/cudf/ColumnView.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 
410b4d17ec3..67c7e6084f1 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -1050,7 +1050,7 @@ public final ColumnVector addCalendricalMonths(ColumnView months) { /** * Add the specified number of months to the timestamp. - * @param months must be a INT16 column indicating the number of months to add. A negative number + * @param months must be an INT16 scalar indicating the number of months to add. A negative number * of months works too. * @return the updated timestamp */ @@ -1068,7 +1068,7 @@ public final ColumnVector isLeapYear() { /** * Extract the number of days in the month - * @return INT16 column of the number of days in the correspond month + * @return INT16 column of the number of days in the corresponding month */ public final ColumnVector daysInMonth() { assert type.isTimestampType(); From c215ed8213988179d4d5c2f7c1664b104db346d6 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Thu, 14 Nov 2024 16:50:15 -0600 Subject: [PATCH 4/4] Better test Signed-off-by: Robert (Bobby) Evans --- .../cudf/TimestampColumnVectorTest.java | 34 +++++++++++++++---- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/java/src/test/java/ai/rapids/cudf/TimestampColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/TimestampColumnVectorTest.java index 84631abd281..bac83310c99 100644 --- a/java/src/test/java/ai/rapids/cudf/TimestampColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/TimestampColumnVectorTest.java @@ -431,13 +431,35 @@ public void testAddMonthsScalar() { @Test public void testDaysInMonth() { + Integer[] DAYS = new Integer[] { + 0, // Jan 1, 1970 + 31, // Feb 1, 1970 + 59, // Mar 1, 1970 + 90, // Apr 1, 1970 + 120, // May 1, 1970 + 151, // June 1, 1970 + 181, // July 1, 1970 + 212, // Aug 1, 1970 + 243, // Sep 1, 1970 + 273, // Oct 1, 1970 + 304, // Nov 1, 1970 + 334 // Dec 1, 1970 + }; short[] EXPECTED = new short[]{ - 31, //'1965-10-26 14:01:12.238' Tuesday -
31, //'2018-07-04 12:00:00.115' Wednesday - 31, //'2023-01-25 07:32:12.929' Wednesday - 31, //'1965-10-26 14:01:12.238' Tuesday - 31}; //'2018-07-04 12:00:00.115' Wednesday - try (ColumnVector timestampColumnVector = ColumnVector.timestampMilliSecondsFromLongs(TIMES_MS); + 31, // Jan 1970 + 28, // Feb 1970 + 31, // Mar 1970 + 30, // Apr 1970 + 31, // May 1970 + 30, // June 1970 + 31, // July 1970 + 31, // Aug 1970 + 30, // Sep 1970 + 31, // Oct 1970 + 30, // Nov 1970 + 31 // Dec 1970 + }; + try (ColumnVector timestampColumnVector = ColumnVector.timestampDaysFromBoxedInts(DAYS); ColumnVector result = timestampColumnVector.daysInMonth(); ColumnVector expected = ColumnVector.fromShorts(EXPECTED)) { assertColumnsAreEqual(expected, result);