Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(function): cast timestamp to variant string instead of variant int #8580

Merged
merged 3 commits into from
Nov 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions src/query/expression/src/evaluator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ impl<'a> Evaluator<'a> {
}
(scalar, DataType::Variant) => {
let mut buf = Vec::new();
cast_scalar_to_variant(scalar.as_ref(), &mut buf);
cast_scalar_to_variant(scalar.as_ref(), self.tz, &mut buf);
Ok(Scalar::Variant(buf))
}

Expand Down Expand Up @@ -250,11 +250,11 @@ impl<'a> Evaluator<'a> {
}

(Scalar::Timestamp(ts), DataType::String) => Ok(Scalar::String(
timestamp_to_string(ts, &self.tz).as_bytes().to_vec(),
timestamp_to_string(ts, self.tz).as_bytes().to_vec(),
)),

(Scalar::Date(d), DataType::String) => Ok(Scalar::String(
date_to_string(d, &self.tz).as_bytes().to_vec(),
date_to_string(d, self.tz).as_bytes().to_vec(),
)),

// identical types
Expand Down Expand Up @@ -327,7 +327,7 @@ impl<'a> Evaluator<'a> {
})
}
(col, DataType::Variant) => {
let new_col = Column::Variant(cast_scalars_to_variants(col.iter()));
let new_col = Column::Variant(cast_scalars_to_variants(col.iter(), self.tz));
Ok(new_col)
}

Expand Down Expand Up @@ -414,7 +414,7 @@ impl<'a> Evaluator<'a> {
// "YYYY-mm-DD HH:MM:SS.ssssss"
let mut builder = StringColumnBuilder::with_capacity(col.len(), col.len() * 26);
for val in col.iter() {
let s = timestamp_to_string(*val, &self.tz);
let s = timestamp_to_string(*val, self.tz);
builder.put_str(s.as_str());
builder.commit_row();
}
Expand All @@ -426,7 +426,7 @@ impl<'a> Evaluator<'a> {
// "YYYY-mm-DD"
let mut builder = StringColumnBuilder::with_capacity(col.len(), col.len() * 10);
for &val in col.iter() {
let s = date_to_string(val, &self.tz);
let s = date_to_string(val, self.tz);
builder.put_str(s.as_str());
builder.commit_row();
}
Expand Down Expand Up @@ -511,7 +511,7 @@ impl<'a> Evaluator<'a> {
}))
}
(col, DataType::Variant) => {
let new_col = Column::Variant(cast_scalars_to_variants(col.iter()));
let new_col = Column::Variant(cast_scalars_to_variants(col.iter(), self.tz));
Column::Nullable(Box::new(NullableColumn {
validity: constant_bitmap(true, new_col.len()).into(),
column: new_col,
Expand Down Expand Up @@ -621,7 +621,7 @@ impl<'a> Evaluator<'a> {
// "YYYY-mm-DD HH:MM:SS.ssssss"
let mut builder = StringColumnBuilder::with_capacity(col.len(), col.len() * 26);
for val in col.iter() {
let s = timestamp_to_string(*val, &self.tz);
let s = timestamp_to_string(*val, self.tz);
builder.put_str(s.as_str());
builder.commit_row();
}
Expand All @@ -637,7 +637,7 @@ impl<'a> Evaluator<'a> {
// "YYYY-mm-DD"
let mut builder = StringColumnBuilder::with_capacity(col.len(), col.len() * 10);
for &val in col.iter() {
let s = date_to_string(val, &self.tz);
let s = date_to_string(val, self.tz);
builder.put_str(s.as_str());
builder.commit_row();
}
Expand Down
27 changes: 16 additions & 11 deletions src/query/expression/src/types/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use common_datavalues::DateConverter;
use common_io::prelude::BufferReadDateTimeExt;
use common_io::prelude::BufferReadExt;
use common_io::prelude::BufferReader;
use num_traits::AsPrimitive;

use super::number::SimpleDomain;
use crate::property::Domain;
Expand All @@ -29,24 +30,28 @@ use crate::types::DataType;
use crate::types::GenericMap;
use crate::types::ValueType;
use crate::utils::arrow::buffer_into_mut;
use crate::utils::display::display_date;
use crate::values::Column;
use crate::values::Scalar;
use crate::ColumnBuilder;
use crate::ScalarRef;

/// date ranges from 1000-01-01 to 9999-12-31
/// date_max and date_min means days offset from 1970-01-01
/// any date not in the range will be invalid
pub const DATE_MAX: i32 = 2932896;
pub const DATE_FORMAT: &str = "%Y-%m-%d";
/// Minimum valid date, represented by the day offset from 1970-01-01.
pub const DATE_MIN: i32 = -354285;
/// Maximum valid date, represented by the day offset from 1970-01-01.
pub const DATE_MAX: i32 = 2932896;

/// Check if date is within range.
#[inline]
pub fn check_date(days: i64) -> Result<i32, String> {
if (DATE_MIN as i64..=DATE_MAX as i64).contains(&days) {
return Ok(days as i32);
Ok(days as i32)
} else {
Err(format!(
"date `{}` is out of range",
date_to_string(days, chrono_tz::Tz::UTC)
))
}
Err(format!("date `{}` is out of range", display_date(days)))
}

#[derive(Debug, Clone, PartialEq, Eq)]
Expand Down Expand Up @@ -191,9 +196,9 @@ impl ArgType for DateType {
}

#[inline]
pub fn string_to_date(date_str: impl AsRef<[u8]>, tz: &Tz) -> Option<NaiveDate> {
pub fn string_to_date(date_str: impl AsRef<[u8]>, tz: Tz) -> Option<NaiveDate> {
let mut reader = BufferReader::new(std::str::from_utf8(date_str.as_ref()).unwrap().as_bytes());
match reader.read_date_text(tz) {
match reader.read_date_text(&tz) {
Ok(d) => match reader.must_eof() {
Ok(..) => Some(d),
Err(_) => None,
Expand All @@ -203,6 +208,6 @@ pub fn string_to_date(date_str: impl AsRef<[u8]>, tz: &Tz) -> Option<NaiveDate>
}

#[inline]
pub fn date_to_string(date: i32, tz: &Tz) -> String {
date.to_date(tz).format("%Y-%m-%d").to_string()
pub fn date_to_string(date: impl AsPrimitive<i32>, tz: Tz) -> String {
date.as_().to_date(&tz).format(DATE_FORMAT).to_string()
}
54 changes: 11 additions & 43 deletions src/query/expression/src/types/timestamp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,37 +34,20 @@ use crate::values::Scalar;
use crate::ColumnBuilder;
use crate::ScalarRef;

pub const TIMESTAMP_FORMAT: &str = "%Y-%m-%d %H:%M:%S%.6f";
/// Minimum valid timestamp `1000-01-01 00:00:00.000000`, represented by the microsecs offset from 1970-01-01.
pub const TIMESTAMP_MIN: i64 = -30610224000000000;
/// Maximum valid timestamp `9999-12-31 23:59:59.999999`, represented by the microsecs offset from 1970-01-01.
pub const TIMESTAMP_MAX: i64 = 253402300799999999;

pub const MICROS_IN_A_SEC: i64 = 1_000_000;
pub const MICROS_IN_A_MILLI: i64 = 1_000;
pub const MICROS_IN_A_MICRO: i64 = 1;

/// timestamp ranges from 1000-01-01 00:00:00.000000 to 9999-12-31 23:59:59.999999
/// timestamp_max and timestamp_min means days offset from 1970-01-01 00:00:00.000000
/// any timestamp not in the range will be invalid
pub const TIMESTAMP_MAX: i64 = 253402300799999999;
pub const TIMESTAMP_MIN: i64 = -30610224000000000;
pub const MICROSECONDS: i64 = 1_000_000;

pub const PRECISION_MICRO: u8 = 6;
pub const PRECISION_MILLI: u8 = 3;
pub const PRECISION_SEC: u8 = 0;

/// check when converting number to timestamp and return precision and the base.
#[inline]
pub fn check_number_to_timestamp(n: i64) -> Result<i64, String> {
let base = if (-31536000000..=31536000000).contains(&n) {
MICROS_IN_A_SEC
} else if (-31536000000000..=31536000000000).contains(&n) {
MICROS_IN_A_MILLI
} else if (TIMESTAMP_MIN..=TIMESTAMP_MAX).contains(&n) {
MICROS_IN_A_MICRO
} else {
return Err(format!("timestamp `{}` is out of range", n));
};
Ok(n * base)
}

/// check the micros in timestamp value.
/// Check if the timestamp value is valid.
#[inline]
pub fn check_timestamp(micros: i64) -> Result<i64, String> {
if (TIMESTAMP_MIN..=TIMESTAMP_MAX).contains(&micros) {
Expand All @@ -73,7 +56,6 @@ pub fn check_timestamp(micros: i64) -> Result<i64, String> {
Err(format!("timestamp `{}` is out of range", micros))
}
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TimestampType;

Expand Down Expand Up @@ -223,22 +205,10 @@ pub fn microseconds_to_days(micros: i64) -> i32 {
(microseconds_to_seconds(micros) / 24 / 3600) as i32
}

pub fn get_format_string(precision: u8) -> String {
assert!(matches!(
precision,
PRECISION_MICRO | PRECISION_MILLI | PRECISION_SEC
));
if precision == 0 {
"%Y-%m-%d %H:%M:%S".to_string()
} else {
format!("%Y-%m-%d %H:%M:%S%.{}f", precision)
}
}

#[inline]
pub fn string_to_timestamp(ts_str: impl AsRef<[u8]>, tz: &Tz) -> Option<DateTime<Tz>> {
pub fn string_to_timestamp(ts_str: impl AsRef<[u8]>, tz: Tz) -> Option<DateTime<Tz>> {
let mut reader = BufferReader::new(std::str::from_utf8(ts_str.as_ref()).unwrap().as_bytes());
match reader.read_timestamp_text(tz) {
match reader.read_timestamp_text(&tz) {
Ok(dt) => match reader.must_eof() {
Ok(..) => Some(dt),
Err(_) => None,
Expand All @@ -248,8 +218,6 @@ pub fn string_to_timestamp(ts_str: impl AsRef<[u8]>, tz: &Tz) -> Option<DateTime
}

#[inline]
pub fn timestamp_to_string(ts: i64, tz: &Tz) -> String {
ts.to_timestamp(tz)
.format(get_format_string(PRECISION_MICRO).as_str())
.to_string()
pub fn timestamp_to_string(ts: i64, tz: Tz) -> String {
ts.to_timestamp(tz).format(TIMESTAMP_FORMAT).to_string()
}
21 changes: 14 additions & 7 deletions src/query/expression/src/types/variant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@

use std::ops::Range;

use common_datavalues::Tz;

use super::date::date_to_string;
use super::number::NumberScalar;
use super::timestamp::timestamp_to_string;
use crate::property::Domain;
use crate::types::string::StringColumn;
use crate::types::string::StringColumnBuilder;
Expand Down Expand Up @@ -156,7 +160,7 @@ impl ArgType for VariantType {
}
}

pub fn cast_scalar_to_variant(scalar: ScalarRef, buf: &mut Vec<u8>) {
pub fn cast_scalar_to_variant(scalar: ScalarRef, tz: Tz, buf: &mut Vec<u8>) {
let value = match scalar {
ScalarRef::Null => common_jsonb::Value::Null,
ScalarRef::EmptyArray => common_jsonb::Value::Array(vec![]),
Expand All @@ -174,15 +178,15 @@ pub fn cast_scalar_to_variant(scalar: ScalarRef, buf: &mut Vec<u8>) {
},
ScalarRef::Boolean(b) => common_jsonb::Value::Bool(b),
ScalarRef::String(s) => common_jsonb::Value::String(String::from_utf8_lossy(s)),
ScalarRef::Timestamp(ts) => ts.into(),
ScalarRef::Date(d) => d.into(),
ScalarRef::Timestamp(ts) => timestamp_to_string(ts, tz).into(),
ScalarRef::Date(d) => date_to_string(d, tz).into(),
ScalarRef::Array(col) => {
let items = cast_scalars_to_variants(col.iter());
let items = cast_scalars_to_variants(col.iter(), tz);
common_jsonb::build_array(items.iter(), buf).expect("failed to build jsonb array");
return;
}
ScalarRef::Tuple(fields) => {
let values = cast_scalars_to_variants(fields);
let values = cast_scalars_to_variants(fields, tz);
common_jsonb::build_object(
values
.iter()
Expand All @@ -201,11 +205,14 @@ pub fn cast_scalar_to_variant(scalar: ScalarRef, buf: &mut Vec<u8>) {
value.to_vec(buf);
}

pub fn cast_scalars_to_variants(scalars: impl IntoIterator<Item = ScalarRef>) -> StringColumn {
pub fn cast_scalars_to_variants(
scalars: impl IntoIterator<Item = ScalarRef>,
tz: Tz,
) -> StringColumn {
let iter = scalars.into_iter();
let mut builder = StringColumnBuilder::with_capacity(iter.size_hint().0, 0);
for scalar in iter {
cast_scalar_to_variant(scalar, &mut builder.data);
cast_scalar_to_variant(scalar, tz, &mut builder.data);
builder.commit_row();
}
builder.build()
Expand Down
41 changes: 19 additions & 22 deletions src/query/expression/src/utils/date_helper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,20 @@ use crate::types::date::check_date;
use crate::types::timestamp::check_timestamp;

pub trait DateConverter {
fn to_date(&self, tz: &Tz) -> Date<Tz>;
fn to_timestamp(&self, tz: &Tz) -> DateTime<Tz>;
fn to_date(&self, tz: Tz) -> Date<Tz>;
fn to_timestamp(&self, tz: Tz) -> DateTime<Tz>;
}

impl<T> DateConverter for T
where T: AsPrimitive<i64>
{
fn to_date(&self, tz: &Tz) -> Date<Tz> {
fn to_date(&self, tz: Tz) -> Date<Tz> {
let mut dt = tz.ymd(1970, 1, 1);
dt = dt.checked_add_signed(Duration::days(self.as_())).unwrap();
dt
}

fn to_timestamp(&self, tz: &Tz) -> DateTime<Tz> {
fn to_timestamp(&self, tz: Tz) -> DateTime<Tz> {
// Can't use `tz.timestamp_nanos(self.as_() * 1000)` directly, as it may cause a multiplication overflow.
let micros = self.as_();
let (mut secs, mut nanos) = (micros / 1_000_000, (micros % 1_000_000) * 1_000);
Expand Down Expand Up @@ -110,22 +110,19 @@ macro_rules! impl_interval_year_month {

impl $name {
pub fn eval_date(date: i32, delta: impl AsPrimitive<i64>) -> Result<i32, String> {
let date = date.to_date(&Tz::UTC);
let new_date = $op(date.year(), date.month(), date.day(), delta.as_());
new_date.and_then(|d| {
check_date(
d.signed_duration_since(NaiveDate::from_ymd(1970, 1, 1))
.num_days(),
)
})
let date = date.to_date(Tz::UTC);
let new_date = $op(date.year(), date.month(), date.day(), delta.as_())?;
check_date(
new_date
.signed_duration_since(NaiveDate::from_ymd(1970, 1, 1))
.num_days(),
)
}

pub fn eval_timestamp(ts: i64, delta: impl AsPrimitive<i64>) -> Result<i64, String> {
let ts = ts.to_timestamp(&Tz::UTC);
let new_ts = $op(ts.year(), ts.month(), ts.day(), delta.as_());
new_ts.and_then(|t| {
check_timestamp(NaiveDateTime::new(t, ts.time()).timestamp_micros())
})
let ts = ts.to_timestamp(Tz::UTC);
let new_ts = $op(ts.year(), ts.month(), ts.day(), delta.as_())?;
check_timestamp(NaiveDateTime::new(new_ts, ts.time()).timestamp_micros())
}
}
};
Expand Down Expand Up @@ -179,12 +176,12 @@ pub trait ToNumber<N> {
pub struct ToNumberImpl;

impl ToNumberImpl {
pub fn eval_timestamp<T: ToNumber<R>, R>(ts: i64, tz: &Tz) -> R {
pub fn eval_timestamp<T: ToNumber<R>, R>(ts: i64, tz: Tz) -> R {
let dt = ts.to_timestamp(tz);
T::to_number(&dt)
}

pub fn eval_date<T: ToNumber<R>, R>(date: i32, tz: &Tz) -> R {
pub fn eval_date<T: ToNumber<R>, R>(date: i32, tz: Tz) -> R {
let dt = date.to_date(tz).and_hms(0, 0, 0);
T::to_number(&dt)
}
Expand Down Expand Up @@ -285,7 +282,7 @@ pub enum Round {
Day,
}

pub fn round_timestamp(ts: i64, tz: &Tz, round: Round) -> i64 {
pub fn round_timestamp(ts: i64, tz: Tz, round: Round) -> i64 {
let dt = tz.timestamp(ts / 1_000_000, 0_u32);
let res = match round {
Round::Second => dt,
Expand Down Expand Up @@ -330,12 +327,12 @@ pub fn round_timestamp(ts: i64, tz: &Tz, round: Round) -> i64 {
pub struct DateRounder;

impl DateRounder {
pub fn eval_timestamp<T: ToNumber<i32>>(ts: i64, tz: &Tz) -> i32 {
pub fn eval_timestamp<T: ToNumber<i32>>(ts: i64, tz: Tz) -> i32 {
let dt = ts.to_timestamp(tz);
T::to_number(&dt)
}

pub fn eval_date<T: ToNumber<i32>>(date: i32, tz: &Tz) -> i32 {
pub fn eval_date<T: ToNumber<i32>>(date: i32, tz: Tz) -> i32 {
let dt = date.to_date(tz).and_hms(0, 0, 0);
T::to_number(&dt)
}
Expand Down
Loading