Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add DictionaryVector DataType #1061

Merged
merged 14 commits into from
Feb 23, 2023
4 changes: 3 additions & 1 deletion src/api/src/helper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,9 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
TimestampType::Microsecond(_) => ColumnDataType::TimestampMicrosecond,
TimestampType::Nanosecond(_) => ColumnDataType::TimestampNanosecond,
},
ConcreteDataType::Null(_) | ConcreteDataType::List(_) => {
ConcreteDataType::Null(_)
| ConcreteDataType::List(_)
| ConcreteDataType::Dictionary(_) => {
return error::IntoColumnDataTypeSnafu { from: datatype }.fail()
}
});
Expand Down
5 changes: 3 additions & 2 deletions src/common/grpc-expr/src/insert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -419,8 +419,9 @@ fn convert_values(data_type: &ConcreteDataType, values: Values) -> Vec<Value> {
.into_iter()
.map(|v| Value::Timestamp(Timestamp::new_millisecond(v)))
.collect(),
ConcreteDataType::Null(_) => unreachable!(),
ConcreteDataType::List(_) => unreachable!(),
ConcreteDataType::Null(_) | ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) => {
unreachable!()
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/common/grpc/src/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ macro_rules! convert_arrow_array_to_grpc_vals {
return Ok(vals);
},
)+
ConcreteDataType::Null(_) | ConcreteDataType::List(_) => unreachable!("Should not send {:?} in gRPC", $data_type),
ConcreteDataType::Null(_) | ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) => unreachable!("Should not send {:?} in gRPC", $data_type),
}
}};
}
Expand Down
4 changes: 3 additions & 1 deletion src/common/substrait/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,9 @@ pub fn from_concrete_type(ty: ConcreteDataType, nullability: Option<bool>) -> Re
ConcreteDataType::Timestamp(_) => {
build_substrait_kind!(Timestamp, Timestamp, nullability, 0)
}
ConcreteDataType::List(_) => UnsupportedConcreteTypeSnafu { ty }.fail()?,
ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) => {
UnsupportedConcreteTypeSnafu { ty }.fail()?
}
};

Ok(SType { kind })
Expand Down
51 changes: 26 additions & 25 deletions src/datanode/src/tests/promql_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,21 +103,22 @@ async fn sql_insert_tql_query_ceil() {
('host1', 49, 3333.3, 99000);
"#,
"TQL EVAL (0,100,10) ceil(http_requests_total{host=\"host1\"})",
"+---------------------+-------------------------------+----------------------------------+-------+\
\n| ts | ceil(http_requests_total.cpu) | ceil(http_requests_total.memory) | host |\
\n+---------------------+-------------------------------+----------------------------------+-------+\
\n| 1970-01-01T00:00:00 | 67 | 1024 | host1 |\
\n| 1970-01-01T00:00:10 | 100 | 20480 | host1 |\
\n| 1970-01-01T00:00:20 | 100 | 20480 | host1 |\
\n| 1970-01-01T00:00:30 | 32 | 8192 | host1 |\
\n| 1970-01-01T00:00:40 | 96 | 334 | host1 |\
\n| 1970-01-01T00:00:50 | 12424 | 1334 | host1 |\
\n| 1970-01-01T00:01:00 | 12424 | 1334 | host1 |\
\n| 1970-01-01T00:01:10 | 12424 | 1334 | host1 |\
\n| 1970-01-01T00:01:20 | 0 | 2334 | host1 |\
\n| 1970-01-01T00:01:30 | 0 | 2334 | host1 |\
\n| 1970-01-01T00:01:40 | 49 | 3334 | host1 |\
\n+---------------------+-------------------------------+----------------------------------+-------+")
"+---------------------+-----------+--------------+-------+\
\n| ts | ceil(cpu) | ceil(memory) | host |\
\n+---------------------+-----------+--------------+-------+\
\n| 1970-01-01T00:00:00 | 67 | 1024 | host1 |\
\n| 1970-01-01T00:00:10 | 100 | 20480 | host1 |\
\n| 1970-01-01T00:00:20 | 100 | 20480 | host1 |\
\n| 1970-01-01T00:00:30 | 32 | 8192 | host1 |\
\n| 1970-01-01T00:00:40 | 96 | 334 | host1 |\
\n| 1970-01-01T00:00:50 | 12424 | 1334 | host1 |\
\n| 1970-01-01T00:01:00 | 12424 | 1334 | host1 |\
\n| 1970-01-01T00:01:10 | 12424 | 1334 | host1 |\
\n| 1970-01-01T00:01:20 | 0 | 2334 | host1 |\
\n| 1970-01-01T00:01:30 | 0 | 2334 | host1 |\
\n| 1970-01-01T00:01:40 | 49 | 3334 | host1 |\
\n+---------------------+-----------+--------------+-------+",
)
.await;
}

Expand Down Expand Up @@ -150,16 +151,16 @@ async fn sql_insert_promql_query_ceil() {
UNIX_EPOCH.checked_add(Duration::from_secs(100)).unwrap(),
Duration::from_secs(5),
Duration::from_secs(1),
"+---------------------+-------------------------------+----------------------------------+-------+\
\n| ts | ceil(http_requests_total.cpu) | ceil(http_requests_total.memory) | host |\
\n+---------------------+-------------------------------+----------------------------------+-------+\
\n| 1970-01-01T00:00:00 | 67 | 1024 | host1 |\
\n| 1970-01-01T00:00:05 | 67 | 4096 | host1 |\
\n| 1970-01-01T00:00:10 | 100 | 20480 | host1 |\
\n| 1970-01-01T00:01:20 | 0 | 2334 | host1 |\
\n| 1970-01-01T00:01:40 | 49 | 3334 | host1 |\
\n| 1970-01-01T00:00:50 | 12424 | 1334 | host1 |\
\n+---------------------+-------------------------------+----------------------------------+-------+"
"+---------------------+-----------+--------------+-------+\
\n| ts | ceil(cpu) | ceil(memory) | host |\
\n+---------------------+-----------+--------------+-------+\
\n| 1970-01-01T00:00:00 | 67 | 1024 | host1 |\
\n| 1970-01-01T00:00:05 | 67 | 4096 | host1 |\
\n| 1970-01-01T00:00:10 | 100 | 20480 | host1 |\
\n| 1970-01-01T00:00:50 | 12424 | 1334 | host1 |\
\n| 1970-01-01T00:01:20 | 0 | 2334 | host1 |\
\n| 1970-01-01T00:01:40 | 49 | 3334 | host1 |\
\n+---------------------+-----------+--------------+-------+",
)
.await;
}
Expand Down
21 changes: 17 additions & 4 deletions src/datatypes/src/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ use serde::{Deserialize, Serialize};
use crate::error::{self, Error, Result};
use crate::type_id::LogicalTypeId;
use crate::types::{
BinaryType, BooleanType, DateTimeType, DateType, Float32Type, Float64Type, Int16Type,
Int32Type, Int64Type, Int8Type, ListType, NullType, StringType, TimestampMicrosecondType,
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
UInt16Type, UInt32Type, UInt64Type, UInt8Type,
BinaryType, BooleanType, DateTimeType, DateType, DictionaryType, Float32Type, Float64Type,
Int16Type, Int32Type, Int64Type, Int8Type, ListType, NullType, StringType,
TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
TimestampSecondType, TimestampType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
};
use crate::value::Value;
use crate::vectors::MutableVector;
Expand Down Expand Up @@ -59,6 +59,7 @@ pub enum ConcreteDataType {

// Compound types:
List(ListType),
Dictionary(DictionaryType),
}

// TODO(yingwen): Refactor these `is_xxx()` methods, such as adding a `properties()` method
Expand Down Expand Up @@ -169,6 +170,11 @@ impl TryFrom<&ArrowDataType> for ConcreteDataType {
ArrowDataType::List(field) => Self::List(ListType::new(
ConcreteDataType::from_arrow_type(field.data_type()),
)),
ArrowDataType::Dictionary(key_type, value_type) => {
let key_type = ConcreteDataType::from_arrow_type(key_type);
let value_type = ConcreteDataType::from_arrow_type(value_type);
Self::Dictionary(DictionaryType::new(key_type, value_type))
}
_ => {
return error::UnsupportedArrowTypeSnafu {
arrow_type: dt.clone(),
Expand Down Expand Up @@ -243,6 +249,13 @@ impl ConcreteDataType {
pub fn list_datatype(item_type: ConcreteDataType) -> ConcreteDataType {
ConcreteDataType::List(ListType::new(item_type))
}

pub fn dictionary_datatype(
key_type: ConcreteDataType,
value_type: ConcreteDataType,
) -> ConcreteDataType {
ConcreteDataType::Dictionary(DictionaryType::new(key_type, value_type))
}
}

/// Data type abstraction.
Expand Down
5 changes: 5 additions & 0 deletions src/datatypes/src/type_id.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ pub enum LogicalTypeId {
TimestampNanosecond,

List,
Dictionary,
}

impl LogicalTypeId {
Expand Down Expand Up @@ -88,6 +89,10 @@ impl LogicalTypeId {
LogicalTypeId::List => {
ConcreteDataType::list_datatype(ConcreteDataType::null_datatype())
}
LogicalTypeId::Dictionary => ConcreteDataType::dictionary_datatype(
ConcreteDataType::null_datatype(),
ConcreteDataType::null_datatype(),
),
}
}
}
3 changes: 2 additions & 1 deletion src/datatypes/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,18 @@ mod binary_type;
mod boolean_type;
mod date_type;
mod datetime_type;
mod dictionary_type;
mod list_type;
mod null_type;
mod primitive_type;
mod string_type;

mod timestamp_type;

pub use binary_type::BinaryType;
pub use boolean_type::BooleanType;
pub use date_type::DateType;
pub use datetime_type::DateTimeType;
pub use dictionary_type::DictionaryType;
pub use list_type::ListType;
pub use null_type::NullType;
pub use primitive_type::{
Expand Down
91 changes: 91 additions & 0 deletions src/datatypes/src/types/dictionary_type.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use arrow::datatypes::DataType as ArrowDataType;
use serde::{Deserialize, Serialize};

use crate::data_type::{ConcreteDataType, DataType};
use crate::type_id::LogicalTypeId;
use crate::value::Value;
use crate::vectors::MutableVector;

/// Used to represent the Dictionary datatype.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DictionaryType {
// Use Box to avoid recursive dependency, as enum ConcreteDataType depends on DictionaryType.
/// The type of Dictionary key.
key_type: Box<ConcreteDataType>,
/// The type of Dictionary value.
value_type: Box<ConcreteDataType>,
}

impl Default for DictionaryType {
fn default() -> Self {
DictionaryType::new(
ConcreteDataType::null_datatype(),
ConcreteDataType::null_datatype(),
)
}
}

impl DictionaryType {
/// Create a new `DictionaryType` whose item's data type is `item_type`.
pub fn new(key_type: ConcreteDataType, value_type: ConcreteDataType) -> Self {
DictionaryType {
key_type: Box::new(key_type),
value_type: Box::new(value_type),
}
}

/// Returns the key data type.
#[inline]
pub fn key_type(&self) -> &ConcreteDataType {
&self.key_type
}

/// Returns the value data type.
#[inline]
pub fn value_type(&self) -> &ConcreteDataType {
&self.value_type
}
}

impl DataType for DictionaryType {
fn name(&self) -> &str {
"Dictionary"
}

fn logical_type_id(&self) -> LogicalTypeId {
LogicalTypeId::Dictionary
}

fn default_value(&self) -> Value {
unimplemented!()
}

fn as_arrow_type(&self) -> ArrowDataType {
ArrowDataType::Dictionary(
Box::new(self.key_type.as_arrow_type()),
Box::new(self.value_type.as_arrow_type()),
)
}

fn create_mutable_vector(&self, _capacity: usize) -> Box<dyn MutableVector> {
unimplemented!()
}

fn is_timestamp_compatible(&self) -> bool {
false
}
}
15 changes: 15 additions & 0 deletions src/datatypes/src/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,10 @@ fn to_null_value(output_type: &ConcreteDataType) -> ScalarValue {
ConcreteDataType::List(_) => {
ScalarValue::List(None, Box::new(new_item_field(output_type.as_arrow_type())))
}
ConcreteDataType::Dictionary(dict) => ScalarValue::Dictionary(
Box::new(dict.key_type().as_arrow_type()),
Box::new(to_null_value(dict.value_type())),
),
}
}

Expand Down Expand Up @@ -513,6 +517,17 @@ impl Ord for ListValue {
}
}

// TODO(ruihang): Implement this type
/// Dictionary value.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DictionaryValue {
/// Inner values datatypes
key_type: ConcreteDataType,
value_type: ConcreteDataType,
}

impl Eq for DictionaryValue {}

impl TryFrom<ScalarValue> for Value {
type Error = error::Error;

Expand Down
2 changes: 1 addition & 1 deletion src/datatypes/src/vectors/eq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ fn equal(lhs: &dyn Vector, rhs: &dyn Vector) -> bool {
},
List(_) => is_vector_eq!(ListVector, lhs, rhs),
UInt8(_) | UInt16(_) | UInt32(_) | UInt64(_) | Int8(_) | Int16(_) | Int32(_) | Int64(_)
| Float32(_) | Float64(_) => {
| Float32(_) | Float64(_) | Dictionary(_) => {
with_match_primitive_type_id!(lhs_type.logical_type_id(), |$T| {
let lhs = lhs.as_any().downcast_ref::<PrimitiveVector<$T>>().unwrap();
let rhs = rhs.as_any().downcast_ref::<PrimitiveVector<$T>>().unwrap();
Expand Down
Loading