Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: bump arrow, parquet, datafusion and tonic #1386

Merged
merged 4 commits into from
Apr 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
196 changes: 120 additions & 76 deletions Cargo.lock

Large diffs are not rendered by default.

26 changes: 13 additions & 13 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,33 +52,33 @@ edition = "2021"
license = "Apache-2.0"

[workspace.dependencies]
arrow = { version = "36.0" }
arrow-array = "36.0"
arrow-flight = "36.0"
arrow-schema = { version = "36.0", features = ["serde"] }
arrow = { version = "37.0" }
arrow-array = "37.0"
arrow-flight = "37.0"
arrow-schema = { version = "37.0", features = ["serde"] }
async-stream = "0.3"
async-trait = "0.1"
chrono = { version = "0.4", features = ["serde"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "21bf4ffccadfeea824ab6e29c0b872930d0e190a" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "21bf4ffccadfeea824ab6e29c0b872930d0e190a" }
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "21bf4ffccadfeea824ab6e29c0b872930d0e190a" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "21bf4ffccadfeea824ab6e29c0b872930d0e190a" }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "21bf4ffccadfeea824ab6e29c0b872930d0e190a" }
datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "21bf4ffccadfeea824ab6e29c0b872930d0e190a" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "74a778ca6016a853a3c3add3fa8c6f12f4fe4561" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "74a778ca6016a853a3c3add3fa8c6f12f4fe4561" }
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "74a778ca6016a853a3c3add3fa8c6f12f4fe4561" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "74a778ca6016a853a3c3add3fa8c6f12f4fe4561" }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "74a778ca6016a853a3c3add3fa8c6f12f4fe4561" }
datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "74a778ca6016a853a3c3add3fa8c6f12f4fe4561" }
futures = "0.3"
futures-util = "0.3"
parquet = "36.0"
parquet = "37.0"
paste = "1.0"
prost = "0.11"
rand = "0.8"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }
sqlparser = "0.32"
sqlparser = "0.33"
tempfile = "3"
tokio = { version = "1.24.2", features = ["full"] }
tokio-util = { version = "0.7", features = ["io-util"] }
tonic = { version = "0.8", features = ["tls"] }
tonic = { version = "0.9", features = ["tls"] }
uuid = { version = "1", features = ["serde", "v4", "fast-rng"] }

[profile.release]
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/src/bin/nyc-taxi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ fn convert_record_batch(record_batch: RecordBatch) -> (Vec<Column>, u32) {
column_name: field.name().clone(),
values: Some(values),
null_mask: array
.data()
.to_data()
.nulls()
.map(|bitmap| bitmap.buffer().as_slice().to_vec())
.unwrap_or_default(),
Expand Down Expand Up @@ -225,7 +225,7 @@ fn build_values(column: &ArrayRef) -> (Values, ColumnDataType) {
| DataType::FixedSizeList(_, _)
| DataType::LargeList(_)
| DataType::Struct(_)
| DataType::Union(_, _, _)
| DataType::Union(_, _)
| DataType::Dictionary(_, _)
| DataType::Decimal128(_, _)
| DataType::Decimal256(_, _)
Expand Down
4 changes: 2 additions & 2 deletions src/api/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ common-base = { path = "../common/base" }
common-error = { path = "../common/error" }
common-time = { path = "../common/time" }
datatypes = { path = "../datatypes" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "59afacdae59eae4241cfaf851021361caaeaed21" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "0bebe5f69c91cdfbce85cb8f45f9fcd28185261c" }
prost.workspace = true
snafu = { version = "0.7", features = ["backtraces"] }
tonic.workspace = true

[build-dependencies]
tonic-build = "0.8"
tonic-build = "0.9"
2 changes: 1 addition & 1 deletion src/common/function-macro/src/range_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ fn build_calc_fn(
fn calc(input: &[ColumnarValue]) -> Result<ColumnarValue, DataFusionError> {
assert_eq!(input.len(), #num_params);

#( let #range_array_names = RangeArray::try_new(extract_array(&input[#param_numbers])?.data().clone().into())?; )*
#( let #range_array_names = RangeArray::try_new(extract_array(&input[#param_numbers])?.to_data().into())?; )*

// TODO(ruihang): add ensure!()

Expand Down
8 changes: 8 additions & 0 deletions src/common/substrait/src/df_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,14 @@ mod utils {
BuiltinScalarFunction::Uuid => "uuid",
BuiltinScalarFunction::Struct => "struct",
BuiltinScalarFunction::ArrowTypeof => "arrow_type_of",
BuiltinScalarFunction::Acosh => "acosh",
BuiltinScalarFunction::Asinh => "asinh",
BuiltinScalarFunction::Atanh => "atanh",
BuiltinScalarFunction::Cbrt => "cbrt",
BuiltinScalarFunction::Cosh => "cosh",
BuiltinScalarFunction::Pi => "pi",
BuiltinScalarFunction::Sinh => "sinh",
BuiltinScalarFunction::Tanh => "tanh",
}
}
}
Expand Down
10 changes: 5 additions & 5 deletions src/datanode/src/sql/copy_table_from.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ mod tests {
(
DataType::Timestamp(
datatypes::arrow::datatypes::TimeUnit::Second,
Some("UTC".to_string()),
Some("UTC".into()),
),
true,
),
Expand All @@ -251,14 +251,14 @@ mod tests {
(
DataType::Timestamp(
datatypes::arrow::datatypes::TimeUnit::Second,
Some("UTC".to_string()),
Some("UTC".into()),
),
true,
),
(
DataType::Timestamp(
datatypes::arrow::datatypes::TimeUnit::Second,
Some("PDT".to_string()),
Some("PDT".into()),
),
true,
),
Expand All @@ -269,14 +269,14 @@ mod tests {
(
DataType::Timestamp(
datatypes::arrow::datatypes::TimeUnit::Second,
Some("UTC".to_string()),
Some("UTC".into()),
),
true,
),
(
DataType::Timestamp(
datatypes::arrow::datatypes::TimeUnit::Millisecond,
Some("UTC".to_string()),
Some("UTC".into()),
),
true,
),
Expand Down
2 changes: 1 addition & 1 deletion src/datatypes/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ test = []

[dependencies]
arrow.workspace = true
arrow-array = "36"
arrow-array.workspace = true
arrow-schema.workspace = true
common-base = { path = "../common/base" }
common-error = { path = "../common/error" }
Expand Down
2 changes: 1 addition & 1 deletion src/datatypes/src/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ mod tests {
ConcreteDataType::String(_)
));
assert_eq!(
ConcreteDataType::from_arrow_type(&ArrowDataType::List(Box::new(Field::new(
ConcreteDataType::from_arrow_type(&ArrowDataType::List(Arc::new(Field::new(
"item",
ArrowDataType::Int32,
true,
Expand Down
2 changes: 1 addition & 1 deletion src/datatypes/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ impl TryFrom<Arc<ArrowSchema>> for Schema {
let mut column_schemas = Vec::with_capacity(arrow_schema.fields.len());
let mut name_to_index = HashMap::with_capacity(arrow_schema.fields.len());
for field in &arrow_schema.fields {
let column_schema = ColumnSchema::try_from(field)?;
let column_schema = ColumnSchema::try_from(field.as_ref())?;
name_to_index.insert(field.name().to_string(), column_schemas.len());
column_schemas.push(column_schema);
}
Expand Down
6 changes: 4 additions & 2 deletions src/datatypes/src/types/list_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use arrow::datatypes::{DataType as ArrowDataType, Field};
use serde::{Deserialize, Serialize};

Expand Down Expand Up @@ -63,7 +65,7 @@ impl DataType for ListType {
}

fn as_arrow_type(&self) -> ArrowDataType {
let field = Box::new(Field::new("item", self.item_type.as_arrow_type(), true));
let field = Arc::new(Field::new("item", self.item_type.as_arrow_type(), true));
ArrowDataType::List(field)
}

Expand Down Expand Up @@ -94,7 +96,7 @@ mod tests {
t.default_value()
);
assert_eq!(
ArrowDataType::List(Box::new(Field::new("item", ArrowDataType::Boolean, true))),
ArrowDataType::List(Arc::new(Field::new("item", ArrowDataType::Boolean, true))),
t.as_arrow_type()
);
assert_eq!(ConcreteDataType::boolean_datatype(), *t.item_type());
Expand Down
5 changes: 3 additions & 2 deletions src/datatypes/src/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
use std::cmp::Ordering;
use std::fmt::{Display, Formatter};
use std::str::FromStr;
use std::sync::Arc;

use arrow::datatypes::{DataType as ArrowDataType, Field};
use common_base::bytes::{Bytes, StringBytes};
Expand Down Expand Up @@ -271,7 +272,7 @@ fn to_null_value(output_type: &ConcreteDataType) -> ScalarValue {
ConcreteDataType::DateTime(_) => ScalarValue::Date64(None),
ConcreteDataType::Timestamp(t) => timestamp_to_scalar_value(t.unit(), None),
ConcreteDataType::List(_) => {
ScalarValue::List(None, Box::new(new_item_field(output_type.as_arrow_type())))
ScalarValue::List(None, Arc::new(new_item_field(output_type.as_arrow_type())))
}
ConcreteDataType::Dictionary(dict) => ScalarValue::Dictionary(
Box::new(dict.key_type().as_arrow_type()),
Expand Down Expand Up @@ -490,7 +491,7 @@ impl ListValue {

Ok(ScalarValue::List(
vs,
Box::new(new_item_field(output_type.item_type().as_arrow_type())),
Arc::new(new_item_field(output_type.item_type().as_arrow_type())),
))
}
}
Expand Down
5 changes: 2 additions & 3 deletions src/datatypes/src/vectors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,7 @@ macro_rules! impl_try_from_arrow_array_for_vector {
.with_context(|| crate::error::ConversionSnafu {
from: std::format!("{:?}", array.as_ref().data_type()),
})?
.data()
.clone();
.to_data();

let concrete_array = $Array::from(data);
Ok($Vector::from(concrete_array))
Expand All @@ -229,7 +228,7 @@ macro_rules! impl_try_from_arrow_array_for_vector {

macro_rules! impl_validity_for_vector {
($array: expr) => {
Validity::from_array_data($array.data())
Validity::from_array_data($array.to_data())
};
}

Expand Down
14 changes: 4 additions & 10 deletions src/datatypes/src/vectors/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,7 @@ impl BinaryVector {
}

fn to_array_data(&self) -> ArrayData {
self.array.data().clone()
}

fn from_array_data(data: ArrayData) -> BinaryVector {
BinaryVector {
array: BinaryArray::from(data),
}
self.array.to_data()
}
}

Expand Down Expand Up @@ -106,8 +100,8 @@ impl Vector for BinaryVector {
}

fn slice(&self, offset: usize, length: usize) -> VectorRef {
let data = self.array.data().slice(offset, length);
Arc::new(Self::from_array_data(data))
let array = self.array.slice(offset, length);
Arc::new(Self { array })
}

fn get(&self, index: usize) -> Value {
Expand Down Expand Up @@ -286,7 +280,7 @@ mod tests {
#[test]
fn test_from_arrow_array() {
let arrow_array = BinaryArray::from_iter_values([vec![1, 2, 3], vec![1, 2, 3]]);
let original = BinaryArray::from(arrow_array.data().clone());
let original = BinaryArray::from(arrow_array.to_data());
let vector = BinaryVector::from(arrow_array);
assert_eq!(original, vector.array);
}
Expand Down
6 changes: 3 additions & 3 deletions src/datatypes/src/vectors/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ impl BooleanVector {
}

fn to_array_data(&self) -> ArrayData {
self.array.data().clone()
self.array.to_data()
}

fn from_array_data(data: ArrayData) -> BooleanVector {
Expand Down Expand Up @@ -132,7 +132,7 @@ impl Vector for BooleanVector {
}

fn slice(&self, offset: usize, length: usize) -> VectorRef {
let data = self.array.data().slice(offset, length);
let data = self.array.to_data().slice(offset, length);
Arc::new(Self::from_array_data(data))
}

Expand Down Expand Up @@ -259,7 +259,7 @@ mod tests {
assert!(!v.is_const());
assert!(v.validity().is_all_valid());
assert!(!v.only_null());
assert_eq!(64, v.memory_size());
assert_eq!(2, v.memory_size());

for (i, b) in bools.iter().enumerate() {
assert!(!v.is_null(i));
Expand Down
4 changes: 2 additions & 2 deletions src/datatypes/src/vectors/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pub type DateVectorBuilder = PrimitiveVectorBuilder<DateType>;
mod tests {
use std::sync::Arc;

use arrow::array::Array;
use arrow_array::ArrayRef;
use common_time::date::Date;

use super::*;
Expand Down Expand Up @@ -84,7 +84,7 @@ mod tests {
#[test]
fn test_date_from_arrow() {
let vector = DateVector::from_slice([1, 2]);
let arrow = vector.as_arrow().slice(0, vector.len());
let arrow: ArrayRef = Arc::new(vector.as_arrow().slice(0, vector.len()));
let vector2 = DateVector::try_from_arrow_array(&arrow).unwrap();
assert_eq!(vector, vector2);
}
Expand Down
5 changes: 3 additions & 2 deletions src/datatypes/src/vectors/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ mod tests {
use std::sync::Arc;

use arrow::array::{Array, PrimitiveArray};
use arrow_array::ArrayRef;
use common_time::DateTime;
use datafusion_common::from_slice::FromSlice;

Expand Down Expand Up @@ -108,8 +109,8 @@ mod tests {
#[test]
fn test_datetime_from_arrow() {
let vector = DateTimeVector::from_wrapper_slice([DateTime::new(1), DateTime::new(2)]);
let arrow = vector.as_arrow().slice(0, vector.len());
let vector2 = DateTimeVector::try_from_arrow_array(&arrow).unwrap();
let arrow: ArrayRef = Arc::new(vector.as_arrow().slice(0, vector.len())) as _;
let vector2 = DateTimeVector::try_from_arrow_array(arrow).unwrap();
assert_eq!(vector, vector2);
}
}
4 changes: 2 additions & 2 deletions src/datatypes/src/vectors/helper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ impl Helper {
| ArrowDataType::LargeList(_)
| ArrowDataType::FixedSizeList(_, _)
| ArrowDataType::Struct(_)
| ArrowDataType::Union(_, _, _)
| ArrowDataType::Union(_, _)
| ArrowDataType::Dictionary(_, _)
| ArrowDataType::Decimal128(_, _)
| ArrowDataType::Decimal256(_, _)
Expand Down Expand Up @@ -359,7 +359,7 @@ mod tests {
ScalarValue::Int32(Some(1)),
ScalarValue::Int32(Some(2)),
]),
Box::new(Field::new("item", ArrowDataType::Int32, true)),
Arc::new(Field::new("item", ArrowDataType::Int32, true)),
);
let vector = Helper::try_from_scalar_value(value, 3).unwrap();
assert_eq!(
Expand Down
8 changes: 4 additions & 4 deletions src/datatypes/src/vectors/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ impl ListVector {
}

fn to_array_data(&self) -> ArrayData {
self.array.data().clone()
self.array.to_data()
}

fn from_array_data_and_type(data: ArrayData, item_type: ConcreteDataType) -> Self {
Expand Down Expand Up @@ -106,7 +106,7 @@ impl Vector for ListVector {
}

fn slice(&self, offset: usize, length: usize) -> VectorRef {
let data = self.array.data().slice(offset, length);
let data = self.array.to_data().slice(offset, length);
Arc::new(Self::from_array_data_and_type(data, self.item_type.clone()))
}

Expand Down Expand Up @@ -345,7 +345,7 @@ impl ScalarVectorBuilder for ListVectorBuilder {
let len = self.len();
let values_vector = self.values_builder.to_vector();
let values_arr = values_vector.to_arrow_array();
let values_data = values_arr.data();
let values_data = values_arr.to_data();

let offset_buffer = self.offsets_builder.finish();
let null_bit_buffer = self.null_buffer_builder.finish();
Expand All @@ -355,7 +355,7 @@ impl ScalarVectorBuilder for ListVectorBuilder {
let array_data_builder = ArrayData::builder(data_type)
.len(len)
.add_buffer(offset_buffer)
.add_child_data(values_data.clone())
.add_child_data(values_data)
.null_bit_buffer(null_bit_buffer);

let array_data = unsafe { array_data_builder.build_unchecked() };
Expand Down
Loading