Skip to content

Commit

Permalink
test case for delta-io#1374
Browse files Browse the repository at this point in the history
  • Loading branch information
cmackenzie1 committed May 18, 2023
1 parent dde2f7e commit 5b42954
Show file tree
Hide file tree
Showing 10 changed files with 97 additions and 19 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"protocol":{"minReaderVersion":1,"minWriterVersion":1}}
{"metaData":{"id":"3431dac3-0ec4-4c02-9bd7-cbf3f1b98523","name":null,"description":null,"format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"timestamp\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}},{\"name\":\"temperature\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"date\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["date"],"createdTime":1684452961889,"configuration":{}}}
{"commitInfo":{"timestamp":1684452961890,"operation":"CREATE TABLE","operationParameters":{"mode":"ErrorIfExists","location":"file:///Users/cole/github.com/cmackenzie1/delta-rs/rust/tests/data/issue_1374","metadata":"{\"configuration\":{},\"created_time\":1684452961889,\"description\":null,\"format\":{\"options\":{},\"provider\":\"parquet\"},\"id\":\"3431dac3-0ec4-4c02-9bd7-cbf3f1b98523\",\"name\":null,\"partition_columns\":[\"date\"],\"schema\":{\"fields\":[{\"metadata\":{},\"name\":\"timestamp\",\"nullable\":true,\"type\":\"timestamp\"},{\"metadata\":{},\"name\":\"temperature\",\"nullable\":true,\"type\":\"integer\"},{\"metadata\":{},\"name\":\"date\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}","protocol":"{\"minReaderVersion\":1,\"minWriterVersion\":1}"},"clientVersion":"delta-rs.0.11.0"}}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"add":{"path":"date=2023-05-17/part-00000-9579982b-cbd4-4212-ac17-8c9dbe362c04-c000.snappy.parquet","size":925,"partitionValues":{"date":"2023-05-17"},"modificationTime":1684452961913,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"temperature\":20,\"timestamp\":\"2023-05-17T17:00:00.000Z\"},\"maxValues\":{\"temperature\":20,\"timestamp\":\"2023-05-17T17:00:00.000Z\"},\"nullCount\":{\"temperature\":0,\"timestamp\":0}}","tags":null}}
{"add":{"path":"date=2023-05-18/part-00000-9ab6bf58-cac5-42b2-a97e-9961470a3bb8-c000.snappy.parquet","size":925,"partitionValues":{"date":"2023-05-18"},"modificationTime":1684452961914,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"temperature\":20,\"timestamp\":\"2023-05-18T18:00:00.000Z\"},\"maxValues\":{\"temperature\":20,\"timestamp\":\"2023-05-18T18:00:00.000Z\"},\"nullCount\":{\"timestamp\":0,\"temperature\":0}}","tags":null}}
{"commitInfo":{"timestamp":1684452961914,"clientVersion":"delta-rs.0.11.0"}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"add":{"path":"date=2023-05-20/part-00000-5f3e10b7-416f-44d7-a2b0-1e08e64476fe-c000.snappy.parquet","size":925,"partitionValues":{"date":"2023-05-20"},"modificationTime":1684452961921,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"temperature\":20,\"timestamp\":\"2023-05-20T20:00:00.000Z\"},\"maxValues\":{\"timestamp\":\"2023-05-20T20:00:00.000Z\",\"temperature\":20},\"nullCount\":{\"timestamp\":0,\"temperature\":0}}","tags":null}}
{"add":{"path":"date=2023-05-19/part-00000-1e58be07-a5c9-4deb-86c6-d713d6721dd6-c000.snappy.parquet","size":925,"partitionValues":{"date":"2023-05-19"},"modificationTime":1684452961922,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"temperature\":20,\"timestamp\":\"2023-05-19T19:00:00.000Z\"},\"maxValues\":{\"timestamp\":\"2023-05-19T19:00:00.000Z\",\"temperature\":20},\"nullCount\":{\"temperature\":0,\"timestamp\":0}}","tags":null}}
{"commitInfo":{"timestamp":1684452961922,"clientVersion":"delta-rs.0.11.0"}}
1 change: 1 addition & 0 deletions rust/tests/data/issue_1374/_delta_log/_last_checkpoint
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"parts":null,"size":20791,"version":1}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
106 changes: 87 additions & 19 deletions rust/tests/datafusion_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,25 @@ use arrow::datatypes::{
DataType as ArrowDataType, Field as ArrowField, Schema as ArrowSchema, TimeUnit,
};
use arrow::record_batch::RecordBatch;

use common::datafusion::context_with_delta_table_factory;
use datafusion::assert_batches_sorted_eq;
use datafusion::datasource::TableProvider;
use datafusion::datasource::{TableProvider};
use datafusion::execution::context::{SessionContext, SessionState, TaskContext};
use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec;
use datafusion::physical_plan::{common::collect, file_format::ParquetExec, metrics::Label};
use datafusion::physical_plan::{visit_execution_plan, ExecutionPlan, ExecutionPlanVisitor};
use datafusion_common::scalar::ScalarValue;
use datafusion_common::ScalarValue::*;
use datafusion_common::{DataFusionError, Result};
use datafusion_expr::Expr;
use datafusion_expr::{Expr};
use datafusion_proto::bytes::{
physical_plan_from_bytes_with_extension_codec, physical_plan_to_bytes_with_extension_codec,
};
use deltalake::checkpoints::create_checkpoint;
use deltalake::writer::{DeltaWriter, JsonWriter};
use deltalake::{SchemaDataType, SchemaField};
use serde_json::json;
use url::Url;

use deltalake::action::SaveMode;
Expand Down Expand Up @@ -842,31 +847,94 @@ async fn test_issue_1292_datafusion_sql_projection() -> Result<()> {
}

#[tokio::test]
async fn test_issue_1291_datafusion_sql_partitioned_data() -> Result<()> {
async fn test_issue_1374() -> Result<()> {
let _ = std::fs::remove_dir_all("./tests/data/issue_1374"); // cleanup previous runs

let mut table = DeltaOps::try_from_uri("./tests/data/issue_1374")
.await?
.create()
.with_columns(vec![
SchemaField::new(
"timestamp".to_string(),
SchemaDataType::primitive("timestamp".to_string()),
true,
HashMap::new(),
),
SchemaField::new(
"temperature".to_string(),
SchemaDataType::primitive("integer".to_string()),
true,
HashMap::new(),
),
SchemaField::new(
"date".to_string(),
SchemaDataType::primitive("string".to_string()),
true,
HashMap::new(),
),
])
.with_partition_columns(vec!["date"])
.await?;

let mut writer = JsonWriter::for_table(&table)?;
writer
.write(vec![
json!({
"date": "2023-05-17",
"timestamp": "2023-05-17T17:00:00Z",
"temperature": 20
}),
json!({
"date": "2023-05-18",
"timestamp": "2023-05-18T18:00:00Z",
"temperature": 20
}),
])
.await?;
writer.flush_and_commit(&mut table).await?;
create_checkpoint(&table).await.unwrap();

// Add some more data past the checkpoint
// For some reason, commenting this write and flush out
// allows the query to succeed
writer
.write(vec![
json!({
"date": "2023-05-19",
"timestamp": "2023-05-19T19:00:00Z",
"temperature": 20
}),
json!({
"date": "2023-05-20",
"timestamp": "2023-05-20T20:00:00Z",
"temperature": 20
}),
])
.await?;
writer.flush_and_commit(&mut table).await?;

let ctx = SessionContext::new();
let table = deltalake::open_table("./tests/data/http_requests")
let table = deltalake::open_table("./tests/data/issue_1374")
.await
.unwrap();
ctx.register_table("http_requests", Arc::new(table))?;
ctx.register_table("t", Arc::new(table))?;

let batches = ctx
.sql(
"SELECT \"ClientRequestURI\", date FROM http_requests WHERE date > '2023-04-13' LIMIT 5",
)
.await?
.sql(r#"SELECT timestamp, temperature, date FROM t WHERE timestamp < '2023-05-20T00:00:00Z'"#)
.await
.unwrap()
.collect()
.await?;
.await
.unwrap();

let expected = vec![
"+------------------+------------+",
"| ClientRequestURI | date |",
"+------------------+------------+",
"| / | 2023-04-14 |",
"| / | 2023-04-14 |",
"| / | 2023-04-14 |",
"| / | 2023-04-14 |",
"| / | 2023-04-14 |",
"+------------------+------------+",
"+---------------------+-------------+------------+",
"| timestamp | temperature | date |",
"+---------------------+-------------+------------+",
"| 2023-05-17T17:00:00 | 20 | 2023-05-17 |",
"| 2023-05-18T18:00:00 | 20 | 2023-05-18 |",
"| 2023-05-19T19:00:00 | 20 | 2023-05-19 |",
"+---------------------+-------------+------------+",
];

assert_batches_sorted_eq!(&expected, &batches);
Expand Down

0 comments on commit 5b42954

Please sign in to comment.