diff --git a/rust/tests/data/issue_1374/_delta_log/00000000000000000000.json b/rust/tests/data/issue_1374/_delta_log/00000000000000000000.json new file mode 100644 index 0000000000..493d40ebfb --- /dev/null +++ b/rust/tests/data/issue_1374/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"protocol":{"minReaderVersion":1,"minWriterVersion":1}} +{"metaData":{"id":"d5ad9276-c21f-474e-bfa8-996099dce265","name":null,"description":null,"format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"timestamp\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}},{\"name\":\"temperature\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"date\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["date"],"createdTime":1684886484991,"configuration":{}}} +{"commitInfo":{"timestamp":1684886484992,"operation":"CREATE TABLE","operationParameters":{"mode":"ErrorIfExists","metadata":"{\"configuration\":{},\"created_time\":1684886484991,\"description\":null,\"format\":{\"options\":{},\"provider\":\"parquet\"},\"id\":\"d5ad9276-c21f-474e-bfa8-996099dce265\",\"name\":null,\"partition_columns\":[\"date\"],\"schema\":{\"fields\":[{\"metadata\":{},\"name\":\"timestamp\",\"nullable\":true,\"type\":\"timestamp\"},{\"metadata\":{},\"name\":\"temperature\",\"nullable\":true,\"type\":\"integer\"},{\"metadata\":{},\"name\":\"date\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}","protocol":"{\"minReaderVersion\":1,\"minWriterVersion\":1}","location":"file:///Users/cole/github.com/cmackenzie1/delta-rs/rust/tests/data/issue_1374"},"clientVersion":"delta-rs.0.11.0"}} \ No newline at end of file diff --git a/rust/tests/data/issue_1374/_delta_log/00000000000000000001.checkpoint.parquet b/rust/tests/data/issue_1374/_delta_log/00000000000000000001.checkpoint.parquet new file mode 100644 index 0000000000..ea7b775bb5 Binary files /dev/null and 
b/rust/tests/data/issue_1374/_delta_log/00000000000000000001.checkpoint.parquet differ diff --git a/rust/tests/data/issue_1374/_delta_log/00000000000000000001.json b/rust/tests/data/issue_1374/_delta_log/00000000000000000001.json new file mode 100644 index 0000000000..74ce0bf390 --- /dev/null +++ b/rust/tests/data/issue_1374/_delta_log/00000000000000000001.json @@ -0,0 +1,3 @@ +{"add":{"path":"date=2023-05-24/part-00000-e2b01fc6-a906-4008-82df-e98efdcdd49c-c000.snappy.parquet","size":1021,"partitionValues":{"date":"2023-05-24"},"modificationTime":1684886485017,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"timestamp\":null,\"temperature\":8},\"maxValues\":{\"timestamp\":\"2023-05-24T00:01:25.014Z\",\"temperature\":90},\"nullCount\":{\"temperature\":0,\"timestamp\":0}}","tags":null}} +{"add":{"path":"date=2023-05-24/part-00000-e2b01fc6-a906-4008-82df-e98efdcdd47d-c000.snappy.parquet","size":1021,"partitionValues":{"date":"2023-05-24"},"modificationTime":1684886485017,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"timestamp\":\"2023-05-24T00:01:25.014Z\",\"temperature\":8},\"maxValues\":{\"timestamp\":\"2023-05-24T00:01:25.014Z\",\"temperature\":90},\"nullCount\":{\"temperature\":0,\"timestamp\":0}}","tags":null}} +{"commitInfo":{"timestamp":1685483647338,"clientVersion":"delta-rs.0.11.0"}} \ No newline at end of file diff --git a/rust/tests/data/issue_1374/_delta_log/_last_checkpoint b/rust/tests/data/issue_1374/_delta_log/_last_checkpoint new file mode 100644 index 0000000000..1c0d1f36c5 --- /dev/null +++ b/rust/tests/data/issue_1374/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"parts":null,"size":20622,"version":1} \ No newline at end of file diff --git a/rust/tests/data/issue_1374/date=2023-05-24/part-00000-e2b01fc6-a906-4008-82df-e98efdcdd47d-c000.snappy.parquet b/rust/tests/data/issue_1374/date=2023-05-24/part-00000-e2b01fc6-a906-4008-82df-e98efdcdd47d-c000.snappy.parquet new file mode 100644 index 0000000000..cc6a8f345f Binary 
files /dev/null and b/rust/tests/data/issue_1374/date=2023-05-24/part-00000-e2b01fc6-a906-4008-82df-e98efdcdd47d-c000.snappy.parquet differ diff --git a/rust/tests/data/issue_1374/date=2023-05-24/part-00000-e2b01fc6-a906-4008-82df-e98efdcdd49c-c000.snappy.parquet b/rust/tests/data/issue_1374/date=2023-05-24/part-00000-e2b01fc6-a906-4008-82df-e98efdcdd49c-c000.snappy.parquet new file mode 100644 index 0000000000..cc6a8f345f Binary files /dev/null and b/rust/tests/data/issue_1374/date=2023-05-24/part-00000-e2b01fc6-a906-4008-82df-e98efdcdd49c-c000.snappy.parquet differ diff --git a/rust/tests/datafusion_test.rs b/rust/tests/datafusion_test.rs index 19bc0b6e90..8ce726199d 100644 --- a/rust/tests/datafusion_test.rs +++ b/rust/tests/datafusion_test.rs @@ -9,7 +9,6 @@ use arrow::datatypes::{ DataType as ArrowDataType, Field as ArrowField, Schema as ArrowSchema, TimeUnit, }; use arrow::record_batch::RecordBatch; -use common::datafusion::context_with_delta_table_factory; use datafusion::assert_batches_sorted_eq; use datafusion::datasource::TableProvider; use datafusion::execution::context::{SessionContext, SessionState, TaskContext}; @@ -25,6 +24,7 @@ use datafusion_proto::bytes::{ }; use url::Url; +use common::datafusion::context_with_delta_table_factory; use deltalake::action::SaveMode; use deltalake::delta_datafusion::{DeltaPhysicalCodec, DeltaScan}; use deltalake::operations::create::CreateBuilder; @@ -723,7 +723,7 @@ async fn test_files_scanned() -> Result<()> { // assert_eq!(metrics.num_scanned_files(), 1); // Check pruning for null partitions. 
Since there are no record count statistics pruning cannot be done - // let e = col("k").is_not_null(); + // let e = col("k").is_not_null(); // let metrics = get_scan_metrics(&table, &state, &[e]).await?; // assert_eq!(metrics.num_scanned_files(), 2); @@ -842,31 +842,35 @@ async fn test_issue_1292_datafusion_sql_projection() -> Result<()> { } #[tokio::test] -async fn test_issue_1291_datafusion_sql_partitioned_data() -> Result<()> { +async fn test_issue_1374() -> Result<()> { + env_logger::init(); + let ctx = SessionContext::new(); - let table = deltalake::open_table("./tests/data/http_requests") + let table = deltalake::open_table("./tests/data/issue_1374") .await .unwrap(); - ctx.register_table("http_requests", Arc::new(table))?; + ctx.register_table("t", Arc::new(table))?; let batches = ctx .sql( - "SELECT \"ClientRequestURI\", date FROM http_requests WHERE date > '2023-04-13' LIMIT 5", + r#"SELECT * + FROM t + WHERE timestamp BETWEEN '2023-05-24T00:00:00.000Z' AND '2023-05-25T00:00:00.000Z' + LIMIT 5 + "#, ) .await? .collect() .await?; let expected = vec![ - "+------------------+------------+", - "| ClientRequestURI | date |", - "+------------------+------------+", - "| / | 2023-04-14 |", - "| / | 2023-04-14 |", - "| / | 2023-04-14 |", - "| / | 2023-04-14 |", - "| / | 2023-04-14 |", - "+------------------+------------+", + "+---------------------+-------------+------------+", + "| timestamp | temperature | date |", + "+---------------------+-------------+------------+", + "| 2023-05-17T17:00:00 | 20 | 2023-05-17 |", + "| 2023-05-18T18:00:00 | 20 | 2023-05-18 |", + "| 2023-05-19T19:00:00 | 20 | 2023-05-19 |", + "+---------------------+-------------+------------+", ]; assert_batches_sorted_eq!(&expected, &batches);