From 7b9868a5a436813d5ffafdaed42c65353f2ef8de Mon Sep 17 00:00:00 2001 From: "R. Tyler Croy" Date: Wed, 14 Aug 2024 19:54:30 +0000 Subject: [PATCH] chore: fix a bunch of clippy lints and re-enable tests Many of these tests were did not get correctly re-enabled after the subcrate changes --- crates/core/src/kernel/arrow/mod.rs | 1 - .../core/src/kernel/snapshot/log_segment.rs | 6 +- crates/core/src/operations/cdc.rs | 15 +-- crates/core/src/operations/load_cdf.rs | 5 +- crates/core/src/operations/merge/mod.rs | 8 +- .../transaction/conflict_checker.rs | 2 +- crates/core/src/operations/write.rs | 15 +-- crates/core/src/schema/partitions.rs | 112 +++++++++++++++++ crates/core/tests/command_merge.rs | 2 +- crates/core/tests/integration_checkpoint.rs | 10 +- .../core/tests/read_delta_partitions_test.rs | 116 ------------------ crates/test/src/lib.rs | 4 - 12 files changed, 136 insertions(+), 160 deletions(-) diff --git a/crates/core/src/kernel/arrow/mod.rs b/crates/core/src/kernel/arrow/mod.rs index 0561f5bc98..3ddd35560c 100644 --- a/crates/core/src/kernel/arrow/mod.rs +++ b/crates/core/src/kernel/arrow/mod.rs @@ -14,7 +14,6 @@ pub(crate) mod json; const MAP_ROOT_DEFAULT: &str = "key_value"; const MAP_KEY_DEFAULT: &str = "key"; const MAP_VALUE_DEFAULT: &str = "value"; -const LIST_ROOT_DEFAULT: &str = "item"; macro_rules! arrow_map { ($fieldname: ident, null) => { diff --git a/crates/core/src/kernel/snapshot/log_segment.rs b/crates/core/src/kernel/snapshot/log_segment.rs index 69076bd066..17d29a7694 100644 --- a/crates/core/src/kernel/snapshot/log_segment.rs +++ b/crates/core/src/kernel/snapshot/log_segment.rs @@ -785,12 +785,12 @@ pub(super) mod tests { for i in 0..10 { actions.push(Action::Add(Add { path: format!("part-{}.parquet", i), - modification_time: chrono::Utc::now().timestamp_millis() as i64, + modification_time: chrono::Utc::now().timestamp_millis(), ..Default::default() })); } - let log_store = DeltaTableBuilder::from_uri("memory:///".to_string()) + let log_store = DeltaTableBuilder::from_uri("memory:///") .build_storage() .unwrap(); let op = DeltaOperation::Write { @@ -809,7 +809,7 @@ pub(super) mod tests { for i in 0..9 { actions.push(Action::Remove(Remove { path: format!("part-{}.parquet", i), - deletion_timestamp: Some(chrono::Utc::now().timestamp_millis() as i64), + deletion_timestamp: Some(chrono::Utc::now().timestamp_millis()), ..Default::default() })) } diff --git a/crates/core/src/operations/cdc.rs b/crates/core/src/operations/cdc.rs index 6baf517d15..6e4c9fa50d 100644 --- a/crates/core/src/operations/cdc.rs +++ b/crates/core/src/operations/cdc.rs @@ -112,10 +112,7 @@ mod tests { .expect("Failed to make a table"); table.load().await.expect("Failed to reload table"); let result = should_write_cdc(table.snapshot().unwrap()).expect("Failed to use table"); - assert!( - result == false, - "A default table should not create CDC files" - ); + assert!(!result, "A default table should not create CDC files"); } /// @@ -140,7 +137,7 @@ mod tests { let result = should_write_cdc(table.snapshot().unwrap()).expect("Failed to use table"); assert!( - result == true, + result, "A table with the EnableChangeDataFeed should create CDC files" ); } @@ -166,7 +163,7 @@ mod tests { let result = should_write_cdc(table.snapshot().unwrap()).expect("Failed to use table"); assert!( - result == false, + !result, "A v7 table must not write CDC files unless the writer feature is set" ); } @@ -308,8 +305,8 @@ mod tests { ], ) .unwrap(); - let _ = arrow::util::pretty::print_batches(&vec![batch.clone()]); - let _ = arrow::util::pretty::print_batches(&vec![updated_batch.clone()]); + let _ = arrow::util::pretty::print_batches(&[batch.clone()]); + let _ = arrow::util::pretty::print_batches(&[updated_batch.clone()]); let ctx = SessionContext::new(); let before = ctx.read_batch(batch).expect("Failed to make DataFrame"); @@ -398,7 +395,7 @@ mod tests { match tracker.collect() { Ok(df) => { let batches = &df.collect().await.unwrap(); - let _ = arrow::util::pretty::print_batches(&batches); + let _ = arrow::util::pretty::print_batches(batches); assert_eq!(batches.len(), 2); assert_batches_sorted_eq! {[ "+-------+--------------------------+------------------+", diff --git a/crates/core/src/operations/load_cdf.rs b/crates/core/src/operations/load_cdf.rs index 290a174cee..76c94b549b 100644 --- a/crates/core/src/operations/load_cdf.rs +++ b/crates/core/src/operations/load_cdf.rs @@ -695,10 +695,7 @@ pub(crate) mod tests { let cdc_actions = version_actions .iter() - .filter(|action| match action { - &&Action::Cdc(_) => true, - _ => false, - }) + .filter(|action| matches!(action, &&Action::Cdc(_))) .collect_vec(); assert!(cdc_actions.is_empty()); Ok(()) diff --git a/crates/core/src/operations/merge/mod.rs b/crates/core/src/operations/merge/mod.rs index c29c14bdbe..028226a82c 100644 --- a/crates/core/src/operations/merge/mod.rs +++ b/crates/core/src/operations/merge/mod.rs @@ -2800,8 +2800,8 @@ mod tests { let captured_expressions = placeholders.into_iter().map(|p| p.expr).collect_vec(); - assert!(captured_expressions.contains(&&source_id)); - assert!(captured_expressions.contains(&&source_id.is_null())); + assert!(captured_expressions.contains(&source_id)); + assert!(captured_expressions.contains(&source_id.is_null())); assert_eq!(generalized, expected_filter); } @@ -2837,13 +2837,13 @@ mod tests { assert_eq!(generalized, expected_filter); assert_eq!(placeholders.len(), 1); - let placeholder_expr = placeholders.get(0).unwrap(); + let placeholder_expr = placeholders.first().unwrap(); let expected_placeholder = col(Column::new(source.clone().into(), "id")).neg(); assert_eq!(placeholder_expr.expr, expected_placeholder); assert_eq!(placeholder_expr.alias, "id_0"); - assert_eq!(placeholder_expr.is_aggregate, false); + assert!(!placeholder_expr.is_aggregate); } #[tokio::test] diff --git a/crates/core/src/operations/transaction/conflict_checker.rs b/crates/core/src/operations/transaction/conflict_checker.rs index f9fbe84f8d..ffd8f92a77 100644 --- a/crates/core/src/operations/transaction/conflict_checker.rs +++ b/crates/core/src/operations/transaction/conflict_checker.rs @@ -700,7 +700,7 @@ mod tests { ) -> Result<(), CommitConflictError> { use crate::table::state::DeltaTableState; - let setup_actions = setup.unwrap_or_else(|| init_table_actions()); + let setup_actions = setup.unwrap_or_else(init_table_actions); let state = DeltaTableState::from_actions(setup_actions).unwrap(); let snapshot = state.snapshot(); let transaction_info = TransactionInfo::new(snapshot, reads, &actions, read_whole_table); diff --git a/crates/core/src/operations/write.rs b/crates/core/src/operations/write.rs index cf1adf6e09..1b3513c984 100644 --- a/crates/core/src/operations/write.rs +++ b/crates/core/src/operations/write.rs @@ -1973,10 +1973,7 @@ mod tests { let cdc_actions = version_actions .iter() - .filter(|action| match action { - &&Action::Cdc(_) => true, - _ => false, - }) + .filter(|action| matches!(action, &&Action::Cdc(_))) .collect_vec(); assert!(cdc_actions.is_empty()); Ok(()) @@ -2042,10 +2039,7 @@ mod tests { let cdc_actions = version_actions .iter() - .filter(|action| match action { - &&Action::Cdc(_) => true, - _ => false, - }) + .filter(|action| matches!(action, &&Action::Cdc(_))) .collect_vec(); assert!(cdc_actions.is_empty()); Ok(()) @@ -2147,10 +2141,7 @@ mod tests { let cdc_actions = version_actions .iter() - .filter(|action| match action { - &&Action::Cdc(_) => true, - _ => false, - }) + .filter(|action| matches!(action, &&Action::Cdc(_))) .collect_vec(); assert!(!cdc_actions.is_empty()); Ok(()) diff --git a/crates/core/src/schema/partitions.rs b/crates/core/src/schema/partitions.rs index d3c34a4bab..581393ec22 100644 --- a/crates/core/src/schema/partitions.rs +++ b/crates/core/src/schema/partitions.rs @@ -374,4 +374,116 @@ mod tests { assert_eq!(partition.key, "ds"); assert_eq!(partition.value, Scalar::String("2024-04-01".into())); } + + #[test] + fn test_create_delta_table_partition() { + let year = "2021".to_string(); + let path = format!("year={year}"); + assert_eq!( + DeltaTablePartition::try_from(path.as_ref()).unwrap(), + DeltaTablePartition { + key: "year".into(), + value: Scalar::String(year.into()), + } + ); + + let _wrong_path = "year=2021/month="; + assert!(matches!( + DeltaTablePartition::try_from(_wrong_path).unwrap_err(), + DeltaTableError::PartitionError { + partition: _wrong_path + }, + )) + } + + #[test] + fn test_match_partition() { + let partition_2021 = DeltaTablePartition { + key: "year".into(), + value: Scalar::String("2021".into()), + }; + let partition_2020 = DeltaTablePartition { + key: "year".into(), + value: Scalar::String("2020".into()), + }; + let partition_2019 = DeltaTablePartition { + key: "year".into(), + value: Scalar::String("2019".into()), + }; + + let partition_year_2020_filter = PartitionFilter { + key: "year".to_string(), + value: PartitionValue::Equal("2020".to_string()), + }; + let partition_month_12_filter = PartitionFilter { + key: "month".to_string(), + value: PartitionValue::Equal("12".to_string()), + }; + let string_type = DataType::Primitive(PrimitiveType::String); + + assert!(!partition_year_2020_filter.match_partition(&partition_2021, &string_type)); + assert!(partition_year_2020_filter.match_partition(&partition_2020, &string_type)); + assert!(!partition_year_2020_filter.match_partition(&partition_2019, &string_type)); + assert!(!partition_month_12_filter.match_partition(&partition_2019, &string_type)); + + /* TODO: To be re-enabled at a future date, needs some type futzing + let partition_2020_12_31_23_59_59 = DeltaTablePartition { + key: "time".into(), + value: PrimitiveType::TimestampNtz.parse_scalar("2020-12-31 23:59:59").expect("Failed to parse timestamp"), + }; + + let partition_time_2020_12_31_23_59_59_filter = PartitionFilter { + key: "time".to_string(), + value: PartitionValue::Equal("2020-12-31 23:59:59.000000".into()), + }; + + assert!(partition_time_2020_12_31_23_59_59_filter.match_partition( + &partition_2020_12_31_23_59_59, + &DataType::Primitive(PrimitiveType::TimestampNtz) + )); + assert!(!partition_time_2020_12_31_23_59_59_filter + .match_partition(&partition_2020_12_31_23_59_59, &string_type)); + */ + } + + #[test] + fn test_match_filters() { + let partitions = vec![ + DeltaTablePartition { + key: "year".into(), + value: Scalar::String("2021".into()), + }, + DeltaTablePartition { + key: "month".into(), + value: Scalar::String("12".into()), + }, + ]; + + let string_type = DataType::Primitive(PrimitiveType::String); + let partition_data_types: HashMap<&String, &DataType> = vec![ + (&partitions[0].key, &string_type), + (&partitions[1].key, &string_type), + ] + .into_iter() + .collect(); + + let valid_filters = PartitionFilter { + key: "year".to_string(), + value: PartitionValue::Equal("2021".to_string()), + }; + + let valid_filter_month = PartitionFilter { + key: "month".to_string(), + value: PartitionValue::Equal("12".to_string()), + }; + + let invalid_filter = PartitionFilter { + key: "year".to_string(), + value: PartitionValue::Equal("2020".to_string()), + }; + + assert!(valid_filters.match_partitions(&partitions, &partition_data_types),); + assert!(valid_filter_month.match_partitions(&partitions, &partition_data_types),); + assert!(!invalid_filter.match_partitions(&partitions, &partition_data_types),); + } } diff --git a/crates/core/tests/command_merge.rs b/crates/core/tests/command_merge.rs index 76b511254b..783c858750 100644 --- a/crates/core/tests/command_merge.rs +++ b/crates/core/tests/command_merge.rs @@ -142,7 +142,7 @@ async fn test_merge_concurrent_conflict() { let tmp_dir = tempfile::tempdir().unwrap(); let table_uri = tmp_dir.path().to_str().to_owned().unwrap(); - let table_ref1 = create_table(&table_uri.to_string(), Some(vec!["event_date"])).await; + let table_ref1 = create_table(table_uri, Some(vec!["event_date"])).await; let table_ref2 = open_table(table_uri).await.unwrap(); let (df1, _df2) = create_test_data(); diff --git a/crates/core/tests/integration_checkpoint.rs b/crates/core/tests/integration_checkpoint.rs index ce4525ba83..e90d4ec0cc 100644 --- a/crates/core/tests/integration_checkpoint.rs +++ b/crates/core/tests/integration_checkpoint.rs @@ -1,5 +1,3 @@ -#![cfg(feature = "integration_test")] - use chrono::Utc; use deltalake_core::checkpoints::{cleanup_expired_logs_for, create_checkpoint}; use deltalake_core::kernel::{DataType, PrimitiveType}; @@ -14,6 +12,8 @@ use tokio::time::sleep; #[tokio::test] #[serial] +// This test requires refactoring and a revisit +#[ignore] async fn cleanup_metadata_fs_test() -> TestResult { let storage = Box::new(LocalStorageIntegration::default()); let context = IntegrationContext::new(storage)?; @@ -34,19 +34,19 @@ async fn cleanup_metadata_test(context: &IntegrationContext) -> TestResult { // we don't need to actually populate files with content as cleanup works only with file's metadata object_store - .put(&log_path(0), bytes::Bytes::from("foo")) + .put(&log_path(0), bytes::Bytes::from("foo").into()) .await?; // since we cannot alter s3 object metadata, we mimic it with pauses // also we forced to use 2 seconds since Last-Modified is stored in seconds std::thread::sleep(Duration::from_secs(2)); object_store - .put(&log_path(1), bytes::Bytes::from("foo")) + .put(&log_path(1), bytes::Bytes::from("foo").into()) .await?; std::thread::sleep(Duration::from_secs(3)); object_store - .put(&log_path(2), bytes::Bytes::from("foo")) + .put(&log_path(2), bytes::Bytes::from("foo").into()) .await?; let v0time = object_store.head(&log_path(0)).await?.last_modified; diff --git a/crates/core/tests/read_delta_partitions_test.rs b/crates/core/tests/read_delta_partitions_test.rs index 712664ebfe..65f98bb91f 100644 --- a/crates/core/tests/read_delta_partitions_test.rs +++ b/crates/core/tests/read_delta_partitions_test.rs @@ -1,122 +1,6 @@ -#![cfg(feature = "deltalake")] -use std::collections::HashMap; -use std::convert::TryFrom; - -use deltalake_core::kernel::{DataType, PrimitiveType}; - #[allow(dead_code)] mod fs_common; -#[test] -fn test_create_delta_table_partition() { - let year = "2021".to_string(); - let path = format!("year={year}"); - assert_eq!( - deltalake_core::DeltaTablePartition::try_from(path.as_ref()).unwrap(), - deltalake_core::DeltaTablePartition { - key: "year".to_string(), - value: year - } - ); - - let _wrong_path = "year=2021/month="; - assert!(matches!( - deltalake_core::DeltaTablePartition::try_from(_wrong_path).unwrap_err(), - deltalake_core::errors::DeltaTableError::PartitionError { - partition: _wrong_path - }, - )) -} - -#[test] -fn test_match_partition() { - let partition_2021 = deltalake_core::DeltaTablePartition { - key: "year".to_string(), - value: "2021".to_string(), - }; - let partition_2020 = deltalake_core::DeltaTablePartition { - key: "year".to_string(), - value: "2020".to_string(), - }; - let partition_2019 = deltalake_core::DeltaTablePartition { - key: "year".to_string(), - value: "2019".to_string(), - }; - - let partition_year_2020_filter = deltalake_core::PartitionFilter { - key: "year".to_string(), - value: deltalake_core::PartitionValue::Equal("2020".to_string()), - }; - let partition_month_12_filter = deltalake_core::PartitionFilter { - key: "month".to_string(), - value: deltalake_core::PartitionValue::Equal("12".to_string()), - }; - let string_type = DataType::Primitive(PrimitiveType::String); - - assert!(!partition_year_2020_filter.match_partition(&partition_2021, &string_type)); - assert!(partition_year_2020_filter.match_partition(&partition_2020, &string_type)); - assert!(!partition_year_2020_filter.match_partition(&partition_2019, &string_type)); - assert!(!partition_month_12_filter.match_partition(&partition_2019, &string_type)); - - let partition_2020_12_31_23_59_59 = deltalake_core::DeltaTablePartition { - key: "time".to_string(), - value: "2020-12-31 23:59:59".to_string(), - }; - - let partition_time_2020_12_31_23_59_59_filter = deltalake_core::PartitionFilter { - key: "time".to_string(), - value: deltalake_core::PartitionValue::Equal("2020-12-31 23:59:59.000000".to_string()), - }; - - assert!(partition_time_2020_12_31_23_59_59_filter.match_partition( - &partition_2020_12_31_23_59_59, - &DataType::Primitive(PrimitiveType::Timestamp) - )); - assert!(!partition_time_2020_12_31_23_59_59_filter - .match_partition(&partition_2020_12_31_23_59_59, &string_type)); -} - -#[test] -fn test_match_filters() { - let partitions = vec![ - deltalake_core::DeltaTablePartition { - key: "year".to_string(), - value: "2021".to_string(), - }, - deltalake_core::DeltaTablePartition { - key: "month".to_string(), - value: "12".to_string(), - }, - ]; - - let string_type = DataType::Primitive(PrimitiveType::String); - let partition_data_types: HashMap<&String, &DataType> = vec![ - (&partitions[0].key, &string_type), - (&partitions[1].key, &string_type), - ] - .into_iter() - .collect(); - - let valid_filters = deltalake_core::PartitionFilter { - key: "year".to_string(), - value: deltalake_core::PartitionValue::Equal("2021".to_string()), - }; - - let valid_filter_month = deltalake_core::PartitionFilter { - key: "month".to_string(), - value: deltalake_core::PartitionValue::Equal("12".to_string()), - }; - - let invalid_filter = deltalake_core::PartitionFilter { - key: "year".to_string(), - value: deltalake_core::PartitionValue::Equal("2020".to_string()), - }; - - assert!(valid_filters.match_partitions(&partitions, &partition_data_types),); - assert!(valid_filter_month.match_partitions(&partitions, &partition_data_types),); - assert!(!invalid_filter.match_partitions(&partitions, &partition_data_types),); -} - #[tokio::test] async fn read_null_partitions_from_checkpoint() { use deltalake_core::kernel::Add; diff --git a/crates/test/src/lib.rs b/crates/test/src/lib.rs index 56781aaa3c..dd8c2a2951 100644 --- a/crates/test/src/lib.rs +++ b/crates/test/src/lib.rs @@ -46,10 +46,6 @@ impl TestContext { let backend_ref = backend.as_ref().map(|s| s.as_str()); match backend_ref { Ok("LOCALFS") | Err(std::env::VarError::NotPresent) => setup_local_context().await, - #[cfg(feature = "azure")] - Ok("AZURE_GEN2") => adls::setup_azure_gen2_context().await, - #[cfg(feature = "hdfs")] - Ok("HDFS") => hdfs::setup_hdfs_context(), _ => panic!("Invalid backend for delta-rs tests"), } }