Skip to content

Commit

Permalink
Add tests that verify crc checks
Browse files Browse the repository at this point in the history
  • Loading branch information
xmakro committed Sep 1, 2024
1 parent 48b9241 commit da633e4
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 0 deletions.
55 changes: 55 additions & 0 deletions parquet/tests/arrow_reader/checksum.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
use std::path::PathBuf;

use arrow::util::test_util::parquet_test_data;
use parquet::arrow::arrow_reader::ArrowReaderBuilder;

/// Reads `datapage_v1-corrupt-checksum.parquet` and checks the per-batch outcome:
/// the first and fourth batches must report a page CRC mismatch, the second and
/// third must succeed, and the fifth must report mismatched child array lengths.
#[test]
fn test_datapage_v1_corrupt_checksum() {
    let crc_mismatch = || -> Result<(), String> {
        Err("Parquet argument error: Parquet error: Page CRC checksum mismatch".to_string())
    };
    let expected: Vec<Result<(), String>> = vec![
        crc_mismatch(),
        Ok(()),
        Ok(()),
        crc_mismatch(),
        Err("Parquet argument error: Parquet error: Not all children array length are the same!".to_string()),
    ];

    let actual = read_file_batch_errors("datapage_v1-corrupt-checksum.parquet");
    assert_eq!(actual, expected);
}

/// Reads `datapage_v1-uncompressed-checksum.parquet` and expects exactly five
/// results from the reader, all of them successful.
#[test]
fn test_datapage_v1_uncompressed_checksum() {
    let expected: Vec<Result<(), String>> = vec![Ok(()); 5];
    let actual = read_file_batch_errors("datapage_v1-uncompressed-checksum.parquet");
    assert_eq!(actual, expected);
}

/// Reads `datapage_v1-snappy-compressed-checksum.parquet` and expects exactly
/// five results from the reader, all of them successful.
#[test]
fn test_datapage_v1_snappy_compressed_checksum() {
    let expected: Vec<Result<(), String>> = vec![Ok(()); 5];
    let actual = read_file_batch_errors("datapage_v1-snappy-compressed-checksum.parquet");
    assert_eq!(actual, expected);
}


/// Reads `plain-dict-uncompressed-checksum.parquet` and expects a single,
/// successful result from the reader.
#[test]
fn test_plain_dict_uncompressed_checksum() {
    let expected: Vec<Result<(), String>> = vec![Ok(())];
    let actual = read_file_batch_errors("plain-dict-uncompressed-checksum.parquet");
    assert_eq!(actual, expected);
}
/// Reads `rle-dict-snappy-checksum.parquet` and expects a single, successful
/// result from the reader.
#[test]
fn test_rle_dict_snappy_checksum() {
    let expected: Vec<Result<(), String>> = vec![Ok(())];
    let actual = read_file_batch_errors("rle-dict-snappy-checksum.parquet");
    assert_eq!(actual, expected);
}

/// Opens `name` inside the parquet test-data directory and drains its reader.
///
/// The returned vector holds one entry per item the reader yields, in order:
/// `Ok(())` stands in for a batch that was read successfully (its data is
/// discarded), and `Err` carries the error rendered through `to_string()`.
///
/// # Panics
///
/// Panics if the file cannot be opened or the reader cannot be constructed.
fn read_file_batch_errors(name: &str) -> Vec<Result<(), String>> {
    let path = PathBuf::from(parquet_test_data()).join(name);
    println!("Reading file: {:?}", path);

    let file = std::fs::File::open(&path).unwrap();
    let builder = ArrowReaderBuilder::try_new(file).unwrap();
    let reader = builder.build().unwrap();

    let mut outcomes = Vec::new();
    for batch in reader {
        // Keep only success/failure; the batch contents are irrelevant to callers.
        outcomes.push(batch.map(|_| ()).map_err(|e| e.to_string()));
    }
    outcomes
}
2 changes: 2 additions & 0 deletions parquet/tests/arrow_reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ use std::sync::Arc;
use tempfile::NamedTempFile;

mod bad_data;
#[cfg(feature = "crc")]
mod checksum;
mod statistics;

// returns a struct array with columns "int32_col", "float32_col" and "float64_col" with the specified values
Expand Down

0 comments on commit da633e4

Please sign in to comment.