From 1a2c9251b6adef1526dd52e57b71db74a290112a Mon Sep 17 00:00:00 2001
From: Scott Donnelly <scott@donnel.ly>
Date: Mon, 29 Apr 2024 19:38:10 +0100
Subject: [PATCH] test: add more tests for InclusiveMetricsEvaluator

---
 .../visitors/inclusive_metrics_evaluator.rs   | 1621 ++++++++++++++++-
 1 file changed, 1569 insertions(+), 52 deletions(-)
diff --git a/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs b/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs
index e72c3fe88..5605c02ba 100644
--- a/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs
+++ b/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs
@@ -47,6 +47,13 @@ impl<'a> InclusiveMetricsEvaluator<'a> {
             return ROWS_CANNOT_MATCH;
         }
 
+        // Older version don't correctly implement record count from avro file and thus
+        // set record count -1 when importing avro tables to iceberg tables. This should
+        // be updated once we implemented and set correct record count.
+        if data_file.record_count < 0 {
+            return ROWS_MIGHT_MATCH;
+        }
+
         let mut evaluator = Self::new(data_file);
         visit(&mut evaluator, filter)
     }
@@ -75,14 +82,14 @@ impl<'a> InclusiveMetricsEvaluator<'a> {
         let nan_count = self.nan_count(field_id);
         let value_count = self.value_count(field_id);
 
-        nan_count == value_count
+        nan_count.is_some() && nan_count == value_count
     }
 
     fn contains_nulls_only(&self, field_id: i32) -> bool {
         let null_count = self.null_count(field_id);
         let value_count = self.value_count(field_id);
 
-        null_count == value_count
+        null_count.is_some() && null_count == value_count
     }
 
     fn may_contain_null(&self, field_id: i32) -> bool {
@@ -126,7 +133,7 @@ impl<'a> InclusiveMetricsEvaluator<'a> {
                 ));
             };
 
-            if cmp_fn(datum.literal(), bound) {
+            if cmp_fn(bound, datum.literal()) {
                 return ROWS_MIGHT_MATCH;
             }
 
@@ -137,8 +144,6 @@ impl<'a> InclusiveMetricsEvaluator<'a> {
     }
 }
 
-// Remove this annotation once all todos have been removed
-#[allow(unused_variables)]
 impl BoundPredicateVisitor for InclusiveMetricsEvaluator<'_> {
     type T = bool;
 
@@ -303,8 +308,8 @@ impl BoundPredicateVisitor for InclusiveMetricsEvaluator<'_> {
 
     fn not_eq(
         &mut self,
-        reference: &BoundReference,
-        datum: &Datum,
+        _reference: &BoundReference,
+        _datum: &Datum,
         _predicate: &BoundPredicate,
     ) -> crate::Result<bool> {
         // Because the bounds are not necessarily a min or max value,
@@ -325,7 +330,7 @@ impl BoundPredicateVisitor for InclusiveMetricsEvaluator<'_> {
             return ROWS_CANNOT_MATCH;
         }
 
-        let PrimitiveLiteral::String(prefix) = datum.literal() else {
+        let PrimitiveLiteral::String(datum) = datum.literal() else {
             return Err(Error::new(
                 ErrorKind::Unexpected,
                 "Cannot use StartsWith operator on non-string values",
@@ -340,11 +345,12 @@ impl BoundPredicateVisitor for InclusiveMetricsEvaluator<'_> {
                 ));
             };
 
-            let length = lower_bound.len().min(prefix.len());
+            let prefix_length = lower_bound.chars().count().min(datum.chars().count());
 
             // truncate lower bound so that its length
             // is not greater than the length of prefix
-            if prefix[..length] < lower_bound[..length] {
+            let truncated_lower_bound = lower_bound.chars().take(prefix_length).collect::<String>();
+            if datum < &truncated_lower_bound {
                 return ROWS_CANNOT_MATCH;
             }
         }
@@ -357,11 +363,12 @@ impl BoundPredicateVisitor for InclusiveMetricsEvaluator<'_> {
                 ));
             };
 
-            let length = upper_bound.len().min(prefix.len());
+            let prefix_length = upper_bound.chars().count().min(datum.chars().count());
 
             // truncate upper bound so that its length
             // is not greater than the length of prefix
-            if prefix[..length] > upper_bound[..length] {
+            let truncated_upper_bound = upper_bound.chars().take(prefix_length).collect::<String>();
+            if datum > &truncated_upper_bound {
                 return ROWS_CANNOT_MATCH;
             }
         }
@@ -407,7 +414,9 @@ impl BoundPredicateVisitor for InclusiveMetricsEvaluator<'_> {
             return ROWS_MIGHT_MATCH;
         }
 
-        if lower_bound_str[..prefix.len()] == *prefix {
+        let prefix_len = prefix.chars().count();
+
+        if lower_bound_str.chars().take(prefix_len).collect::<String>() == *prefix {
             // lower bound matches the prefix
 
             let Some(upper_bound) = self.upper_bound(field_id) else {
@@ -422,11 +431,11 @@ impl BoundPredicateVisitor for InclusiveMetricsEvaluator<'_> {
             };
 
             // if upper is shorter than the prefix then upper can't start with the prefix
-            if upper_bound.len() < prefix.len() {
+            if upper_bound.chars().count() < prefix_len {
                 return ROWS_MIGHT_MATCH;
             }
 
-            if upper_bound[..prefix.len()] == *prefix {
+            if upper_bound.chars().take(prefix_len).collect::<String>() == *prefix {
                 // both bounds match the prefix, so all rows must match the
                 // prefix and therefore do not satisfy the predicate
                 return ROWS_CANNOT_MATCH;
@@ -494,8 +503,8 @@ impl BoundPredicateVisitor for InclusiveMetricsEvaluator<'_> {
 
     fn not_in(
         &mut self,
-        reference: &BoundReference,
-        literals: &FnvHashSet<Datum>,
+        _reference: &BoundReference,
+        _literals: &FnvHashSet<Datum>,
         _predicate: &BoundPredicate,
     ) -> crate::Result<bool> {
         // Because the bounds are not necessarily a min or max value,
@@ -508,14 +517,26 @@ impl BoundPredicateVisitor for InclusiveMetricsEvaluator<'_> {
 #[cfg(test)]
 mod test {
     use crate::expr::visitors::inclusive_metrics_evaluator::InclusiveMetricsEvaluator;
-    use crate::expr::PredicateOperator::NotNull;
-    use crate::expr::{Bind, BoundPredicate, Predicate, Reference, UnaryExpression};
+    use crate::expr::PredicateOperator::{
+        Eq, GreaterThan, GreaterThanOrEq, In, IsNan, IsNull, LessThan, LessThanOrEq, NotEq, NotIn,
+        NotNan, NotNull, NotStartsWith, StartsWith,
+    };
+    use crate::expr::{
+        BinaryExpression, Bind, BoundPredicate, Predicate, Reference, SetExpression,
+        UnaryExpression,
+    };
     use crate::spec::{
-        DataContentType, DataFile, DataFileFormat, FieldSummary, NestedField, PartitionField,
+        DataContentType, DataFile, DataFileFormat, Datum, Literal, NestedField, PartitionField,
         PartitionSpec, PrimitiveType, Schema, Struct, Transform, Type,
     };
+    use fnv::FnvHashSet;
+    use std::collections::HashMap;
+    use std::ops::Not;
     use std::sync::Arc;
 
+    const INT_MIN_VALUE: i32 = 30;
+    const INT_MAX_VALUE: i32 = 79;
+
     #[test]
     fn test_data_file_no_partitions() {
         let (table_schema_ref, _partition_spec_ref) = create_test_schema_and_partition_spec();
@@ -526,8 +547,7 @@ mod test {
 
         let case_sensitive = false;
 
-        let manifest_file_partitions = vec![];
-        let data_file = create_test_data_file(manifest_file_partitions);
+        let data_file = create_test_data_file();
 
         let result =
             InclusiveMetricsEvaluator::eval(&partition_filter, &data_file, case_sensitive).unwrap();
@@ -538,76 +558,1185 @@ mod test {
     #[test]
     fn test_all_nulls() {
         let result =
-            InclusiveMetricsEvaluator::eval(&not_null("all_nulls"), get_test_file(), true).unwrap();
-
+            InclusiveMetricsEvaluator::eval(&not_null("all_nulls"), &get_test_file_1(), true)
+                .unwrap();
         assert!(!result, "Should skip: no non-null value in all null column");
+
+        let result =
+            InclusiveMetricsEvaluator::eval(&less_than("all_nulls", "a"), &get_test_file_1(), true)
+                .unwrap();
+        assert!(!result, "Should skip: LessThan on an all null column");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &less_than_or_equal("all_nulls", "a"),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(
+            !result,
+            "Should skip: LessThanOrEqual on an all null column"
+        );
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &greater_than("all_nulls", "a"),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(!result, "Should skip: GreaterThan on an all null column");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &greater_than_or_equal("all_nulls", "a"),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(
+            !result,
+            "Should skip: GreaterThanOrEqual on an all null column"
+        );
+
+        let result =
+            InclusiveMetricsEvaluator::eval(&equal("all_nulls", "a"), &get_test_file_1(), true)
+                .unwrap();
+        assert!(!result, "Should skip: Equal on an all null column");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &starts_with("all_nulls", "a"),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(!result, "Should skip: StartsWith on an all null column");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_starts_with("all_nulls", "a"),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: NotStartsWith on an all null column");
+
+        let result =
+            InclusiveMetricsEvaluator::eval(&not_null("some_nulls"), &get_test_file_1(), true)
+                .unwrap();
+        assert!(
+            result,
+            "Should read: col with some nulls could contain a non-null value"
+        );
+
+        let result =
+            InclusiveMetricsEvaluator::eval(&not_null("no_nulls"), &get_test_file_1(), true)
+                .unwrap();
+        assert!(
+            result,
+            "Should read: col with all nulls contains a non-null value"
+        );
     }
 
     #[test]
-    fn test_no_nulls() {}
+    fn test_no_nulls() {
+        let result =
+            InclusiveMetricsEvaluator::eval(&is_null("all_nulls"), &get_test_file_1(), true)
+                .unwrap();
+        assert!(
+            result,
+            "Should read: col with all nulls contains a non-null value"
+        );
 
-    #[test]
-    fn test_is_nan() {}
+        let result =
+            InclusiveMetricsEvaluator::eval(&is_null("some_nulls"), &get_test_file_1(), true)
+                .unwrap();
+        assert!(
+            result,
+            "Should read: col with some nulls could contain a non-null value"
+        );
+
+        let result =
+            InclusiveMetricsEvaluator::eval(&is_null("no_nulls"), &get_test_file_1(), true)
+                .unwrap();
+        assert!(
+            !result,
+            "Should skip: col with no nulls can't contains a non-null value"
+        );
+    }
 
     #[test]
-    fn test_not_nan() {}
+    fn test_is_nan() {
+        let result =
+            InclusiveMetricsEvaluator::eval(&is_nan("all_nans"), &get_test_file_1(), true).unwrap();
+        assert!(
+            result,
+            "Should read: col with all nans must contains a nan value"
+        );
+
+        let result =
+            InclusiveMetricsEvaluator::eval(&is_nan("some_nans"), &get_test_file_1(), true)
+                .unwrap();
+        assert!(
+            result,
+            "Should read: col with some nans could contains a nan value"
+        );
+
+        let result =
+            InclusiveMetricsEvaluator::eval(&is_nan("no_nans"), &get_test_file_1(), true).unwrap();
+        assert!(
+            !result,
+            "Should skip: col with no nans can't contains a nan value"
+        );
+
+        let result =
+            InclusiveMetricsEvaluator::eval(&is_nan("all_nulls_double"), &get_test_file_1(), true)
+                .unwrap();
+        assert!(
+            !result,
+            "Should skip: col with no nans can't contains a nan value"
+        );
+
+        let result =
+            InclusiveMetricsEvaluator::eval(&is_nan("no_nan_stats"), &get_test_file_1(), true)
+                .unwrap();
+        assert!(
+            result,
+            "Should read: no guarantee col is nan-free without nan stats"
+        );
+
+        let result =
+            InclusiveMetricsEvaluator::eval(&is_nan("all_nans_v1_stats"), &get_test_file_1(), true)
+                .unwrap();
+        assert!(
+            result,
+            "Should read: col with all nans must contains a nan value"
+        );
+
+        let result =
+            InclusiveMetricsEvaluator::eval(&is_nan("nan_and_null_only"), &get_test_file_1(), true)
+                .unwrap();
+        assert!(
+            result,
+            "Should read: col with nans and nulls must contain a nan value"
+        );
+    }
 
     #[test]
-    fn test_required_column() {}
+    fn test_not_nan() {
+        let result =
+            InclusiveMetricsEvaluator::eval(&not_nan("all_nans"), &get_test_file_1(), true)
+                .unwrap();
+        assert!(
+            !result,
+            "Should read: col with all nans must contains a nan value"
+        );
+
+        let result =
+            InclusiveMetricsEvaluator::eval(&not_nan("some_nans"), &get_test_file_1(), true)
+                .unwrap();
+        assert!(
+            result,
+            "Should read: col with some nans could contains a nan value"
+        );
+
+        let result =
+            InclusiveMetricsEvaluator::eval(&not_nan("no_nans"), &get_test_file_1(), true).unwrap();
+        assert!(
+            result,
+            "Should read: col with no nans might contains a non-nan value"
+        );
+
+        let result =
+            InclusiveMetricsEvaluator::eval(&not_nan("all_nulls_double"), &get_test_file_1(), true)
+                .unwrap();
+        assert!(
+            result,
+            "Should read: col with no nans can't contains a nan value"
+        );
+
+        let result =
+            InclusiveMetricsEvaluator::eval(&not_nan("no_nan_stats"), &get_test_file_1(), true)
+                .unwrap();
+        assert!(
+            result,
+            "Should read: no guarantee col is nan-free without nan stats"
+        );
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_nan("all_nans_v1_stats"),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(
+            result,
+            "Should read: col with all nans must contains a nan value"
+        );
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_nan("nan_and_null_only"),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(
+            result,
+            "Should read: col with nans and nulls may contain a non-nan value"
+        );
+    }
 
     #[test]
-    fn test_missing_column() {}
+    fn test_required_column() {
+        let result =
+            InclusiveMetricsEvaluator::eval(&not_null("required"), &get_test_file_1(), true)
+                .unwrap();
+        assert!(result, "Should read: required columns are always non-null");
+
+        let result =
+            InclusiveMetricsEvaluator::eval(&is_null("required"), &get_test_file_1(), true)
+                .unwrap();
+        assert!(!result, "Should skip: required columns are always non-null");
+    }
 
     #[test]
-    fn test_missing_stats() {}
+    #[should_panic]
+    fn test_missing_column() {
+        let _result =
+            InclusiveMetricsEvaluator::eval(&less_than("missing", "a"), &get_test_file_1(), true);
+    }
 
     #[test]
-    fn test_zero_record_file() {}
+    fn test_missing_stats() {
+        let missing_stats_datafile = create_test_data_file();
+
+        let expressions = [
+            less_than_int("no_stats", 5),
+            less_than_or_equal_int("no_stats", 30),
+            equal_int("no_stats", 70),
+            greater_than_int("no_stats", 78),
+            greater_than_or_equal_int("no_stats", 90),
+            not_equal_int("no_stats", 101),
+            is_null("no_stats"),
+            not_null("no_stats"),
+            // is_nan("no_stats"),
+            // not_nan("no_stats"),
+        ];
+
+        for expression in expressions {
+            let result =
+                InclusiveMetricsEvaluator::eval(&expression, &missing_stats_datafile, true)
+                    .unwrap();
+
+            assert!(
+                result,
+                "Should read if stats are missing for {:?}",
+                &expression
+            );
+        }
+    }
 
     #[test]
-    fn test_and() {}
+    fn test_zero_record_file() {
+        let zero_records_datafile = create_zero_records_data_file();
+
+        let expressions = [
+            less_than_int("no_stats", 5),
+            less_than_or_equal_int("no_stats", 30),
+            equal_int("no_stats", 70),
+            greater_than_int("no_stats", 78),
+            greater_than_or_equal_int("no_stats", 90),
+            not_equal_int("no_stats", 101),
+            is_null("no_stats"),
+            not_null("no_stats"),
+            // is_nan("no_stats"),
+            // not_nan("no_stats"),
+        ];
+
+        for expression in expressions {
+            let result =
+                InclusiveMetricsEvaluator::eval(&expression, &zero_records_datafile, true).unwrap();
+
+            assert!(
+                result,
+                "Should skip if data file has zero records (expression: {:?})",
+                &expression
+            );
+        }
+    }
 
     #[test]
-    fn test_or() {}
+    fn test_not() {
+        // Not sure if we need a test for this, as we'd expect,
+        // as a precondition, that rewrite-not has already been applied.
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_less_than_int("id", INT_MIN_VALUE - 25),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: not(false)");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_greater_than_int("id", INT_MIN_VALUE - 25),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(!result, "Should skip: not(true)");
+    }
 
     #[test]
-    fn test_integer_lt() {}
+    fn test_and() {
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            LessThan,
+            Reference::new("id"),
+            Datum::int(INT_MIN_VALUE - 25),
+        ))
+        .and(Predicate::Binary(BinaryExpression::new(
+            GreaterThanOrEq,
+            Reference::new("id"),
+            Datum::int(INT_MIN_VALUE - 30),
+        )));
+
+        let bound_pred = filter.bind(schema.clone(), true).unwrap();
+
+        let result =
+            InclusiveMetricsEvaluator::eval(&bound_pred, &get_test_file_1(), true).unwrap();
+        assert!(!result, "Should skip: and(false, true)");
+
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            LessThan,
+            Reference::new("id"),
+            Datum::int(INT_MIN_VALUE - 25),
+        ))
+        .and(Predicate::Binary(BinaryExpression::new(
+            GreaterThanOrEq,
+            Reference::new("id"),
+            Datum::int(INT_MAX_VALUE + 1),
+        )));
+
+        let bound_pred = filter.bind(schema.clone(), true).unwrap();
+
+        let result =
+            InclusiveMetricsEvaluator::eval(&bound_pred, &get_test_file_1(), true).unwrap();
+        assert!(!result, "Should skip: and(false, false)");
+
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            GreaterThan,
+            Reference::new("id"),
+            Datum::int(INT_MIN_VALUE - 25),
+        ))
+        .and(Predicate::Binary(BinaryExpression::new(
+            LessThanOrEq,
+            Reference::new("id"),
+            Datum::int(INT_MIN_VALUE),
+        )));
+
+        let bound_pred = filter.bind(schema.clone(), true).unwrap();
+
+        let result =
+            InclusiveMetricsEvaluator::eval(&bound_pred, &get_test_file_1(), true).unwrap();
+        assert!(result, "Should read: and(true, true)");
+    }
 
     #[test]
-    fn test_integer_lt_eq() {}
+    fn test_or() {
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            LessThan,
+            Reference::new("id"),
+            Datum::int(INT_MIN_VALUE - 25),
+        ))
+        .or(Predicate::Binary(BinaryExpression::new(
+            GreaterThanOrEq,
+            Reference::new("id"),
+            Datum::int(INT_MIN_VALUE - 30),
+        )));
+
+        let bound_pred = filter.bind(schema.clone(), true).unwrap();
+
+        let result =
+            InclusiveMetricsEvaluator::eval(&bound_pred, &get_test_file_1(), true).unwrap();
+        assert!(result, "Should read: or(false, true)");
+
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            LessThan,
+            Reference::new("id"),
+            Datum::int(INT_MIN_VALUE - 25),
+        ))
+        .or(Predicate::Binary(BinaryExpression::new(
+            GreaterThanOrEq,
+            Reference::new("id"),
+            Datum::int(INT_MAX_VALUE + 1),
+        )));
+
+        let bound_pred = filter.bind(schema.clone(), true).unwrap();
+
+        let result =
+            InclusiveMetricsEvaluator::eval(&bound_pred, &get_test_file_1(), true).unwrap();
+        assert!(!result, "Should skip: or(false, false)");
+    }
 
     #[test]
-    fn test_integer_gt() {}
+    fn test_integer_lt() {
+        let result = InclusiveMetricsEvaluator::eval(
+            &less_than_int("id", INT_MIN_VALUE - 25),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(!result, "Should skip: id range below lower bound (5 < 30)");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &less_than_int("id", INT_MIN_VALUE),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(
+            !result,
+            "Should skip: id range below lower bound (30 is not < 30)"
+        );
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &less_than_int("id", INT_MIN_VALUE + 1),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: one possible id");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &less_than_int("id", INT_MAX_VALUE),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: many possible ids");
+    }
 
     #[test]
-    fn test_integer_gt_eq() {}
+    fn test_integer_lt_eq() {
+        let result = InclusiveMetricsEvaluator::eval(
+            &less_than_or_equal_int("id", INT_MIN_VALUE - 25),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(!result, "Should skip: id range below lower bound (5 < 30)");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &less_than_or_equal_int("id", INT_MIN_VALUE - 1),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(!result, "Should skip: id range below lower bound (29 < 30)");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &less_than_or_equal_int("id", INT_MIN_VALUE),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: one possible id");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &less_than_or_equal_int("id", INT_MAX_VALUE),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: many possible ids");
+    }
 
     #[test]
-    fn test_integer_eq() {}
+    fn test_integer_gt() {
+        let result = InclusiveMetricsEvaluator::eval(
+            &greater_than_int("id", INT_MAX_VALUE + 6),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(!result, "Should skip: id range above upper bound (85 > 79)");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &greater_than_int("id", INT_MAX_VALUE),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(
+            !result,
+            "Should skip: id range above upper bound (79 is not > 79)"
+        );
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &greater_than_int("id", INT_MAX_VALUE - 1),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: one possible id");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &greater_than_int("id", INT_MAX_VALUE - 4),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: many possible ids");
+    }
 
     #[test]
-    fn test_integer_not_eq() {}
+    fn test_integer_gt_eq() {
+        let result = InclusiveMetricsEvaluator::eval(
+            &greater_than_or_equal_int("id", INT_MAX_VALUE + 6),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(!result, "Should skip: id range above upper bound (85 < 79)");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &greater_than_or_equal_int("id", INT_MAX_VALUE + 1),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(!result, "Should skip: id range above upper bound (80 > 79)");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &greater_than_or_equal_int("id", INT_MAX_VALUE),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: one possible id");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &greater_than_or_equal_int("id", INT_MAX_VALUE - 4),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: many possible ids");
+    }
 
     #[test]
-    fn test_integer_not_eq_rewritten() {}
+    fn test_integer_eq() {
+        let result = InclusiveMetricsEvaluator::eval(
+            &equal_int("id", INT_MIN_VALUE - 25),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(!result, "Should skip: id below lower bound");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &equal_int("id", INT_MIN_VALUE - 1),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(!result, "Should skip: id below lower bound");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &equal_int("id", INT_MIN_VALUE),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id equal to lower bound");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &equal_int("id", INT_MAX_VALUE - 4),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id between lower and upper bounds");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &equal_int("id", INT_MAX_VALUE),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id equal to upper bound");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &equal_int("id", INT_MAX_VALUE + 1),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(!result, "Should skip: id above upper bound");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &equal_int("id", INT_MAX_VALUE + 6),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(!result, "Should skip: id above upper bound");
+    }
 
     #[test]
-    fn test_case_insensitive_integer_not_eq_rewritten() {}
+    fn test_integer_not_eq() {
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_equal_int("id", INT_MIN_VALUE - 25),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id below lower bound");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_equal_int("id", INT_MIN_VALUE - 1),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id below lower bound");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_equal_int("id", INT_MIN_VALUE),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id equal to lower bound");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_equal_int("id", INT_MAX_VALUE - 4),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id between lower and upper bounds");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_equal_int("id", INT_MAX_VALUE),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id equal to upper bound");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_equal_int("id", INT_MAX_VALUE + 1),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id above upper bound");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_equal_int("id", INT_MAX_VALUE + 6),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id above upper bound");
+    }
+
+    fn test_case_insensitive_integer_not_eq_rewritten() {
+        let result = InclusiveMetricsEvaluator::eval(
+            &equal_int_not_case_insensitive("ID", INT_MIN_VALUE - 25),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id below lower bound");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &equal_int_not_case_insensitive("ID", INT_MIN_VALUE - 1),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id below lower bound");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &equal_int_not_case_insensitive("ID", INT_MIN_VALUE),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id equal to lower bound");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &equal_int_not_case_insensitive("ID", INT_MAX_VALUE - 4),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id between lower and upper bound");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &equal_int_not_case_insensitive("ID", INT_MAX_VALUE),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id equal to upper bound");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &equal_int_not_case_insensitive("ID", INT_MAX_VALUE + 1),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id above upper bound");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &equal_int_not_case_insensitive("ID", INT_MAX_VALUE + 6),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id above upper bound");
+    }
 
     #[test]
-    fn test_case_sensitive_integer_not_eq_rewritten() {}
+    #[should_panic]
+    fn test_case_sensitive_integer_not_eq_rewritten() {
+        let _result =
+            InclusiveMetricsEvaluator::eval(&equal_int_not("ID", 5), &get_test_file_1(), true)
+                .unwrap();
+    }
 
     #[test]
-    fn test_string_starts_with() {}
+    fn test_string_starts_with() {
+        let result = InclusiveMetricsEvaluator::eval(
+            &starts_with("required", "a"),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: no stats");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &starts_with("required", "a"),
+            &get_test_file_2(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: range matches");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &starts_with("required", "aa"),
+            &get_test_file_2(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: range matches");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &starts_with("required", "aaa"),
+            &get_test_file_2(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: range matches");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &starts_with("required", "1s"),
+            &get_test_file_3(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: range matches");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &starts_with("required", "1str1x"),
+            &get_test_file_3(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: range matches");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &starts_with("required", "ff"),
+            &get_test_file_4(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: range matches");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &starts_with("required", "aB"),
+            &get_test_file_2(),
+            true,
+        )
+        .unwrap();
+        assert!(!result, "Should skip: range does not match");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &starts_with("required", "dWX"),
+            &get_test_file_2(),
+            true,
+        )
+        .unwrap();
+        assert!(!result, "Should skip: range does not match");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &starts_with("required", "5"),
+            &get_test_file_3(),
+            true,
+        )
+        .unwrap();
+        assert!(!result, "Should skip: range does not match");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &starts_with("required", "3str3x"),
+            &get_test_file_3(),
+            true,
+        )
+        .unwrap();
+        assert!(!result, "Should skip: range does not match");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &starts_with("some_empty", "房东整租霍"),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: range does matches");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &starts_with("all_nulls", ""),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(!result, "Should skip: range does not match");
+
+        // Note: This string has been created manually by taking
+        // the string "イロハニホヘト", which is an upper bound in
+        // the datafile returned by get_test_file_4(), truncating it
+        // to four character, and then appending the "ボ" character,
+        // which occupies the next code point after the 5th
+        // character in the string above, "ホ".
+        // In the Java implementation of Iceberg, this is done by
+        // the `truncateStringMax` function, but we don't yet have
+        // this implemented in iceberg-rust.
+        let above_max = "イロハニボ";
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &starts_with("required", above_max),
+            &get_test_file_4(),
+            true,
+        )
+        .unwrap();
+        assert!(!result, "Should skip: range does not match");
+    }
 
     #[test]
-    fn test_string_not_starts_with() {}
+    fn test_string_not_starts_with() {
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_starts_with("required", "a"),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: no stats");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_starts_with("required", "a"),
+            &get_test_file_2(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: range matches");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_starts_with("required", "aa"),
+            &get_test_file_2(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: range matches");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_starts_with("required", "aaa"),
+            &get_test_file_2(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: range matches");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_starts_with("required", "1s"),
+            &get_test_file_3(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: range matches");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_starts_with("required", "1str1x"),
+            &get_test_file_3(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: range matches");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_starts_with("required", "ff"),
+            &get_test_file_4(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: range matches");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_starts_with("required", "aB"),
+            &get_test_file_2(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: range matches");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_starts_with("required", "dWX"),
+            &get_test_file_2(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: range matches");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_starts_with("required", "5"),
+            &get_test_file_3(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: range matches");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_starts_with("required", "3str3x"),
+            &get_test_file_3(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: range matches");
+
+        let above_max = "イロハニホヘト";
+        let result = InclusiveMetricsEvaluator::eval(
+            &not_starts_with("required", above_max),
+            &get_test_file_4(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: range matches");
+    }
 
     #[test]
-    fn test_integer_in() {}
+    fn test_integer_in() {
+        let result = InclusiveMetricsEvaluator::eval(
+            &r#in_int("id", &[INT_MIN_VALUE - 25, INT_MIN_VALUE - 24]),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(
+            !result,
+            "Should skip: id below lower bound (5 < 30, 6 < 30)"
+        );
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &r#in_int("id", &[INT_MIN_VALUE - 2, INT_MIN_VALUE - 1]),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(
+            !result,
+            "Should skip: id below lower bound (28 < 30, 29 < 30)"
+        );
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &r#in_int("id", &[INT_MIN_VALUE - 1, INT_MIN_VALUE]),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id equal to lower bound (30 == 30)");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &r#in_int("id", &[INT_MAX_VALUE - 4, INT_MAX_VALUE - 3]),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(
+            result,
+            "Should read: id between lower and upper bounds (30 < 75 < 79, 30 < 76 < 79)"
+        );
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &r#in_int("id", &[INT_MAX_VALUE, INT_MAX_VALUE + 1]),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id equal to upper bound (79 == 79)");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &r#in_int("id", &[INT_MAX_VALUE + 1, INT_MAX_VALUE + 2]),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(
+            !result,
+            "Should skip: id above upper bound (80 > 79, 81 > 79)"
+        );
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &r#in_int("id", &[INT_MAX_VALUE + 6, INT_MAX_VALUE + 7]),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(
+            !result,
+            "Should skip: id above upper bound (85 > 79, 86 > 79)"
+        );
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &r#in_str("all_nulls", &["abc", "def"]),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(!result, "Should skip: in on all nulls column");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &r#in_str("some_nulls", &["abc", "def"]),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: in on some nulls column");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &r#in_str("no_nulls", &["abc", "def"]),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: in on no nulls column");
+
+        let ids = (-400..=0).collect::<Vec<_>>();
+        let result =
+            InclusiveMetricsEvaluator::eval(&r#in_int("id", &ids), &get_test_file_1(), true)
+                .unwrap();
+        assert!(
+            result,
+            "Should read: number of items in In expression greater than threshold"
+        );
+    }
 
     #[test]
-    fn test_integer_not_in() {}
+    fn test_integer_not_in() {
+        let result = InclusiveMetricsEvaluator::eval(
+            &r#not_in_int("id", &[INT_MIN_VALUE - 25, INT_MIN_VALUE - 24]),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id below lower bound (5 < 30, 6 < 30)");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &r#not_in_int("id", &[INT_MIN_VALUE - 2, INT_MIN_VALUE - 1]),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(
+            result,
+            "Should read: id below lower bound (28 < 30, 29 < 30)"
+        );
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &r#not_in_int("id", &[INT_MIN_VALUE - 1, INT_MIN_VALUE]),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id equal to lower bound (30 == 30)");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &r#not_in_int("id", &[INT_MAX_VALUE - 4, INT_MAX_VALUE - 3]),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(
+            result,
+            "Should read: id between lower and upper bounds (30 < 75 < 79, 30 < 76 < 79)"
+        );
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &r#not_in_int("id", &[INT_MAX_VALUE, INT_MAX_VALUE + 1]),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: id equal to upper bound (79 == 79)");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &r#not_in_int("id", &[INT_MAX_VALUE + 1, INT_MAX_VALUE + 2]),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(
+            result,
+            "Should read: id above upper bound (80 > 79, 81 > 79)"
+        );
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &r#not_in_int("id", &[INT_MAX_VALUE + 6, INT_MAX_VALUE + 7]),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(
+            result,
+            "Should read: id above upper bound (85 > 79, 86 > 79)"
+        );
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &r#not_in_str("all_nulls", &["abc", "def"]),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: NotIn on all nulls column");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &r#not_in_str("some_nulls", &["abc", "def"]),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: NotIn on some nulls column");
+
+        let result = InclusiveMetricsEvaluator::eval(
+            &r#not_in_str("no_nulls", &["abc", "def"]),
+            &get_test_file_1(),
+            true,
+        )
+        .unwrap();
+        assert!(result, "Should read: NotIn on no nulls column");
+    }
 
     fn create_test_schema_and_partition_spec() -> (Arc<Schema>, Arc<PartitionSpec>) {
         let table_schema = Schema::builder()
@@ -640,6 +1769,238 @@ mod test {
         filter.bind(schema.clone(), true).unwrap()
     }
 
+    fn is_null(reference: &str) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Unary(UnaryExpression::new(IsNull, Reference::new(reference)));
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn not_nan(reference: &str) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Unary(UnaryExpression::new(NotNan, Reference::new(reference)));
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn is_nan(reference: &str) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Unary(UnaryExpression::new(IsNan, Reference::new(reference)));
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn less_than(reference: &str, str_literal: &str) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            LessThan,
+            Reference::new(reference),
+            Datum::string(str_literal),
+        ));
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn less_than_or_equal(reference: &str, str_literal: &str) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            LessThanOrEq,
+            Reference::new(reference),
+            Datum::string(str_literal),
+        ));
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn greater_than(reference: &str, str_literal: &str) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            GreaterThan,
+            Reference::new(reference),
+            Datum::string(str_literal),
+        ));
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn greater_than_or_equal(reference: &str, str_literal: &str) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            GreaterThanOrEq,
+            Reference::new(reference),
+            Datum::string(str_literal),
+        ));
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn equal(reference: &str, str_literal: &str) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            Eq,
+            Reference::new(reference),
+            Datum::string(str_literal),
+        ));
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn less_than_int(reference: &str, int_literal: i32) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            LessThan,
+            Reference::new(reference),
+            Datum::int(int_literal),
+        ));
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn not_less_than_int(reference: &str, int_literal: i32) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            LessThan,
+            Reference::new(reference),
+            Datum::int(int_literal),
+        ))
+        .not();
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn less_than_or_equal_int(reference: &str, int_literal: i32) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            LessThanOrEq,
+            Reference::new(reference),
+            Datum::int(int_literal),
+        ));
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn greater_than_int(reference: &str, int_literal: i32) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            GreaterThan,
+            Reference::new(reference),
+            Datum::int(int_literal),
+        ));
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn not_greater_than_int(reference: &str, int_literal: i32) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            GreaterThan,
+            Reference::new(reference),
+            Datum::int(int_literal),
+        ))
+        .not();
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn greater_than_or_equal_int(reference: &str, int_literal: i32) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            GreaterThanOrEq,
+            Reference::new(reference),
+            Datum::int(int_literal),
+        ));
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn equal_int(reference: &str, int_literal: i32) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            Eq,
+            Reference::new(reference),
+            Datum::int(int_literal),
+        ));
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn equal_int_not(reference: &str, int_literal: i32) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            Eq,
+            Reference::new(reference),
+            Datum::int(int_literal),
+        ))
+        .not();
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn equal_int_not_case_insensitive(reference: &str, int_literal: i32) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            Eq,
+            Reference::new(reference),
+            Datum::int(int_literal),
+        ))
+        .not();
+        filter.bind(schema.clone(), false).unwrap()
+    }
+
+    fn not_equal_int(reference: &str, int_literal: i32) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            NotEq,
+            Reference::new(reference),
+            Datum::int(int_literal),
+        ));
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn starts_with(reference: &str, str_literal: &str) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            StartsWith,
+            Reference::new(reference),
+            Datum::string(str_literal),
+        ));
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn not_starts_with(reference: &str, str_literal: &str) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Binary(BinaryExpression::new(
+            NotStartsWith,
+            Reference::new(reference),
+            Datum::string(str_literal),
+        ));
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn in_int(reference: &str, int_literals: &[i32]) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Set(SetExpression::new(
+            In,
+            Reference::new(reference),
+            FnvHashSet::from_iter(int_literals.iter().map(|&lit| Datum::int(lit))),
+        ));
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn in_str(reference: &str, str_literals: &[&str]) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Set(SetExpression::new(
+            In,
+            Reference::new(reference),
+            FnvHashSet::from_iter(str_literals.iter().map(Datum::string)),
+        ));
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn not_in_int(reference: &str, int_literals: &[i32]) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Set(SetExpression::new(
+            NotIn,
+            Reference::new(reference),
+            FnvHashSet::from_iter(int_literals.iter().map(|&lit| Datum::int(lit))),
+        ));
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
+    fn not_in_str(reference: &str, str_literals: &[&str]) -> BoundPredicate {
+        let schema = create_test_schema();
+        let filter = Predicate::Set(SetExpression::new(
+            NotIn,
+            Reference::new(reference),
+            FnvHashSet::from_iter(str_literals.iter().map(Datum::string)),
+        ));
+        filter.bind(schema.clone(), true).unwrap()
+    }
+
     fn create_test_schema() -> Arc<Schema> {
         let table_schema = Schema::builder()
             .with_fields(vec![
@@ -654,7 +2015,7 @@ mod test {
                     Type::Primitive(PrimitiveType::Int),
                 )),
                 Arc::new(NestedField::required(
-                    2,
+                    3,
                     "required",
                     Type::Primitive(PrimitiveType::String),
                 )),
@@ -717,12 +2078,10 @@ mod test {
             .build()
             .unwrap();
 
-        let table_schema_ref = Arc::new(table_schema);
-
-        table_schema_ref
+        Arc::new(table_schema)
     }
 
-    fn create_test_data_file(_manifest_file_partitions: Vec<FieldSummary>) -> DataFile {
+    fn create_test_data_file() -> DataFile {
         DataFile {
             content: DataContentType::Data,
             file_path: "/test/path".to_string(),
@@ -742,4 +2101,162 @@ mod test {
             sort_order_id: None,
         }
     }
+
+    fn create_zero_records_data_file() -> DataFile {
+        DataFile {
+            content: DataContentType::Data,
+            file_path: "/test/path".to_string(),
+            file_format: DataFileFormat::Parquet,
+            partition: Struct::empty(),
+            record_count: 0,
+            file_size_in_bytes: 10,
+            column_sizes: Default::default(),
+            value_counts: Default::default(),
+            null_value_counts: Default::default(),
+            nan_value_counts: Default::default(),
+            lower_bounds: Default::default(),
+            upper_bounds: Default::default(),
+            key_metadata: vec![],
+            split_offsets: vec![],
+            equality_ids: vec![],
+            sort_order_id: None,
+        }
+    }
+
+    fn get_test_file_1() -> DataFile {
+        DataFile {
+            content: DataContentType::Data,
+            file_path: "/test/path".to_string(),
+            file_format: DataFileFormat::Parquet,
+            partition: Struct::empty(),
+            record_count: 50,
+            file_size_in_bytes: 10,
+
+            value_counts: HashMap::from([
+                (4, 50),
+                (5, 50),
+                (6, 50),
+                (7, 50),
+                (8, 50),
+                (9, 50),
+                (10, 50),
+                (11, 50),
+                (12, 50),
+                (13, 50),
+                (14, 50),
+            ]),
+
+            null_value_counts: HashMap::from([
+                (4, 50),
+                (5, 10),
+                (6, 0),
+                (10, 50),
+                (11, 0),
+                (12, 1),
+                (14, 0),
+            ]),
+
+            nan_value_counts: HashMap::from([(7, 50), (8, 10), (9, 0)]),
+
+            lower_bounds: HashMap::from([
+                (1, Literal::int(INT_MIN_VALUE)),
+                (11, Literal::float(f32::NAN)),
+                (12, Literal::double(f64::NAN)),
+                (14, Literal::string("")),
+            ]),
+
+            upper_bounds: HashMap::from([
+                (1, Literal::int(INT_MAX_VALUE)),
+                (11, Literal::float(f32::NAN)),
+                (12, Literal::double(f64::NAN)),
+                (14, Literal::string("房东整租霍营小区二层两居室")),
+            ]),
+
+            column_sizes: Default::default(),
+            key_metadata: vec![],
+            split_offsets: vec![],
+            equality_ids: vec![],
+            sort_order_id: None,
+        }
+    }
+    fn get_test_file_2() -> DataFile {
+        DataFile {
+            content: DataContentType::Data,
+            file_path: "file_2.avro".to_string(),
+            file_format: DataFileFormat::Parquet,
+            partition: Struct::empty(),
+            record_count: 50,
+            file_size_in_bytes: 10,
+
+            value_counts: HashMap::from([(3, 20)]),
+
+            null_value_counts: HashMap::from([(3, 2)]),
+
+            nan_value_counts: HashMap::default(),
+
+            lower_bounds: HashMap::from([(3, Literal::string("aa"))]),
+
+            upper_bounds: HashMap::from([(3, Literal::string("dC"))]),
+
+            column_sizes: Default::default(),
+            key_metadata: vec![],
+            split_offsets: vec![],
+            equality_ids: vec![],
+            sort_order_id: None,
+        }
+    }
+
+    fn get_test_file_3() -> DataFile {
+        DataFile {
+            content: DataContentType::Data,
+            file_path: "file_3.avro".to_string(),
+            file_format: DataFileFormat::Parquet,
+            partition: Struct::empty(),
+            record_count: 50,
+            file_size_in_bytes: 10,
+
+            value_counts: HashMap::from([(3, 20)]),
+
+            null_value_counts: HashMap::from([(3, 2)]),
+
+            nan_value_counts: HashMap::default(),
+
+            lower_bounds: HashMap::from([(3, Literal::string("1str1"))]),
+
+            upper_bounds: HashMap::from([(3, Literal::string("3str3"))]),
+
+            column_sizes: Default::default(),
+            key_metadata: vec![],
+            split_offsets: vec![],
+            equality_ids: vec![],
+            sort_order_id: None,
+        }
+    }
+
+    fn get_test_file_4() -> DataFile {
+        DataFile {
+            content: DataContentType::Data,
+            file_path: "file_4.avro".to_string(),
+            file_format: DataFileFormat::Parquet,
+            partition: Struct::empty(),
+            record_count: 50,
+            file_size_in_bytes: 10,
+
+            value_counts: HashMap::from([(3, 20)]),
+
+            null_value_counts: HashMap::from([(3, 2)]),
+
+            nan_value_counts: HashMap::default(),
+
+            lower_bounds: HashMap::from([(3, Literal::string("abc"))]),
+
+            upper_bounds: HashMap::from([(3, Literal::string("イロハニホヘト"))]),
+
+            column_sizes: Default::default(),
+            key_metadata: vec![],
+            split_offsets: vec![],
+            equality_ids: vec![],
+            sort_order_id: None,
+        }
+    }
 }