Skip to content

Commit

Permalink
Evaluate null_regex for string type in csv (now such values will be parsed as `Null` rather than `""`) (#4942)
Browse files Browse the repository at this point in the history

* fix: add null_regex for string type in csv

* Update arrow-csv/src/reader/mod.rs

Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com>

---------

Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com>
  • Loading branch information
haohuaijin and tustvold authored Oct 16, 2023
1 parent ce2a958 commit 95b015c
Showing 1 changed file with 8 additions and 6 deletions.
14 changes: 8 additions & 6 deletions arrow-csv/src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -791,7 +791,10 @@ fn parse(
}
DataType::Utf8 => Ok(Arc::new(
rows.iter()
.map(|row| Some(row.get(i)))
.map(|row| {
let s = row.get(i);
(!null_regex.is_null(s)).then_some(s)
})
.collect::<StringArray>(),
) as ArrayRef),
DataType::Dictionary(key_type, value_type)
Expand Down Expand Up @@ -1495,7 +1498,7 @@ mod tests {
let schema = Arc::new(Schema::new(vec![
Field::new("c_int", DataType::UInt64, false),
Field::new("c_float", DataType::Float32, true),
Field::new("c_string", DataType::Utf8, false),
Field::new("c_string", DataType::Utf8, true),
Field::new("c_bool", DataType::Boolean, false),
]));

Expand Down Expand Up @@ -1596,8 +1599,7 @@ mod tests {
assert!(batch.column(0).is_null(1));
assert!(batch.column(1).is_null(2));
assert!(batch.column(3).is_null(4));
// String won't be empty
assert!(!batch.column(2).is_null(3));
assert!(batch.column(2).is_null(3));
assert!(!batch.column(2).is_null(4));
}

Expand Down Expand Up @@ -2237,8 +2239,8 @@ mod tests {

fn err_test(csv: &[u8], expected: &str) {
let schema = Arc::new(Schema::new(vec![
Field::new("text1", DataType::Utf8, false),
Field::new("text2", DataType::Utf8, false),
Field::new("text1", DataType::Utf8, true),
Field::new("text2", DataType::Utf8, true),
]));
let buffer = std::io::BufReader::with_capacity(2, Cursor::new(csv));
let b = ReaderBuilder::new(schema)
Expand Down

0 comments on commit 95b015c

Please sign in to comment.