From 57e567fb279a95a94d2e3db4a3ca2111f3b21a24 Mon Sep 17 00:00:00 2001
From: Ritchie Vink <ritchie46@gmail.com>
Date: Tue, 27 Dec 2022 12:52:59 +0100
Subject: [PATCH] fix(rust, python): csv, read escaped "" as missing (#5912)

---
 polars/polars-io/src/csv/buffer.rs  | 13 +++++++------
 py-polars/tests/unit/io/test_csv.py | 22 ++++++++++++++++++++++
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/polars/polars-io/src/csv/buffer.rs b/polars/polars-io/src/csv/buffer.rs
index 3413039a4010c..f4bf9919678c3 100644
--- a/polars/polars-io/src/csv/buffer.rs
+++ b/polars/polars-io/src/csv/buffer.rs
@@ -95,7 +95,7 @@ where
                         let bytes = skip_whitespace(bytes);
                         return self.parse_bytes(bytes, ignore_errors, needs_escaping);
                     }
-                    if ignore_errors {
+                    if ignore_errors || bytes.is_empty() {
                         self.append_null()
                     } else {
                         return Err(PolarsError::ComputeError("".into()));
@@ -349,16 +349,17 @@ impl ParsedBuffer for BooleanChunkedBuilder {
         &mut self,
         bytes: &[u8],
         ignore_errors: bool,
-        _needs_escaping: bool,
+        needs_escaping: bool,
     ) -> PolarsResult<()> {
+        let bytes = if needs_escaping {
+            &bytes[1..bytes.len() - 1]
+        } else {
+            bytes
+        };
         if bytes.eq_ignore_ascii_case(b"false") {
             self.append_value(false);
         } else if bytes.eq_ignore_ascii_case(b"true") {
             self.append_value(true);
-        } else if bytes.eq_ignore_ascii_case(b"\"false\"") {
-            self.append_value(false);
-        } else if bytes.eq_ignore_ascii_case(b"\"true\"") {
-            self.append_value(true);
         } else if ignore_errors || bytes.is_empty() {
             self.append_null();
         } else {
diff --git a/py-polars/tests/unit/io/test_csv.py b/py-polars/tests/unit/io/test_csv.py
index 492cdca718899..304bf27ccd834 100644
--- a/py-polars/tests/unit/io/test_csv.py
+++ b/py-polars/tests/unit/io/test_csv.py
@@ -961,3 +961,25 @@ def test_csv_single_categorical_null() -> None:
 
     assert df.dtypes == [pl.Utf8, pl.Categorical, pl.Utf8]
     assert df.to_dict(False) == {"x": ["A"], "y": [None], "z": ["A"]}
+
+
+def test_csv_quoted_missing() -> None:  # quoted empty field "" -> null (#5912)
+    csv = '''"col1"|"col2"|"col3"|"col4"
+"0"|"Free text with a line
+break"|"123"|"456"
+"1"|"Free text without a linebreak"|""|"789"
+"0"|"Free text with 
+two 
+linebreaks"|"101112"|"131415"'''  # noqa: W291
+    assert pl.read_csv(csv.encode(), sep="|", dtypes={"col3": pl.Int32}).to_dict(
+        False
+    ) == {
+        "col1": [0, 1, 0],
+        "col2": [
+            "Free text with a line\nbreak",
+            "Free text without a linebreak",
+            "Free text with \ntwo \nlinebreaks",
+        ],
+        "col3": [123, None, 101112],  # second row's col3 is "" in the input
+        "col4": [456, 789, 131415],
+    }