From 1ad993e2f0a249a4e054e951d01a2d36f2462732 Mon Sep 17 00:00:00 2001 From: francisco souza Date: Wed, 20 Nov 2019 10:59:41 -0500 Subject: [PATCH 1/5] io/parsers: ensure decimal is str on PythonParser Fixes #29650. --- pandas/io/parsers.py | 7 +++++-- pandas/tests/io/parser/test_python_parser_only.py | 8 +++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index cf1511c1221b3..4478cfe2e4616 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -488,7 +488,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): "cache_dates": True, "thousands": None, "comment": None, - "decimal": b".", + "decimal": ".", # 'engine': 'c', "parse_dates": False, "keep_date_col": False, @@ -568,7 +568,7 @@ def parser_f( # Quoting, Compression, and File Format compression="infer", thousands=None, - decimal=b".", + decimal=".", lineterminator=None, quotechar='"', quoting=csv.QUOTE_MINIMAL, @@ -2344,6 +2344,9 @@ def __init__(self, f, **kwds): if len(self.decimal) != 1: raise ValueError("Only length-1 decimal markers supported") + if isinstance(self.decimal, bytes): + self.decimal = self.decimal.decode() + if self.thousands is None: self.nonnum = re.compile( r"[^-^0-9^{decimal}]+".format(decimal=self.decimal) diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py index 5b381e43e3e19..8a68f2d36c92d 100644 --- a/pandas/tests/io/parser/test_python_parser_only.py +++ b/pandas/tests/io/parser/test_python_parser_only.py @@ -185,8 +185,10 @@ def test_read_csv_buglet_4x_multi_index2(python_parser_only): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("add_footer", [True, False]) -def test_skipfooter_with_decimal(python_parser_only, add_footer): +@pytest.mark.parametrize( + "add_footer, decimal", [(True, "#"), (False, "#"), (True, b"#"), (False, b"#")], +) +def test_skipfooter_with_decimal(python_parser_only, add_footer, decimal): # see gh-6971 data = "1#2\n3#4" parser = python_parser_only @@ -200,7 +202,7 @@ def test_skipfooter_with_decimal(python_parser_only, add_footer): else: kwargs = dict() - result = parser.read_csv(StringIO(data), names=["a"], decimal="#", **kwargs) + result = parser.read_csv(StringIO(data), names=["a"], decimal=decimal, **kwargs) tm.assert_frame_equal(result, expected) From bca300ce1be0976227911573d510de5d74e2dcbe Mon Sep 17 00:00:00 2001 From: francisco souza Date: Wed, 20 Nov 2019 16:18:51 -0500 Subject: [PATCH 2/5] Address PR feedback --- pandas/io/parsers.py | 9 +++++---- pandas/tests/io/parser/test_python_parser_only.py | 5 ++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 4478cfe2e4616..a94bc9a8d40e2 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2276,7 +2276,11 @@ def __init__(self, f, **kwds): self.dtype = kwds["dtype"] self.thousands = kwds["thousands"] - self.decimal = kwds["decimal"] + + if isinstance(kwds["decimal"], bytes): + self.decimal = kwds["decimal"].decode() + else: + self.decimal = kwds["decimal"] self.comment = kwds["comment"] self._comment_lines = [] @@ -2344,9 +2348,6 @@ def __init__(self, f, **kwds): if len(self.decimal) != 1: raise ValueError("Only length-1 decimal markers supported") - if isinstance(self.decimal, bytes): - self.decimal = self.decimal.decode() - if self.thousands is None: self.nonnum = re.compile( r"[^-^0-9^{decimal}]+".format(decimal=self.decimal) diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py index 8a68f2d36c92d..c8335782b42ae 100644 --- a/pandas/tests/io/parser/test_python_parser_only.py +++ b/pandas/tests/io/parser/test_python_parser_only.py @@ -185,9 +185,8 @@ def test_read_csv_buglet_4x_multi_index2(python_parser_only): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "add_footer, decimal", [(True, "#"), (False, "#"), (True, b"#"), (False, b"#")], -) +@pytest.mark.parametrize("add_footer", [True, False]) +@pytest.mark.parametrize("decimal", ["#", b"#"]) def test_skipfooter_with_decimal(python_parser_only, add_footer, decimal): # see gh-6971 data = "1#2\n3#4" From 257f233943e8480185144d0fd578bcec4485be9b Mon Sep 17 00:00:00 2001 From: francisco souza Date: Wed, 20 Nov 2019 16:24:10 -0500 Subject: [PATCH 3/5] doc/source/whatsnew: add decimal fix to v1.0.0 --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 54640ff576338..ee6a20d15ab90 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -487,6 +487,7 @@ I/O - Bug in :meth:`Styler.background_gradient` not able to work with dtype ``Int64`` (:issue:`28869`) - Bug in :meth:`DataFrame.to_clipboard` which did not work reliably in ipython (:issue:`22707`) - Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`) +- Bug in :class:`PythonParser` where str and bytes were being mixed when dealing with the decimal field (:issue:`29650`) - Plotting From 9393043fcebc828766fc6209107e260a24010a26 Mon Sep 17 00:00:00 2001 From: francisco souza Date: Thu, 21 Nov 2019 15:54:15 -0500 Subject: [PATCH 4/5] Address PR feedback --- pandas/io/parsers.py | 7 +------ pandas/tests/io/parser/test_python_parser_only.py | 5 ++--- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a94bc9a8d40e2..d0959dd1dc314 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2276,12 +2276,7 @@ def __init__(self, f, **kwds): self.dtype = kwds["dtype"] self.thousands = kwds["thousands"] - - if isinstance(kwds["decimal"], bytes): - self.decimal = kwds["decimal"].decode() - else: - self.decimal = kwds["decimal"] - + self.decimal = kwds["decimal"] self.comment = kwds["comment"] self._comment_lines = [] diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py index c8335782b42ae..5b381e43e3e19 100644 --- a/pandas/tests/io/parser/test_python_parser_only.py +++ b/pandas/tests/io/parser/test_python_parser_only.py @@ -186,8 +186,7 @@ def test_read_csv_buglet_4x_multi_index2(python_parser_only): @pytest.mark.parametrize("add_footer", [True, False]) -@pytest.mark.parametrize("decimal", ["#", b"#"]) -def test_skipfooter_with_decimal(python_parser_only, add_footer, decimal): +def test_skipfooter_with_decimal(python_parser_only, add_footer): # see gh-6971 data = "1#2\n3#4" parser = python_parser_only @@ -201,7 +200,7 @@ def test_skipfooter_with_decimal(python_parser_only, add_footer, decimal): else: kwargs = dict() - result = parser.read_csv(StringIO(data), names=["a"], decimal=decimal, **kwargs) + result = parser.read_csv(StringIO(data), names=["a"], decimal="#", **kwargs) tm.assert_frame_equal(result, expected) From 4b262ba0aeb19d11f68d7b31b45d5e67dcf6a8fb Mon Sep 17 00:00:00 2001 From: francisco souza Date: Thu, 21 Nov 2019 15:56:41 -0500 Subject: [PATCH 5/5] Add type annotation for decimal --- pandas/io/parsers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index d0959dd1dc314..bbec148b8745d 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -568,7 +568,7 @@ def parser_f( # Quoting, Compression, and File Format compression="infer", thousands=None, - decimal=".", + decimal: str = ".", lineterminator=None, quotechar='"', quoting=csv.QUOTE_MINIMAL, @@ -2277,6 +2277,7 @@ def __init__(self, f, **kwds): self.dtype = kwds["dtype"] self.thousands = kwds["thousands"] self.decimal = kwds["decimal"] + self.comment = kwds["comment"] self._comment_lines = []