diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3ec9797f3d5c39..6b566c29a77c9a 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -202,49 +202,32 @@ Backwards incompatible API changes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :func:`DataFrame.to_csv` now uses :func:`os.linesep` rather than ``'\n'`` - for the default line terminator(:issue:`20353`). -- This change only affects when running on Windows, where ``'\r\n'`` was used for line terminator + for the default line terminator (:issue:`20353`). +This change only has an effect when running on Windows, where ``'\r\n'`` was used as the line terminator even when ``'\n'`` was passed in ``line_terminator``. - - Strictly speeaking, all ``'\n'``s appear in data and line terminator of CSV were converted into ``'\r\n'``s. - - This problem was resolved by passing file object with ``newline='\n'`` option as output, rather than file name. Previous Behavior on Windows: .. 
code-block:: ipython - In [1]: import pandas as pd +In [1]: data = pd.DataFrame({ + ...: "string_with_lf":["a\nbc"], + ...: "string_with_crlf":["a\r\nbc"] + ...: }) - In [2]: data = pd.DataFrame({ - ...: "string_with_lf":["abc","d\nef","g\nh\n\ni"], - ...: "string_with_crlf":["abc","d\r\nef","g\r\nh\r\n\r\ni"] - ...: }) +In [2]: data.to_csv("test.csv",index=False,line_terminator='\n') - In [3]: data.to_csv("test.csv",index=False,line_terminator='\n') +In [3]: with open("test.csv", mode='rb') as f: + ...: print(f.read()) +b'string_with_lf,string_with_crlf\r\n"a\r\nbc","a\r\r\nbc"\r\n' - In [4]: print(pd.read_csv("test.csv")) - string_with_lf string_with_crlf - 0 abc abc - 1 d\r\nef d\r\r\nef - 2 g\r\nh\r\n\r\ni g\r\r\nh\r\r\n\r\r\ni +In [4]: with open("test2.csv", mode='w', newline='\n') as f: + ...: data.to_csv(f,index=False,line_terminator='\n') - In [5]: with open("test.csv", mode='rb') as f: - ...: print(f.read()) - b'string_with_lf,string_with_crlf\r\nabc,abc\r\n"d\r\nef","d\r\r\nef"\r\n"g\r\nh - \r\n\r\ni","g\r\r\nh\r\r\n\r\r\ni"\r\n' +In [5]: with open("test2.csv", mode='rb') as f: + ...: print(f.read()) +b'string_with_lf,string_with_crlf\n"a\nbc","a\r\nbc"\n' - In [6]: with open("test2.csv", mode='w', newline='\n') as f: - ...: data.to_csv(f,index=False,line_terminator='\n') - - In [7]: print(pd.read_csv("test2.csv")) - string_with_lf string_with_crlf - 0 abc abc - 1 d\nef d\r\nef - 2 g\nh\n\ni g\r\nh\r\n\r\ni - - In [8]: with open("test2.csv", mode='rb') as f: - ...: print(f.read()) - b'string_with_lf,string_with_crlf\nabc,abc\n"d\nef","d\r\nef"\n"g\nh\n\ni","g\r\ - nh\r\n\r\ni"\n' New Behavior on Windows: @@ -254,27 +237,17 @@ New Behavior on Windows: .. 
code-block:: ipython - In [1]: import pandas as pd - - In [2]: data = pd.DataFrame({ - ...: "string_with_lf":["abc","d\nef","g\nh\n\ni"], - ...: "string_with_crlf":["abc","d\r\nef","g\r\nh\r\n\r\ni"] - ...: }) +In [1]: data = pd.DataFrame({ + ...: "string_with_lf":["a\nbc"], + ...: "string_with_crlf":["a\r\nbc"] + ...: }) - In [3]: data.to_csv("test.csv",index=False,line_terminator='\n') +In [2]: data.to_csv("test.csv",index=False,line_terminator='\n') - In [4]: pd.read_csv("test.csv") - Out[4]: - string_with_lf string_with_crlf - 0 abc abc - 1 d\nef d\r\nef - 2 g\nh\n\ni g\r\nh\r\n\r\ni +In [3]: with open("test.csv", mode='rb') as f: + ...: print(f.read()) +b'string_with_lf,string_with_crlf\n"a\nbc","a\r\nbc"\n' - In [5]: with open("test.csv", mode='rb') as f: - ...: binary_str=f.read() - ...: binary_str - Out[5]: b'string_with_lf,string_with_crlf\nabc,abc\n"d\nef","d\r\nef"\n"g\nh\n\n - i","g\r\nh\r\n\r\ni"\n' - On windows, the value of ``os.linesep`` is ``'\r\n'``, so if ``line_terminator`` is not set, ``'\r\n'`` is used for line terminator. @@ -282,71 +255,34 @@ New Behavior on Windows: .. 
code-block:: ipython - In [1]: import pandas as pd +In [1]: data = pd.DataFrame({ + ...: "string_with_lf":["a\nbc"], + ...: "string_with_crlf":["a\r\nbc"] + ...: }) - In [2]: data = pd.DataFrame({ - ...: "string_with_lf":["abc","d\nef","g\nh\n\ni"], - ...: "string_with_crlf":["abc","d\r\nef","g\r\nh\r\n\r\ni"] - ...: }) +In [2]: data.to_csv("test.csv",index=False) - In [3]: data.to_csv("test.csv",index=False) +In [3]: with open("test.csv", mode='rb') as f: + ...: print(f.read()) +b'string_with_lf,string_with_crlf\r\n"a\nbc","a\r\nbc"\r\n' - In [4]: pd.read_csv("test.csv") - Out[4]: - string_with_lf string_with_crlf - 0 abc abc - 1 d\nef d\r\nef - 2 g\nh\n\ni g\r\nh\r\n\r\ni - - In [5]: with open("test.csv", mode='rb') as f: - ...: binary_str=f.read() - ...: binary_str - Out[5]: b'string_with_lf,string_with_crlf\r\nabc,abc\r\n"d\nef","d\r\nef"\r\n"g\ - nh\n\ni","g\r\nh\r\n\r\ni"\r\n' - As default value of ``line_terminator`` changes, just passing file object with ``newline='\n'`` does not set ``'\n'`` to line terminator. Pass ``line_terminator='\n'`` explicitly. .. 
code-block:: ipython - In [1]: import pandas as pd - - In [2]: data = pd.DataFrame({ - ...: "string_with_lf":["abc","d\nef","g\nh\n\ni"], - ...: "string_with_crlf":["abc","d\r\nef","g\r\nh\r\n\r\ni"] - ...: }) - - In [3]: with open("test2.csv", mode='w', newline='\n') as f: - ...: data.to_csv(f,index=False) - - In [4]: pd.read_csv("test2.csv") - Out[4]: - string_with_lf string_with_crlf - 0 abc abc - 1 d\nef d\r\nef - 2 g\nh\n\ni g\r\nh\r\n\r\ni - - In [5]: with open("test2.csv", mode='rb') as f: - ...: binary_str=f.read() - ...: binary_str - Out[5]: b'string_with_lf,string_with_crlf\r\nabc,abc\r\n"d\nef","d\r\nef"\r\n"g\ - nh\n\ni","g\r\nh\r\n\r\ni"\r\n' - - In [6]: with open("test2.csv", mode='w', newline='\n') as f: - ...: data.to_csv(f,index=False,line_terminator='\n') - - In [7]: pd.read_csv("test2.csv") - Out[7]: - string_with_lf string_with_crlf - 0 abc abc - 1 d\nef d\r\nef - 2 g\nh\n\ni g\r\nh\r\n\r\ni - - In [8]: with open("test2.csv", mode='rb') as f: - ...: binary_str=f.read() - ...: binary_str - Out[8]: b'string_with_lf,string_with_crlf\nabc,abc\n"d\nef","d\r\nef"\n"g\nh\n\n - i","g\r\nh\r\n\r\ni"\n' +In [1]: data = pd.DataFrame({ + ...: "string_with_lf":["a\nbc"], + ...: "string_with_crlf":["a\r\nbc"] + ...: }) + +In [2]: with open("test2.csv", mode='w', newline='\n') as f: + ...: data.to_csv(f,index=False) + +In [3]: with open("test2.csv", mode='rb') as f: + ...: print(f.read()) +b'string_with_lf,string_with_crlf\r\n"a\nbc","a\r\nbc"\r\n' .. _whatsnew_0240.api_breaking.interval_values: