From 16b97a0f15a355fcbc254ede849ec063808ef940 Mon Sep 17 00:00:00 2001 From: David Liu <36486871+david-liu-brattle-1@users.noreply.github.com> Date: Fri, 1 Feb 2019 15:53:56 -0500 Subject: [PATCH] BUG: to_clipboard text truncated for Python 3 on Windows for UTF-16 text (#25040) --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/io/clipboard/windows.py | 11 ++++++++--- pandas/tests/io/test_clipboard.py | 20 ++++++++++++++++++-- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 304a9dd117e45a..55b9957763ff6e 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -165,6 +165,7 @@ MultiIndex I/O ^^^ +- Fixed bug in missing text when using :meth:`to_clipboard` if copying utf-16 characters in Python 3 on Windows (:issue:`25040`) - - - diff --git a/pandas/io/clipboard/windows.py b/pandas/io/clipboard/windows.py index 3d979a61b5f2d5..4f5275af693b71 100644 --- a/pandas/io/clipboard/windows.py +++ b/pandas/io/clipboard/windows.py @@ -29,6 +29,7 @@ def init_windows_clipboard(): HINSTANCE, HMENU, BOOL, UINT, HANDLE) windll = ctypes.windll + msvcrt = ctypes.CDLL('msvcrt') safeCreateWindowExA = CheckedCall(windll.user32.CreateWindowExA) safeCreateWindowExA.argtypes = [DWORD, LPCSTR, LPCSTR, DWORD, INT, INT, @@ -71,6 +72,10 @@ def init_windows_clipboard(): safeGlobalUnlock.argtypes = [HGLOBAL] safeGlobalUnlock.restype = BOOL + wcslen = CheckedCall(msvcrt.wcslen) + wcslen.argtypes = [c_wchar_p] + wcslen.restype = UINT + GMEM_MOVEABLE = 0x0002 CF_UNICODETEXT = 13 @@ -129,13 +134,13 @@ def copy_windows(text): # If the hMem parameter identifies a memory object, # the object must have been allocated using the # function with the GMEM_MOVEABLE flag. - count = len(text) + 1 + count = wcslen(text) + 1 handle = safeGlobalAlloc(GMEM_MOVEABLE, count * sizeof(c_wchar)) locked_handle = safeGlobalLock(handle) - ctypes.memmove(c_wchar_p(locked_handle), - c_wchar_p(text), count * sizeof(c_wchar)) + ctypes.memmove(c_wchar_p(locked_handle), c_wchar_p(text), + count * sizeof(c_wchar)) safeGlobalUnlock(handle) safeSetClipboardData(CF_UNICODETEXT, handle) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 8eb26d9f3dec5c..565db92210b0ab 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -12,6 +12,7 @@ from pandas.util import testing as tm from pandas.util.testing import makeCustomDataframe as mkdf +from pandas.io.clipboard import clipboard_get, clipboard_set from pandas.io.clipboard.exceptions import PyperclipException try: @@ -30,8 +31,8 @@ def build_kwargs(sep, excel): return kwargs -@pytest.fixture(params=['delims', 'utf8', 'string', 'long', 'nonascii', - 'colwidth', 'mixed', 'float', 'int']) +@pytest.fixture(params=['delims', 'utf8', 'utf16', 'string', 'long', + 'nonascii', 'colwidth', 'mixed', 'float', 'int']) def df(request): data_type = request.param @@ -41,6 +42,10 @@ def df(request): elif data_type == 'utf8': return pd.DataFrame({'a': ['µasd', 'Ωœ∑´'], 'b': ['øπ∆˚¬', 'œ∑´®']}) + elif data_type == 'utf16': + return pd.DataFrame({'a': ['\U0001f44d\U0001f44d', + '\U0001f44d\U0001f44d'], + 'b': ['abc', 'def']}) elif data_type == 'string': return mkdf(5, 3, c_idx_type='s', r_idx_type='i', c_idx_names=[None], r_idx_names=[None]) @@ -225,3 +230,14 @@ def test_invalid_encoding(self, df): @pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8']) def test_round_trip_valid_encodings(self, enc, df): self.check_round_trip_frame(df, encoding=enc) + + +@pytest.mark.single +@pytest.mark.clipboard +@pytest.mark.skipif(not _DEPS_INSTALLED, + reason="clipboard primitives not installed") +@pytest.mark.parametrize('data', [u'\U0001f44d...', u'Ωœ∑´...', 'abcd...']) +def test_raw_roundtrip(data): + # PR #25040 wide unicode wasn't copied correctly on PY3 on windows + clipboard_set(data) + assert data == clipboard_get()