Skip to content

Commit

Permalink
BUG: to_clipboard text truncated for Python 3 on Windows for UTF-16 t…
Browse files Browse the repository at this point in the history
…ext (#25040)
  • Loading branch information
david-liu-brattle-1 authored and jreback committed Feb 1, 2019
1 parent 25ff472 commit 89dd4d6
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 5 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ MultiIndex
I/O
^^^

- Fixed bug where text was truncated when using :meth:`to_clipboard` to copy UTF-16 characters in Python 3 on Windows (:issue:`25040`)
-
-
-
Expand Down
11 changes: 8 additions & 3 deletions pandas/io/clipboard/windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def init_windows_clipboard():
HINSTANCE, HMENU, BOOL, UINT, HANDLE)

windll = ctypes.windll
msvcrt = ctypes.CDLL('msvcrt')

safeCreateWindowExA = CheckedCall(windll.user32.CreateWindowExA)
safeCreateWindowExA.argtypes = [DWORD, LPCSTR, LPCSTR, DWORD, INT, INT,
Expand Down Expand Up @@ -71,6 +72,10 @@ def init_windows_clipboard():
safeGlobalUnlock.argtypes = [HGLOBAL]
safeGlobalUnlock.restype = BOOL

wcslen = CheckedCall(msvcrt.wcslen)
wcslen.argtypes = [c_wchar_p]
wcslen.restype = UINT

GMEM_MOVEABLE = 0x0002
CF_UNICODETEXT = 13

Expand Down Expand Up @@ -129,13 +134,13 @@ def copy_windows(text):
# If the hMem parameter identifies a memory object,
# the object must have been allocated using the
# function with the GMEM_MOVEABLE flag.
count = len(text) + 1
count = wcslen(text) + 1
handle = safeGlobalAlloc(GMEM_MOVEABLE,
count * sizeof(c_wchar))
locked_handle = safeGlobalLock(handle)

ctypes.memmove(c_wchar_p(locked_handle),
c_wchar_p(text), count * sizeof(c_wchar))
ctypes.memmove(c_wchar_p(locked_handle), c_wchar_p(text),
count * sizeof(c_wchar))

safeGlobalUnlock(handle)
safeSetClipboardData(CF_UNICODETEXT, handle)
Expand Down
20 changes: 18 additions & 2 deletions pandas/tests/io/test_clipboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pandas.util import testing as tm
from pandas.util.testing import makeCustomDataframe as mkdf

from pandas.io.clipboard import clipboard_get, clipboard_set
from pandas.io.clipboard.exceptions import PyperclipException

try:
Expand All @@ -30,8 +31,8 @@ def build_kwargs(sep, excel):
return kwargs


@pytest.fixture(params=['delims', 'utf8', 'string', 'long', 'nonascii',
'colwidth', 'mixed', 'float', 'int'])
@pytest.fixture(params=['delims', 'utf8', 'utf16', 'string', 'long',
'nonascii', 'colwidth', 'mixed', 'float', 'int'])
def df(request):
data_type = request.param

Expand All @@ -41,6 +42,10 @@ def df(request):
elif data_type == 'utf8':
return pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
'b': ['øπ∆˚¬', 'œ∑´®']})
elif data_type == 'utf16':
return pd.DataFrame({'a': ['\U0001f44d\U0001f44d',
'\U0001f44d\U0001f44d'],
'b': ['abc', 'def']})
elif data_type == 'string':
return mkdf(5, 3, c_idx_type='s', r_idx_type='i',
c_idx_names=[None], r_idx_names=[None])
Expand Down Expand Up @@ -225,3 +230,14 @@ def test_invalid_encoding(self, df):
@pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8'])
def test_round_trip_valid_encodings(self, enc, df):
self.check_round_trip_frame(df, encoding=enc)


@pytest.mark.single
@pytest.mark.clipboard
@pytest.mark.skipif(not _DEPS_INSTALLED,
                    reason="clipboard primitives not installed")
@pytest.mark.parametrize('data', [u'\U0001f44d...', u'Ωœ∑´...', 'abcd...'])
def test_raw_roundtrip(data):
    """Check that a raw string survives a clipboard set/get round trip.

    Regression test for PR #25040: wide unicode text was not copied
    correctly on Python 3 on Windows.
    """
    # Push the raw text onto the clipboard, then read it straight back.
    clipboard_set(data)
    pasted = clipboard_get()

    # The text must come back unchanged.
    assert data == pasted

0 comments on commit 89dd4d6

Please sign in to comment.