Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: to_clipboard text truncated for Python 3 on Windows for UTF-16 text #25040

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ MultiIndex
I/O
^^^

- Fixed bug where text was truncated when using :meth:`to_clipboard` to copy characters that require UTF-16 surrogate pairs in Python 3 on Windows (:issue:`25040`)
-
-
-
Expand Down
11 changes: 8 additions & 3 deletions pandas/io/clipboard/windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def init_windows_clipboard():
HINSTANCE, HMENU, BOOL, UINT, HANDLE)

windll = ctypes.windll
msvcrt = ctypes.CDLL('msvcrt')

safeCreateWindowExA = CheckedCall(windll.user32.CreateWindowExA)
safeCreateWindowExA.argtypes = [DWORD, LPCSTR, LPCSTR, DWORD, INT, INT,
Expand Down Expand Up @@ -71,6 +72,10 @@ def init_windows_clipboard():
safeGlobalUnlock.argtypes = [HGLOBAL]
safeGlobalUnlock.restype = BOOL

wcslen = CheckedCall(msvcrt.wcslen)
wcslen.argtypes = [c_wchar_p]
wcslen.restype = UINT

GMEM_MOVEABLE = 0x0002
CF_UNICODETEXT = 13

Expand Down Expand Up @@ -129,13 +134,13 @@ def copy_windows(text):
# If the hMem parameter identifies a memory object,
# the object must have been allocated using the
# function with the GMEM_MOVEABLE flag.
count = len(text) + 1
count = wcslen(text) + 1
handle = safeGlobalAlloc(GMEM_MOVEABLE,
count * sizeof(c_wchar))
locked_handle = safeGlobalLock(handle)

ctypes.memmove(c_wchar_p(locked_handle),
c_wchar_p(text), count * sizeof(c_wchar))
ctypes.memmove(c_wchar_p(locked_handle), c_wchar_p(text),
count * sizeof(c_wchar))

safeGlobalUnlock(handle)
safeSetClipboardData(CF_UNICODETEXT, handle)
Expand Down
20 changes: 18 additions & 2 deletions pandas/tests/io/test_clipboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pandas.util import testing as tm
from pandas.util.testing import makeCustomDataframe as mkdf

from pandas.io.clipboard import clipboard_get, clipboard_set
from pandas.io.clipboard.exceptions import PyperclipException

try:
Expand All @@ -30,8 +31,8 @@ def build_kwargs(sep, excel):
return kwargs


@pytest.fixture(params=['delims', 'utf8', 'string', 'long', 'nonascii',
'colwidth', 'mixed', 'float', 'int'])
@pytest.fixture(params=['delims', 'utf8', 'utf16', 'string', 'long',
'nonascii', 'colwidth', 'mixed', 'float', 'int'])
def df(request):
data_type = request.param

Expand All @@ -41,6 +42,10 @@ def df(request):
elif data_type == 'utf8':
return pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
'b': ['øπ∆˚¬', 'œ∑´®']})
elif data_type == 'utf16':
return pd.DataFrame({'a': ['\U0001f44d\U0001f44d',
'\U0001f44d\U0001f44d'],
'b': ['abc', 'def']})
elif data_type == 'string':
return mkdf(5, 3, c_idx_type='s', r_idx_type='i',
c_idx_names=[None], r_idx_names=[None])
Expand Down Expand Up @@ -225,3 +230,14 @@ def test_invalid_encoding(self, df):
@pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8'])
def test_round_trip_valid_encodings(self, enc, df):
self.check_round_trip_frame(df, encoding=enc)


@pytest.mark.single
@pytest.mark.clipboard
@pytest.mark.skipif(not _DEPS_INSTALLED,
reason="clipboard primitives not installed")
@pytest.mark.parametrize('data', [u'\U0001f44d...', u'Ωœ∑´...', 'abcd...'])
def test_raw_roundtrip(data):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reference PR number as a comment below this function definition.

# PR #25040: wide unicode wasn't copied correctly on Python 3 on Windows
clipboard_set(data)
assert data == clipboard_get()