From cda1bd3c9d3b2cecdeeba0c498cd2df83fbdb535 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Tue, 30 May 2023 11:00:29 +0100 Subject: [PATCH] gh-88745: Add _winapi.CopyFile2 and update shutil.copy2 to use it (GH-105055) --- .../pycore_global_objects_fini_generated.h | 3 + Include/internal/pycore_global_strings.h | 3 + .../internal/pycore_runtime_init_generated.h | 3 + .../internal/pycore_unicodeobject_generated.h | 9 ++ Lib/shutil.py | 25 +++++ ...3-05-29-11-38-53.gh-issue-88745.cldf9G.rst | 3 + Modules/_winapi.c | 93 +++++++++++++++++++ Modules/clinic/_winapi.c.h | 72 +++++++++++++- 8 files changed, 210 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Windows/2023-05-29-11-38-53.gh-issue-88745.cldf9G.rst diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index a83f8fc49fc5ef..546ba6d4c5520f 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -919,6 +919,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exc_value)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(excepthook)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exception)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(existing_file_name)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exp)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(extend)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(extra_tokens)); @@ -1071,6 +1072,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespaces)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(narg)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ndigits)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(new_file_name)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(new_limit)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(newline)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(newlines)); @@ -1125,6 +1127,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(priority)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(progress)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(progress_handler)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(progress_routine)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(proto)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(protocol)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ps1)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index dd6a62f53a9989..088bd96c756b49 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -407,6 +407,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(exc_value) STRUCT_FOR_ID(excepthook) STRUCT_FOR_ID(exception) + STRUCT_FOR_ID(existing_file_name) STRUCT_FOR_ID(exp) STRUCT_FOR_ID(extend) STRUCT_FOR_ID(extra_tokens) @@ -559,6 +560,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(namespaces) STRUCT_FOR_ID(narg) STRUCT_FOR_ID(ndigits) + STRUCT_FOR_ID(new_file_name) STRUCT_FOR_ID(new_limit) STRUCT_FOR_ID(newline) STRUCT_FOR_ID(newlines) @@ -613,6 +615,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(priority) STRUCT_FOR_ID(progress) STRUCT_FOR_ID(progress_handler) + STRUCT_FOR_ID(progress_routine) STRUCT_FOR_ID(proto) STRUCT_FOR_ID(protocol) STRUCT_FOR_ID(ps1) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index d689f717eaf94f..2963423f607108 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -913,6 +913,7 @@ extern "C" { INIT_ID(exc_value), \ INIT_ID(excepthook), \ INIT_ID(exception), \ + INIT_ID(existing_file_name), \ INIT_ID(exp), \ INIT_ID(extend), \ INIT_ID(extra_tokens), \ @@ -1065,6 +1066,7 @@ extern "C" { INIT_ID(namespaces), \ INIT_ID(narg), \ INIT_ID(ndigits), \ + INIT_ID(new_file_name), \ INIT_ID(new_limit), \ INIT_ID(newline), \ INIT_ID(newlines), \ @@ -1119,6 +1121,7 @@ extern "C" { INIT_ID(priority), \ INIT_ID(progress), \ INIT_ID(progress_handler), \ + INIT_ID(progress_routine), \ INIT_ID(proto), \ INIT_ID(protocol), \ INIT_ID(ps1), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index db6a157ee7afbf..9e13a9491b7da5 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1062,6 +1062,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(exception); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(existing_file_name); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(exp); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); @@ -1518,6 +1521,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(ndigits); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(new_file_name); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(new_limit); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); @@ -1680,6 +1686,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(progress_handler); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(progress_routine); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(proto); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); diff --git a/Lib/shutil.py b/Lib/shutil.py index 7d1a3d00011f37..3f2864af517e7d 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -42,6 +42,8 @@ if sys.platform == 'win32': import _winapi +else: + _winapi = None COPY_BUFSIZE = 1024 * 1024 if _WINDOWS else 64 * 1024 # This should never be removed, see rationale in: @@ -435,6 +437,29 @@ def copy2(src, dst, *, follow_symlinks=True): """ if os.path.isdir(dst): dst = os.path.join(dst, os.path.basename(src)) + + if hasattr(_winapi, "CopyFile2"): + src_ = os.fsdecode(src) + dst_ = os.fsdecode(dst) + flags = _winapi.COPY_FILE_ALLOW_DECRYPTED_DESTINATION # for compat + if not follow_symlinks: + flags |= _winapi.COPY_FILE_COPY_SYMLINK + try: + _winapi.CopyFile2(src_, dst_, flags) + return dst + except OSError as exc: + if (exc.winerror == _winapi.ERROR_PRIVILEGE_NOT_HELD + and not follow_symlinks): + # Likely encountered a symlink we aren't allowed to create. + # Fall back on the old code + pass + elif exc.winerror == _winapi.ERROR_ACCESS_DENIED: + # Possibly encountered a hidden or readonly file we can't + # overwrite. Fall back on old code + pass + else: + raise + copyfile(src, dst, follow_symlinks=follow_symlinks) copystat(src, dst, follow_symlinks=follow_symlinks) return dst diff --git a/Misc/NEWS.d/next/Windows/2023-05-29-11-38-53.gh-issue-88745.cldf9G.rst b/Misc/NEWS.d/next/Windows/2023-05-29-11-38-53.gh-issue-88745.cldf9G.rst new file mode 100644 index 00000000000000..258eb89d50d9f5 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2023-05-29-11-38-53.gh-issue-88745.cldf9G.rst @@ -0,0 +1,3 @@ +Improve performance of :func:`shutil.copy2` by using the operating system's +``CopyFile2`` function. This may result in subtle changes to metadata copied +along with some files, bringing them in line with normal OS behavior. diff --git a/Modules/_winapi.c b/Modules/_winapi.c index 1e02dbc1a4bfd1..bbc9facd227c9e 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -1947,6 +1947,7 @@ _winapi_GetFileType_impl(PyObject *module, HANDLE handle) return result; } + /*[clinic input] _winapi._mimetypes_read_windows_registry @@ -2075,6 +2076,67 @@ _winapi_NeedCurrentDirectoryForExePath_impl(PyObject *module, return result; } + +/*[clinic input] +_winapi.CopyFile2 + + existing_file_name: LPCWSTR + new_file_name: LPCWSTR + flags: DWORD + progress_routine: object = None + +Copies a file from one name to a new name. + +This is implemented using the CopyFile2 API, which preserves all stat +and metadata information apart from security attributes. + +progress_routine is reserved for future use, but is currently not +implemented. Its value is ignored. +[clinic start generated code]*/ + +static PyObject * +_winapi_CopyFile2_impl(PyObject *module, LPCWSTR existing_file_name, + LPCWSTR new_file_name, DWORD flags, + PyObject *progress_routine) +/*[clinic end generated code: output=43d960d9df73d984 input=fb976b8d1492d130]*/ +{ + HRESULT hr; + COPYFILE2_EXTENDED_PARAMETERS params = { sizeof(COPYFILE2_EXTENDED_PARAMETERS) }; + + if (PySys_Audit("_winapi.CopyFile2", "uuI", + existing_file_name, new_file_name, flags) < 0) { + return NULL; + } + + params.dwCopyFlags = flags; + /* For future implementation. We ignore the value for now so that + users only have to test for 'CopyFile2' existing and not whether + the additional parameter exists. + if (progress_routine != Py_None) { + params.pProgressRoutine = _winapi_CopyFile2ProgressRoutine; + params.pvCallbackContext = Py_NewRef(progress_routine); + } + */ + Py_BEGIN_ALLOW_THREADS; + hr = CopyFile2(existing_file_name, new_file_name, ¶ms); + Py_END_ALLOW_THREADS; + /* For future implementation. + if (progress_routine != Py_None) { + Py_DECREF(progress_routine); + } + */ + if (FAILED(hr)) { + if ((hr & 0xFFFF0000) == 0x80070000) { + PyErr_SetFromWindowsErr(hr & 0xFFFF); + } else { + PyErr_SetFromWindowsErr(hr); + } + return NULL; + } + Py_RETURN_NONE; +} + + static PyMethodDef winapi_functions[] = { _WINAPI_CLOSEHANDLE_METHODDEF _WINAPI_CONNECTNAMEDPIPE_METHODDEF @@ -2110,6 +2172,7 @@ static PyMethodDef winapi_functions[] = { _WINAPI_GETFILETYPE_METHODDEF _WINAPI__MIMETYPES_READ_WINDOWS_REGISTRY_METHODDEF _WINAPI_NEEDCURRENTDIRECTORYFOREXEPATH_METHODDEF + _WINAPI_COPYFILE2_METHODDEF {NULL, NULL} }; @@ -2146,6 +2209,7 @@ static int winapi_exec(PyObject *m) WINAPI_CONSTANT(F_DWORD, CREATE_NEW_PROCESS_GROUP); WINAPI_CONSTANT(F_DWORD, DUPLICATE_SAME_ACCESS); WINAPI_CONSTANT(F_DWORD, DUPLICATE_CLOSE_SOURCE); + WINAPI_CONSTANT(F_DWORD, ERROR_ACCESS_DENIED); WINAPI_CONSTANT(F_DWORD, ERROR_ALREADY_EXISTS); WINAPI_CONSTANT(F_DWORD, ERROR_BROKEN_PIPE); WINAPI_CONSTANT(F_DWORD, ERROR_IO_PENDING); @@ -2159,6 +2223,7 @@ static int winapi_exec(PyObject *m) WINAPI_CONSTANT(F_DWORD, ERROR_OPERATION_ABORTED); WINAPI_CONSTANT(F_DWORD, ERROR_PIPE_BUSY); WINAPI_CONSTANT(F_DWORD, ERROR_PIPE_CONNECTED); + WINAPI_CONSTANT(F_DWORD, ERROR_PRIVILEGE_NOT_HELD); WINAPI_CONSTANT(F_DWORD, ERROR_SEM_TIMEOUT); WINAPI_CONSTANT(F_DWORD, FILE_FLAG_FIRST_PIPE_INSTANCE); WINAPI_CONSTANT(F_DWORD, FILE_FLAG_OVERLAPPED); @@ -2252,6 +2317,34 @@ static int winapi_exec(PyObject *m) WINAPI_CONSTANT(F_DWORD, LCMAP_TRADITIONAL_CHINESE); WINAPI_CONSTANT(F_DWORD, LCMAP_UPPERCASE); + WINAPI_CONSTANT(F_DWORD, COPY_FILE_ALLOW_DECRYPTED_DESTINATION); + WINAPI_CONSTANT(F_DWORD, COPY_FILE_COPY_SYMLINK); + WINAPI_CONSTANT(F_DWORD, COPY_FILE_FAIL_IF_EXISTS); + WINAPI_CONSTANT(F_DWORD, COPY_FILE_NO_BUFFERING); + WINAPI_CONSTANT(F_DWORD, COPY_FILE_NO_OFFLOAD); + WINAPI_CONSTANT(F_DWORD, COPY_FILE_OPEN_SOURCE_FOR_WRITE); + WINAPI_CONSTANT(F_DWORD, COPY_FILE_RESTARTABLE); + WINAPI_CONSTANT(F_DWORD, COPY_FILE_REQUEST_SECURITY_PRIVILEGES); + WINAPI_CONSTANT(F_DWORD, COPY_FILE_RESUME_FROM_PAUSE); +#ifndef COPY_FILE_REQUEST_COMPRESSED_TRAFFIC + // Only defined in newer WinSDKs + #define COPY_FILE_REQUEST_COMPRESSED_TRAFFIC 0x10000000 +#endif + WINAPI_CONSTANT(F_DWORD, COPY_FILE_REQUEST_COMPRESSED_TRAFFIC); + + WINAPI_CONSTANT(F_DWORD, COPYFILE2_CALLBACK_CHUNK_STARTED); + WINAPI_CONSTANT(F_DWORD, COPYFILE2_CALLBACK_CHUNK_FINISHED); + WINAPI_CONSTANT(F_DWORD, COPYFILE2_CALLBACK_STREAM_STARTED); + WINAPI_CONSTANT(F_DWORD, COPYFILE2_CALLBACK_STREAM_FINISHED); + WINAPI_CONSTANT(F_DWORD, COPYFILE2_CALLBACK_POLL_CONTINUE); + WINAPI_CONSTANT(F_DWORD, COPYFILE2_CALLBACK_ERROR); + + WINAPI_CONSTANT(F_DWORD, COPYFILE2_PROGRESS_CONTINUE); + WINAPI_CONSTANT(F_DWORD, COPYFILE2_PROGRESS_CANCEL); + WINAPI_CONSTANT(F_DWORD, COPYFILE2_PROGRESS_STOP); + WINAPI_CONSTANT(F_DWORD, COPYFILE2_PROGRESS_QUIET); + WINAPI_CONSTANT(F_DWORD, COPYFILE2_PROGRESS_PAUSE); + WINAPI_CONSTANT("i", NULL); return 0; diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h index 7bc63e612be348..3767b19d76db05 100644 --- a/Modules/clinic/_winapi.c.h +++ b/Modules/clinic/_winapi.c.h @@ -1411,4 +1411,74 @@ _winapi_NeedCurrentDirectoryForExePath(PyObject *module, PyObject *arg) return return_value; } -/*[clinic end generated code: output=96ea65ece7912d0a input=a9049054013a1b77]*/ + +PyDoc_STRVAR(_winapi_CopyFile2__doc__, +"CopyFile2($module, /, existing_file_name, new_file_name, flags,\n" +" progress_routine=None)\n" +"--\n" +"\n" +"Copies a file from one name to a new name.\n" +"\n" +"This is implemented using the CopyFile2 API, which preserves all stat\n" +"and metadata information apart from security attributes.\n" +"\n" +"progress_routine is reserved for future use, but is currently not\n" +"implemented. Its value is ignored."); + +#define _WINAPI_COPYFILE2_METHODDEF \ + {"CopyFile2", _PyCFunction_CAST(_winapi_CopyFile2), METH_FASTCALL|METH_KEYWORDS, _winapi_CopyFile2__doc__}, + +static PyObject * +_winapi_CopyFile2_impl(PyObject *module, LPCWSTR existing_file_name, + LPCWSTR new_file_name, DWORD flags, + PyObject *progress_routine); + +static PyObject * +_winapi_CopyFile2(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 4 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(existing_file_name), &_Py_ID(new_file_name), &_Py_ID(flags), &_Py_ID(progress_routine), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"existing_file_name", "new_file_name", "flags", "progress_routine", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .format = "O&O&k|O:CopyFile2", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + LPCWSTR existing_file_name = NULL; + LPCWSTR new_file_name = NULL; + DWORD flags; + PyObject *progress_routine = Py_None; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + _PyUnicode_WideCharString_Converter, &existing_file_name, _PyUnicode_WideCharString_Converter, &new_file_name, &flags, &progress_routine)) { + goto exit; + } + return_value = _winapi_CopyFile2_impl(module, existing_file_name, new_file_name, flags, progress_routine); + +exit: + /* Cleanup for existing_file_name */ + PyMem_Free((void *)existing_file_name); + /* Cleanup for new_file_name */ + PyMem_Free((void *)new_file_name); + + return return_value; +} +/*[clinic end generated code: output=be1343b3759e0c96 input=a9049054013a1b77]*/