diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml new file mode 100644 index 00000000000000..3701f7297ffeb2 --- /dev/null +++ b/.github/actionlint.yaml @@ -0,0 +1,10 @@ +self-hosted-runner: + labels: ["ubuntu-24.04-aarch64", "windows-aarch64"] + +config-variables: null + +paths: + .github/workflows/**/*.yml: + ignore: + - 1st argument of function call is not assignable + - SC2(015|038|086|091|097|098|129|155) \ No newline at end of file diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1f2204d3f2c6fe..b3c4f179b513a4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -150,16 +150,28 @@ jobs: needs: check_source if: fromJSON(needs.check_source.outputs.run_tests) strategy: + fail-fast: false matrix: + os: + - windows-latest arch: - - Win32 - - x64 - - arm64 + - x64 free-threading: - - false - - true + - false + - true + include: + - os: windows-latest # FIXME(diegorusso): change to os: windows-aarch64 + arch: arm64 + free-threading: false + - os: windows-latest # FIXME(diegorusso): change to os: windows-aarch64 + arch: arm64 + free-threading: true + - os: windows-latest + arch: Win32 + free-threading: false uses: ./.github/workflows/reusable-windows.yml with: + os: ${{ matrix.os }} arch: ${{ matrix.arch }} free-threading: ${{ matrix.free-threading }} diff --git a/.github/workflows/reusable-ubuntu.yml b/.github/workflows/reusable-ubuntu.yml index ec39025504efd1..642354f8b4f61b 100644 --- a/.github/workflows/reusable-ubuntu.yml +++ b/.github/workflows/reusable-ubuntu.yml @@ -20,7 +20,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-24.04] + os: [ubuntu-24.04, ubuntu-24.04-aarch64] env: FORCE_COLOR: 1 OPENSSL_VER: 3.0.15 @@ -82,11 +82,11 @@ jobs: - name: Build CPython out-of-tree if: ${{ inputs.free-threading }} working-directory: ${{ env.CPYTHON_BUILDDIR }} - run: make -j4 + run: make -j - name: Build CPython out-of-tree (for compiler warning check) if: ${{ !inputs.free-threading}} 
working-directory: ${{ env.CPYTHON_BUILDDIR }} - run: set -o pipefail; make -j4 --output-sync 2>&1 | tee compiler_output_ubuntu.txt + run: set -o pipefail; make -j --output-sync 2>&1 | tee compiler_output_ubuntu.txt - name: Display build info working-directory: ${{ env.CPYTHON_BUILDDIR }} run: make pythoninfo diff --git a/.github/workflows/reusable-windows.yml b/.github/workflows/reusable-windows.yml index dcfc62d7f5d145..12b68d68466d62 100644 --- a/.github/workflows/reusable-windows.yml +++ b/.github/workflows/reusable-windows.yml @@ -3,6 +3,10 @@ name: Reusable Windows on: workflow_call: inputs: + os: + description: OS to run on + required: true + type: string arch: description: CPU architecture required: true @@ -19,10 +23,8 @@ env: jobs: build: - name: >- - build${{ inputs.arch != 'arm64' && ' and test' || '' }} - (${{ inputs.arch }}) - runs-on: windows-latest + name: 'build and test (${{ inputs.arch }})' + runs-on: ${{ inputs.os }} timeout-minutes: 60 steps: - uses: actions/checkout@v4 @@ -31,17 +33,17 @@ jobs: run: echo "::add-matcher::.github/problem-matchers/msvc.json" - name: Build CPython run: >- - .\PCbuild\build.bat + .\\PCbuild\\build.bat -e -d -v -p ${{ inputs.arch }} ${{ fromJSON(inputs.free-threading) && '--disable-gil' || '' }} - - name: Display build info + - name: Display build info # FIXME(diegorusso): remove the `if` if: inputs.arch != 'arm64' - run: .\python.bat -m test.pythoninfo - - name: Tests + run: .\\python.bat -m test.pythoninfo + - name: Tests # FIXME(diegorusso): remove the `if` if: inputs.arch != 'arm64' run: >- - .\PCbuild\rt.bat + .\\PCbuild\\rt.bat -p ${{ inputs.arch }} -d -q --fast-ci ${{ fromJSON(inputs.free-threading) && '--disable-gil' || '' }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ec769d7ff70314..ccaf2390d99fae 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -57,13 +57,9 @@ repos: - id: check-github-workflows - repo: https://github.com/rhysd/actionlint - rev: v1.7.3 + rev: 
v1.7.4 hooks: - id: actionlint - args: [ - -ignore=1st argument of function call is not assignable, - -ignore=SC2(015|038|086|091|097|098|129|155), - ] - repo: https://github.com/sphinx-contrib/sphinx-lint rev: v1.0.0 diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index b397791c87d0f4..440638e9065764 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -192,6 +192,12 @@ arguments it contains. The default message can be overridden with the The ``%(prog)s`` format specifier is available to fill in the program name in your usage messages. +When a custom usage message is specified for the main parser, you may also want to +consider passing the ``prog`` argument to :meth:`~ArgumentParser.add_subparsers` +or the ``prog`` and the ``usage`` arguments to +:meth:`~_SubParsersAction.add_parser`, to ensure consistent command prefixes and +usage information across subparsers. + .. _description: @@ -583,6 +589,14 @@ are strings:: >>> parser.parse_args(['--action', 'sumn', 1, 2, 3]) tester.py: error: argument --action: invalid choice: 'sumn', maybe you meant 'sum'? (choose from 'sum', 'max') +If you're writing code that needs to be compatible with older Python versions +and want to opportunistically use ``suggest_on_error`` when it's available, you +can set it as an attribute after initializing the parser instead of using the +keyword argument:: + + >>> parser = argparse.ArgumentParser(description='Process some integers.') + >>> parser.suggest_on_error = True + .. versionadded:: 3.14 @@ -1810,6 +1824,10 @@ Sub-commands .. versionchanged:: 3.7 New *required* keyword-only parameter. + .. versionchanged:: 3.14 + Subparser's *prog* is no longer affected by a custom usage message in + the main parser. 
+ FileType objects ^^^^^^^^^^^^^^^^ diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst index 8ed75d9b7560b9..f490f7563b58a5 100644 --- a/Doc/library/ctypes.rst +++ b/Doc/library/ctypes.rst @@ -1413,13 +1413,15 @@ way is to instantiate one of the following classes: .. class:: OleDLL(name, mode=DEFAULT_MODE, handle=None, use_errno=False, use_last_error=False, winmode=None) - Windows only: Instances of this class represent loaded shared libraries, + Instances of this class represent loaded shared libraries, functions in these libraries use the ``stdcall`` calling convention, and are assumed to return the windows specific :class:`HRESULT` code. :class:`HRESULT` values contain information specifying whether the function call failed or succeeded, together with additional error code. If the return value signals a failure, an :class:`OSError` is automatically raised. + .. availability:: Windows + .. versionchanged:: 3.3 :exc:`WindowsError` used to be raised, which is now an alias of :exc:`OSError`. @@ -1431,14 +1433,17 @@ way is to instantiate one of the following classes: .. class:: WinDLL(name, mode=DEFAULT_MODE, handle=None, use_errno=False, use_last_error=False, winmode=None) - Windows only: Instances of this class represent loaded shared libraries, + Instances of this class represent loaded shared libraries, functions in these libraries use the ``stdcall`` calling convention, and are assumed to return :c:expr:`int` by default. + .. availability:: Windows + .. versionchanged:: 3.12 The *name* parameter can now be a :term:`path-like object`. + The Python :term:`global interpreter lock` is released before calling any function exported by these libraries, and reacquired afterwards. @@ -1574,13 +1579,17 @@ These prefabricated library loaders are available: .. data:: windll :noindex: - Windows only: Creates :class:`WinDLL` instances. + Creates :class:`WinDLL` instances. + + .. availability:: Windows .. 
data:: oledll :noindex: - Windows only: Creates :class:`OleDLL` instances. + Creates :class:`OleDLL` instances. + + .. availability:: Windows .. data:: pydll @@ -1746,11 +1755,13 @@ See :ref:`ctypes-callback-functions` for examples. .. function:: WINFUNCTYPE(restype, *argtypes, use_errno=False, use_last_error=False) - Windows only: The returned function prototype creates functions that use the + The returned function prototype creates functions that use the ``stdcall`` calling convention. The function will release the GIL during the call. *use_errno* and *use_last_error* have the same meaning as above. + .. availability:: Windows + .. function:: PYFUNCTYPE(restype, *argtypes) @@ -1981,17 +1992,21 @@ Utility functions .. function:: DllCanUnloadNow() - Windows only: This function is a hook which allows implementing in-process + This function is a hook which allows implementing in-process COM servers with ctypes. It is called from the DllCanUnloadNow function that the _ctypes extension dll exports. + .. availability:: Windows + .. function:: DllGetClassObject() - Windows only: This function is a hook which allows implementing in-process + This function is a hook which allows implementing in-process COM servers with ctypes. It is called from the DllGetClassObject function that the ``_ctypes`` extension dll exports. + .. availability:: Windows + .. function:: find_library(name) :module: ctypes.util @@ -2007,7 +2022,7 @@ Utility functions .. function:: find_msvcrt() :module: ctypes.util - Windows only: return the filename of the VC runtime library used by Python, + Returns the filename of the VC runtime library used by Python, and by the extension modules. If the name of the library cannot be determined, ``None`` is returned. @@ -2015,20 +2030,27 @@ Utility functions with a call to the ``free(void *)``, it is important that you use the function in the same library that allocated the memory. + .. availability:: Windows + .. 
function:: FormatError([code]) - Windows only: Returns a textual description of the error code *code*. If no + Returns a textual description of the error code *code*. If no error code is specified, the last error code is used by calling the Windows api function GetLastError. + .. availability:: Windows + .. function:: GetLastError() - Windows only: Returns the last error code set by Windows in the calling thread. + Returns the last error code set by Windows in the calling thread. This function calls the Windows ``GetLastError()`` function directly, it does not return the ctypes-private copy of the error code. + .. availability:: Windows + + .. function:: get_errno() Returns the current value of the ctypes-private copy of the system @@ -2038,11 +2060,14 @@ Utility functions .. function:: get_last_error() - Windows only: returns the current value of the ctypes-private copy of the system + Returns the current value of the ctypes-private copy of the system :data:`!LastError` variable in the calling thread. + .. availability:: Windows + .. audit-event:: ctypes.get_last_error "" ctypes.get_last_error + .. function:: memmove(dst, src, count) Same as the standard C memmove library function: copies *count* bytes from @@ -2091,10 +2116,12 @@ Utility functions .. function:: set_last_error(value) - Windows only: set the current value of the ctypes-private copy of the system + Sets the current value of the ctypes-private copy of the system :data:`!LastError` variable in the calling thread to *value* and return the previous value. + .. availability:: Windows + .. audit-event:: ctypes.set_last_error error ctypes.set_last_error @@ -2115,12 +2142,14 @@ Utility functions .. function:: WinError(code=None, descr=None) - Windows only: this function is probably the worst-named thing in ctypes. It + This function is probably the worst-named thing in ctypes. It creates an instance of :exc:`OSError`. If *code* is not specified, ``GetLastError`` is called to determine the error code. 
If *descr* is not specified, :func:`FormatError` is called to get a textual description of the error. + .. availability:: Windows + .. versionchanged:: 3.3 An instance of :exc:`WindowsError` used to be created, which is now an alias of :exc:`OSError`. @@ -2484,9 +2513,11 @@ These are the fundamental ctypes data types: .. class:: HRESULT - Windows only: Represents a :c:type:`!HRESULT` value, which contains success or + Represents a :c:type:`!HRESULT` value, which contains success or error information for a function or method call. + .. availability:: Windows + .. class:: py_object @@ -2755,7 +2786,7 @@ Exceptions .. exception:: COMError(hresult, text, details) - Windows only: This exception is raised when a COM method call failed. + This exception is raised when a COM method call failed. .. attribute:: hresult @@ -2775,4 +2806,6 @@ Exceptions identifier. *progid* is the ``ProgID`` of the interface that defined the error. + .. availability:: Windows + .. versionadded:: next diff --git a/Doc/library/importlib.metadata.rst b/Doc/library/importlib.metadata.rst index 37cd237357aa4b..d80255f5313061 100644 --- a/Doc/library/importlib.metadata.rst +++ b/Doc/library/importlib.metadata.rst @@ -133,7 +133,7 @@ Entry points Details of a collection of installed entry points. - Also provides a ``.groups`` attribute that reports all identifed entry + Also provides a ``.groups`` attribute that reports all identified entry point groups, and a ``.names`` attribute that reports all identified entry point names. diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst index 036b8f44b9ff3b..783cb025826483 100644 --- a/Doc/library/multiprocessing.rst +++ b/Doc/library/multiprocessing.rst @@ -291,7 +291,7 @@ processes: of corruption from processes using different ends of the pipe at the same time. 
- The :meth:`~Connection.send` method serializes the the object and + The :meth:`~Connection.send` method serializes the object and :meth:`~Connection.recv` re-creates the object. Synchronization between processes @@ -828,7 +828,7 @@ For an example of the usage of queues for interprocess communication see used for receiving messages and ``conn2`` can only be used for sending messages. - The :meth:`~multiprocessing.Connection.send` method serializes the the object using + The :meth:`~multiprocessing.Connection.send` method serializes the object using :mod:`pickle` and the :meth:`~multiprocessing.Connection.recv` re-creates the object. .. class:: Queue([maxsize]) diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index d25701c087ed07..2a8592f8bd69c1 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -491,12 +491,6 @@ Directory and files operations or ends with an extension that is in ``PATHEXT``; and filenames that have no extension can now be found. - .. versionchanged:: 3.12.1 - On Windows, if *mode* includes ``os.X_OK``, executables with an - extension in ``PATHEXT`` will be preferred over executables without a - matching extension. - This brings behavior closer to that of Python 3.11. - .. exception:: Error This exception collects exceptions that are raised during a multi-file diff --git a/Doc/library/socket.rst b/Doc/library/socket.rst index 6358d140484c78..73d495c055ff6e 100644 --- a/Doc/library/socket.rst +++ b/Doc/library/socket.rst @@ -979,7 +979,7 @@ The :mod:`socket` module also offers various network-related services: These addresses should generally be tried in order until a connection succeeds (possibly tried in parallel, for example, using a `Happy Eyeballs`_ algorithm). In these cases, limiting the *type* and/or *proto* can help eliminate - unsuccessful or unusable connecton attempts. + unsuccessful or unusable connection attempts. Some systems will, however, only return a single address. 
(For example, this was reported on Solaris and AIX configurations.) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 5e4bcbf835a4d0..87cdcb5b119d15 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -283,6 +283,8 @@ struct _is { /* the initial PyInterpreterState.threads.head */ _PyThreadStateImpl _initial_thread; + // _initial_thread should be the last field of PyInterpreterState. + // See https://github.com/python/cpython/issues/127117. }; diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 58e583eabbcc46..53280875f10429 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1223,7 +1223,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [UNARY_NOT] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [UNPACK_EX] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [UNPACK_SEQUENCE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, + [UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [UNPACK_SEQUENCE_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [UNPACK_SEQUENCE_TWO_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [WITH_EXCEPT_START] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h index 2f2cec22cf1589..86d024535fdda8 100644 --- a/Include/internal/pycore_runtime.h +++ b/Include/internal/pycore_runtime.h @@ -169,6 +169,12 @@ typedef struct pyruntimestate { struct _Py_unicode_runtime_state unicode_state; struct _types_runtime_state types; +#if defined(__EMSCRIPTEN__) && defined(PY_CALL_TRAMPOLINE) + // Used in "Python/emscripten_trampoline.c" to choose between type + // 
reflection trampoline and EM_JS trampoline. + bool wasm_type_reflection_available; +#endif + /* All the objects that are shared by the runtime's interpreters. */ struct _Py_cached_objects cached_objects; struct _Py_static_objects static_objects; @@ -189,13 +195,8 @@ typedef struct pyruntimestate { /* _PyRuntimeState.interpreters.main */ PyInterpreterState _main_interpreter; - -#if defined(__EMSCRIPTEN__) && defined(PY_CALL_TRAMPOLINE) - // Used in "Python/emscripten_trampoline.c" to choose between type - // reflection trampoline and EM_JS trampoline. - bool wasm_type_reflection_available; -#endif - + // _main_interpreter should be the last field of _PyRuntimeState. + // See https://github.com/python/cpython/issues/127117. } _PyRuntimeState; diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 588e57f6cd97e0..90a3118352f7ae 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -99,8 +99,7 @@ _PyStackRef_FromPyObjectSteal(PyObject *obj) assert(obj != NULL); // Make sure we don't take an already tagged value. assert(((uintptr_t)obj & Py_TAG_BITS) == 0); - unsigned int tag = _Py_IsImmortal(obj) ? (Py_TAG_DEFERRED) : Py_TAG_PTR; - return ((_PyStackRef){.bits = ((uintptr_t)(obj)) | tag}); + return (_PyStackRef){ .bits = (uintptr_t)obj }; } # define PyStackRef_FromPyObjectSteal(obj) _PyStackRef_FromPyObjectSteal(_PyObject_CAST(obj)) @@ -190,9 +189,16 @@ static const _PyStackRef PyStackRef_NULL = { .bits = 0 }; #endif // Py_GIL_DISABLED -// Note: this is a macro because MSVC (Windows) has trouble inlining it. +// Check if a stackref is exactly the same as another stackref, including the +// deferred bit. This can only be used safely if you know that the deferred +// bits of `a` and `b` match. 
+#define PyStackRef_IsExactly(a, b) \ + (assert(((a).bits & Py_TAG_BITS) == ((b).bits & Py_TAG_BITS)), (a).bits == (b).bits) -#define PyStackRef_Is(a, b) ((a).bits == (b).bits) +// Checks that mask out the deferred bit in the free threading build. +#define PyStackRef_IsNone(ref) (PyStackRef_AsPyObjectBorrow(ref) == Py_None) +#define PyStackRef_IsTrue(ref) (PyStackRef_AsPyObjectBorrow(ref) == Py_True) +#define PyStackRef_IsFalse(ref) (PyStackRef_AsPyObjectBorrow(ref) == Py_False) // Converts a PyStackRef back to a PyObject *, converting the // stackref to a new reference. diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 1b2880cb6bb67e..1c1f478c3833c8 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -112,7 +112,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_UNPACK_SEQUENCE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_UNPACK_SEQUENCE_TWO_TUPLE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_UNPACK_SEQUENCE_TUPLE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_UNPACK_SEQUENCE_LIST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_UNPACK_SEQUENCE_LIST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_UNPACK_EX] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_DELETE_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/InternalDocs/README.md b/InternalDocs/README.md index 2ef6e653ac19d4..dbc858b276833c 100644 --- a/InternalDocs/README.md +++ b/InternalDocs/README.md @@ -24,9 +24,7 @@ Compiling Python Source Code Runtime Objects --- -- [Code Objects (coming soon)](code_objects.md) - -- [The Source Code Locations Table](locations.md) +- [Code Objects](code_objects.md) - [Generators (coming soon)](generators.md) diff --git a/InternalDocs/code_objects.md b/InternalDocs/code_objects.md index 284a8b7aee5765..bee4a9d0a08915 100644 --- a/InternalDocs/code_objects.md +++ 
b/InternalDocs/code_objects.md @@ -1,5 +1,139 @@ -Code objects -============ +# Code objects -Coming soon. +A `CodeObject` is a builtin Python type that represents a compiled executable, +such as a compiled function or class. +It contains a sequence of bytecode instructions along with its associated +metadata: data which is necessary to execute the bytecode instructions (such +as the values of the constants they access) or context information such as +the source code location, which is useful for debuggers and other tools. + +Since 3.11, the final field of the `PyCodeObject` C struct is an array +of indeterminate length containing the bytecode, `code->co_code_adaptive`. +(In older versions the code object was a +[`bytes`](https://docs.python.org/dev/library/stdtypes.html#bytes) +object, `code->co_code`; this was changed to save an allocation and to +allow it to be mutated.) + +Code objects are typically produced by the bytecode [compiler](compiler.md), +although they are often written to disk by one process and read back in by another. +The disk version of a code object is serialized using the +[marshal](https://docs.python.org/dev/library/marshal.html) protocol. + +Code objects are nominally immutable. +Some fields (including `co_code_adaptive` and fields for runtime +information such as `_co_monitoring`) are mutable, but mutable fields are +not included when code objects are hashed or compared. + +## Source code locations + +Whenever an exception occurs, the interpreter adds a traceback entry to +the exception for the current frame, as well as each frame on the stack that +it unwinds. +The `tb_lineno` field of a traceback entry is (lazily) set to the line +number of the instruction that was executing in the frame at the time of +the exception. +This field is computed from the locations table, `co_linetable`, by the function +[`PyCode_Addr2Line`](https://docs.python.org/dev/c-api/code.html#c.PyCode_Addr2Line). 
+Despite its name, `co_linetable` includes more than line numbers; it represents +a 4-number source location for every instruction, indicating the precise line +and column at which it begins and ends. This is a significant amount of data, +so a compact format is very important. + +Note that traceback objects don't store all this information -- they store the start line +number, for backward compatibility, and the "last instruction" value. +The rest can be computed from the last instruction (`tb_lasti`) with the help of the +locations table. For Python code, there is a convenience method +[`codeobject.co_positions`](https://docs.python.org/dev/reference/datamodel.html#codeobject.co_positions) +which returns an iterator of `({line}, {endline}, {column}, {endcolumn})` tuples, +one per instruction. +There is also `co_lines()` which returns an iterator of `({start}, {end}, {line})` tuples, +where `{start}` and `{end}` are bytecode offsets. +The latter is described by [`PEP 626`](https://peps.python.org/pep-0626/); it is more +compact, but doesn't return end line numbers or column offsets. +From C code, you need to call +[`PyCode_Addr2Location`](https://docs.python.org/dev/c-api/code.html#c.PyCode_Addr2Location). + +As the locations table is only consulted when displaying a traceback and when +tracing (to pass the line number to the tracing function), lookup is not +performance critical. +In order to reduce the overhead during tracing, the mapping from instruction offset to +line number is cached in the ``_co_linearray`` field. + +### Format of the locations table + +The `co_linetable` bytes object of code objects contains a compact +representation of the source code positions of instructions, which are +returned by the `co_positions()` iterator. + +> [!NOTE] +> `co_linetable` is not to be confused with `co_lnotab`. 
+> For backwards compatibility, `co_lnotab` exposes the format +> as it existed in Python 3.10 and lower: this older format +> stores only the start line for each instruction. +> It is lazily created from `co_linetable` when accessed. +> See [`Objects/lnotab_notes.txt`](../Objects/lnotab_notes.txt) for more details. + +`co_linetable` consists of a sequence of location entries. +Each entry starts with a byte with the most significant bit set, followed by zero or more bytes with the most significant bit unset. + +Each entry contains the following information: +* The number of code units covered by this entry (length) +* The start line +* The end line +* The start column +* The end column + +The first byte has the following format: + +Bit 7 | Bits 3-6 | Bits 0-2 + ---- | ---- | ---- + 1 | Code | Length (in code units) - 1 + +The codes are enumerated in the `_PyCodeLocationInfoKind` enum. + +## Variable-length integer encodings + +Integers are often encoded using a variable-length integer encoding + +### Unsigned integers (`varint`) + +Unsigned integers are encoded in 6-bit chunks, least significant first. +Each chunk but the last has bit 6 set. 
+For example: + +* 63 is encoded as `0x3f` +* 200 is encoded as `0x48`, `0x03` + +### Signed integers (`svarint`) + +Signed integers are encoded by converting them to unsigned integers, using the following function: +```Python +def convert(s): + if s < 0: + return ((-s)<<1) | 1 + else: + return (s<<1) +``` + +*Location entries* + +The meaning of the codes and the following bytes are as follows: + +Code | Meaning | Start line | End line | Start column | End column + ---- | ---- | ---- | ---- | ---- | ---- + 0-9 | Short form | Δ 0 | Δ 0 | See below | See below + 10-12 | One line form | Δ (code - 10) | Δ 0 | unsigned byte | unsigned byte + 13 | No column info | Δ svarint | Δ 0 | None | None + 14 | Long form | Δ svarint | Δ varint | varint | varint + 15 | No location | None | None | None | None + +The Δ means the value is encoded as a delta from another value: +* Start line: Delta from the previous start line, or `co_firstlineno` for the first entry. +* End line: Delta from the start line + +*The short forms* + +Codes 0-9 are the short forms. The short form consists of two bytes, the second byte holding additional column information. The code is the start column divided by 8 (and rounded down). +* Start column: `(code*8) + ((second_byte>>4)&7)` +* End column: `start_column + (second_byte&15)` diff --git a/InternalDocs/compiler.md b/InternalDocs/compiler.md index 37964bd99428df..ed4cfb23ca51f7 100644 --- a/InternalDocs/compiler.md +++ b/InternalDocs/compiler.md @@ -443,14 +443,12 @@ reference to the source code (filename, etc). All of this is implemented by Code objects ============ -The result of `PyAST_CompileObject()` is a `PyCodeObject` which is defined in +The result of `_PyAST_Compile()` is a `PyCodeObject` which is defined in [Include/cpython/code.h](../Include/cpython/code.h). And with that you now have executable Python bytecode! -The code objects (byte code) are executed in [Python/ceval.c](../Python/ceval.c). 
-This file will also need a new case statement for the new opcode in the big switch -statement in `_PyEval_EvalFrameDefault()`. - +The code objects (byte code) are executed in `_PyEval_EvalFrameDefault()` +in [Python/ceval.c](../Python/ceval.c). Important files =============== diff --git a/InternalDocs/interpreter.md b/InternalDocs/interpreter.md index dcfddc99370c0e..4c10cbbed37735 100644 --- a/InternalDocs/interpreter.md +++ b/InternalDocs/interpreter.md @@ -16,7 +16,7 @@ from the instruction definitions in [Python/bytecodes.c](../Python/bytecodes.c) which are written in [a DSL](../Tools/cases_generator/interpreter_definition.md) developed for this purpose. -Recall that the [Python Compiler](compiler.md) produces a [`CodeObject`](code_object.md), +Recall that the [Python Compiler](compiler.md) produces a [`CodeObject`](code_objects.md), which contains the bytecode instructions along with static data that is required to execute them, such as the consts list, variable names, [exception table](exception_handling.md#format-of-the-exception-table), and so on. diff --git a/InternalDocs/locations.md b/InternalDocs/locations.md deleted file mode 100644 index 91a7824e2a8e4d..00000000000000 --- a/InternalDocs/locations.md +++ /dev/null @@ -1,69 +0,0 @@ -# Locations table - -The `co_linetable` bytes object of code objects contains a compact -representation of the source code positions of instructions, which are -returned by the `co_positions()` iterator. - -`co_linetable` consists of a sequence of location entries. -Each entry starts with a byte with the most significant bit set, followed by zero or more bytes with most significant bit unset. 
- -Each entry contains the following information: -* The number of code units covered by this entry (length) -* The start line -* The end line -* The start column -* The end column - -The first byte has the following format: - -Bit 7 | Bits 3-6 | Bits 0-2 - ---- | ---- | ---- - 1 | Code | Length (in code units) - 1 - -The codes are enumerated in the `_PyCodeLocationInfoKind` enum. - -## Variable length integer encodings - -Integers are often encoded using a variable length integer encoding - -### Unsigned integers (varint) - -Unsigned integers are encoded in 6 bit chunks, least significant first. -Each chunk but the last has bit 6 set. -For example: - -* 63 is encoded as `0x3f` -* 200 is encoded as `0x48`, `0x03` - -### Signed integers (svarint) - -Signed integers are encoded by converting them to unsigned integers, using the following function: -```Python -def convert(s): - if s < 0: - return ((-s)<<1) | 1 - else: - return (s<<1) -``` - -## Location entries - -The meaning of the codes and the following bytes are as follows: - -Code | Meaning | Start line | End line | Start column | End column - ---- | ---- | ---- | ---- | ---- | ---- - 0-9 | Short form | Δ 0 | Δ 0 | See below | See below - 10-12 | One line form | Δ (code - 10) | Δ 0 | unsigned byte | unsigned byte - 13 | No column info | Δ svarint | Δ 0 | None | None - 14 | Long form | Δ svarint | Δ varint | varint | varint - 15 | No location | None | None | None | None - -The Δ means the value is encoded as a delta from another value: -* Start line: Delta from the previous start line, or `co_firstlineno` for the first entry. -* End line: Delta from the start line - -### The short forms - -Codes 0-9 are the short forms. The short form consists of two bytes, the second byte holding additional column information. The code is the start column divided by 8 (and rounded down). 
-* Start column: `(code*8) + ((second_byte>>4)&7)` -* End column: `start_column + (second_byte&15)` diff --git a/Lib/argparse.py b/Lib/argparse.py index 7a84f5dc24cf46..5cc52c3490ddea 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -1875,7 +1875,7 @@ def add_subparsers(self, **kwargs): formatter = self._get_formatter() positionals = self._get_positional_actions() groups = self._mutually_exclusive_groups - formatter.add_usage(self.usage, positionals, groups, '') + formatter.add_usage(None, positionals, groups, '') kwargs['prog'] = formatter.format_help().strip() # create the parsers action and add it to the positionals list diff --git a/Lib/concurrent/futures/thread.py b/Lib/concurrent/futures/thread.py index 16cc5533d429ef..909359b648709f 100644 --- a/Lib/concurrent/futures/thread.py +++ b/Lib/concurrent/futures/thread.py @@ -41,6 +41,7 @@ def _python_exit(): os.register_at_fork(before=_global_shutdown_lock.acquire, after_in_child=_global_shutdown_lock._at_fork_reinit, after_in_parent=_global_shutdown_lock.release) + os.register_at_fork(after_in_child=_threads_queues.clear) class WorkerContext: diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py index 3308ee7c1c784e..66092e4821a0ec 100644 --- a/Lib/nturl2path.py +++ b/Lib/nturl2path.py @@ -22,6 +22,9 @@ def url2pathname(url): elif url[:12] == '//localhost/': # Skip past 'localhost' authority. url = url[11:] + if url[:3] == '///': + # Skip past extra slash before UNC drive in URL path. + url = url[1:] # Windows itself uses ":" even in URLs. url = url.replace(':', '|') if not '|' in url: diff --git a/Lib/shutil.py b/Lib/shutil.py index dd3e0e0c5da54b..171489ca41f2a7 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -1550,21 +1550,21 @@ def which(cmd, mode=os.F_OK | os.X_OK, path=None): if sys.platform == "win32": # PATHEXT is necessary to check on Windows. 
pathext_source = os.getenv("PATHEXT") or _WIN_DEFAULT_PATHEXT - pathext = [ext for ext in pathext_source.split(os.pathsep) if ext] + pathext = pathext_source.split(os.pathsep) + pathext = [ext.rstrip('.') for ext in pathext if ext] if use_bytes: pathext = [os.fsencode(ext) for ext in pathext] - files = ([cmd] + [cmd + ext for ext in pathext]) + files = [cmd + ext for ext in pathext] - # gh-109590. If we are looking for an executable, we need to look - # for a PATHEXT match. The first cmd is the direct match - # (e.g. python.exe instead of python) - # Check that direct match first if and only if the extension is in PATHEXT - # Otherwise check it last - suffix = os.path.splitext(files[0])[1].upper() - if mode & os.X_OK and not any(suffix == ext.upper() for ext in pathext): - files.append(files.pop(0)) + # If X_OK in mode, simulate the cmd.exe behavior: look at direct + # match if and only if the extension is in PATHEXT. + # If X_OK not in mode, simulate the first result of where.exe: + # always look at direct match before a PATHEXT match. + normcmd = cmd.upper() + if not (mode & os.X_OK) or any(normcmd.endswith(ext.upper()) for ext in pathext): + files.insert(0, cmd) else: # On other platforms you don't have things like PATHEXT to tell you # what file suffixes are executable, so just pass on cmd as-is. 
@@ -1573,7 +1573,7 @@ def which(cmd, mode=os.F_OK | os.X_OK, path=None): seen = set() for dir in path: normdir = os.path.normcase(dir) - if not normdir in seen: + if normdir not in seen: seen.add(normdir) for thefile in files: name = os.path.join(dir, thefile) diff --git a/Lib/test/libregrtest/run_workers.py b/Lib/test/libregrtest/run_workers.py index 0ca86a986ea436..424085a0050eb5 100644 --- a/Lib/test/libregrtest/run_workers.py +++ b/Lib/test/libregrtest/run_workers.py @@ -457,7 +457,7 @@ def get_running(workers: list[WorkerThread]) -> str | None: running: list[str] = [] for worker in workers: test_name = worker.test_name - if not test_name: + if test_name == _NOT_RUNNING: continue dt = time.monotonic() - worker.start_time if dt >= PROGRESS_MIN_TIME: diff --git a/Lib/test/support/strace_helper.py b/Lib/test/support/strace_helper.py index 90281b47274299..eab16ea3e2889f 100644 --- a/Lib/test/support/strace_helper.py +++ b/Lib/test/support/strace_helper.py @@ -71,6 +71,27 @@ def sections(self): return sections +def _filter_memory_call(call): + # mmap can operate on a fd or "MAP_ANONYMOUS" which gives a block of memory. + # Ignore "MAP_ANONYMOUS + the "MAP_ANON" alias. + if call.syscall == "mmap" and "MAP_ANON" in call.args[3]: + return True + + if call.syscall in ("munmap", "mprotect"): + return True + + return False + + +def filter_memory(syscalls): + """Filter out memory allocation calls from File I/O calls. + + Some calls (mmap, munmap, etc) can be used on files or to just get a block + of memory. 
Use this function to filter out the memory related calls from + other calls.""" + + return [call for call in syscalls if not _filter_memory_call(call)] + @support.requires_subprocess() def strace_python(code, strace_flags, check=True): @@ -93,8 +114,6 @@ def _make_error(reason, details): "-c", textwrap.dedent(code), __run_using_command=[_strace_binary] + strace_flags, - # Don't want to trace our JIT's own mmap and mprotect calls: - PYTHON_JIT="0", ) except OSError as err: return _make_error("Caught OSError", err) @@ -145,9 +164,14 @@ def get_events(code, strace_flags, prelude, cleanup): return all_sections['code'] -def get_syscalls(code, strace_flags, prelude="", cleanup=""): +def get_syscalls(code, strace_flags, prelude="", cleanup="", + ignore_memory=True): """Get the syscalls which a given chunk of python code generates""" events = get_events(code, strace_flags, prelude=prelude, cleanup=cleanup) + + if ignore_memory: + events = filter_memory(events) + return [ev.syscall for ev in events] @@ -177,5 +201,5 @@ def requires_strace(): return unittest.skipUnless(_can_strace(), "Requires working strace") -__all__ = ["get_events", "get_syscalls", "requires_strace", "strace_python", - "StraceEvent", "StraceResult"] +__all__ = ["filter_memory", "get_events", "get_syscalls", "requires_strace", + "strace_python", "StraceEvent", "StraceResult"] diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index eff3c278183d88..c3b7d741f144b0 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -2409,16 +2409,17 @@ def assertArgumentParserError(self, *args, **kwargs): self.assertRaises(ArgumentParserError, *args, **kwargs) def _get_parser(self, subparser_help=False, prefix_chars=None, - aliases=False): + aliases=False, usage=None): # create a parser with a subparsers argument if prefix_chars: parser = ErrorRaisingArgumentParser( - prog='PROG', description='main description', prefix_chars=prefix_chars) + prog='PROG', description='main description', 
usage=usage, + prefix_chars=prefix_chars) parser.add_argument( prefix_chars[0] * 2 + 'foo', action='store_true', help='foo help') else: parser = ErrorRaisingArgumentParser( - prog='PROG', description='main description') + prog='PROG', description='main description', usage=usage) parser.add_argument( '--foo', action='store_true', help='foo help') parser.add_argument( @@ -2455,7 +2456,8 @@ def _get_parser(self, subparser_help=False, prefix_chars=None, parser2.add_argument('z', type=complex, nargs='*', help='z help') # add third sub-parser - parser3_kwargs = dict(description='3 description') + parser3_kwargs = dict(description='3 description', + usage='PROG --foo bar 3 t ...') if subparser_help: parser3_kwargs['help'] = '3 help' parser3 = subparsers.add_parser('3', **parser3_kwargs) @@ -2477,6 +2479,47 @@ def test_parse_args_failures(self): args = args_str.split() self.assertArgumentParserError(self.parser.parse_args, args) + def test_parse_args_failures_details(self): + for args_str, usage_str, error_str in [ + ('', + 'usage: PROG [-h] [--foo] bar {1,2,3} ...', + 'PROG: error: the following arguments are required: bar'), + ('0.5 1 -y', + 'usage: PROG bar 1 [-h] [-w W] {a,b,c}', + 'PROG bar 1: error: the following arguments are required: x'), + ('0.5 3', + 'usage: PROG --foo bar 3 t ...', + 'PROG bar 3: error: the following arguments are required: t'), + ]: + with self.subTest(args_str): + args = args_str.split() + with self.assertRaises(ArgumentParserError) as cm: + self.parser.parse_args(args) + self.assertEqual(cm.exception.args[0], 'SystemExit') + self.assertEqual(cm.exception.args[2], f'{usage_str}\n{error_str}\n') + + def test_parse_args_failures_details_custom_usage(self): + parser = self._get_parser(usage='PROG [--foo] bar 1 [-w W] {a,b,c}\n' + ' PROG --foo bar 3 t ...') + for args_str, usage_str, error_str in [ + ('', + 'usage: PROG [--foo] bar 1 [-w W] {a,b,c}\n' + ' PROG --foo bar 3 t ...', + 'PROG: error: the following arguments are required: bar'), + 
('0.5 1 -y', + 'usage: PROG bar 1 [-h] [-w W] {a,b,c}', + 'PROG bar 1: error: the following arguments are required: x'), + ('0.5 3', + 'usage: PROG --foo bar 3 t ...', + 'PROG bar 3: error: the following arguments are required: t'), + ]: + with self.subTest(args_str): + args = args_str.split() + with self.assertRaises(ArgumentParserError) as cm: + parser.parse_args(args) + self.assertEqual(cm.exception.args[0], 'SystemExit') + self.assertEqual(cm.exception.args[2], f'{usage_str}\n{error_str}\n') + def test_parse_args(self): # check some non-failure cases: self.assertEqual( diff --git a/Lib/test/test_concurrent_futures/test_thread_pool.py b/Lib/test/test_concurrent_futures/test_thread_pool.py index 2b5bea9f4055a2..4324241b374967 100644 --- a/Lib/test/test_concurrent_futures/test_thread_pool.py +++ b/Lib/test/test_concurrent_futures/test_thread_pool.py @@ -66,6 +66,25 @@ def submit(pool): with futures.ProcessPoolExecutor(1, mp_context=mp.get_context('fork')) as workers: workers.submit(tuple) + @support.requires_fork() + @unittest.skipUnless(hasattr(os, 'register_at_fork'), 'need os.register_at_fork') + def test_process_fork_from_a_threadpool(self): + # bpo-43944: clear concurrent.futures.thread._threads_queues after fork, + # otherwise child process will try to join parent thread + def fork_process_and_return_exitcode(): + # Ignore the warning about fork with threads. 
+ with self.assertWarnsRegex(DeprecationWarning, + r"use of fork\(\) may lead to deadlocks in the child"): + p = mp.get_context('fork').Process(target=lambda: 1) + p.start() + p.join() + return p.exitcode + + with futures.ThreadPoolExecutor(1) as pool: + process_exitcode = pool.submit(fork_process_and_return_exitcode).result() + + self.assertEqual(process_exitcode, 0) + def test_executor_map_current_future_cancel(self): stop_event = threading.Event() log = [] diff --git a/Lib/test/test_ctypes/test_win32_com_foreign_func.py b/Lib/test/test_ctypes/test_win32_com_foreign_func.py new file mode 100644 index 00000000000000..651c9277d59af9 --- /dev/null +++ b/Lib/test/test_ctypes/test_win32_com_foreign_func.py @@ -0,0 +1,188 @@ +import ctypes +import gc +import sys +import unittest +from ctypes import POINTER, byref, c_void_p +from ctypes.wintypes import BYTE, DWORD, WORD + +if sys.platform != "win32": + raise unittest.SkipTest("Windows-specific test") + + +from _ctypes import COMError +from ctypes import HRESULT + + +COINIT_APARTMENTTHREADED = 0x2 +CLSCTX_SERVER = 5 +S_OK = 0 +OUT = 2 +TRUE = 1 +E_NOINTERFACE = -2147467262 + + +class GUID(ctypes.Structure): + # https://learn.microsoft.com/en-us/windows/win32/api/guiddef/ns-guiddef-guid + _fields_ = [ + ("Data1", DWORD), + ("Data2", WORD), + ("Data3", WORD), + ("Data4", BYTE * 8), + ] + + +def create_proto_com_method(name, index, restype, *argtypes): + proto = ctypes.WINFUNCTYPE(restype, *argtypes) + + def make_method(*args): + foreign_func = proto(index, name, *args) + + def call(self, *args, **kwargs): + return foreign_func(self, *args, **kwargs) + + return call + + return make_method + + +def create_guid(name): + guid = GUID() + # https://learn.microsoft.com/en-us/windows/win32/api/combaseapi/nf-combaseapi-clsidfromstring + ole32.CLSIDFromString(name, byref(guid)) + return guid + + +def is_equal_guid(guid1, guid2): + # https://learn.microsoft.com/en-us/windows/win32/api/objbase/nf-objbase-isequalguid + return 
ole32.IsEqualGUID(byref(guid1), byref(guid2)) + + +ole32 = ctypes.oledll.ole32 + +IID_IUnknown = create_guid("{00000000-0000-0000-C000-000000000046}") +IID_IStream = create_guid("{0000000C-0000-0000-C000-000000000046}") +IID_IPersist = create_guid("{0000010C-0000-0000-C000-000000000046}") +CLSID_ShellLink = create_guid("{00021401-0000-0000-C000-000000000046}") + +# https://learn.microsoft.com/en-us/windows/win32/api/unknwn/nf-unknwn-iunknown-queryinterface(refiid_void) +proto_query_interface = create_proto_com_method( + "QueryInterface", 0, HRESULT, POINTER(GUID), POINTER(c_void_p) +) +# https://learn.microsoft.com/en-us/windows/win32/api/unknwn/nf-unknwn-iunknown-addref +proto_add_ref = create_proto_com_method("AddRef", 1, ctypes.c_long) +# https://learn.microsoft.com/en-us/windows/win32/api/unknwn/nf-unknwn-iunknown-release +proto_release = create_proto_com_method("Release", 2, ctypes.c_long) +# https://learn.microsoft.com/en-us/windows/win32/api/objidl/nf-objidl-ipersist-getclassid +proto_get_class_id = create_proto_com_method( + "GetClassID", 3, HRESULT, POINTER(GUID) +) + + +class ForeignFunctionsThatWillCallComMethodsTests(unittest.TestCase): + def setUp(self): + # https://learn.microsoft.com/en-us/windows/win32/api/combaseapi/nf-combaseapi-coinitializeex + ole32.CoInitializeEx(None, COINIT_APARTMENTTHREADED) + + def tearDown(self): + # https://learn.microsoft.com/en-us/windows/win32/api/combaseapi/nf-combaseapi-couninitialize + ole32.CoUninitialize() + gc.collect() + + @staticmethod + def create_shelllink_persist(typ): + ppst = typ() + # https://learn.microsoft.com/en-us/windows/win32/api/combaseapi/nf-combaseapi-cocreateinstance + ole32.CoCreateInstance( + byref(CLSID_ShellLink), + None, + CLSCTX_SERVER, + byref(IID_IPersist), + byref(ppst), + ) + return ppst + + def test_without_paramflags_and_iid(self): + class IUnknown(c_void_p): + QueryInterface = proto_query_interface() + AddRef = proto_add_ref() + Release = proto_release() + + class 
IPersist(IUnknown): + GetClassID = proto_get_class_id() + + ppst = self.create_shelllink_persist(IPersist) + + clsid = GUID() + hr_getclsid = ppst.GetClassID(byref(clsid)) + self.assertEqual(S_OK, hr_getclsid) + self.assertEqual(TRUE, is_equal_guid(CLSID_ShellLink, clsid)) + + self.assertEqual(2, ppst.AddRef()) + self.assertEqual(3, ppst.AddRef()) + + punk = IUnknown() + hr_qi = ppst.QueryInterface(IID_IUnknown, punk) + self.assertEqual(S_OK, hr_qi) + self.assertEqual(3, punk.Release()) + + with self.assertRaises(OSError) as e: + punk.QueryInterface(IID_IStream, IUnknown()) + self.assertEqual(E_NOINTERFACE, e.exception.winerror) + + self.assertEqual(2, ppst.Release()) + self.assertEqual(1, ppst.Release()) + self.assertEqual(0, ppst.Release()) + + def test_with_paramflags_and_without_iid(self): + class IUnknown(c_void_p): + QueryInterface = proto_query_interface(None) + AddRef = proto_add_ref() + Release = proto_release() + + class IPersist(IUnknown): + GetClassID = proto_get_class_id(((OUT, "pClassID"),)) + + ppst = self.create_shelllink_persist(IPersist) + + clsid = ppst.GetClassID() + self.assertEqual(TRUE, is_equal_guid(CLSID_ShellLink, clsid)) + + punk = IUnknown() + hr_qi = ppst.QueryInterface(IID_IUnknown, punk) + self.assertEqual(S_OK, hr_qi) + self.assertEqual(1, punk.Release()) + + with self.assertRaises(OSError) as e: + ppst.QueryInterface(IID_IStream, IUnknown()) + self.assertEqual(E_NOINTERFACE, e.exception.winerror) + + self.assertEqual(0, ppst.Release()) + + def test_with_paramflags_and_iid(self): + class IUnknown(c_void_p): + QueryInterface = proto_query_interface(None, IID_IUnknown) + AddRef = proto_add_ref() + Release = proto_release() + + class IPersist(IUnknown): + GetClassID = proto_get_class_id(((OUT, "pClassID"),), IID_IPersist) + + ppst = self.create_shelllink_persist(IPersist) + + clsid = ppst.GetClassID() + self.assertEqual(TRUE, is_equal_guid(CLSID_ShellLink, clsid)) + + punk = IUnknown() + hr_qi = ppst.QueryInterface(IID_IUnknown, punk) + 
self.assertEqual(S_OK, hr_qi) + self.assertEqual(1, punk.Release()) + + with self.assertRaises(COMError) as e: + ppst.QueryInterface(IID_IStream, IUnknown()) + self.assertEqual(E_NOINTERFACE, e.exception.hresult) + + self.assertEqual(0, ppst.Release()) + + +if __name__ == '__main__': + unittest.main() diff --git a/Lib/test/test_fileio.py b/Lib/test/test_fileio.py index d60aabcdf1ae22..e681417e15d34b 100644 --- a/Lib/test/test_fileio.py +++ b/Lib/test/test_fileio.py @@ -364,8 +364,7 @@ def testErrnoOnClosedReadinto(self, f): @strace_helper.requires_strace() def test_syscalls_read(self): - """Check that the set of system calls produced by the I/O stack is what - is expected for various read cases. + """Check set of system calls during common I/O patterns It's expected as bits of the I/O implementation change, this will need to change. The goal is to catch changes that unintentionally add @@ -383,6 +382,11 @@ def check_readall(name, code, prelude="", cleanup="", prelude=prelude, cleanup=cleanup) + # Some system calls (ex. mmap) can be used for both File I/O and + # memory allocation. Filter out the ones used for memory + # allocation. + syscalls = strace_helper.filter_memory(syscalls) + # The first call should be an open that returns a # file descriptor (fd). Afer that calls may vary. 
Once the file # is opened, check calls refer to it by fd as the filename diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index a0292b31af1be5..1a6eac236009c3 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -1339,6 +1339,37 @@ def to_bool_str(): self.assert_specialized(to_bool_str, "TO_BOOL_STR") self.assert_no_opcode(to_bool_str, "TO_BOOL") + @cpython_only + @requires_specialization_ft + def test_unpack_sequence(self): + def f(): + for _ in range(100): + a, b = 1, 2 + self.assertEqual(a, 1) + self.assertEqual(b, 2) + + f() + self.assert_specialized(f, "UNPACK_SEQUENCE_TWO_TUPLE") + self.assert_no_opcode(f, "UNPACK_SEQUENCE") + + def g(): + for _ in range(100): + a, = 1, + self.assertEqual(a, 1) + + g() + self.assert_specialized(g, "UNPACK_SEQUENCE_TUPLE") + self.assert_no_opcode(g, "UNPACK_SEQUENCE") + + def x(): + for _ in range(100): + a, b = [1, 2] + self.assertEqual(a, 1) + self.assertEqual(b, 2) + + x() + self.assert_specialized(x, "UNPACK_SEQUENCE_LIST") + self.assert_no_opcode(x, "UNPACK_SEQUENCE") if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 37e54d23b22516..1f18b1f09b5858 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -70,18 +70,17 @@ def wrap(*args, **kwargs): os.rename = builtin_rename return wrap -def write_file(path, content, binary=False): +def create_file(path, content=b''): """Write *content* to a file located at *path*. If *path* is a tuple instead of a string, os.path.join will be used to - make a path. If *binary* is true, the file will be opened in binary - mode. + make a path. 
""" if isinstance(path, tuple): path = os.path.join(*path) - mode = 'wb' if binary else 'w' - encoding = None if binary else "utf-8" - with open(path, mode, encoding=encoding) as fp: + if isinstance(content, str): + content = content.encode() + with open(path, 'xb') as fp: fp.write(content) def write_test_file(path, size): @@ -190,7 +189,7 @@ def test_rmtree_works_on_bytes(self): tmp = self.mkdtemp() victim = os.path.join(tmp, 'killme') os.mkdir(victim) - write_file(os.path.join(victim, 'somefile'), 'foo') + create_file(os.path.join(victim, 'somefile'), 'foo') victim = os.fsencode(victim) self.assertIsInstance(victim, bytes) shutil.rmtree(victim) @@ -242,7 +241,7 @@ def test_rmtree_works_on_symlinks(self): for d in dir1, dir2, dir3: os.mkdir(d) file1 = os.path.join(tmp, 'file1') - write_file(file1, 'foo') + create_file(file1, 'foo') link1 = os.path.join(dir1, 'link1') os.symlink(dir2, link1) link2 = os.path.join(dir1, 'link2') @@ -304,7 +303,7 @@ def test_rmtree_works_on_junctions(self): for d in dir1, dir2, dir3: os.mkdir(d) file1 = os.path.join(tmp, 'file1') - write_file(file1, 'foo') + create_file(file1, 'foo') link1 = os.path.join(dir1, 'link1') _winapi.CreateJunction(dir2, link1) link2 = os.path.join(dir1, 'link2') @@ -327,7 +326,7 @@ def test_rmtree_errors(self): # existing file tmpdir = self.mkdtemp() filename = os.path.join(tmpdir, "tstfile") - write_file(filename, "") + create_file(filename) with self.assertRaises(NotADirectoryError) as cm: shutil.rmtree(filename) self.assertEqual(cm.exception.filename, filename) @@ -347,7 +346,7 @@ def test_rmtree_errors(self): def test_rmtree_errors_onerror(self): tmpdir = self.mkdtemp() filename = os.path.join(tmpdir, "tstfile") - write_file(filename, "") + create_file(filename) errors = [] def onerror(*args): errors.append(args) @@ -365,7 +364,7 @@ def onerror(*args): def test_rmtree_errors_onexc(self): tmpdir = self.mkdtemp() filename = os.path.join(tmpdir, "tstfile") - write_file(filename, "") + create_file(filename) 
errors = [] def onexc(*args): errors.append(args) @@ -547,7 +546,7 @@ def raiser(fn, *args, **kwargs): os.lstat = raiser os.mkdir(TESTFN) - write_file((TESTFN, 'foo'), 'foo') + create_file((TESTFN, 'foo'), 'foo') shutil.rmtree(TESTFN) finally: os.lstat = orig_lstat @@ -618,7 +617,7 @@ def test_rmtree_with_dir_fd(self): self.addCleanup(os.close, dir_fd) os.mkdir(fullname) os.mkdir(os.path.join(fullname, 'subdir')) - write_file(os.path.join(fullname, 'subdir', 'somefile'), 'foo') + create_file(os.path.join(fullname, 'subdir', 'somefile'), 'foo') self.assertTrue(os.path.exists(fullname)) shutil.rmtree(victim, dir_fd=dir_fd) self.assertFalse(os.path.exists(fullname)) @@ -658,7 +657,7 @@ def test_rmtree_on_junction(self): src = os.path.join(TESTFN, 'cheese') dst = os.path.join(TESTFN, 'shop') os.mkdir(src) - open(os.path.join(src, 'spam'), 'wb').close() + create_file(os.path.join(src, 'spam')) _winapi.CreateJunction(src, dst) self.assertRaises(OSError, shutil.rmtree, dst) shutil.rmtree(dst, ignore_errors=True) @@ -718,7 +717,7 @@ def _onexc(fn, path, exc): for path in dirs: os.mkdir(path) for path in files: - write_file(path, '') + create_file(path) old_modes = [os.stat(path).st_mode for path in paths] @@ -757,9 +756,9 @@ def test_copytree_simple(self): dst_dir = os.path.join(self.mkdtemp(), 'destination') self.addCleanup(shutil.rmtree, src_dir) self.addCleanup(shutil.rmtree, os.path.dirname(dst_dir)) - write_file((src_dir, 'test.txt'), '123') + create_file((src_dir, 'test.txt'), '123') os.mkdir(os.path.join(src_dir, 'test_dir')) - write_file((src_dir, 'test_dir', 'test.txt'), '456') + create_file((src_dir, 'test_dir', 'test.txt'), '456') shutil.copytree(src_dir, dst_dir) self.assertTrue(os.path.isfile(os.path.join(dst_dir, 'test.txt'))) @@ -777,11 +776,11 @@ def test_copytree_dirs_exist_ok(self): self.addCleanup(shutil.rmtree, src_dir) self.addCleanup(shutil.rmtree, dst_dir) - write_file((src_dir, 'nonexisting.txt'), '123') + create_file((src_dir, 'nonexisting.txt'), 
'123') os.mkdir(os.path.join(src_dir, 'existing_dir')) os.mkdir(os.path.join(dst_dir, 'existing_dir')) - write_file((dst_dir, 'existing_dir', 'existing.txt'), 'will be replaced') - write_file((src_dir, 'existing_dir', 'existing.txt'), 'has been replaced') + create_file((dst_dir, 'existing_dir', 'existing.txt'), 'will be replaced') + create_file((src_dir, 'existing_dir', 'existing.txt'), 'has been replaced') shutil.copytree(src_dir, dst_dir, dirs_exist_ok=True) self.assertTrue(os.path.isfile(os.path.join(dst_dir, 'nonexisting.txt'))) @@ -804,7 +803,7 @@ def test_copytree_symlinks(self): sub_dir = os.path.join(src_dir, 'sub') os.mkdir(src_dir) os.mkdir(sub_dir) - write_file((src_dir, 'file.txt'), 'foo') + create_file((src_dir, 'file.txt'), 'foo') src_link = os.path.join(sub_dir, 'link') dst_link = os.path.join(dst_dir, 'sub/link') os.symlink(os.path.join(src_dir, 'file.txt'), @@ -835,16 +834,16 @@ def test_copytree_with_exclude(self): src_dir = self.mkdtemp() try: dst_dir = join(self.mkdtemp(), 'destination') - write_file((src_dir, 'test.txt'), '123') - write_file((src_dir, 'test.tmp'), '123') + create_file((src_dir, 'test.txt'), '123') + create_file((src_dir, 'test.tmp'), '123') os.mkdir(join(src_dir, 'test_dir')) - write_file((src_dir, 'test_dir', 'test.txt'), '456') + create_file((src_dir, 'test_dir', 'test.txt'), '456') os.mkdir(join(src_dir, 'test_dir2')) - write_file((src_dir, 'test_dir2', 'test.txt'), '456') + create_file((src_dir, 'test_dir2', 'test.txt'), '456') os.mkdir(join(src_dir, 'test_dir2', 'subdir')) os.mkdir(join(src_dir, 'test_dir2', 'subdir2')) - write_file((src_dir, 'test_dir2', 'subdir', 'test.txt'), '456') - write_file((src_dir, 'test_dir2', 'subdir2', 'test.py'), '456') + create_file((src_dir, 'test_dir2', 'subdir', 'test.txt'), '456') + create_file((src_dir, 'test_dir2', 'subdir2', 'test.py'), '456') # testing glob-like patterns try: @@ -903,7 +902,7 @@ def test_copytree_arg_types_of_ignore(self): os.mkdir(join(src_dir)) 
os.mkdir(join(src_dir, 'test_dir')) os.mkdir(os.path.join(src_dir, 'test_dir', 'subdir')) - write_file((src_dir, 'test_dir', 'subdir', 'test.txt'), '456') + create_file((src_dir, 'test_dir', 'subdir', 'test.txt'), '456') invocations = [] @@ -943,9 +942,9 @@ def test_copytree_retains_permissions(self): self.addCleanup(shutil.rmtree, tmp_dir) os.chmod(src_dir, 0o777) - write_file((src_dir, 'permissive.txt'), '123') + create_file((src_dir, 'permissive.txt'), '123') os.chmod(os.path.join(src_dir, 'permissive.txt'), 0o777) - write_file((src_dir, 'restrictive.txt'), '456') + create_file((src_dir, 'restrictive.txt'), '456') os.chmod(os.path.join(src_dir, 'restrictive.txt'), 0o600) restrictive_subdir = tempfile.mkdtemp(dir=src_dir) self.addCleanup(os_helper.rmtree, restrictive_subdir) @@ -988,8 +987,7 @@ def custom_cpfun(a, b): flag = [] src = self.mkdtemp() dst = tempfile.mktemp(dir=self.mkdtemp()) - with open(os.path.join(src, 'foo'), 'w', encoding='utf-8') as f: - f.close() + create_file(os.path.join(src, 'foo')) shutil.copytree(src, dst, copy_function=custom_cpfun) self.assertEqual(len(flag), 1) @@ -1024,9 +1022,9 @@ def test_copytree_named_pipe(self): def test_copytree_special_func(self): src_dir = self.mkdtemp() dst_dir = os.path.join(self.mkdtemp(), 'destination') - write_file((src_dir, 'test.txt'), '123') + create_file((src_dir, 'test.txt'), '123') os.mkdir(os.path.join(src_dir, 'test_dir')) - write_file((src_dir, 'test_dir', 'test.txt'), '456') + create_file((src_dir, 'test_dir', 'test.txt'), '456') copied = [] def _copy(src, dst): @@ -1039,7 +1037,7 @@ def _copy(src, dst): def test_copytree_dangling_symlinks(self): src_dir = self.mkdtemp() valid_file = os.path.join(src_dir, 'test.txt') - write_file(valid_file, 'abc') + create_file(valid_file, 'abc') dir_a = os.path.join(src_dir, 'dir_a') os.mkdir(dir_a) for d in src_dir, dir_a: @@ -1067,8 +1065,7 @@ def test_copytree_symlink_dir(self): src_dir = self.mkdtemp() dst_dir = os.path.join(self.mkdtemp(), 'destination') 
os.mkdir(os.path.join(src_dir, 'real_dir')) - with open(os.path.join(src_dir, 'real_dir', 'test.txt'), 'wb'): - pass + create_file(os.path.join(src_dir, 'real_dir', 'test.txt')) os.symlink(os.path.join(src_dir, 'real_dir'), os.path.join(src_dir, 'link_to_dir'), target_is_directory=True) @@ -1088,7 +1085,7 @@ def test_copytree_return_value(self): dst_dir = src_dir + "dest" self.addCleanup(shutil.rmtree, dst_dir, True) src = os.path.join(src_dir, 'foo') - write_file(src, 'foo') + create_file(src, 'foo') rv = shutil.copytree(src_dir, dst_dir) self.assertEqual(['foo'], os.listdir(rv)) @@ -1100,7 +1097,7 @@ def test_copytree_subdirectory(self): dst_dir = os.path.join(src_dir, "somevendor", "1.0") os.makedirs(src_dir) src = os.path.join(src_dir, 'pol') - write_file(src, 'pol') + create_file(src, 'pol') rv = shutil.copytree(src_dir, dst_dir) self.assertEqual(['pol'], os.listdir(rv)) @@ -1115,8 +1112,8 @@ def test_copymode_follow_symlinks(self): dst = os.path.join(tmp_dir, 'bar') src_link = os.path.join(tmp_dir, 'baz') dst_link = os.path.join(tmp_dir, 'quux') - write_file(src, 'foo') - write_file(dst, 'foo') + create_file(src, 'foo') + create_file(dst, 'foo') os.symlink(src, src_link) os.symlink(dst, dst_link) os.chmod(src, stat.S_IRWXU|stat.S_IRWXG) @@ -1147,8 +1144,8 @@ def test_copymode_symlink_to_symlink(self): dst = os.path.join(tmp_dir, 'bar') src_link = os.path.join(tmp_dir, 'baz') dst_link = os.path.join(tmp_dir, 'quux') - write_file(src, 'foo') - write_file(dst, 'foo') + create_file(src, 'foo') + create_file(dst, 'foo') os.symlink(src, src_link) os.symlink(dst, dst_link) os.chmod(src, stat.S_IRWXU|stat.S_IRWXG) @@ -1178,8 +1175,8 @@ def test_copymode_symlink_to_symlink_wo_lchmod(self): dst = os.path.join(tmp_dir, 'bar') src_link = os.path.join(tmp_dir, 'baz') dst_link = os.path.join(tmp_dir, 'quux') - write_file(src, 'foo') - write_file(dst, 'foo') + create_file(src, 'foo') + create_file(dst, 'foo') os.symlink(src, src_link) os.symlink(dst, dst_link) 
shutil.copymode(src_link, dst_link, follow_symlinks=False) # silent fail @@ -1193,11 +1190,11 @@ def test_copystat_symlinks(self): dst = os.path.join(tmp_dir, 'bar') src_link = os.path.join(tmp_dir, 'baz') dst_link = os.path.join(tmp_dir, 'qux') - write_file(src, 'foo') + create_file(src, 'foo') src_stat = os.stat(src) os.utime(src, (src_stat.st_atime, src_stat.st_mtime - 42.0)) # ensure different mtimes - write_file(dst, 'bar') + create_file(dst, 'bar') self.assertNotEqual(os.stat(src).st_mtime, os.stat(dst).st_mtime) os.symlink(src, src_link) os.symlink(dst, dst_link) @@ -1235,8 +1232,8 @@ def test_copystat_handles_harmless_chflags_errors(self): tmpdir = self.mkdtemp() file1 = os.path.join(tmpdir, 'file1') file2 = os.path.join(tmpdir, 'file2') - write_file(file1, 'xxx') - write_file(file2, 'xxx') + create_file(file1, 'xxx') + create_file(file2, 'xxx') def make_chflags_raiser(err): ex = OSError() @@ -1262,9 +1259,9 @@ def _chflags_raiser(path, flags, *, follow_symlinks=True): def test_copyxattr(self): tmp_dir = self.mkdtemp() src = os.path.join(tmp_dir, 'foo') - write_file(src, 'foo') + create_file(src, 'foo') dst = os.path.join(tmp_dir, 'bar') - write_file(dst, 'bar') + create_file(dst, 'bar') # no xattr == no problem shutil._copyxattr(src, dst) @@ -1278,7 +1275,7 @@ def test_copyxattr(self): os.getxattr(dst, 'user.foo')) # check errors don't affect other attrs os.remove(dst) - write_file(dst, 'bar') + create_file(dst, 'bar') os_error = OSError(errno.EPERM, 'EPERM') def _raise_on_user_foo(fname, attr, val, **kwargs): @@ -1308,15 +1305,15 @@ def _raise_on_src(fname, *, follow_symlinks=True): # test that shutil.copystat copies xattrs src = os.path.join(tmp_dir, 'the_original') srcro = os.path.join(tmp_dir, 'the_original_ro') - write_file(src, src) - write_file(srcro, srcro) + create_file(src, src) + create_file(srcro, srcro) os.setxattr(src, 'user.the_value', b'fiddly') os.setxattr(srcro, 'user.the_value', b'fiddly') os.chmod(srcro, 0o444) dst = 
os.path.join(tmp_dir, 'the_copy') dstro = os.path.join(tmp_dir, 'the_copy_ro') - write_file(dst, dst) - write_file(dstro, dstro) + create_file(dst, dst) + create_file(dstro, dstro) shutil.copystat(src, dst) shutil.copystat(srcro, dstro) self.assertEqual(os.getxattr(dst, 'user.the_value'), b'fiddly') @@ -1332,13 +1329,13 @@ def test_copyxattr_symlinks(self): tmp_dir = self.mkdtemp() src = os.path.join(tmp_dir, 'foo') src_link = os.path.join(tmp_dir, 'baz') - write_file(src, 'foo') + create_file(src, 'foo') os.symlink(src, src_link) os.setxattr(src, 'trusted.foo', b'42') os.setxattr(src_link, 'trusted.foo', b'43', follow_symlinks=False) dst = os.path.join(tmp_dir, 'bar') dst_link = os.path.join(tmp_dir, 'qux') - write_file(dst, 'bar') + create_file(dst, 'bar') os.symlink(dst, dst_link) shutil._copyxattr(src_link, dst_link, follow_symlinks=False) self.assertEqual(os.getxattr(dst_link, 'trusted.foo', follow_symlinks=False), b'43') @@ -1351,7 +1348,7 @@ def test_copyxattr_symlinks(self): def _copy_file(self, method): fname = 'test.txt' tmpdir = self.mkdtemp() - write_file((tmpdir, fname), 'xxx') + create_file((tmpdir, fname), 'xxx') file1 = os.path.join(tmpdir, fname) tmpdir2 = self.mkdtemp() method(file1, tmpdir2) @@ -1370,7 +1367,7 @@ def test_copy_symlinks(self): src = os.path.join(tmp_dir, 'foo') dst = os.path.join(tmp_dir, 'bar') src_link = os.path.join(tmp_dir, 'baz') - write_file(src, 'foo') + create_file(src, 'foo') os.symlink(src, src_link) if hasattr(os, 'lchmod'): os.lchmod(src_link, stat.S_IRWXU | stat.S_IRWXO) @@ -1412,7 +1409,7 @@ def test_copy2_symlinks(self): src = os.path.join(tmp_dir, 'foo') dst = os.path.join(tmp_dir, 'bar') src_link = os.path.join(tmp_dir, 'baz') - write_file(src, 'foo') + create_file(src, 'foo') os.symlink(src, src_link) if hasattr(os, 'lchmod'): os.lchmod(src_link, stat.S_IRWXU | stat.S_IRWXO) @@ -1446,7 +1443,7 @@ def test_copy2_xattr(self): tmp_dir = self.mkdtemp() src = os.path.join(tmp_dir, 'foo') dst = os.path.join(tmp_dir, 
'bar') - write_file(src, 'foo') + create_file(src, 'foo') os.setxattr(src, 'user.foo', b'42') shutil.copy2(src, dst) self.assertEqual( @@ -1460,7 +1457,7 @@ def test_copy_return_value(self): src_dir = self.mkdtemp() dst_dir = self.mkdtemp() src = os.path.join(src_dir, 'foo') - write_file(src, 'foo') + create_file(src, 'foo') rv = fn(src, dst_dir) self.assertEqual(rv, os.path.join(dst_dir, 'foo')) rv = fn(src, os.path.join(dst_dir, 'bar')) @@ -1477,7 +1474,7 @@ def _test_copy_dir(self, copy_func): src_file = os.path.join(src_dir, 'foo') dir2 = self.mkdtemp() dst = os.path.join(src_dir, 'does_not_exist/') - write_file(src_file, 'foo') + create_file(src_file, 'foo') if sys.platform == "win32": err = PermissionError else: @@ -1497,7 +1494,7 @@ def test_copyfile_symlinks(self): dst = os.path.join(tmp_dir, 'dst') dst_link = os.path.join(tmp_dir, 'dst_link') link = os.path.join(tmp_dir, 'link') - write_file(src, 'foo') + create_file(src, 'foo') os.symlink(src, link) # don't follow shutil.copyfile(link, dst_link, follow_symlinks=False) @@ -1514,8 +1511,7 @@ def test_dont_copy_file_onto_link_to_itself(self): src = os.path.join(TESTFN, 'cheese') dst = os.path.join(TESTFN, 'shop') try: - with open(src, 'w', encoding='utf-8') as f: - f.write('cheddar') + create_file(src, 'cheddar') try: os.link(src, dst) except PermissionError as e: @@ -1534,8 +1530,7 @@ def test_dont_copy_file_onto_symlink_to_itself(self): src = os.path.join(TESTFN, 'cheese') dst = os.path.join(TESTFN, 'shop') try: - with open(src, 'w', encoding='utf-8') as f: - f.write('cheddar') + create_file(src, 'cheddar') # Using `src` here would mean we end up with a symlink pointing # to TESTFN/TESTFN/cheese, while it should point at # TESTFN/cheese. 
@@ -1570,7 +1565,7 @@ def test_copyfile_return_value(self): dst_dir = self.mkdtemp() dst_file = os.path.join(dst_dir, 'bar') src_file = os.path.join(src_dir, 'foo') - write_file(src_file, 'foo') + create_file(src_file, 'foo') rv = shutil.copyfile(src_file, dst_file) self.assertTrue(os.path.exists(rv)) self.assertEqual(read_file(src_file), read_file(dst_file)) @@ -1580,7 +1575,7 @@ def test_copyfile_same_file(self): # are the same. src_dir = self.mkdtemp() src_file = os.path.join(src_dir, 'foo') - write_file(src_file, 'foo') + create_file(src_file, 'foo') self.assertRaises(SameFileError, shutil.copyfile, src_file, src_file) # But Error should work too, to stay backward compatible. self.assertRaises(Error, shutil.copyfile, src_file, src_file) @@ -1597,7 +1592,7 @@ def test_copyfile_nonexistent_dir(self): src_dir = self.mkdtemp() src_file = os.path.join(src_dir, 'foo') dst = os.path.join(src_dir, 'does_not_exist/') - write_file(src_file, 'foo') + create_file(src_file, 'foo') self.assertRaises(FileNotFoundError, shutil.copyfile, src_file, dst) def test_copyfile_copy_dir(self): @@ -1608,7 +1603,7 @@ def test_copyfile_copy_dir(self): src_file = os.path.join(src_dir, 'foo') dir2 = self.mkdtemp() dst = os.path.join(src_dir, 'does_not_exist/') - write_file(src_file, 'foo') + create_file(src_file, 'foo') if sys.platform == "win32": err = PermissionError else: @@ -1634,13 +1629,13 @@ def _create_files(self, base_dir='dist'): root_dir = self.mkdtemp() dist = os.path.join(root_dir, base_dir) os.makedirs(dist, exist_ok=True) - write_file((dist, 'file1'), 'xxx') - write_file((dist, 'file2'), 'xxx') + create_file((dist, 'file1'), 'xxx') + create_file((dist, 'file2'), 'xxx') os.mkdir(os.path.join(dist, 'sub')) - write_file((dist, 'sub', 'file3'), 'xxx') + create_file((dist, 'sub', 'file3'), 'xxx') os.mkdir(os.path.join(dist, 'sub2')) if base_dir: - write_file((root_dir, 'outer'), 'xxx') + create_file((root_dir, 'outer'), 'xxx') return root_dir, base_dir @support.requires_zlib() @@ 
-2221,7 +2216,7 @@ def test_chown(self): dirname = self.mkdtemp() filename = tempfile.mktemp(dir=dirname) linkname = os.path.join(dirname, "chown_link") - write_file(filename, 'testing chown function') + create_file(filename, 'testing chown function') os.symlink(filename, linkname) with self.assertRaises(ValueError): @@ -2314,37 +2309,41 @@ def check_chown(path, uid=None, gid=None): class TestWhich(BaseTest, unittest.TestCase): def setUp(self): - self.temp_dir = self.mkdtemp(prefix="Tmp") + temp_dir = self.mkdtemp(prefix="Tmp") + base_dir = os.path.join(temp_dir, TESTFN + '-basedir') + os.mkdir(base_dir) + self.dir = os.path.join(base_dir, TESTFN + '-dir') + os.mkdir(self.dir) + self.other_dir = os.path.join(base_dir, TESTFN + '-dir2') + os.mkdir(self.other_dir) # Give the temp_file an ".exe" suffix for all. # It's needed on Windows and not harmful on other platforms. - self.temp_file = tempfile.NamedTemporaryFile(dir=self.temp_dir, - prefix="Tmp", - suffix=".Exe") - os.chmod(self.temp_file.name, stat.S_IXUSR) - self.addCleanup(self.temp_file.close) - self.dir, self.file = os.path.split(self.temp_file.name) + self.file = TESTFN + '.Exe' + self.filepath = os.path.join(self.dir, self.file) + self.create_file(self.filepath) self.env_path = self.dir self.curdir = os.curdir self.ext = ".EXE" - def to_text_type(self, s): - ''' - In this class we're testing with str, so convert s to a str - ''' - if isinstance(s, bytes): - return s.decode() - return s + to_text_type = staticmethod(os.fsdecode) + + def create_file(self, path): + create_file(path) + os.chmod(path, 0o755) + + def assertNormEqual(self, actual, expected): + self.assertEqual(os.path.normcase(actual), os.path.normcase(expected)) def test_basic(self): # Given an EXE in a directory, it should be returned. 
rv = shutil.which(self.file, path=self.dir) - self.assertEqual(rv, self.temp_file.name) + self.assertEqual(rv, self.filepath) def test_absolute_cmd(self): # When given the fully qualified path to an executable that exists, # it should be returned. - rv = shutil.which(self.temp_file.name, path=self.temp_dir) - self.assertEqual(rv, self.temp_file.name) + rv = shutil.which(self.filepath, path=self.other_dir) + self.assertEqual(rv, self.filepath) def test_relative_cmd(self): # When given the relative path with a directory part to an executable @@ -2352,7 +2351,7 @@ def test_relative_cmd(self): base_dir, tail_dir = os.path.split(self.dir) relpath = os.path.join(tail_dir, self.file) with os_helper.change_cwd(path=base_dir): - rv = shutil.which(relpath, path=self.temp_dir) + rv = shutil.which(relpath, path=self.other_dir) self.assertEqual(rv, relpath) # But it shouldn't be searched in PATH directories (issue #16957). with os_helper.change_cwd(path=self.dir): @@ -2363,9 +2362,8 @@ def test_relative_cmd(self): "test is for non win32") def test_cwd_non_win32(self): # Issue #16957 - base_dir = os.path.dirname(self.dir) with os_helper.change_cwd(path=self.dir): - rv = shutil.which(self.file, path=base_dir) + rv = shutil.which(self.file, path=self.other_dir) # non-win32: shouldn't match in the current directory. 
self.assertIsNone(rv) @@ -2375,57 +2373,32 @@ def test_cwd_win32(self): base_dir = os.path.dirname(self.dir) with os_helper.change_cwd(path=self.dir): with unittest.mock.patch('shutil._win_path_needs_curdir', return_value=True): - rv = shutil.which(self.file, path=base_dir) + rv = shutil.which(self.file, path=self.other_dir) # Current directory implicitly on PATH self.assertEqual(rv, os.path.join(self.curdir, self.file)) with unittest.mock.patch('shutil._win_path_needs_curdir', return_value=False): - rv = shutil.which(self.file, path=base_dir) + rv = shutil.which(self.file, path=self.other_dir) # Current directory not on PATH self.assertIsNone(rv) @unittest.skipUnless(sys.platform == "win32", "test is for win32") def test_cwd_win32_added_before_all_other_path(self): - base_dir = pathlib.Path(os.fsdecode(self.dir)) - - elsewhere_in_path_dir = base_dir / 'dir1' - elsewhere_in_path_dir.mkdir() - match_elsewhere_in_path = elsewhere_in_path_dir / 'hello.exe' - match_elsewhere_in_path.touch() - - exe_in_cwd = base_dir / 'hello.exe' - exe_in_cwd.touch() - - with os_helper.change_cwd(path=base_dir): - with unittest.mock.patch('shutil._win_path_needs_curdir', return_value=True): - rv = shutil.which('hello.exe', path=elsewhere_in_path_dir) - - self.assertEqual(os.path.abspath(rv), os.path.abspath(exe_in_cwd)) - - @unittest.skipUnless(sys.platform == "win32", - "test is for win32") - def test_pathext_match_before_path_full_match(self): - base_dir = pathlib.Path(os.fsdecode(self.dir)) - dir1 = base_dir / 'dir1' - dir2 = base_dir / 'dir2' - dir1.mkdir() - dir2.mkdir() - - pathext_match = dir1 / 'hello.com.exe' - path_match = dir2 / 'hello.com' - pathext_match.touch() - path_match.touch() - - test_path = os.pathsep.join([str(dir1), str(dir2)]) - assert os.path.basename(shutil.which( - 'hello.com', path=test_path, mode = os.F_OK - )).lower() == 'hello.com.exe' + other_file_path = os.path.join(self.other_dir, self.file) + self.create_file(other_file_path) + with 
unittest.mock.patch('shutil._win_path_needs_curdir', return_value=True): + with os_helper.change_cwd(path=self.dir): + rv = shutil.which(self.file, path=self.other_dir) + self.assertEqual(rv, os.path.join(self.curdir, self.file)) + with os_helper.change_cwd(path=self.other_dir): + rv = shutil.which(self.file, path=self.dir) + self.assertEqual(rv, os.path.join(self.curdir, self.file)) @os_helper.skip_if_dac_override def test_non_matching_mode(self): # Set the file read-only and ask for writeable files. - os.chmod(self.temp_file.name, stat.S_IREAD) - if os.access(self.temp_file.name, os.W_OK): + os.chmod(self.filepath, stat.S_IREAD) + if os.access(self.filepath, os.W_OK): self.skipTest("can't set the file read-only") rv = shutil.which(self.file, path=self.dir, mode=os.W_OK) self.assertIsNone(rv) @@ -2447,13 +2420,13 @@ def test_pathext_checking(self): # Ask for the file without the ".exe" extension, then ensure that # it gets found properly with the extension. rv = shutil.which(self.file[:-4], path=self.dir) - self.assertEqual(rv, self.temp_file.name[:-4] + self.ext) + self.assertEqual(rv, self.filepath[:-4] + self.ext) def test_environ_path(self): with os_helper.EnvironmentVarGuard() as env: env['PATH'] = self.env_path rv = shutil.which(self.file) - self.assertEqual(rv, self.temp_file.name) + self.assertEqual(rv, self.filepath) def test_environ_path_empty(self): # PATH='': no match @@ -2467,12 +2440,9 @@ def test_environ_path_empty(self): self.assertIsNone(rv) def test_environ_path_cwd(self): - expected_cwd = os.path.basename(self.temp_file.name) + expected_cwd = self.file if sys.platform == "win32": - curdir = os.curdir - if isinstance(expected_cwd, bytes): - curdir = os.fsencode(curdir) - expected_cwd = os.path.join(curdir, expected_cwd) + expected_cwd = os.path.join(self.curdir, expected_cwd) # PATH=':': explicitly looks in the current directory with os_helper.EnvironmentVarGuard() as env: @@ -2497,14 +2467,14 @@ def test_environ_path_missing(self): create=True), 
\ support.swap_attr(os, 'defpath', self.dir): rv = shutil.which(self.file) - self.assertEqual(rv, self.temp_file.name) + self.assertEqual(rv, self.filepath) # with confstr with unittest.mock.patch('os.confstr', return_value=self.dir, \ create=True), \ support.swap_attr(os, 'defpath', ''): rv = shutil.which(self.file) - self.assertEqual(rv, self.temp_file.name) + self.assertEqual(rv, self.filepath) def test_empty_path(self): base_dir = os.path.dirname(self.dir) @@ -2522,50 +2492,88 @@ def test_empty_path_no_PATH(self): @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') def test_pathext(self): - ext = self.to_text_type(".xyz") - temp_filexyz = tempfile.NamedTemporaryFile(dir=self.temp_dir, - prefix=self.to_text_type("Tmp2"), suffix=ext) - os.chmod(temp_filexyz.name, stat.S_IXUSR) - self.addCleanup(temp_filexyz.close) - - # strip path and extension - program = os.path.basename(temp_filexyz.name) - program = os.path.splitext(program)[0] - + ext = '.xyz' + cmd = self.to_text_type(TESTFN2) + cmdext = cmd + self.to_text_type(ext) + filepath = os.path.join(self.dir, cmdext) + self.create_file(filepath) with os_helper.EnvironmentVarGuard() as env: - env['PATHEXT'] = ext if isinstance(ext, str) else ext.decode() - rv = shutil.which(program, path=self.temp_dir) - self.assertEqual(rv, temp_filexyz.name) + env['PATHEXT'] = ext + self.assertEqual(shutil.which(cmd, path=self.dir), filepath) + self.assertEqual(shutil.which(cmdext, path=self.dir), filepath) # Issue 40592: See https://bugs.python.org/issue40592 @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') def test_pathext_with_empty_str(self): - ext = self.to_text_type(".xyz") - temp_filexyz = tempfile.NamedTemporaryFile(dir=self.temp_dir, - prefix=self.to_text_type("Tmp2"), suffix=ext) - self.addCleanup(temp_filexyz.close) + ext = '.xyz' + cmd = self.to_text_type(TESTFN2) + cmdext = cmd + self.to_text_type(ext) + filepath = os.path.join(self.dir, cmdext) + 
self.create_file(filepath) + with os_helper.EnvironmentVarGuard() as env: + env['PATHEXT'] = ext + ';' # note the ; + self.assertEqual(shutil.which(cmd, path=self.dir), filepath) + self.assertEqual(shutil.which(cmdext, path=self.dir), filepath) - # strip path and extension - program = os.path.basename(temp_filexyz.name) - program = os.path.splitext(program)[0] + @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') + def test_pathext_with_multidot_extension(self): + ext = '.foo.bar' + cmd = self.to_text_type(TESTFN2) + cmdext = cmd + self.to_text_type(ext) + filepath = os.path.join(self.dir, cmdext) + self.create_file(filepath) + with os_helper.EnvironmentVarGuard() as env: + env['PATHEXT'] = ext + self.assertEqual(shutil.which(cmd, path=self.dir), filepath) + self.assertEqual(shutil.which(cmdext, path=self.dir), filepath) + @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') + def test_pathext_with_null_extension(self): + cmd = self.to_text_type(TESTFN2) + cmddot = cmd + self.to_text_type('.') + filepath = os.path.join(self.dir, cmd) + self.create_file(filepath) with os_helper.EnvironmentVarGuard() as env: - env['PATHEXT'] = f"{ext if isinstance(ext, str) else ext.decode()};" # note the ; - rv = shutil.which(program, path=self.temp_dir) - self.assertEqual(rv, temp_filexyz.name) + env['PATHEXT'] = '.xyz' + self.assertIsNone(shutil.which(cmd, path=self.dir)) + self.assertIsNone(shutil.which(cmddot, path=self.dir)) + env['PATHEXT'] = '.xyz;.' # note the . + self.assertEqual(shutil.which(cmd, path=self.dir), filepath) + self.assertEqual(shutil.which(cmddot, path=self.dir), + filepath + self.to_text_type('.')) + env['PATHEXT'] = '.xyz;..' 
# multiple dots + self.assertEqual(shutil.which(cmd, path=self.dir), filepath) + self.assertEqual(shutil.which(cmddot, path=self.dir), + filepath + self.to_text_type('.')) + + @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') + def test_pathext_extension_ends_with_dot(self): + ext = '.xyz' + cmd = self.to_text_type(TESTFN2) + cmdext = cmd + self.to_text_type(ext) + dot = self.to_text_type('.') + filepath = os.path.join(self.dir, cmdext) + self.create_file(filepath) + with os_helper.EnvironmentVarGuard() as env: + env['PATHEXT'] = ext + '.' + self.assertEqual(shutil.which(cmd, path=self.dir), filepath) # cmd.exe hangs here + self.assertEqual(shutil.which(cmdext, path=self.dir), filepath) + self.assertIsNone(shutil.which(cmd + dot, path=self.dir)) + self.assertIsNone(shutil.which(cmdext + dot, path=self.dir)) # See GH-75586 @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') def test_pathext_applied_on_files_in_path(self): + ext = '.xyz' + cmd = self.to_text_type(TESTFN2) + cmdext = cmd + self.to_text_type(ext) + filepath = os.path.join(self.dir, cmdext) + self.create_file(filepath) with os_helper.EnvironmentVarGuard() as env: - env["PATH"] = self.temp_dir if isinstance(self.temp_dir, str) else self.temp_dir.decode() - env["PATHEXT"] = ".test" - - test_path = os.path.join(self.temp_dir, self.to_text_type("test_program.test")) - open(test_path, 'w').close() - os.chmod(test_path, 0o755) - - self.assertEqual(shutil.which(self.to_text_type("test_program")), test_path) + env["PATH"] = os.fsdecode(self.dir) + env["PATHEXT"] = ext + self.assertEqual(shutil.which(cmd), filepath) + self.assertEqual(shutil.which(cmdext), filepath) # See GH-75586 @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') @@ -2581,49 +2589,107 @@ def test_win_path_needs_curdir(self): self.assertFalse(shutil._win_path_needs_curdir('dontcare', os.X_OK)) need_curdir_mock.assert_called_once_with('dontcare') - # See GH-109590 
@unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') - def test_pathext_preferred_for_execute(self): - with os_helper.EnvironmentVarGuard() as env: - env["PATH"] = self.temp_dir if isinstance(self.temp_dir, str) else self.temp_dir.decode() - env["PATHEXT"] = ".test" - - exe = os.path.join(self.temp_dir, self.to_text_type("test.exe")) - open(exe, 'w').close() - os.chmod(exe, 0o755) + def test_same_dir_with_pathext_extension(self): + cmd = self.file # with .exe extension + # full match + self.assertNormEqual(shutil.which(cmd, path=self.dir), self.filepath) + self.assertNormEqual(shutil.which(cmd, path=self.dir, mode=os.F_OK), + self.filepath) + + cmd2 = cmd + self.to_text_type('.com') # with .exe.com extension + other_file_path = os.path.join(self.dir, cmd2) + self.create_file(other_file_path) + + # full match + self.assertNormEqual(shutil.which(cmd, path=self.dir), self.filepath) + self.assertNormEqual(shutil.which(cmd, path=self.dir, mode=os.F_OK), + self.filepath) + self.assertNormEqual(shutil.which(cmd2, path=self.dir), other_file_path) + self.assertNormEqual(shutil.which(cmd2, path=self.dir, mode=os.F_OK), + other_file_path) - # default behavior allows a direct match if nothing in PATHEXT matches - self.assertEqual(shutil.which(self.to_text_type("test.exe")), exe) - - dot_test = os.path.join(self.temp_dir, self.to_text_type("test.exe.test")) - open(dot_test, 'w').close() - os.chmod(dot_test, 0o755) - - # now we have a PATHEXT match, so it take precedence - self.assertEqual(shutil.which(self.to_text_type("test.exe")), dot_test) - - # but if we don't use os.X_OK we don't change the order based off PATHEXT - # and therefore get the direct match. 
- self.assertEqual(shutil.which(self.to_text_type("test.exe"), mode=os.F_OK), exe) - - # See GH-109590 @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') - def test_pathext_given_extension_preferred(self): - with os_helper.EnvironmentVarGuard() as env: - env["PATH"] = self.temp_dir if isinstance(self.temp_dir, str) else self.temp_dir.decode() - env["PATHEXT"] = ".exe2;.exe" + def test_same_dir_without_pathext_extension(self): + cmd = self.file[:-4] # without .exe extension + # pathext match + self.assertNormEqual(shutil.which(cmd, path=self.dir), self.filepath) + self.assertNormEqual(shutil.which(cmd, path=self.dir, mode=os.F_OK), + self.filepath) + + # without extension + other_file_path = os.path.join(self.dir, cmd) + self.create_file(other_file_path) + + # pathext match if mode contains X_OK + self.assertNormEqual(shutil.which(cmd, path=self.dir), self.filepath) + # full match + self.assertNormEqual(shutil.which(cmd, path=self.dir, mode=os.F_OK), + other_file_path) + self.assertNormEqual(shutil.which(self.file, path=self.dir), self.filepath) + self.assertNormEqual(shutil.which(self.file, path=self.dir, mode=os.F_OK), + self.filepath) - exe = os.path.join(self.temp_dir, self.to_text_type("test.exe")) - open(exe, 'w').close() - os.chmod(exe, 0o755) - - exe2 = os.path.join(self.temp_dir, self.to_text_type("test.exe2")) - open(exe2, 'w').close() - os.chmod(exe2, 0o755) + @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') + def test_dir_order_with_pathext_extension(self): + cmd = self.file # with .exe extension + search_path = os.pathsep.join([os.fsdecode(self.other_dir), + os.fsdecode(self.dir)]) + # full match in the second directory + self.assertNormEqual(shutil.which(cmd, path=search_path), self.filepath) + self.assertNormEqual(shutil.which(cmd, path=search_path, mode=os.F_OK), + self.filepath) + + cmd2 = cmd + self.to_text_type('.com') # with .exe.com extension + other_file_path = os.path.join(self.other_dir, cmd2) + 
self.create_file(other_file_path) + + # pathext match in the first directory + self.assertNormEqual(shutil.which(cmd, path=search_path), other_file_path) + self.assertNormEqual(shutil.which(cmd, path=search_path, mode=os.F_OK), + other_file_path) + # full match in the first directory + self.assertNormEqual(shutil.which(cmd2, path=search_path), other_file_path) + self.assertNormEqual(shutil.which(cmd2, path=search_path, mode=os.F_OK), + other_file_path) + + # full match in the first directory + search_path = os.pathsep.join([os.fsdecode(self.dir), + os.fsdecode(self.other_dir)]) + self.assertEqual(shutil.which(cmd, path=search_path), self.filepath) + self.assertEqual(shutil.which(cmd, path=search_path, mode=os.F_OK), + self.filepath) - # even though .exe2 is preferred in PATHEXT, we matched directly to test.exe - self.assertEqual(shutil.which(self.to_text_type("test.exe")), exe) - self.assertEqual(shutil.which(self.to_text_type("test")), exe2) + @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') + def test_dir_order_without_pathext_extension(self): + cmd = self.file[:-4] # without .exe extension + search_path = os.pathsep.join([os.fsdecode(self.other_dir), + os.fsdecode(self.dir)]) + # pathext match in the second directory + self.assertNormEqual(shutil.which(cmd, path=search_path), self.filepath) + self.assertNormEqual(shutil.which(cmd, path=search_path, mode=os.F_OK), + self.filepath) + + # without extension + other_file_path = os.path.join(self.other_dir, cmd) + self.create_file(other_file_path) + + # pathext match in the second directory + self.assertNormEqual(shutil.which(cmd, path=search_path), self.filepath) + # full match in the first directory + self.assertNormEqual(shutil.which(cmd, path=search_path, mode=os.F_OK), + other_file_path) + # full match in the second directory + self.assertNormEqual(shutil.which(self.file, path=search_path), self.filepath) + self.assertNormEqual(shutil.which(self.file, path=search_path, mode=os.F_OK), + 
self.filepath) + + # pathext match in the first directory + search_path = os.pathsep.join([os.fsdecode(self.dir), + os.fsdecode(self.other_dir)]) + self.assertNormEqual(shutil.which(cmd, path=search_path), self.filepath) + self.assertNormEqual(shutil.which(cmd, path=search_path, mode=os.F_OK), + self.filepath) class TestWhichBytes(TestWhich): @@ -2631,18 +2697,12 @@ def setUp(self): TestWhich.setUp(self) self.dir = os.fsencode(self.dir) self.file = os.fsencode(self.file) - self.temp_file.name = os.fsencode(self.temp_file.name) - self.temp_dir = os.fsencode(self.temp_dir) + self.filepath = os.fsencode(self.filepath) + self.other_dir = os.fsencode(self.other_dir) self.curdir = os.fsencode(self.curdir) self.ext = os.fsencode(self.ext) - def to_text_type(self, s): - ''' - In this class we're testing with bytes, so convert s to a bytes - ''' - if isinstance(s, str): - return s.encode() - return s + to_text_type = staticmethod(os.fsencode) class TestMove(BaseTest, unittest.TestCase): @@ -2653,8 +2713,7 @@ def setUp(self): self.dst_dir = self.mkdtemp() self.src_file = os.path.join(self.src_dir, filename) self.dst_file = os.path.join(self.dst_dir, filename) - with open(self.src_file, "wb") as f: - f.write(b"spam") + create_file(self.src_file, b"spam") def _check_move_file(self, src, dst, real_dst): with open(src, "rb") as f: @@ -2732,8 +2791,7 @@ def test_move_dir_altsep_to_dir(self): def test_existing_file_inside_dest_dir(self): # A file with the same name inside the destination dir already exists. 
- with open(self.dst_file, "wb"): - pass + create_file(self.dst_file) self.assertRaises(shutil.Error, shutil.move, self.src_file, self.dst_dir) def test_dont_move_dir_in_itself(self): @@ -3148,8 +3206,7 @@ def test_empty_file(self): dstname = TESTFN + 'dst' self.addCleanup(lambda: os_helper.unlink(srcname)) self.addCleanup(lambda: os_helper.unlink(dstname)) - with open(srcname, "wb"): - pass + create_file(srcname) with open(srcname, "rb") as src: with open(dstname, "wb") as dst: @@ -3272,7 +3329,7 @@ def test_blocksize_arg(self): self.assertEqual(blocksize, os.path.getsize(TESTFN)) # ...unless we're dealing with a small file. os_helper.unlink(TESTFN2) - write_file(TESTFN2, b"hello", binary=True) + create_file(TESTFN2, b"hello") self.addCleanup(os_helper.unlink, TESTFN2 + '3') self.assertRaises(ZeroDivisionError, shutil.copyfile, TESTFN2, TESTFN2 + '3') diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index 3ca5f5ce1b7068..fb6d268e5869f4 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -1171,6 +1171,40 @@ def __del__(self): self.assertEqual(out.strip(), b"OK") self.assertIn(b"can't create new thread at interpreter shutdown", err) + def test_start_new_thread_failed(self): + # gh-109746: if Python fails to start newly created thread + # due to failure of underlying PyThread_start_new_thread() call, + # its state should be removed from interpreter' thread states list + # to avoid its double cleanup + try: + from resource import setrlimit, RLIMIT_NPROC + except ImportError as err: + self.skipTest(err) # RLIMIT_NPROC is specific to Linux and BSD + code = """if 1: + import resource + import _thread + + def f(): + print("shouldn't be printed") + + limits = resource.getrlimit(resource.RLIMIT_NPROC) + [_, hard] = limits + resource.setrlimit(resource.RLIMIT_NPROC, (0, hard)) + + try: + _thread.start_new_thread(f, ()) + except RuntimeError: + print('ok') + else: + print('skip') + """ + _, out, err = assert_python_ok("-u", "-c", 
code) + out = out.strip() + if out == b'skip': + self.skipTest('RLIMIT_NPROC had no effect; probably superuser') + self.assertEqual(out, b'ok') + self.assertEqual(err, b'') + @cpython_only def test_finalize_daemon_thread_hang(self): if support.check_sanitizer(thread=True, memory=True): diff --git a/Lib/test/test_tools/i18n_data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot index ddfbd18349ef4f..8d66fbc4f3a937 100644 --- a/Lib/test/test_tools/i18n_data/messages.pot +++ b/Lib/test/test_tools/i18n_data/messages.pot @@ -15,53 +15,75 @@ msgstr "" "Generated-By: pygettext.py 1.5\n" -#: messages.py:5 +#: messages.py:16 msgid "" msgstr "" -#: messages.py:8 messages.py:9 +#: messages.py:19 messages.py:20 msgid "parentheses" msgstr "" -#: messages.py:12 +#: messages.py:23 msgid "Hello, world!" msgstr "" -#: messages.py:15 +#: messages.py:26 msgid "" "Hello,\n" " multiline!\n" msgstr "" -#: messages.py:29 +#: messages.py:46 messages.py:89 messages.py:90 messages.py:93 messages.py:94 +#: messages.py:99 +msgid "foo" +msgid_plural "foos" +msgstr[0] "" +msgstr[1] "" + +#: messages.py:47 +msgid "something" +msgstr "" + +#: messages.py:50 msgid "Hello, {}!" 
msgstr "" -#: messages.py:33 +#: messages.py:54 msgid "1" msgstr "" -#: messages.py:33 +#: messages.py:54 msgid "2" msgstr "" -#: messages.py:34 messages.py:35 +#: messages.py:55 messages.py:56 msgid "A" msgstr "" -#: messages.py:34 messages.py:35 +#: messages.py:55 messages.py:56 msgid "B" msgstr "" -#: messages.py:36 +#: messages.py:57 msgid "set" msgstr "" -#: messages.py:42 +#: messages.py:63 msgid "nested string" msgstr "" -#: messages.py:47 +#: messages.py:68 msgid "baz" msgstr "" +#: messages.py:91 messages.py:92 messages.py:95 messages.py:96 +msgctxt "context" +msgid "foo" +msgid_plural "foos" +msgstr[0] "" +msgstr[1] "" + +#: messages.py:100 +msgid "domain foo" +msgstr "" + diff --git a/Lib/test/test_tools/i18n_data/messages.py b/Lib/test/test_tools/i18n_data/messages.py index f220294b8d5c67..1e03f4e556830d 100644 --- a/Lib/test/test_tools/i18n_data/messages.py +++ b/Lib/test/test_tools/i18n_data/messages.py @@ -1,5 +1,16 @@ # Test message extraction -from gettext import gettext as _ +from gettext import ( + gettext, + ngettext, + pgettext, + npgettext, + dgettext, + dngettext, + dpgettext, + dnpgettext +) + +_ = gettext # Empty string _("") @@ -21,13 +32,23 @@ _(None) _(1) _(False) -_(x="kwargs are not allowed") +_(("invalid")) +_(["invalid"]) +_({"invalid"}) +_("string"[3]) +_("string"[:3]) +_({"string": "foo"}) + +# pygettext does not allow keyword arguments, but both xgettext and pybabel do +_(x="kwargs work!") + +# Unusual, but valid arguments _("foo", "bar") _("something", x="something else") # .format() _("Hello, {}!").format("world") # valid -_("Hello, {}!".format("world")) # invalid +_("Hello, {}!".format("world")) # invalid, but xgettext and pybabel extract the first string # Nested structures _("1"), _("2") @@ -62,3 +83,28 @@ def _(x): def _(x="don't extract me"): pass + + +# Other gettext functions +gettext("foo") +ngettext("foo", "foos", 1) +pgettext("context", "foo") +npgettext("context", "foo", "foos", 1) +dgettext("domain", "foo") 
+dngettext("domain", "foo", "foos", 1) +dpgettext("domain", "context", "foo") +dnpgettext("domain", "context", "foo", "foos", 1) + +# Complex arguments +ngettext("foo", "foos", 42 + (10 - 20)) +dgettext(["some", {"complex"}, ("argument",)], "domain foo") + +# Invalid calls which are not extracted +gettext() +ngettext('foo') +pgettext('context') +npgettext('context', 'foo') +dgettext('domain') +dngettext('domain', 'foo') +dpgettext('domain', 'context') +dnpgettext('domain', 'context', 'foo') diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index 6f71f0976819f1..29c3423e234d20 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -332,14 +332,14 @@ def test_calls_in_fstring_with_multiple_args(self): msgids = self.extract_docstrings_from_str(dedent('''\ f"{_('foo', 'bar')}" ''')) - self.assertNotIn('foo', msgids) + self.assertIn('foo', msgids) self.assertNotIn('bar', msgids) def test_calls_in_fstring_with_keyword_args(self): msgids = self.extract_docstrings_from_str(dedent('''\ f"{_('foo', bar='baz')}" ''')) - self.assertNotIn('foo', msgids) + self.assertIn('foo', msgids) self.assertNotIn('bar', msgids) self.assertNotIn('baz', msgids) diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index e1c1d3170d9807..a204ef41c3ce90 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1492,7 +1492,7 @@ def test_url2pathname_win(self): # UNC paths self.assertEqual(fn('//server/path/to/file'), '\\\\server\\path\\to\\file') self.assertEqual(fn('////server/path/to/file'), '\\\\server\\path\\to\\file') - self.assertEqual(fn('/////server/path/to/file'), '\\\\\\server\\path\\to\\file') + self.assertEqual(fn('/////server/path/to/file'), '\\\\server\\path\\to\\file') # Localhost paths self.assertEqual(fn('//localhost/C:/path/to/file'), 'C:\\path\\to\\file') self.assertEqual(fn('//localhost/C|/path/to/file'), 'C:\\path\\to\\file') diff --git a/Lib/test/translationdata/argparse/msgids.txt 
b/Lib/test/translationdata/argparse/msgids.txt index 2b012906436e85..ae89ac74726ecf 100644 --- a/Lib/test/translationdata/argparse/msgids.txt +++ b/Lib/test/translationdata/argparse/msgids.txt @@ -8,6 +8,8 @@ argument %(argument_name)s: %(message)s argument '%(argument_name)s' is deprecated can't open '%(filename)s': %(error)s command '%(parser_name)s' is deprecated +conflicting option string: %s +expected %s argument expected at least one argument expected at most one argument expected one argument diff --git a/Lib/test/translationdata/optparse/msgids.txt b/Lib/test/translationdata/optparse/msgids.txt index ac5317c736af8c..8f405a2bf26dbe 100644 --- a/Lib/test/translationdata/optparse/msgids.txt +++ b/Lib/test/translationdata/optparse/msgids.txt @@ -1,3 +1,4 @@ +%(option)s option requires %(number)d argument %prog [options] %s option does not take a value Options diff --git a/Misc/NEWS.d/3.13.0a6.rst b/Misc/NEWS.d/3.13.0a6.rst index b9cdbc4e146d5a..2740b4f0d967ba 100644 --- a/Misc/NEWS.d/3.13.0a6.rst +++ b/Misc/NEWS.d/3.13.0a6.rst @@ -642,7 +642,7 @@ Also in the corresponding :class:`ipaddress.IPv4Network` and .. nonce: OToJnG .. section: Library -In :mod:`encodings.idna`, any capitalization of the the ACE prefix +In :mod:`encodings.idna`, any capitalization of the ACE prefix (``xn--``) is now acceptable. Patch by Pepijn de Vos and Zackery Spytz. .. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2023-09-22-21-01-56.gh-issue-109746.32MHt9.rst b/Misc/NEWS.d/next/Core_and_Builtins/2023-09-22-21-01-56.gh-issue-109746.32MHt9.rst new file mode 100644 index 00000000000000..2d350c33aa6975 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2023-09-22-21-01-56.gh-issue-109746.32MHt9.rst @@ -0,0 +1 @@ +If :func:`!_thread.start_new_thread` fails to start a new thread, it deletes its state from the interpreter and thus avoids its repeated cleanup on finalization. 
diff --git a/Misc/NEWS.d/next/Library/2023-02-15-23-54-42.gh-issue-88110.KU6erv.rst b/Misc/NEWS.d/next/Library/2023-02-15-23-54-42.gh-issue-88110.KU6erv.rst new file mode 100644 index 00000000000000..42a83edc3ba68d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-02-15-23-54-42.gh-issue-88110.KU6erv.rst @@ -0,0 +1,2 @@ +Fixed :class:`multiprocessing.Process` reporting a ``.exitcode`` of 1 even on success when +using the ``"fork"`` start method while using a :class:`concurrent.futures.ThreadPoolExecutor`. diff --git a/Misc/NEWS.d/next/Library/2024-10-23-20-05-54.gh-issue-86463.jvFTI_.rst b/Misc/NEWS.d/next/Library/2024-10-23-20-05-54.gh-issue-86463.jvFTI_.rst new file mode 100644 index 00000000000000..9ac155770e2254 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-23-20-05-54.gh-issue-86463.jvFTI_.rst @@ -0,0 +1,2 @@ +The ``usage`` parameter of :class:`argparse.ArgumentParser` no longer +affects the default value of the ``prog`` parameter in subparsers. diff --git a/Misc/NEWS.d/next/Library/2024-11-21-16-23-16.gh-issue-127065.cfL1zd.rst b/Misc/NEWS.d/next/Library/2024-11-21-16-23-16.gh-issue-127065.cfL1zd.rst new file mode 100644 index 00000000000000..83457da467ffa9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-21-16-23-16.gh-issue-127065.cfL1zd.rst @@ -0,0 +1,2 @@ +Fix crash when calling a :func:`operator.methodcaller` instance from +multiple threads in the free threading build. diff --git a/Misc/NEWS.d/next/Library/2024-11-22-03-40-02.gh-issue-127078.gI_PaP.rst b/Misc/NEWS.d/next/Library/2024-11-22-03-40-02.gh-issue-127078.gI_PaP.rst new file mode 100644 index 00000000000000..a84c06f3c7a273 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-22-03-40-02.gh-issue-127078.gI_PaP.rst @@ -0,0 +1,2 @@ +Fix issue where :func:`urllib.request.url2pathname` failed to discard an +extra slash before a UNC drive in the URL path on Windows. 
diff --git a/Misc/NEWS.d/next/Library/2024-11-22-10-42-34.gh-issue-127035.UnbDlr.rst b/Misc/NEWS.d/next/Library/2024-11-22-10-42-34.gh-issue-127035.UnbDlr.rst new file mode 100644 index 00000000000000..6bb7abfdd50040 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-22-10-42-34.gh-issue-127035.UnbDlr.rst @@ -0,0 +1,4 @@ +Fix :mod:`shutil.which` on Windows. Now it looks at direct match if and only +if the command ends with a PATHEXT extension or X_OK is not in mode. Support +extensionless files if "." is in PATHEXT. Support PATHEXT extensions that end +with a dot. diff --git a/Misc/NEWS.d/next/Tests/2024-11-20-18-49-01.gh-issue-127076.DHnXxo.rst b/Misc/NEWS.d/next/Tests/2024-11-20-18-49-01.gh-issue-127076.DHnXxo.rst new file mode 100644 index 00000000000000..39323604bbef56 --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2024-11-20-18-49-01.gh-issue-127076.DHnXxo.rst @@ -0,0 +1,2 @@ +Filter out memory-related ``mmap``, ``munmap``, and ``mprotect`` calls from +file-related ones when testing :mod:`io` behavior using strace. diff --git a/Misc/NEWS.d/next/Tools-Demos/2024-11-16-20-47-20.gh-issue-126700.ayrHv4.rst b/Misc/NEWS.d/next/Tools-Demos/2024-11-16-20-47-20.gh-issue-126700.ayrHv4.rst new file mode 100644 index 00000000000000..c08ad9d7059904 --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2024-11-16-20-47-20.gh-issue-126700.ayrHv4.rst @@ -0,0 +1 @@ +Add support for multi-argument :mod:`gettext` functions in :program:`pygettext.py`. 
diff --git a/Modules/_operator.c b/Modules/_operator.c index 7e0d1f3df87e4d..6c1945174ab7cd 100644 --- a/Modules/_operator.c +++ b/Modules/_operator.c @@ -1602,6 +1602,7 @@ typedef struct { vectorcallfunc vectorcall; } methodcallerobject; +#ifndef Py_GIL_DISABLED static int _methodcaller_initialize_vectorcall(methodcallerobject* mc) { PyObject* args = mc->xargs; @@ -1664,6 +1665,7 @@ methodcaller_vectorcall( (PyTuple_GET_SIZE(mc->xargs)) | PY_VECTORCALL_ARGUMENTS_OFFSET, mc->vectorcall_kwnames); } +#endif /* AC 3.5: variable number of arguments, not currently support by AC */ @@ -1703,7 +1705,14 @@ methodcaller_new(PyTypeObject *type, PyObject *args, PyObject *kwds) mc->vectorcall_args = 0; +#ifdef Py_GIL_DISABLED + // gh-127065: The current implementation of methodcaller_vectorcall + // is not thread-safe because it modifies the `vectorcall_args` array, + // which is shared across calls. + mc->vectorcall = NULL; +#else mc->vectorcall = (vectorcallfunc)methodcaller_vectorcall; +#endif PyObject_GC_Track(mc); return (PyObject *)mc; diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index d4408aa9e42d9d..f2a420ac1c589d 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -421,6 +421,7 @@ ThreadHandle_start(ThreadHandle *self, PyObject *func, PyObject *args, PyThread_handle_t os_handle; if (PyThread_start_joinable_thread(thread_run, boot, &ident, &os_handle)) { PyThreadState_Clear(boot->tstate); + PyThreadState_Delete(boot->tstate); thread_bootstate_free(boot, 1); PyErr_SetString(ThreadError, "can't start new thread"); goto start_failed; diff --git a/Objects/lnotab_notes.txt b/Objects/lnotab_notes.txt index 0f3599340318f0..335e441cfded3d 100644 --- a/Objects/lnotab_notes.txt +++ b/Objects/lnotab_notes.txt @@ -1,7 +1,7 @@ Description of the internal format of the line number table in Python 3.10 and earlier. 
-(For 3.11 onwards, see Objects/locations.md) +(For 3.11 onwards, see InternalDocs/code_objects.md) Conceptually, the line number table consists of a sequence of triples: start-offset (inclusive), end-offset (exclusive), line-number. diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index 09a5f4d30ef490..3842f52e514bb4 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -239,6 +239,8 @@ Source Files + Source Files + Source Files diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6ee886c2ba0fc8..88e96afe4151f5 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -376,7 +376,7 @@ dummy_func( pure inst(UNARY_NOT, (value -- res)) { assert(PyStackRef_BoolCheck(value)); - res = PyStackRef_Is(value, PyStackRef_False) + res = PyStackRef_IsFalse(value) ? PyStackRef_True : PyStackRef_False; DEAD(value); } @@ -441,7 +441,7 @@ dummy_func( inst(TO_BOOL_NONE, (unused/1, unused/2, value -- res)) { // This one is a bit weird, because we expect *some* failures: - EXIT_IF(!PyStackRef_Is(value, PyStackRef_None)); + EXIT_IF(!PyStackRef_IsNone(value)); DEAD(value); STAT_INC(TO_BOOL, hit); res = PyStackRef_False; @@ -651,9 +651,7 @@ dummy_func( // specializations, but there is no output. // At the end we just skip over the STORE_FAST. op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right --)) { - #ifndef NDEBUG PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - #endif PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); int next_oparg; @@ -664,7 +662,7 @@ dummy_func( next_oparg = CURRENT_OPERAND0(); #endif _PyStackRef *target_local = &GETLOCAL(next_oparg); - DEOPT_IF(!PyStackRef_Is(*target_local, left)); + DEOPT_IF(PyStackRef_AsPyObjectBorrow(*target_local) != left_o); STAT_INC(BINARY_OP, hit); /* Handle `left = left + right` or `left += right` for str. 
* @@ -1141,7 +1139,7 @@ dummy_func( gen_frame->previous = frame; DISPATCH_INLINED(gen_frame); } - if (PyStackRef_Is(v, PyStackRef_None) && PyIter_Check(receiver_o)) { + if (PyStackRef_IsNone(v) && PyIter_Check(receiver_o)) { retval_o = Py_TYPE(receiver_o)->tp_iternext(receiver_o); } else { @@ -1249,7 +1247,7 @@ dummy_func( inst(POP_EXCEPT, (exc_value -- )) { _PyErr_StackItem *exc_info = tstate->exc_info; Py_XSETREF(exc_info->exc_value, - PyStackRef_Is(exc_value, PyStackRef_None) + PyStackRef_IsNone(exc_value) ? NULL : PyStackRef_AsPyObjectSteal(exc_value)); } @@ -1381,7 +1379,7 @@ dummy_func( }; specializing op(_SPECIALIZE_UNPACK_SEQUENCE, (counter/1, seq -- seq)) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_UnpackSequence(seq, next_instr, oparg); @@ -1389,7 +1387,7 @@ dummy_func( } OPCODE_DEFERRED_INC(UNPACK_SEQUENCE); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ (void)seq; (void)counter; } @@ -1429,12 +1427,24 @@ dummy_func( inst(UNPACK_SEQUENCE_LIST, (unused/1, seq -- values[oparg])) { PyObject *seq_o = PyStackRef_AsPyObjectBorrow(seq); DEOPT_IF(!PyList_CheckExact(seq_o)); - DEOPT_IF(PyList_GET_SIZE(seq_o) != oparg); + #ifdef Py_GIL_DISABLED + PyCriticalSection cs; + PyCriticalSection_Begin(&cs, seq_o); + #endif + if (PyList_GET_SIZE(seq_o) != oparg) { + #ifdef Py_GIL_DISABLED + PyCriticalSection_End(&cs); + #endif + DEOPT_IF(true); + } STAT_INC(UNPACK_SEQUENCE, hit); PyObject **items = _PyList_ITEMS(seq_o); for (int i = oparg; --i >= 0; ) { *values++ = PyStackRef_FromPyObjectNew(items[i]); } + #ifdef Py_GIL_DISABLED + PyCriticalSection_End(&cs); + #endif DECREF_INPUTS(); } @@ -2490,13 +2500,7 @@ dummy_func( } inst(IS_OP, (left, right -- b)) { -#ifdef Py_GIL_DISABLED - // On free-threaded builds, objects are conditionally immortalized. - // So their bits don't always compare equally. 
int res = Py_Is(PyStackRef_AsPyObjectBorrow(left), PyStackRef_AsPyObjectBorrow(right)) ^ oparg; -#else - int res = PyStackRef_Is(left, right) ^ oparg; -#endif DECREF_INPUTS(); b = res ? PyStackRef_True : PyStackRef_False; } @@ -2525,7 +2529,7 @@ dummy_func( } OPCODE_DEFERRED_INC(CONTAINS_OP); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } macro(CONTAINS_OP) = _SPECIALIZE_CONTAINS_OP + _CONTAINS_OP; @@ -2703,7 +2707,7 @@ dummy_func( replaced op(_POP_JUMP_IF_FALSE, (cond -- )) { assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_False); + int flag = PyStackRef_IsFalse(cond); DEAD(cond); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); @@ -2711,14 +2715,14 @@ dummy_func( replaced op(_POP_JUMP_IF_TRUE, (cond -- )) { assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_True); + int flag = PyStackRef_IsTrue(cond); DEAD(cond); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); } op(_IS_NONE, (value -- b)) { - if (PyStackRef_Is(value, PyStackRef_None)) { + if (PyStackRef_IsNone(value)) { b = PyStackRef_True; DEAD(value); } @@ -3762,7 +3766,7 @@ dummy_func( inst(EXIT_INIT_CHECK, (should_be_none -- )) { assert(STACK_LEVEL() == 2); - if (!PyStackRef_Is(should_be_none, PyStackRef_None)) { + if (!PyStackRef_IsNone(should_be_none)) { PyErr_Format(PyExc_TypeError, "__init__() should return None, not '%.200s'", Py_TYPE(PyStackRef_AsPyObjectBorrow(should_be_none))->tp_name); @@ -4722,7 +4726,7 @@ dummy_func( inst(INSTRUMENTED_POP_JUMP_IF_TRUE, (unused/1 -- )) { _PyStackRef cond = POP(); assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_True); + int flag = PyStackRef_IsTrue(cond); int offset = flag * oparg; RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH); @@ -4731,7 +4735,7 @@ dummy_func( 
inst(INSTRUMENTED_POP_JUMP_IF_FALSE, (unused/1 -- )) { _PyStackRef cond = POP(); assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_False); + int flag = PyStackRef_IsFalse(cond); int offset = flag * oparg; RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH); @@ -4739,7 +4743,7 @@ dummy_func( inst(INSTRUMENTED_POP_JUMP_IF_NONE, (unused/1 -- )) { _PyStackRef value_stackref = POP(); - int flag = PyStackRef_Is(value_stackref, PyStackRef_None); + int flag = PyStackRef_IsNone(value_stackref); int offset; if (flag) { offset = oparg; @@ -4755,7 +4759,7 @@ dummy_func( inst(INSTRUMENTED_POP_JUMP_IF_NOT_NONE, (unused/1 -- )) { _PyStackRef value_stackref = POP(); int offset; - int nflag = PyStackRef_Is(value_stackref, PyStackRef_None); + int nflag = PyStackRef_IsNone(value_stackref); if (nflag) { offset = 0; } @@ -4790,21 +4794,21 @@ dummy_func( ///////// Tier-2 only opcodes ///////// op (_GUARD_IS_TRUE_POP, (flag -- )) { - int is_true = PyStackRef_Is(flag, PyStackRef_True); + int is_true = PyStackRef_IsTrue(flag); DEAD(flag); SYNC_SP(); EXIT_IF(!is_true); } op (_GUARD_IS_FALSE_POP, (flag -- )) { - int is_false = PyStackRef_Is(flag, PyStackRef_False); + int is_false = PyStackRef_IsFalse(flag); DEAD(flag); SYNC_SP(); EXIT_IF(!is_false); } op (_GUARD_IS_NONE_POP, (val -- )) { - int is_none = PyStackRef_Is(val, PyStackRef_None); + int is_none = PyStackRef_IsNone(val); if (!is_none) { PyStackRef_CLOSE(val); SYNC_SP(); @@ -4814,7 +4818,7 @@ dummy_func( } op (_GUARD_IS_NOT_NONE_POP, (val -- )) { - int is_none = PyStackRef_Is(val, PyStackRef_None); + int is_none = PyStackRef_IsNone(val); PyStackRef_CLOSE(val); SYNC_SP(); EXIT_IF(is_none); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 5c7138a94214a8..5af970ec4ae219 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -445,7 +445,7 @@ _PyStackRef res; value = stack_pointer[-1]; 
assert(PyStackRef_BoolCheck(value)); - res = PyStackRef_Is(value, PyStackRef_False) + res = PyStackRef_IsFalse(value) ? PyStackRef_True : PyStackRef_False; stack_pointer[-1] = res; break; @@ -519,7 +519,7 @@ _PyStackRef res; value = stack_pointer[-1]; // This one is a bit weird, because we expect *some* failures: - if (!PyStackRef_Is(value, PyStackRef_None)) { + if (!PyStackRef_IsNone(value)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } @@ -822,9 +822,7 @@ _PyStackRef left; right = stack_pointer[-1]; left = stack_pointer[-2]; - #ifndef NDEBUG PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - #endif PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); int next_oparg; #if TIER_ONE @@ -834,7 +832,7 @@ next_oparg = CURRENT_OPERAND0(); #endif _PyStackRef *target_local = &GETLOCAL(next_oparg); - if (!PyStackRef_Is(*target_local, left)) { + if (PyStackRef_AsPyObjectBorrow(*target_local) != left_o) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } @@ -1522,7 +1520,7 @@ _PyErr_StackItem *exc_info = tstate->exc_info; _PyFrame_SetStackPointer(frame, stack_pointer); Py_XSETREF(exc_info->exc_value, - PyStackRef_Is(exc_value, PyStackRef_None) + PyStackRef_IsNone(exc_value) ? 
NULL : PyStackRef_AsPyObjectSteal(exc_value)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; @@ -1711,15 +1709,33 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + #ifdef Py_GIL_DISABLED + PyCriticalSection cs; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyCriticalSection_Begin(&cs, seq_o); + stack_pointer = _PyFrame_GetStackPointer(frame); + #endif if (PyList_GET_SIZE(seq_o) != oparg) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); + #ifdef Py_GIL_DISABLED + _PyFrame_SetStackPointer(frame, stack_pointer); + PyCriticalSection_End(&cs); + stack_pointer = _PyFrame_GetStackPointer(frame); + #endif + if (true) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } } STAT_INC(UNPACK_SEQUENCE, hit); PyObject **items = _PyList_ITEMS(seq_o); for (int i = oparg; --i >= 0; ) { *values++ = PyStackRef_FromPyObjectNew(items[i]); } + #ifdef Py_GIL_DISABLED + _PyFrame_SetStackPointer(frame, stack_pointer); + PyCriticalSection_End(&cs); + stack_pointer = _PyFrame_GetStackPointer(frame); + #endif PyStackRef_CLOSE(seq); stack_pointer += -1 + oparg; assert(WITHIN_STACK_BOUNDS()); @@ -3092,13 +3108,7 @@ oparg = CURRENT_OPARG(); right = stack_pointer[-1]; left = stack_pointer[-2]; - #ifdef Py_GIL_DISABLED - // On free-threaded builds, objects are conditionally immortalized. - // So their bits don't always compare equally. int res = Py_Is(PyStackRef_AsPyObjectBorrow(left), PyStackRef_AsPyObjectBorrow(right)) ^ oparg; - #else - int res = PyStackRef_Is(left, right) ^ oparg; - #endif PyStackRef_CLOSE(left); PyStackRef_CLOSE(right); b = res ? 
PyStackRef_True : PyStackRef_False; @@ -3302,7 +3312,7 @@ _PyStackRef value; _PyStackRef b; value = stack_pointer[-1]; - if (PyStackRef_Is(value, PyStackRef_None)) { + if (PyStackRef_IsNone(value)) { b = PyStackRef_True; } else { @@ -4544,7 +4554,7 @@ _PyStackRef should_be_none; should_be_none = stack_pointer[-1]; assert(STACK_LEVEL() == 2); - if (!PyStackRef_Is(should_be_none, PyStackRef_None)) { + if (!PyStackRef_IsNone(should_be_none)) { _PyFrame_SetStackPointer(frame, stack_pointer); PyErr_Format(PyExc_TypeError, "__init__() should return None, not '%.200s'", @@ -5625,7 +5635,7 @@ case _GUARD_IS_TRUE_POP: { _PyStackRef flag; flag = stack_pointer[-1]; - int is_true = PyStackRef_Is(flag, PyStackRef_True); + int is_true = PyStackRef_IsTrue(flag); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); if (!is_true) { @@ -5638,7 +5648,7 @@ case _GUARD_IS_FALSE_POP: { _PyStackRef flag; flag = stack_pointer[-1]; - int is_false = PyStackRef_Is(flag, PyStackRef_False); + int is_false = PyStackRef_IsFalse(flag); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); if (!is_false) { @@ -5651,7 +5661,7 @@ case _GUARD_IS_NONE_POP: { _PyStackRef val; val = stack_pointer[-1]; - int is_none = PyStackRef_Is(val, PyStackRef_None); + int is_none = PyStackRef_IsNone(val); if (!is_none) { PyStackRef_CLOSE(val); stack_pointer += -1; @@ -5669,7 +5679,7 @@ case _GUARD_IS_NOT_NONE_POP: { _PyStackRef val; val = stack_pointer[-1]; - int is_none = PyStackRef_Is(val, PyStackRef_None); + int is_none = PyStackRef_IsNone(val); PyStackRef_CLOSE(val); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/frozen.c b/Python/frozen.c index 627f2ff9413562..15d256b6743e0a 100644 --- a/Python/frozen.c +++ b/Python/frozen.c @@ -84,7 +84,6 @@ static const struct _frozen stdlib_modules[] = { {"genericpath", _Py_M__genericpath, (int)sizeof(_Py_M__genericpath), false}, {"ntpath", _Py_M__ntpath, (int)sizeof(_Py_M__ntpath), false}, {"posixpath", _Py_M__posixpath, (int)sizeof(_Py_M__posixpath), 
false}, - {"os.path", _Py_M__posixpath, (int)sizeof(_Py_M__posixpath), false}, {"os", _Py_M__os, (int)sizeof(_Py_M__os), false}, {"site", _Py_M__site, (int)sizeof(_Py_M__site), false}, {"stat", _Py_M__stat, (int)sizeof(_Py_M__stat), false}, @@ -116,7 +115,6 @@ const struct _frozen *_PyImport_FrozenTest = test_modules; static const struct _module_alias aliases[] = { {"_frozen_importlib", "importlib._bootstrap"}, {"_frozen_importlib_external", "importlib._bootstrap_external"}, - {"os.path", "posixpath"}, {"__hello_alias__", "__hello__"}, {"__phello_alias__", "__hello__"}, {"__phello_alias__.spam", "__hello__"}, diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 13947849942cd4..36ec727eed3fc9 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -183,9 +183,7 @@ /* Skip 1 cache entry */ // _BINARY_OP_INPLACE_ADD_UNICODE { - #ifndef NDEBUG PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - #endif PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); int next_oparg; #if TIER_ONE @@ -195,7 +193,7 @@ next_oparg = CURRENT_OPERAND0(); #endif _PyStackRef *target_local = &GETLOCAL(next_oparg); - DEOPT_IF(!PyStackRef_Is(*target_local, left), BINARY_OP); + DEOPT_IF(PyStackRef_AsPyObjectBorrow(*target_local) != left_o, BINARY_OP); STAT_INC(BINARY_OP, hit); /* Handle `left = left + right` or `left += right` for str. 
* @@ -3405,7 +3403,7 @@ } OPCODE_DEFERRED_INC(CONTAINS_OP); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } // _CONTAINS_OP { @@ -3824,7 +3822,7 @@ _PyStackRef should_be_none; should_be_none = stack_pointer[-1]; assert(STACK_LEVEL() == 2); - if (!PyStackRef_Is(should_be_none, PyStackRef_None)) { + if (!PyStackRef_IsNone(should_be_none)) { _PyFrame_SetStackPointer(frame, stack_pointer); PyErr_Format(PyExc_TypeError, "__init__() should return None, not '%.200s'", @@ -4760,7 +4758,7 @@ /* Skip 1 cache entry */ _PyStackRef cond = POP(); assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_False); + int flag = PyStackRef_IsFalse(cond); int offset = flag * oparg; RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH); @@ -4774,7 +4772,7 @@ INSTRUCTION_STATS(INSTRUMENTED_POP_JUMP_IF_NONE); /* Skip 1 cache entry */ _PyStackRef value_stackref = POP(); - int flag = PyStackRef_Is(value_stackref, PyStackRef_None); + int flag = PyStackRef_IsNone(value_stackref); int offset; if (flag) { offset = oparg; @@ -4796,7 +4794,7 @@ /* Skip 1 cache entry */ _PyStackRef value_stackref = POP(); int offset; - int nflag = PyStackRef_Is(value_stackref, PyStackRef_None); + int nflag = PyStackRef_IsNone(value_stackref); if (nflag) { offset = 0; } @@ -4819,7 +4817,7 @@ /* Skip 1 cache entry */ _PyStackRef cond = POP(); assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_True); + int flag = PyStackRef_IsTrue(cond); int offset = flag * oparg; RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH); @@ -5040,13 +5038,7 @@ _PyStackRef b; right = stack_pointer[-1]; left = stack_pointer[-2]; - #ifdef Py_GIL_DISABLED - // On free-threaded builds, objects are conditionally immortalized. 
- // So their bits don't always compare equally. int res = Py_Is(PyStackRef_AsPyObjectBorrow(left), PyStackRef_AsPyObjectBorrow(right)) ^ oparg; - #else - int res = PyStackRef_Is(left, right) ^ oparg; - #endif PyStackRef_CLOSE(left); PyStackRef_CLOSE(right); b = res ? PyStackRef_True : PyStackRef_False; @@ -6663,7 +6655,7 @@ _PyErr_StackItem *exc_info = tstate->exc_info; _PyFrame_SetStackPointer(frame, stack_pointer); Py_XSETREF(exc_info->exc_value, - PyStackRef_Is(exc_value, PyStackRef_None) + PyStackRef_IsNone(exc_value) ? NULL : PyStackRef_AsPyObjectSteal(exc_value)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; @@ -6680,7 +6672,7 @@ /* Skip 1 cache entry */ cond = stack_pointer[-1]; assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_False); + int flag = PyStackRef_IsFalse(cond); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); stack_pointer += -1; @@ -6700,7 +6692,7 @@ // _IS_NONE { value = stack_pointer[-1]; - if (PyStackRef_Is(value, PyStackRef_None)) { + if (PyStackRef_IsNone(value)) { b = PyStackRef_True; } else { @@ -6712,7 +6704,7 @@ { cond = b; assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_True); + int flag = PyStackRef_IsTrue(cond); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); } @@ -6733,7 +6725,7 @@ // _IS_NONE { value = stack_pointer[-1]; - if (PyStackRef_Is(value, PyStackRef_None)) { + if (PyStackRef_IsNone(value)) { b = PyStackRef_True; } else { @@ -6745,7 +6737,7 @@ { cond = b; assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_False); + int flag = PyStackRef_IsFalse(cond); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); } @@ -6763,7 +6755,7 @@ /* Skip 1 cache entry */ cond = stack_pointer[-1]; assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_True); + int flag = PyStackRef_IsTrue(cond); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); 
JUMPBY(oparg * flag); stack_pointer += -1; @@ -7100,7 +7092,7 @@ gen_frame->previous = frame; DISPATCH_INLINED(gen_frame); } - if (PyStackRef_Is(v, PyStackRef_None) && PyIter_Check(receiver_o)) { + if (PyStackRef_IsNone(v) && PyIter_Check(receiver_o)) { _PyFrame_SetStackPointer(frame, stack_pointer); retval_o = Py_TYPE(receiver_o)->tp_iternext(receiver_o); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -7880,7 +7872,7 @@ /* Skip 2 cache entries */ value = stack_pointer[-1]; // This one is a bit weird, because we expect *some* failures: - DEOPT_IF(!PyStackRef_Is(value, PyStackRef_None), TO_BOOL); + DEOPT_IF(!PyStackRef_IsNone(value), TO_BOOL); STAT_INC(TO_BOOL, hit); res = PyStackRef_False; stack_pointer[-1] = res; @@ -7955,7 +7947,7 @@ _PyStackRef res; value = stack_pointer[-1]; assert(PyStackRef_BoolCheck(value)); - res = PyStackRef_Is(value, PyStackRef_False) + res = PyStackRef_IsFalse(value) ? PyStackRef_True : PyStackRef_False; stack_pointer[-1] = res; DISPATCH(); @@ -7994,7 +7986,7 @@ seq = stack_pointer[-1]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _PyFrame_SetStackPointer(frame, stack_pointer); @@ -8004,7 +7996,7 @@ } OPCODE_DEFERRED_INC(UNPACK_SEQUENCE); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ (void)seq; (void)counter; } @@ -8035,12 +8027,30 @@ values = &stack_pointer[-1]; PyObject *seq_o = PyStackRef_AsPyObjectBorrow(seq); DEOPT_IF(!PyList_CheckExact(seq_o), UNPACK_SEQUENCE); - DEOPT_IF(PyList_GET_SIZE(seq_o) != oparg, UNPACK_SEQUENCE); + #ifdef Py_GIL_DISABLED + PyCriticalSection cs; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyCriticalSection_Begin(&cs, seq_o); + stack_pointer = _PyFrame_GetStackPointer(frame); + #endif + if (PyList_GET_SIZE(seq_o) != oparg) { + #ifdef Py_GIL_DISABLED + 
_PyFrame_SetStackPointer(frame, stack_pointer); + PyCriticalSection_End(&cs); + stack_pointer = _PyFrame_GetStackPointer(frame); + #endif + DEOPT_IF(true, UNPACK_SEQUENCE); + } STAT_INC(UNPACK_SEQUENCE, hit); PyObject **items = _PyList_ITEMS(seq_o); for (int i = oparg; --i >= 0; ) { *values++ = PyStackRef_FromPyObjectNew(items[i]); } + #ifdef Py_GIL_DISABLED + _PyFrame_SetStackPointer(frame, stack_pointer); + PyCriticalSection_End(&cs); + stack_pointer = _PyFrame_GetStackPointer(frame); + #endif PyStackRef_CLOSE(seq); stack_pointer += -1 + oparg; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/pystate.c b/Python/pystate.c index 975eb6d4fbd0f2..3ceae229f75cd0 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1779,7 +1779,9 @@ tstate_delete_common(PyThreadState *tstate, int release_gil) if (tstate->_status.bound_gilstate) { unbind_gilstate_tstate(tstate); } - unbind_tstate(tstate); + if (tstate->_status.bound) { + unbind_tstate(tstate); + } // XXX Move to PyThreadState_Clear()? clear_datastack(tstate); diff --git a/Python/specialize.c b/Python/specialize.c index c69f61c8b449a1..c1f4b0601cc8d5 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -715,6 +715,7 @@ specialize(_Py_CODEUNIT *instr, uint8_t specialized_opcode) SPEC_FAIL_OTHER); return; } + STAT_INC(_PyOpcode_Deopt[specialized_opcode], success); set_counter((_Py_BackoffCounter *)instr + 1, adaptive_counter_cooldown()); } @@ -2487,39 +2488,33 @@ _Py_Specialize_UnpackSequence(_PyStackRef seq_st, _Py_CODEUNIT *instr, int oparg { PyObject *seq = PyStackRef_AsPyObjectBorrow(seq_st); - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[UNPACK_SEQUENCE] == INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE); - _PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)(instr + 1); if (PyTuple_CheckExact(seq)) { if (PyTuple_GET_SIZE(seq) != oparg) { SPECIALIZATION_FAIL(UNPACK_SEQUENCE, SPEC_FAIL_EXPECTED_ERROR); - goto failure; + unspecialize(instr); + return; } if 
(PyTuple_GET_SIZE(seq) == 2) { - instr->op.code = UNPACK_SEQUENCE_TWO_TUPLE; - goto success; + specialize(instr, UNPACK_SEQUENCE_TWO_TUPLE); + return; } - instr->op.code = UNPACK_SEQUENCE_TUPLE; - goto success; + specialize(instr, UNPACK_SEQUENCE_TUPLE); + return; } if (PyList_CheckExact(seq)) { if (PyList_GET_SIZE(seq) != oparg) { SPECIALIZATION_FAIL(UNPACK_SEQUENCE, SPEC_FAIL_EXPECTED_ERROR); - goto failure; + unspecialize(instr); + return; } - instr->op.code = UNPACK_SEQUENCE_LIST; - goto success; + specialize(instr, UNPACK_SEQUENCE_LIST); + return; } SPECIALIZATION_FAIL(UNPACK_SEQUENCE, unpack_sequence_fail_kind(seq)); -failure: - STAT_INC(UNPACK_SEQUENCE, failure); - instr->op.code = UNPACK_SEQUENCE; - cache->counter = adaptive_counter_backoff(cache->counter); - return; -success: - STAT_INC(UNPACK_SEQUENCE, success); - cache->counter = adaptive_counter_cooldown(); + unspecialize(instr); } #ifdef Py_STATS diff --git a/Tools/build/freeze_modules.py b/Tools/build/freeze_modules.py index 7f1dee18319749..8f74abdc83db47 100644 --- a/Tools/build/freeze_modules.py +++ b/Tools/build/freeze_modules.py @@ -63,9 +63,6 @@ 'genericpath', 'ntpath', 'posixpath', - # We must explicitly mark os.path as a frozen module - # even though it will never be imported. 
- f'{OS_PATH} : os.path', 'os', 'site', 'stat', diff --git a/Tools/c-analyzer/cpython/_analyzer.py b/Tools/c-analyzer/cpython/_analyzer.py index f07fa8af495e17..6204353e9bd26a 100644 --- a/Tools/c-analyzer/cpython/_analyzer.py +++ b/Tools/c-analyzer/cpython/_analyzer.py @@ -280,12 +280,26 @@ def _is_kwlist(decl): vartype = ''.join(str(decl.vartype).split()) return vartype == 'char*[]' +def _is_local_static_mutex(decl): + if not hasattr(decl, "vartype"): + return False + + if not hasattr(decl, "parent") or decl.parent is None: + # We only want to allow local variables + return False + + vartype = decl.vartype + return (vartype.typespec == 'PyMutex') and (decl.storage == 'static') def _has_other_supported_type(decl): if hasattr(decl, 'file') and decl.file.filename.endswith('.c.h'): assert 'clinic' in decl.file.filename, (decl,) if decl.name == '_kwtuple': return True + if _is_local_static_mutex(decl): + # GH-127081: Local static mutexes are used to + # wrap libc functions that aren't thread safe + return True vartype = str(decl.vartype).split() if vartype[0] == 'struct': vartype = vartype[1:] diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 4327a111eedbaf..686f3935d91bda 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -739,7 +739,6 @@ Modules/expat/xmlrole.c - declClose - Modules/expat/xmlrole.c - error - ## other -Modules/grpmodule.c grp_getgrall_impl getgrall_mutex - Modules/_io/_iomodule.c - _PyIO_Module - Modules/_sqlite/module.c - _sqlite3module - Modules/clinic/md5module.c.h _md5_md5 _keywords - diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index e02e07ec748231..eca851e6de87ae 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -548,7 +548,10 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "PyStackRef_FromPyObjectImmortal", "PyStackRef_FromPyObjectNew", "PyStackRef_FromPyObjectSteal", 
- "PyStackRef_Is", + "PyStackRef_IsExactly", + "PyStackRef_IsNone", + "PyStackRef_IsTrue", + "PyStackRef_IsFalse", "PyStackRef_IsNull", "PyStackRef_None", "PyStackRef_TYPE", diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 0d16e8f7da0071..f78ff16bff9039 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -163,16 +163,13 @@ import time import getopt import ast -import token import tokenize +from collections import defaultdict +from dataclasses import dataclass, field +from operator import itemgetter __version__ = '1.5' -default_keywords = ['_'] -DEFAULTKEYWORDS = ', '.join(default_keywords) - -EMPTYSTRING = '' - # The normal pot-file header. msgmerge and Emacs's po-mode work better if it's # there. @@ -306,12 +303,64 @@ def getFilesForName(name): return [] +# Key is the function name, value is a dictionary mapping argument positions to the +# type of the argument. The type is one of 'msgid', 'msgid_plural', or 'msgctxt'. +DEFAULTKEYWORDS = { + '_': {0: 'msgid'}, + 'gettext': {0: 'msgid'}, + 'ngettext': {0: 'msgid', 1: 'msgid_plural'}, + 'pgettext': {0: 'msgctxt', 1: 'msgid'}, + 'npgettext': {0: 'msgctxt', 1: 'msgid', 2: 'msgid_plural'}, + 'dgettext': {1: 'msgid'}, + 'dngettext': {1: 'msgid', 2: 'msgid_plural'}, + 'dpgettext': {1: 'msgctxt', 2: 'msgid'}, + 'dnpgettext': {1: 'msgctxt', 2: 'msgid', 3: 'msgid_plural'}, +} + + +def matches_spec(message, spec): + """Check if a message has all the keys defined by the keyword spec.""" + return all(key in message for key in spec.values()) + + +@dataclass(frozen=True) +class Location: + filename: str + lineno: int + + def __lt__(self, other): + return (self.filename, self.lineno) < (other.filename, other.lineno) + + +@dataclass +class Message: + msgid: str + msgid_plural: str | None + msgctxt: str | None + locations: set[Location] = field(default_factory=set) + is_docstring: bool = False + + def add_location(self, filename, lineno, msgid_plural=None, *, is_docstring=False): + if 
self.msgid_plural is None: + self.msgid_plural = msgid_plural + self.locations.add(Location(filename, lineno)) + self.is_docstring |= is_docstring + + +def key_for(msgid, msgctxt=None): + if msgctxt is not None: + return (msgctxt, msgid) + return msgid + + class TokenEater: def __init__(self, options): self.__options = options self.__messages = {} self.__state = self.__waiting - self.__data = [] + self.__data = defaultdict(str) + self.__curr_arg = 0 + self.__curr_keyword = None self.__lineno = -1 self.__freshmodule = 1 self.__curfile = None @@ -331,7 +380,7 @@ def __waiting(self, ttype, tstring, lineno): # module docstring? if self.__freshmodule: if ttype == tokenize.STRING and is_literal_string(tstring): - self.__addentry(safe_eval(tstring), lineno, isdocstring=1) + self.__addentry({'msgid': safe_eval(tstring)}, lineno, is_docstring=True) self.__freshmodule = 0 return if ttype in (tokenize.COMMENT, tokenize.NL, tokenize.ENCODING): @@ -346,6 +395,7 @@ def __waiting(self, ttype, tstring, lineno): return if ttype == tokenize.NAME and tstring in opts.keywords: self.__state = self.__keywordseen + self.__curr_keyword = tstring return if ttype == tokenize.STRING: maybe_fstring = ast.parse(tstring, mode='eval').body @@ -397,7 +447,8 @@ def __waiting(self, ttype, tstring, lineno): }, file=sys.stderr) continue if isinstance(arg.value, str): - self.__addentry(arg.value, lineno) + self.__curr_keyword = func_name + self.__addentry({'msgid': arg.value}, lineno) def __suiteseen(self, ttype, tstring, lineno): # skip over any enclosure pairs until we see the colon @@ -413,7 +464,7 @@ def __suiteseen(self, ttype, tstring, lineno): def __suitedocstring(self, ttype, tstring, lineno): # ignore any intervening noise if ttype == tokenize.STRING and is_literal_string(tstring): - self.__addentry(safe_eval(tstring), lineno, isdocstring=1) + self.__addentry({'msgid': safe_eval(tstring)}, lineno, is_docstring=True) self.__state = self.__waiting elif ttype not in (tokenize.NEWLINE, 
tokenize.INDENT, tokenize.COMMENT): @@ -422,44 +473,90 @@ def __suitedocstring(self, ttype, tstring, lineno): def __keywordseen(self, ttype, tstring, lineno): if ttype == tokenize.OP and tstring == '(': - self.__data = [] + self.__data.clear() + self.__curr_arg = 0 + self.__enclosurecount = 0 self.__lineno = lineno self.__state = self.__openseen else: self.__state = self.__waiting def __openseen(self, ttype, tstring, lineno): - if ttype == tokenize.OP and tstring == ')': - # We've seen the last of the translatable strings. Record the - # line number of the first line of the strings and update the list - # of messages seen. Reset state for the next batch. If there - # were no strings inside _(), then just ignore this entry. - if self.__data: - self.__addentry(EMPTYSTRING.join(self.__data)) - self.__state = self.__waiting - elif ttype == tokenize.STRING and is_literal_string(tstring): - self.__data.append(safe_eval(tstring)) - elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT, - token.NEWLINE, tokenize.NL]: - # warn if we see anything else than STRING or whitespace - print(_( - '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"' - ) % { - 'token': tstring, - 'file': self.__curfile, - 'lineno': self.__lineno - }, file=sys.stderr) - self.__state = self.__waiting + spec = self.__options.keywords[self.__curr_keyword] + arg_type = spec.get(self.__curr_arg) + expect_string_literal = arg_type is not None + + if ttype == tokenize.OP and self.__enclosurecount == 0: + if tstring == ')': + # We've seen the last of the translatable strings. Record the + # line number of the first line of the strings and update the list + # of messages seen. Reset state for the next batch. If there + # were no strings inside _(), then just ignore this entry. 
+ if self.__data: + self.__addentry(self.__data) + self.__state = self.__waiting + return + elif tstring == ',': + # Advance to the next argument + self.__curr_arg += 1 + return + + if expect_string_literal: + if ttype == tokenize.STRING and is_literal_string(tstring): + self.__data[arg_type] += safe_eval(tstring) + elif ttype not in (tokenize.COMMENT, tokenize.INDENT, tokenize.DEDENT, + tokenize.NEWLINE, tokenize.NL): + # We are inside an argument which is a translatable string and + # we encountered a token that is not a string. This is an error. + self.warn_unexpected_token(tstring) + self.__enclosurecount = 0 + self.__state = self.__waiting + elif ttype == tokenize.OP: + if tstring in '([{': + self.__enclosurecount += 1 + elif tstring in ')]}': + self.__enclosurecount -= 1 def __ignorenext(self, ttype, tstring, lineno): self.__state = self.__waiting - def __addentry(self, msg, lineno=None, isdocstring=0): + def __addentry(self, msg, lineno=None, *, is_docstring=False): + msgid = msg.get('msgid') + if msgid in self.__options.toexclude: + return + if not is_docstring: + spec = self.__options.keywords[self.__curr_keyword] + if not matches_spec(msg, spec): + return if lineno is None: lineno = self.__lineno - if not msg in self.__options.toexclude: - entry = (self.__curfile, lineno) - self.__messages.setdefault(msg, {})[entry] = isdocstring + msgctxt = msg.get('msgctxt') + msgid_plural = msg.get('msgid_plural') + key = key_for(msgid, msgctxt) + if key in self.__messages: + self.__messages[key].add_location( + self.__curfile, + lineno, + msgid_plural, + is_docstring=is_docstring, + ) + else: + self.__messages[key] = Message( + msgid=msgid, + msgid_plural=msgid_plural, + msgctxt=msgctxt, + locations={Location(self.__curfile, lineno)}, + is_docstring=is_docstring, + ) + + def warn_unexpected_token(self, token): + print(_( + '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"' + ) % { + 'token': token, + 'file': self.__curfile, + 'lineno': self.__lineno + }, 
file=sys.stderr) def set_filename(self, filename): self.__curfile = filename @@ -472,55 +569,54 @@ def write(self, fp): print(pot_header % {'time': timestamp, 'version': __version__, 'charset': encoding, 'encoding': '8bit'}, file=fp) - # Sort the entries. First sort each particular entry's keys, then - # sort all the entries by their first item. - reverse = {} - for k, v in self.__messages.items(): - keys = sorted(v.keys()) - reverse.setdefault(tuple(keys), []).append((k, v)) - rkeys = sorted(reverse.keys()) - for rkey in rkeys: - rentries = reverse[rkey] - rentries.sort() - for k, v in rentries: - # If the entry was gleaned out of a docstring, then add a - # comment stating so. This is to aid translators who may wish - # to skip translating some unimportant docstrings. - isdocstring = any(v.values()) - # k is the message string, v is a dictionary-set of (filename, - # lineno) tuples. We want to sort the entries in v first by - # file name and then by line number. - v = sorted(v.keys()) - if not options.writelocations: - pass + + # Sort locations within each message by filename and lineno + sorted_keys = [ + (key, sorted(msg.locations)) + for key, msg in self.__messages.items() + ] + # Sort messages by locations + # For example, a message with locations [('test.py', 1), ('test.py', 2)] will + # appear before a message with locations [('test.py', 1), ('test.py', 3)] + sorted_keys.sort(key=itemgetter(1)) + + for key, locations in sorted_keys: + msg = self.__messages[key] + if options.writelocations: # location comments are different b/w Solaris and GNU: - elif options.locationstyle == options.SOLARIS: - for filename, lineno in v: - d = {'filename': filename, 'lineno': lineno} - print(_( - '# File: %(filename)s, line: %(lineno)d') % d, file=fp) + if options.locationstyle == options.SOLARIS: + for location in locations: + print(f'# File: {location.filename}, line: {location.lineno}', file=fp) elif options.locationstyle == options.GNU: # fit as many locations on one 
line, as long as the # resulting line length doesn't exceed 'options.width' locline = '#:' - for filename, lineno in v: - d = {'filename': filename, 'lineno': lineno} - s = _(' %(filename)s:%(lineno)d') % d + for location in locations: + s = f' {location.filename}:{location.lineno}' if len(locline) + len(s) <= options.width: locline = locline + s else: print(locline, file=fp) - locline = "#:" + s + locline = f'#:{s}' if len(locline) > 2: print(locline, file=fp) - if isdocstring: - print('#, docstring', file=fp) - print('msgid', normalize(k, encoding), file=fp) + if msg.is_docstring: + # If the entry was gleaned out of a docstring, then add a + # comment stating so. This is to aid translators who may wish + # to skip translating some unimportant docstrings. + print('#, docstring', file=fp) + if msg.msgctxt is not None: + print('msgctxt', normalize(msg.msgctxt, encoding), file=fp) + print('msgid', normalize(msg.msgid, encoding), file=fp) + if msg.msgid_plural is not None: + print('msgid_plural', normalize(msg.msgid_plural, encoding), file=fp) + print('msgstr[0] ""', file=fp) + print('msgstr[1] ""\n', file=fp) + else: print('msgstr ""\n', file=fp) def main(): - global default_keywords try: opts, args = getopt.getopt( sys.argv[1:], @@ -557,7 +653,7 @@ class Options: locations = {'gnu' : options.GNU, 'solaris' : options.SOLARIS, } - + no_default_keywords = False # parse options for opt, arg in opts: if opt in ('-h', '--help'): @@ -573,7 +669,7 @@ class Options: elif opt in ('-k', '--keyword'): options.keywords.append(arg) elif opt in ('-K', '--no-default-keywords'): - default_keywords = [] + no_default_keywords = True elif opt in ('-n', '--add-location'): options.writelocations = 1 elif opt in ('--no-location',): @@ -613,7 +709,9 @@ class Options: make_escapes(not options.escape) # calculate all keywords - options.keywords.extend(default_keywords) + options.keywords = {kw: {0: 'msgid'} for kw in options.keywords} + if not no_default_keywords: + options.keywords |= 
DEFAULTKEYWORDS # initialize list of strings to exclude if options.excludefilename: