diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index fcd9c28e030c1f9..dc8f6437c0e2c4e 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -461,112 +461,6 @@ PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode); #define _PyUnicode_AsString PyUnicode_AsUTF8 -/* --- UTF-7 Codecs ------------------------------------------------------- */ - -PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7( - PyObject *unicode, /* Unicode object */ - int base64SetO, /* Encode RFC2152 Set O characters in base64 */ - int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */ - const char *errors /* error handling */ - ); - -/* --- UTF-8 Codecs ------------------------------------------------------- */ - -PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String( - PyObject *unicode, - const char *errors); - -/* --- UTF-32 Codecs ------------------------------------------------------ */ - -PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32( - PyObject *object, /* Unicode object */ - const char *errors, /* error handling */ - int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ - ); - -/* --- UTF-16 Codecs ------------------------------------------------------ */ - -/* Returns a Python string object holding the UTF-16 encoded value of - the Unicode data. - - If byteorder is not 0, output is written according to the following - byte order: - - byteorder == -1: little endian - byteorder == 0: native byte order (writes a BOM mark) - byteorder == 1: big endian - - If byteorder is 0, the output string will always start with the - Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is - prepended. -*/ -PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16( - PyObject* unicode, /* Unicode object */ - const char *errors, /* error handling */ - int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ - ); - -/* --- Unicode-Escape Codecs ---------------------------------------------- */ - -/* Variant of PyUnicode_DecodeUnicodeEscape that supports partial decoding. */ -PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful( - const char *string, /* Unicode-Escape encoded string */ - Py_ssize_t length, /* size of string */ - const char *errors, /* error handling */ - Py_ssize_t *consumed /* bytes consumed */ -); -/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape - chars. */ -PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal( - const char *string, /* Unicode-Escape encoded string */ - Py_ssize_t length, /* size of string */ - const char *errors, /* error handling */ - Py_ssize_t *consumed, /* bytes consumed */ - const char **first_invalid_escape /* on return, points to first - invalid escaped char in - string. */ -); - -/* --- Raw-Unicode-Escape Codecs ---------------------------------------------- */ - -/* Variant of PyUnicode_DecodeRawUnicodeEscape that supports partial decoding. */ -PyAPI_FUNC(PyObject*) _PyUnicode_DecodeRawUnicodeEscapeStateful( - const char *string, /* Unicode-Escape encoded string */ - Py_ssize_t length, /* size of string */ - const char *errors, /* error handling */ - Py_ssize_t *consumed /* bytes consumed */ -); - -/* --- Latin-1 Codecs ----------------------------------------------------- */ - -PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String( - PyObject* unicode, - const char* errors); - -/* --- ASCII Codecs ------------------------------------------------------- */ - -PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString( - PyObject* unicode, - const char* errors); - -/* --- Character Map Codecs ----------------------------------------------- */ - -/* Translate an Unicode object by applying a character mapping table to - it and return the resulting Unicode object. - - The mapping table must map Unicode ordinal integers to Unicode strings, - Unicode ordinal integers or None (causing deletion of the character). - - Mapping tables may be dictionaries or sequences. Unmapped character - ordinals (ones which cause a LookupError) are left untouched and - are copied as-is. -*/ -PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap( - PyObject *unicode, /* Unicode object */ - PyObject *mapping, /* encoding mapping */ - const char *errors /* error handling */ - ); - /* --- Decimal Encoder ---------------------------------------------------- */ /* Coverts a Unicode object holding a decimal value to an ASCII string diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index da01f57f962793f..9e8e7255d99cc85 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -177,6 +177,106 @@ PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter( Py_ssize_t start, Py_ssize_t end); +/* --- UTF-7 Codecs ------------------------------------------------------- */ + +extern PyObject* _PyUnicode_EncodeUTF7( + PyObject *unicode, /* Unicode object */ + int base64SetO, /* Encode RFC2152 Set O characters in base64 */ + int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */ + const char *errors); /* error handling */ + +/* --- UTF-8 Codecs ------------------------------------------------------- */ + +PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String( + PyObject *unicode, + const char *errors); + +/* --- UTF-32 Codecs ------------------------------------------------------ */ + +extern PyObject* _PyUnicode_EncodeUTF32( + PyObject *object, /* Unicode object */ + const char *errors, /* error handling */ + int byteorder); /* byteorder to use 0=BOM+native;-1=LE,1=BE */ + +/* --- UTF-16 Codecs ------------------------------------------------------ */ + +/* Returns a Python string object holding the UTF-16 encoded value of + the Unicode data. + + If byteorder is not 0, output is written according to the following + byte order: + + byteorder == -1: little endian + byteorder == 0: native byte order (writes a BOM mark) + byteorder == 1: big endian + + If byteorder is 0, the output string will always start with the + Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is + prepended. +*/ +extern PyObject* _PyUnicode_EncodeUTF16( + PyObject* unicode, /* Unicode object */ + const char *errors, /* error handling */ + int byteorder); /* byteorder to use 0=BOM+native;-1=LE,1=BE */ + +/* --- Unicode-Escape Codecs ---------------------------------------------- */ + +/* Variant of PyUnicode_DecodeUnicodeEscape that supports partial decoding. */ +extern PyObject* _PyUnicode_DecodeUnicodeEscapeStateful( + const char *string, /* Unicode-Escape encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + Py_ssize_t *consumed); /* bytes consumed */ + +/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape + chars. */ +extern PyObject* _PyUnicode_DecodeUnicodeEscapeInternal( + const char *string, /* Unicode-Escape encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + Py_ssize_t *consumed, /* bytes consumed */ + const char **first_invalid_escape); /* on return, points to first + invalid escaped char in + string. */ + +/* --- Raw-Unicode-Escape Codecs ---------------------------------------------- */ + +/* Variant of PyUnicode_DecodeRawUnicodeEscape that supports partial decoding. */ +extern PyObject* _PyUnicode_DecodeRawUnicodeEscapeStateful( + const char *string, /* Unicode-Escape encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + Py_ssize_t *consumed); /* bytes consumed */ + +/* --- Latin-1 Codecs ----------------------------------------------------- */ + +extern PyObject* _PyUnicode_AsLatin1String( + PyObject* unicode, + const char* errors); + +/* --- ASCII Codecs ------------------------------------------------------- */ + +extern PyObject* _PyUnicode_AsASCIIString( + PyObject* unicode, + const char* errors); + +/* --- Character Map Codecs ----------------------------------------------- */ + +/* Translate an Unicode object by applying a character mapping table to + it and return the resulting Unicode object. + + The mapping table must map Unicode ordinal integers to Unicode strings, + Unicode ordinal integers or None (causing deletion of the character). + + Mapping tables may be dictionaries or sequences. Unmapped character + ordinals (ones which cause a LookupError) are left untouched and + are copied as-is. +*/ +extern PyObject* _PyUnicode_EncodeCharmap( + PyObject *unicode, /* Unicode object */ + PyObject *mapping, /* encoding mapping */ + const char *errors); /* error handling */ + /* --- Methods & Slots ---------------------------------------------------- */ extern PyObject* _PyUnicode_JoinArray( diff --git a/Parser/string_parser.c b/Parser/string_parser.c index 20459e894634949..bc1f99d607ae4d6 100644 --- a/Parser/string_parser.c +++ b/Parser/string_parser.c @@ -1,6 +1,7 @@ #include #include +#include "pycore_unicodeobject.h" // _PyUnicode_DecodeUnicodeEscapeInternal() #include "tokenizer.h" #include "pegen.h"