diff --git a/Modules/ThirdParty/Expat/src/expat/COPYING b/Modules/ThirdParty/Expat/src/expat/COPYING index fc97b02d909..3c0142e71c8 100644 --- a/Modules/ThirdParty/Expat/src/expat/COPYING +++ b/Modules/ThirdParty/Expat/src/expat/COPYING @@ -1,5 +1,5 @@ -Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd - and Clark Cooper +Copyright (c) 1998-2000 Thai Open Source Software Center Ltd and Clark Cooper +Copyright (c) 2001-2019 Expat maintainers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/Modules/ThirdParty/Expat/src/expat/ascii.h b/Modules/ThirdParty/Expat/src/expat/ascii.h index 6376b1f311b..1f594d2e54b 100644 --- a/Modules/ThirdParty/Expat/src/expat/ascii.h +++ b/Modules/ThirdParty/Expat/src/expat/ascii.h @@ -1,6 +1,36 @@ /* -Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1999-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2007 Karl Waclawek + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ #define ASCII_A 0x41 @@ -69,7 +99,7 @@ See the file COPYING for copying permission. #define ASCII_9 0x39 #define ASCII_TAB 0x09 -#define ASCII_SPACE 0x20 +#define ASCII_SPACE 0x20 #define ASCII_EXCL 0x21 #define ASCII_QUOT 0x22 #define ASCII_AMP 0x26 @@ -84,3 +114,10 @@ See the file COPYING for copying permission. #define ASCII_LSQB 0x5B #define ASCII_RSQB 0x5D #define ASCII_UNDERSCORE 0x5F +#define ASCII_LPAREN 0x28 +#define ASCII_RPAREN 0x29 +#define ASCII_FF 0x0C +#define ASCII_SLASH 0x2F +#define ASCII_HASH 0x23 +#define ASCII_PIPE 0x7C +#define ASCII_COMMA 0x2C diff --git a/Modules/ThirdParty/Expat/src/expat/asciitab.h b/Modules/ThirdParty/Expat/src/expat/asciitab.h index eb445cc52c1..af766fb2478 100644 --- a/Modules/ThirdParty/Expat/src/expat/asciitab.h +++ b/Modules/ThirdParty/Expat/src/expat/asciitab.h @@ -1,37 +1,66 @@ /* -Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* 0x00 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML, -/* 0x0C */ BT_NONXML, BT_CR, BT_NONXML, BT_NONXML, -/* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM, -/* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS, -/* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS, -/* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL, -/* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, -/* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, -/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI, -/* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST, -/* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, -/* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, -/* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB, -/* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT, -/* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, -/* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, -/* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, -/* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML, + /* 0x0C */ BT_NONXML, BT_CR, BT_NONXML, BT_NONXML, + /* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM, + /* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS, + /* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS, + /* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL, + /* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, + /* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, + /* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI, + /* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST, + /* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, + /* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, + /* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB, + /* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT, + /* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, + /* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, + /* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, + /* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER, diff --git a/Modules/ThirdParty/Expat/src/expat/expat.h b/Modules/ThirdParty/Expat/src/expat/expat.h index c48e67e1c80..b7d6d354801 100644 --- a/Modules/ThirdParty/Expat/src/expat/expat.h +++ b/Modules/ThirdParty/Expat/src/expat/expat.h @@ -1,34 +1,133 @@ /* -Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2000-2005 Fred L. Drake, Jr. + Copyright (c) 2001-2002 Greg Stein + Copyright (c) 2002-2016 Karl Waclawek + Copyright (c) 2016-2021 Sebastian Pipping + Copyright (c) 2016 Cristian Rodríguez + Copyright (c) 2016 Thomas Beutlich + Copyright (c) 2017 Rhodri James + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef XmlParse_INCLUDED -#define XmlParse_INCLUDED 1 +#ifndef Expat_INCLUDED +#define Expat_INCLUDED 1 #include - -#include "expatDllConfig.h" - -#if defined(_WIN32) && !defined(ITK_EXPAT_STATIC) -# if defined(ITKEXPAT_EXPORTS) -# define XMLPARSEAPI(type) __declspec( dllexport ) type __cdecl -# else -# define XMLPARSEAPI(type) __declspec( dllimport ) type __cdecl -# endif -#else -# define XMLPARSEAPI(type) type -#endif +#include "expat_external.h" #ifdef __cplusplus extern "C" { #endif -typedef void *XML_Parser; +struct XML_ParserStruct; +typedef struct XML_ParserStruct *XML_Parser; + +typedef unsigned char XML_Bool; +#define XML_TRUE ((XML_Bool)1) +#define XML_FALSE ((XML_Bool)0) + +/* The XML_Status enum gives the possible return values for several + API functions. The preprocessor #defines are included so this + stanza can be added to code that still needs to support older + versions of Expat 1.95.x: + + #ifndef XML_STATUS_OK + #define XML_STATUS_OK 1 + #define XML_STATUS_ERROR 0 + #endif -/* Information is UTF-8 encoded. */ -typedef char XML_Char; -typedef char XML_LChar; + Otherwise, the #define hackery is quite ugly and would have been + dropped. +*/ +enum XML_Status { + XML_STATUS_ERROR = 0, +#define XML_STATUS_ERROR XML_STATUS_ERROR + XML_STATUS_OK = 1, +#define XML_STATUS_OK XML_STATUS_OK + XML_STATUS_SUSPENDED = 2 +#define XML_STATUS_SUSPENDED XML_STATUS_SUSPENDED +}; + +enum XML_Error { + XML_ERROR_NONE, + XML_ERROR_NO_MEMORY, + XML_ERROR_SYNTAX, + XML_ERROR_NO_ELEMENTS, + XML_ERROR_INVALID_TOKEN, + XML_ERROR_UNCLOSED_TOKEN, + XML_ERROR_PARTIAL_CHAR, + XML_ERROR_TAG_MISMATCH, + XML_ERROR_DUPLICATE_ATTRIBUTE, + XML_ERROR_JUNK_AFTER_DOC_ELEMENT, + XML_ERROR_PARAM_ENTITY_REF, + XML_ERROR_UNDEFINED_ENTITY, + XML_ERROR_RECURSIVE_ENTITY_REF, + XML_ERROR_ASYNC_ENTITY, + XML_ERROR_BAD_CHAR_REF, + XML_ERROR_BINARY_ENTITY_REF, + XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF, + XML_ERROR_MISPLACED_XML_PI, + XML_ERROR_UNKNOWN_ENCODING, + XML_ERROR_INCORRECT_ENCODING, + XML_ERROR_UNCLOSED_CDATA_SECTION, + XML_ERROR_EXTERNAL_ENTITY_HANDLING, + XML_ERROR_NOT_STANDALONE, + XML_ERROR_UNEXPECTED_STATE, + XML_ERROR_ENTITY_DECLARED_IN_PE, + XML_ERROR_FEATURE_REQUIRES_XML_DTD, + XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING, + /* Added in 1.95.7. */ + XML_ERROR_UNBOUND_PREFIX, + /* Added in 1.95.8. */ + XML_ERROR_UNDECLARING_PREFIX, + XML_ERROR_INCOMPLETE_PE, + XML_ERROR_XML_DECL, + XML_ERROR_TEXT_DECL, + XML_ERROR_PUBLICID, + XML_ERROR_SUSPENDED, + XML_ERROR_NOT_SUSPENDED, + XML_ERROR_ABORTED, + XML_ERROR_FINISHED, + XML_ERROR_SUSPEND_PE, + /* Added in 2.0. */ + XML_ERROR_RESERVED_PREFIX_XML, + XML_ERROR_RESERVED_PREFIX_XMLNS, + XML_ERROR_RESERVED_NAMESPACE_URI, + /* Added in 2.2.1. */ + XML_ERROR_INVALID_ARGUMENT, + /* Added in 2.3.0. */ + XML_ERROR_NO_BUFFER, + /* Added in 2.4.0. */ + XML_ERROR_AMPLIFICATION_LIMIT_BREACH +}; enum XML_Content_Type { XML_CTYPE_EMPTY = 1, @@ -67,66 +166,54 @@ enum XML_Content_Quant { typedef struct XML_cp XML_Content; struct XML_cp { - enum XML_Content_Type type; - enum XML_Content_Quant quant; - XML_Char * name; - unsigned int numchildren; - XML_Content * children; + enum XML_Content_Type type; + enum XML_Content_Quant quant; + XML_Char *name; + unsigned int numchildren; + XML_Content *children; }; - /* This is called for an element declaration. See above for description of the model argument. It's the caller's responsibility to free model when finished with it. */ - -typedef void (*XML_ElementDeclHandler) (void *userData, - const XML_Char *name, - XML_Content *model); +typedef void(XMLCALL *XML_ElementDeclHandler)(void *userData, + const XML_Char *name, + XML_Content *model); XMLPARSEAPI(void) -XML_SetElementDeclHandler(XML_Parser parser, - XML_ElementDeclHandler eldecl); - -/* - The Attlist declaration handler is called for *each* attribute. So - a single Attlist declaration with multiple attributes declared will - generate multiple calls to this handler. The "default" parameter - may be NULL in the case of the "#IMPLIED" or "#REQUIRED" keyword. - The "isrequired" parameter will be true and the default value will - be NULL in the case of "#REQUIRED". If "isrequired" is true and - default is non-NULL, then this is a "#FIXED" default. - */ - -typedef void (*XML_AttlistDeclHandler) (void *userData, - const XML_Char *elname, - const XML_Char *attname, - const XML_Char *att_type, - const XML_Char *dflt, - int isrequired); +XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl); + +/* The Attlist declaration handler is called for *each* attribute. So + a single Attlist declaration with multiple attributes declared will + generate multiple calls to this handler. The "default" parameter + may be NULL in the case of the "#IMPLIED" or "#REQUIRED" + keyword. The "isrequired" parameter will be true and the default + value will be NULL in the case of "#REQUIRED". If "isrequired" is + true and default is non-NULL, then this is a "#FIXED" default. +*/ +typedef void(XMLCALL *XML_AttlistDeclHandler)( + void *userData, const XML_Char *elname, const XML_Char *attname, + const XML_Char *att_type, const XML_Char *dflt, int isrequired); XMLPARSEAPI(void) -XML_SetAttlistDeclHandler(XML_Parser parser, - XML_AttlistDeclHandler attdecl); - - - /* The XML declaration handler is called for *both* XML declarations and - text declarations. The way to distinguish is that the version parameter - will be null for text declarations. The encoding parameter may be null - for XML declarations. The standalone parameter will be -1, 0, or 1 - indicating respectively that there was no standalone parameter in - the declaration, that it was given as no, or that it was given as yes. - */ - -typedef void (*XML_XmlDeclHandler) (void *userData, - const XML_Char *version, - const XML_Char *encoding, - int standalone); +XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl); + +/* The XML declaration handler is called for *both* XML declarations + and text declarations. The way to distinguish is that the version + parameter will be NULL for text declarations. The encoding + parameter may be NULL for XML declarations. The standalone + parameter will be -1, 0, or 1 indicating respectively that there + was no standalone parameter in the declaration, that it was given + as no, or that it was given as yes. +*/ +typedef void(XMLCALL *XML_XmlDeclHandler)(void *userData, + const XML_Char *version, + const XML_Char *encoding, + int standalone); XMLPARSEAPI(void) -XML_SetXmlDeclHandler(XML_Parser parser, - XML_XmlDeclHandler xmldecl); - +XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler xmldecl); typedef struct { void *(*malloc_fcn)(size_t size); @@ -135,26 +222,26 @@ typedef struct { } XML_Memory_Handling_Suite; /* Constructs a new parser; encoding is the encoding specified by the -external protocol or null if there is none specified. */ - + external protocol or NULL if there is none specified. +*/ XMLPARSEAPI(XML_Parser) XML_ParserCreate(const XML_Char *encoding); /* Constructs a new parser and namespace processor. Element type -names and attribute names that belong to a namespace will be expanded; -unprefixed attribute names are never expanded; unprefixed element type -names are expanded only if there is a default namespace. The expanded -name is the concatenation of the namespace URI, the namespace -separator character, and the local part of the name. If the namespace -separator is '\0' then the namespace URI and the local part will be -concatenated without any separator. When a namespace is not declared, -the name and prefix will be passed through without expansion. */ - + names and attribute names that belong to a namespace will be + expanded; unprefixed attribute names are never expanded; unprefixed + element type names are expanded only if there is a default + namespace. The expanded name is the concatenation of the namespace + URI, the namespace separator character, and the local part of the + name. If the namespace separator is '\0' then the namespace URI + and the local part will be concatenated without any separator. + It is a programming error to use the separator '\0' with namespace + triplets (see XML_SetReturnNSTriplet). +*/ XMLPARSEAPI(XML_Parser) XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator); - -/* Constructs a new parser using the memory management suit referred to +/* Constructs a new parser using the memory management suite referred to by memsuite. If memsuite is NULL, then use the standard library memory suite. If namespaceSeparator is non-NULL it creates a parser with namespace processing as described above. The character pointed at @@ -163,250 +250,295 @@ XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator); All further memory operations used for the created parser will come from the given suite. */ - XMLPARSEAPI(XML_Parser) XML_ParserCreate_MM(const XML_Char *encoding, const XML_Memory_Handling_Suite *memsuite, const XML_Char *namespaceSeparator); -/* atts is array of name/value pairs, terminated by 0; - names and values are 0 terminated. */ +/* Prepare a parser object to be re-used. This is particularly + valuable when memory allocation overhead is disproportionately high, + such as when a large number of small documnents need to be parsed. + All handlers are cleared from the parser, except for the + unknownEncodingHandler. The parser's external state is re-initialized + except for the values of ns and ns_triplets. -typedef void (*XML_StartElementHandler)(void *userData, - const XML_Char *name, - const XML_Char **atts); + Added in Expat 1.95.3. +*/ +XMLPARSEAPI(XML_Bool) +XML_ParserReset(XML_Parser parser, const XML_Char *encoding); -typedef void (*XML_EndElementHandler)(void *userData, - const XML_Char *name); +/* atts is array of name/value pairs, terminated by 0; + names and values are 0 terminated. +*/ +typedef void(XMLCALL *XML_StartElementHandler)(void *userData, + const XML_Char *name, + const XML_Char **atts); +typedef void(XMLCALL *XML_EndElementHandler)(void *userData, + const XML_Char *name); /* s is not 0 terminated. */ -typedef void (*XML_CharacterDataHandler)(void *userData, - const XML_Char *s, - int len); +typedef void(XMLCALL *XML_CharacterDataHandler)(void *userData, + const XML_Char *s, int len); /* target and data are 0 terminated */ -typedef void (*XML_ProcessingInstructionHandler)(void *userData, - const XML_Char *target, - const XML_Char *data); +typedef void(XMLCALL *XML_ProcessingInstructionHandler)(void *userData, + const XML_Char *target, + const XML_Char *data); /* data is 0 terminated */ -typedef void (*XML_CommentHandler)(void *userData, const XML_Char *data); - -typedef void (*XML_StartCdataSectionHandler)(void *userData); -typedef void (*XML_EndCdataSectionHandler)(void *userData); - -/* This is called for any characters in the XML document for -which there is no applicable handler. This includes both -characters that are part of markup which is of a kind that is -not reported (comments, markup declarations), or characters -that are part of a construct which could be reported but -for which no handler has been supplied. The characters are passed -exactly as they were in the XML document except that -they will be encoded in UTF-8. Line boundaries are not normalized. -Note that a byte order mark character is not passed to the default handler. -There are no guarantees about how characters are divided between calls -to the default handler: for example, a comment might be split between -multiple calls. */ - -typedef void (*XML_DefaultHandler)(void *userData, - const XML_Char *s, - int len); +typedef void(XMLCALL *XML_CommentHandler)(void *userData, const XML_Char *data); + +typedef void(XMLCALL *XML_StartCdataSectionHandler)(void *userData); +typedef void(XMLCALL *XML_EndCdataSectionHandler)(void *userData); + +/* This is called for any characters in the XML document for which + there is no applicable handler. This includes both characters that + are part of markup which is of a kind that is not reported + (comments, markup declarations), or characters that are part of a + construct which could be reported but for which no handler has been + supplied. The characters are passed exactly as they were in the XML + document except that they will be encoded in UTF-8 or UTF-16. + Line boundaries are not normalized. Note that a byte order mark + character is not passed to the default handler. There are no + guarantees about how characters are divided between calls to the + default handler: for example, a comment might be split between + multiple calls. +*/ +typedef void(XMLCALL *XML_DefaultHandler)(void *userData, const XML_Char *s, + int len); /* This is called for the start of the DOCTYPE declaration, before - any DTD or internal subset is parsed. */ - -typedef void (*XML_StartDoctypeDeclHandler)(void *userData, - const XML_Char *doctypeName, - const XML_Char *sysid, - const XML_Char *pubid, - int has_internal_subset); + any DTD or internal subset is parsed. +*/ +typedef void(XMLCALL *XML_StartDoctypeDeclHandler)(void *userData, + const XML_Char *doctypeName, + const XML_Char *sysid, + const XML_Char *pubid, + int has_internal_subset); /* This is called for the start of the DOCTYPE declaration when the -closing > is encountered, but after processing any external subset. */ -typedef void (*XML_EndDoctypeDeclHandler)(void *userData); + closing > is encountered, but after processing any external + subset. +*/ +typedef void(XMLCALL *XML_EndDoctypeDeclHandler)(void *userData); /* This is called for entity declarations. The is_parameter_entity argument will be non-zero if the entity is a parameter entity, zero otherwise. For internal entities (), value will - be non-null and systemId, publicID, and notationName will be null. - The value string is NOT null terminated; the length is provided in + be non-NULL and systemId, publicID, and notationName will be NULL. + The value string is NOT null-terminated; the length is provided in the value_length argument. Since it is legal to have zero-length values, do not use this argument to test for internal entities. - For external entities, value will be null and systemId will be non-null. - The publicId argument will be null unless a public identifier was - provided. The notationName argument will have a non-null value only - for unparsed entity declarations. + For external entities, value will be NULL and systemId will be + non-NULL. The publicId argument will be NULL unless a public + identifier was provided. The notationName argument will have a + non-NULL value only for unparsed entity declarations. + + Note that is_parameter_entity can't be changed to XML_Bool, since + that would break binary compatibility. */ +typedef void(XMLCALL *XML_EntityDeclHandler)( + void *userData, const XML_Char *entityName, int is_parameter_entity, + const XML_Char *value, int value_length, const XML_Char *base, + const XML_Char *systemId, const XML_Char *publicId, + const XML_Char *notationName); -typedef void (*XML_EntityDeclHandler) (void *userData, - const XML_Char *entityName, - int is_parameter_entity, - const XML_Char *value, - int value_length, - const XML_Char *base, - const XML_Char *systemId, - const XML_Char *publicId, - const XML_Char *notationName); - XMLPARSEAPI(void) -XML_SetEntityDeclHandler(XML_Parser parser, - XML_EntityDeclHandler handler); +XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler); /* OBSOLETE -- OBSOLETE -- OBSOLETE - This handler has been superceded by the EntityDeclHandler above. + This handler has been superseded by the EntityDeclHandler above. It is provided here for backward compatibility. -This is called for a declaration of an unparsed (NDATA) -entity. The base argument is whatever was set by XML_SetBase. -The entityName, systemId and notationName arguments will never be null. -The other arguments may be. */ - -typedef void (*XML_UnparsedEntityDeclHandler)(void *userData, - const XML_Char *entityName, - const XML_Char *base, - const XML_Char *systemId, - const XML_Char *publicId, - const XML_Char *notationName); - -/* This is called for a declaration of notation. -The base argument is whatever was set by XML_SetBase. -The notationName will never be null. The other arguments can be. */ - -typedef void (*XML_NotationDeclHandler)(void *userData, - const XML_Char *notationName, - const XML_Char *base, - const XML_Char *systemId, - const XML_Char *publicId); + + This is called for a declaration of an unparsed (NDATA) entity. + The base argument is whatever was set by XML_SetBase. The + entityName, systemId and notationName arguments will never be + NULL. The other arguments may be. +*/ +typedef void(XMLCALL *XML_UnparsedEntityDeclHandler)( + void *userData, const XML_Char *entityName, const XML_Char *base, + const XML_Char *systemId, const XML_Char *publicId, + const XML_Char *notationName); + +/* This is called for a declaration of notation. The base argument is + whatever was set by XML_SetBase. The notationName will never be + NULL. The other arguments can be. +*/ +typedef void(XMLCALL *XML_NotationDeclHandler)(void *userData, + const XML_Char *notationName, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId); /* When namespace processing is enabled, these are called once for -each namespace declaration. The call to the start and end element -handlers occur between the calls to the start and end namespace -declaration handlers. For an xmlns attribute, prefix will be null. -For an xmlns="" attribute, uri will be null. */ - -typedef void (*XML_StartNamespaceDeclHandler)(void *userData, - const XML_Char *prefix, - const XML_Char *uri); - -typedef void (*XML_EndNamespaceDeclHandler)(void *userData, - const XML_Char *prefix); - -/* This is called if the document is not standalone (it has an -external subset or a reference to a parameter entity, but does not -have standalone="yes"). If this handler returns 0, then processing -will not continue, and the parser will return a -XML_ERROR_NOT_STANDALONE error. */ - -typedef int (*XML_NotStandaloneHandler)(void *userData); - -/* This is called for a reference to an external parsed general entity. -The referenced entity is not automatically parsed. -The application can parse it immediately or later using -XML_ExternalEntityParserCreate. -The parser argument is the parser parsing the entity containing the reference; -it can be passed as the parser argument to XML_ExternalEntityParserCreate. -The systemId argument is the system identifier as specified in the entity -declaration; it will not be null. -The base argument is the system identifier that should be used as the base for -resolving systemId if systemId was relative; this is set by XML_SetBase; -it may be null. -The publicId argument is the public identifier as specified in the entity -declaration, or null if none was specified; the whitespace in the public -identifier will have been normalized as required by the XML spec. -The context argument specifies the parsing context in the format -expected by the context argument to -XML_ExternalEntityParserCreate; context is valid only until the handler -returns, so if the referenced entity is to be parsed later, it must be copied. -The handler should return 0 if processing should not continue because of -a fatal error in the handling of the external entity. -In this case the calling parser will return an -XML_ERROR_EXTERNAL_ENTITY_HANDLING error. -Note that unlike other handlers the first argument is the parser, not -userData. */ - -typedef int (*XML_ExternalEntityRefHandler)(XML_Parser parser, - const XML_Char *context, - const XML_Char *base, - const XML_Char *systemId, - const XML_Char *publicId); - -/* This structure is filled in by the XML_UnknownEncodingHandler -to provide information to the parser about encodings that are unknown -to the parser. -The map[b] member gives information about byte sequences -whose first byte is b. -If map[b] is c where c is >= 0, then b by itself encodes the Unicode scalar -value c. -If map[b] is -1, then the byte sequence is malformed. -If map[b] is -n, where n >= 2, then b is the first byte of an n-byte -sequence that encodes a single Unicode scalar value. -The data member will be passed as the first argument to the convert function. -The convert function is used to convert multibyte sequences; -s will point to a n-byte sequence where map[(unsigned char)*s] == -n. -The convert function must return the Unicode scalar value -represented by this byte sequence or -1 if the byte sequence is malformed. -The convert function may be null if the encoding is a single-byte encoding, -that is if map[b] >= -1 for all bytes b. -When the parser is finished with the encoding, then if release is not null, -it will call release passing it the data member; -once release has been called, the convert function will not be called again. - -Expat places certain restrictions on the encodings that are supported -using this mechanism. - -1. Every ASCII character that can appear in a well-formed XML document, -other than the characters - - $@\^`{}~ - -must be represented by a single byte, and that byte must be the -same byte that represents that character in ASCII. - -2. No character may require more than 4 bytes to encode. - -3. All characters encoded must have Unicode scalar values <= 0xFFFF, (i.e., -characters that would be encoded by surrogates in UTF-16 are not -allowed). Note that this restriction doesn't apply to the built-in -support for UTF-8 and UTF-16. - -4. No Unicode character may be encoded by more than one distinct sequence -of bytes. */ + each namespace declaration. The call to the start and end element + handlers occur between the calls to the start and end namespace + declaration handlers. For an xmlns attribute, prefix will be + NULL. For an xmlns="" attribute, uri will be NULL. +*/ +typedef void(XMLCALL *XML_StartNamespaceDeclHandler)(void *userData, + const XML_Char *prefix, + const XML_Char *uri); + +typedef void(XMLCALL *XML_EndNamespaceDeclHandler)(void *userData, + const XML_Char *prefix); + +/* This is called if the document is not standalone, that is, it has an + external subset or a reference to a parameter entity, but does not + have standalone="yes". If this handler returns XML_STATUS_ERROR, + then processing will not continue, and the parser will return a + XML_ERROR_NOT_STANDALONE error. + If parameter entity parsing is enabled, then in addition to the + conditions above this handler will only be called if the referenced + entity was actually read. +*/ +typedef int(XMLCALL *XML_NotStandaloneHandler)(void *userData); + +/* This is called for a reference to an external parsed general + entity. The referenced entity is not automatically parsed. The + application can parse it immediately or later using + XML_ExternalEntityParserCreate. + + The parser argument is the parser parsing the entity containing the + reference; it can be passed as the parser argument to + XML_ExternalEntityParserCreate. The systemId argument is the + system identifier as specified in the entity declaration; it will + not be NULL. + + The base argument is the system identifier that should be used as + the base for resolving systemId if systemId was relative; this is + set by XML_SetBase; it may be NULL. + + The publicId argument is the public identifier as specified in the + entity declaration, or NULL if none was specified; the whitespace + in the public identifier will have been normalized as required by + the XML spec. + + The context argument specifies the parsing context in the format + expected by the context argument to XML_ExternalEntityParserCreate; + context is valid only until the handler returns, so if the + referenced entity is to be parsed later, it must be copied. + context is NULL only when the entity is a parameter entity. + + The handler should return XML_STATUS_ERROR if processing should not + continue because of a fatal error in the handling of the external + entity. In this case the calling parser will return an + XML_ERROR_EXTERNAL_ENTITY_HANDLING error. + + Note that unlike other handlers the first argument is the parser, + not userData. +*/ +typedef int(XMLCALL *XML_ExternalEntityRefHandler)(XML_Parser parser, + const XML_Char *context, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId); + +/* This is called in two situations: + 1) An entity reference is encountered for which no declaration + has been read *and* this is not an error. + 2) An internal entity reference is read, but not expanded, because + XML_SetDefaultHandler has been called. + Note: skipped parameter entities in declarations and skipped general + entities in attribute values cannot be reported, because + the event would be out of sync with the reporting of the + declarations or attribute values +*/ +typedef void(XMLCALL *XML_SkippedEntityHandler)(void *userData, + const XML_Char *entityName, + int is_parameter_entity); + +/* This structure is filled in by the XML_UnknownEncodingHandler to + provide information to the parser about encodings that are unknown + to the parser. + + The map[b] member gives information about byte sequences whose + first byte is b. + + If map[b] is c where c is >= 0, then b by itself encodes the + Unicode scalar value c. + If map[b] is -1, then the byte sequence is malformed. + + If map[b] is -n, where n >= 2, then b is the first byte of an + n-byte sequence that encodes a single Unicode scalar value. + + The data member will be passed as the first argument to the convert + function. + + The convert function is used to convert multibyte sequences; s will + point to a n-byte sequence where map[(unsigned char)*s] == -n. The + convert function must return the Unicode scalar value represented + by this byte sequence or -1 if the byte sequence is malformed. + + The convert function may be NULL if the encoding is a single-byte + encoding, that is if map[b] >= -1 for all bytes b. + + When the parser is finished with the encoding, then if release is + not NULL, it will call release passing it the data member; once + release has been called, the convert function will not be called + again. + + Expat places certain restrictions on the encodings that are supported + using this mechanism. + + 1. Every ASCII character that can appear in a well-formed XML document, + other than the characters + + $@\^`{}~ + + must be represented by a single byte, and that byte must be the + same byte that represents that character in ASCII. + + 2. No character may require more than 4 bytes to encode. + + 3. All characters encoded must have Unicode scalar values <= + 0xFFFF, (i.e., characters that would be encoded by surrogates in + UTF-16 are not allowed). Note that this restriction doesn't + apply to the built-in support for UTF-8 and UTF-16. + + 4. No Unicode character may be encoded by more than one distinct + sequence of bytes. +*/ typedef struct { int map[256]; void *data; - int (*convert)(void *data, const char *s); - void (*release)(void *data); + int(XMLCALL *convert)(void *data, const char *s); + void(XMLCALL *release)(void *data); } XML_Encoding; /* This is called for an encoding that is unknown to the parser. -The encodingHandlerData argument is that which was passed as the -second argument to XML_SetUnknownEncodingHandler. -The name argument gives the name of the encoding as specified in -the encoding declaration. -If the callback can provide information about the encoding, -it must fill in the XML_Encoding structure, and return 1. -Otherwise it must return 0. -If info does not describe a suitable encoding, -then the parser will return an XML_UNKNOWN_ENCODING error. */ - -typedef int (*XML_UnknownEncodingHandler)(void *encodingHandlerData, - const XML_Char *name, - XML_Encoding *info); - -XMLPARSEAPI(void) -XML_SetElementHandler(XML_Parser parser, - XML_StartElementHandler start, + + The encodingHandlerData argument is that which was passed as the + second argument to XML_SetUnknownEncodingHandler. + + The name argument gives the name of the encoding as specified in + the encoding declaration. + + If the callback can provide information about the encoding, it must + fill in the XML_Encoding structure, and return XML_STATUS_OK. + Otherwise it must return XML_STATUS_ERROR. + + If info does not describe a suitable encoding, then the parser will + return an XML_ERROR_UNKNOWN_ENCODING error. +*/ +typedef int(XMLCALL *XML_UnknownEncodingHandler)(void *encodingHandlerData, + const XML_Char *name, + XML_Encoding *info); + +XMLPARSEAPI(void) +XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, XML_EndElementHandler end); XMLPARSEAPI(void) -XML_SetStartElementHandler(XML_Parser, XML_StartElementHandler); +XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler handler); XMLPARSEAPI(void) -XML_SetEndElementHandler(XML_Parser, XML_EndElementHandler); +XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler handler); XMLPARSEAPI(void) XML_SetCharacterDataHandler(XML_Parser parser, @@ -416,8 +548,7 @@ XMLPARSEAPI(void) XML_SetProcessingInstructionHandler(XML_Parser parser, XML_ProcessingInstructionHandler handler); XMLPARSEAPI(void) -XML_SetCommentHandler(XML_Parser parser, - XML_CommentHandler handler); +XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler); XMLPARSEAPI(void) XML_SetCdataSectionHandler(XML_Parser parser, @@ -433,24 +564,21 @@ XML_SetEndCdataSectionHandler(XML_Parser parser, XML_EndCdataSectionHandler end); /* This sets the default handler and also inhibits expansion of -internal entities. The entity reference will be passed to the default -handler. */ - + internal entities. These entity references will be passed to the + default handler, or to the skipped entity handler, if one is set. +*/ XMLPARSEAPI(void) -XML_SetDefaultHandler(XML_Parser parser, - XML_DefaultHandler handler); +XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler); /* This sets the default handler but does not inhibit expansion of -internal entities. The entity reference will not be passed to the -default handler. */ - + internal entities. The entity reference will not be passed to the + default handler. +*/ XMLPARSEAPI(void) -XML_SetDefaultHandlerExpand(XML_Parser parser, - XML_DefaultHandler handler); +XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler); XMLPARSEAPI(void) -XML_SetDoctypeDeclHandler(XML_Parser parser, - XML_StartDoctypeDeclHandler start, +XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, XML_EndDoctypeDeclHandler end); XMLPARSEAPI(void) @@ -458,16 +586,14 @@ XML_SetStartDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start); XMLPARSEAPI(void) -XML_SetEndDoctypeDeclHandler(XML_Parser parser, - XML_EndDoctypeDeclHandler end); +XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end); XMLPARSEAPI(void) XML_SetUnparsedEntityDeclHandler(XML_Parser parser, XML_UnparsedEntityDeclHandler handler); XMLPARSEAPI(void) -XML_SetNotationDeclHandler(XML_Parser parser, - XML_NotationDeclHandler handler); +XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler); XMLPARSEAPI(void) XML_SetNamespaceDeclHandler(XML_Parser parser, @@ -490,32 +616,41 @@ XMLPARSEAPI(void) XML_SetExternalEntityRefHandler(XML_Parser parser, XML_ExternalEntityRefHandler handler); -/* If a non-null value for arg is specified here, then it will be passed -as the first argument to the external entity ref handler instead -of the parser object. */ +/* If a non-NULL value for arg is specified here, then it will be + passed as the first argument to the external entity ref handler + instead of the parser object. +*/ +XMLPARSEAPI(void) +XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg); + XMLPARSEAPI(void) -XML_SetExternalEntityRefHandlerArg(XML_Parser, void *arg); +XML_SetSkippedEntityHandler(XML_Parser parser, + XML_SkippedEntityHandler handler); XMLPARSEAPI(void) XML_SetUnknownEncodingHandler(XML_Parser parser, XML_UnknownEncodingHandler handler, void *encodingHandlerData); -/* This can be called within a handler for a start element, end element, -processing instruction or character data. It causes the corresponding -markup to be passed to the default handler. */ +/* This can be called within a handler for a start element, end + element, processing instruction or character data. It causes the + corresponding markup to be passed to the default handler. +*/ XMLPARSEAPI(void) XML_DefaultCurrent(XML_Parser parser); /* If do_nst is non-zero, and namespace processing is in effect, and a name has a prefix (i.e. an explicit namespace qualifier) then - that name is returned as a triplet in a single - string separated by the separator character specified when the parser - was created: URI + sep + local_name + sep + prefix. + that name is returned as a triplet in a single string separated by + the separator character specified when the parser was created: URI + + sep + local_name + sep + prefix. If do_nst is zero, then namespace information is returned in the - default manner (URI + sep + local_name) whether or not the names + default manner (URI + sep + local_name) whether or not the name has a prefix. + + Note: Calling XML_SetReturnNSTriplet after XML_Parse or + XML_ParseBuffer has no effect. */ XMLPARSEAPI(void) @@ -525,83 +660,202 @@ XML_SetReturnNSTriplet(XML_Parser parser, int do_nst); XMLPARSEAPI(void) XML_SetUserData(XML_Parser parser, void *userData); -/* Returns the last value set by XML_SetUserData or null. */ +/* Returns the last value set by XML_SetUserData or NULL. */ #define XML_GetUserData(parser) (*(void **)(parser)) -/* This is equivalent to supplying an encoding argument -to XML_ParserCreate. It must not be called after XML_Parse -or XML_ParseBuffer. */ - -XMLPARSEAPI(int) +/* This is equivalent to supplying an encoding argument to + XML_ParserCreate. On success XML_SetEncoding returns non-zero, + zero otherwise. + Note: Calling XML_SetEncoding after XML_Parse or XML_ParseBuffer + has no effect and returns XML_STATUS_ERROR. +*/ +XMLPARSEAPI(enum XML_Status) XML_SetEncoding(XML_Parser parser, const XML_Char *encoding); -/* If this function is called, then the parser will be passed -as the first argument to callbacks instead of userData. -The userData will still be accessible using XML_GetUserData. */ - +/* If this function is called, then the parser will be passed as the + first argument to callbacks instead of userData. The userData will + still be accessible using XML_GetUserData. +*/ XMLPARSEAPI(void) XML_UseParserAsHandlerArg(XML_Parser parser); -/* Sets the base to be used for resolving relative URIs in system -identifiers in declarations. Resolving relative identifiers is left -to the application: this value will be passed through as the base -argument to the XML_ExternalEntityRefHandler, XML_NotationDeclHandler -and XML_UnparsedEntityDeclHandler. The base argument will be copied. -Returns zero if out of memory, non-zero otherwise. */ +/* If useDTD == XML_TRUE is passed to this function, then the parser + will assume that there is an external subset, even if none is + specified in the document. In such a case the parser will call the + externalEntityRefHandler with a value of NULL for the systemId + argument (the publicId and context arguments will be NULL as well). + Note: For the purpose of checking WFC: Entity Declared, passing + useDTD == XML_TRUE will make the parser behave as if the document + had a DTD with an external subset. + Note: If this function is called, then this must be done before + the first call to XML_Parse or XML_ParseBuffer, since it will + have no effect after that. Returns + XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING. + Note: If the document does not have a DOCTYPE declaration at all, + then startDoctypeDeclHandler and endDoctypeDeclHandler will not + be called, despite an external subset being parsed. + Note: If XML_DTD is not defined when Expat is compiled, returns + XML_ERROR_FEATURE_REQUIRES_XML_DTD. + Note: If parser == NULL, returns XML_ERROR_INVALID_ARGUMENT. +*/ +XMLPARSEAPI(enum XML_Error) +XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD); -XMLPARSEAPI(int) +/* Sets the base to be used for resolving relative URIs in system + identifiers in declarations. Resolving relative identifiers is + left to the application: this value will be passed through as the + base argument to the XML_ExternalEntityRefHandler, + XML_NotationDeclHandler and XML_UnparsedEntityDeclHandler. The base + argument will be copied. Returns XML_STATUS_ERROR if out of memory, + XML_STATUS_OK otherwise. +*/ +XMLPARSEAPI(enum XML_Status) XML_SetBase(XML_Parser parser, const XML_Char *base); XMLPARSEAPI(const XML_Char *) XML_GetBase(XML_Parser parser); /* Returns the number of the attribute/value pairs passed in last call -to the XML_StartElementHandler that were specified in the start-tag -rather than defaulted. Each attribute/value pair counts as 2; thus -this correspondds to an index into the atts array passed to the -XML_StartElementHandler. */ - + to the XML_StartElementHandler that were specified in the start-tag + rather than defaulted. Each attribute/value pair counts as 2; thus + this corresponds to an index into the atts array passed to the + XML_StartElementHandler. Returns -1 if parser == NULL. +*/ XMLPARSEAPI(int) XML_GetSpecifiedAttributeCount(XML_Parser parser); /* Returns the index of the ID attribute passed in the last call to -XML_StartElementHandler, or -1 if there is no ID attribute. Each -attribute/value pair counts as 2; thus this correspondds to an index -into the atts array passed to the XML_StartElementHandler. */ - + XML_StartElementHandler, or -1 if there is no ID attribute or + parser == NULL. Each attribute/value pair counts as 2; thus this + corresponds to an index into the atts array passed to the + XML_StartElementHandler. +*/ XMLPARSEAPI(int) XML_GetIdAttributeIndex(XML_Parser parser); -/* Parses some input. Returns 0 if a fatal error is detected. -The last call to XML_Parse must have isFinal true; -len may be zero for this call (or any other). */ -XMLPARSEAPI(int) +#ifdef XML_ATTR_INFO +/* Source file byte offsets for the start and end of attribute names and values. + The value indices are exclusive of surrounding quotes; thus in a UTF-8 source + file an attribute value of "blah" will yield: + info->valueEnd - info->valueStart = 4 bytes. +*/ +typedef struct { + XML_Index nameStart; /* Offset to beginning of the attribute name. */ + XML_Index nameEnd; /* Offset after the attribute name's last byte. */ + XML_Index valueStart; /* Offset to beginning of the attribute value. */ + XML_Index valueEnd; /* Offset after the attribute value's last byte. */ +} XML_AttrInfo; + +/* Returns an array of XML_AttrInfo structures for the attribute/value pairs + passed in last call to the XML_StartElementHandler that were specified + in the start-tag rather than defaulted. Each attribute/value pair counts + as 1; thus the number of entries in the array is + XML_GetSpecifiedAttributeCount(parser) / 2. +*/ +XMLPARSEAPI(const XML_AttrInfo *) +XML_GetAttributeInfo(XML_Parser parser); +#endif + +/* Parses some input. Returns XML_STATUS_ERROR if a fatal error is + detected. The last call to XML_Parse must have isFinal true; len + may be zero for this call (or any other). + + Though the return values for these functions has always been + described as a Boolean value, the implementation, at least for the + 1.95.x series, has always returned exactly one of the XML_Status + values. +*/ +XMLPARSEAPI(enum XML_Status) XML_Parse(XML_Parser parser, const char *s, int len, int isFinal); XMLPARSEAPI(void *) XML_GetBuffer(XML_Parser parser, int len); -XMLPARSEAPI(int) +XMLPARSEAPI(enum XML_Status) XML_ParseBuffer(XML_Parser parser, int len, int isFinal); +/* Stops parsing, causing XML_Parse() or XML_ParseBuffer() to return. + Must be called from within a call-back handler, except when aborting + (resumable = 0) an already suspended parser. Some call-backs may + still follow because they would otherwise get lost. Examples: + - endElementHandler() for empty elements when stopped in + startElementHandler(), + - endNameSpaceDeclHandler() when stopped in endElementHandler(), + and possibly others. + + Can be called from most handlers, including DTD related call-backs, + except when parsing an external parameter entity and resumable != 0. + Returns XML_STATUS_OK when successful, XML_STATUS_ERROR otherwise. + Possible error codes: + - XML_ERROR_SUSPENDED: when suspending an already suspended parser. + - XML_ERROR_FINISHED: when the parser has already finished. + - XML_ERROR_SUSPEND_PE: when suspending while parsing an external PE. + + When resumable != 0 (true) then parsing is suspended, that is, + XML_Parse() and XML_ParseBuffer() return XML_STATUS_SUSPENDED. + Otherwise, parsing is aborted, that is, XML_Parse() and XML_ParseBuffer() + return XML_STATUS_ERROR with error code XML_ERROR_ABORTED. + + *Note*: + This will be applied to the current parser instance only, that is, if + there is a parent parser then it will continue parsing when the + externalEntityRefHandler() returns. It is up to the implementation of + the externalEntityRefHandler() to call XML_StopParser() on the parent + parser (recursively), if one wants to stop parsing altogether. + + When suspended, parsing can be resumed by calling XML_ResumeParser(). +*/ +XMLPARSEAPI(enum XML_Status) +XML_StopParser(XML_Parser parser, XML_Bool resumable); + +/* Resumes parsing after it has been suspended with XML_StopParser(). + Must not be called from within a handler call-back. Returns same + status codes as XML_Parse() or XML_ParseBuffer(). + Additional error code XML_ERROR_NOT_SUSPENDED possible. + + *Note*: + This must be called on the most deeply nested child parser instance + first, and on its parent parser only after the child parser has finished, + to be applied recursively until the document entity's parser is restarted. + That is, the parent parser will not resume by itself and it is up to the + application to call XML_ResumeParser() on it at the appropriate moment. +*/ +XMLPARSEAPI(enum XML_Status) +XML_ResumeParser(XML_Parser parser); + +enum XML_Parsing { XML_INITIALIZED, XML_PARSING, XML_FINISHED, XML_SUSPENDED }; + +typedef struct { + enum XML_Parsing parsing; + XML_Bool finalBuffer; +} XML_ParsingStatus; + +/* Returns status of parser with respect to being initialized, parsing, + finished, or suspended and processing the final buffer. + XXX XML_Parse() and XML_ParseBuffer() should return XML_ParsingStatus, + XXX with XML_FINISHED_OK or XML_FINISHED_ERROR replacing XML_FINISHED +*/ +XMLPARSEAPI(void) +XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status); + /* Creates an XML_Parser object that can parse an external general -entity; context is a '\0'-terminated string specifying the parse -context; encoding is a '\0'-terminated string giving the name of the -externally specified encoding, or null if there is no externally -specified encoding. The context string consists of a sequence of -tokens separated by formfeeds (\f); a token consisting of a name -specifies that the general entity of the name is open; a token of the -form prefix=uri specifies the namespace for a particular prefix; a -token of the form =uri specifies the default namespace. This can be -called at any point after the first call to an -ExternalEntityRefHandler so longer as the parser has not yet been -freed. The new parser is completely independent and may safely be -used in a separate thread. The handlers and userData are initialized -from the parser argument. Returns 0 if out of memory. Otherwise -returns a new XML_Parser object. */ + entity; context is a '\0'-terminated string specifying the parse + context; encoding is a '\0'-terminated string giving the name of + the externally specified encoding, or NULL if there is no + externally specified encoding. The context string consists of a + sequence of tokens separated by formfeeds (\f); a token consisting + of a name specifies that the general entity of the name is open; a + token of the form prefix=uri specifies the namespace for a + particular prefix; a token of the form =uri specifies the default + namespace. This can be called at any point after the first call to + an ExternalEntityRefHandler so longer as the parser has not yet + been freed. The new parser is completely independent and may + safely be used in a separate thread. The handlers and userData are + initialized from the parser argument. Returns NULL if out of memory. + Otherwise returns a new XML_Parser object. +*/ XMLPARSEAPI(XML_Parser) -XML_ExternalEntityParserCreate(XML_Parser parser, - const XML_Char *context, +XML_ExternalEntityParserCreate(XML_Parser parser, const XML_Char *context, const XML_Char *encoding); enum XML_ParamEntityParsing { @@ -611,76 +865,75 @@ enum XML_ParamEntityParsing { }; /* Controls parsing of parameter entities (including the external DTD -subset). If parsing of parameter entities is enabled, then references -to external parameter entities (including the external DTD subset) -will be passed to the handler set with -XML_SetExternalEntityRefHandler. The context passed will be 0. -Unlike external general entities, external parameter entities can only -be parsed synchronously. If the external parameter entity is to be -parsed, it must be parsed during the call to the external entity ref -handler: the complete sequence of XML_ExternalEntityParserCreate, -XML_Parse/XML_ParseBuffer and XML_ParserFree calls must be made during -this call. After XML_ExternalEntityParserCreate has been called to -create the parser for the external parameter entity (context must be 0 -for this call), it is illegal to make any calls on the old parser -until XML_ParserFree has been called on the newly created parser. If -the library has been compiled without support for parameter entity -parsing (ie without XML_DTD being defined), then -XML_SetParamEntityParsing will return 0 if parsing of parameter -entities is requested; otherwise it will return non-zero. */ - + subset). If parsing of parameter entities is enabled, then + references to external parameter entities (including the external + DTD subset) will be passed to the handler set with + XML_SetExternalEntityRefHandler. The context passed will be 0. + + Unlike external general entities, external parameter entities can + only be parsed synchronously. If the external parameter entity is + to be parsed, it must be parsed during the call to the external + entity ref handler: the complete sequence of + XML_ExternalEntityParserCreate, XML_Parse/XML_ParseBuffer and + XML_ParserFree calls must be made during this call. After + XML_ExternalEntityParserCreate has been called to create the parser + for the external parameter entity (context must be 0 for this + call), it is illegal to make any calls on the old parser until + XML_ParserFree has been called on the newly created parser. + If the library has been compiled without support for parameter + entity parsing (ie without XML_DTD being defined), then + XML_SetParamEntityParsing will return 0 if parsing of parameter + entities is requested; otherwise it will return non-zero. + Note: If XML_SetParamEntityParsing is called after XML_Parse or + XML_ParseBuffer, then it has no effect and will always return 0. + Note: If parser == NULL, the function will do nothing and return 0. +*/ XMLPARSEAPI(int) XML_SetParamEntityParsing(XML_Parser parser, enum XML_ParamEntityParsing parsing); -enum XML_Error { - XML_ERROR_NONE, - XML_ERROR_NO_MEMORY, - XML_ERROR_SYNTAX, - XML_ERROR_NO_ELEMENTS, - XML_ERROR_INVALID_TOKEN, - XML_ERROR_UNCLOSED_TOKEN, - XML_ERROR_PARTIAL_CHAR, - XML_ERROR_TAG_MISMATCH, - XML_ERROR_DUPLICATE_ATTRIBUTE, - XML_ERROR_JUNK_AFTER_DOC_ELEMENT, - XML_ERROR_PARAM_ENTITY_REF, - XML_ERROR_UNDEFINED_ENTITY, - XML_ERROR_RECURSIVE_ENTITY_REF, - XML_ERROR_ASYNC_ENTITY, - XML_ERROR_BAD_CHAR_REF, - XML_ERROR_BINARY_ENTITY_REF, - XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF, - XML_ERROR_MISPLACED_XML_PI, - XML_ERROR_UNKNOWN_ENCODING, - XML_ERROR_INCORRECT_ENCODING, - XML_ERROR_UNCLOSED_CDATA_SECTION, - XML_ERROR_EXTERNAL_ENTITY_HANDLING, - XML_ERROR_NOT_STANDALONE, - XML_ERROR_UNEXPECTED_STATE -}; - -/* If XML_Parse or XML_ParseBuffer have returned 0, then XML_GetErrorCode -returns information about the error. */ +/* Sets the hash salt to use for internal hash calculations. + Helps in preventing DoS attacks based on predicting hash + function behavior. This must be called before parsing is started. + Returns 1 if successful, 0 when called after parsing has started. + Note: If parser == NULL, the function will do nothing and return 0. +*/ +XMLPARSEAPI(int) +XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt); +/* If XML_Parse or XML_ParseBuffer have returned XML_STATUS_ERROR, then + XML_GetErrorCode returns information about the error. +*/ XMLPARSEAPI(enum XML_Error) XML_GetErrorCode(XML_Parser parser); -/* These functions return information about the current parse location. -They may be called when XML_Parse or XML_ParseBuffer return 0; -in this case the location is the location of the character at which -the error was detected. -They may also be called from any other callback called to report -some parse event; in this the location is the location of the first -of the sequence of characters that generated the event. */ - -XMLPARSEAPI(int) XML_GetCurrentLineNumber(XML_Parser parser); -XMLPARSEAPI(int) XML_GetCurrentColumnNumber(XML_Parser parser); -XMLPARSEAPI(long) XML_GetCurrentByteIndex(XML_Parser parser); +/* These functions return information about the current parse + location. They may be called from any callback called to report + some parse event; in this case the location is the location of the + first of the sequence of characters that generated the event. When + called from callbacks generated by declarations in the document + prologue, the location identified isn't as neatly defined, but will + be within the relevant markup. When called outside of the callback + functions, the position indicated will be just past the last parse + event (regardless of whether there was an associated callback). + + They may also be called after returning from a call to XML_Parse + or XML_ParseBuffer. If the return value is XML_STATUS_ERROR then + the location is the location of the character at which the error + was detected; otherwise the location is the location of the last + parse event, as described above. + + Note: XML_GetCurrentLineNumber and XML_GetCurrentColumnNumber + return 0 to indicate an error. + Note: XML_GetCurrentByteIndex returns -1 to indicate an error. +*/ +XMLPARSEAPI(XML_Size) XML_GetCurrentLineNumber(XML_Parser parser); +XMLPARSEAPI(XML_Size) XML_GetCurrentColumnNumber(XML_Parser parser); +XMLPARSEAPI(XML_Index) XML_GetCurrentByteIndex(XML_Parser parser); /* Return the number of bytes in the current event. -Returns 0 if the event is in an internal entity. */ - + Returns 0 if the event is in an internal entity. +*/ XMLPARSEAPI(int) XML_GetCurrentByteCount(XML_Parser parser); @@ -688,29 +941,44 @@ XML_GetCurrentByteCount(XML_Parser parser); the integer pointed to by offset to the offset within this buffer of the current parse position, and sets the integer pointed to by size to the size of this buffer (the number of input bytes). Otherwise - returns a null pointer. Also returns a null pointer if a parse isn't + returns a NULL pointer. Also returns a NULL pointer if a parse isn't active. NOTE: The character pointer returned should not be used outside - the handler that makes the call. */ - + the handler that makes the call. +*/ XMLPARSEAPI(const char *) -XML_GetInputContext(XML_Parser parser, - int *offset, - int *size); +XML_GetInputContext(XML_Parser parser, int *offset, int *size); /* For backwards compatibility with previous versions. */ #define XML_GetErrorLineNumber XML_GetCurrentLineNumber #define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber #define XML_GetErrorByteIndex XML_GetCurrentByteIndex +/* Frees the content model passed to the element declaration handler */ +XMLPARSEAPI(void) +XML_FreeContentModel(XML_Parser parser, XML_Content *model); + +/* Exposing the memory handling functions used in Expat */ +XMLPARSEAPI(void *) +XML_ATTR_MALLOC +XML_ATTR_ALLOC_SIZE(2) +XML_MemMalloc(XML_Parser parser, size_t size); + +XMLPARSEAPI(void *) +XML_ATTR_ALLOC_SIZE(3) +XML_MemRealloc(XML_Parser parser, void *ptr, size_t size); + +XMLPARSEAPI(void) +XML_MemFree(XML_Parser parser, void *ptr); + /* Frees memory used by the parser. */ XMLPARSEAPI(void) XML_ParserFree(XML_Parser parser); /* Returns a string describing the error. */ XMLPARSEAPI(const XML_LChar *) -XML_ErrorString(int code); +XML_ErrorString(enum XML_Error code); /* Return a string containing the version number of this expat */ XMLPARSEAPI(const XML_LChar *) @@ -723,17 +991,60 @@ typedef struct { } XML_Expat_Version; /* Return an XML_Expat_Version structure containing numeric version - number information for this version of expat */ - + number information for this version of expat. +*/ XMLPARSEAPI(XML_Expat_Version) XML_ExpatVersionInfo(void); -#define XML_MAJOR_VERSION 1 -#define XML_MINOR_VERSION 95 -#define XML_MICRO_VERSION 2 +/* Added in Expat 1.95.5. */ +enum XML_FeatureEnum { + XML_FEATURE_END = 0, + XML_FEATURE_UNICODE, + XML_FEATURE_UNICODE_WCHAR_T, + XML_FEATURE_DTD, + XML_FEATURE_CONTEXT_BYTES, + XML_FEATURE_MIN_SIZE, + XML_FEATURE_SIZEOF_XML_CHAR, + XML_FEATURE_SIZEOF_XML_LCHAR, + XML_FEATURE_NS, + XML_FEATURE_LARGE_SIZE, + XML_FEATURE_ATTR_INFO, + /* Added in Expat 2.4.0. */ + XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, + XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT + /* Additional features must be added to the end of this enum. */ +}; + +typedef struct { + enum XML_FeatureEnum feature; + const XML_LChar *name; + long int value; +} XML_Feature; + +XMLPARSEAPI(const XML_Feature *) +XML_GetFeatureList(void); + +#ifdef XML_DTD +/* Added in Expat 2.4.0. */ +XMLPARSEAPI(XML_Bool) +XML_SetBillionLaughsAttackProtectionMaximumAmplification( + XML_Parser parser, float maximumAmplificationFactor); + +/* Added in Expat 2.4.0. */ +XMLPARSEAPI(XML_Bool) +XML_SetBillionLaughsAttackProtectionActivationThreshold( + XML_Parser parser, unsigned long long activationThresholdBytes); +#endif + +/* Expat follows the semantic versioning convention. + See http://semver.org. +*/ +#define XML_MAJOR_VERSION 2 +#define XML_MINOR_VERSION 4 +#define XML_MICRO_VERSION 1 #ifdef __cplusplus } #endif -#endif /* not XmlParse_INCLUDED */ +#endif /* not Expat_INCLUDED */ diff --git a/Modules/ThirdParty/Expat/src/expat/expat_config.h.cmake b/Modules/ThirdParty/Expat/src/expat/expat_config.h.cmake new file mode 100644 index 00000000000..173fed168c1 --- /dev/null +++ b/Modules/ThirdParty/Expat/src/expat/expat_config.h.cmake @@ -0,0 +1,115 @@ +/* expat_config.h.cmake. Based upon generated expat_config.h.in. */ + +/* 1234 = LIL_ENDIAN, 4321 = BIGENDIAN */ +#cmakedefine BYTEORDER @BYTEORDER@ + +/* Define to 1 if you have the `arc4random' function. */ +#cmakedefine HAVE_ARC4RANDOM + +/* Define to 1 if you have the `arc4random_buf' function. */ +#cmakedefine HAVE_ARC4RANDOM_BUF + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_DLFCN_H + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_FCNTL_H + +/* Define to 1 if you have the `getpagesize' function. */ +#cmakedefine HAVE_GETPAGESIZE + +/* Define to 1 if you have the `getrandom' function. */ +#cmakedefine HAVE_GETRANDOM + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_INTTYPES_H + +/* Define to 1 if you have the `bsd' library (-lbsd). */ +#cmakedefine HAVE_LIBBSD + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_MEMORY_H + +/* Define to 1 if you have a working `mmap' system call. */ +#cmakedefine HAVE_MMAP + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_STDINT_H + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_STDLIB_H + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_STRINGS_H + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_STRING_H + +/* Define to 1 if you have `syscall' and `SYS_getrandom'. */ +#cmakedefine HAVE_SYSCALL_GETRANDOM + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_SYS_STAT_H + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_SYS_TYPES_H + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_UNISTD_H + +/* Name of package */ +#define PACKAGE "@PACKAGE_NAME@" + +/* Define to the address where bug reports for this package should be sent. */ +#cmakedefine PACKAGE_BUGREPORT "@PACKAGE_BUGREPORT@" + +/* Define to the full name of this package. */ +#cmakedefine PACKAGE_NAME "@PACKAGE_NAME@" + +/* Define to the full name and version of this package. */ +#cmakedefine PACKAGE_STRING "@PACKAGE_STRING@" + +/* Define to the one symbol short name of this package. */ +#cmakedefine PACKAGE_TARNAME "@PACKAGE_TARNAME@" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#cmakedefine PACKAGE_VERSION "@PACKAGE_VERSION@" + +/* Define to 1 if you have the ANSI C header files. */ +#cmakedefine STDC_HEADERS + +/* whether byteorder is bigendian */ +#cmakedefine WORDS_BIGENDIAN + +/* Define to allow retrieving the byte offsets for attribute names and values. + */ +#cmakedefine XML_ATTR_INFO + +/* Define to specify how much context to retain around the current parse + point. */ +#cmakedefine XML_CONTEXT_BYTES @XML_CONTEXT_BYTES@ + +#if ! defined(_WIN32) +/* Define to include code reading entropy from `/dev/urandom'. */ + #cmakedefine XML_DEV_URANDOM +#endif + +/* Define to make parameter entity parsing functionality available. */ +#cmakedefine XML_DTD + +/* Define to make XML Namespaces functionality available. */ +#cmakedefine XML_NS + +/* Define to __FUNCTION__ or "" if `__func__' does not conform to ANSI C. */ +#ifdef _MSC_VER +# define __func__ __FUNCTION__ +#endif + +/* Define to `long' if does not define. */ +#cmakedefine off_t @OFF_T@ + +/* Define to `unsigned' if does not define. */ +#cmakedefine size_t @SIZE_T@ diff --git a/Modules/ThirdParty/Expat/src/expat/expat_external.h b/Modules/ThirdParty/Expat/src/expat/expat_external.h new file mode 100644 index 00000000000..8829f770910 --- /dev/null +++ b/Modules/ThirdParty/Expat/src/expat/expat_external.h @@ -0,0 +1,165 @@ +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2000-2004 Fred L. Drake, Jr. + Copyright (c) 2001-2002 Greg Stein + Copyright (c) 2002-2006 Karl Waclawek + Copyright (c) 2016 Cristian Rodríguez + Copyright (c) 2016-2019 Sebastian Pipping + Copyright (c) 2017 Rhodri James + Copyright (c) 2018 Yury Gribov + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#ifndef Expat_External_INCLUDED +#define Expat_External_INCLUDED 1 + +/* External API definitions */ + +/* Expat tries very hard to make the API boundary very specifically + defined. There are two macros defined to control this boundary; + each of these can be defined before including this header to + achieve some different behavior, but doing so it not recommended or + tested frequently. + + XMLCALL - The calling convention to use for all calls across the + "library boundary." This will default to cdecl, and + try really hard to tell the compiler that's what we + want. + + XMLIMPORT - Whatever magic is needed to note that a function is + to be imported from a dynamically loaded library + (.dll, .so, or .sl, depending on your platform). + + The XMLCALL macro was added in Expat 1.95.7. The only one which is + expected to be directly useful in client code is XMLCALL. + + Note that on at least some Unix versions, the Expat library must be + compiled with the cdecl calling convention as the default since + system headers may assume the cdecl convention. +*/ +#ifndef XMLCALL +# if defined(_MSC_VER) +# define XMLCALL __cdecl +# elif defined(__GNUC__) && defined(__i386) && ! defined(__INTEL_COMPILER) +# define XMLCALL __attribute__((cdecl)) +# else +/* For any platform which uses this definition and supports more than + one calling convention, we need to extend this definition to + declare the convention used on that platform, if it's possible to + do so. + + If this is the case for your platform, please file a bug report + with information on how to identify your platform via the C + pre-processor and how to specify the same calling convention as the + platform's malloc() implementation. +*/ +# define XMLCALL +# endif +#endif /* not defined XMLCALL */ + +#if ! defined(XML_STATIC) && ! defined(XMLIMPORT) +# ifndef XML_BUILDING_EXPAT +/* using Expat from an application */ + +# if defined(_MSC_EXTENSIONS) && ! defined(__BEOS__) && ! defined(__CYGWIN__) +# define XMLIMPORT __declspec(dllimport) +# endif + +# endif +#endif /* not defined XML_STATIC */ + +#ifndef XML_ENABLE_VISIBILITY +# define XML_ENABLE_VISIBILITY 0 +#endif + +#if ! defined(XMLIMPORT) && XML_ENABLE_VISIBILITY +# define XMLIMPORT __attribute__((visibility("default"))) +#endif + +/* If we didn't define it above, define it away: */ +#ifndef XMLIMPORT +# define XMLIMPORT +#endif + +#if defined(__GNUC__) \ + && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)) +# define XML_ATTR_MALLOC __attribute__((__malloc__)) +#else +# define XML_ATTR_MALLOC +#endif + +#if defined(__GNUC__) \ + && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) +# define XML_ATTR_ALLOC_SIZE(x) __attribute__((__alloc_size__(x))) +#else +# define XML_ATTR_ALLOC_SIZE(x) +#endif + +#define XMLPARSEAPI(type) XMLIMPORT type XMLCALL + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef XML_UNICODE_WCHAR_T +# ifndef XML_UNICODE +# define XML_UNICODE +# endif +# if defined(__SIZEOF_WCHAR_T__) && (__SIZEOF_WCHAR_T__ != 2) +# error "sizeof(wchar_t) != 2; Need -fshort-wchar for both Expat and libc" +# endif +#endif + +#ifdef XML_UNICODE /* Information is UTF-16 encoded. */ +# ifdef XML_UNICODE_WCHAR_T +typedef wchar_t XML_Char; +typedef wchar_t XML_LChar; +# else +typedef unsigned short XML_Char; +typedef char XML_LChar; +# endif /* XML_UNICODE_WCHAR_T */ +#else /* Information is UTF-8 encoded. */ +typedef char XML_Char; +typedef char XML_LChar; +#endif /* XML_UNICODE */ + +#ifdef XML_LARGE_SIZE /* Use large integers for file/stream positions. */ +typedef long long XML_Index; +typedef unsigned long long XML_Size; +#else +typedef long XML_Index; +typedef unsigned long XML_Size; +#endif /* XML_LARGE_SIZE */ + +#ifdef __cplusplus +} +#endif + +#endif /* not Expat_External_INCLUDED */ diff --git a/Modules/ThirdParty/Expat/src/expat/iasciitab.h b/Modules/ThirdParty/Expat/src/expat/iasciitab.h index 55dbc398b87..5d8646f2a31 100644 --- a/Modules/ThirdParty/Expat/src/expat/iasciitab.h +++ b/Modules/ThirdParty/Expat/src/expat/iasciitab.h @@ -1,38 +1,67 @@ /* -Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* Like asciitab.h, except that 0xD has code BT_S rather than BT_CR */ /* 0x00 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML, -/* 0x0C */ BT_NONXML, BT_S, BT_NONXML, BT_NONXML, -/* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM, -/* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS, -/* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS, -/* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL, -/* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, -/* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, -/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI, -/* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST, -/* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, -/* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, -/* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB, -/* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT, -/* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, -/* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, -/* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, -/* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML, + /* 0x0C */ BT_NONXML, BT_S, BT_NONXML, BT_NONXML, + /* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM, + /* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS, + /* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS, + /* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL, + /* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, + /* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, + /* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI, + /* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST, + /* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, + /* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, + /* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB, + /* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT, + /* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, + /* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, + /* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, + /* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER, diff --git a/Modules/ThirdParty/Expat/src/expat/internal.h b/Modules/ThirdParty/Expat/src/expat/internal.h new file mode 100644 index 00000000000..444eba0fb03 --- /dev/null +++ b/Modules/ThirdParty/Expat/src/expat/internal.h @@ -0,0 +1,163 @@ +/* internal.h + + Internal definitions used by Expat. This is not needed to compile + client code. + + The following calling convention macros are defined for frequently + called functions: + + FASTCALL - Used for those internal functions that have a simple + body and a low number of arguments and local variables. + + PTRCALL - Used for functions called though function pointers. + + PTRFASTCALL - Like PTRCALL, but for low number of arguments. + + inline - Used for selected internal functions for which inlining + may improve performance on some platforms. + + Note: Use of these macros is based on judgement, not hard rules, + and therefore subject to change. + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 2002-2003 Fred L. Drake, Jr. + Copyright (c) 2002-2006 Karl Waclawek + Copyright (c) 2003 Greg Stein + Copyright (c) 2016-2021 Sebastian Pipping + Copyright (c) 2018 Yury Gribov + Copyright (c) 2019 David Loffredo + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#if defined(__GNUC__) && defined(__i386__) && ! defined(__MINGW32__) +/* We'll use this version by default only where we know it helps. + + regparm() generates warnings on Solaris boxes. See SF bug #692878. + + Instability reported with egcs on a RedHat Linux 7.3. + Let's comment out: + #define FASTCALL __attribute__((stdcall, regparm(3))) + and let's try this: +*/ +# define FASTCALL __attribute__((regparm(3))) +# define PTRFASTCALL __attribute__((regparm(3))) +#endif + +/* Using __fastcall seems to have an unexpected negative effect under + MS VC++, especially for function pointers, so we won't use it for + now on that platform. It may be reconsidered for a future release + if it can be made more effective. + Likely reason: __fastcall on Windows is like stdcall, therefore + the compiler cannot perform stack optimizations for call clusters. +*/ + +/* Make sure all of these are defined if they aren't already. */ + +#ifndef FASTCALL +# define FASTCALL +#endif + +#ifndef PTRCALL +# define PTRCALL +#endif + +#ifndef PTRFASTCALL +# define PTRFASTCALL +#endif + +#ifndef XML_MIN_SIZE +# if ! defined(__cplusplus) && ! defined(inline) +# ifdef __GNUC__ +# define inline __inline +# endif /* __GNUC__ */ +# endif +#endif /* XML_MIN_SIZE */ + +#ifdef __cplusplus +# define inline inline +#else +# ifndef inline +# define inline +# endif +#endif + +#include // ULONG_MAX + +#if defined(_WIN32) && ! defined(__USE_MINGW_ANSI_STDIO) +# define EXPAT_FMT_ULL(midpart) "%" midpart "I64u" +# if defined(_WIN64) // Note: modifiers "td" and "zu" do not work for MinGW +# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "I64d" +# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "I64u" +# else +# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d" +# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u" +# endif +#else +# define EXPAT_FMT_ULL(midpart) "%" midpart "llu" +# if ! defined(ULONG_MAX) +# error Compiler did not define ULONG_MAX for us +# elif ULONG_MAX == 18446744073709551615u // 2^64-1 +# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld" +# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "lu" +# else +# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d" +# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u" +# endif +#endif + +#ifndef UNUSED_P +# define UNUSED_P(p) (void)p +#endif + +/* NOTE BEGIN If you ever patch these defaults to greater values + for non-attack XML payload in your environment, + please file a bug report with libexpat. Thank you! +*/ +#define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT \ + 100.0f +#define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT \ + 8388608 // 8 MiB, 2^23 +/* NOTE END */ + +#include "expat.h" // so we can use type XML_Parser below + +#ifdef __cplusplus +extern "C" { +#endif + +void _INTERNAL_trim_to_complete_utf8_characters(const char *from, + const char **fromLimRef); + +#if defined(XML_DTD) +unsigned long long testingAccountingGetCountBytesDirect(XML_Parser parser); +unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser); +const char *unsignedCharToPrintable(unsigned char c); +#endif + +#ifdef __cplusplus +} +#endif diff --git a/Modules/ThirdParty/Expat/src/expat/latin1tab.h b/Modules/ThirdParty/Expat/src/expat/latin1tab.h index 178b1d186dd..b681d278af6 100644 --- a/Modules/ThirdParty/Expat/src/expat/latin1tab.h +++ b/Modules/ThirdParty/Expat/src/expat/latin1tab.h @@ -1,37 +1,66 @@ /* -Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* 0x80 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0x84 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0x88 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0x8C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0x90 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0x94 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0x98 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0x9C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0xA0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0xA4 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0xA8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER, -/* 0xAC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0xB0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0xB4 */ BT_OTHER, BT_NMSTRT, BT_OTHER, BT_NAME, -/* 0xB8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER, -/* 0xBC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, -/* 0xC0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xC4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xC8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xCC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xD0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xD4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, -/* 0xD8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xDC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xE0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xE4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xE8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xEC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xF0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xF4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, -/* 0xF8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, -/* 0xFC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0x84 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x88 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x8C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x90 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x94 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x98 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0x9C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0xA0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0xA4 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0xA8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER, + /* 0xAC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0xB0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0xB4 */ BT_OTHER, BT_NMSTRT, BT_OTHER, BT_NAME, + /* 0xB8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER, + /* 0xBC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, + /* 0xC0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xC4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xC8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xCC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xD0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xD4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, + /* 0xD8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xDC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xE0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xE4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xE8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xEC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xF0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xF4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, + /* 0xF8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, + /* 0xFC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, diff --git a/Modules/ThirdParty/Expat/src/expat/nametab.h b/Modules/ThirdParty/Expat/src/expat/nametab.h index b05e62c77a6..63485446b96 100644 --- a/Modules/ThirdParty/Expat/src/expat/nametab.h +++ b/Modules/ThirdParty/Expat/src/expat/nametab.h @@ -1,150 +1,136 @@ +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 2000 Clark Cooper + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + static const unsigned namingBitmap[] = { -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, -0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, -0x00000000, 0x04000000, 0x87FFFFFE, 0x07FFFFFE, -0x00000000, 0x00000000, 0xFF7FFFFF, 0xFF7FFFFF, -0xFFFFFFFF, 0x7FF3FFFF, 0xFFFFFDFE, 0x7FFFFFFF, -0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE00F, 0xFC31FFFF, -0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF, -0xFFFFFFFF, 0xF80001FF, 0x00000003, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0xFFFFD740, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD, -0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF, -0xFFFF0003, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF, -0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE, -0x0000007F, 0x00000000, 0xFFFF0000, 0x000707FF, -0x00000000, 0x07FFFFFE, 0x000007FE, 0xFFFE0000, -0xFFFFFFFF, 0x7CFFFFFF, 0x002F7FFF, 0x00000060, -0xFFFFFFE0, 0x23FFFFFF, 0xFF000000, 0x00000003, -0xFFF99FE0, 0x03C5FDFF, 0xB0000000, 0x00030003, -0xFFF987E0, 0x036DFDFF, 0x5E000000, 0x001C0000, -0xFFFBAFE0, 0x23EDFDFF, 0x00000000, 0x00000001, -0xFFF99FE0, 0x23CDFDFF, 0xB0000000, 0x00000003, -0xD63DC7E0, 0x03BFC718, 0x00000000, 0x00000000, -0xFFFDDFE0, 0x03EFFDFF, 0x00000000, 0x00000003, -0xFFFDDFE0, 0x03EFFDFF, 0x40000000, 0x00000003, -0xFFFDDFE0, 0x03FFFDFF, 0x00000000, 0x00000003, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0xFFFFFFFE, 0x000D7FFF, 0x0000003F, 0x00000000, -0xFEF02596, 0x200D6CAE, 0x0000001F, 0x00000000, -0x00000000, 0x00000000, 0xFFFFFEFF, 0x000003FF, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x007FFFFF, -0x0007DAED, 0x50000000, 0x82315001, 0x002C62AB, -0x40000000, 0xF580C900, 0x00000007, 0x02010800, -0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, -0x0FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x03FFFFFF, -0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF, -0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF, -0x00000000, 0x00004C40, 0x00000000, 0x00000000, -0x00000007, 0x00000000, 0x00000000, 0x00000000, -0x00000080, 0x000003FE, 0xFFFFFFFE, 0xFFFFFFFF, -0x001FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x07FFFFFF, -0xFFFFFFE0, 0x00001FFF, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, -0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000, -0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, -0xFFFFFFFF, 0x0000000F, 0x00000000, 0x00000000, -0x00000000, 0x07FF6000, 0x87FFFFFE, 0x07FFFFFE, -0x00000000, 0x00800000, 0xFF7FFFFF, 0xFF7FFFFF, -0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF, -0xFFFFFFFF, 0xF80001FF, 0x00030003, 0x00000000, -0xFFFFFFFF, 0xFFFFFFFF, 0x0000003F, 0x00000003, -0xFFFFD7C0, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD, -0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF, -0xFFFF007B, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF, -0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE, -0xFFFE007F, 0xBBFFFFFB, 0xFFFF0016, 0x000707FF, -0x00000000, 0x07FFFFFE, 0x0007FFFF, 0xFFFF03FF, -0xFFFFFFFF, 0x7CFFFFFF, 0xFFEF7FFF, 0x03FF3DFF, -0xFFFFFFEE, 0xF3FFFFFF, 0xFF1E3FFF, 0x0000FFCF, -0xFFF99FEE, 0xD3C5FDFF, 0xB080399F, 0x0003FFCF, -0xFFF987E4, 0xD36DFDFF, 0x5E003987, 0x001FFFC0, -0xFFFBAFEE, 0xF3EDFDFF, 0x00003BBF, 0x0000FFC1, -0xFFF99FEE, 0xF3CDFDFF, 0xB0C0398F, 0x0000FFC3, -0xD63DC7EC, 0xC3BFC718, 0x00803DC7, 0x0000FF80, -0xFFFDDFEE, 0xC3EFFDFF, 0x00603DDF, 0x0000FFC3, -0xFFFDDFEC, 0xC3EFFDFF, 0x40603DDF, 0x0000FFC3, -0xFFFDDFEC, 0xC3FFFDFF, 0x00803DCF, 0x0000FFC3, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0xFFFFFFFE, 0x07FF7FFF, 0x03FF7FFF, 0x00000000, -0xFEF02596, 0x3BFF6CAE, 0x03FF3F5F, 0x00000000, -0x03000000, 0xC2A003FF, 0xFFFFFEFF, 0xFFFE03FF, -0xFEBF0FDF, 0x02FE3FFF, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x1FFF0000, 0x00000002, -0x000000A0, 0x003EFFFE, 0xFFFFFFFE, 0xFFFFFFFF, -0x661FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x77FFFFFF, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x04000000, + 0x87FFFFFE, 0x07FFFFFE, 0x00000000, 0x00000000, 0xFF7FFFFF, 0xFF7FFFFF, + 0xFFFFFFFF, 0x7FF3FFFF, 0xFFFFFDFE, 0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFE00F, 0xFC31FFFF, 0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF, + 0xFFFFFFFF, 0xF80001FF, 0x00000003, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0xFFFFD740, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD, + 0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF, 0xFFFF0003, 0xFFFFFFFF, + 0xFFFF199F, 0x033FCFFF, 0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE, + 0x0000007F, 0x00000000, 0xFFFF0000, 0x000707FF, 0x00000000, 0x07FFFFFE, + 0x000007FE, 0xFFFE0000, 0xFFFFFFFF, 0x7CFFFFFF, 0x002F7FFF, 0x00000060, + 0xFFFFFFE0, 0x23FFFFFF, 0xFF000000, 0x00000003, 0xFFF99FE0, 0x03C5FDFF, + 0xB0000000, 0x00030003, 0xFFF987E0, 0x036DFDFF, 0x5E000000, 0x001C0000, + 0xFFFBAFE0, 0x23EDFDFF, 0x00000000, 0x00000001, 0xFFF99FE0, 0x23CDFDFF, + 0xB0000000, 0x00000003, 0xD63DC7E0, 0x03BFC718, 0x00000000, 0x00000000, + 0xFFFDDFE0, 0x03EFFDFF, 0x00000000, 0x00000003, 0xFFFDDFE0, 0x03EFFDFF, + 0x40000000, 0x00000003, 0xFFFDDFE0, 0x03FFFDFF, 0x00000000, 0x00000003, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFE, 0x000D7FFF, + 0x0000003F, 0x00000000, 0xFEF02596, 0x200D6CAE, 0x0000001F, 0x00000000, + 0x00000000, 0x00000000, 0xFFFFFEFF, 0x000003FF, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x007FFFFF, 0x0007DAED, 0x50000000, + 0x82315001, 0x002C62AB, 0x40000000, 0xF580C900, 0x00000007, 0x02010800, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0FFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0x03FFFFFF, 0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF, + 0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF, 0x00000000, 0x00004C40, + 0x00000000, 0x00000000, 0x00000007, 0x00000000, 0x00000000, 0x00000000, + 0x00000080, 0x000003FE, 0xFFFFFFFE, 0xFFFFFFFF, 0x001FFFFF, 0xFFFFFFFE, + 0xFFFFFFFF, 0x07FFFFFF, 0xFFFFFFE0, 0x00001FFF, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000000F, + 0x00000000, 0x00000000, 0x00000000, 0x07FF6000, 0x87FFFFFE, 0x07FFFFFE, + 0x00000000, 0x00800000, 0xFF7FFFFF, 0xFF7FFFFF, 0x00FFFFFF, 0x00000000, + 0xFFFF0000, 0xFFFFFFFF, 0xFFFFFFFF, 0xF80001FF, 0x00030003, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0x0000003F, 0x00000003, 0xFFFFD7C0, 0xFFFFFFFB, + 0x547F7FFF, 0x000FFFFD, 0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF, + 0xFFFF007B, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF, 0x00000000, 0xFFFE0000, + 0x027FFFFF, 0xFFFFFFFE, 0xFFFE007F, 0xBBFFFFFB, 0xFFFF0016, 0x000707FF, + 0x00000000, 0x07FFFFFE, 0x0007FFFF, 0xFFFF03FF, 0xFFFFFFFF, 0x7CFFFFFF, + 0xFFEF7FFF, 0x03FF3DFF, 0xFFFFFFEE, 0xF3FFFFFF, 0xFF1E3FFF, 0x0000FFCF, + 0xFFF99FEE, 0xD3C5FDFF, 0xB080399F, 0x0003FFCF, 0xFFF987E4, 0xD36DFDFF, + 0x5E003987, 0x001FFFC0, 0xFFFBAFEE, 0xF3EDFDFF, 0x00003BBF, 0x0000FFC1, + 0xFFF99FEE, 0xF3CDFDFF, 0xB0C0398F, 0x0000FFC3, 0xD63DC7EC, 0xC3BFC718, + 0x00803DC7, 0x0000FF80, 0xFFFDDFEE, 0xC3EFFDFF, 0x00603DDF, 0x0000FFC3, + 0xFFFDDFEC, 0xC3EFFDFF, 0x40603DDF, 0x0000FFC3, 0xFFFDDFEC, 0xC3FFFDFF, + 0x00803DCF, 0x0000FFC3, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFE, 0x07FF7FFF, 0x03FF7FFF, 0x00000000, 0xFEF02596, 0x3BFF6CAE, + 0x03FF3F5F, 0x00000000, 0x03000000, 0xC2A003FF, 0xFFFFFEFF, 0xFFFE03FF, + 0xFEBF0FDF, 0x02FE3FFF, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1FFF0000, 0x00000002, + 0x000000A0, 0x003EFFFE, 0xFFFFFFFE, 0xFFFFFFFF, 0x661FFFFF, 0xFFFFFFFE, + 0xFFFFFFFF, 0x77FFFFFF, }; static const unsigned char nmstrtPages[] = { -0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00, -0x00, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, -0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13, -0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x15, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00, 0x00, 0x09, 0x0A, 0x0B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13, 0x00, 0x14, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x15, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, }; static const unsigned char namePages[] = { -0x19, 0x03, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x00, -0x00, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, -0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13, -0x26, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x27, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x19, 0x03, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x00, 0x00, 0x1F, 0x20, 0x21, + 0x22, 0x23, 0x24, 0x25, 0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13, 0x26, 0x14, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x27, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, }; diff --git a/Modules/ThirdParty/Expat/src/expat/siphash.h b/Modules/ThirdParty/Expat/src/expat/siphash.h new file mode 100644 index 00000000000..e5406d7ee9e --- /dev/null +++ b/Modules/ThirdParty/Expat/src/expat/siphash.h @@ -0,0 +1,393 @@ +/* ========================================================================== + * siphash.h - SipHash-2-4 in a single header file + * -------------------------------------------------------------------------- + * Derived by William Ahern from the reference implementation[1] published[2] + * by Jean-Philippe Aumasson and Daniel J. Berstein. + * Minimal changes by Sebastian Pipping and Victor Stinner on top, see below. + * Licensed under the CC0 Public Domain Dedication license. + * + * 1. https://www.131002.net/siphash/siphash24.c + * 2. https://www.131002.net/siphash/ + * -------------------------------------------------------------------------- + * HISTORY: + * + * 2020-10-03 (Sebastian Pipping) + * - Drop support for Visual Studio 9.0/2008 and earlier + * + * 2019-08-03 (Sebastian Pipping) + * - Mark part of sip24_valid as to be excluded from clang-format + * - Re-format code using clang-format 9 + * + * 2018-07-08 (Anton Maklakov) + * - Add "fall through" markers for GCC's -Wimplicit-fallthrough + * + * 2017-11-03 (Sebastian Pipping) + * - Hide sip_tobin and sip_binof unless SIPHASH_TOBIN macro is defined + * + * 2017-07-25 (Vadim Zeitlin) + * - Fix use of SIPHASH_MAIN macro + * + * 2017-07-05 (Sebastian Pipping) + * - Use _SIP_ULL macro to not require a C++11 compiler if compiled as C++ + * - Add const qualifiers at two places + * - Ensure <=80 characters line length (assuming tab width 4) + * + * 2017-06-23 (Victor Stinner) + * - Address Win64 compile warnings + * + * 2017-06-18 (Sebastian Pipping) + * - Clarify license note in the header + * - Address C89 issues: + * - Stop using inline keyword (and let compiler decide) + * - Replace _Bool by int + * - Turn macro siphash24 into a function + * - Address invalid conversion (void pointer) by explicit cast + * - Address lack of stdint.h for Visual Studio 2003 to 2008 + * - Always expose sip24_valid (for self-tests) + * + * 2012-11-04 - Born. (William Ahern) + * -------------------------------------------------------------------------- + * USAGE: + * + * SipHash-2-4 takes as input two 64-bit words as the key, some number of + * message bytes, and outputs a 64-bit word as the message digest. This + * implementation employs two data structures: a struct sipkey for + * representing the key, and a struct siphash for representing the hash + * state. + * + * For converting a 16-byte unsigned char array to a key, use either the + * macro sip_keyof or the routine sip_tokey. The former instantiates a + * compound literal key, while the latter requires a key object as a + * parameter. + * + * unsigned char secret[16]; + * arc4random_buf(secret, sizeof secret); + * struct sipkey *key = sip_keyof(secret); + * + * For hashing a message, use either the convenience macro siphash24 or the + * routines sip24_init, sip24_update, and sip24_final. + * + * struct siphash state; + * void *msg; + * size_t len; + * uint64_t hash; + * + * sip24_init(&state, key); + * sip24_update(&state, msg, len); + * hash = sip24_final(&state); + * + * or + * + * hash = siphash24(msg, len, key); + * + * To convert the 64-bit hash value to a canonical 8-byte little-endian + * binary representation, use either the macro sip_binof or the routine + * sip_tobin. The former instantiates and returns a compound literal array, + * while the latter requires an array object as a parameter. + * -------------------------------------------------------------------------- + * NOTES: + * + * o Neither sip_keyof, sip_binof, nor siphash24 will work with compilers + * lacking compound literal support. Instead, you must use the lower-level + * interfaces which take as parameters the temporary state objects. + * + * o Uppercase macros may evaluate parameters more than once. Lowercase + * macros should not exhibit any such side effects. + * ========================================================================== + */ +#ifndef SIPHASH_H +#define SIPHASH_H + +#include /* size_t */ +#include /* uint64_t uint32_t uint8_t */ + +/* + * Workaround to not require a C++11 compiler for using ULL suffix + * if this code is included and compiled as C++; related GCC warning is: + * warning: use of C++11 long long integer constant [-Wlong-long] + */ +#define _SIP_ULL(high, low) (((uint64_t)high << 32) | low) + +#define SIP_ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) + +#define SIP_U32TO8_LE(p, v) \ + (p)[0] = (uint8_t)((v) >> 0); \ + (p)[1] = (uint8_t)((v) >> 8); \ + (p)[2] = (uint8_t)((v) >> 16); \ + (p)[3] = (uint8_t)((v) >> 24); + +#define SIP_U64TO8_LE(p, v) \ + SIP_U32TO8_LE((p) + 0, (uint32_t)((v) >> 0)); \ + SIP_U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); + +#define SIP_U8TO64_LE(p) \ + (((uint64_t)((p)[0]) << 0) | ((uint64_t)((p)[1]) << 8) \ + | ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) \ + | ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) \ + | ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) + +#define SIPHASH_INITIALIZER \ + { 0, 0, 0, 0, {0}, 0, 0 } + +struct siphash { + uint64_t v0, v1, v2, v3; + + unsigned char buf[8], *p; + uint64_t c; +}; /* struct siphash */ + +#define SIP_KEYLEN 16 + +struct sipkey { + uint64_t k[2]; +}; /* struct sipkey */ + +#define sip_keyof(k) sip_tokey(&(struct sipkey){{0}}, (k)) + +static struct sipkey * +sip_tokey(struct sipkey *key, const void *src) { + key->k[0] = SIP_U8TO64_LE((const unsigned char *)src); + key->k[1] = SIP_U8TO64_LE((const unsigned char *)src + 8); + return key; +} /* sip_tokey() */ + +#ifdef SIPHASH_TOBIN + +# define sip_binof(v) sip_tobin((unsigned char[8]){0}, (v)) + +static void * +sip_tobin(void *dst, uint64_t u64) { + SIP_U64TO8_LE((unsigned char *)dst, u64); + return dst; +} /* sip_tobin() */ + +#endif /* SIPHASH_TOBIN */ + +static void +sip_round(struct siphash *H, const int rounds) { + int i; + + for (i = 0; i < rounds; i++) { + H->v0 += H->v1; + H->v1 = SIP_ROTL(H->v1, 13); + H->v1 ^= H->v0; + H->v0 = SIP_ROTL(H->v0, 32); + + H->v2 += H->v3; + H->v3 = SIP_ROTL(H->v3, 16); + H->v3 ^= H->v2; + + H->v0 += H->v3; + H->v3 = SIP_ROTL(H->v3, 21); + H->v3 ^= H->v0; + + H->v2 += H->v1; + H->v1 = SIP_ROTL(H->v1, 17); + H->v1 ^= H->v2; + H->v2 = SIP_ROTL(H->v2, 32); + } +} /* sip_round() */ + +static struct siphash * +sip24_init(struct siphash *H, const struct sipkey *key) { + H->v0 = _SIP_ULL(0x736f6d65U, 0x70736575U) ^ key->k[0]; + H->v1 = _SIP_ULL(0x646f7261U, 0x6e646f6dU) ^ key->k[1]; + H->v2 = _SIP_ULL(0x6c796765U, 0x6e657261U) ^ key->k[0]; + H->v3 = _SIP_ULL(0x74656462U, 0x79746573U) ^ key->k[1]; + + H->p = H->buf; + H->c = 0; + + return H; +} /* sip24_init() */ + +#define sip_endof(a) (&(a)[sizeof(a) / sizeof *(a)]) + +static struct siphash * +sip24_update(struct siphash *H, const void *src, size_t len) { + const unsigned char *p = (const unsigned char *)src, *pe = p + len; + uint64_t m; + + do { + while (p < pe && H->p < sip_endof(H->buf)) + *H->p++ = *p++; + + if (H->p < sip_endof(H->buf)) + break; + + m = SIP_U8TO64_LE(H->buf); + H->v3 ^= m; + sip_round(H, 2); + H->v0 ^= m; + + H->p = H->buf; + H->c += 8; + } while (p < pe); + + return H; +} /* sip24_update() */ + +static uint64_t +sip24_final(struct siphash *H) { + const char left = (char)(H->p - H->buf); + uint64_t b = (H->c + left) << 56; + + switch (left) { + case 7: + b |= (uint64_t)H->buf[6] << 48; + /* fall through */ + case 6: + b |= (uint64_t)H->buf[5] << 40; + /* fall through */ + case 5: + b |= (uint64_t)H->buf[4] << 32; + /* fall through */ + case 4: + b |= (uint64_t)H->buf[3] << 24; + /* fall through */ + case 3: + b |= (uint64_t)H->buf[2] << 16; + /* fall through */ + case 2: + b |= (uint64_t)H->buf[1] << 8; + /* fall through */ + case 1: + b |= (uint64_t)H->buf[0] << 0; + /* fall through */ + case 0: + break; + } + + H->v3 ^= b; + sip_round(H, 2); + H->v0 ^= b; + H->v2 ^= 0xff; + sip_round(H, 4); + + return H->v0 ^ H->v1 ^ H->v2 ^ H->v3; +} /* sip24_final() */ + +static uint64_t +siphash24(const void *src, size_t len, const struct sipkey *key) { + struct siphash state = SIPHASH_INITIALIZER; + return sip24_final(sip24_update(sip24_init(&state, key), src, len)); +} /* siphash24() */ + +/* + * SipHash-2-4 output with + * k = 00 01 02 ... + * and + * in = (empty string) + * in = 00 (1 byte) + * in = 00 01 (2 bytes) + * in = 00 01 02 (3 bytes) + * ... + * in = 00 01 02 ... 3e (63 bytes) + */ +static int +sip24_valid(void) { + /* clang-format off */ + static const unsigned char vectors[64][8] = { + { 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72, }, + { 0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74, }, + { 0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d, }, + { 0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85, }, + { 0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf, }, + { 0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18, }, + { 0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb, }, + { 0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab, }, + { 0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93, }, + { 0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e, }, + { 0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a, }, + { 0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4, }, + { 0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75, }, + { 0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14, }, + { 0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7, }, + { 0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1, }, + { 0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f, }, + { 0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69, }, + { 0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b, }, + { 0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb, }, + { 0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe, }, + { 0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0, }, + { 0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93, }, + { 0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8, }, + { 0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8, }, + { 0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc, }, + { 0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17, }, + { 0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f, }, + { 0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde, }, + { 0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6, }, + { 0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad, }, + { 0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32, }, + { 0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71, }, + { 0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7, }, + { 0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12, }, + { 0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15, }, + { 0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31, }, + { 0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02, }, + { 0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca, }, + { 0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a, }, + { 0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e, }, + { 0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad, }, + { 0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18, }, + { 0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4, }, + { 0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9, }, + { 0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9, }, + { 0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb, }, + { 0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0, }, + { 0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6, }, + { 0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7, }, + { 0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee, }, + { 0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1, }, + { 0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a, }, + { 0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81, }, + { 0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f, }, + { 0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24, }, + { 0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7, }, + { 0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea, }, + { 0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60, }, + { 0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66, }, + { 0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c, }, + { 0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f, }, + { 0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5, }, + { 0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95, } + }; + /* clang-format on */ + + unsigned char in[64]; + struct sipkey k; + size_t i; + + sip_tokey(&k, "\000\001\002\003\004\005\006\007\010\011" + "\012\013\014\015\016\017"); + + for (i = 0; i < sizeof in; ++i) { + in[i] = (unsigned char)i; + + if (siphash24(in, i, &k) != SIP_U8TO64_LE(vectors[i])) + return 0; + } + + return 1; +} /* sip24_valid() */ + +#ifdef SIPHASH_MAIN + +# include + +int +main(void) { + const int ok = sip24_valid(); + + if (ok) + puts("OK"); + else + puts("FAIL"); + + return ! ok; +} /* main() */ + +#endif /* SIPHASH_MAIN */ + +#endif /* SIPHASH_H */ diff --git a/Modules/ThirdParty/Expat/src/expat/utf8tab.h b/Modules/ThirdParty/Expat/src/expat/utf8tab.h index 9e3b6b83eb6..88efcf91cc1 100644 --- a/Modules/ThirdParty/Expat/src/expat/utf8tab.h +++ b/Modules/ThirdParty/Expat/src/expat/utf8tab.h @@ -1,38 +1,66 @@ /* -Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. -*/ + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ /* 0x80 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0x84 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0x88 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0x8C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0x90 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0x94 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0x98 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0x9C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xA0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xA4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xA8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xAC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xB0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xB4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xB8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xBC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, -/* 0xC0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xC4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xC8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xCC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xD0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xD4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xD8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xDC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, -/* 0xE0 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, -/* 0xE4 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, -/* 0xE8 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, -/* 0xEC */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, -/* 0xF0 */ BT_LEAD4, BT_LEAD4, BT_LEAD4, BT_LEAD4, -/* 0xF4 */ BT_LEAD4, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0xF8 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, -/* 0xFC */ BT_NONXML, BT_NONXML, BT_MALFORM, BT_MALFORM, + /* 0x84 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0x88 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0x8C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0x90 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0x94 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0x98 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0x9C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xA0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xA4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xA8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xAC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xB0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xB4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xB8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xBC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, + /* 0xC0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xC4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xC8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xCC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xD0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xD4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xD8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xDC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, + /* 0xE0 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, + /* 0xE4 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, + /* 0xE8 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, + /* 0xEC */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, + /* 0xF0 */ BT_LEAD4, BT_LEAD4, BT_LEAD4, BT_LEAD4, + /* 0xF4 */ BT_LEAD4, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0xF8 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, + /* 0xFC */ BT_NONXML, BT_NONXML, BT_MALFORM, BT_MALFORM, diff --git a/Modules/ThirdParty/Expat/src/expat/winconfig.h b/Modules/ThirdParty/Expat/src/expat/winconfig.h new file mode 100644 index 00000000000..2ecd61b5b94 --- /dev/null +++ b/Modules/ThirdParty/Expat/src/expat/winconfig.h @@ -0,0 +1,45 @@ +/* + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Greg Stein + Copyright (c) 2005 Karl Waclawek + Copyright (c) 2017-2021 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#ifndef WINCONFIG_H +#define WINCONFIG_H + +#define WIN32_LEAN_AND_MEAN +#include +#undef WIN32_LEAN_AND_MEAN + +#include +#include + +#endif /* ndef WINCONFIG_H */ diff --git a/Modules/ThirdParty/Expat/src/expat/xmlparse.c b/Modules/ThirdParty/Expat/src/expat/xmlparse.c index 81886e5e4bb..5ba56eaea63 100644 --- a/Modules/ThirdParty/Expat/src/expat/xmlparse.c +++ b/Modules/ThirdParty/Expat/src/expat/xmlparse.c @@ -1,52 +1,198 @@ -/* -Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. +/* 8539b9040d9d901366a62560a064af7cb99811335784b363abc039c5b0ebc416 (2.4.1+) + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2000-2006 Fred L. Drake, Jr. + Copyright (c) 2001-2002 Greg Stein + Copyright (c) 2002-2016 Karl Waclawek + Copyright (c) 2005-2009 Steven Solie + Copyright (c) 2016 Eric Rahm + Copyright (c) 2016-2021 Sebastian Pipping + Copyright (c) 2016 Gaurav + Copyright (c) 2016 Thomas Beutlich + Copyright (c) 2016 Gustavo Grieco + Copyright (c) 2016 Pascal Cuoq + Copyright (c) 2016 Ed Schouten + Copyright (c) 2017-2018 Rhodri James + Copyright (c) 2017 Václav Slavík + Copyright (c) 2017 Viktor Szakats + Copyright (c) 2017 Chanho Park + Copyright (c) 2017 Rolf Eike Beer + Copyright (c) 2017 Hans Wennborg + Copyright (c) 2018 Anton Maklakov + Copyright (c) 2018 Benjamin Peterson + Copyright (c) 2018 Marco Maggi + Copyright (c) 2018 Mariusz Zaborski + Copyright (c) 2019 David Loffredo + Copyright (c) 2019-2020 Ben Wagner + Copyright (c) 2019 Vadim Zeitlin + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "expatConfig.h" -#include "expat.h" +#if ! defined(_GNU_SOURCE) +# define _GNU_SOURCE 1 /* syscall prototype */ +#endif + +#ifdef _WIN32 +/* force stdlib to define rand_s() */ +# if ! defined(_CRT_RAND_S) +# define _CRT_RAND_S +# endif +#endif #include -#include +#include /* memset(), memcpy() */ +#include +#include /* UINT_MAX */ +#include /* fprintf */ +#include /* getenv, rand_s */ +#include /* uintptr_t */ +#include /* isnan */ + +#ifdef _WIN32 +# define getpid GetCurrentProcessId +#else +# include /* gettimeofday() */ +# include /* getpid() */ +# include /* getpid() */ +# include /* O_RDONLY */ +# include +#endif + +#define XML_BUILDING_EXPAT 1 + +#ifdef _WIN32 +# include "winconfig.h" +#endif + +#include + +#include "ascii.h" +#include "expat.h" +#include "siphash.h" + +#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) +# if defined(HAVE_GETRANDOM) +# include /* getrandom */ +# else +# include /* syscall */ +# include /* SYS_getrandom */ +# endif +# if ! defined(GRND_NONBLOCK) +# define GRND_NONBLOCK 0x0001 +# endif /* defined(GRND_NONBLOCK) */ +#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ + +#if defined(HAVE_LIBBSD) \ + && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM)) +# include +#endif + +#if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32) +# define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800 +#endif + +#if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \ + && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \ + && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \ + && ! defined(XML_POOR_ENTROPY) +# error You do not have support for any sources of high quality entropy \ + enabled. For end user security, that is probably not what you want. \ + \ + Your options include: \ + * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \ + * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \ + * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \ + * BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \ + * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \ + * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \ + * Linux (including <3.17) / BSD / macOS (including <10.7) (/dev/urandom): XML_DEV_URANDOM, \ + * Windows >=Vista (rand_s): _WIN32. \ + \ + If insist on not using any of these, bypass this error by defining \ + XML_POOR_ENTROPY; you have been warned. \ + \ + If you have reasons to patch this detection code away or need changes \ + to the build system, please open a bug. Thank you! +#endif #ifdef XML_UNICODE -#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX -#define XmlConvert XmlUtf16Convert -#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding -#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS -#define XmlEncode XmlUtf16Encode -#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)s) & 1)) +# define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX +# define XmlConvert XmlUtf16Convert +# define XmlGetInternalEncoding XmlGetUtf16InternalEncoding +# define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS +# define XmlEncode XmlUtf16Encode +# define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1)) typedef unsigned short ICHAR; #else -#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX -#define XmlConvert XmlUtf8Convert -#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding -#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS -#define XmlEncode XmlUtf8Encode -#define MUST_CONVERT(enc, s) (!(enc)->isUtf8) +# define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX +# define XmlConvert XmlUtf8Convert +# define XmlGetInternalEncoding XmlGetUtf8InternalEncoding +# define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS +# define XmlEncode XmlUtf8Encode +# define MUST_CONVERT(enc, s) (! (enc)->isUtf8) typedef char ICHAR; #endif - #ifndef XML_NS -#define XmlInitEncodingNS XmlInitEncoding -#define XmlInitUnknownEncodingNS XmlInitUnknownEncoding -#undef XmlGetInternalEncodingNS -#define XmlGetInternalEncodingNS XmlGetInternalEncoding -#define XmlParseXmlDeclNS XmlParseXmlDecl +# define XmlInitEncodingNS XmlInitEncoding +# define XmlInitUnknownEncodingNS XmlInitUnknownEncoding +# undef XmlGetInternalEncodingNS +# define XmlGetInternalEncodingNS XmlGetInternalEncoding +# define XmlParseXmlDeclNS XmlParseXmlDecl #endif -#ifdef XML_UNICODE_WCHAR_T -#define XML_T(x) L ## x +#ifdef XML_UNICODE + +# ifdef XML_UNICODE_WCHAR_T +# define XML_T(x) (const wchar_t) x +# define XML_L(x) L##x +# else +# define XML_T(x) (const unsigned short)x +# define XML_L(x) x +# endif + #else -#define XML_T(x) x + +# define XML_T(x) x +# define XML_L(x) x + #endif /* Round up n to be a multiple of sz, where sz is a power of 2. */ -#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1)) +#define ROUND_UP(n, sz) (((n) + ((sz)-1)) & ~((sz)-1)) +/* Do safe (NULL-aware) pointer arithmetic */ +#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0) + +#include "internal.h" #include "xmltok.h" #include "xmlrole.h" @@ -58,20 +204,38 @@ typedef struct { typedef struct { NAMED **v; + unsigned char power; size_t size; size_t used; - size_t usedLim; - XML_Memory_Handling_Suite *mem; + const XML_Memory_Handling_Suite *mem; } HASH_TABLE; +static size_t keylen(KEY s); + +static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key); + +/* For probing (after a collision) we need a step size relative prime + to the hash table size, which is a power of 2. We use double-hashing, + since we can calculate a second hash value cheaply by taking those bits + of the first hash value that were discarded (masked out) when the table + index was calculated: index = hash & mask, where mask = table->size - 1. + We limit the maximum step size to table->size / 4 (mask >> 2) and make + it odd, since odd numbers are always relative prime to a power of 2. +*/ +#define SECOND_HASH(hash, mask, power) \ + ((((hash) & ~(mask)) >> ((power)-1)) & ((mask) >> 2)) +#define PROBE_STEP(hash, mask, power) \ + ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1)) + typedef struct { NAMED **p; NAMED **end; } HASH_TABLE_ITER; -#define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */ +#define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */ #define INIT_DATA_BUF_SIZE 1024 #define INIT_ATTS_SIZE 16 +#define INIT_ATTS_VERSION 0xFFFFFFFF #define INIT_BLOCK_SIZE 1024 #define INIT_BUFFER_SIZE 1024 @@ -95,41 +259,61 @@ typedef struct prefix { typedef struct { const XML_Char *str; const XML_Char *localPart; + const XML_Char *prefix; + int strLen; int uriLen; + int prefixLen; } TAG_NAME; +/* TAG represents an open element. + The name of the element is stored in both the document and API + encodings. The memory buffer 'buf' is a separately-allocated + memory area which stores the name. During the XML_Parse()/ + XMLParseBuffer() when the element is open, the memory for the 'raw' + version of the name (in the document encoding) is shared with the + document buffer. If the element is open across calls to + XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to + contain the 'raw' name as well. + + A parser re-uses these structures, maintaining a list of allocated + TAG objects in a free list. +*/ typedef struct tag { - struct tag *parent; - const char *rawName; + struct tag *parent; /* parent of this element */ + const char *rawName; /* tagName in the original encoding */ int rawNameLength; - TAG_NAME name; - char *buf; - char *bufEnd; + TAG_NAME name; /* tagName in the API encoding */ + char *buf; /* buffer for name components */ + char *bufEnd; /* end of the buffer */ BINDING *bindings; } TAG; typedef struct { const XML_Char *name; const XML_Char *textPtr; - int textLen; + int textLen; /* length in XML_Chars */ + int processed; /* # of processed bytes - when suspended */ const XML_Char *systemId; const XML_Char *base; const XML_Char *publicId; const XML_Char *notation; - char open; - char is_param; + XML_Bool open; + XML_Bool is_param; + XML_Bool is_internal; /* true if declared in internal subset outside PE */ } ENTITY; typedef struct { - enum XML_Content_Type type; - enum XML_Content_Quant quant; - const XML_Char * name; - int firstchild; - int lastchild; - int childcnt; - int nextsib; + enum XML_Content_Type type; + enum XML_Content_Quant quant; + const XML_Char *name; + int firstchild; + int lastchild; + int childcnt; + int nextsib; } CONTENT_SCAFFOLD; +#define INIT_SCAFFOLD_ELEMENTS 32 + typedef struct block { struct block *next; int size; @@ -142,24 +326,30 @@ typedef struct { const XML_Char *end; XML_Char *ptr; XML_Char *start; - XML_Memory_Handling_Suite *mem; + const XML_Memory_Handling_Suite *mem; } STRING_POOL; /* The XML_Char before the name is used to determine whether -an attribute has been specified. */ + an attribute has been specified. */ typedef struct attribute_id { XML_Char *name; PREFIX *prefix; - char maybeTokenized; - char xmlns; + XML_Bool maybeTokenized; + XML_Bool xmlns; } ATTRIBUTE_ID; typedef struct { const ATTRIBUTE_ID *id; - char isCdata; + XML_Bool isCdata; const XML_Char *value; } DEFAULT_ATTRIBUTE; +typedef struct { + unsigned long version; + unsigned long hash; + const XML_Char *uriName; +} NS_ATT; + typedef struct { const XML_Char *name; PREFIX *prefix; @@ -175,14 +365,21 @@ typedef struct { HASH_TABLE attributeIds; HASH_TABLE prefixes; STRING_POOL pool; - int complete; - int standalone; + STRING_POOL entityValuePool; + /* false once a parameter entity reference has been skipped */ + XML_Bool keepProcessing; + /* true once an internal or external PE reference has been encountered; + this includes the reference to an external subset */ + XML_Bool hasParamEntityRefs; + XML_Bool standalone; #ifdef XML_DTD + /* indicates if external PE has been read */ + XML_Bool paramEntityRead; HASH_TABLE paramEntities; #endif /* XML_DTD */ PREFIX defaultPrefix; /* === scaffolding for building content model === */ - int in_eldecl; + XML_Bool in_eldecl; CONTENT_SCAFFOLD *scaffold; unsigned contentStringLen; unsigned scaffSize; @@ -196,12 +393,37 @@ typedef struct open_internal_entity { const char *internalEventEndPtr; struct open_internal_entity *next; ENTITY *entity; + int startTagLevel; + XML_Bool betweenDecl; /* WFC: PE Between Declarations */ } OPEN_INTERNAL_ENTITY; -typedef enum XML_Error Processor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr); +enum XML_Account { + XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */ + XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity + expansion */ + XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */ +}; + +#ifdef XML_DTD +typedef unsigned long long XmlBigCount; +typedef struct accounting { + XmlBigCount countBytesDirect; + XmlBigCount countBytesIndirect; + int debugLevel; + float maximumAmplificationFactor; // >=1.0 + unsigned long long activationThresholdBytes; +} ACCOUNTING; + +typedef struct entity_stats { + unsigned int countEverOpened; + unsigned int currentDepth; + unsigned int maximumDepthSeen; + int debugLevel; +} ENTITY_STATS; +#endif /* XML_DTD */ + +typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start, + const char *end, const char **endPtr); static Processor prologProcessor; static Processor prologInitProcessor; @@ -209,6 +431,10 @@ static Processor contentProcessor; static Processor cdataSectionProcessor; #ifdef XML_DTD static Processor ignoreSectionProcessor; +static Processor externalParEntProcessor; +static Processor externalParEntInitProcessor; +static Processor entityValueProcessor; +static Processor entityValueInitProcessor; #endif /* XML_DTD */ static Processor epilogProcessor; static Processor errorProcessor; @@ -216,98 +442,149 @@ static Processor externalEntityInitProcessor; static Processor externalEntityInitProcessor2; static Processor externalEntityInitProcessor3; static Processor externalEntityContentProcessor; - -static enum XML_Error -handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName); -static enum XML_Error -processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *, const char *); -static enum XML_Error -initializeEncoding(XML_Parser parser); -static enum XML_Error -doProlog(XML_Parser parser, const ENCODING *enc, const char *s, - const char *end, int tok, const char *next, const char **nextPtr); -static enum XML_Error -processInternalParamEntity(XML_Parser parser, ENTITY *entity); -static enum XML_Error -doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, - const char *start, const char *end, const char **endPtr); -static enum XML_Error -doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr); +static Processor internalEntityProcessor; + +static enum XML_Error handleUnknownEncoding(XML_Parser parser, + const XML_Char *encodingName); +static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity, + const char *s, const char *next); +static enum XML_Error initializeEncoding(XML_Parser parser); +static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, + const char *s, const char *end, int tok, + const char *next, const char **nextPtr, + XML_Bool haveMore, XML_Bool allowClosingDoctype, + enum XML_Account account); +static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity, + XML_Bool betweenDecl); +static enum XML_Error doContent(XML_Parser parser, int startTagLevel, + const ENCODING *enc, const char *start, + const char *end, const char **endPtr, + XML_Bool haveMore, enum XML_Account account); +static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *, + const char **startPtr, const char *end, + const char **nextPtr, XML_Bool haveMore, + enum XML_Account account); #ifdef XML_DTD -static enum XML_Error -doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr); +static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *, + const char **startPtr, const char *end, + const char **nextPtr, XML_Bool haveMore); #endif /* XML_DTD */ -static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s, - TAG_NAME *tagNamePtr, BINDING **bindingsPtr); -static -int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr); - -static int -defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, - int isCdata, int isId, const XML_Char *dfltValue, - XML_Parser parser); -static enum XML_Error -storeAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *, - STRING_POOL *); -static enum XML_Error -appendAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *, - STRING_POOL *); -static ATTRIBUTE_ID * -getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); +static void freeBindings(XML_Parser parser, BINDING *bindings); +static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, + const char *s, TAG_NAME *tagNamePtr, + BINDING **bindingsPtr, + enum XML_Account account); +static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix, + const ATTRIBUTE_ID *attId, const XML_Char *uri, + BINDING **bindingsPtr); +static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata, + XML_Bool isId, const XML_Char *dfltValue, + XML_Parser parser); +static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *, + XML_Bool isCdata, const char *, + const char *, STRING_POOL *, + enum XML_Account account); +static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *, + XML_Bool isCdata, const char *, + const char *, STRING_POOL *, + enum XML_Account account); +static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end); static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *); -static enum XML_Error -storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); -static int -reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); -static int -reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); -static void -reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); +static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end, + enum XML_Account account); +static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end); +static int reportComment(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end); +static void reportDefault(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end); static const XML_Char *getContext(XML_Parser parser); -static int setContext(XML_Parser parser, const XML_Char *context); -static void normalizePublicId(XML_Char *s); -static int dtdInit(DTD *, XML_Parser parser); +static XML_Bool setContext(XML_Parser parser, const XML_Char *context); + +static void FASTCALL normalizePublicId(XML_Char *s); + +static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms); +/* do not call if m_parentParser != NULL */ +static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms); +static void dtdDestroy(DTD *p, XML_Bool isDocEntity, + const XML_Memory_Handling_Suite *ms); +static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, + const XML_Memory_Handling_Suite *ms); +static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *, STRING_POOL *, + const HASH_TABLE *); +static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name, + size_t createSize); +static void FASTCALL hashTableInit(HASH_TABLE *, + const XML_Memory_Handling_Suite *ms); +static void FASTCALL hashTableClear(HASH_TABLE *); +static void FASTCALL hashTableDestroy(HASH_TABLE *); +static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *); +static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *); + +static void FASTCALL poolInit(STRING_POOL *, + const XML_Memory_Handling_Suite *ms); +static void FASTCALL poolClear(STRING_POOL *); +static void FASTCALL poolDestroy(STRING_POOL *); +static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end); +static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end); +static XML_Bool FASTCALL poolGrow(STRING_POOL *pool); +static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool, + const XML_Char *s); +static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, + int n); +static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool, + const XML_Char *s); + +static int FASTCALL nextScaffoldPart(XML_Parser parser); +static XML_Content *build_model(XML_Parser parser); +static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc, + const char *ptr, const char *end); -static void dtdDestroy(DTD *, XML_Parser parser); +static XML_Char *copyString(const XML_Char *s, + const XML_Memory_Handling_Suite *memsuite); -static int dtdCopy(DTD *newDtd, const DTD *oldDtd, XML_Parser parser); +static unsigned long generate_hash_secret_salt(XML_Parser parser); +static XML_Bool startParsing(XML_Parser parser); -static int copyEntityTable(HASH_TABLE *, STRING_POOL *, const HASH_TABLE *, - XML_Parser parser); +static XML_Parser parserCreate(const XML_Char *encodingName, + const XML_Memory_Handling_Suite *memsuite, + const XML_Char *nameSep, DTD *dtd); + +static void parserInit(XML_Parser parser, const XML_Char *encodingName); #ifdef XML_DTD -static void dtdSwap(DTD *, DTD *); +static float accountingGetCurrentAmplification(XML_Parser rootParser); +static void accountingReportStats(XML_Parser originParser, const char *epilog); +static void accountingOnAbort(XML_Parser originParser); +static void accountingReportDiff(XML_Parser rootParser, + unsigned int levelsAwayFromRootParser, + const char *before, const char *after, + ptrdiff_t bytesMore, int source_line, + enum XML_Account account); +static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok, + const char *before, const char *after, + int source_line, + enum XML_Account account); + +static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity, + const char *action, int sourceLine); +static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity, + int sourceLine); +static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity, + int sourceLine); + +static XML_Parser getRootParserOf(XML_Parser parser, + unsigned int *outLevelDiff); #endif /* XML_DTD */ -static NAMED *lookup(HASH_TABLE *table, KEY name, size_t createSize); - -static void hashTableInit(HASH_TABLE *, XML_Memory_Handling_Suite *ms); - -static void hashTableDestroy(HASH_TABLE *); -static void hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *); -static NAMED *hashTableIterNext(HASH_TABLE_ITER *); -static void poolInit(STRING_POOL *, XML_Memory_Handling_Suite *ms); -static void poolClear(STRING_POOL *); -static void poolDestroy(STRING_POOL *); -static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, - const char *ptr, const char *end); -static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, - const char *ptr, const char *end); - -static int poolGrow(STRING_POOL *pool); - -static int nextScaffoldPart(XML_Parser parser); -static XML_Content *build_model(XML_Parser parser); - -static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s); -static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n); -static const XML_Char *poolAppendString(STRING_POOL *pool, const XML_Char *s); -static ELEMENT_TYPE * getElementType(XML_Parser Paraser, - const ENCODING *enc, - const char *ptr, - const char *end); +static unsigned long getDebugLevel(const char *variableName, + unsigned long defaultDebugLevel); #define poolStart(pool) ((pool)->start) #define poolEnd(pool) ((pool)->ptr) @@ -316,24 +593,25 @@ static ELEMENT_TYPE * getElementType(XML_Parser Paraser, #define poolLastChar(pool) (((pool)->ptr)[-1]) #define poolDiscard(pool) ((pool)->ptr = (pool)->start) #define poolFinish(pool) ((pool)->start = (pool)->ptr) -#define poolAppendChar(pool, c) \ - (((pool)->ptr == (pool)->end && !poolGrow(pool)) \ - ? 0 \ - : ((*((pool)->ptr)++ = c), 1)) - -typedef struct { - /* The first member must be userData so that the XML_GetUserData macro works. */ +#define poolAppendChar(pool, c) \ + (((pool)->ptr == (pool)->end && ! poolGrow(pool)) \ + ? 0 \ + : ((*((pool)->ptr)++ = c), 1)) + +struct XML_ParserStruct { + /* The first member must be m_userData so that the XML_GetUserData + macro works. */ void *m_userData; void *m_handlerArg; char *m_buffer; - XML_Memory_Handling_Suite m_mem; + const XML_Memory_Handling_Suite m_mem; /* first character to be parsed */ const char *m_bufferPtr; /* past last character to be parsed */ char *m_bufferEnd; - /* allocated end of buffer */ + /* allocated end of m_buffer */ const char *m_bufferLim; - long m_parseEndByteIndex; + XML_Index m_parseEndByteIndex; const char *m_parseEndPtr; XML_Char *m_dataBuf; XML_Char *m_dataBufEnd; @@ -353,7 +631,8 @@ typedef struct { XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler; XML_NotStandaloneHandler m_notStandaloneHandler; XML_ExternalEntityRefHandler m_externalEntityRefHandler; - void *m_externalEntityRefHandlerArg; + XML_Parser m_externalEntityRefHandlerArg; + XML_SkippedEntityHandler m_skippedEntityHandler; XML_UnknownEncodingHandler m_unknownEncodingHandler; XML_ElementDeclHandler m_elementDeclHandler; XML_AttlistDeclHandler m_attlistDeclHandler; @@ -363,12 +642,12 @@ typedef struct { INIT_ENCODING m_initEncoding; const ENCODING *m_internalEncoding; const XML_Char *m_protocolEncodingName; - int m_ns; - int m_ns_triplets; + XML_Bool m_ns; + XML_Bool m_ns_triplets; void *m_unknownEncodingMem; void *m_unknownEncodingData; void *m_unknownEncodingHandlerData; - void (*m_unknownEncodingRelease)(void *); + void(XMLCALL *m_unknownEncodingRelease)(void *); PROLOG_STATE m_prologState; Processor *m_processor; enum XML_Error m_errorCode; @@ -376,7 +655,8 @@ typedef struct { const char *m_eventEndPtr; const char *m_positionPtr; OPEN_INTERNAL_ENTITY *m_openInternalEntities; - int m_defaultExpandInternalEntities; + OPEN_INTERNAL_ENTITY *m_freeInternalEntities; + XML_Bool m_defaultExpandInternalEntities; int m_tagLevel; ENTITY *m_declEntity; const XML_Char *m_doctypeName; @@ -387,9 +667,9 @@ typedef struct { const XML_Char *m_declNotationPublicId; ELEMENT_TYPE *m_declElementType; ATTRIBUTE_ID *m_declAttributeId; - char m_declAttributeIsCdata; - char m_declAttributeIsId; - DTD m_dtd; + XML_Bool m_declAttributeIsCdata; + XML_Bool m_declAttributeIsId; + DTD *m_dtd; const XML_Char *m_curBase; TAG *m_tagStack; TAG *m_freeTagList; @@ -399,919 +679,1791 @@ typedef struct { int m_nSpecifiedAtts; int m_idAttIndex; ATTRIBUTE *m_atts; + NS_ATT *m_nsAtts; + unsigned long m_nsAttsVersion; + unsigned char m_nsAttsPower; +#ifdef XML_ATTR_INFO + XML_AttrInfo *m_attInfo; +#endif POSITION m_position; STRING_POOL m_tempPool; STRING_POOL m_temp2Pool; char *m_groupConnector; - unsigned m_groupSize; - int m_hadExternalDoctype; + unsigned int m_groupSize; XML_Char m_namespaceSeparator; + XML_Parser m_parentParser; + XML_ParsingStatus m_parsingStatus; #ifdef XML_DTD + XML_Bool m_isParamEntity; + XML_Bool m_useForeignDTD; enum XML_ParamEntityParsing m_paramEntityParsing; - XML_Parser m_parentParser; #endif -} Parser; - -#define MALLOC(s) (((Parser *)parser)->m_mem.malloc_fcn((s))) -#define REALLOC(p,s) (((Parser *)parser)->m_mem.realloc_fcn((p),(s))) -#define FREE(p) (((Parser *)parser)->m_mem.free_fcn((p))) - -#define userData (((Parser *)parser)->m_userData) -#define handlerArg (((Parser *)parser)->m_handlerArg) -#define startElementHandler (((Parser *)parser)->m_startElementHandler) -#define endElementHandler (((Parser *)parser)->m_endElementHandler) -#define characterDataHandler (((Parser *)parser)->m_characterDataHandler) -#define processingInstructionHandler (((Parser *)parser)->m_processingInstructionHandler) -#define commentHandler (((Parser *)parser)->m_commentHandler) -#define startCdataSectionHandler (((Parser *)parser)->m_startCdataSectionHandler) -#define endCdataSectionHandler (((Parser *)parser)->m_endCdataSectionHandler) -#define defaultHandler (((Parser *)parser)->m_defaultHandler) -#define startDoctypeDeclHandler (((Parser *)parser)->m_startDoctypeDeclHandler) -#define endDoctypeDeclHandler (((Parser *)parser)->m_endDoctypeDeclHandler) -#define unparsedEntityDeclHandler (((Parser *)parser)->m_unparsedEntityDeclHandler) -#define notationDeclHandler (((Parser *)parser)->m_notationDeclHandler) -#define startNamespaceDeclHandler (((Parser *)parser)->m_startNamespaceDeclHandler) -#define endNamespaceDeclHandler (((Parser *)parser)->m_endNamespaceDeclHandler) -#define notStandaloneHandler (((Parser *)parser)->m_notStandaloneHandler) -#define externalEntityRefHandler (((Parser *)parser)->m_externalEntityRefHandler) -#define externalEntityRefHandlerArg (((Parser *)parser)->m_externalEntityRefHandlerArg) -#define internalEntityRefHandler (((Parser *)parser)->m_internalEntityRefHandler) -#define unknownEncodingHandler (((Parser *)parser)->m_unknownEncodingHandler) -#define elementDeclHandler (((Parser *)parser)->m_elementDeclHandler) -#define attlistDeclHandler (((Parser *)parser)->m_attlistDeclHandler) -#define entityDeclHandler (((Parser *)parser)->m_entityDeclHandler) -#define xmlDeclHandler (((Parser *)parser)->m_xmlDeclHandler) -#define encoding (((Parser *)parser)->m_encoding) -#define initEncoding (((Parser *)parser)->m_initEncoding) -#define internalEncoding (((Parser *)parser)->m_internalEncoding) -#define unknownEncodingMem (((Parser *)parser)->m_unknownEncodingMem) -#define unknownEncodingData (((Parser *)parser)->m_unknownEncodingData) -#define unknownEncodingHandlerData \ - (((Parser *)parser)->m_unknownEncodingHandlerData) -#define unknownEncodingRelease (((Parser *)parser)->m_unknownEncodingRelease) -#define protocolEncodingName (((Parser *)parser)->m_protocolEncodingName) -#define ns (((Parser *)parser)->m_ns) -#define ns_triplets (((Parser *)parser)->m_ns_triplets) -#define prologState (((Parser *)parser)->m_prologState) -#define processor (((Parser *)parser)->m_processor) -#define errorCode (((Parser *)parser)->m_errorCode) -#define eventPtr (((Parser *)parser)->m_eventPtr) -#define eventEndPtr (((Parser *)parser)->m_eventEndPtr) -#define positionPtr (((Parser *)parser)->m_positionPtr) -#define position (((Parser *)parser)->m_position) -#define openInternalEntities (((Parser *)parser)->m_openInternalEntities) -#define defaultExpandInternalEntities (((Parser *)parser)->m_defaultExpandInternalEntities) -#define tagLevel (((Parser *)parser)->m_tagLevel) -#define buffer (((Parser *)parser)->m_buffer) -#define bufferPtr (((Parser *)parser)->m_bufferPtr) -#define bufferEnd (((Parser *)parser)->m_bufferEnd) -#define parseEndByteIndex (((Parser *)parser)->m_parseEndByteIndex) -#define parseEndPtr (((Parser *)parser)->m_parseEndPtr) -#define bufferLim (((Parser *)parser)->m_bufferLim) -#define dataBuf (((Parser *)parser)->m_dataBuf) -#define dataBufEnd (((Parser *)parser)->m_dataBufEnd) -#define dtd (((Parser *)parser)->m_dtd) -#define curBase (((Parser *)parser)->m_curBase) -#define declEntity (((Parser *)parser)->m_declEntity) -#define doctypeName (((Parser *)parser)->m_doctypeName) -#define doctypeSysid (((Parser *)parser)->m_doctypeSysid) -#define doctypePubid (((Parser *)parser)->m_doctypePubid) -#define declAttributeType (((Parser *)parser)->m_declAttributeType) -#define declNotationName (((Parser *)parser)->m_declNotationName) -#define declNotationPublicId (((Parser *)parser)->m_declNotationPublicId) -#define declElementType (((Parser *)parser)->m_declElementType) -#define declAttributeId (((Parser *)parser)->m_declAttributeId) -#define declAttributeIsCdata (((Parser *)parser)->m_declAttributeIsCdata) -#define declAttributeIsId (((Parser *)parser)->m_declAttributeIsId) -#define freeTagList (((Parser *)parser)->m_freeTagList) -#define freeBindingList (((Parser *)parser)->m_freeBindingList) -#define inheritedBindings (((Parser *)parser)->m_inheritedBindings) -#define tagStack (((Parser *)parser)->m_tagStack) -#define atts (((Parser *)parser)->m_atts) -#define attsSize (((Parser *)parser)->m_attsSize) -#define nSpecifiedAtts (((Parser *)parser)->m_nSpecifiedAtts) -#define idAttIndex (((Parser *)parser)->m_idAttIndex) -#define tempPool (((Parser *)parser)->m_tempPool) -#define temp2Pool (((Parser *)parser)->m_temp2Pool) -#define groupConnector (((Parser *)parser)->m_groupConnector) -#define groupSize (((Parser *)parser)->m_groupSize) -#define hadExternalDoctype (((Parser *)parser)->m_hadExternalDoctype) -#define namespaceSeparator (((Parser *)parser)->m_namespaceSeparator) + unsigned long m_hash_secret_salt; #ifdef XML_DTD -#define parentParser (((Parser *)parser)->m_parentParser) -#define paramEntityParsing (((Parser *)parser)->m_paramEntityParsing) -#endif /* XML_DTD */ - -#ifdef COMPILED_FROM_DSP -BOOL WINAPI DllMain(HINSTANCE h, DWORD r, LPVOID p) { - return TRUE; -} -#endif /* def COMPILED_FROM_DSP */ - -#ifdef _MSC_VER -#ifdef _DEBUG -Parser *asParser(XML_Parser parser) -{ - return parser; -} -#endif + ACCOUNTING m_accounting; + ENTITY_STATS m_entity_stats; #endif +}; -XML_Parser XML_ParserCreate(const XML_Char *encodingName) -{ +#define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) +#define REALLOC(parser, p, s) (parser->m_mem.realloc_fcn((p), (s))) +#define FREE(parser, p) (parser->m_mem.free_fcn((p))) + +XML_Parser XMLCALL +XML_ParserCreate(const XML_Char *encodingName) { return XML_ParserCreate_MM(encodingName, NULL, NULL); } -XML_Parser XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) -{ +XML_Parser XMLCALL +XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) { XML_Char tmp[2]; *tmp = nsSep; return XML_ParserCreate_MM(encodingName, NULL, tmp); } -XML_Parser +static const XML_Char implicitContext[] + = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, + ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, + ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, + ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, + ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, + ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, + ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e, + ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, + '\0'}; + +/* To avoid warnings about unused functions: */ +#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) + +# if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) + +/* Obtain entropy on Linux 3.17+ */ +static int +writeRandomBytes_getrandom_nonblock(void *target, size_t count) { + int success = 0; /* full count bytes written? */ + size_t bytesWrittenTotal = 0; + const unsigned int getrandomFlags = GRND_NONBLOCK; + + do { + void *const currentTarget = (void *)((char *)target + bytesWrittenTotal); + const size_t bytesToWrite = count - bytesWrittenTotal; + + const int bytesWrittenMore = +# if defined(HAVE_GETRANDOM) + getrandom(currentTarget, bytesToWrite, getrandomFlags); +# else + syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags); +# endif + + if (bytesWrittenMore > 0) { + bytesWrittenTotal += bytesWrittenMore; + if (bytesWrittenTotal >= count) + success = 1; + } + } while (! success && (errno == EINTR)); + + return success; +} + +# endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ + +# if ! defined(_WIN32) && defined(XML_DEV_URANDOM) + +/* Extract entropy from /dev/urandom */ +static int +writeRandomBytes_dev_urandom(void *target, size_t count) { + int success = 0; /* full count bytes written? */ + size_t bytesWrittenTotal = 0; + + const int fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) { + return 0; + } + + do { + void *const currentTarget = (void *)((char *)target + bytesWrittenTotal); + const size_t bytesToWrite = count - bytesWrittenTotal; + + const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite); + + if (bytesWrittenMore > 0) { + bytesWrittenTotal += bytesWrittenMore; + if (bytesWrittenTotal >= count) + success = 1; + } + } while (! success && (errno == EINTR)); + + close(fd); + return success; +} + +# endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ + +#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */ + +#if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) + +static void +writeRandomBytes_arc4random(void *target, size_t count) { + size_t bytesWrittenTotal = 0; + + while (bytesWrittenTotal < count) { + const uint32_t random32 = arc4random(); + size_t i = 0; + + for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); + i++, bytesWrittenTotal++) { + const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); + ((uint8_t *)target)[bytesWrittenTotal] = random8; + } + } +} + +#endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */ + +#ifdef _WIN32 + +/* Provide declaration of rand_s() for MinGW-32 (not 64, which has it), + as it didn't declare it in its header prior to version 5.3.0 of its + runtime package (mingwrt, containing stdlib.h). The upstream fix + was introduced at https://osdn.net/projects/mingw/ticket/39658 . */ +# if defined(__MINGW32__) && defined(__MINGW32_VERSION) \ + && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR) +__declspec(dllimport) int rand_s(unsigned int *); +# endif + +/* Obtain entropy on Windows using the rand_s() function which + * generates cryptographically secure random numbers. Internally it + * uses RtlGenRandom API which is present in Windows XP and later. + */ +static int +writeRandomBytes_rand_s(void *target, size_t count) { + size_t bytesWrittenTotal = 0; + + while (bytesWrittenTotal < count) { + unsigned int random32 = 0; + size_t i = 0; + + if (rand_s(&random32)) + return 0; /* failure */ + + for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); + i++, bytesWrittenTotal++) { + const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); + ((uint8_t *)target)[bytesWrittenTotal] = random8; + } + } + return 1; /* success */ +} + +#endif /* _WIN32 */ + +#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) + +static unsigned long +gather_time_entropy(void) { +# ifdef _WIN32 + FILETIME ft; + GetSystemTimeAsFileTime(&ft); /* never fails */ + return ft.dwHighDateTime ^ ft.dwLowDateTime; +# else + struct timeval tv; + int gettimeofday_res; + + gettimeofday_res = gettimeofday(&tv, NULL); + +# if defined(NDEBUG) + (void)gettimeofday_res; +# else + assert(gettimeofday_res == 0); +# endif /* defined(NDEBUG) */ + + /* Microseconds time is <20 bits entropy */ + return tv.tv_usec; +# endif +} + +#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */ + +static unsigned long +ENTROPY_DEBUG(const char *label, unsigned long entropy) { + if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u) { + fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label, + (int)sizeof(entropy) * 2, entropy, (unsigned long)sizeof(entropy)); + } + return entropy; +} + +static unsigned long +generate_hash_secret_salt(XML_Parser parser) { + unsigned long entropy; + (void)parser; + + /* "Failproof" high quality providers: */ +#if defined(HAVE_ARC4RANDOM_BUF) + arc4random_buf(&entropy, sizeof(entropy)); + return ENTROPY_DEBUG("arc4random_buf", entropy); +#elif defined(HAVE_ARC4RANDOM) + writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy)); + return ENTROPY_DEBUG("arc4random", entropy); +#else + /* Try high quality providers first .. */ +# ifdef _WIN32 + if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) { + return ENTROPY_DEBUG("rand_s", entropy); + } +# elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) + if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) { + return ENTROPY_DEBUG("getrandom", entropy); + } +# endif +# if ! defined(_WIN32) && defined(XML_DEV_URANDOM) + if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) { + return ENTROPY_DEBUG("/dev/urandom", entropy); + } +# endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ + /* .. and self-made low quality for backup: */ + + /* Process ID is 0 bits entropy if attacker has local access */ + entropy = gather_time_entropy() ^ getpid(); + + /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */ + if (sizeof(unsigned long) == 4) { + return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647); + } else { + return ENTROPY_DEBUG("fallback(8)", + entropy * (unsigned long)2305843009213693951ULL); + } +#endif +} + +static unsigned long +get_hash_secret_salt(XML_Parser parser) { + if (parser->m_parentParser != NULL) + return get_hash_secret_salt(parser->m_parentParser); + return parser->m_hash_secret_salt; +} + +static XML_Bool /* only valid for root parser */ +startParsing(XML_Parser parser) { + /* hash functions must be initialized before setContext() is called */ + if (parser->m_hash_secret_salt == 0) + parser->m_hash_secret_salt = generate_hash_secret_salt(parser); + if (parser->m_ns) { + /* implicit context only set for root parser, since child + parsers (i.e. external entity parsers) will inherit it + */ + return setContext(parser, implicitContext); + } + return XML_TRUE; +} + +XML_Parser XMLCALL XML_ParserCreate_MM(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep) { - - XML_Parser parser; - static - const XML_Char implicitContext[] = { - XML_T('x'), XML_T('m'), XML_T('l'), XML_T('='), - XML_T('h'), XML_T('t'), XML_T('t'), XML_T('p'), XML_T(':'), - XML_T('/'), XML_T('/'), XML_T('w'), XML_T('w'), XML_T('w'), - XML_T('.'), XML_T('w'), XML_T('3'), - XML_T('.'), XML_T('o'), XML_T('r'), XML_T('g'), - XML_T('/'), XML_T('X'), XML_T('M'), XML_T('L'), - XML_T('/'), XML_T('1'), XML_T('9'), XML_T('9'), XML_T('8'), - XML_T('/'), XML_T('n'), XML_T('a'), XML_T('m'), XML_T('e'), - XML_T('s'), XML_T('p'), XML_T('a'), XML_T('c'), XML_T('e'), - XML_T('\0') - }; + return parserCreate(encodingName, memsuite, nameSep, NULL); +} +static XML_Parser +parserCreate(const XML_Char *encodingName, + const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep, + DTD *dtd) { + XML_Parser parser; if (memsuite) { XML_Memory_Handling_Suite *mtemp; - parser = memsuite->malloc_fcn(sizeof(Parser)); - mtemp = &(((Parser *) parser)->m_mem); - mtemp->malloc_fcn = memsuite->malloc_fcn; - mtemp->realloc_fcn = memsuite->realloc_fcn; - mtemp->free_fcn = memsuite->free_fcn; - } - else { + parser = (XML_Parser)memsuite->malloc_fcn(sizeof(struct XML_ParserStruct)); + if (parser != NULL) { + mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); + mtemp->malloc_fcn = memsuite->malloc_fcn; + mtemp->realloc_fcn = memsuite->realloc_fcn; + mtemp->free_fcn = memsuite->free_fcn; + } + } else { XML_Memory_Handling_Suite *mtemp; - parser = malloc(sizeof(Parser)); - mtemp = &(((Parser *) parser)->m_mem); - mtemp->malloc_fcn = malloc; - mtemp->realloc_fcn = realloc; - mtemp->free_fcn = free; + parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct)); + if (parser != NULL) { + mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); + mtemp->malloc_fcn = malloc; + mtemp->realloc_fcn = realloc; + mtemp->free_fcn = free; + } } - if (!parser) + if (! parser) return parser; - processor = prologInitProcessor; - XmlPrologStateInit(&prologState); - userData = 0; - handlerArg = 0; - startElementHandler = 0; - endElementHandler = 0; - characterDataHandler = 0; - processingInstructionHandler = 0; - commentHandler = 0; - startCdataSectionHandler = 0; - endCdataSectionHandler = 0; - defaultHandler = 0; - startDoctypeDeclHandler = 0; - endDoctypeDeclHandler = 0; - unparsedEntityDeclHandler = 0; - notationDeclHandler = 0; - startNamespaceDeclHandler = 0; - endNamespaceDeclHandler = 0; - notStandaloneHandler = 0; - externalEntityRefHandler = 0; - externalEntityRefHandlerArg = parser; - unknownEncodingHandler = 0; - elementDeclHandler = 0; - attlistDeclHandler = 0; - entityDeclHandler = 0; - xmlDeclHandler = 0; - buffer = 0; - bufferPtr = 0; - bufferEnd = 0; - parseEndByteIndex = 0; - parseEndPtr = 0; - bufferLim = 0; - declElementType = 0; - declAttributeId = 0; - declEntity = 0; - doctypeName = 0; - doctypeSysid = 0; - doctypePubid = 0; - declAttributeType = 0; - declNotationName = 0; - declNotationPublicId = 0; - memset(&position, 0, sizeof(POSITION)); - errorCode = XML_ERROR_NONE; - eventPtr = 0; - eventEndPtr = 0; - positionPtr = 0; - openInternalEntities = 0; - tagLevel = 0; - tagStack = 0; - freeTagList = 0; - freeBindingList = 0; - inheritedBindings = 0; - attsSize = INIT_ATTS_SIZE; - atts = MALLOC(attsSize * sizeof(ATTRIBUTE)); - nSpecifiedAtts = 0; - dataBuf = MALLOC(INIT_DATA_BUF_SIZE * sizeof(XML_Char)); - groupSize = 0; - groupConnector = 0; - hadExternalDoctype = 0; - unknownEncodingMem = 0; - unknownEncodingRelease = 0; - unknownEncodingData = 0; - unknownEncodingHandlerData = 0; - namespaceSeparator = '!'; -#ifdef XML_DTD - parentParser = 0; - paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; + + parser->m_buffer = NULL; + parser->m_bufferLim = NULL; + + parser->m_attsSize = INIT_ATTS_SIZE; + parser->m_atts + = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE)); + if (parser->m_atts == NULL) { + FREE(parser, parser); + return NULL; + } +#ifdef XML_ATTR_INFO + parser->m_attInfo = (XML_AttrInfo *)MALLOC( + parser, parser->m_attsSize * sizeof(XML_AttrInfo)); + if (parser->m_attInfo == NULL) { + FREE(parser, parser->m_atts); + FREE(parser, parser); + return NULL; + } #endif - ns = 0; - ns_triplets = 0; - poolInit(&tempPool, &(((Parser *) parser)->m_mem)); - poolInit(&temp2Pool, &(((Parser *) parser)->m_mem)); - protocolEncodingName = encodingName ? poolCopyString(&tempPool, encodingName) : 0; - curBase = 0; - if (!dtdInit(&dtd, parser) || !atts || !dataBuf - || (encodingName && !protocolEncodingName)) { + parser->m_dataBuf + = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char)); + if (parser->m_dataBuf == NULL) { + FREE(parser, parser->m_atts); +#ifdef XML_ATTR_INFO + FREE(parser, parser->m_attInfo); +#endif + FREE(parser, parser); + return NULL; + } + parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE; + + if (dtd) + parser->m_dtd = dtd; + else { + parser->m_dtd = dtdCreate(&parser->m_mem); + if (parser->m_dtd == NULL) { + FREE(parser, parser->m_dataBuf); + FREE(parser, parser->m_atts); +#ifdef XML_ATTR_INFO + FREE(parser, parser->m_attInfo); +#endif + FREE(parser, parser); + return NULL; + } + } + + parser->m_freeBindingList = NULL; + parser->m_freeTagList = NULL; + parser->m_freeInternalEntities = NULL; + + parser->m_groupSize = 0; + parser->m_groupConnector = NULL; + + parser->m_unknownEncodingHandler = NULL; + parser->m_unknownEncodingHandlerData = NULL; + + parser->m_namespaceSeparator = ASCII_EXCL; + parser->m_ns = XML_FALSE; + parser->m_ns_triplets = XML_FALSE; + + parser->m_nsAtts = NULL; + parser->m_nsAttsVersion = 0; + parser->m_nsAttsPower = 0; + + parser->m_protocolEncodingName = NULL; + + poolInit(&parser->m_tempPool, &(parser->m_mem)); + poolInit(&parser->m_temp2Pool, &(parser->m_mem)); + parserInit(parser, encodingName); + + if (encodingName && ! parser->m_protocolEncodingName) { XML_ParserFree(parser); - return 0; + return NULL; } - dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE; if (nameSep) { - XmlInitEncodingNS(&initEncoding, &encoding, 0); - ns = 1; - internalEncoding = XmlGetInternalEncodingNS(); - namespaceSeparator = *nameSep; + parser->m_ns = XML_TRUE; + parser->m_internalEncoding = XmlGetInternalEncodingNS(); + parser->m_namespaceSeparator = *nameSep; + } else { + parser->m_internalEncoding = XmlGetInternalEncoding(); + } - if (! setContext(parser, implicitContext)) { - XML_ParserFree(parser); - return 0; - } + return parser; +} + +static void +parserInit(XML_Parser parser, const XML_Char *encodingName) { + parser->m_processor = prologInitProcessor; + XmlPrologStateInit(&parser->m_prologState); + if (encodingName != NULL) { + parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem)); } - else { - XmlInitEncoding(&initEncoding, &encoding, 0); - internalEncoding = XmlGetInternalEncoding(); + parser->m_curBase = NULL; + XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0); + parser->m_userData = NULL; + parser->m_handlerArg = NULL; + parser->m_startElementHandler = NULL; + parser->m_endElementHandler = NULL; + parser->m_characterDataHandler = NULL; + parser->m_processingInstructionHandler = NULL; + parser->m_commentHandler = NULL; + parser->m_startCdataSectionHandler = NULL; + parser->m_endCdataSectionHandler = NULL; + parser->m_defaultHandler = NULL; + parser->m_startDoctypeDeclHandler = NULL; + parser->m_endDoctypeDeclHandler = NULL; + parser->m_unparsedEntityDeclHandler = NULL; + parser->m_notationDeclHandler = NULL; + parser->m_startNamespaceDeclHandler = NULL; + parser->m_endNamespaceDeclHandler = NULL; + parser->m_notStandaloneHandler = NULL; + parser->m_externalEntityRefHandler = NULL; + parser->m_externalEntityRefHandlerArg = parser; + parser->m_skippedEntityHandler = NULL; + parser->m_elementDeclHandler = NULL; + parser->m_attlistDeclHandler = NULL; + parser->m_entityDeclHandler = NULL; + parser->m_xmlDeclHandler = NULL; + parser->m_bufferPtr = parser->m_buffer; + parser->m_bufferEnd = parser->m_buffer; + parser->m_parseEndByteIndex = 0; + parser->m_parseEndPtr = NULL; + parser->m_declElementType = NULL; + parser->m_declAttributeId = NULL; + parser->m_declEntity = NULL; + parser->m_doctypeName = NULL; + parser->m_doctypeSysid = NULL; + parser->m_doctypePubid = NULL; + parser->m_declAttributeType = NULL; + parser->m_declNotationName = NULL; + parser->m_declNotationPublicId = NULL; + parser->m_declAttributeIsCdata = XML_FALSE; + parser->m_declAttributeIsId = XML_FALSE; + memset(&parser->m_position, 0, sizeof(POSITION)); + parser->m_errorCode = XML_ERROR_NONE; + parser->m_eventPtr = NULL; + parser->m_eventEndPtr = NULL; + parser->m_positionPtr = NULL; + parser->m_openInternalEntities = NULL; + parser->m_defaultExpandInternalEntities = XML_TRUE; + parser->m_tagLevel = 0; + parser->m_tagStack = NULL; + parser->m_inheritedBindings = NULL; + parser->m_nSpecifiedAtts = 0; + parser->m_unknownEncodingMem = NULL; + parser->m_unknownEncodingRelease = NULL; + parser->m_unknownEncodingData = NULL; + parser->m_parentParser = NULL; + parser->m_parsingStatus.parsing = XML_INITIALIZED; +#ifdef XML_DTD + parser->m_isParamEntity = XML_FALSE; + parser->m_useForeignDTD = XML_FALSE; + parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; +#endif + parser->m_hash_secret_salt = 0; + +#ifdef XML_DTD + memset(&parser->m_accounting, 0, sizeof(ACCOUNTING)); + parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u); + parser->m_accounting.maximumAmplificationFactor + = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT; + parser->m_accounting.activationThresholdBytes + = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT; + + memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS)); + parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u); +#endif +} + +/* moves list of bindings to m_freeBindingList */ +static void FASTCALL +moveToFreeBindingList(XML_Parser parser, BINDING *bindings) { + while (bindings) { + BINDING *b = bindings; + bindings = bindings->nextTagBinding; + b->nextTagBinding = parser->m_freeBindingList; + parser->m_freeBindingList = b; } +} - return parser; -} /* End XML_ParserCreate_MM */ +XML_Bool XMLCALL +XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) { + TAG *tStk; + OPEN_INTERNAL_ENTITY *openEntityList; + + if (parser == NULL) + return XML_FALSE; + + if (parser->m_parentParser) + return XML_FALSE; + /* move m_tagStack to m_freeTagList */ + tStk = parser->m_tagStack; + while (tStk) { + TAG *tag = tStk; + tStk = tStk->parent; + tag->parent = parser->m_freeTagList; + moveToFreeBindingList(parser, tag->bindings); + tag->bindings = NULL; + parser->m_freeTagList = tag; + } + /* move m_openInternalEntities to m_freeInternalEntities */ + openEntityList = parser->m_openInternalEntities; + while (openEntityList) { + OPEN_INTERNAL_ENTITY *openEntity = openEntityList; + openEntityList = openEntity->next; + openEntity->next = parser->m_freeInternalEntities; + parser->m_freeInternalEntities = openEntity; + } + moveToFreeBindingList(parser, parser->m_inheritedBindings); + FREE(parser, parser->m_unknownEncodingMem); + if (parser->m_unknownEncodingRelease) + parser->m_unknownEncodingRelease(parser->m_unknownEncodingData); + poolClear(&parser->m_tempPool); + poolClear(&parser->m_temp2Pool); + FREE(parser, (void *)parser->m_protocolEncodingName); + parser->m_protocolEncodingName = NULL; + parserInit(parser, encodingName); + dtdReset(parser->m_dtd, &parser->m_mem); + return XML_TRUE; +} -int XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) -{ - if (!encodingName) - protocolEncodingName = 0; +enum XML_Status XMLCALL +XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { + if (parser == NULL) + return XML_STATUS_ERROR; + /* Block after XML_Parse()/XML_ParseBuffer() has been called. + XXX There's no way for the caller to determine which of the + XXX possible error cases caused the XML_STATUS_ERROR return. + */ + if (parser->m_parsingStatus.parsing == XML_PARSING + || parser->m_parsingStatus.parsing == XML_SUSPENDED) + return XML_STATUS_ERROR; + + /* Get rid of any previous encoding name */ + FREE(parser, (void *)parser->m_protocolEncodingName); + + if (encodingName == NULL) + /* No new encoding name */ + parser->m_protocolEncodingName = NULL; else { - protocolEncodingName = poolCopyString(&tempPool, encodingName); - if (!protocolEncodingName) - return 0; + /* Copy the new encoding name into allocated memory */ + parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem)); + if (! parser->m_protocolEncodingName) + return XML_STATUS_ERROR; } - return 1; + return XML_STATUS_OK; } -XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, - const XML_Char *context, - const XML_Char *encodingName) -{ +XML_Parser XMLCALL +XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, + const XML_Char *encodingName) { XML_Parser parser = oldParser; - DTD *oldDtd = &dtd; - XML_StartElementHandler oldStartElementHandler = startElementHandler; - XML_EndElementHandler oldEndElementHandler = endElementHandler; - XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler; - XML_ProcessingInstructionHandler oldProcessingInstructionHandler = processingInstructionHandler; - XML_CommentHandler oldCommentHandler = commentHandler; - XML_StartCdataSectionHandler oldStartCdataSectionHandler = startCdataSectionHandler; - XML_EndCdataSectionHandler oldEndCdataSectionHandler = endCdataSectionHandler; - XML_DefaultHandler oldDefaultHandler = defaultHandler; - XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler = unparsedEntityDeclHandler; - XML_NotationDeclHandler oldNotationDeclHandler = notationDeclHandler; - XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler = startNamespaceDeclHandler; - XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler = endNamespaceDeclHandler; - XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler; - XML_ExternalEntityRefHandler oldExternalEntityRefHandler = externalEntityRefHandler; - XML_UnknownEncodingHandler oldUnknownEncodingHandler = unknownEncodingHandler; - XML_ElementDeclHandler oldElementDeclHandler = elementDeclHandler; - XML_AttlistDeclHandler oldAttlistDeclHandler = attlistDeclHandler; - XML_EntityDeclHandler oldEntityDeclHandler = entityDeclHandler; - XML_XmlDeclHandler oldXmlDeclHandler = xmlDeclHandler; - ELEMENT_TYPE * oldDeclElementType = declElementType; - - void *oldUserData = userData; - void *oldHandlerArg = handlerArg; - int oldDefaultExpandInternalEntities = defaultExpandInternalEntities; - void *oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg; + DTD *newDtd = NULL; + DTD *oldDtd; + XML_StartElementHandler oldStartElementHandler; + XML_EndElementHandler oldEndElementHandler; + XML_CharacterDataHandler oldCharacterDataHandler; + XML_ProcessingInstructionHandler oldProcessingInstructionHandler; + XML_CommentHandler oldCommentHandler; + XML_StartCdataSectionHandler oldStartCdataSectionHandler; + XML_EndCdataSectionHandler oldEndCdataSectionHandler; + XML_DefaultHandler oldDefaultHandler; + XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler; + XML_NotationDeclHandler oldNotationDeclHandler; + XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler; + XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler; + XML_NotStandaloneHandler oldNotStandaloneHandler; + XML_ExternalEntityRefHandler oldExternalEntityRefHandler; + XML_SkippedEntityHandler oldSkippedEntityHandler; + XML_UnknownEncodingHandler oldUnknownEncodingHandler; + XML_ElementDeclHandler oldElementDeclHandler; + XML_AttlistDeclHandler oldAttlistDeclHandler; + XML_EntityDeclHandler oldEntityDeclHandler; + XML_XmlDeclHandler oldXmlDeclHandler; + ELEMENT_TYPE *oldDeclElementType; + + void *oldUserData; + void *oldHandlerArg; + XML_Bool oldDefaultExpandInternalEntities; + XML_Parser oldExternalEntityRefHandlerArg; #ifdef XML_DTD - enum XML_ParamEntityParsing oldParamEntityParsing = paramEntityParsing; + enum XML_ParamEntityParsing oldParamEntityParsing; + int oldInEntityValue; #endif - int oldns_triplets = ns_triplets; + XML_Bool oldns_triplets; + /* Note that the new parser shares the same hash secret as the old + parser, so that dtdCopy and copyEntityTable can lookup values + from hash tables associated with either parser without us having + to worry which hash secrets each table has. + */ + unsigned long oldhash_secret_salt; + + /* Validate the oldParser parameter before we pull everything out of it */ + if (oldParser == NULL) + return NULL; + + /* Stash the original parser contents on the stack */ + oldDtd = parser->m_dtd; + oldStartElementHandler = parser->m_startElementHandler; + oldEndElementHandler = parser->m_endElementHandler; + oldCharacterDataHandler = parser->m_characterDataHandler; + oldProcessingInstructionHandler = parser->m_processingInstructionHandler; + oldCommentHandler = parser->m_commentHandler; + oldStartCdataSectionHandler = parser->m_startCdataSectionHandler; + oldEndCdataSectionHandler = parser->m_endCdataSectionHandler; + oldDefaultHandler = parser->m_defaultHandler; + oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler; + oldNotationDeclHandler = parser->m_notationDeclHandler; + oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler; + oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler; + oldNotStandaloneHandler = parser->m_notStandaloneHandler; + oldExternalEntityRefHandler = parser->m_externalEntityRefHandler; + oldSkippedEntityHandler = parser->m_skippedEntityHandler; + oldUnknownEncodingHandler = parser->m_unknownEncodingHandler; + oldElementDeclHandler = parser->m_elementDeclHandler; + oldAttlistDeclHandler = parser->m_attlistDeclHandler; + oldEntityDeclHandler = parser->m_entityDeclHandler; + oldXmlDeclHandler = parser->m_xmlDeclHandler; + oldDeclElementType = parser->m_declElementType; + + oldUserData = parser->m_userData; + oldHandlerArg = parser->m_handlerArg; + oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities; + oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg; +#ifdef XML_DTD + oldParamEntityParsing = parser->m_paramEntityParsing; + oldInEntityValue = parser->m_prologState.inEntityValue; +#endif + oldns_triplets = parser->m_ns_triplets; + /* Note that the new parser shares the same hash secret as the old + parser, so that dtdCopy and copyEntityTable can lookup values + from hash tables associated with either parser without us having + to worry which hash secrets each table has. + */ + oldhash_secret_salt = parser->m_hash_secret_salt; - if (ns) { - XML_Char tmp[2]; +#ifdef XML_DTD + if (! context) + newDtd = oldDtd; +#endif /* XML_DTD */ - *tmp = namespaceSeparator; - parser = XML_ParserCreate_MM(encodingName, &((Parser *)parser)->m_mem, - tmp); - } - else { - parser = XML_ParserCreate_MM(encodingName, &((Parser *)parser)->m_mem, - NULL); + /* Note that the magical uses of the pre-processor to make field + access look more like C++ require that `parser' be overwritten + here. This makes this function more painful to follow than it + would be otherwise. + */ + if (parser->m_ns) { + XML_Char tmp[2]; + *tmp = parser->m_namespaceSeparator; + parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd); + } else { + parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd); } - if (!parser) - return 0; - - startElementHandler = oldStartElementHandler; - endElementHandler = oldEndElementHandler; - characterDataHandler = oldCharacterDataHandler; - processingInstructionHandler = oldProcessingInstructionHandler; - commentHandler = oldCommentHandler; - startCdataSectionHandler = oldStartCdataSectionHandler; - endCdataSectionHandler = oldEndCdataSectionHandler; - defaultHandler = oldDefaultHandler; - unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler; - notationDeclHandler = oldNotationDeclHandler; - startNamespaceDeclHandler = oldStartNamespaceDeclHandler; - endNamespaceDeclHandler = oldEndNamespaceDeclHandler; - notStandaloneHandler = oldNotStandaloneHandler; - externalEntityRefHandler = oldExternalEntityRefHandler; - unknownEncodingHandler = oldUnknownEncodingHandler; - elementDeclHandler = oldElementDeclHandler; - attlistDeclHandler = oldAttlistDeclHandler; - entityDeclHandler = oldEntityDeclHandler; - xmlDeclHandler = oldXmlDeclHandler; - declElementType = oldDeclElementType; - userData = oldUserData; + if (! parser) + return NULL; + + parser->m_startElementHandler = oldStartElementHandler; + parser->m_endElementHandler = oldEndElementHandler; + parser->m_characterDataHandler = oldCharacterDataHandler; + parser->m_processingInstructionHandler = oldProcessingInstructionHandler; + parser->m_commentHandler = oldCommentHandler; + parser->m_startCdataSectionHandler = oldStartCdataSectionHandler; + parser->m_endCdataSectionHandler = oldEndCdataSectionHandler; + parser->m_defaultHandler = oldDefaultHandler; + parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler; + parser->m_notationDeclHandler = oldNotationDeclHandler; + parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler; + parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler; + parser->m_notStandaloneHandler = oldNotStandaloneHandler; + parser->m_externalEntityRefHandler = oldExternalEntityRefHandler; + parser->m_skippedEntityHandler = oldSkippedEntityHandler; + parser->m_unknownEncodingHandler = oldUnknownEncodingHandler; + parser->m_elementDeclHandler = oldElementDeclHandler; + parser->m_attlistDeclHandler = oldAttlistDeclHandler; + parser->m_entityDeclHandler = oldEntityDeclHandler; + parser->m_xmlDeclHandler = oldXmlDeclHandler; + parser->m_declElementType = oldDeclElementType; + parser->m_userData = oldUserData; if (oldUserData == oldHandlerArg) - handlerArg = userData; + parser->m_handlerArg = parser->m_userData; else - handlerArg = parser; + parser->m_handlerArg = parser; if (oldExternalEntityRefHandlerArg != oldParser) - externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg; - defaultExpandInternalEntities = oldDefaultExpandInternalEntities; - ns_triplets = oldns_triplets; + parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg; + parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities; + parser->m_ns_triplets = oldns_triplets; + parser->m_hash_secret_salt = oldhash_secret_salt; + parser->m_parentParser = oldParser; #ifdef XML_DTD - paramEntityParsing = oldParamEntityParsing; + parser->m_paramEntityParsing = oldParamEntityParsing; + parser->m_prologState.inEntityValue = oldInEntityValue; if (context) { #endif /* XML_DTD */ - if (!dtdCopy(&dtd, oldDtd, parser) || !setContext(parser, context)) { + if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem) + || ! setContext(parser, context)) { XML_ParserFree(parser); - return 0; + return NULL; } - processor = externalEntityInitProcessor; + parser->m_processor = externalEntityInitProcessor; #ifdef XML_DTD - } - else { - dtdSwap(&dtd, oldDtd); - parentParser = oldParser; - XmlPrologStateInitExternalEntity(&prologState); - dtd.complete = 1; - hadExternalDoctype = 1; + } else { + /* The DTD instance referenced by parser->m_dtd is shared between the + document's root parser and external PE parsers, therefore one does not + need to call setContext. In addition, one also *must* not call + setContext, because this would overwrite existing prefix->binding + pointers in parser->m_dtd with ones that get destroyed with the external + PE parser. This would leave those prefixes with dangling pointers. + */ + parser->m_isParamEntity = XML_TRUE; + XmlPrologStateInitExternalEntity(&parser->m_prologState); + parser->m_processor = externalParEntInitProcessor; } #endif /* XML_DTD */ return parser; } -static -void destroyBindings(BINDING *bindings, XML_Parser parser) -{ +static void FASTCALL +destroyBindings(BINDING *bindings, XML_Parser parser) { for (;;) { BINDING *b = bindings; - if (!b) + if (! b) break; bindings = b->nextTagBinding; - FREE(b->uri); - FREE(b); + FREE(parser, b->uri); + FREE(parser, b); } } -void XML_ParserFree(XML_Parser parser) -{ +void XMLCALL +XML_ParserFree(XML_Parser parser) { + TAG *tagList; + OPEN_INTERNAL_ENTITY *entityList; + if (parser == NULL) + return; + /* free m_tagStack and m_freeTagList */ + tagList = parser->m_tagStack; for (;;) { TAG *p; - if (tagStack == 0) { - if (freeTagList == 0) + if (tagList == NULL) { + if (parser->m_freeTagList == NULL) break; - tagStack = freeTagList; - freeTagList = 0; + tagList = parser->m_freeTagList; + parser->m_freeTagList = NULL; } - p = tagStack; - tagStack = tagStack->parent; - FREE(p->buf); + p = tagList; + tagList = tagList->parent; + FREE(parser, p->buf); destroyBindings(p->bindings, parser); - FREE(p); + FREE(parser, p); } - destroyBindings(freeBindingList, parser); - destroyBindings(inheritedBindings, parser); - poolDestroy(&tempPool); - poolDestroy(&temp2Pool); -#ifdef XML_DTD - if (parentParser) { - if (hadExternalDoctype) - dtd.complete = 0; - dtdSwap(&dtd, &((Parser *)parentParser)->m_dtd); + /* free m_openInternalEntities and m_freeInternalEntities */ + entityList = parser->m_openInternalEntities; + for (;;) { + OPEN_INTERNAL_ENTITY *openEntity; + if (entityList == NULL) { + if (parser->m_freeInternalEntities == NULL) + break; + entityList = parser->m_freeInternalEntities; + parser->m_freeInternalEntities = NULL; + } + openEntity = entityList; + entityList = entityList->next; + FREE(parser, openEntity); } + + destroyBindings(parser->m_freeBindingList, parser); + destroyBindings(parser->m_inheritedBindings, parser); + poolDestroy(&parser->m_tempPool); + poolDestroy(&parser->m_temp2Pool); + FREE(parser, (void *)parser->m_protocolEncodingName); +#ifdef XML_DTD + /* external parameter entity parsers share the DTD structure + parser->m_dtd with the root parser, so we must not destroy it + */ + if (! parser->m_isParamEntity && parser->m_dtd) +#else + if (parser->m_dtd) #endif /* XML_DTD */ - dtdDestroy(&dtd, parser); - FREE((void *)atts); - if (groupConnector) - FREE(groupConnector); - if (buffer) - FREE(buffer); - FREE(dataBuf); - if (unknownEncodingMem) - FREE(unknownEncodingMem); - if (unknownEncodingRelease) - unknownEncodingRelease(unknownEncodingData); - FREE(parser); -} - -void XML_UseParserAsHandlerArg(XML_Parser parser) -{ - handlerArg = parser; -} - -void + dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser, + &parser->m_mem); + FREE(parser, (void *)parser->m_atts); +#ifdef XML_ATTR_INFO + FREE(parser, (void *)parser->m_attInfo); +#endif + FREE(parser, parser->m_groupConnector); + FREE(parser, parser->m_buffer); + FREE(parser, parser->m_dataBuf); + FREE(parser, parser->m_nsAtts); + FREE(parser, parser->m_unknownEncodingMem); + if (parser->m_unknownEncodingRelease) + parser->m_unknownEncodingRelease(parser->m_unknownEncodingData); + FREE(parser, parser); +} + +void XMLCALL +XML_UseParserAsHandlerArg(XML_Parser parser) { + if (parser != NULL) + parser->m_handlerArg = parser; +} + +enum XML_Error XMLCALL +XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) { + if (parser == NULL) + return XML_ERROR_INVALID_ARGUMENT; +#ifdef XML_DTD + /* block after XML_Parse()/XML_ParseBuffer() has been called */ + if (parser->m_parsingStatus.parsing == XML_PARSING + || parser->m_parsingStatus.parsing == XML_SUSPENDED) + return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING; + parser->m_useForeignDTD = useDTD; + return XML_ERROR_NONE; +#else + UNUSED_P(useDTD); + return XML_ERROR_FEATURE_REQUIRES_XML_DTD; +#endif +} + +void XMLCALL XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) { - ns_triplets = do_nst; + if (parser == NULL) + return; + /* block after XML_Parse()/XML_ParseBuffer() has been called */ + if (parser->m_parsingStatus.parsing == XML_PARSING + || parser->m_parsingStatus.parsing == XML_SUSPENDED) + return; + parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE; } -void XML_SetUserData(XML_Parser parser, void *p) -{ - if (handlerArg == userData) - handlerArg = userData = p; +void XMLCALL +XML_SetUserData(XML_Parser parser, void *p) { + if (parser == NULL) + return; + if (parser->m_handlerArg == parser->m_userData) + parser->m_handlerArg = parser->m_userData = p; else - userData = p; + parser->m_userData = p; } -int XML_SetBase(XML_Parser parser, const XML_Char *p) -{ +enum XML_Status XMLCALL +XML_SetBase(XML_Parser parser, const XML_Char *p) { + if (parser == NULL) + return XML_STATUS_ERROR; if (p) { - p = poolCopyString(&dtd.pool, p); - if (!p) - return 0; - curBase = p; - } - else - curBase = 0; - return 1; + p = poolCopyString(&parser->m_dtd->pool, p); + if (! p) + return XML_STATUS_ERROR; + parser->m_curBase = p; + } else + parser->m_curBase = NULL; + return XML_STATUS_OK; } -const XML_Char *XML_GetBase(XML_Parser parser) -{ - return curBase; +const XML_Char *XMLCALL +XML_GetBase(XML_Parser parser) { + if (parser == NULL) + return NULL; + return parser->m_curBase; } -int XML_GetSpecifiedAttributeCount(XML_Parser parser) -{ - return nSpecifiedAtts; +int XMLCALL +XML_GetSpecifiedAttributeCount(XML_Parser parser) { + if (parser == NULL) + return -1; + return parser->m_nSpecifiedAtts; } -int XML_GetIdAttributeIndex(XML_Parser parser) -{ - return idAttIndex; +int XMLCALL +XML_GetIdAttributeIndex(XML_Parser parser) { + if (parser == NULL) + return -1; + return parser->m_idAttIndex; } -void XML_SetElementHandler(XML_Parser parser, - XML_StartElementHandler start, - XML_EndElementHandler end) -{ - startElementHandler = start; - endElementHandler = end; +#ifdef XML_ATTR_INFO +const XML_AttrInfo *XMLCALL +XML_GetAttributeInfo(XML_Parser parser) { + if (parser == NULL) + return NULL; + return parser->m_attInfo; } +#endif -void XML_SetStartElementHandler(XML_Parser parser, - XML_StartElementHandler start) { - startElementHandler = start; +void XMLCALL +XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, + XML_EndElementHandler end) { + if (parser == NULL) + return; + parser->m_startElementHandler = start; + parser->m_endElementHandler = end; } -void XML_SetEndElementHandler(XML_Parser parser, - XML_EndElementHandler end) { - endElementHandler = end; +void XMLCALL +XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) { + if (parser != NULL) + parser->m_startElementHandler = start; } -void XML_SetCharacterDataHandler(XML_Parser parser, - XML_CharacterDataHandler handler) -{ - characterDataHandler = handler; +void XMLCALL +XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) { + if (parser != NULL) + parser->m_endElementHandler = end; } -void XML_SetProcessingInstructionHandler(XML_Parser parser, - XML_ProcessingInstructionHandler handler) -{ - processingInstructionHandler = handler; +void XMLCALL +XML_SetCharacterDataHandler(XML_Parser parser, + XML_CharacterDataHandler handler) { + if (parser != NULL) + parser->m_characterDataHandler = handler; } -void XML_SetCommentHandler(XML_Parser parser, - XML_CommentHandler handler) -{ - commentHandler = handler; +void XMLCALL +XML_SetProcessingInstructionHandler(XML_Parser parser, + XML_ProcessingInstructionHandler handler) { + if (parser != NULL) + parser->m_processingInstructionHandler = handler; } -void XML_SetCdataSectionHandler(XML_Parser parser, - XML_StartCdataSectionHandler start, - XML_EndCdataSectionHandler end) -{ - startCdataSectionHandler = start; - endCdataSectionHandler = end; +void XMLCALL +XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) { + if (parser != NULL) + parser->m_commentHandler = handler; } -void XML_SetStartCdataSectionHandler(XML_Parser parser, - XML_StartCdataSectionHandler start) { - startCdataSectionHandler = start; +void XMLCALL +XML_SetCdataSectionHandler(XML_Parser parser, + XML_StartCdataSectionHandler start, + XML_EndCdataSectionHandler end) { + if (parser == NULL) + return; + parser->m_startCdataSectionHandler = start; + parser->m_endCdataSectionHandler = end; } -void XML_SetEndCdataSectionHandler(XML_Parser parser, - XML_EndCdataSectionHandler end) { - endCdataSectionHandler = end; +void XMLCALL +XML_SetStartCdataSectionHandler(XML_Parser parser, + XML_StartCdataSectionHandler start) { + if (parser != NULL) + parser->m_startCdataSectionHandler = start; } -void XML_SetDefaultHandler(XML_Parser parser, - XML_DefaultHandler handler) -{ - defaultHandler = handler; - defaultExpandInternalEntities = 0; +void XMLCALL +XML_SetEndCdataSectionHandler(XML_Parser parser, + XML_EndCdataSectionHandler end) { + if (parser != NULL) + parser->m_endCdataSectionHandler = end; } -void XML_SetDefaultHandlerExpand(XML_Parser parser, - XML_DefaultHandler handler) -{ - defaultHandler = handler; - defaultExpandInternalEntities = 1; +void XMLCALL +XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) { + if (parser == NULL) + return; + parser->m_defaultHandler = handler; + parser->m_defaultExpandInternalEntities = XML_FALSE; } -void XML_SetDoctypeDeclHandler(XML_Parser parser, - XML_StartDoctypeDeclHandler start, - XML_EndDoctypeDeclHandler end) -{ - startDoctypeDeclHandler = start; - endDoctypeDeclHandler = end; +void XMLCALL +XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) { + if (parser == NULL) + return; + parser->m_defaultHandler = handler; + parser->m_defaultExpandInternalEntities = XML_TRUE; } -void XML_SetStartDoctypeDeclHandler(XML_Parser parser, - XML_StartDoctypeDeclHandler start) { - startDoctypeDeclHandler = start; +void XMLCALL +XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, + XML_EndDoctypeDeclHandler end) { + if (parser == NULL) + return; + parser->m_startDoctypeDeclHandler = start; + parser->m_endDoctypeDeclHandler = end; } -void XML_SetEndDoctypeDeclHandler(XML_Parser parser, - XML_EndDoctypeDeclHandler end) { - endDoctypeDeclHandler = end; +void XMLCALL +XML_SetStartDoctypeDeclHandler(XML_Parser parser, + XML_StartDoctypeDeclHandler start) { + if (parser != NULL) + parser->m_startDoctypeDeclHandler = start; } -void XML_SetUnparsedEntityDeclHandler(XML_Parser parser, - XML_UnparsedEntityDeclHandler handler) -{ - unparsedEntityDeclHandler = handler; +void XMLCALL +XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) { + if (parser != NULL) + parser->m_endDoctypeDeclHandler = end; } -void XML_SetNotationDeclHandler(XML_Parser parser, - XML_NotationDeclHandler handler) -{ - notationDeclHandler = handler; +void XMLCALL +XML_SetUnparsedEntityDeclHandler(XML_Parser parser, + XML_UnparsedEntityDeclHandler handler) { + if (parser != NULL) + parser->m_unparsedEntityDeclHandler = handler; } -void XML_SetNamespaceDeclHandler(XML_Parser parser, - XML_StartNamespaceDeclHandler start, - XML_EndNamespaceDeclHandler end) -{ - startNamespaceDeclHandler = start; - endNamespaceDeclHandler = end; +void XMLCALL +XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) { + if (parser != NULL) + parser->m_notationDeclHandler = handler; } -void XML_SetStartNamespaceDeclHandler(XML_Parser parser, - XML_StartNamespaceDeclHandler start) { - startNamespaceDeclHandler = start; +void XMLCALL +XML_SetNamespaceDeclHandler(XML_Parser parser, + XML_StartNamespaceDeclHandler start, + XML_EndNamespaceDeclHandler end) { + if (parser == NULL) + return; + parser->m_startNamespaceDeclHandler = start; + parser->m_endNamespaceDeclHandler = end; } -void XML_SetEndNamespaceDeclHandler(XML_Parser parser, - XML_EndNamespaceDeclHandler end) { - endNamespaceDeclHandler = end; +void XMLCALL +XML_SetStartNamespaceDeclHandler(XML_Parser parser, + XML_StartNamespaceDeclHandler start) { + if (parser != NULL) + parser->m_startNamespaceDeclHandler = start; } +void XMLCALL +XML_SetEndNamespaceDeclHandler(XML_Parser parser, + XML_EndNamespaceDeclHandler end) { + if (parser != NULL) + parser->m_endNamespaceDeclHandler = end; +} -void XML_SetNotStandaloneHandler(XML_Parser parser, - XML_NotStandaloneHandler handler) -{ - notStandaloneHandler = handler; +void XMLCALL +XML_SetNotStandaloneHandler(XML_Parser parser, + XML_NotStandaloneHandler handler) { + if (parser != NULL) + parser->m_notStandaloneHandler = handler; } -void XML_SetExternalEntityRefHandler(XML_Parser parser, - XML_ExternalEntityRefHandler handler) -{ - externalEntityRefHandler = handler; +void XMLCALL +XML_SetExternalEntityRefHandler(XML_Parser parser, + XML_ExternalEntityRefHandler handler) { + if (parser != NULL) + parser->m_externalEntityRefHandler = handler; } -void XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) -{ +void XMLCALL +XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) { + if (parser == NULL) + return; if (arg) - externalEntityRefHandlerArg = arg; + parser->m_externalEntityRefHandlerArg = (XML_Parser)arg; else - externalEntityRefHandlerArg = parser; + parser->m_externalEntityRefHandlerArg = parser; +} + +void XMLCALL +XML_SetSkippedEntityHandler(XML_Parser parser, + XML_SkippedEntityHandler handler) { + if (parser != NULL) + parser->m_skippedEntityHandler = handler; } -void XML_SetUnknownEncodingHandler(XML_Parser parser, - XML_UnknownEncodingHandler handler, - void *data) -{ - unknownEncodingHandler = handler; - unknownEncodingHandlerData = data; +void XMLCALL +XML_SetUnknownEncodingHandler(XML_Parser parser, + XML_UnknownEncodingHandler handler, void *data) { + if (parser == NULL) + return; + parser->m_unknownEncodingHandler = handler; + parser->m_unknownEncodingHandlerData = data; } -void XML_SetElementDeclHandler(XML_Parser parser, - XML_ElementDeclHandler eldecl) -{ - elementDeclHandler = eldecl; +void XMLCALL +XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) { + if (parser != NULL) + parser->m_elementDeclHandler = eldecl; } -void XML_SetAttlistDeclHandler(XML_Parser parser, - XML_AttlistDeclHandler attdecl) -{ - attlistDeclHandler = attdecl; +void XMLCALL +XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) { + if (parser != NULL) + parser->m_attlistDeclHandler = attdecl; } -void XML_SetEntityDeclHandler(XML_Parser parser, - XML_EntityDeclHandler handler) -{ - entityDeclHandler = handler; +void XMLCALL +XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) { + if (parser != NULL) + parser->m_entityDeclHandler = handler; } -void XML_SetXmlDeclHandler(XML_Parser parser, - XML_XmlDeclHandler handler) { - xmlDeclHandler = handler; +void XMLCALL +XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) { + if (parser != NULL) + parser->m_xmlDeclHandler = handler; } -int XML_SetParamEntityParsing(XML_Parser parser, - enum XML_ParamEntityParsing parsing) -{ +int XMLCALL +XML_SetParamEntityParsing(XML_Parser parser, + enum XML_ParamEntityParsing peParsing) { + if (parser == NULL) + return 0; + /* block after XML_Parse()/XML_ParseBuffer() has been called */ + if (parser->m_parsingStatus.parsing == XML_PARSING + || parser->m_parsingStatus.parsing == XML_SUSPENDED) + return 0; #ifdef XML_DTD - paramEntityParsing = parsing; + parser->m_paramEntityParsing = peParsing; return 1; #else - return parsing == XML_PARAM_ENTITY_PARSING_NEVER; + return peParsing == XML_PARAM_ENTITY_PARSING_NEVER; #endif } -int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) -{ - if (len == 0) { - if (!isFinal) - return 1; - positionPtr = bufferPtr; - errorCode = processor(parser, bufferPtr, parseEndPtr = bufferEnd, 0); - if (errorCode == XML_ERROR_NONE) - return 1; - eventEndPtr = eventPtr; - processor = errorProcessor; +int XMLCALL +XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) { + if (parser == NULL) return 0; + if (parser->m_parentParser) + return XML_SetHashSalt(parser->m_parentParser, hash_salt); + /* block after XML_Parse()/XML_ParseBuffer() has been called */ + if (parser->m_parsingStatus.parsing == XML_PARSING + || parser->m_parsingStatus.parsing == XML_SUSPENDED) + return 0; + parser->m_hash_secret_salt = hash_salt; + return 1; +} + +enum XML_Status XMLCALL +XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { + if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) { + if (parser != NULL) + parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT; + return XML_STATUS_ERROR; } -#ifndef XML_CONTEXT_BYTES - else if (bufferPtr == bufferEnd) { - const char *end; + switch (parser->m_parsingStatus.parsing) { + case XML_SUSPENDED: + parser->m_errorCode = XML_ERROR_SUSPENDED; + return XML_STATUS_ERROR; + case XML_FINISHED: + parser->m_errorCode = XML_ERROR_FINISHED; + return XML_STATUS_ERROR; + case XML_INITIALIZED: + if (parser->m_parentParser == NULL && ! startParsing(parser)) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + return XML_STATUS_ERROR; + } + /* fall through */ + default: + parser->m_parsingStatus.parsing = XML_PARSING; + } + + if (len == 0) { + parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; + if (! isFinal) + return XML_STATUS_OK; + parser->m_positionPtr = parser->m_bufferPtr; + parser->m_parseEndPtr = parser->m_bufferEnd; + + /* If data are left over from last buffer, and we now know that these + data are the final chunk of input, then we have to check them again + to detect errors based on that fact. + */ + parser->m_errorCode + = parser->m_processor(parser, parser->m_bufferPtr, + parser->m_parseEndPtr, &parser->m_bufferPtr); + + if (parser->m_errorCode == XML_ERROR_NONE) { + switch (parser->m_parsingStatus.parsing) { + case XML_SUSPENDED: + /* It is hard to be certain, but it seems that this case + * cannot occur. This code is cleaning up a previous parse + * with no new data (since len == 0). Changing the parsing + * state requires getting to execute a handler function, and + * there doesn't seem to be an opportunity for that while in + * this circumstance. + * + * Given the uncertainty, we retain the code but exclude it + * from coverage tests. + * + * LCOV_EXCL_START + */ + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, + parser->m_bufferPtr, &parser->m_position); + parser->m_positionPtr = parser->m_bufferPtr; + return XML_STATUS_SUSPENDED; + /* LCOV_EXCL_STOP */ + case XML_INITIALIZED: + case XML_PARSING: + parser->m_parsingStatus.parsing = XML_FINISHED; + /* fall through */ + default: + return XML_STATUS_OK; + } + } + parser->m_eventEndPtr = parser->m_eventPtr; + parser->m_processor = errorProcessor; + return XML_STATUS_ERROR; + } +#ifndef XML_CONTEXT_BYTES + else if (parser->m_bufferPtr == parser->m_bufferEnd) { + const char *end; int nLeftOver; - parseEndByteIndex += len; - positionPtr = s; - if (isFinal) { - errorCode = processor(parser, s, parseEndPtr = s + len, 0); - if (errorCode == XML_ERROR_NONE) - return 1; - eventEndPtr = eventPtr; - processor = errorProcessor; - return 0; + enum XML_Status result; + /* Detect overflow (a+b > MAX <==> b > MAX-a) */ + if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + parser->m_eventPtr = parser->m_eventEndPtr = NULL; + parser->m_processor = errorProcessor; + return XML_STATUS_ERROR; } - errorCode = processor(parser, s, parseEndPtr = s + len, &end); - if (errorCode != XML_ERROR_NONE) { - eventEndPtr = eventPtr; - processor = errorProcessor; - return 0; + parser->m_parseEndByteIndex += len; + parser->m_positionPtr = s; + parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; + + parser->m_errorCode + = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end); + + if (parser->m_errorCode != XML_ERROR_NONE) { + parser->m_eventEndPtr = parser->m_eventPtr; + parser->m_processor = errorProcessor; + return XML_STATUS_ERROR; + } else { + switch (parser->m_parsingStatus.parsing) { + case XML_SUSPENDED: + result = XML_STATUS_SUSPENDED; + break; + case XML_INITIALIZED: + case XML_PARSING: + if (isFinal) { + parser->m_parsingStatus.parsing = XML_FINISHED; + return XML_STATUS_OK; + } + /* fall through */ + default: + result = XML_STATUS_OK; + } } - XmlUpdatePosition(encoding, positionPtr, end, &position); + + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end, + &parser->m_position); nLeftOver = s + len - end; if (nLeftOver) { - if (buffer == 0 || nLeftOver > bufferLim - buffer) { - /* FIXME avoid integer overflow */ - buffer = buffer == 0 ? MALLOC(len * 2) : REALLOC(buffer, len * 2); - /* FIXME storage leak if realloc fails */ - if (!buffer) { - errorCode = XML_ERROR_NO_MEMORY; - eventPtr = eventEndPtr = 0; - processor = errorProcessor; - return 0; + if (parser->m_buffer == NULL + || nLeftOver > parser->m_bufferLim - parser->m_buffer) { + /* avoid _signed_ integer overflow */ + char *temp = NULL; + const int bytesToAllocate = (int)((unsigned)len * 2U); + if (bytesToAllocate > 0) { + temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate); } - bufferLim = buffer + len * 2; + if (temp == NULL) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + parser->m_eventPtr = parser->m_eventEndPtr = NULL; + parser->m_processor = errorProcessor; + return XML_STATUS_ERROR; + } + parser->m_buffer = temp; + parser->m_bufferLim = parser->m_buffer + bytesToAllocate; } - memcpy(buffer, end, nLeftOver); - bufferPtr = buffer; - bufferEnd = buffer + nLeftOver; + memcpy(parser->m_buffer, end, nLeftOver); } - return 1; + parser->m_bufferPtr = parser->m_buffer; + parser->m_bufferEnd = parser->m_buffer + nLeftOver; + parser->m_positionPtr = parser->m_bufferPtr; + parser->m_parseEndPtr = parser->m_bufferEnd; + parser->m_eventPtr = parser->m_bufferPtr; + parser->m_eventEndPtr = parser->m_bufferPtr; + return result; } -#endif /* not defined XML_CONTEXT_BYTES */ +#endif /* not defined XML_CONTEXT_BYTES */ else { - memcpy(XML_GetBuffer(parser, len), s, len); - return XML_ParseBuffer(parser, len, isFinal); - } -} - -int XML_ParseBuffer(XML_Parser parser, int len, int isFinal) -{ - const char *start = bufferPtr; - positionPtr = start; - bufferEnd += len; - parseEndByteIndex += len; - errorCode = processor(parser, start, parseEndPtr = bufferEnd, - isFinal ? (const char **)0 : &bufferPtr); - if (errorCode == XML_ERROR_NONE) { - if (!isFinal) - XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); - return 1; + void *buff = XML_GetBuffer(parser, len); + if (buff == NULL) + return XML_STATUS_ERROR; + else { + memcpy(buff, s, len); + return XML_ParseBuffer(parser, len, isFinal); + } } - else { - eventEndPtr = eventPtr; - processor = errorProcessor; - return 0; +} + +enum XML_Status XMLCALL +XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { + const char *start; + enum XML_Status result = XML_STATUS_OK; + + if (parser == NULL) + return XML_STATUS_ERROR; + switch (parser->m_parsingStatus.parsing) { + case XML_SUSPENDED: + parser->m_errorCode = XML_ERROR_SUSPENDED; + return XML_STATUS_ERROR; + case XML_FINISHED: + parser->m_errorCode = XML_ERROR_FINISHED; + return XML_STATUS_ERROR; + case XML_INITIALIZED: + /* Has someone called XML_GetBuffer successfully before? */ + if (! parser->m_bufferPtr) { + parser->m_errorCode = XML_ERROR_NO_BUFFER; + return XML_STATUS_ERROR; + } + + if (parser->m_parentParser == NULL && ! startParsing(parser)) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + return XML_STATUS_ERROR; + } + /* fall through */ + default: + parser->m_parsingStatus.parsing = XML_PARSING; + } + + start = parser->m_bufferPtr; + parser->m_positionPtr = start; + parser->m_bufferEnd += len; + parser->m_parseEndPtr = parser->m_bufferEnd; + parser->m_parseEndByteIndex += len; + parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; + + parser->m_errorCode = parser->m_processor( + parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr); + + if (parser->m_errorCode != XML_ERROR_NONE) { + parser->m_eventEndPtr = parser->m_eventPtr; + parser->m_processor = errorProcessor; + return XML_STATUS_ERROR; + } else { + switch (parser->m_parsingStatus.parsing) { + case XML_SUSPENDED: + result = XML_STATUS_SUSPENDED; + break; + case XML_INITIALIZED: + case XML_PARSING: + if (isFinal) { + parser->m_parsingStatus.parsing = XML_FINISHED; + return result; + } + default:; /* should not happen */ + } } + + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, + parser->m_bufferPtr, &parser->m_position); + parser->m_positionPtr = parser->m_bufferPtr; + return result; } -void *XML_GetBuffer(XML_Parser parser, int len) -{ - if (len > bufferLim - bufferEnd) { - /* FIXME avoid integer overflow */ - int neededSize = len + (bufferEnd - bufferPtr); -#ifdef XML_CONTEXT_BYTES - int keep = bufferPtr - buffer; +void *XMLCALL +XML_GetBuffer(XML_Parser parser, int len) { + if (parser == NULL) + return NULL; + if (len < 0) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + return NULL; + } + switch (parser->m_parsingStatus.parsing) { + case XML_SUSPENDED: + parser->m_errorCode = XML_ERROR_SUSPENDED; + return NULL; + case XML_FINISHED: + parser->m_errorCode = XML_ERROR_FINISHED; + return NULL; + default:; + } + if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) { +#ifdef XML_CONTEXT_BYTES + int keep; +#endif /* defined XML_CONTEXT_BYTES */ + /* Do not invoke signed arithmetic overflow: */ + int neededSize = (int)((unsigned)len + + (unsigned)EXPAT_SAFE_PTR_DIFF( + parser->m_bufferEnd, parser->m_bufferPtr)); + if (neededSize < 0) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + return NULL; + } +#ifdef XML_CONTEXT_BYTES + keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); if (keep > XML_CONTEXT_BYTES) keep = XML_CONTEXT_BYTES; neededSize += keep; -#endif /* defined XML_CONTEXT_BYTES */ - if (neededSize <= bufferLim - buffer) { +#endif /* defined XML_CONTEXT_BYTES */ + if (neededSize + <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) { #ifdef XML_CONTEXT_BYTES - if (keep < bufferPtr - buffer) { - int offset = (bufferPtr - buffer) - keep; - memmove(buffer, &buffer[offset], bufferEnd - bufferPtr + keep); - bufferEnd -= offset; - bufferPtr -= offset; + if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) { + int offset + = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer) + - keep; + /* The buffer pointers cannot be NULL here; we have at least some bytes + * in the buffer */ + memmove(parser->m_buffer, &parser->m_buffer[offset], + parser->m_bufferEnd - parser->m_bufferPtr + keep); + parser->m_bufferEnd -= offset; + parser->m_bufferPtr -= offset; } #else - memmove(buffer, bufferPtr, bufferEnd - bufferPtr); - bufferEnd = buffer + (bufferEnd - bufferPtr); - bufferPtr = buffer; -#endif /* not defined XML_CONTEXT_BYTES */ - } - else { + if (parser->m_buffer && parser->m_bufferPtr) { + memmove(parser->m_buffer, parser->m_bufferPtr, + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); + parser->m_bufferEnd + = parser->m_buffer + + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); + parser->m_bufferPtr = parser->m_buffer; + } +#endif /* not defined XML_CONTEXT_BYTES */ + } else { char *newBuf; - int bufferSize = bufferLim - bufferPtr; + int bufferSize + = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr); if (bufferSize == 0) bufferSize = INIT_BUFFER_SIZE; do { - bufferSize *= 2; - } while (bufferSize < neededSize); - newBuf = MALLOC(bufferSize); + /* Do not invoke signed arithmetic overflow: */ + bufferSize = (int)(2U * (unsigned)bufferSize); + } while (bufferSize < neededSize && bufferSize > 0); + if (bufferSize <= 0) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + return NULL; + } + newBuf = (char *)MALLOC(parser, bufferSize); if (newBuf == 0) { - errorCode = XML_ERROR_NO_MEMORY; - return 0; + parser->m_errorCode = XML_ERROR_NO_MEMORY; + return NULL; } - bufferLim = newBuf + bufferSize; + parser->m_bufferLim = newBuf + bufferSize; #ifdef XML_CONTEXT_BYTES - if (bufferPtr) { - int keepnumber = bufferPtr - buffer; - if (keepnumber > XML_CONTEXT_BYTES) - keepnumber = XML_CONTEXT_BYTES; - memcpy(newBuf, &bufferPtr[-keepnumber], bufferEnd - bufferPtr + keepnumber); - FREE(buffer); - buffer = newBuf; - bufferEnd = buffer + (bufferEnd - bufferPtr) + keepnumber; - bufferPtr = buffer + keepnumber; - } - else { - bufferEnd = newBuf + (bufferEnd - bufferPtr); - bufferPtr = buffer = newBuf; + if (parser->m_bufferPtr) { + memcpy(newBuf, &parser->m_bufferPtr[-keep], + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) + + keep); + FREE(parser, parser->m_buffer); + parser->m_buffer = newBuf; + parser->m_bufferEnd + = parser->m_buffer + + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) + + keep; + parser->m_bufferPtr = parser->m_buffer + keep; + } else { + /* This must be a brand new buffer with no data in it yet */ + parser->m_bufferEnd = newBuf; + parser->m_bufferPtr = parser->m_buffer = newBuf; } #else - if (bufferPtr) { - memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr); - FREE(buffer); + if (parser->m_bufferPtr) { + memcpy(newBuf, parser->m_bufferPtr, + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); + FREE(parser, parser->m_buffer); + parser->m_bufferEnd + = newBuf + + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); + } else { + /* This must be a brand new buffer with no data in it yet */ + parser->m_bufferEnd = newBuf; + } + parser->m_bufferPtr = parser->m_buffer = newBuf; +#endif /* not defined XML_CONTEXT_BYTES */ + } + parser->m_eventPtr = parser->m_eventEndPtr = NULL; + parser->m_positionPtr = NULL; + } + return parser->m_bufferEnd; +} + +enum XML_Status XMLCALL +XML_StopParser(XML_Parser parser, XML_Bool resumable) { + if (parser == NULL) + return XML_STATUS_ERROR; + switch (parser->m_parsingStatus.parsing) { + case XML_SUSPENDED: + if (resumable) { + parser->m_errorCode = XML_ERROR_SUSPENDED; + return XML_STATUS_ERROR; + } + parser->m_parsingStatus.parsing = XML_FINISHED; + break; + case XML_FINISHED: + parser->m_errorCode = XML_ERROR_FINISHED; + return XML_STATUS_ERROR; + default: + if (resumable) { +#ifdef XML_DTD + if (parser->m_isParamEntity) { + parser->m_errorCode = XML_ERROR_SUSPEND_PE; + return XML_STATUS_ERROR; } - bufferEnd = newBuf + (bufferEnd - bufferPtr); - bufferPtr = buffer = newBuf; -#endif /* not defined XML_CONTEXT_BYTES */ +#endif + parser->m_parsingStatus.parsing = XML_SUSPENDED; + } else + parser->m_parsingStatus.parsing = XML_FINISHED; + } + return XML_STATUS_OK; +} + +enum XML_Status XMLCALL +XML_ResumeParser(XML_Parser parser) { + enum XML_Status result = XML_STATUS_OK; + + if (parser == NULL) + return XML_STATUS_ERROR; + if (parser->m_parsingStatus.parsing != XML_SUSPENDED) { + parser->m_errorCode = XML_ERROR_NOT_SUSPENDED; + return XML_STATUS_ERROR; + } + parser->m_parsingStatus.parsing = XML_PARSING; + + parser->m_errorCode = parser->m_processor( + parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr); + + if (parser->m_errorCode != XML_ERROR_NONE) { + parser->m_eventEndPtr = parser->m_eventPtr; + parser->m_processor = errorProcessor; + return XML_STATUS_ERROR; + } else { + switch (parser->m_parsingStatus.parsing) { + case XML_SUSPENDED: + result = XML_STATUS_SUSPENDED; + break; + case XML_INITIALIZED: + case XML_PARSING: + if (parser->m_parsingStatus.finalBuffer) { + parser->m_parsingStatus.parsing = XML_FINISHED; + return result; + } + default:; } } - return bufferEnd; + + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, + parser->m_bufferPtr, &parser->m_position); + parser->m_positionPtr = parser->m_bufferPtr; + return result; +} + +void XMLCALL +XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) { + if (parser == NULL) + return; + assert(status != NULL); + *status = parser->m_parsingStatus; } -enum XML_Error XML_GetErrorCode(XML_Parser parser) -{ - return errorCode; +enum XML_Error XMLCALL +XML_GetErrorCode(XML_Parser parser) { + if (parser == NULL) + return XML_ERROR_INVALID_ARGUMENT; + return parser->m_errorCode; } -long XML_GetCurrentByteIndex(XML_Parser parser) -{ - if (eventPtr) - return parseEndByteIndex - (parseEndPtr - eventPtr); +XML_Index XMLCALL +XML_GetCurrentByteIndex(XML_Parser parser) { + if (parser == NULL) + return -1; + if (parser->m_eventPtr) + return (XML_Index)(parser->m_parseEndByteIndex + - (parser->m_parseEndPtr - parser->m_eventPtr)); return -1; } -int XML_GetCurrentByteCount(XML_Parser parser) -{ - if (eventEndPtr && eventPtr) - return eventEndPtr - eventPtr; +int XMLCALL +XML_GetCurrentByteCount(XML_Parser parser) { + if (parser == NULL) + return 0; + if (parser->m_eventEndPtr && parser->m_eventPtr) + return (int)(parser->m_eventEndPtr - parser->m_eventPtr); return 0; } -const char * XML_GetInputContext(XML_Parser parser, int *offset, int *size) -{ +const char *XMLCALL +XML_GetInputContext(XML_Parser parser, int *offset, int *size) { #ifdef XML_CONTEXT_BYTES - if (eventPtr && buffer) { - *offset = eventPtr - buffer; - *size = bufferEnd - buffer; - return buffer; + if (parser == NULL) + return NULL; + if (parser->m_eventPtr && parser->m_buffer) { + if (offset != NULL) + *offset = (int)(parser->m_eventPtr - parser->m_buffer); + if (size != NULL) + *size = (int)(parser->m_bufferEnd - parser->m_buffer); + return parser->m_buffer; } +#else + (void)parser; + (void)offset; + (void)size; #endif /* defined XML_CONTEXT_BYTES */ - return (char *) 0; + return (const char *)0; } -int XML_GetCurrentLineNumber(XML_Parser parser) -{ - if (eventPtr) { - XmlUpdatePosition(encoding, positionPtr, eventPtr, &position); - positionPtr = eventPtr; +XML_Size XMLCALL +XML_GetCurrentLineNumber(XML_Parser parser) { + if (parser == NULL) + return 0; + if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) { + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, + parser->m_eventPtr, &parser->m_position); + parser->m_positionPtr = parser->m_eventPtr; } - return position.lineNumber + 1; + return parser->m_position.lineNumber + 1; } -int XML_GetCurrentColumnNumber(XML_Parser parser) -{ - if (eventPtr) { - XmlUpdatePosition(encoding, positionPtr, eventPtr, &position); - positionPtr = eventPtr; +XML_Size XMLCALL +XML_GetCurrentColumnNumber(XML_Parser parser) { + if (parser == NULL) + return 0; + if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) { + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, + parser->m_eventPtr, &parser->m_position); + parser->m_positionPtr = parser->m_eventPtr; } - return position.columnNumber; + return parser->m_position.columnNumber; } -void XML_DefaultCurrent(XML_Parser parser) -{ - if (defaultHandler) { - if (openInternalEntities) - reportDefault(parser, - internalEncoding, - openInternalEntities->internalEventPtr, - openInternalEntities->internalEventEndPtr); +void XMLCALL +XML_FreeContentModel(XML_Parser parser, XML_Content *model) { + if (parser != NULL) + FREE(parser, model); +} + +void *XMLCALL +XML_MemMalloc(XML_Parser parser, size_t size) { + if (parser == NULL) + return NULL; + return MALLOC(parser, size); +} + +void *XMLCALL +XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) { + if (parser == NULL) + return NULL; + return REALLOC(parser, ptr, size); +} + +void XMLCALL +XML_MemFree(XML_Parser parser, void *ptr) { + if (parser != NULL) + FREE(parser, ptr); +} + +void XMLCALL +XML_DefaultCurrent(XML_Parser parser) { + if (parser == NULL) + return; + if (parser->m_defaultHandler) { + if (parser->m_openInternalEntities) + reportDefault(parser, parser->m_internalEncoding, + parser->m_openInternalEntities->internalEventPtr, + parser->m_openInternalEntities->internalEventEndPtr); else - reportDefault(parser, encoding, eventPtr, eventEndPtr); - } -} - -const XML_LChar *XML_ErrorString(int code) -{ - static const XML_LChar *message[] = { - 0, - XML_T("out of memory"), - XML_T("syntax error"), - XML_T("no element found"), - XML_T("not well-formed (invalid token)"), - XML_T("unclosed token"), - XML_T("unclosed token"), - XML_T("mismatched tag"), - XML_T("duplicate attribute"), - XML_T("junk after document element"), - XML_T("illegal parameter entity reference"), - XML_T("undefined entity"), - XML_T("recursive entity reference"), - XML_T("asynchronous entity"), - XML_T("reference to invalid character number"), - XML_T("reference to binary entity"), - XML_T("reference to external entity in attribute"), - XML_T("xml processing instruction not at start of external entity"), - XML_T("unknown encoding"), - XML_T("encoding specified in XML declaration is incorrect"), - XML_T("unclosed CDATA section"), - XML_T("error in processing external entity reference"), - XML_T("document is not standalone"), - XML_T("unexpected parser state - please send a bug report") - }; - if (code > 0 && code < (int)(sizeof(message)/sizeof(message[0]))) - return message[code]; - return 0; + reportDefault(parser, parser->m_encoding, parser->m_eventPtr, + parser->m_eventEndPtr); + } } -const XML_LChar * +const XML_LChar *XMLCALL +XML_ErrorString(enum XML_Error code) { + switch (code) { + case XML_ERROR_NONE: + return NULL; + case XML_ERROR_NO_MEMORY: + return XML_L("out of memory"); + case XML_ERROR_SYNTAX: + return XML_L("syntax error"); + case XML_ERROR_NO_ELEMENTS: + return XML_L("no element found"); + case XML_ERROR_INVALID_TOKEN: + return XML_L("not well-formed (invalid token)"); + case XML_ERROR_UNCLOSED_TOKEN: + return XML_L("unclosed token"); + case XML_ERROR_PARTIAL_CHAR: + return XML_L("partial character"); + case XML_ERROR_TAG_MISMATCH: + return XML_L("mismatched tag"); + case XML_ERROR_DUPLICATE_ATTRIBUTE: + return XML_L("duplicate attribute"); + case XML_ERROR_JUNK_AFTER_DOC_ELEMENT: + return XML_L("junk after document element"); + case XML_ERROR_PARAM_ENTITY_REF: + return XML_L("illegal parameter entity reference"); + case XML_ERROR_UNDEFINED_ENTITY: + return XML_L("undefined entity"); + case XML_ERROR_RECURSIVE_ENTITY_REF: + return XML_L("recursive entity reference"); + case XML_ERROR_ASYNC_ENTITY: + return XML_L("asynchronous entity"); + case XML_ERROR_BAD_CHAR_REF: + return XML_L("reference to invalid character number"); + case XML_ERROR_BINARY_ENTITY_REF: + return XML_L("reference to binary entity"); + case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF: + return XML_L("reference to external entity in attribute"); + case XML_ERROR_MISPLACED_XML_PI: + return XML_L("XML or text declaration not at start of entity"); + case XML_ERROR_UNKNOWN_ENCODING: + return XML_L("unknown encoding"); + case XML_ERROR_INCORRECT_ENCODING: + return XML_L("encoding specified in XML declaration is incorrect"); + case XML_ERROR_UNCLOSED_CDATA_SECTION: + return XML_L("unclosed CDATA section"); + case XML_ERROR_EXTERNAL_ENTITY_HANDLING: + return XML_L("error in processing external entity reference"); + case XML_ERROR_NOT_STANDALONE: + return XML_L("document is not standalone"); + case XML_ERROR_UNEXPECTED_STATE: + return XML_L("unexpected parser state - please send a bug report"); + case XML_ERROR_ENTITY_DECLARED_IN_PE: + return XML_L("entity declared in parameter entity"); + case XML_ERROR_FEATURE_REQUIRES_XML_DTD: + return XML_L("requested feature requires XML_DTD support in Expat"); + case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING: + return XML_L("cannot change setting once parsing has begun"); + /* Added in 1.95.7. */ + case XML_ERROR_UNBOUND_PREFIX: + return XML_L("unbound prefix"); + /* Added in 1.95.8. */ + case XML_ERROR_UNDECLARING_PREFIX: + return XML_L("must not undeclare prefix"); + case XML_ERROR_INCOMPLETE_PE: + return XML_L("incomplete markup in parameter entity"); + case XML_ERROR_XML_DECL: + return XML_L("XML declaration not well-formed"); + case XML_ERROR_TEXT_DECL: + return XML_L("text declaration not well-formed"); + case XML_ERROR_PUBLICID: + return XML_L("illegal character(s) in public id"); + case XML_ERROR_SUSPENDED: + return XML_L("parser suspended"); + case XML_ERROR_NOT_SUSPENDED: + return XML_L("parser not suspended"); + case XML_ERROR_ABORTED: + return XML_L("parsing aborted"); + case XML_ERROR_FINISHED: + return XML_L("parsing finished"); + case XML_ERROR_SUSPEND_PE: + return XML_L("cannot suspend in external parameter entity"); + /* Added in 2.0.0. */ + case XML_ERROR_RESERVED_PREFIX_XML: + return XML_L( + "reserved prefix (xml) must not be undeclared or bound to another namespace name"); + case XML_ERROR_RESERVED_PREFIX_XMLNS: + return XML_L("reserved prefix (xmlns) must not be declared or undeclared"); + case XML_ERROR_RESERVED_NAMESPACE_URI: + return XML_L( + "prefix must not be bound to one of the reserved namespace names"); + /* Added in 2.2.5. */ + case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */ + return XML_L("invalid argument"); + /* Added in 2.3.0. */ + case XML_ERROR_NO_BUFFER: + return XML_L( + "a successful prior call to function XML_GetBuffer is required"); + /* Added in 2.4.0. */ + case XML_ERROR_AMPLIFICATION_LIMIT_BREACH: + return XML_L( + "limit on input amplification factor (from DTD and entities) breached"); + } + return NULL; +} + +const XML_LChar *XMLCALL XML_ExpatVersion(void) { - return VERSION; + /* V1 is used to string-ize the version number. However, it would + string-ize the actual version macro *names* unless we get them + substituted before being passed to V1. CPP is defined to expand + a macro, then rescan for more expansions. Thus, we use V2 to expand + the version macros, then CPP will expand the resulting V1() macro + with the correct numerals. */ + /* ### I'm assuming cpp is portable in this respect... */ + +#define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c) +#define V2(a, b, c) XML_L("expat_") V1(a, b, c) + + return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION); + +#undef V1 +#undef V2 } -XML_Expat_Version +XML_Expat_Version XMLCALL XML_ExpatVersionInfo(void) { XML_Expat_Version version; @@ -1322,154 +2474,318 @@ XML_ExpatVersionInfo(void) { return version; } -static -enum XML_Error contentProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) -{ - return doContent(parser, 0, encoding, start, end, endPtr); +const XML_Feature *XMLCALL +XML_GetFeatureList(void) { + static const XML_Feature features[] = { + {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"), + sizeof(XML_Char)}, + {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"), + sizeof(XML_LChar)}, +#ifdef XML_UNICODE + {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0}, +#endif +#ifdef XML_UNICODE_WCHAR_T + {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0}, +#endif +#ifdef XML_DTD + {XML_FEATURE_DTD, XML_L("XML_DTD"), 0}, +#endif +#ifdef XML_CONTEXT_BYTES + {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), + XML_CONTEXT_BYTES}, +#endif +#ifdef XML_MIN_SIZE + {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0}, +#endif +#ifdef XML_NS + {XML_FEATURE_NS, XML_L("XML_NS"), 0}, +#endif +#ifdef XML_LARGE_SIZE + {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0}, +#endif +#ifdef XML_ATTR_INFO + {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, +#endif +#ifdef XML_DTD + /* Added in Expat 2.4.0. */ + {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, + XML_L("XML_BLAP_MAX_AMP"), + (long int) + EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT}, + {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT, + XML_L("XML_BLAP_ACT_THRES"), + EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT}, +#endif + {XML_FEATURE_END, NULL, 0}}; + + return features; +} + +#ifdef XML_DTD +XML_Bool XMLCALL +XML_SetBillionLaughsAttackProtectionMaximumAmplification( + XML_Parser parser, float maximumAmplificationFactor) { + if ((parser == NULL) || (parser->m_parentParser != NULL) + || isnan(maximumAmplificationFactor) + || (maximumAmplificationFactor < 1.0f)) { + return XML_FALSE; + } + parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor; + return XML_TRUE; +} + +XML_Bool XMLCALL +XML_SetBillionLaughsAttackProtectionActivationThreshold( + XML_Parser parser, unsigned long long activationThresholdBytes) { + if ((parser == NULL) || (parser->m_parentParser != NULL)) { + return XML_FALSE; + } + parser->m_accounting.activationThresholdBytes = activationThresholdBytes; + return XML_TRUE; } +#endif /* XML_DTD */ -static -enum XML_Error externalEntityInitProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) -{ +/* Initially tag->rawName always points into the parse buffer; + for those TAG instances opened while the current parse buffer was + processed, and not yet closed, we need to store tag->rawName in a more + permanent location, since the parse buffer is about to be discarded. +*/ +static XML_Bool +storeRawNames(XML_Parser parser) { + TAG *tag = parser->m_tagStack; + while (tag) { + int bufSize; + int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1); + char *rawNameBuf = tag->buf + nameLen; + /* Stop if already stored. Since m_tagStack is a stack, we can stop + at the first entry that has already been copied; everything + below it in the stack is already been accounted for in a + previous call to this function. + */ + if (tag->rawName == rawNameBuf) + break; + /* For re-use purposes we need to ensure that the + size of tag->buf is a multiple of sizeof(XML_Char). + */ + bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)); + if (bufSize > tag->bufEnd - tag->buf) { + char *temp = (char *)REALLOC(parser, tag->buf, bufSize); + if (temp == NULL) + return XML_FALSE; + /* if tag->name.str points to tag->buf (only when namespace + processing is off) then we have to update it + */ + if (tag->name.str == (XML_Char *)tag->buf) + tag->name.str = (XML_Char *)temp; + /* if tag->name.localPart is set (when namespace processing is on) + then update it as well, since it will always point into tag->buf + */ + if (tag->name.localPart) + tag->name.localPart + = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf); + tag->buf = temp; + tag->bufEnd = temp + bufSize; + rawNameBuf = temp + nameLen; + } + memcpy(rawNameBuf, tag->rawName, tag->rawNameLength); + tag->rawName = rawNameBuf; + tag = tag->parent; + } + return XML_TRUE; +} + +static enum XML_Error PTRCALL +contentProcessor(XML_Parser parser, const char *start, const char *end, + const char **endPtr) { + enum XML_Error result = doContent( + parser, 0, parser->m_encoding, start, end, endPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); + if (result == XML_ERROR_NONE) { + if (! storeRawNames(parser)) + return XML_ERROR_NO_MEMORY; + } + return result; +} + +static enum XML_Error PTRCALL +externalEntityInitProcessor(XML_Parser parser, const char *start, + const char *end, const char **endPtr) { enum XML_Error result = initializeEncoding(parser); if (result != XML_ERROR_NONE) return result; - processor = externalEntityInitProcessor2; + parser->m_processor = externalEntityInitProcessor2; return externalEntityInitProcessor2(parser, start, end, endPtr); } -static -enum XML_Error externalEntityInitProcessor2(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) -{ - const char *next; - int tok = XmlContentTok(encoding, start, end, &next); +static enum XML_Error PTRCALL +externalEntityInitProcessor2(XML_Parser parser, const char *start, + const char *end, const char **endPtr) { + const char *next = start; /* XmlContentTok doesn't always set the last arg */ + int tok = XmlContentTok(parser->m_encoding, start, end, &next); switch (tok) { case XML_TOK_BOM: +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, start, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif /* XML_DTD */ + + /* If we are at the end of the buffer, this would cause the next stage, + i.e. externalEntityInitProcessor3, to pass control directly to + doContent (by detecting XML_TOK_NONE) without processing any xml text + declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent. + */ + if (next == end && ! parser->m_parsingStatus.finalBuffer) { + *endPtr = next; + return XML_ERROR_NONE; + } start = next; break; case XML_TOK_PARTIAL: - if (endPtr) { + if (! parser->m_parsingStatus.finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } - eventPtr = start; + parser->m_eventPtr = start; return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (endPtr) { + if (! parser->m_parsingStatus.finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } - eventPtr = start; + parser->m_eventPtr = start; return XML_ERROR_PARTIAL_CHAR; } - processor = externalEntityInitProcessor3; + parser->m_processor = externalEntityInitProcessor3; return externalEntityInitProcessor3(parser, start, end, endPtr); } -static -enum XML_Error externalEntityInitProcessor3(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) -{ - const char *next; - int tok = XmlContentTok(encoding, start, end, &next); +static enum XML_Error PTRCALL +externalEntityInitProcessor3(XML_Parser parser, const char *start, + const char *end, const char **endPtr) { + int tok; + const char *next = start; /* XmlContentTok doesn't always set the last arg */ + parser->m_eventPtr = start; + tok = XmlContentTok(parser->m_encoding, start, end, &next); + /* Note: These bytes are accounted later in: + - processXmlDecl + - externalEntityContentProcessor + */ + parser->m_eventEndPtr = next; + switch (tok) { - case XML_TOK_XML_DECL: - { - enum XML_Error result = processXmlDecl(parser, 1, start, next); - if (result != XML_ERROR_NONE) - return result; + case XML_TOK_XML_DECL: { + enum XML_Error result; + result = processXmlDecl(parser, 1, start, next); + if (result != XML_ERROR_NONE) + return result; + switch (parser->m_parsingStatus.parsing) { + case XML_SUSPENDED: + *endPtr = next; + return XML_ERROR_NONE; + case XML_FINISHED: + return XML_ERROR_ABORTED; + default: start = next; } - break; + } break; case XML_TOK_PARTIAL: - if (endPtr) { + if (! parser->m_parsingStatus.finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } - eventPtr = start; return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (endPtr) { + if (! parser->m_parsingStatus.finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } - eventPtr = start; return XML_ERROR_PARTIAL_CHAR; } - processor = externalEntityContentProcessor; - tagLevel = 1; - return doContent(parser, 1, encoding, start, end, endPtr); + parser->m_processor = externalEntityContentProcessor; + parser->m_tagLevel = 1; + return externalEntityContentProcessor(parser, start, end, endPtr); } -static -enum XML_Error externalEntityContentProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) -{ - return doContent(parser, 1, encoding, start, end, endPtr); +static enum XML_Error PTRCALL +externalEntityContentProcessor(XML_Parser parser, const char *start, + const char *end, const char **endPtr) { + enum XML_Error result + = doContent(parser, 1, parser->m_encoding, start, end, endPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, + XML_ACCOUNT_ENTITY_EXPANSION); + if (result == XML_ERROR_NONE) { + if (! storeRawNames(parser)) + return XML_ERROR_NO_MEMORY; + } + return result; } static enum XML_Error -doContent(XML_Parser parser, - int startTagLevel, - const ENCODING *enc, - const char *s, - const char *end, - const char **nextPtr) -{ +doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, + const char *s, const char *end, const char **nextPtr, + XML_Bool haveMore, enum XML_Account account) { + /* save one level of indirection */ + DTD *const dtd = parser->m_dtd; + const char **eventPP; const char **eventEndPP; - if (enc == encoding) { - eventPP = &eventPtr; - eventEndPP = &eventEndPtr; - } - else { - eventPP = &(openInternalEntities->internalEventPtr); - eventEndPP = &(openInternalEntities->internalEventEndPtr); + if (enc == parser->m_encoding) { + eventPP = &parser->m_eventPtr; + eventEndPP = &parser->m_eventEndPtr; + } else { + eventPP = &(parser->m_openInternalEntities->internalEventPtr); + eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); } *eventPP = s; + for (;;) { const char *next = s; /* XmlContentTok doesn't always set the last arg */ int tok = XmlContentTok(enc, s, end, &next); +#ifdef XML_DTD + const char *accountAfter + = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR)) + ? (haveMore ? s /* i.e. 0 bytes */ : end) + : next; + if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__, + account)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif *eventEndPP = next; switch (tok) { case XML_TOK_TRAILING_CR: - if (nextPtr) { + if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } *eventEndPP = end; - if (characterDataHandler) { + if (parser->m_characterDataHandler) { XML_Char c = 0xA; - characterDataHandler(handlerArg, &c, 1); - } - else if (defaultHandler) + parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, end); + /* We are at the end of the final buffer, should we check for + XML_SUSPENDED, XML_FINISHED? + */ if (startTagLevel == 0) return XML_ERROR_NO_ELEMENTS; - if (tagLevel != startTagLevel) + if (parser->m_tagLevel != startTagLevel) return XML_ERROR_ASYNC_ENTITY; + *nextPtr = end; return XML_ERROR_NONE; case XML_TOK_NONE: - if (nextPtr) { + if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } if (startTagLevel > 0) { - if (tagLevel != startTagLevel) + if (parser->m_tagLevel != startTagLevel) return XML_ERROR_ASYNC_ENTITY; + *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_NO_ELEMENTS; @@ -1477,444 +2793,536 @@ doContent(XML_Parser parser, *eventPP = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: - if (nextPtr) { + if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (nextPtr) { + if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; - case XML_TOK_ENTITY_REF: - { - const XML_Char *name; - ENTITY *entity; - XML_Char ch = XmlPredefinedEntityName(enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (ch) { - if (characterDataHandler) - characterDataHandler(handlerArg, &ch, 1); - else if (defaultHandler) + case XML_TOK_ENTITY_REF: { + const XML_Char *name; + ENTITY *entity; + XML_Char ch = (XML_Char)XmlPredefinedEntityName( + enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); + if (ch) { +#ifdef XML_DTD + /* NOTE: We are replacing 4-6 characters original input for 1 character + * so there is no amplification and hence recording without + * protection. */ + accountingDiffTolerated(parser, tok, (char *)&ch, + ((char *)&ch) + sizeof(XML_Char), __LINE__, + XML_ACCOUNT_ENTITY_EXPANSION); +#endif /* XML_DTD */ + if (parser->m_characterDataHandler) + parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1); + else if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + break; + } + name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (! name) + return XML_ERROR_NO_MEMORY; + entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); + poolDiscard(&dtd->pool); + /* First, determine if a check for an existing declaration is needed; + if yes, check that the entity exists, and that it is internal, + otherwise call the skipped entity or default handler. + */ + if (! dtd->hasParamEntityRefs || dtd->standalone) { + if (! entity) + return XML_ERROR_UNDEFINED_ENTITY; + else if (! entity->is_internal) + return XML_ERROR_ENTITY_DECLARED_IN_PE; + } else if (! entity) { + if (parser->m_skippedEntityHandler) + parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); + else if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + break; + } + if (entity->open) + return XML_ERROR_RECURSIVE_ENTITY_REF; + if (entity->notation) + return XML_ERROR_BINARY_ENTITY_REF; + if (entity->textPtr) { + enum XML_Error result; + if (! parser->m_defaultExpandInternalEntities) { + if (parser->m_skippedEntityHandler) + parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name, + 0); + else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; } - name = poolStoreString(&dtd.pool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!name) + result = processInternalEntity(parser, entity, XML_FALSE); + if (result != XML_ERROR_NONE) + return result; + } else if (parser->m_externalEntityRefHandler) { + const XML_Char *context; + entity->open = XML_TRUE; + context = getContext(parser); + entity->open = XML_FALSE; + if (! context) + return XML_ERROR_NO_MEMORY; + if (! parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, context, entity->base, + entity->systemId, entity->publicId)) + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; + poolDiscard(&parser->m_tempPool); + } else if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + break; + } + case XML_TOK_START_TAG_NO_ATTS: + /* fall through */ + case XML_TOK_START_TAG_WITH_ATTS: { + TAG *tag; + enum XML_Error result; + XML_Char *toPtr; + if (parser->m_freeTagList) { + tag = parser->m_freeTagList; + parser->m_freeTagList = parser->m_freeTagList->parent; + } else { + tag = (TAG *)MALLOC(parser, sizeof(TAG)); + if (! tag) + return XML_ERROR_NO_MEMORY; + tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE); + if (! tag->buf) { + FREE(parser, tag); return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0); - poolDiscard(&dtd.pool); - if (!entity) { - if (dtd.complete || dtd.standalone) - return XML_ERROR_UNDEFINED_ENTITY; - if (defaultHandler) - reportDefault(parser, enc, s, next); - break; - } - if (entity->open) - return XML_ERROR_RECURSIVE_ENTITY_REF; - if (entity->notation) - return XML_ERROR_BINARY_ENTITY_REF; - if (entity) { - if (entity->textPtr) { - enum XML_Error result; - OPEN_INTERNAL_ENTITY openEntity; - if (defaultHandler && !defaultExpandInternalEntities) { - reportDefault(parser, enc, s, next); - break; - } - entity->open = 1; - openEntity.next = openInternalEntities; - openInternalEntities = &openEntity; - openEntity.entity = entity; - openEntity.internalEventPtr = 0; - openEntity.internalEventEndPtr = 0; - result = doContent(parser, - tagLevel, - internalEncoding, - (char *)entity->textPtr, - (char *)(entity->textPtr + entity->textLen), - 0); - entity->open = 0; - openInternalEntities = openEntity.next; - if (result) - return result; - } - else if (externalEntityRefHandler) { - const XML_Char *context; - entity->open = 1; - context = getContext(parser); - entity->open = 0; - if (!context) - return XML_ERROR_NO_MEMORY; - if (!externalEntityRefHandler(externalEntityRefHandlerArg, - context, - entity->base, - entity->systemId, - entity->publicId)) - return XML_ERROR_EXTERNAL_ENTITY_HANDLING; - poolDiscard(&tempPool); - } - else if (defaultHandler) - reportDefault(parser, enc, s, next); } - break; - } - case XML_TOK_START_TAG_WITH_ATTS: - if (!startElementHandler) { - enum XML_Error result = storeAtts(parser, enc, s, 0, 0); - if (result) - return result; + tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE; } - /* fall through */ - case XML_TOK_START_TAG_NO_ATTS: + tag->bindings = NULL; + tag->parent = parser->m_tagStack; + parser->m_tagStack = tag; + tag->name.localPart = NULL; + tag->name.prefix = NULL; + tag->rawName = s + enc->minBytesPerChar; + tag->rawNameLength = XmlNameLength(enc, tag->rawName); + ++parser->m_tagLevel; { - TAG *tag; - if (freeTagList) { - tag = freeTagList; - freeTagList = freeTagList->parent; - } - else { - tag = MALLOC(sizeof(TAG)); - if (!tag) - return XML_ERROR_NO_MEMORY; - tag->buf = MALLOC(INIT_TAG_BUF_SIZE); - if (!tag->buf) - return XML_ERROR_NO_MEMORY; - tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE; - } - tag->bindings = 0; - tag->parent = tagStack; - tagStack = tag; - tag->name.localPart = 0; - tag->rawName = s + enc->minBytesPerChar; - tag->rawNameLength = XmlNameLength(enc, tag->rawName); - if (nextPtr) { - /* Need to guarantee that: - tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)) <= tag->bufEnd - sizeof(XML_Char) */ - if (tag->rawNameLength + (int)(sizeof(XML_Char) - 1) + (int)sizeof(XML_Char) > tag->bufEnd - tag->buf) { - int bufSize = tag->rawNameLength * 4; - bufSize = ROUND_UP(bufSize, sizeof(XML_Char)); - tag->buf = REALLOC(tag->buf, bufSize); - if (!tag->buf) - return XML_ERROR_NO_MEMORY; - tag->bufEnd = tag->buf + bufSize; + const char *rawNameEnd = tag->rawName + tag->rawNameLength; + const char *fromPtr = tag->rawName; + toPtr = (XML_Char *)tag->buf; + for (;;) { + int bufSize; + int convLen; + const enum XML_Convert_Result convert_res + = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr, + (ICHAR *)tag->bufEnd - 1); + convLen = (int)(toPtr - (XML_Char *)tag->buf); + if ((fromPtr >= rawNameEnd) + || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) { + tag->name.strLen = convLen; + break; } - memcpy(tag->buf, tag->rawName, tag->rawNameLength); - tag->rawName = tag->buf; - } - ++tagLevel; - if (startElementHandler) { - enum XML_Error result; - XML_Char *toPtr; - for (;;) { - const char *rawNameEnd = tag->rawName + tag->rawNameLength; - const char *fromPtr = tag->rawName; - int bufSize; - if (nextPtr) - toPtr = (XML_Char *)(tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char))); - else - toPtr = (XML_Char *)tag->buf; - tag->name.str = toPtr; - XmlConvert(enc, - &fromPtr, rawNameEnd, - (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1); - if (fromPtr == rawNameEnd) - break; - bufSize = (tag->bufEnd - tag->buf) << 1; - tag->buf = REALLOC(tag->buf, bufSize); - if (!tag->buf) + bufSize = (int)(tag->bufEnd - tag->buf) << 1; + { + char *temp = (char *)REALLOC(parser, tag->buf, bufSize); + if (temp == NULL) return XML_ERROR_NO_MEMORY; - tag->bufEnd = tag->buf + bufSize; - if (nextPtr) - tag->rawName = tag->buf; + tag->buf = temp; + tag->bufEnd = temp + bufSize; + toPtr = (XML_Char *)temp + convLen; } - *toPtr = XML_T('\0'); - result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings)); - if (result) - return result; - startElementHandler(handlerArg, tag->name.str, (const XML_Char **)atts); - poolClear(&tempPool); } - else { - tag->name.str = 0; - if (defaultHandler) - reportDefault(parser, enc, s, next); - } - break; } - case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: - if (!startElementHandler) { - enum XML_Error result = storeAtts(parser, enc, s, 0, 0); - if (result) - return result; - } - /* fall through */ + tag->name.str = (XML_Char *)tag->buf; + *toPtr = XML_T('\0'); + result + = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account); + if (result) + return result; + if (parser->m_startElementHandler) + parser->m_startElementHandler(parser->m_handlerArg, tag->name.str, + (const XML_Char **)parser->m_atts); + else if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + poolClear(&parser->m_tempPool); + break; + } case XML_TOK_EMPTY_ELEMENT_NO_ATTS: - if (startElementHandler || endElementHandler) { - const char *rawName = s + enc->minBytesPerChar; - enum XML_Error result; - BINDING *bindings = 0; - TAG_NAME name; - name.str = poolStoreString(&tempPool, enc, rawName, - rawName + XmlNameLength(enc, rawName)); - if (!name.str) - return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); - result = storeAtts(parser, enc, s, &name, &bindings); - if (result) - return result; - poolFinish(&tempPool); - if (startElementHandler) - startElementHandler(handlerArg, name.str, (const XML_Char **)atts); - if (endElementHandler) { - if (startElementHandler) - *eventPP = *eventEndPP; - endElementHandler(handlerArg, name.str); - } - poolClear(&tempPool); - while (bindings) { - BINDING *b = bindings; - if (endNamespaceDeclHandler) - endNamespaceDeclHandler(handlerArg, b->prefix->name); - bindings = bindings->nextTagBinding; - b->nextTagBinding = freeBindingList; - freeBindingList = b; - b->prefix->binding = b->prevPrefixBinding; - } + /* fall through */ + case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: { + const char *rawName = s + enc->minBytesPerChar; + enum XML_Error result; + BINDING *bindings = NULL; + XML_Bool noElmHandlers = XML_TRUE; + TAG_NAME name; + name.str = poolStoreString(&parser->m_tempPool, enc, rawName, + rawName + XmlNameLength(enc, rawName)); + if (! name.str) + return XML_ERROR_NO_MEMORY; + poolFinish(&parser->m_tempPool); + result = storeAtts(parser, enc, s, &name, &bindings, + XML_ACCOUNT_NONE /* token spans whole start tag */); + if (result != XML_ERROR_NONE) { + freeBindings(parser, bindings); + return result; + } + poolFinish(&parser->m_tempPool); + if (parser->m_startElementHandler) { + parser->m_startElementHandler(parser->m_handlerArg, name.str, + (const XML_Char **)parser->m_atts); + noElmHandlers = XML_FALSE; } - else if (defaultHandler) + if (parser->m_endElementHandler) { + if (parser->m_startElementHandler) + *eventPP = *eventEndPP; + parser->m_endElementHandler(parser->m_handlerArg, name.str); + noElmHandlers = XML_FALSE; + } + if (noElmHandlers && parser->m_defaultHandler) reportDefault(parser, enc, s, next); - if (tagLevel == 0) - return epilogProcessor(parser, next, end, nextPtr); + poolClear(&parser->m_tempPool); + freeBindings(parser, bindings); + } + if ((parser->m_tagLevel == 0) + && (parser->m_parsingStatus.parsing != XML_FINISHED)) { + if (parser->m_parsingStatus.parsing == XML_SUSPENDED) + parser->m_processor = epilogProcessor; + else + return epilogProcessor(parser, next, end, nextPtr); + } break; case XML_TOK_END_TAG: - if (tagLevel == startTagLevel) + if (parser->m_tagLevel == startTagLevel) return XML_ERROR_ASYNC_ENTITY; else { int len; const char *rawName; - TAG *tag = tagStack; - tagStack = tag->parent; - tag->parent = freeTagList; - freeTagList = tag; - rawName = s + enc->minBytesPerChar*2; + TAG *tag = parser->m_tagStack; + parser->m_tagStack = tag->parent; + tag->parent = parser->m_freeTagList; + parser->m_freeTagList = tag; + rawName = s + enc->minBytesPerChar * 2; len = XmlNameLength(enc, rawName); if (len != tag->rawNameLength || memcmp(tag->rawName, rawName, len) != 0) { *eventPP = rawName; return XML_ERROR_TAG_MISMATCH; } - --tagLevel; - if (endElementHandler && tag->name.str) { - if (tag->name.localPart) { - XML_Char *to = (XML_Char *)tag->name.str + tag->name.uriLen; - const XML_Char *from = tag->name.localPart; - while ((*to++ = *from++) != 0) - ; + --parser->m_tagLevel; + if (parser->m_endElementHandler) { + const XML_Char *localPart; + const XML_Char *prefix; + XML_Char *uri; + localPart = tag->name.localPart; + if (parser->m_ns && localPart) { + /* localPart and prefix may have been overwritten in + tag->name.str, since this points to the binding->uri + buffer which gets re-used; so we have to add them again + */ + uri = (XML_Char *)tag->name.str + tag->name.uriLen; + /* don't need to check for space - already done in storeAtts() */ + while (*localPart) + *uri++ = *localPart++; + prefix = (XML_Char *)tag->name.prefix; + if (parser->m_ns_triplets && prefix) { + *uri++ = parser->m_namespaceSeparator; + while (*prefix) + *uri++ = *prefix++; + } + *uri = XML_T('\0'); } - endElementHandler(handlerArg, tag->name.str); - } - else if (defaultHandler) + parser->m_endElementHandler(parser->m_handlerArg, tag->name.str); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); while (tag->bindings) { BINDING *b = tag->bindings; - if (endNamespaceDeclHandler) - endNamespaceDeclHandler(handlerArg, b->prefix->name); + if (parser->m_endNamespaceDeclHandler) + parser->m_endNamespaceDeclHandler(parser->m_handlerArg, + b->prefix->name); tag->bindings = tag->bindings->nextTagBinding; - b->nextTagBinding = freeBindingList; - freeBindingList = b; + b->nextTagBinding = parser->m_freeBindingList; + parser->m_freeBindingList = b; b->prefix->binding = b->prevPrefixBinding; } - if (tagLevel == 0) - return epilogProcessor(parser, next, end, nextPtr); - } - break; - case XML_TOK_CHAR_REF: - { - int n = XmlCharRefNumber(enc, s); - if (n < 0) - return XML_ERROR_BAD_CHAR_REF; - if (characterDataHandler) { - XML_Char buf[XML_ENCODE_MAX]; - characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf)); + if ((parser->m_tagLevel == 0) + && (parser->m_parsingStatus.parsing != XML_FINISHED)) { + if (parser->m_parsingStatus.parsing == XML_SUSPENDED) + parser->m_processor = epilogProcessor; + else + return epilogProcessor(parser, next, end, nextPtr); } - else if (defaultHandler) - reportDefault(parser, enc, s, next); } break; + case XML_TOK_CHAR_REF: { + int n = XmlCharRefNumber(enc, s); + if (n < 0) + return XML_ERROR_BAD_CHAR_REF; + if (parser->m_characterDataHandler) { + XML_Char buf[XML_ENCODE_MAX]; + parser->m_characterDataHandler(parser->m_handlerArg, buf, + XmlEncode(n, (ICHAR *)buf)); + } else if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + } break; case XML_TOK_XML_DECL: return XML_ERROR_MISPLACED_XML_PI; case XML_TOK_DATA_NEWLINE: - if (characterDataHandler) { + if (parser->m_characterDataHandler) { XML_Char c = 0xA; - characterDataHandler(handlerArg, &c, 1); - } - else if (defaultHandler) + parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; - case XML_TOK_CDATA_SECT_OPEN: - { - enum XML_Error result; - if (startCdataSectionHandler) - startCdataSectionHandler(handlerArg); -#if 0 - /* Suppose you doing a transformation on a document that involves - changing only the character data. You set up a defaultHandler - and a characterDataHandler. The defaultHandler simply copies - characters through. The characterDataHandler does the transformation - and writes the characters out escaping them as necessary. This case - will fail to work if we leave out the following two lines (because & - and < inside CDATA sections will be incorrectly escaped). - - However, now we have a start/endCdataSectionHandler, so it seems - easier to let the user deal with this. */ - - else if (characterDataHandler) - characterDataHandler(handlerArg, dataBuf, 0); -#endif - else if (defaultHandler) - reportDefault(parser, enc, s, next); - result = doCdataSection(parser, enc, &next, end, nextPtr); - if (!next) { - processor = cdataSectionProcessor; - return result; - } + case XML_TOK_CDATA_SECT_OPEN: { + enum XML_Error result; + if (parser->m_startCdataSectionHandler) + parser->m_startCdataSectionHandler(parser->m_handlerArg); + /* BEGIN disabled code */ + /* Suppose you doing a transformation on a document that involves + changing only the character data. You set up a defaultHandler + and a characterDataHandler. The defaultHandler simply copies + characters through. The characterDataHandler does the + transformation and writes the characters out escaping them as + necessary. This case will fail to work if we leave out the + following two lines (because & and < inside CDATA sections will + be incorrectly escaped). + + However, now we have a start/endCdataSectionHandler, so it seems + easier to let the user deal with this. + */ + else if (0 && parser->m_characterDataHandler) + parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, + 0); + /* END disabled code */ + else if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + result + = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account); + if (result != XML_ERROR_NONE) + return result; + else if (! next) { + parser->m_processor = cdataSectionProcessor; + return result; } - break; + } break; case XML_TOK_TRAILING_RSQB: - if (nextPtr) { + if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } - if (characterDataHandler) { + if (parser->m_characterDataHandler) { if (MUST_CONVERT(enc, s)) { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); - characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf); - } - else - characterDataHandler(handlerArg, - (XML_Char *)s, - (XML_Char *)end - (XML_Char *)s); - } - else if (defaultHandler) + ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; + XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd); + parser->m_characterDataHandler( + parser->m_handlerArg, parser->m_dataBuf, + (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); + } else + parser->m_characterDataHandler( + parser->m_handlerArg, (XML_Char *)s, + (int)((XML_Char *)end - (XML_Char *)s)); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, end); + /* We are at the end of the final buffer, should we check for + XML_SUSPENDED, XML_FINISHED? + */ if (startTagLevel == 0) { *eventPP = end; return XML_ERROR_NO_ELEMENTS; } - if (tagLevel != startTagLevel) { + if (parser->m_tagLevel != startTagLevel) { *eventPP = end; return XML_ERROR_ASYNC_ENTITY; } + *nextPtr = end; return XML_ERROR_NONE; - case XML_TOK_DATA_CHARS: - if (characterDataHandler) { + case XML_TOK_DATA_CHARS: { + XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler; + if (charDataHandler) { if (MUST_CONVERT(enc, s)) { for (;;) { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); + ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; + const enum XML_Convert_Result convert_res = XmlConvert( + enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd); *eventEndPP = s; - characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf); - if (s == next) + charDataHandler(parser->m_handlerArg, parser->m_dataBuf, + (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); + if ((convert_res == XML_CONVERT_COMPLETED) + || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; *eventPP = s; } - } - else - characterDataHandler(handlerArg, - (XML_Char *)s, - (XML_Char *)next - (XML_Char *)s); - } - else if (defaultHandler) + } else + charDataHandler(parser->m_handlerArg, (XML_Char *)s, + (int)((XML_Char *)next - (XML_Char *)s)); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); - break; + } break; case XML_TOK_PI: - if (!reportProcessingInstruction(parser, enc, s, next)) + if (! reportProcessingInstruction(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_COMMENT: - if (!reportComment(parser, enc, s, next)) + if (! reportComment(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; break; default: - if (defaultHandler) + /* All of the tokens produced by XmlContentTok() have their own + * explicit cases, so this default is not strictly necessary. + * However it is a useful safety net, so we retain the code and + * simply exclude it from the coverage tests. + * + * LCOV_EXCL_START + */ + if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; + /* LCOV_EXCL_STOP */ } *eventPP = s = next; + switch (parser->m_parsingStatus.parsing) { + case XML_SUSPENDED: + *nextPtr = next; + return XML_ERROR_NONE; + case XML_FINISHED: + return XML_ERROR_ABORTED; + default:; + } } /* not reached */ } -/* If tagNamePtr is non-null, build a real list of attributes, -otherwise just check the attributes for well-formedness. */ +/* This function does not call free() on the allocated memory, merely + * moving it to the parser's m_freeBindingList where it can be freed or + * reused as appropriate. + */ +static void +freeBindings(XML_Parser parser, BINDING *bindings) { + while (bindings) { + BINDING *b = bindings; + + /* m_startNamespaceDeclHandler will have been called for this + * binding in addBindings(), so call the end handler now. + */ + if (parser->m_endNamespaceDeclHandler) + parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name); + + bindings = bindings->nextTagBinding; + b->nextTagBinding = parser->m_freeBindingList; + parser->m_freeBindingList = b; + b->prefix->binding = b->prevPrefixBinding; + } +} -static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, - const char *attStr, TAG_NAME *tagNamePtr, - BINDING **bindingsPtr) -{ - ELEMENT_TYPE *elementType = 0; - int nDefaultAtts = 0; - const XML_Char **appAtts; /* the attribute list to pass to the application */ +/* Precondition: all arguments must be non-NULL; + Purpose: + - normalize attributes + - check attributes for well-formedness + - generate namespace aware attribute names (URI, prefix) + - build list of attributes for startElementHandler + - default attributes + - process namespace declarations (check and report them) + - generate namespace aware element name (URI, prefix) +*/ +static enum XML_Error +storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, + TAG_NAME *tagNamePtr, BINDING **bindingsPtr, + enum XML_Account account) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ + ELEMENT_TYPE *elementType; + int nDefaultAtts; + const XML_Char **appAtts; /* the attribute list for the application */ int attIndex = 0; + int prefixLen; int i; int n; + XML_Char *uri; int nPrefixes = 0; BINDING *binding; const XML_Char *localPart; /* lookup the element type name */ - if (tagNamePtr) { - elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str,0); - if (!elementType) { - tagNamePtr->str = poolCopyString(&dtd.pool, tagNamePtr->str); - if (!tagNamePtr->str) - return XML_ERROR_NO_MEMORY; - elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, sizeof(ELEMENT_TYPE)); - if (!elementType) - return XML_ERROR_NO_MEMORY; - if (ns && !setElementTypePrefix(parser, elementType)) - return XML_ERROR_NO_MEMORY; - } - nDefaultAtts = elementType->nDefaultAtts; + elementType + = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0); + if (! elementType) { + const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str); + if (! name) + return XML_ERROR_NO_MEMORY; + elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name, + sizeof(ELEMENT_TYPE)); + if (! elementType) + return XML_ERROR_NO_MEMORY; + if (parser->m_ns && ! setElementTypePrefix(parser, elementType)) + return XML_ERROR_NO_MEMORY; } + nDefaultAtts = elementType->nDefaultAtts; + /* get the attributes from the tokenizer */ - n = XmlGetAttributes(enc, attStr, attsSize, atts); - if (n + nDefaultAtts > attsSize) { - int oldAttsSize = attsSize; - attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; - atts = REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE)); - if (!atts) + n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts); + if (n + nDefaultAtts > parser->m_attsSize) { + int oldAttsSize = parser->m_attsSize; + ATTRIBUTE *temp; +#ifdef XML_ATTR_INFO + XML_AttrInfo *temp2; +#endif + parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; + temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts, + parser->m_attsSize * sizeof(ATTRIBUTE)); + if (temp == NULL) { + parser->m_attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; + } + parser->m_atts = temp; +#ifdef XML_ATTR_INFO + temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo, + parser->m_attsSize * sizeof(XML_AttrInfo)); + if (temp2 == NULL) { + parser->m_attsSize = oldAttsSize; + return XML_ERROR_NO_MEMORY; + } + parser->m_attInfo = temp2; +#endif if (n > oldAttsSize) - XmlGetAttributes(enc, attStr, n, atts); + XmlGetAttributes(enc, attStr, n, parser->m_atts); } - appAtts = (const XML_Char **)atts; + + appAtts = (const XML_Char **)parser->m_atts; for (i = 0; i < n; i++) { + ATTRIBUTE *currAtt = &parser->m_atts[i]; +#ifdef XML_ATTR_INFO + XML_AttrInfo *currAttInfo = &parser->m_attInfo[i]; +#endif /* add the name and value to the attribute list */ - ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name, - atts[i].name - + XmlNameLength(enc, atts[i].name)); - if (!attId) + ATTRIBUTE_ID *attId + = getAttributeId(parser, enc, currAtt->name, + currAtt->name + XmlNameLength(enc, currAtt->name)); + if (! attId) return XML_ERROR_NO_MEMORY; - /* detect duplicate attributes */ +#ifdef XML_ATTR_INFO + currAttInfo->nameStart + = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name); + currAttInfo->nameEnd + = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name); + currAttInfo->valueStart = parser->m_parseEndByteIndex + - (parser->m_parseEndPtr - currAtt->valuePtr); + currAttInfo->valueEnd = parser->m_parseEndByteIndex + - (parser->m_parseEndPtr - currAtt->valueEnd); +#endif + /* Detect duplicate attributes by their QNames. This does not work when + namespace processing is turned on and different prefixes for the same + namespace are used. For this case we have a check further down. + */ if ((attId->name)[-1]) { - if (enc == encoding) - eventPtr = atts[i].name; + if (enc == parser->m_encoding) + parser->m_eventPtr = parser->m_atts[i].name; return XML_ERROR_DUPLICATE_ATTRIBUTE; } (attId->name)[-1] = 1; appAtts[attIndex++] = attId->name; - if (!atts[i].normalized) { + if (! parser->m_atts[i].normalized) { enum XML_Error result; - int isCdata = 1; + XML_Bool isCdata = XML_TRUE; /* figure out whether declared as other than CDATA */ if (attId->maybeTokenized) { @@ -1928,323 +3336,532 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, } /* normalize the attribute value */ - result = storeAttributeValue(parser, enc, isCdata, - atts[i].valuePtr, atts[i].valueEnd, - &tempPool); + result = storeAttributeValue( + parser, enc, isCdata, parser->m_atts[i].valuePtr, + parser->m_atts[i].valueEnd, &parser->m_tempPool, account); if (result) return result; - if (tagNamePtr) { - appAtts[attIndex] = poolStart(&tempPool); - poolFinish(&tempPool); - } - else - poolDiscard(&tempPool); - } - else if (tagNamePtr) { + appAtts[attIndex] = poolStart(&parser->m_tempPool); + poolFinish(&parser->m_tempPool); + } else { /* the value did not need normalizing */ - appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr, atts[i].valueEnd); + appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc, + parser->m_atts[i].valuePtr, + parser->m_atts[i].valueEnd); if (appAtts[attIndex] == 0) return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); + poolFinish(&parser->m_tempPool); } /* handle prefixed attribute names */ - if (attId->prefix && tagNamePtr) { + if (attId->prefix) { if (attId->xmlns) { /* deal with namespace declarations here */ - if (!addBinding(parser, attId->prefix, attId, appAtts[attIndex], bindingsPtr)) - return XML_ERROR_NO_MEMORY; + enum XML_Error result = addBinding(parser, attId->prefix, attId, + appAtts[attIndex], bindingsPtr); + if (result) + return result; --attIndex; - } - else { + } else { /* deal with other prefixed names later */ attIndex++; nPrefixes++; (attId->name)[-1] = 2; } - } - else + } else attIndex++; } - if (tagNamePtr) { - int j; - nSpecifiedAtts = attIndex; - if (elementType->idAtt && (elementType->idAtt->name)[-1]) { - for (i = 0; i < attIndex; i += 2) - if (appAtts[i] == elementType->idAtt->name) { - idAttIndex = i; - break; - } - } - else - idAttIndex = -1; - /* do attribute defaulting */ - for (j = 0; j < nDefaultAtts; j++) { - const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + j; - if (!(da->id->name)[-1] && da->value) { - if (da->id->prefix) { - if (da->id->xmlns) { - if (!addBinding(parser, da->id->prefix, da->id, da->value, bindingsPtr)) - return XML_ERROR_NO_MEMORY; - } - else { - (da->id->name)[-1] = 2; - nPrefixes++; - appAtts[attIndex++] = da->id->name; - appAtts[attIndex++] = da->value; - } - } - else { - (da->id->name)[-1] = 1; + + /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */ + parser->m_nSpecifiedAtts = attIndex; + if (elementType->idAtt && (elementType->idAtt->name)[-1]) { + for (i = 0; i < attIndex; i += 2) + if (appAtts[i] == elementType->idAtt->name) { + parser->m_idAttIndex = i; + break; + } + } else + parser->m_idAttIndex = -1; + + /* do attribute defaulting */ + for (i = 0; i < nDefaultAtts; i++) { + const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i; + if (! (da->id->name)[-1] && da->value) { + if (da->id->prefix) { + if (da->id->xmlns) { + enum XML_Error result = addBinding(parser, da->id->prefix, da->id, + da->value, bindingsPtr); + if (result) + return result; + } else { + (da->id->name)[-1] = 2; + nPrefixes++; appAtts[attIndex++] = da->id->name; appAtts[attIndex++] = da->value; } + } else { + (da->id->name)[-1] = 1; + appAtts[attIndex++] = da->id->name; + appAtts[attIndex++] = da->value; } } - appAtts[attIndex] = 0; } + appAtts[attIndex] = 0; + + /* expand prefixed attribute names, check for duplicates, + and clear flags that say whether attributes were specified */ i = 0; if (nPrefixes) { - /* expand prefixed attribute names */ + int j; /* hash table index */ + unsigned long version = parser->m_nsAttsVersion; + int nsAttsSize = (int)1 << parser->m_nsAttsPower; + unsigned char oldNsAttsPower = parser->m_nsAttsPower; + /* size of hash table must be at least 2 * (# of prefixed attributes) */ + if ((nPrefixes << 1) + >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */ + NS_ATT *temp; + /* hash table size must also be a power of 2 and >= 8 */ + while (nPrefixes >> parser->m_nsAttsPower++) + ; + if (parser->m_nsAttsPower < 3) + parser->m_nsAttsPower = 3; + nsAttsSize = (int)1 << parser->m_nsAttsPower; + temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts, + nsAttsSize * sizeof(NS_ATT)); + if (! temp) { + /* Restore actual size of memory in m_nsAtts */ + parser->m_nsAttsPower = oldNsAttsPower; + return XML_ERROR_NO_MEMORY; + } + parser->m_nsAtts = temp; + version = 0; /* force re-initialization of m_nsAtts hash table */ + } + /* using a version flag saves us from initializing m_nsAtts every time */ + if (! version) { /* initialize version flags when version wraps around */ + version = INIT_ATTS_VERSION; + for (j = nsAttsSize; j != 0;) + parser->m_nsAtts[--j].version = version; + } + parser->m_nsAttsVersion = --version; + + /* expand prefixed names and check for duplicates */ for (; i < attIndex; i += 2) { - if (appAtts[i][-1] == 2) { + const XML_Char *s = appAtts[i]; + if (s[-1] == 2) { /* prefixed */ ATTRIBUTE_ID *id; - ((XML_Char *)(appAtts[i]))[-1] = 0; - id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, appAtts[i], 0); - if (id->prefix->binding) { - int j; - const BINDING *b = id->prefix->binding; - const XML_Char *s = appAtts[i]; - for (j = 0; j < b->uriLen; j++) { - if (!poolAppendChar(&tempPool, b->uri[j])) - return XML_ERROR_NO_MEMORY; + const BINDING *b; + unsigned long uriHash; + struct siphash sip_state; + struct sipkey sip_key; + + copy_salt_to_sipkey(parser, &sip_key); + sip24_init(&sip_state, &sip_key); + + ((XML_Char *)s)[-1] = 0; /* clear flag */ + id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0); + if (! id || ! id->prefix) { + /* This code is walking through the appAtts array, dealing + * with (in this case) a prefixed attribute name. To be in + * the array, the attribute must have already been bound, so + * has to have passed through the hash table lookup once + * already. That implies that an entry for it already + * exists, so the lookup above will return a pointer to + * already allocated memory. There is no opportunaity for + * the allocator to fail, so the condition above cannot be + * fulfilled. + * + * Since it is difficult to be certain that the above + * analysis is complete, we retain the test and merely + * remove the code from coverage tests. + */ + return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ + } + b = id->prefix->binding; + if (! b) + return XML_ERROR_UNBOUND_PREFIX; + + for (j = 0; j < b->uriLen; j++) { + const XML_Char c = b->uri[j]; + if (! poolAppendChar(&parser->m_tempPool, c)) + return XML_ERROR_NO_MEMORY; + } + + sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char)); + + while (*s++ != XML_T(ASCII_COLON)) + ; + + sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char)); + + do { /* copies null terminator */ + if (! poolAppendChar(&parser->m_tempPool, *s)) + return XML_ERROR_NO_MEMORY; + } while (*s++); + + uriHash = (unsigned long)sip24_final(&sip_state); + + { /* Check hash table for duplicate of expanded name (uriName). + Derived from code in lookup(parser, HASH_TABLE *table, ...). + */ + unsigned char step = 0; + unsigned long mask = nsAttsSize - 1; + j = uriHash & mask; /* index into hash table */ + while (parser->m_nsAtts[j].version == version) { + /* for speed we compare stored hash values first */ + if (uriHash == parser->m_nsAtts[j].hash) { + const XML_Char *s1 = poolStart(&parser->m_tempPool); + const XML_Char *s2 = parser->m_nsAtts[j].uriName; + /* s1 is null terminated, but not s2 */ + for (; *s1 == *s2 && *s1 != 0; s1++, s2++) + ; + if (*s1 == 0) + return XML_ERROR_DUPLICATE_ATTRIBUTE; + } + if (! step) + step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower); + j < step ? (j += nsAttsSize - step) : (j -= step); } - while (*s++ != ':') - ; + } + + if (parser->m_ns_triplets) { /* append namespace separator and prefix */ + parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator; + s = b->prefix->name; do { - if (!poolAppendChar(&tempPool, *s)) + if (! poolAppendChar(&parser->m_tempPool, *s)) return XML_ERROR_NO_MEMORY; } while (*s++); - if (ns_triplets) { - tempPool.ptr[-1] = namespaceSeparator; - s = b->prefix->name; - do { - if (!poolAppendChar(&tempPool, *s)) - return XML_ERROR_NO_MEMORY; - } while (*s++); - } - - appAtts[i] = poolStart(&tempPool); - poolFinish(&tempPool); } - if (!--nPrefixes) + + /* store expanded name in attribute list */ + s = poolStart(&parser->m_tempPool); + poolFinish(&parser->m_tempPool); + appAtts[i] = s; + + /* fill empty slot with new version, uriName and hash value */ + parser->m_nsAtts[j].version = version; + parser->m_nsAtts[j].hash = uriHash; + parser->m_nsAtts[j].uriName = s; + + if (! --nPrefixes) { + i += 2; break; - } - else - ((XML_Char *)(appAtts[i]))[-1] = 0; + } + } else /* not prefixed */ + ((XML_Char *)s)[-1] = 0; /* clear flag */ } } - /* clear the flags that say whether attributes were specified */ + /* clear flags for the remaining attributes */ for (; i < attIndex; i += 2) ((XML_Char *)(appAtts[i]))[-1] = 0; - if (!tagNamePtr) - return XML_ERROR_NONE; for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding) binding->attId->name[-1] = 0; + + if (! parser->m_ns) + return XML_ERROR_NONE; + /* expand the element type name */ if (elementType->prefix) { binding = elementType->prefix->binding; - if (!binding) - return XML_ERROR_NONE; + if (! binding) + return XML_ERROR_UNBOUND_PREFIX; localPart = tagNamePtr->str; - while (*localPart++ != XML_T(':')) + while (*localPart++ != XML_T(ASCII_COLON)) ; - } - else if (dtd.defaultPrefix.binding) { - binding = dtd.defaultPrefix.binding; + } else if (dtd->defaultPrefix.binding) { + binding = dtd->defaultPrefix.binding; localPart = tagNamePtr->str; - } - else + } else return XML_ERROR_NONE; + prefixLen = 0; + if (parser->m_ns_triplets && binding->prefix->name) { + for (; binding->prefix->name[prefixLen++];) + ; /* prefixLen includes null terminator */ + } tagNamePtr->localPart = localPart; tagNamePtr->uriLen = binding->uriLen; + tagNamePtr->prefix = binding->prefix->name; + tagNamePtr->prefixLen = prefixLen; for (i = 0; localPart[i++];) - ; - n = i + binding->uriLen; + ; /* i includes null terminator */ + n = i + binding->uriLen + prefixLen; if (n > binding->uriAlloc) { TAG *p; - XML_Char *uri = MALLOC((n + EXPAND_SPARE) * sizeof(XML_Char)); - if (!uri) + uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char)); + if (! uri) return XML_ERROR_NO_MEMORY; binding->uriAlloc = n + EXPAND_SPARE; memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char)); - for (p = tagStack; p; p = p->parent) + for (p = parser->m_tagStack; p; p = p->parent) if (p->name.str == binding->uri) p->name.str = uri; - FREE(binding->uri); + FREE(parser, binding->uri); binding->uri = uri; } - memcpy(binding->uri + binding->uriLen, localPart, i * sizeof(XML_Char)); + /* if m_namespaceSeparator != '\0' then uri includes it already */ + uri = binding->uri + binding->uriLen; + memcpy(uri, localPart, i * sizeof(XML_Char)); + /* we always have a namespace separator between localPart and prefix */ + if (prefixLen) { + uri += i - 1; + *uri = parser->m_namespaceSeparator; /* replace null terminator */ + memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char)); + } tagNamePtr->str = binding->uri; return XML_ERROR_NONE; } -static -int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr) -{ +/* addBinding() overwrites the value of prefix->binding without checking. + Therefore one must keep track of the old value outside of addBinding(). +*/ +static enum XML_Error +addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, + const XML_Char *uri, BINDING **bindingsPtr) { + static const XML_Char xmlNamespace[] + = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, + ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, + ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, + ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, + ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, + ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, + ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c, + ASCII_e, '\0'}; + static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1; + static const XML_Char xmlnsNamespace[] + = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, + ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, + ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, + ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x, + ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'}; + static const int xmlnsLen + = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1; + + XML_Bool mustBeXML = XML_FALSE; + XML_Bool isXML = XML_TRUE; + XML_Bool isXMLNS = XML_TRUE; + BINDING *b; int len; - for (len = 0; uri[len]; len++) - ; - if (namespaceSeparator) + + /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */ + if (*uri == XML_T('\0') && prefix->name) + return XML_ERROR_UNDECLARING_PREFIX; + + if (prefix->name && prefix->name[0] == XML_T(ASCII_x) + && prefix->name[1] == XML_T(ASCII_m) + && prefix->name[2] == XML_T(ASCII_l)) { + /* Not allowed to bind xmlns */ + if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s) + && prefix->name[5] == XML_T('\0')) + return XML_ERROR_RESERVED_PREFIX_XMLNS; + + if (prefix->name[3] == XML_T('\0')) + mustBeXML = XML_TRUE; + } + + for (len = 0; uri[len]; len++) { + if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len])) + isXML = XML_FALSE; + + if (! mustBeXML && isXMLNS + && (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) + isXMLNS = XML_FALSE; + } + isXML = isXML && len == xmlLen; + isXMLNS = isXMLNS && len == xmlnsLen; + + if (mustBeXML != isXML) + return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML + : XML_ERROR_RESERVED_NAMESPACE_URI; + + if (isXMLNS) + return XML_ERROR_RESERVED_NAMESPACE_URI; + + if (parser->m_namespaceSeparator) len++; - if (freeBindingList) { - b = freeBindingList; + if (parser->m_freeBindingList) { + b = parser->m_freeBindingList; if (len > b->uriAlloc) { - b->uri = REALLOC(b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE)); - if (!b->uri) - return 0; + XML_Char *temp = (XML_Char *)REALLOC( + parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE)); + if (temp == NULL) + return XML_ERROR_NO_MEMORY; + b->uri = temp; b->uriAlloc = len + EXPAND_SPARE; } - freeBindingList = b->nextTagBinding; - } - else { - b = MALLOC(sizeof(BINDING)); - if (!b) - return 0; - b->uri = MALLOC(sizeof(XML_Char) * (len + EXPAND_SPARE)); - if (!b->uri) { - FREE(b); - return 0; + parser->m_freeBindingList = b->nextTagBinding; + } else { + b = (BINDING *)MALLOC(parser, sizeof(BINDING)); + if (! b) + return XML_ERROR_NO_MEMORY; + b->uri + = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE)); + if (! b->uri) { + FREE(parser, b); + return XML_ERROR_NO_MEMORY; } b->uriAlloc = len + EXPAND_SPARE; } b->uriLen = len; memcpy(b->uri, uri, len * sizeof(XML_Char)); - if (namespaceSeparator) - b->uri[len - 1] = namespaceSeparator; + if (parser->m_namespaceSeparator) + b->uri[len - 1] = parser->m_namespaceSeparator; b->prefix = prefix; b->attId = attId; b->prevPrefixBinding = prefix->binding; - if (*uri == XML_T('\0') && prefix == &dtd.defaultPrefix) - prefix->binding = 0; + /* NULL binding when default namespace undeclared */ + if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix) + prefix->binding = NULL; else prefix->binding = b; b->nextTagBinding = *bindingsPtr; *bindingsPtr = b; - if (startNamespaceDeclHandler) - startNamespaceDeclHandler(handlerArg, prefix->name, - prefix->binding ? uri : 0); - return 1; + /* if attId == NULL then we are not starting a namespace scope */ + if (attId && parser->m_startNamespaceDeclHandler) + parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name, + prefix->binding ? uri : 0); + return XML_ERROR_NONE; } /* The idea here is to avoid using stack for each CDATA section when -the whole file is parsed with one call. */ - -static -enum XML_Error cdataSectionProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) -{ - enum XML_Error result = doCdataSection(parser, encoding, &start, end, endPtr); + the whole file is parsed with one call. +*/ +static enum XML_Error PTRCALL +cdataSectionProcessor(XML_Parser parser, const char *start, const char *end, + const char **endPtr) { + enum XML_Error result = doCdataSection( + parser, parser->m_encoding, &start, end, endPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); + if (result != XML_ERROR_NONE) + return result; if (start) { - processor = contentProcessor; - return contentProcessor(parser, start, end, endPtr); + if (parser->m_parentParser) { /* we are parsing an external entity */ + parser->m_processor = externalEntityContentProcessor; + return externalEntityContentProcessor(parser, start, end, endPtr); + } else { + parser->m_processor = contentProcessor; + return contentProcessor(parser, start, end, endPtr); + } } return result; } -/* startPtr gets set to non-null is the section is closed, and to null if -the section is not yet closed. */ - -static -enum XML_Error doCdataSection(XML_Parser parser, - const ENCODING *enc, - const char **startPtr, - const char *end, - const char **nextPtr) -{ +/* startPtr gets set to non-null if the section is closed, and to null if + the section is not yet closed. +*/ +static enum XML_Error +doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, + const char *end, const char **nextPtr, XML_Bool haveMore, + enum XML_Account account) { const char *s = *startPtr; const char **eventPP; const char **eventEndPP; - if (enc == encoding) { - eventPP = &eventPtr; + if (enc == parser->m_encoding) { + eventPP = &parser->m_eventPtr; *eventPP = s; - eventEndPP = &eventEndPtr; - } - else { - eventPP = &(openInternalEntities->internalEventPtr); - eventEndPP = &(openInternalEntities->internalEventEndPtr); + eventEndPP = &parser->m_eventEndPtr; + } else { + eventPP = &(parser->m_openInternalEntities->internalEventPtr); + eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); } *eventPP = s; - *startPtr = 0; + *startPtr = NULL; + for (;;) { - const char *next; + const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ int tok = XmlCdataSectionTok(enc, s, end, &next); +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#else + UNUSED_P(account); +#endif *eventEndPP = next; switch (tok) { case XML_TOK_CDATA_SECT_CLOSE: - if (endCdataSectionHandler) - endCdataSectionHandler(handlerArg); -#if 0 + if (parser->m_endCdataSectionHandler) + parser->m_endCdataSectionHandler(parser->m_handlerArg); + /* BEGIN disabled code */ /* see comment under XML_TOK_CDATA_SECT_OPEN */ - else if (characterDataHandler) - characterDataHandler(handlerArg, dataBuf, 0); -#endif - else if (defaultHandler) + else if (0 && parser->m_characterDataHandler) + parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, + 0); + /* END disabled code */ + else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); *startPtr = next; - return XML_ERROR_NONE; + *nextPtr = next; + if (parser->m_parsingStatus.parsing == XML_FINISHED) + return XML_ERROR_ABORTED; + else + return XML_ERROR_NONE; case XML_TOK_DATA_NEWLINE: - if (characterDataHandler) { + if (parser->m_characterDataHandler) { XML_Char c = 0xA; - characterDataHandler(handlerArg, &c, 1); - } - else if (defaultHandler) + parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; - case XML_TOK_DATA_CHARS: - if (characterDataHandler) { + case XML_TOK_DATA_CHARS: { + XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler; + if (charDataHandler) { if (MUST_CONVERT(enc, s)) { for (;;) { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); + ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; + const enum XML_Convert_Result convert_res = XmlConvert( + enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd); *eventEndPP = next; - characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf); - if (s == next) + charDataHandler(parser->m_handlerArg, parser->m_dataBuf, + (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); + if ((convert_res == XML_CONVERT_COMPLETED) + || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; *eventPP = s; } - } - else - characterDataHandler(handlerArg, - (XML_Char *)s, - (XML_Char *)next - (XML_Char *)s); - } - else if (defaultHandler) + } else + charDataHandler(parser->m_handlerArg, (XML_Char *)s, + (int)((XML_Char *)next - (XML_Char *)s)); + } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); - break; + } break; case XML_TOK_INVALID: *eventPP = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (nextPtr) { + if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; case XML_TOK_PARTIAL: case XML_TOK_NONE: - if (nextPtr) { + if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_CDATA_SECTION; default: + /* Every token returned by XmlCdataSectionTok() has its own + * explicit case, so this default case will never be executed. + * We retain it as a safety net and exclude it from the coverage + * statistics. + * + * LCOV_EXCL_START + */ *eventPP = next; return XML_ERROR_UNEXPECTED_STATE; + /* LCOV_EXCL_STOP */ } + *eventPP = s = next; + switch (parser->m_parsingStatus.parsing) { + case XML_SUSPENDED: + *nextPtr = next; + return XML_ERROR_NONE; + case XML_FINISHED: + return XML_ERROR_ABORTED; + default:; + } } /* not reached */ } @@ -2252,75 +3869,102 @@ enum XML_Error doCdataSection(XML_Parser parser, #ifdef XML_DTD /* The idea here is to avoid using stack for each IGNORE section when -the whole file is parsed with one call. */ - -static -enum XML_Error ignoreSectionProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) -{ - enum XML_Error result = doIgnoreSection(parser, encoding, &start, end, endPtr); + the whole file is parsed with one call. +*/ +static enum XML_Error PTRCALL +ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end, + const char **endPtr) { + enum XML_Error result + = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer); + if (result != XML_ERROR_NONE) + return result; if (start) { - processor = prologProcessor; + parser->m_processor = prologProcessor; return prologProcessor(parser, start, end, endPtr); } return result; } -/* startPtr gets set to non-null is the section is closed, and to null if -the section is not yet closed. */ - -static -enum XML_Error doIgnoreSection(XML_Parser parser, - const ENCODING *enc, - const char **startPtr, - const char *end, - const char **nextPtr) -{ - const char *next; +/* startPtr gets set to non-null is the section is closed, and to null + if the section is not yet closed. +*/ +static enum XML_Error +doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, + const char *end, const char **nextPtr, XML_Bool haveMore) { + const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ int tok; const char *s = *startPtr; const char **eventPP; const char **eventEndPP; - if (enc == encoding) { - eventPP = &eventPtr; + if (enc == parser->m_encoding) { + eventPP = &parser->m_eventPtr; *eventPP = s; - eventEndPP = &eventEndPtr; - } - else { - eventPP = &(openInternalEntities->internalEventPtr); - eventEndPP = &(openInternalEntities->internalEventEndPtr); + eventEndPP = &parser->m_eventEndPtr; + } else { + /* It's not entirely clear, but it seems the following two lines + * of code cannot be executed. The only occasions on which 'enc' + * is not 'encoding' are when this function is called + * from the internal entity processing, and IGNORE sections are an + * error in internal entities. + * + * Since it really isn't clear that this is true, we keep the code + * and just remove it from our coverage tests. + * + * LCOV_EXCL_START + */ + eventPP = &(parser->m_openInternalEntities->internalEventPtr); + eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); + /* LCOV_EXCL_STOP */ } *eventPP = s; - *startPtr = 0; + *startPtr = NULL; tok = XmlIgnoreSectionTok(enc, s, end, &next); +# ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +# endif *eventEndPP = next; switch (tok) { case XML_TOK_IGNORE_SECT: - if (defaultHandler) + if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); *startPtr = next; - return XML_ERROR_NONE; + *nextPtr = next; + if (parser->m_parsingStatus.parsing == XML_FINISHED) + return XML_ERROR_ABORTED; + else + return XML_ERROR_NONE; case XML_TOK_INVALID: *eventPP = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (nextPtr) { + if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; case XML_TOK_PARTIAL: case XML_TOK_NONE: - if (nextPtr) { + if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */ default: + /* All of the tokens that XmlIgnoreSectionTok() returns have + * explicit cases to handle them, so this default case is never + * executed. We keep it as a safety net anyway, and remove it + * from our test coverage statistics. + * + * LCOV_EXCL_START + */ *eventPP = next; return XML_ERROR_UNEXPECTED_STATE; + /* LCOV_EXCL_STOP */ } /* not reached */ } @@ -2328,214 +3972,427 @@ enum XML_Error doIgnoreSection(XML_Parser parser, #endif /* XML_DTD */ static enum XML_Error -initializeEncoding(XML_Parser parser) -{ +initializeEncoding(XML_Parser parser) { const char *s; #ifdef XML_UNICODE char encodingBuf[128]; - if (!protocolEncodingName) - s = 0; + /* See comments abount `protoclEncodingName` in parserInit() */ + if (! parser->m_protocolEncodingName) + s = NULL; else { int i; - for (i = 0; protocolEncodingName[i]; i++) { + for (i = 0; parser->m_protocolEncodingName[i]; i++) { if (i == sizeof(encodingBuf) - 1 - || (protocolEncodingName[i] & ~0x7f) != 0) { + || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) { encodingBuf[0] = '\0'; break; } - encodingBuf[i] = (char)protocolEncodingName[i]; + encodingBuf[i] = (char)parser->m_protocolEncodingName[i]; } encodingBuf[i] = '\0'; s = encodingBuf; } #else - s = protocolEncodingName; + s = parser->m_protocolEncodingName; #endif - if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s)) + if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)( + &parser->m_initEncoding, &parser->m_encoding, s)) return XML_ERROR_NONE; - return handleUnknownEncoding(parser, protocolEncodingName); + return handleUnknownEncoding(parser, parser->m_protocolEncodingName); } static enum XML_Error -processXmlDecl(XML_Parser parser, int isGeneralTextEntity, - const char *s, const char *next) -{ - const char *encodingName = 0; - const char *storedEncName = 0; - const ENCODING *newEncoding = 0; - const char *version = 0; +processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s, + const char *next) { + const char *encodingName = NULL; + const XML_Char *storedEncName = NULL; + const ENCODING *newEncoding = NULL; + const char *version = NULL; const char *versionend; - const char *storedversion = 0; + const XML_Char *storedversion = NULL; int standalone = -1; - if (!(ns - ? XmlParseXmlDeclNS - : XmlParseXmlDecl)(isGeneralTextEntity, - encoding, - s, - next, - &eventPtr, - &version, - &versionend, - &encodingName, - &newEncoding, - &standalone)) - return XML_ERROR_SYNTAX; - if (!isGeneralTextEntity && standalone == 1) { - dtd.standalone = 1; + +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif + + if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)( + isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr, + &version, &versionend, &encodingName, &newEncoding, &standalone)) { + if (isGeneralTextEntity) + return XML_ERROR_TEXT_DECL; + else + return XML_ERROR_XML_DECL; + } + if (! isGeneralTextEntity && standalone == 1) { + parser->m_dtd->standalone = XML_TRUE; #ifdef XML_DTD - if (paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) - paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; + if (parser->m_paramEntityParsing + == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) + parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; #endif /* XML_DTD */ } - if (xmlDeclHandler) { - if (encodingName) { - storedEncName = poolStoreString(&temp2Pool, - encoding, - encodingName, - encodingName - + XmlNameLength(encoding, encodingName)); + if (parser->m_xmlDeclHandler) { + if (encodingName != NULL) { + storedEncName = poolStoreString( + &parser->m_temp2Pool, parser->m_encoding, encodingName, + encodingName + XmlNameLength(parser->m_encoding, encodingName)); if (! storedEncName) return XML_ERROR_NO_MEMORY; - poolFinish(&temp2Pool); + poolFinish(&parser->m_temp2Pool); } if (version) { - storedversion = poolStoreString(&temp2Pool, - encoding, - version, - versionend - encoding->minBytesPerChar); + storedversion + = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version, + versionend - parser->m_encoding->minBytesPerChar); if (! storedversion) return XML_ERROR_NO_MEMORY; } - xmlDeclHandler(handlerArg, storedversion, storedEncName, standalone); - } - else if (defaultHandler) - reportDefault(parser, encoding, s, next); - if (!protocolEncodingName) { + parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName, + standalone); + } else if (parser->m_defaultHandler) + reportDefault(parser, parser->m_encoding, s, next); + if (parser->m_protocolEncodingName == NULL) { if (newEncoding) { - if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) { - eventPtr = encodingName; + /* Check that the specified encoding does not conflict with what + * the parser has already deduced. Do we have the same number + * of bytes in the smallest representation of a character? If + * this is UTF-16, is it the same endianness? + */ + if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar + || (newEncoding->minBytesPerChar == 2 + && newEncoding != parser->m_encoding)) { + parser->m_eventPtr = encodingName; return XML_ERROR_INCORRECT_ENCODING; } - encoding = newEncoding; - } - else if (encodingName) { + parser->m_encoding = newEncoding; + } else if (encodingName) { enum XML_Error result; if (! storedEncName) { - storedEncName = poolStoreString(&temp2Pool, - encoding, - encodingName, - encodingName - + XmlNameLength(encoding, encodingName)); + storedEncName = poolStoreString( + &parser->m_temp2Pool, parser->m_encoding, encodingName, + encodingName + XmlNameLength(parser->m_encoding, encodingName)); if (! storedEncName) return XML_ERROR_NO_MEMORY; } result = handleUnknownEncoding(parser, storedEncName); - poolClear(&tempPool); + poolClear(&parser->m_temp2Pool); if (result == XML_ERROR_UNKNOWN_ENCODING) - eventPtr = encodingName; + parser->m_eventPtr = encodingName; return result; } } if (storedEncName || storedversion) - poolClear(&temp2Pool); + poolClear(&parser->m_temp2Pool); return XML_ERROR_NONE; } static enum XML_Error -handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) -{ - if (unknownEncodingHandler) { +handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) { + if (parser->m_unknownEncodingHandler) { XML_Encoding info; int i; for (i = 0; i < 256; i++) info.map[i] = -1; - info.convert = 0; - info.data = 0; - info.release = 0; - if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName, &info)) { + info.convert = NULL; + info.data = NULL; + info.release = NULL; + if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData, + encodingName, &info)) { ENCODING *enc; - unknownEncodingMem = MALLOC(XmlSizeOfUnknownEncoding()); - if (!unknownEncodingMem) { + parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding()); + if (! parser->m_unknownEncodingMem) { if (info.release) info.release(info.data); return XML_ERROR_NO_MEMORY; } - enc = (ns - ? XmlInitUnknownEncodingNS - : XmlInitUnknownEncoding)(unknownEncodingMem, - info.map, - info.convert, - info.data); + enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)( + parser->m_unknownEncodingMem, info.map, info.convert, info.data); if (enc) { - unknownEncodingData = info.data; - unknownEncodingRelease = info.release; - encoding = enc; + parser->m_unknownEncodingData = info.data; + parser->m_unknownEncodingRelease = info.release; + parser->m_encoding = enc; return XML_ERROR_NONE; } } - if (info.release) + if (info.release != NULL) info.release(info.data); } return XML_ERROR_UNKNOWN_ENCODING; } -static enum XML_Error -prologInitProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ +static enum XML_Error PTRCALL +prologInitProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { enum XML_Error result = initializeEncoding(parser); if (result != XML_ERROR_NONE) return result; - processor = prologProcessor; + parser->m_processor = prologProcessor; return prologProcessor(parser, s, end, nextPtr); } -static enum XML_Error -prologProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ - const char *next; - int tok = XmlPrologTok(encoding, s, end, &next); - return doProlog(parser, encoding, s, end, tok, next, nextPtr); +#ifdef XML_DTD + +static enum XML_Error PTRCALL +externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { + enum XML_Error result = initializeEncoding(parser); + if (result != XML_ERROR_NONE) + return result; + + /* we know now that XML_Parse(Buffer) has been called, + so we consider the external parameter entity read */ + parser->m_dtd->paramEntityRead = XML_TRUE; + + if (parser->m_prologState.inEntityValue) { + parser->m_processor = entityValueInitProcessor; + return entityValueInitProcessor(parser, s, end, nextPtr); + } else { + parser->m_processor = externalParEntProcessor; + return externalParEntProcessor(parser, s, end, nextPtr); + } +} + +static enum XML_Error PTRCALL +entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { + int tok; + const char *start = s; + const char *next = start; + parser->m_eventPtr = start; + + for (;;) { + tok = XmlPrologTok(parser->m_encoding, start, end, &next); + /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in: + - storeEntityValue + - processXmlDecl + */ + parser->m_eventEndPtr = next; + if (tok <= 0) { + if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { + *nextPtr = s; + return XML_ERROR_NONE; + } + switch (tok) { + case XML_TOK_INVALID: + return XML_ERROR_INVALID_TOKEN; + case XML_TOK_PARTIAL: + return XML_ERROR_UNCLOSED_TOKEN; + case XML_TOK_PARTIAL_CHAR: + return XML_ERROR_PARTIAL_CHAR; + case XML_TOK_NONE: /* start == end */ + default: + break; + } + /* found end of entity value - can store it now */ + return storeEntityValue(parser, parser->m_encoding, s, end, + XML_ACCOUNT_DIRECT); + } else if (tok == XML_TOK_XML_DECL) { + enum XML_Error result; + result = processXmlDecl(parser, 0, start, next); + if (result != XML_ERROR_NONE) + return result; + /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For + * that to happen, a parameter entity parsing handler must have attempted + * to suspend the parser, which fails and raises an error. The parser can + * be aborted, but can't be suspended. + */ + if (parser->m_parsingStatus.parsing == XML_FINISHED) + return XML_ERROR_ABORTED; + *nextPtr = next; + /* stop scanning for text declaration - we found one */ + parser->m_processor = entityValueProcessor; + return entityValueProcessor(parser, next, end, nextPtr); + } + /* If we are at the end of the buffer, this would cause XmlPrologTok to + return XML_TOK_NONE on the next call, which would then cause the + function to exit with *nextPtr set to s - that is what we want for other + tokens, but not for the BOM - we would rather like to skip it; + then, when this routine is entered the next time, XmlPrologTok will + return XML_TOK_INVALID, since the BOM is still in the buffer + */ + else if (tok == XML_TOK_BOM && next == end + && ! parser->m_parsingStatus.finalBuffer) { +# ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +# endif + + *nextPtr = next; + return XML_ERROR_NONE; + } + /* If we get this token, we have the start of what might be a + normal tag, but not a declaration (i.e. it doesn't begin with + "m_eventPtr = start; + } +} + +static enum XML_Error PTRCALL +externalParEntProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { + const char *next = s; + int tok; + + tok = XmlPrologTok(parser->m_encoding, s, end, &next); + if (tok <= 0) { + if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { + *nextPtr = s; + return XML_ERROR_NONE; + } + switch (tok) { + case XML_TOK_INVALID: + return XML_ERROR_INVALID_TOKEN; + case XML_TOK_PARTIAL: + return XML_ERROR_UNCLOSED_TOKEN; + case XML_TOK_PARTIAL_CHAR: + return XML_ERROR_PARTIAL_CHAR; + case XML_TOK_NONE: /* start == end */ + default: + break; + } + } + /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM. + However, when parsing an external subset, doProlog will not accept a BOM + as valid, and report a syntax error, so we have to skip the BOM, and + account for the BOM bytes. + */ + else if (tok == XML_TOK_BOM) { + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } + + s = next; + tok = XmlPrologTok(parser->m_encoding, s, end, &next); + } + + parser->m_processor = prologProcessor; + return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, + XML_ACCOUNT_DIRECT); +} + +static enum XML_Error PTRCALL +entityValueProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { + const char *start = s; + const char *next = s; + const ENCODING *enc = parser->m_encoding; + int tok; + + for (;;) { + tok = XmlPrologTok(enc, start, end, &next); + /* Note: These bytes are accounted later in: + - storeEntityValue + */ + if (tok <= 0) { + if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { + *nextPtr = s; + return XML_ERROR_NONE; + } + switch (tok) { + case XML_TOK_INVALID: + return XML_ERROR_INVALID_TOKEN; + case XML_TOK_PARTIAL: + return XML_ERROR_UNCLOSED_TOKEN; + case XML_TOK_PARTIAL_CHAR: + return XML_ERROR_PARTIAL_CHAR; + case XML_TOK_NONE: /* start == end */ + default: + break; + } + /* found end of entity value - can store it now */ + return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT); + } + start = next; + } +} + +#endif /* XML_DTD */ + +static enum XML_Error PTRCALL +prologProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { + const char *next = s; + int tok = XmlPrologTok(parser->m_encoding, s, end, &next); + return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, + XML_ACCOUNT_DIRECT); } static enum XML_Error -doProlog(XML_Parser parser, - const ENCODING *enc, - const char *s, - const char *end, - int tok, - const char *next, - const char **nextPtr) -{ +doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, + int tok, const char *next, const char **nextPtr, XML_Bool haveMore, + XML_Bool allowClosingDoctype, enum XML_Account account) { #ifdef XML_DTD - static const XML_Char externalSubsetName[] = { '#' , '\0' }; + static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'}; #endif /* XML_DTD */ + static const XML_Char atypeCDATA[] + = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; + static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'}; + static const XML_Char atypeIDREF[] + = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'}; + static const XML_Char atypeIDREFS[] + = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'}; + static const XML_Char atypeENTITY[] + = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'}; + static const XML_Char atypeENTITIES[] + = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, + ASCII_I, ASCII_E, ASCII_S, '\0'}; + static const XML_Char atypeNMTOKEN[] + = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'}; + static const XML_Char atypeNMTOKENS[] + = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, + ASCII_E, ASCII_N, ASCII_S, '\0'}; + static const XML_Char notationPrefix[] + = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, + ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'}; + static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'}; + static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'}; + +#ifndef XML_DTD + UNUSED_P(account); +#endif + + /* save one level of indirection */ + DTD *const dtd = parser->m_dtd; const char **eventPP; const char **eventEndPP; enum XML_Content_Quant quant; - if (enc == encoding) { - eventPP = &eventPtr; - eventEndPP = &eventEndPtr; - } - else { - eventPP = &(openInternalEntities->internalEventPtr); - eventEndPP = &(openInternalEntities->internalEventEndPtr); + if (enc == parser->m_encoding) { + eventPP = &parser->m_eventPtr; + eventEndPP = &parser->m_eventEndPtr; + } else { + eventPP = &(parser->m_openInternalEntities->internalEventPtr); + eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); } + for (;;) { int role; + XML_Bool handleDefault = XML_TRUE; *eventPP = s; *eventEndPP = next; if (tok <= 0) { - if (nextPtr != 0 && tok != XML_TOK_INVALID) { + if (haveMore && tok != XML_TOK_INVALID) { *nextPtr = s; return XML_ERROR_NONE; } @@ -2547,15 +4404,26 @@ doProlog(XML_Parser parser, return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: return XML_ERROR_PARTIAL_CHAR; + case -XML_TOK_PROLOG_S: + tok = -tok; + break; case XML_TOK_NONE: #ifdef XML_DTD - if (enc != encoding) + /* for internal PE NOT referenced between declarations */ + if (enc != parser->m_encoding + && ! parser->m_openInternalEntities->betweenDecl) { + *nextPtr = s; return XML_ERROR_NONE; - if (parentParser) { - if (XmlTokenRole(&prologState, XML_TOK_NONE, end, end, enc) + } + /* WFC: PE Between Declarations - must check that PE contains + complete markup, not only for external PEs, but also for + internal PEs if the reference occurs between declarations. + */ + if (parser->m_isParamEntity || enc != parser->m_encoding) { + if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc) == XML_ROLE_ERROR) - return XML_ERROR_SYNTAX; - hadExternalDoctype = 0; + return XML_ERROR_INCOMPLETE_PE; + *nextPtr = s; return XML_ERROR_NONE; } #endif /* XML_DTD */ @@ -2566,440 +4434,565 @@ doProlog(XML_Parser parser, break; } } - role = XmlTokenRole(&prologState, tok, s, next, enc); + role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc); +#ifdef XML_DTD switch (role) { - case XML_ROLE_XML_DECL: - { - enum XML_Error result = processXmlDecl(parser, 0, s, next); - if (result != XML_ERROR_NONE) - return result; - enc = encoding; - } + case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor + case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl + case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl break; + default: + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } + } +#endif + switch (role) { + case XML_ROLE_XML_DECL: { + enum XML_Error result = processXmlDecl(parser, 0, s, next); + if (result != XML_ERROR_NONE) + return result; + enc = parser->m_encoding; + handleDefault = XML_FALSE; + } break; case XML_ROLE_DOCTYPE_NAME: - if (startDoctypeDeclHandler) { - doctypeName = poolStoreString(&tempPool, enc, s, next); - if (! doctypeName) + if (parser->m_startDoctypeDeclHandler) { + parser->m_doctypeName + = poolStoreString(&parser->m_tempPool, enc, s, next); + if (! parser->m_doctypeName) return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); - doctypeSysid = 0; - doctypePubid = 0; + poolFinish(&parser->m_tempPool); + parser->m_doctypePubid = NULL; + handleDefault = XML_FALSE; } + parser->m_doctypeSysid = NULL; /* always initialize to NULL */ break; case XML_ROLE_DOCTYPE_INTERNAL_SUBSET: - if (startDoctypeDeclHandler) { - startDoctypeDeclHandler(handlerArg, doctypeName, doctypeSysid, - doctypePubid, 1); - doctypeName = 0; - poolClear(&tempPool); + if (parser->m_startDoctypeDeclHandler) { + parser->m_startDoctypeDeclHandler( + parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid, + parser->m_doctypePubid, 1); + parser->m_doctypeName = NULL; + poolClear(&parser->m_tempPool); + handleDefault = XML_FALSE; } break; #ifdef XML_DTD - case XML_ROLE_TEXT_DECL: - { - enum XML_Error result = processXmlDecl(parser, 1, s, next); - if (result != XML_ERROR_NONE) - return result; - enc = encoding; - } - break; + case XML_ROLE_TEXT_DECL: { + enum XML_Error result = processXmlDecl(parser, 1, s, next); + if (result != XML_ERROR_NONE) + return result; + enc = parser->m_encoding; + handleDefault = XML_FALSE; + } break; #endif /* XML_DTD */ case XML_ROLE_DOCTYPE_PUBLIC_ID: - if (startDoctypeDeclHandler) { - doctypePubid = poolStoreString(&tempPool, enc, s + 1, next - 1); - if (! doctypePubid) - return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); - } #ifdef XML_DTD - declEntity = (ENTITY *)lookup(&dtd.paramEntities, - externalSubsetName, - sizeof(ENTITY)); - if (!declEntity) + parser->m_useForeignDTD = XML_FALSE; + parser->m_declEntity = (ENTITY *)lookup( + parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); + if (! parser->m_declEntity) return XML_ERROR_NO_MEMORY; #endif /* XML_DTD */ + dtd->hasParamEntityRefs = XML_TRUE; + if (parser->m_startDoctypeDeclHandler) { + XML_Char *pubId; + if (! XmlIsPublicId(enc, s, next, eventPP)) + return XML_ERROR_PUBLICID; + pubId = poolStoreString(&parser->m_tempPool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (! pubId) + return XML_ERROR_NO_MEMORY; + normalizePublicId(pubId); + poolFinish(&parser->m_tempPool); + parser->m_doctypePubid = pubId; + handleDefault = XML_FALSE; + goto alreadyChecked; + } /* fall through */ case XML_ROLE_ENTITY_PUBLIC_ID: - if (!XmlIsPublicId(enc, s, next, eventPP)) - return XML_ERROR_SYNTAX; - if (declEntity) { - XML_Char *tem = poolStoreString(&dtd.pool, - enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!tem) + if (! XmlIsPublicId(enc, s, next, eventPP)) + return XML_ERROR_PUBLICID; + alreadyChecked: + if (dtd->keepProcessing && parser->m_declEntity) { + XML_Char *tem + = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (! tem) return XML_ERROR_NO_MEMORY; normalizePublicId(tem); - declEntity->publicId = tem; - poolFinish(&dtd.pool); + parser->m_declEntity->publicId = tem; + poolFinish(&dtd->pool); + /* Don't suppress the default handler if we fell through from + * the XML_ROLE_DOCTYPE_PUBLIC_ID case. + */ + if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID) + handleDefault = XML_FALSE; } break; case XML_ROLE_DOCTYPE_CLOSE: - if (doctypeName) { - startDoctypeDeclHandler(handlerArg, doctypeName, - doctypeSysid, doctypePubid, 0); - poolClear(&tempPool); + if (allowClosingDoctype != XML_TRUE) { + /* Must not close doctype from within expanded parameter entities */ + return XML_ERROR_INVALID_TOKEN; + } + + if (parser->m_doctypeName) { + parser->m_startDoctypeDeclHandler( + parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid, + parser->m_doctypePubid, 0); + poolClear(&parser->m_tempPool); + handleDefault = XML_FALSE; } - if (dtd.complete && hadExternalDoctype) { - dtd.complete = 0; + /* parser->m_doctypeSysid will be non-NULL in the case of a previous + XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler + was not set, indicating an external subset + */ #ifdef XML_DTD - if (paramEntityParsing && externalEntityRefHandler) { - ENTITY *entity = (ENTITY *)lookup(&dtd.paramEntities, - externalSubsetName, - 0); - if (!externalEntityRefHandler(externalEntityRefHandlerArg, - 0, - entity->base, - entity->systemId, - entity->publicId)) - return XML_ERROR_EXTERNAL_ENTITY_HANDLING; + if (parser->m_doctypeSysid || parser->m_useForeignDTD) { + XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; + dtd->hasParamEntityRefs = XML_TRUE; + if (parser->m_paramEntityParsing + && parser->m_externalEntityRefHandler) { + ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities, + externalSubsetName, sizeof(ENTITY)); + if (! entity) { + /* The external subset name "#" will have already been + * inserted into the hash table at the start of the + * external entity parsing, so no allocation will happen + * and lookup() cannot fail. + */ + return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ + } + if (parser->m_useForeignDTD) + entity->base = parser->m_curBase; + dtd->paramEntityRead = XML_FALSE; + if (! parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, 0, entity->base, + entity->systemId, entity->publicId)) + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; + if (dtd->paramEntityRead) { + if (! dtd->standalone && parser->m_notStandaloneHandler + && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) + return XML_ERROR_NOT_STANDALONE; + } + /* if we didn't read the foreign DTD then this means that there + is no external subset and we must reset dtd->hasParamEntityRefs + */ + else if (! parser->m_doctypeSysid) + dtd->hasParamEntityRefs = hadParamEntityRefs; + /* end of DTD - no need to update dtd->keepProcessing */ } + parser->m_useForeignDTD = XML_FALSE; + } #endif /* XML_DTD */ - if (!dtd.complete - && !dtd.standalone - && notStandaloneHandler - && !notStandaloneHandler(handlerArg)) - return XML_ERROR_NOT_STANDALONE; + if (parser->m_endDoctypeDeclHandler) { + parser->m_endDoctypeDeclHandler(parser->m_handlerArg); + handleDefault = XML_FALSE; } - if (endDoctypeDeclHandler) - endDoctypeDeclHandler(handlerArg); break; case XML_ROLE_INSTANCE_START: - processor = contentProcessor; +#ifdef XML_DTD + /* if there is no DOCTYPE declaration then now is the + last chance to read the foreign DTD + */ + if (parser->m_useForeignDTD) { + XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; + dtd->hasParamEntityRefs = XML_TRUE; + if (parser->m_paramEntityParsing + && parser->m_externalEntityRefHandler) { + ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities, + externalSubsetName, sizeof(ENTITY)); + if (! entity) + return XML_ERROR_NO_MEMORY; + entity->base = parser->m_curBase; + dtd->paramEntityRead = XML_FALSE; + if (! parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, 0, entity->base, + entity->systemId, entity->publicId)) + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; + if (dtd->paramEntityRead) { + if (! dtd->standalone && parser->m_notStandaloneHandler + && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) + return XML_ERROR_NOT_STANDALONE; + } + /* if we didn't read the foreign DTD then this means that there + is no external subset and we must reset dtd->hasParamEntityRefs + */ + else + dtd->hasParamEntityRefs = hadParamEntityRefs; + /* end of DTD - no need to update dtd->keepProcessing */ + } + } +#endif /* XML_DTD */ + parser->m_processor = contentProcessor; return contentProcessor(parser, s, end, nextPtr); case XML_ROLE_ATTLIST_ELEMENT_NAME: - declElementType = getElementType(parser, enc, s, next); - if (!declElementType) + parser->m_declElementType = getElementType(parser, enc, s, next); + if (! parser->m_declElementType) return XML_ERROR_NO_MEMORY; - break; + goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_NAME: - declAttributeId = getAttributeId(parser, enc, s, next); - if (!declAttributeId) + parser->m_declAttributeId = getAttributeId(parser, enc, s, next); + if (! parser->m_declAttributeId) return XML_ERROR_NO_MEMORY; - declAttributeIsCdata = 0; - declAttributeType = 0; - declAttributeIsId = 0; - break; + parser->m_declAttributeIsCdata = XML_FALSE; + parser->m_declAttributeType = NULL; + parser->m_declAttributeIsId = XML_FALSE; + goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_CDATA: - declAttributeIsCdata = 1; - declAttributeType = "CDATA"; - break; + parser->m_declAttributeIsCdata = XML_TRUE; + parser->m_declAttributeType = atypeCDATA; + goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_ID: - declAttributeIsId = 1; - declAttributeType = "ID"; - break; + parser->m_declAttributeIsId = XML_TRUE; + parser->m_declAttributeType = atypeID; + goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_IDREF: - declAttributeType = "IDREF"; - break; + parser->m_declAttributeType = atypeIDREF; + goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_IDREFS: - declAttributeType = "IDREFS"; - break; + parser->m_declAttributeType = atypeIDREFS; + goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_ENTITY: - declAttributeType = "ENTITY"; - break; + parser->m_declAttributeType = atypeENTITY; + goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES: - declAttributeType = "ENTITIES"; - break; + parser->m_declAttributeType = atypeENTITIES; + goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN: - declAttributeType = "NMTOKEN"; - break; + parser->m_declAttributeType = atypeNMTOKEN; + goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS: - declAttributeType = "NMTOKENS"; + parser->m_declAttributeType = atypeNMTOKENS; + checkAttListDeclHandler: + if (dtd->keepProcessing && parser->m_attlistDeclHandler) + handleDefault = XML_FALSE; break; - case XML_ROLE_ATTRIBUTE_ENUM_VALUE: case XML_ROLE_ATTRIBUTE_NOTATION_VALUE: - if (attlistDeclHandler) - { - const char *prefix; - if (declAttributeType) { - prefix = "|"; + if (dtd->keepProcessing && parser->m_attlistDeclHandler) { + const XML_Char *prefix; + if (parser->m_declAttributeType) { + prefix = enumValueSep; + } else { + prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix + : enumValueStart); } - else { - prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE - ? "NOTATION(" - : "("); - } - if (! poolAppendString(&tempPool, prefix)) + if (! poolAppendString(&parser->m_tempPool, prefix)) return XML_ERROR_NO_MEMORY; - if (! poolAppend(&tempPool, enc, s, next)) + if (! poolAppend(&parser->m_tempPool, enc, s, next)) return XML_ERROR_NO_MEMORY; - declAttributeType = tempPool.start; + parser->m_declAttributeType = parser->m_tempPool.start; + handleDefault = XML_FALSE; } break; case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE: case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE: - if (dtd.complete - && !defineAttribute(declElementType, declAttributeId, - declAttributeIsCdata, declAttributeIsId, 0, - parser)) - return XML_ERROR_NO_MEMORY; - if (attlistDeclHandler && declAttributeType) { - if (*declAttributeType == '(' - || (*declAttributeType == 'N' && declAttributeType[1] == 'O')) { - /* Enumerated or Notation type */ - if (! poolAppendChar(&tempPool, ')') - || ! poolAppendChar(&tempPool, '\0')) - return XML_ERROR_NO_MEMORY; - declAttributeType = tempPool.start; - poolFinish(&tempPool); + if (dtd->keepProcessing) { + if (! defineAttribute(parser->m_declElementType, + parser->m_declAttributeId, + parser->m_declAttributeIsCdata, + parser->m_declAttributeIsId, 0, parser)) + return XML_ERROR_NO_MEMORY; + if (parser->m_attlistDeclHandler && parser->m_declAttributeType) { + if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN) + || (*parser->m_declAttributeType == XML_T(ASCII_N) + && parser->m_declAttributeType[1] == XML_T(ASCII_O))) { + /* Enumerated or Notation type */ + if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN)) + || ! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) + return XML_ERROR_NO_MEMORY; + parser->m_declAttributeType = parser->m_tempPool.start; + poolFinish(&parser->m_tempPool); + } + *eventEndPP = s; + parser->m_attlistDeclHandler( + parser->m_handlerArg, parser->m_declElementType->name, + parser->m_declAttributeId->name, parser->m_declAttributeType, 0, + role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE); + poolClear(&parser->m_tempPool); + handleDefault = XML_FALSE; } - *eventEndPP = s; - attlistDeclHandler(handlerArg, declElementType->name, - declAttributeId->name, declAttributeType, - 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE); - poolClear(&tempPool); } break; case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE: case XML_ROLE_FIXED_ATTRIBUTE_VALUE: - { + if (dtd->keepProcessing) { const XML_Char *attVal; - enum XML_Error result - = storeAttributeValue(parser, enc, declAttributeIsCdata, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar, - &dtd.pool); + enum XML_Error result = storeAttributeValue( + parser, enc, parser->m_declAttributeIsCdata, + s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool, + XML_ACCOUNT_NONE); if (result) return result; - attVal = poolStart(&dtd.pool); - poolFinish(&dtd.pool); - if (dtd.complete - /* ID attributes aren't allowed to have a default */ - && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, 0, attVal, parser)) + attVal = poolStart(&dtd->pool); + poolFinish(&dtd->pool); + /* ID attributes aren't allowed to have a default */ + if (! defineAttribute( + parser->m_declElementType, parser->m_declAttributeId, + parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser)) return XML_ERROR_NO_MEMORY; - if (attlistDeclHandler && declAttributeType) { - if (*declAttributeType == '(' - || (*declAttributeType == 'N' && declAttributeType[1] == 'O')) { + if (parser->m_attlistDeclHandler && parser->m_declAttributeType) { + if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN) + || (*parser->m_declAttributeType == XML_T(ASCII_N) + && parser->m_declAttributeType[1] == XML_T(ASCII_O))) { /* Enumerated or Notation type */ - if (! poolAppendChar(&tempPool, ')') - || ! poolAppendChar(&tempPool, '\0')) + if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN)) + || ! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return XML_ERROR_NO_MEMORY; - declAttributeType = tempPool.start; - poolFinish(&tempPool); + parser->m_declAttributeType = parser->m_tempPool.start; + poolFinish(&parser->m_tempPool); } *eventEndPP = s; - attlistDeclHandler(handlerArg, declElementType->name, - declAttributeId->name, declAttributeType, - attVal, - role == XML_ROLE_FIXED_ATTRIBUTE_VALUE); - poolClear(&tempPool); + parser->m_attlistDeclHandler( + parser->m_handlerArg, parser->m_declElementType->name, + parser->m_declAttributeId->name, parser->m_declAttributeType, + attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE); + poolClear(&parser->m_tempPool); + handleDefault = XML_FALSE; } - break; } + break; case XML_ROLE_ENTITY_VALUE: - { - enum XML_Error result = storeEntityValue(parser, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (declEntity) { - declEntity->textPtr = poolStart(&dtd.pool); - declEntity->textLen = poolLength(&dtd.pool); - poolFinish(&dtd.pool); - if (entityDeclHandler) { + if (dtd->keepProcessing) { + enum XML_Error result + = storeEntityValue(parser, enc, s + enc->minBytesPerChar, + next - enc->minBytesPerChar, XML_ACCOUNT_NONE); + if (parser->m_declEntity) { + parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool); + parser->m_declEntity->textLen + = (int)(poolLength(&dtd->entityValuePool)); + poolFinish(&dtd->entityValuePool); + if (parser->m_entityDeclHandler) { *eventEndPP = s; - entityDeclHandler(handlerArg, - declEntity->name, - declEntity->is_param, - declEntity->textPtr, - declEntity->textLen, - curBase, 0, 0, 0); + parser->m_entityDeclHandler( + parser->m_handlerArg, parser->m_declEntity->name, + parser->m_declEntity->is_param, parser->m_declEntity->textPtr, + parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0); + handleDefault = XML_FALSE; } - } - else - poolDiscard(&dtd.pool); + } else + poolDiscard(&dtd->entityValuePool); if (result != XML_ERROR_NONE) return result; } break; case XML_ROLE_DOCTYPE_SYSTEM_ID: - if (startDoctypeDeclHandler) { - doctypeSysid = poolStoreString(&tempPool, enc, s + 1, next - 1); - if (! doctypeSysid) +#ifdef XML_DTD + parser->m_useForeignDTD = XML_FALSE; +#endif /* XML_DTD */ + dtd->hasParamEntityRefs = XML_TRUE; + if (parser->m_startDoctypeDeclHandler) { + parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (parser->m_doctypeSysid == NULL) return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); + poolFinish(&parser->m_tempPool); + handleDefault = XML_FALSE; } - if (!dtd.standalone #ifdef XML_DTD - && !paramEntityParsing + else + /* use externalSubsetName to make parser->m_doctypeSysid non-NULL + for the case where no parser->m_startDoctypeDeclHandler is set */ + parser->m_doctypeSysid = externalSubsetName; +#endif /* XML_DTD */ + if (! dtd->standalone +#ifdef XML_DTD + && ! parser->m_paramEntityParsing #endif /* XML_DTD */ - && notStandaloneHandler - && !notStandaloneHandler(handlerArg)) + && parser->m_notStandaloneHandler + && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) return XML_ERROR_NOT_STANDALONE; - hadExternalDoctype = 1; #ifndef XML_DTD break; -#else /* XML_DTD */ - if (!declEntity) { - declEntity = (ENTITY *)lookup(&dtd.paramEntities, - externalSubsetName, - sizeof(ENTITY)); - declEntity->publicId = 0; - if (!declEntity) +#else /* XML_DTD */ + if (! parser->m_declEntity) { + parser->m_declEntity = (ENTITY *)lookup( + parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); + if (! parser->m_declEntity) return XML_ERROR_NO_MEMORY; + parser->m_declEntity->publicId = NULL; } - /* fall through */ #endif /* XML_DTD */ + /* fall through */ case XML_ROLE_ENTITY_SYSTEM_ID: - if (declEntity) { - declEntity->systemId = poolStoreString(&dtd.pool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!declEntity->systemId) + if (dtd->keepProcessing && parser->m_declEntity) { + parser->m_declEntity->systemId + = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (! parser->m_declEntity->systemId) return XML_ERROR_NO_MEMORY; - declEntity->base = curBase; - poolFinish(&dtd.pool); + parser->m_declEntity->base = parser->m_curBase; + poolFinish(&dtd->pool); + /* Don't suppress the default handler if we fell through from + * the XML_ROLE_DOCTYPE_SYSTEM_ID case. + */ + if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID) + handleDefault = XML_FALSE; } break; case XML_ROLE_ENTITY_COMPLETE: - if (declEntity && entityDeclHandler) { + if (dtd->keepProcessing && parser->m_declEntity + && parser->m_entityDeclHandler) { *eventEndPP = s; - entityDeclHandler(handlerArg, - declEntity->name, - 0,0,0, - declEntity->base, - declEntity->systemId, - declEntity->publicId, - 0); + parser->m_entityDeclHandler( + parser->m_handlerArg, parser->m_declEntity->name, + parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base, + parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0); + handleDefault = XML_FALSE; } break; case XML_ROLE_ENTITY_NOTATION_NAME: - if (declEntity) { - declEntity->notation = poolStoreString(&dtd.pool, enc, s, next); - if (!declEntity->notation) + if (dtd->keepProcessing && parser->m_declEntity) { + parser->m_declEntity->notation + = poolStoreString(&dtd->pool, enc, s, next); + if (! parser->m_declEntity->notation) return XML_ERROR_NO_MEMORY; - poolFinish(&dtd.pool); - if (unparsedEntityDeclHandler) { + poolFinish(&dtd->pool); + if (parser->m_unparsedEntityDeclHandler) { *eventEndPP = s; - unparsedEntityDeclHandler(handlerArg, - declEntity->name, - declEntity->base, - declEntity->systemId, - declEntity->publicId, - declEntity->notation); - } - else if (entityDeclHandler) { + parser->m_unparsedEntityDeclHandler( + parser->m_handlerArg, parser->m_declEntity->name, + parser->m_declEntity->base, parser->m_declEntity->systemId, + parser->m_declEntity->publicId, parser->m_declEntity->notation); + handleDefault = XML_FALSE; + } else if (parser->m_entityDeclHandler) { *eventEndPP = s; - entityDeclHandler(handlerArg, - declEntity->name, - 0,0,0, - declEntity->base, - declEntity->systemId, - declEntity->publicId, - declEntity->notation); + parser->m_entityDeclHandler( + parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0, + parser->m_declEntity->base, parser->m_declEntity->systemId, + parser->m_declEntity->publicId, parser->m_declEntity->notation); + handleDefault = XML_FALSE; } } break; - case XML_ROLE_GENERAL_ENTITY_NAME: - { - const XML_Char *name; - if (XmlPredefinedEntityName(enc, s, next)) { - declEntity = 0; - break; - } - name = poolStoreString(&dtd.pool, enc, s, next); - if (!name) + case XML_ROLE_GENERAL_ENTITY_NAME: { + if (XmlPredefinedEntityName(enc, s, next)) { + parser->m_declEntity = NULL; + break; + } + if (dtd->keepProcessing) { + const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); + if (! name) return XML_ERROR_NO_MEMORY; - if (dtd.complete) { - declEntity = (ENTITY *)lookup(&dtd.generalEntities, name, sizeof(ENTITY)); - if (!declEntity) - return XML_ERROR_NO_MEMORY; - if (declEntity->name != name) { - poolDiscard(&dtd.pool); - declEntity = 0; - } - else { - poolFinish(&dtd.pool); - declEntity->publicId = 0; - declEntity->is_param = 0; - } - } - else { - poolDiscard(&dtd.pool); - declEntity = 0; + parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, + name, sizeof(ENTITY)); + if (! parser->m_declEntity) + return XML_ERROR_NO_MEMORY; + if (parser->m_declEntity->name != name) { + poolDiscard(&dtd->pool); + parser->m_declEntity = NULL; + } else { + poolFinish(&dtd->pool); + parser->m_declEntity->publicId = NULL; + parser->m_declEntity->is_param = XML_FALSE; + /* if we have a parent parser or are reading an internal parameter + entity, then the entity declaration is not considered "internal" + */ + parser->m_declEntity->is_internal + = ! (parser->m_parentParser || parser->m_openInternalEntities); + if (parser->m_entityDeclHandler) + handleDefault = XML_FALSE; } + } else { + poolDiscard(&dtd->pool); + parser->m_declEntity = NULL; } - break; + } break; case XML_ROLE_PARAM_ENTITY_NAME: #ifdef XML_DTD - if (dtd.complete) { - const XML_Char *name = poolStoreString(&dtd.pool, enc, s, next); - if (!name) + if (dtd->keepProcessing) { + const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); + if (! name) return XML_ERROR_NO_MEMORY; - declEntity = (ENTITY *)lookup(&dtd.paramEntities, - name, sizeof(ENTITY)); - if (!declEntity) + parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities, + name, sizeof(ENTITY)); + if (! parser->m_declEntity) return XML_ERROR_NO_MEMORY; - if (declEntity->name != name) { - poolDiscard(&dtd.pool); - declEntity = 0; - } - else { - poolFinish(&dtd.pool); - declEntity->publicId = 0; - declEntity->is_param = 1; + if (parser->m_declEntity->name != name) { + poolDiscard(&dtd->pool); + parser->m_declEntity = NULL; + } else { + poolFinish(&dtd->pool); + parser->m_declEntity->publicId = NULL; + parser->m_declEntity->is_param = XML_TRUE; + /* if we have a parent parser or are reading an internal parameter + entity, then the entity declaration is not considered "internal" + */ + parser->m_declEntity->is_internal + = ! (parser->m_parentParser || parser->m_openInternalEntities); + if (parser->m_entityDeclHandler) + handleDefault = XML_FALSE; } + } else { + poolDiscard(&dtd->pool); + parser->m_declEntity = NULL; } -#else /* not XML_DTD */ - declEntity = 0; -#endif /* not XML_DTD */ +#else /* not XML_DTD */ + parser->m_declEntity = NULL; +#endif /* XML_DTD */ break; case XML_ROLE_NOTATION_NAME: - declNotationPublicId = 0; - declNotationName = 0; - if (notationDeclHandler) { - declNotationName = poolStoreString(&tempPool, enc, s, next); - if (!declNotationName) + parser->m_declNotationPublicId = NULL; + parser->m_declNotationName = NULL; + if (parser->m_notationDeclHandler) { + parser->m_declNotationName + = poolStoreString(&parser->m_tempPool, enc, s, next); + if (! parser->m_declNotationName) return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); + poolFinish(&parser->m_tempPool); + handleDefault = XML_FALSE; } break; case XML_ROLE_NOTATION_PUBLIC_ID: - if (!XmlIsPublicId(enc, s, next, eventPP)) - return XML_ERROR_SYNTAX; - if (declNotationName) { - XML_Char *tem = poolStoreString(&tempPool, - enc, + if (! XmlIsPublicId(enc, s, next, eventPP)) + return XML_ERROR_PUBLICID; + if (parser + ->m_declNotationName) { /* means m_notationDeclHandler != NULL */ + XML_Char *tem = poolStoreString(&parser->m_tempPool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); - if (!tem) + if (! tem) return XML_ERROR_NO_MEMORY; normalizePublicId(tem); - declNotationPublicId = tem; - poolFinish(&tempPool); + parser->m_declNotationPublicId = tem; + poolFinish(&parser->m_tempPool); + handleDefault = XML_FALSE; } break; case XML_ROLE_NOTATION_SYSTEM_ID: - if (declNotationName && notationDeclHandler) { - const XML_Char *systemId - = poolStoreString(&tempPool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!systemId) + if (parser->m_declNotationName && parser->m_notationDeclHandler) { + const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (! systemId) return XML_ERROR_NO_MEMORY; *eventEndPP = s; - notationDeclHandler(handlerArg, - declNotationName, - curBase, - systemId, - declNotationPublicId); + parser->m_notationDeclHandler( + parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase, + systemId, parser->m_declNotationPublicId); + handleDefault = XML_FALSE; } - poolClear(&tempPool); + poolClear(&parser->m_tempPool); break; case XML_ROLE_NOTATION_NO_SYSTEM_ID: - if (declNotationPublicId && notationDeclHandler) { + if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) { *eventEndPP = s; - notationDeclHandler(handlerArg, - declNotationName, - curBase, - 0, - declNotationPublicId); + parser->m_notationDeclHandler( + parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase, + 0, parser->m_declNotationPublicId); + handleDefault = XML_FALSE; } - poolClear(&tempPool); + poolClear(&parser->m_tempPool); break; case XML_ROLE_ERROR: switch (tok) { case XML_TOK_PARAM_ENTITY_REF: + /* PE references in internal subset are + not allowed within declarations. */ return XML_ERROR_PARAM_ENTITY_REF; case XML_TOK_XML_DECL: return XML_ERROR_MISPLACED_XML_PI; @@ -3007,149 +5000,224 @@ doProlog(XML_Parser parser, return XML_ERROR_SYNTAX; } #ifdef XML_DTD - case XML_ROLE_IGNORE_SECT: - { - enum XML_Error result; - if (defaultHandler) - reportDefault(parser, enc, s, next); - result = doIgnoreSection(parser, enc, &next, end, nextPtr); - if (!next) { - processor = ignoreSectionProcessor; - return result; - } + case XML_ROLE_IGNORE_SECT: { + enum XML_Error result; + if (parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + handleDefault = XML_FALSE; + result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore); + if (result != XML_ERROR_NONE) + return result; + else if (! next) { + parser->m_processor = ignoreSectionProcessor; + return result; } - break; + } break; #endif /* XML_DTD */ case XML_ROLE_GROUP_OPEN: - if (prologState.level >= groupSize) { - if (groupSize) { - groupConnector = REALLOC(groupConnector, groupSize *= 2); - if (dtd.scaffIndex) - dtd.scaffIndex = REALLOC(dtd.scaffIndex, groupSize * sizeof(int)); + if (parser->m_prologState.level >= parser->m_groupSize) { + if (parser->m_groupSize) { + { + char *const new_connector = (char *)REALLOC( + parser, parser->m_groupConnector, parser->m_groupSize *= 2); + if (new_connector == NULL) { + parser->m_groupSize /= 2; + return XML_ERROR_NO_MEMORY; + } + parser->m_groupConnector = new_connector; + } + + if (dtd->scaffIndex) { + int *const new_scaff_index = (int *)REALLOC( + parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int)); + if (new_scaff_index == NULL) + return XML_ERROR_NO_MEMORY; + dtd->scaffIndex = new_scaff_index; + } + } else { + parser->m_groupConnector + = (char *)MALLOC(parser, parser->m_groupSize = 32); + if (! parser->m_groupConnector) { + parser->m_groupSize = 0; + return XML_ERROR_NO_MEMORY; + } } - else - groupConnector = MALLOC(groupSize = 32); - if (!groupConnector) - return XML_ERROR_NO_MEMORY; } - groupConnector[prologState.level] = 0; - if (dtd.in_eldecl) { + parser->m_groupConnector[parser->m_prologState.level] = 0; + if (dtd->in_eldecl) { int myindex = nextScaffoldPart(parser); if (myindex < 0) return XML_ERROR_NO_MEMORY; - dtd.scaffIndex[dtd.scaffLevel] = myindex; - dtd.scaffLevel++; - dtd.scaffold[myindex].type = XML_CTYPE_SEQ; + assert(dtd->scaffIndex != NULL); + dtd->scaffIndex[dtd->scaffLevel] = myindex; + dtd->scaffLevel++; + dtd->scaffold[myindex].type = XML_CTYPE_SEQ; + if (parser->m_elementDeclHandler) + handleDefault = XML_FALSE; } break; case XML_ROLE_GROUP_SEQUENCE: - if (groupConnector[prologState.level] == '|') + if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE) return XML_ERROR_SYNTAX; - groupConnector[prologState.level] = ','; + parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA; + if (dtd->in_eldecl && parser->m_elementDeclHandler) + handleDefault = XML_FALSE; break; case XML_ROLE_GROUP_CHOICE: - if (groupConnector[prologState.level] == ',') + if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA) return XML_ERROR_SYNTAX; - if (dtd.in_eldecl - && ! groupConnector[prologState.level] - && dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel - 1]].type != XML_CTYPE_MIXED - ) { - dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel - 1]].type = XML_CTYPE_CHOICE; + if (dtd->in_eldecl + && ! parser->m_groupConnector[parser->m_prologState.level] + && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type + != XML_CTYPE_MIXED)) { + dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type + = XML_CTYPE_CHOICE; + if (parser->m_elementDeclHandler) + handleDefault = XML_FALSE; } - groupConnector[prologState.level] = '|'; + parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE; break; case XML_ROLE_PARAM_ENTITY_REF: #ifdef XML_DTD case XML_ROLE_INNER_PARAM_ENTITY_REF: - if (paramEntityParsing - && (dtd.complete || role == XML_ROLE_INNER_PARAM_ENTITY_REF)) { + dtd->hasParamEntityRefs = XML_TRUE; + if (! parser->m_paramEntityParsing) + dtd->keepProcessing = dtd->standalone; + else { const XML_Char *name; ENTITY *entity; - name = poolStoreString(&dtd.pool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!name) + name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (! name) return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(&dtd.paramEntities, name, 0); - poolDiscard(&dtd.pool); - if (!entity) { - /* FIXME what to do if !dtd.complete? */ - return XML_ERROR_UNDEFINED_ENTITY; + entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); + poolDiscard(&dtd->pool); + /* first, determine if a check for an existing declaration is needed; + if yes, check that the entity exists, and that it is internal, + otherwise call the skipped entity handler + */ + if (parser->m_prologState.documentEntity + && (dtd->standalone ? ! parser->m_openInternalEntities + : ! dtd->hasParamEntityRefs)) { + if (! entity) + return XML_ERROR_UNDEFINED_ENTITY; + else if (! entity->is_internal) { + /* It's hard to exhaustively search the code to be sure, + * but there doesn't seem to be a way of executing the + * following line. There are two cases: + * + * If 'standalone' is false, the DTD must have no + * parameter entities or we wouldn't have passed the outer + * 'if' statement. That measn the only entity in the hash + * table is the external subset name "#" which cannot be + * given as a parameter entity name in XML syntax, so the + * lookup must have returned NULL and we don't even reach + * the test for an internal entity. + * + * If 'standalone' is true, it does not seem to be + * possible to create entities taking this code path that + * are not internal entities, so fail the test above. + * + * Because this analysis is very uncertain, the code is + * being left in place and merely removed from the + * coverage test statistics. + */ + return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */ + } + } else if (! entity) { + dtd->keepProcessing = dtd->standalone; + /* cannot report skipped entities in declarations */ + if ((role == XML_ROLE_PARAM_ENTITY_REF) + && parser->m_skippedEntityHandler) { + parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1); + handleDefault = XML_FALSE; + } + break; } if (entity->open) return XML_ERROR_RECURSIVE_ENTITY_REF; if (entity->textPtr) { enum XML_Error result; - result = processInternalParamEntity(parser, entity); + XML_Bool betweenDecl + = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE); + result = processInternalEntity(parser, entity, betweenDecl); if (result != XML_ERROR_NONE) return result; + handleDefault = XML_FALSE; break; } - if (role == XML_ROLE_INNER_PARAM_ENTITY_REF) - return XML_ERROR_PARAM_ENTITY_REF; - if (externalEntityRefHandler) { - dtd.complete = 0; - entity->open = 1; - if (!externalEntityRefHandler(externalEntityRefHandlerArg, - 0, - entity->base, - entity->systemId, - entity->publicId)) { - entity->open = 0; + if (parser->m_externalEntityRefHandler) { + dtd->paramEntityRead = XML_FALSE; + entity->open = XML_TRUE; + entityTrackingOnOpen(parser, entity, __LINE__); + if (! parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, 0, entity->base, + entity->systemId, entity->publicId)) { + entityTrackingOnClose(parser, entity, __LINE__); + entity->open = XML_FALSE; return XML_ERROR_EXTERNAL_ENTITY_HANDLING; } - entity->open = 0; - if (dtd.complete) + entityTrackingOnClose(parser, entity, __LINE__); + entity->open = XML_FALSE; + handleDefault = XML_FALSE; + if (! dtd->paramEntityRead) { + dtd->keepProcessing = dtd->standalone; break; + } + } else { + dtd->keepProcessing = dtd->standalone; + break; } } #endif /* XML_DTD */ - if (!dtd.standalone - && notStandaloneHandler - && !notStandaloneHandler(handlerArg)) + if (! dtd->standalone && parser->m_notStandaloneHandler + && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) return XML_ERROR_NOT_STANDALONE; - dtd.complete = 0; - if (defaultHandler) - reportDefault(parser, enc, s, next); break; /* Element declaration stuff */ case XML_ROLE_ELEMENT_NAME: - if (elementDeclHandler) { - declElementType = getElementType(parser, enc, s, next); - if (! declElementType) + if (parser->m_elementDeclHandler) { + parser->m_declElementType = getElementType(parser, enc, s, next); + if (! parser->m_declElementType) return XML_ERROR_NO_MEMORY; - dtd.scaffLevel = 0; - dtd.scaffCount = 0; - dtd.in_eldecl = 1; + dtd->scaffLevel = 0; + dtd->scaffCount = 0; + dtd->in_eldecl = XML_TRUE; + handleDefault = XML_FALSE; } break; case XML_ROLE_CONTENT_ANY: case XML_ROLE_CONTENT_EMPTY: - if (dtd.in_eldecl) { - if (elementDeclHandler) { - XML_Content * content = (XML_Content *) MALLOC(sizeof(XML_Content)); + if (dtd->in_eldecl) { + if (parser->m_elementDeclHandler) { + XML_Content *content + = (XML_Content *)MALLOC(parser, sizeof(XML_Content)); if (! content) return XML_ERROR_NO_MEMORY; content->quant = XML_CQUANT_NONE; - content->name = 0; + content->name = NULL; content->numchildren = 0; - content->children = 0; - content->type = ((role == XML_ROLE_CONTENT_ANY) ? - XML_CTYPE_ANY : - XML_CTYPE_EMPTY); + content->children = NULL; + content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY + : XML_CTYPE_EMPTY); *eventEndPP = s; - elementDeclHandler(handlerArg, declElementType->name, content); + parser->m_elementDeclHandler( + parser->m_handlerArg, parser->m_declElementType->name, content); + handleDefault = XML_FALSE; } - dtd.in_eldecl = 0; + dtd->in_eldecl = XML_FALSE; } break; - + case XML_ROLE_CONTENT_PCDATA: - if (dtd.in_eldecl) { - dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel - 1]].type = XML_CTYPE_MIXED; + if (dtd->in_eldecl) { + dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type + = XML_CTYPE_MIXED; + if (parser->m_elementDeclHandler) + handleDefault = XML_FALSE; } break; @@ -3165,21 +5233,29 @@ doProlog(XML_Parser parser, case XML_ROLE_CONTENT_ELEMENT_PLUS: quant = XML_CQUANT_PLUS; elementContent: - if (dtd.in_eldecl) - { - ELEMENT_TYPE *el; - const char *nxt = quant == XML_CQUANT_NONE ? next : next - 1; - int myindex = nextScaffoldPart(parser); - if (myindex < 0) - return XML_ERROR_NO_MEMORY; - dtd.scaffold[myindex].type = XML_CTYPE_NAME; - dtd.scaffold[myindex].quant = quant; - el = getElementType(parser, enc, s, nxt); - if (! el) - return XML_ERROR_NO_MEMORY; - dtd.scaffold[myindex].name = el->name; - dtd.contentStringLen += nxt - s + 1; - } + if (dtd->in_eldecl) { + ELEMENT_TYPE *el; + const XML_Char *name; + int nameLen; + const char *nxt + = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar); + int myindex = nextScaffoldPart(parser); + if (myindex < 0) + return XML_ERROR_NO_MEMORY; + dtd->scaffold[myindex].type = XML_CTYPE_NAME; + dtd->scaffold[myindex].quant = quant; + el = getElementType(parser, enc, s, nxt); + if (! el) + return XML_ERROR_NO_MEMORY; + name = el->name; + dtd->scaffold[myindex].name = name; + nameLen = 0; + for (; name[nameLen++];) + ; + dtd->contentStringLen += nameLen; + if (parser->m_elementDeclHandler) + handleDefault = XML_FALSE; + } break; case XML_ROLE_GROUP_CLOSE: @@ -3194,107 +5270,135 @@ doProlog(XML_Parser parser, case XML_ROLE_GROUP_CLOSE_PLUS: quant = XML_CQUANT_PLUS; closeGroup: - if (dtd.in_eldecl) { - dtd.scaffLevel--; - dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel]].quant = quant; - if (dtd.scaffLevel == 0) { - if (elementDeclHandler) { + if (dtd->in_eldecl) { + if (parser->m_elementDeclHandler) + handleDefault = XML_FALSE; + dtd->scaffLevel--; + dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant; + if (dtd->scaffLevel == 0) { + if (! handleDefault) { XML_Content *model = build_model(parser); if (! model) return XML_ERROR_NO_MEMORY; *eventEndPP = s; - elementDeclHandler(handlerArg, declElementType->name, model); + parser->m_elementDeclHandler( + parser->m_handlerArg, parser->m_declElementType->name, model); } - dtd.in_eldecl = 0; - dtd.contentStringLen = 0; + dtd->in_eldecl = XML_FALSE; + dtd->contentStringLen = 0; } } break; /* End element declaration stuff */ - case XML_ROLE_NONE: - switch (tok) { - case XML_TOK_PI: - if (!reportProcessingInstruction(parser, enc, s, next)) - return XML_ERROR_NO_MEMORY; - break; - case XML_TOK_COMMENT: - if (!reportComment(parser, enc, s, next)) - return XML_ERROR_NO_MEMORY; - break; - } + case XML_ROLE_PI: + if (! reportProcessingInstruction(parser, enc, s, next)) + return XML_ERROR_NO_MEMORY; + handleDefault = XML_FALSE; break; - } - if (defaultHandler) { + case XML_ROLE_COMMENT: + if (! reportComment(parser, enc, s, next)) + return XML_ERROR_NO_MEMORY; + handleDefault = XML_FALSE; + break; + case XML_ROLE_NONE: switch (tok) { - case XML_TOK_PI: - case XML_TOK_COMMENT: case XML_TOK_BOM: - case XML_TOK_XML_DECL: -#ifdef XML_DTD - case XML_TOK_IGNORE_SECT: -#endif /* XML_DTD */ - case XML_TOK_PARAM_ENTITY_REF: + handleDefault = XML_FALSE; break; - default: -#ifdef XML_DTD - if (role != XML_ROLE_IGNORE_SECT) -#endif /* XML_DTD */ - reportDefault(parser, enc, s, next); } + break; + case XML_ROLE_DOCTYPE_NONE: + if (parser->m_startDoctypeDeclHandler) + handleDefault = XML_FALSE; + break; + case XML_ROLE_ENTITY_NONE: + if (dtd->keepProcessing && parser->m_entityDeclHandler) + handleDefault = XML_FALSE; + break; + case XML_ROLE_NOTATION_NONE: + if (parser->m_notationDeclHandler) + handleDefault = XML_FALSE; + break; + case XML_ROLE_ATTLIST_NONE: + if (dtd->keepProcessing && parser->m_attlistDeclHandler) + handleDefault = XML_FALSE; + break; + case XML_ROLE_ELEMENT_NONE: + if (parser->m_elementDeclHandler) + handleDefault = XML_FALSE; + break; + } /* end of big switch */ + + if (handleDefault && parser->m_defaultHandler) + reportDefault(parser, enc, s, next); + + switch (parser->m_parsingStatus.parsing) { + case XML_SUSPENDED: + *nextPtr = next; + return XML_ERROR_NONE; + case XML_FINISHED: + return XML_ERROR_ABORTED; + default: + s = next; + tok = XmlPrologTok(enc, s, end, &next); } - s = next; - tok = XmlPrologTok(enc, s, end, &next); } /* not reached */ } -static -enum XML_Error epilogProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ - processor = epilogProcessor; - eventPtr = s; +static enum XML_Error PTRCALL +epilogProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { + parser->m_processor = epilogProcessor; + parser->m_eventPtr = s; for (;;) { - const char *next; - int tok = XmlPrologTok(encoding, s, end, &next); - eventEndPtr = next; + const char *next = NULL; + int tok = XmlPrologTok(parser->m_encoding, s, end, &next); +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif + parser->m_eventEndPtr = next; switch (tok) { + /* report partial linebreak - it might be the last token */ case -XML_TOK_PROLOG_S: - if (defaultHandler) { - eventEndPtr = end; - reportDefault(parser, encoding, s, end); + if (parser->m_defaultHandler) { + reportDefault(parser, parser->m_encoding, s, next); + if (parser->m_parsingStatus.parsing == XML_FINISHED) + return XML_ERROR_ABORTED; } - /* fall through */ + *nextPtr = next; + return XML_ERROR_NONE; case XML_TOK_NONE: - if (nextPtr) - *nextPtr = end; + *nextPtr = s; return XML_ERROR_NONE; case XML_TOK_PROLOG_S: - if (defaultHandler) - reportDefault(parser, encoding, s, next); + if (parser->m_defaultHandler) + reportDefault(parser, parser->m_encoding, s, next); break; case XML_TOK_PI: - if (!reportProcessingInstruction(parser, encoding, s, next)) + if (! reportProcessingInstruction(parser, parser->m_encoding, s, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_COMMENT: - if (!reportComment(parser, encoding, s, next)) + if (! reportComment(parser, parser->m_encoding, s, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_INVALID: - eventPtr = next; + parser->m_eventPtr = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: - if (nextPtr) { + if (! parser->m_parsingStatus.finalBuffer) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (nextPtr) { + if (! parser->m_parsingStatus.finalBuffer) { *nextPtr = s; return XML_ERROR_NONE; } @@ -3302,110 +5406,232 @@ enum XML_Error epilogProcessor(XML_Parser parser, default: return XML_ERROR_JUNK_AFTER_DOC_ELEMENT; } - eventPtr = s = next; + parser->m_eventPtr = s = next; + switch (parser->m_parsingStatus.parsing) { + case XML_SUSPENDED: + *nextPtr = next; + return XML_ERROR_NONE; + case XML_FINISHED: + return XML_ERROR_ABORTED; + default:; + } } } -#ifdef XML_DTD - static enum XML_Error -processInternalParamEntity(XML_Parser parser, ENTITY *entity) -{ - const char *s, *end, *next; - int tok; +processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) { + const char *textStart, *textEnd; + const char *next; enum XML_Error result; - OPEN_INTERNAL_ENTITY openEntity; - entity->open = 1; - openEntity.next = openInternalEntities; - openInternalEntities = &openEntity; - openEntity.entity = entity; - openEntity.internalEventPtr = 0; - openEntity.internalEventEndPtr = 0; - s = (char *)entity->textPtr; - end = (char *)(entity->textPtr + entity->textLen); - tok = XmlPrologTok(internalEncoding, s, end, &next); - result = doProlog(parser, internalEncoding, s, end, tok, next, 0); - entity->open = 0; - openInternalEntities = openEntity.next; + OPEN_INTERNAL_ENTITY *openEntity; + + if (parser->m_freeInternalEntities) { + openEntity = parser->m_freeInternalEntities; + parser->m_freeInternalEntities = openEntity->next; + } else { + openEntity + = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY)); + if (! openEntity) + return XML_ERROR_NO_MEMORY; + } + entity->open = XML_TRUE; +#ifdef XML_DTD + entityTrackingOnOpen(parser, entity, __LINE__); +#endif + entity->processed = 0; + openEntity->next = parser->m_openInternalEntities; + parser->m_openInternalEntities = openEntity; + openEntity->entity = entity; + openEntity->startTagLevel = parser->m_tagLevel; + openEntity->betweenDecl = betweenDecl; + openEntity->internalEventPtr = NULL; + openEntity->internalEventEndPtr = NULL; + textStart = (const char *)entity->textPtr; + textEnd = (const char *)(entity->textPtr + entity->textLen); + /* Set a safe default value in case 'next' does not get set */ + next = textStart; + +#ifdef XML_DTD + if (entity->is_param) { + int tok + = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); + result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, + tok, next, &next, XML_FALSE, XML_FALSE, + XML_ACCOUNT_ENTITY_EXPANSION); + } else +#endif /* XML_DTD */ + result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding, + textStart, textEnd, &next, XML_FALSE, + XML_ACCOUNT_ENTITY_EXPANSION); + + if (result == XML_ERROR_NONE) { + if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { + entity->processed = (int)(next - textStart); + parser->m_processor = internalEntityProcessor; + } else { +#ifdef XML_DTD + entityTrackingOnClose(parser, entity, __LINE__); +#endif /* XML_DTD */ + entity->open = XML_FALSE; + parser->m_openInternalEntities = openEntity->next; + /* put openEntity back in list of free instances */ + openEntity->next = parser->m_freeInternalEntities; + parser->m_freeInternalEntities = openEntity; + } + } return result; } +static enum XML_Error PTRCALL +internalEntityProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { + ENTITY *entity; + const char *textStart, *textEnd; + const char *next; + enum XML_Error result; + OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities; + if (! openEntity) + return XML_ERROR_UNEXPECTED_STATE; + + entity = openEntity->entity; + textStart = ((const char *)entity->textPtr) + entity->processed; + textEnd = (const char *)(entity->textPtr + entity->textLen); + /* Set a safe default value in case 'next' does not get set */ + next = textStart; + +#ifdef XML_DTD + if (entity->is_param) { + int tok + = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); + result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, + tok, next, &next, XML_FALSE, XML_TRUE, + XML_ACCOUNT_ENTITY_EXPANSION); + } else +#endif /* XML_DTD */ + result = doContent(parser, openEntity->startTagLevel, + parser->m_internalEncoding, textStart, textEnd, &next, + XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); + + if (result != XML_ERROR_NONE) + return result; + else if (textEnd != next + && parser->m_parsingStatus.parsing == XML_SUSPENDED) { + entity->processed = (int)(next - (const char *)entity->textPtr); + return result; + } else { +#ifdef XML_DTD + entityTrackingOnClose(parser, entity, __LINE__); +#endif + entity->open = XML_FALSE; + parser->m_openInternalEntities = openEntity->next; + /* put openEntity back in list of free instances */ + openEntity->next = parser->m_freeInternalEntities; + parser->m_freeInternalEntities = openEntity; + } + +#ifdef XML_DTD + if (entity->is_param) { + int tok; + parser->m_processor = prologProcessor; + tok = XmlPrologTok(parser->m_encoding, s, end, &next); + return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, + XML_ACCOUNT_DIRECT); + } else #endif /* XML_DTD */ + { + parser->m_processor = contentProcessor; + /* see externalEntityContentProcessor vs contentProcessor */ + return doContent(parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, + s, end, nextPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, + XML_ACCOUNT_DIRECT); + } +} -static -enum XML_Error errorProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) -{ - itkExpatUnused(s); - itkExpatUnused(end); - itkExpatUnused(nextPtr); - return errorCode; +static enum XML_Error PTRCALL +errorProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { + UNUSED_P(s); + UNUSED_P(end); + UNUSED_P(nextPtr); + return parser->m_errorCode; } static enum XML_Error -storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, - const char *ptr, const char *end, - STRING_POOL *pool) -{ - enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, end, pool); +storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, + const char *ptr, const char *end, STRING_POOL *pool, + enum XML_Account account) { + enum XML_Error result + = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account); if (result) return result; - if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20) + if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20) poolChop(pool); - if (!poolAppendChar(pool, XML_T('\0'))) + if (! poolAppendChar(pool, XML_T('\0'))) return XML_ERROR_NO_MEMORY; return XML_ERROR_NONE; } static enum XML_Error -appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, - const char *ptr, const char *end, - STRING_POOL *pool) -{ +appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, + const char *ptr, const char *end, STRING_POOL *pool, + enum XML_Account account) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ +#ifndef XML_DTD + UNUSED_P(account); +#endif + for (;;) { - const char *next; + const char *next + = ptr; /* XmlAttributeValueTok doesn't always set the last arg */ int tok = XmlAttributeValueTok(enc, ptr, end, &next); +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif switch (tok) { case XML_TOK_NONE: return XML_ERROR_NONE; case XML_TOK_INVALID: - if (enc == encoding) - eventPtr = next; + if (enc == parser->m_encoding) + parser->m_eventPtr = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: - if (enc == encoding) - eventPtr = ptr; + if (enc == parser->m_encoding) + parser->m_eventPtr = ptr; return XML_ERROR_INVALID_TOKEN; - case XML_TOK_CHAR_REF: - { - XML_Char buf[XML_ENCODE_MAX]; - int i; - int n = XmlCharRefNumber(enc, ptr); - if (n < 0) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_BAD_CHAR_REF; - } - if (!isCdata - && n == 0x20 /* space */ - && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) - break; - n = XmlEncode(n, (ICHAR *)buf); - if (!n) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_BAD_CHAR_REF; - } - for (i = 0; i < n; i++) { - if (!poolAppendChar(pool, buf[i])) - return XML_ERROR_NO_MEMORY; - } + case XML_TOK_CHAR_REF: { + XML_Char buf[XML_ENCODE_MAX]; + int i; + int n = XmlCharRefNumber(enc, ptr); + if (n < 0) { + if (enc == parser->m_encoding) + parser->m_eventPtr = ptr; + return XML_ERROR_BAD_CHAR_REF; } - break; + if (! isCdata && n == 0x20 /* space */ + && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) + break; + n = XmlEncode(n, (ICHAR *)buf); + /* The XmlEncode() functions can never return 0 here. That + * error return happens if the code point passed in is either + * negative or greater than or equal to 0x110000. The + * XmlCharRefNumber() functions will all return a number + * strictly less than 0x110000 or a negative value if an error + * occurred. The negative value is intercepted above, so + * XmlEncode() is never passed a value it might return an + * error for. + */ + for (i = 0; i < n; i++) { + if (! poolAppendChar(pool, buf[i])) + return XML_ERROR_NO_MEMORY; + } + } break; case XML_TOK_DATA_CHARS: - if (!poolAppend(pool, enc, ptr, next)) + if (! poolAppend(pool, enc, ptr, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_TRAILING_CR: @@ -3413,184 +5639,328 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, /* fall through */ case XML_TOK_ATTRIBUTE_VALUE_S: case XML_TOK_DATA_NEWLINE: - if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) + if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) break; - if (!poolAppendChar(pool, 0x20)) + if (! poolAppendChar(pool, 0x20)) return XML_ERROR_NO_MEMORY; break; - case XML_TOK_ENTITY_REF: - { - const XML_Char *name; - ENTITY *entity; - XML_Char ch = XmlPredefinedEntityName(enc, - ptr + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (ch) { - if (!poolAppendChar(pool, ch)) - return XML_ERROR_NO_MEMORY; - break; - } - name = poolStoreString(&temp2Pool, enc, - ptr + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!name) + case XML_TOK_ENTITY_REF: { + const XML_Char *name; + ENTITY *entity; + char checkEntityDecl; + XML_Char ch = (XML_Char)XmlPredefinedEntityName( + enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar); + if (ch) { +#ifdef XML_DTD + /* NOTE: We are replacing 4-6 characters original input for 1 character + * so there is no amplification and hence recording without + * protection. */ + accountingDiffTolerated(parser, tok, (char *)&ch, + ((char *)&ch) + sizeof(XML_Char), __LINE__, + XML_ACCOUNT_ENTITY_EXPANSION); +#endif /* XML_DTD */ + if (! poolAppendChar(pool, ch)) return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0); - poolDiscard(&temp2Pool); - if (!entity) { - if (dtd.complete) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_UNDEFINED_ENTITY; - } - } - else if (entity->open) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_RECURSIVE_ENTITY_REF; - } - else if (entity->notation) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_BINARY_ENTITY_REF; - } - else if (!entity->textPtr) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; - } - else { - enum XML_Error result; - const XML_Char *textEnd = entity->textPtr + entity->textLen; - entity->open = 1; - result = appendAttributeValue(parser, internalEncoding, isCdata, (char *)entity->textPtr, (char *)textEnd, pool); - entity->open = 0; - if (result) - return result; + break; + } + name = poolStoreString(&parser->m_temp2Pool, enc, + ptr + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (! name) + return XML_ERROR_NO_MEMORY; + entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); + poolDiscard(&parser->m_temp2Pool); + /* First, determine if a check for an existing declaration is needed; + if yes, check that the entity exists, and that it is internal. + */ + if (pool == &dtd->pool) /* are we called from prolog? */ + checkEntityDecl = +#ifdef XML_DTD + parser->m_prologState.documentEntity && +#endif /* XML_DTD */ + (dtd->standalone ? ! parser->m_openInternalEntities + : ! dtd->hasParamEntityRefs); + else /* if (pool == &parser->m_tempPool): we are called from content */ + checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone; + if (checkEntityDecl) { + if (! entity) + return XML_ERROR_UNDEFINED_ENTITY; + else if (! entity->is_internal) + return XML_ERROR_ENTITY_DECLARED_IN_PE; + } else if (! entity) { + /* Cannot report skipped entity here - see comments on + parser->m_skippedEntityHandler. + if (parser->m_skippedEntityHandler) + parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); + */ + /* Cannot call the default handler because this would be + out of sync with the call to the startElementHandler. + if ((pool == &parser->m_tempPool) && parser->m_defaultHandler) + reportDefault(parser, enc, ptr, next); + */ + break; + } + if (entity->open) { + if (enc == parser->m_encoding) { + /* It does not appear that this line can be executed. + * + * The "if (entity->open)" check catches recursive entity + * definitions. In order to be called with an open + * entity, it must have gone through this code before and + * been through the recursive call to + * appendAttributeValue() some lines below. That call + * sets the local encoding ("enc") to the parser's + * internal encoding (internal_utf8 or internal_utf16), + * which can never be the same as the principle encoding. + * It doesn't appear there is another code path that gets + * here with entity->open being TRUE. + * + * Since it is not certain that this logic is watertight, + * we keep the line and merely exclude it from coverage + * tests. + */ + parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */ } + return XML_ERROR_RECURSIVE_ENTITY_REF; } - break; + if (entity->notation) { + if (enc == parser->m_encoding) + parser->m_eventPtr = ptr; + return XML_ERROR_BINARY_ENTITY_REF; + } + if (! entity->textPtr) { + if (enc == parser->m_encoding) + parser->m_eventPtr = ptr; + return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; + } else { + enum XML_Error result; + const XML_Char *textEnd = entity->textPtr + entity->textLen; + entity->open = XML_TRUE; +#ifdef XML_DTD + entityTrackingOnOpen(parser, entity, __LINE__); +#endif + result = appendAttributeValue(parser, parser->m_internalEncoding, + isCdata, (const char *)entity->textPtr, + (const char *)textEnd, pool, + XML_ACCOUNT_ENTITY_EXPANSION); +#ifdef XML_DTD + entityTrackingOnClose(parser, entity, __LINE__); +#endif + entity->open = XML_FALSE; + if (result) + return result; + } + } break; default: - if (enc == encoding) - eventPtr = ptr; + /* The only token returned by XmlAttributeValueTok() that does + * not have an explicit case here is XML_TOK_PARTIAL_CHAR. + * Getting that would require an entity name to contain an + * incomplete XML character (e.g. \xE2\x82); however previous + * tokenisers will have already recognised and rejected such + * names before XmlAttributeValueTok() gets a look-in. This + * default case should be retained as a safety net, but the code + * excluded from coverage tests. + * + * LCOV_EXCL_START + */ + if (enc == parser->m_encoding) + parser->m_eventPtr = ptr; return XML_ERROR_UNEXPECTED_STATE; + /* LCOV_EXCL_STOP */ } ptr = next; } /* not reached */ } -static -enum XML_Error storeEntityValue(XML_Parser parser, - const ENCODING *enc, - const char *entityTextPtr, - const char *entityTextEnd) -{ - STRING_POOL *pool = &(dtd.pool); +static enum XML_Error +storeEntityValue(XML_Parser parser, const ENCODING *enc, + const char *entityTextPtr, const char *entityTextEnd, + enum XML_Account account) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ + STRING_POOL *pool = &(dtd->entityValuePool); + enum XML_Error result = XML_ERROR_NONE; +#ifdef XML_DTD + int oldInEntityValue = parser->m_prologState.inEntityValue; + parser->m_prologState.inEntityValue = 1; +#else + UNUSED_P(account); +#endif /* XML_DTD */ + /* never return Null for the value argument in EntityDeclHandler, + since this would indicate an external entity; therefore we + have to make sure that entityValuePool.start is not null */ + if (! pool->blocks) { + if (! poolGrow(pool)) + return XML_ERROR_NO_MEMORY; + } + for (;;) { - const char *next; + const char *next + = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */ int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); + +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__, + account)) { + accountingOnAbort(parser); + result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + goto endEntityValue; + } +#endif + switch (tok) { case XML_TOK_PARAM_ENTITY_REF: #ifdef XML_DTD - if (parentParser || enc != encoding) { - enum XML_Error result; + if (parser->m_isParamEntity || enc != parser->m_encoding) { const XML_Char *name; ENTITY *entity; - name = poolStoreString(&tempPool, enc, + name = poolStoreString(&parser->m_tempPool, enc, entityTextPtr + enc->minBytesPerChar, next - enc->minBytesPerChar); - if (!name) - return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(&dtd.paramEntities, name, 0); - poolDiscard(&tempPool); - if (!entity) { - if (enc == encoding) - eventPtr = entityTextPtr; - return XML_ERROR_UNDEFINED_ENTITY; + if (! name) { + result = XML_ERROR_NO_MEMORY; + goto endEntityValue; + } + entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); + poolDiscard(&parser->m_tempPool); + if (! entity) { + /* not a well-formedness error - see XML 1.0: WFC Entity Declared */ + /* cannot report skipped entity here - see comments on + parser->m_skippedEntityHandler + if (parser->m_skippedEntityHandler) + parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); + */ + dtd->keepProcessing = dtd->standalone; + goto endEntityValue; } if (entity->open) { - if (enc == encoding) - eventPtr = entityTextPtr; - return XML_ERROR_RECURSIVE_ENTITY_REF; + if (enc == parser->m_encoding) + parser->m_eventPtr = entityTextPtr; + result = XML_ERROR_RECURSIVE_ENTITY_REF; + goto endEntityValue; } if (entity->systemId) { - if (enc == encoding) - eventPtr = entityTextPtr; - return XML_ERROR_PARAM_ENTITY_REF; - } - entity->open = 1; - result = storeEntityValue(parser, - internalEncoding, - (char *)entity->textPtr, - (char *)(entity->textPtr + entity->textLen)); - entity->open = 0; - if (result) - return result; + if (parser->m_externalEntityRefHandler) { + dtd->paramEntityRead = XML_FALSE; + entity->open = XML_TRUE; + entityTrackingOnOpen(parser, entity, __LINE__); + if (! parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, 0, entity->base, + entity->systemId, entity->publicId)) { + entityTrackingOnClose(parser, entity, __LINE__); + entity->open = XML_FALSE; + result = XML_ERROR_EXTERNAL_ENTITY_HANDLING; + goto endEntityValue; + } + entityTrackingOnClose(parser, entity, __LINE__); + entity->open = XML_FALSE; + if (! dtd->paramEntityRead) + dtd->keepProcessing = dtd->standalone; + } else + dtd->keepProcessing = dtd->standalone; + } else { + entity->open = XML_TRUE; + entityTrackingOnOpen(parser, entity, __LINE__); + result = storeEntityValue( + parser, parser->m_internalEncoding, (const char *)entity->textPtr, + (const char *)(entity->textPtr + entity->textLen), + XML_ACCOUNT_ENTITY_EXPANSION); + entityTrackingOnClose(parser, entity, __LINE__); + entity->open = XML_FALSE; + if (result) + goto endEntityValue; + } break; } #endif /* XML_DTD */ - eventPtr = entityTextPtr; - return XML_ERROR_SYNTAX; + /* In the internal subset, PE references are not legal + within markup declarations, e.g entity values in this case. */ + parser->m_eventPtr = entityTextPtr; + result = XML_ERROR_PARAM_ENTITY_REF; + goto endEntityValue; case XML_TOK_NONE: - return XML_ERROR_NONE; + result = XML_ERROR_NONE; + goto endEntityValue; case XML_TOK_ENTITY_REF: case XML_TOK_DATA_CHARS: - if (!poolAppend(pool, enc, entityTextPtr, next)) - return XML_ERROR_NO_MEMORY; + if (! poolAppend(pool, enc, entityTextPtr, next)) { + result = XML_ERROR_NO_MEMORY; + goto endEntityValue; + } break; case XML_TOK_TRAILING_CR: next = entityTextPtr + enc->minBytesPerChar; /* fall through */ case XML_TOK_DATA_NEWLINE: - if (pool->end == pool->ptr && !poolGrow(pool)) - return XML_ERROR_NO_MEMORY; + if (pool->end == pool->ptr && ! poolGrow(pool)) { + result = XML_ERROR_NO_MEMORY; + goto endEntityValue; + } *(pool->ptr)++ = 0xA; break; - case XML_TOK_CHAR_REF: - { - XML_Char buf[XML_ENCODE_MAX]; - int i; - int n = XmlCharRefNumber(enc, entityTextPtr); - if (n < 0) { - if (enc == encoding) - eventPtr = entityTextPtr; - return XML_ERROR_BAD_CHAR_REF; - } - n = XmlEncode(n, (ICHAR *)buf); - if (!n) { - if (enc == encoding) - eventPtr = entityTextPtr; - return XML_ERROR_BAD_CHAR_REF; - } - for (i = 0; i < n; i++) { - if (pool->end == pool->ptr && !poolGrow(pool)) - return XML_ERROR_NO_MEMORY; - *(pool->ptr)++ = buf[i]; + case XML_TOK_CHAR_REF: { + XML_Char buf[XML_ENCODE_MAX]; + int i; + int n = XmlCharRefNumber(enc, entityTextPtr); + if (n < 0) { + if (enc == parser->m_encoding) + parser->m_eventPtr = entityTextPtr; + result = XML_ERROR_BAD_CHAR_REF; + goto endEntityValue; + } + n = XmlEncode(n, (ICHAR *)buf); + /* The XmlEncode() functions can never return 0 here. That + * error return happens if the code point passed in is either + * negative or greater than or equal to 0x110000. The + * XmlCharRefNumber() functions will all return a number + * strictly less than 0x110000 or a negative value if an error + * occurred. The negative value is intercepted above, so + * XmlEncode() is never passed a value it might return an + * error for. + */ + for (i = 0; i < n; i++) { + if (pool->end == pool->ptr && ! poolGrow(pool)) { + result = XML_ERROR_NO_MEMORY; + goto endEntityValue; } + *(pool->ptr)++ = buf[i]; } - break; + } break; case XML_TOK_PARTIAL: - if (enc == encoding) - eventPtr = entityTextPtr; - return XML_ERROR_INVALID_TOKEN; + if (enc == parser->m_encoding) + parser->m_eventPtr = entityTextPtr; + result = XML_ERROR_INVALID_TOKEN; + goto endEntityValue; case XML_TOK_INVALID: - if (enc == encoding) - eventPtr = next; - return XML_ERROR_INVALID_TOKEN; + if (enc == parser->m_encoding) + parser->m_eventPtr = next; + result = XML_ERROR_INVALID_TOKEN; + goto endEntityValue; default: - if (enc == encoding) - eventPtr = entityTextPtr; - return XML_ERROR_UNEXPECTED_STATE; + /* This default case should be unnecessary -- all the tokens + * that XmlEntityValueTok() can return have their own explicit + * cases -- but should be retained for safety. We do however + * exclude it from the coverage statistics. + * + * LCOV_EXCL_START + */ + if (enc == parser->m_encoding) + parser->m_eventPtr = entityTextPtr; + result = XML_ERROR_UNEXPECTED_STATE; + goto endEntityValue; + /* LCOV_EXCL_STOP */ } entityTextPtr = next; } - /* not reached */ +endEntityValue: +#ifdef XML_DTD + parser->m_prologState.inEntityValue = oldInEntityValue; +#endif /* XML_DTD */ + return result; } -static void -normalizeLines(XML_Char *s) -{ +static void FASTCALL +normalizeLines(XML_Char *s) { XML_Char *p; for (;; s++) { if (*s == XML_T('\0')) @@ -3604,93 +5974,108 @@ normalizeLines(XML_Char *s) *p++ = 0xA; if (*++s == 0xA) s++; - } - else + } else *p++ = *s++; } while (*s); *p = XML_T('\0'); } static int -reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end) -{ +reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end) { const XML_Char *target; XML_Char *data; const char *tem; - if (!processingInstructionHandler) { - if (defaultHandler) + if (! parser->m_processingInstructionHandler) { + if (parser->m_defaultHandler) reportDefault(parser, enc, start, end); return 1; } start += enc->minBytesPerChar * 2; tem = start + XmlNameLength(enc, start); - target = poolStoreString(&tempPool, enc, start, tem); - if (!target) + target = poolStoreString(&parser->m_tempPool, enc, start, tem); + if (! target) return 0; - poolFinish(&tempPool); - data = poolStoreString(&tempPool, enc, - XmlSkipS(enc, tem), - end - enc->minBytesPerChar*2); - if (!data) + poolFinish(&parser->m_tempPool); + data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem), + end - enc->minBytesPerChar * 2); + if (! data) return 0; normalizeLines(data); - processingInstructionHandler(handlerArg, target, data); - poolClear(&tempPool); + parser->m_processingInstructionHandler(parser->m_handlerArg, target, data); + poolClear(&parser->m_tempPool); return 1; } static int -reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end) -{ +reportComment(XML_Parser parser, const ENCODING *enc, const char *start, + const char *end) { XML_Char *data; - if (!commentHandler) { - if (defaultHandler) + if (! parser->m_commentHandler) { + if (parser->m_defaultHandler) reportDefault(parser, enc, start, end); return 1; } - data = poolStoreString(&tempPool, - enc, - start + enc->minBytesPerChar * 4, + data = poolStoreString(&parser->m_tempPool, enc, + start + enc->minBytesPerChar * 4, end - enc->minBytesPerChar * 3); - if (!data) + if (! data) return 0; normalizeLines(data); - commentHandler(handlerArg, data); - poolClear(&tempPool); + parser->m_commentHandler(parser->m_handlerArg, data); + poolClear(&parser->m_tempPool); return 1; } static void -reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char *end) -{ +reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, + const char *end) { if (MUST_CONVERT(enc, s)) { + enum XML_Convert_Result convert_res; const char **eventPP; const char **eventEndPP; - if (enc == encoding) { - eventPP = &eventPtr; - eventEndPP = &eventEndPtr; - } - else { - eventPP = &(openInternalEntities->internalEventPtr); - eventEndPP = &(openInternalEntities->internalEventEndPtr); + if (enc == parser->m_encoding) { + eventPP = &parser->m_eventPtr; + eventEndPP = &parser->m_eventEndPtr; + } else { + /* To get here, two things must be true; the parser must be + * using a character encoding that is not the same as the + * encoding passed in, and the encoding passed in must need + * conversion to the internal format (UTF-8 unless XML_UNICODE + * is defined). The only occasions on which the encoding passed + * in is not the same as the parser's encoding are when it is + * the internal encoding (e.g. a previously defined parameter + * entity, already converted to internal format). This by + * definition doesn't need conversion, so the whole branch never + * gets executed. + * + * For safety's sake we don't delete these lines and merely + * exclude them from coverage statistics. + * + * LCOV_EXCL_START + */ + eventPP = &(parser->m_openInternalEntities->internalEventPtr); + eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); + /* LCOV_EXCL_STOP */ } do { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); + ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; + convert_res + = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd); *eventEndPP = s; - defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf); + parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf, + (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); *eventPP = s; - } while (s != end); - } - else - defaultHandler(handlerArg, (XML_Char *)s, (XML_Char *)end - (XML_Char *)s); + } while ((convert_res != XML_CONVERT_COMPLETED) + && (convert_res != XML_CONVERT_INPUT_INCOMPLETE)); + } else + parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s, + (int)((XML_Char *)end - (XML_Char *)s)); } - static int -defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, - int isId, const XML_Char *value, XML_Parser parser) -{ +defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, + XML_Bool isId, const XML_Char *value, XML_Parser parser) { DEFAULT_ATTRIBUTE *att; if (value || isId) { /* The handling of default attributes gets messed up if we have @@ -3699,107 +6084,121 @@ defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, for (i = 0; i < type->nDefaultAtts; i++) if (attId == type->defaultAtts[i].id) return 1; - if (isId && !type->idAtt && !attId->xmlns) + if (isId && ! type->idAtt && ! attId->xmlns) type->idAtt = attId; } if (type->nDefaultAtts == type->allocDefaultAtts) { if (type->allocDefaultAtts == 0) { type->allocDefaultAtts = 8; - type->defaultAtts = MALLOC(type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE)); - } - else { - type->allocDefaultAtts *= 2; - type->defaultAtts = REALLOC(type->defaultAtts, - type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE)); + type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC( + parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); + if (! type->defaultAtts) { + type->allocDefaultAtts = 0; + return 0; + } + } else { + DEFAULT_ATTRIBUTE *temp; + int count = type->allocDefaultAtts * 2; + temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts, + (count * sizeof(DEFAULT_ATTRIBUTE))); + if (temp == NULL) + return 0; + type->allocDefaultAtts = count; + type->defaultAtts = temp; } - if (!type->defaultAtts) - return 0; } att = type->defaultAtts + type->nDefaultAtts; att->id = attId; att->value = value; att->isCdata = isCdata; - if (!isCdata) - attId->maybeTokenized = 1; + if (! isCdata) + attId->maybeTokenized = XML_TRUE; type->nDefaultAtts += 1; return 1; } -static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) -{ +static int +setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ const XML_Char *name; for (name = elementType->name; *name; name++) { - if (*name == XML_T(':')) { + if (*name == XML_T(ASCII_COLON)) { PREFIX *prefix; const XML_Char *s; for (s = elementType->name; s != name; s++) { - if (!poolAppendChar(&dtd.pool, *s)) + if (! poolAppendChar(&dtd->pool, *s)) return 0; } - if (!poolAppendChar(&dtd.pool, XML_T('\0'))) + if (! poolAppendChar(&dtd->pool, XML_T('\0'))) return 0; - prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX)); - if (!prefix) + prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool), + sizeof(PREFIX)); + if (! prefix) return 0; - if (prefix->name == poolStart(&dtd.pool)) - poolFinish(&dtd.pool); + if (prefix->name == poolStart(&dtd->pool)) + poolFinish(&dtd->pool); else - poolDiscard(&dtd.pool); + poolDiscard(&dtd->pool); elementType->prefix = prefix; - + break; } } return 1; } static ATTRIBUTE_ID * -getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end) -{ +getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, + const char *end) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ ATTRIBUTE_ID *id; const XML_Char *name; - if (!poolAppendChar(&dtd.pool, XML_T('\0'))) - return 0; - name = poolStoreString(&dtd.pool, enc, start, end); - if (!name) - return 0; + if (! poolAppendChar(&dtd->pool, XML_T('\0'))) + return NULL; + name = poolStoreString(&dtd->pool, enc, start, end); + if (! name) + return NULL; + /* skip quotation mark - its storage will be re-used (like in name[-1]) */ ++name; - id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, name, sizeof(ATTRIBUTE_ID)); - if (!id) - return 0; + id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, + sizeof(ATTRIBUTE_ID)); + if (! id) + return NULL; if (id->name != name) - poolDiscard(&dtd.pool); + poolDiscard(&dtd->pool); else { - poolFinish(&dtd.pool); - if (!ns) + poolFinish(&dtd->pool); + if (! parser->m_ns) ; - else if (name[0] == 'x' - && name[1] == 'm' - && name[2] == 'l' - && name[3] == 'n' - && name[4] == 's' - && (name[5] == XML_T('\0') || name[5] == XML_T(':'))) { - if (name[5] == '\0') - id->prefix = &dtd.defaultPrefix; + else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m) + && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n) + && name[4] == XML_T(ASCII_s) + && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) { + if (name[5] == XML_T('\0')) + id->prefix = &dtd->defaultPrefix; else - id->prefix = (PREFIX *)lookup(&dtd.prefixes, name + 6, sizeof(PREFIX)); - id->xmlns = 1; - } - else { + id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, + sizeof(PREFIX)); + id->xmlns = XML_TRUE; + } else { int i; for (i = 0; name[i]; i++) { - if (name[i] == XML_T(':')) { + /* attributes without prefix are *not* in the default namespace */ + if (name[i] == XML_T(ASCII_COLON)) { int j; for (j = 0; j < i; j++) { - if (!poolAppendChar(&dtd.pool, name[j])) - return 0; + if (! poolAppendChar(&dtd->pool, name[j])) + return NULL; } - if (!poolAppendChar(&dtd.pool, XML_T('\0'))) - return 0; - id->prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX)); - if (id->prefix->name == poolStart(&dtd.pool)) - poolFinish(&dtd.pool); + if (! poolAppendChar(&dtd->pool, XML_T('\0'))) + return NULL; + id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, + poolStart(&dtd->pool), sizeof(PREFIX)); + if (! id->prefix) + return NULL; + if (id->prefix->name == poolStart(&dtd->pool)) + poolFinish(&dtd->pool); else - poolDiscard(&dtd.pool); + poolDiscard(&dtd->pool); break; } } @@ -3808,136 +6207,166 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const return id; } -#define CONTEXT_SEP XML_T('\f') +#define CONTEXT_SEP XML_T(ASCII_FF) -static -const XML_Char *getContext(XML_Parser parser) -{ +static const XML_Char * +getContext(XML_Parser parser) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ HASH_TABLE_ITER iter; - int needSep = 0; + XML_Bool needSep = XML_FALSE; - if (dtd.defaultPrefix.binding) { + if (dtd->defaultPrefix.binding) { int i; int len; - if (!poolAppendChar(&tempPool, XML_T('='))) - return 0; - len = dtd.defaultPrefix.binding->uriLen; - if (namespaceSeparator != XML_T('\0')) + if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) + return NULL; + len = dtd->defaultPrefix.binding->uriLen; + if (parser->m_namespaceSeparator) len--; - for (i = 0; i < len; i++) - if (!poolAppendChar(&tempPool, dtd.defaultPrefix.binding->uri[i])) - return 0; - needSep = 1; + for (i = 0; i < len; i++) { + if (! poolAppendChar(&parser->m_tempPool, + dtd->defaultPrefix.binding->uri[i])) { + /* Because of memory caching, I don't believe this line can be + * executed. + * + * This is part of a loop copying the default prefix binding + * URI into the parser's temporary string pool. Previously, + * that URI was copied into the same string pool, with a + * terminating NUL character, as part of setContext(). When + * the pool was cleared, that leaves a block definitely big + * enough to hold the URI on the free block list of the pool. + * The URI copy in getContext() therefore cannot run out of + * memory. + * + * If the pool is used between the setContext() and + * getContext() calls, the worst it can do is leave a bigger + * block on the front of the free list. Given that this is + * all somewhat inobvious and program logic can be changed, we + * don't delete the line but we do exclude it from the test + * coverage statistics. + */ + return NULL; /* LCOV_EXCL_LINE */ + } + } + needSep = XML_TRUE; } - hashTableIterInit(&iter, &(dtd.prefixes)); + hashTableIterInit(&iter, &(dtd->prefixes)); for (;;) { int i; int len; const XML_Char *s; PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter); - if (!prefix) + if (! prefix) break; - if (!prefix->binding) - continue; - if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP)) - return 0; + if (! prefix->binding) { + /* This test appears to be (justifiable) paranoia. There does + * not seem to be a way of injecting a prefix without a binding + * that doesn't get errored long before this function is called. + * The test should remain for safety's sake, so we instead + * exclude the following line from the coverage statistics. + */ + continue; /* LCOV_EXCL_LINE */ + } + if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) + return NULL; for (s = prefix->name; *s; s++) - if (!poolAppendChar(&tempPool, *s)) - return 0; - if (!poolAppendChar(&tempPool, XML_T('='))) - return 0; + if (! poolAppendChar(&parser->m_tempPool, *s)) + return NULL; + if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) + return NULL; len = prefix->binding->uriLen; - if (namespaceSeparator != XML_T('\0')) + if (parser->m_namespaceSeparator) len--; for (i = 0; i < len; i++) - if (!poolAppendChar(&tempPool, prefix->binding->uri[i])) - return 0; - needSep = 1; + if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i])) + return NULL; + needSep = XML_TRUE; } - - hashTableIterInit(&iter, &(dtd.generalEntities)); + hashTableIterInit(&iter, &(dtd->generalEntities)); for (;;) { const XML_Char *s; ENTITY *e = (ENTITY *)hashTableIterNext(&iter); - if (!e) + if (! e) break; - if (!e->open) + if (! e->open) continue; - if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP)) - return 0; + if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) + return NULL; for (s = e->name; *s; s++) - if (!poolAppendChar(&tempPool, *s)) + if (! poolAppendChar(&parser->m_tempPool, *s)) return 0; - needSep = 1; + needSep = XML_TRUE; } - if (!poolAppendChar(&tempPool, XML_T('\0'))) - return 0; - return tempPool.start; + if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) + return NULL; + return parser->m_tempPool.start; } -static -int setContext(XML_Parser parser, const XML_Char *context) -{ +static XML_Bool +setContext(XML_Parser parser, const XML_Char *context) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ const XML_Char *s = context; while (*context != XML_T('\0')) { if (*s == CONTEXT_SEP || *s == XML_T('\0')) { ENTITY *e; - if (!poolAppendChar(&tempPool, XML_T('\0'))) - return 0; - e = (ENTITY *)lookup(&dtd.generalEntities, poolStart(&tempPool), 0); + if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) + return XML_FALSE; + e = (ENTITY *)lookup(parser, &dtd->generalEntities, + poolStart(&parser->m_tempPool), 0); if (e) - e->open = 1; + e->open = XML_TRUE; if (*s != XML_T('\0')) s++; context = s; - poolDiscard(&tempPool); - } - else if (*s == '=') { + poolDiscard(&parser->m_tempPool); + } else if (*s == XML_T(ASCII_EQUALS)) { PREFIX *prefix; - if (poolLength(&tempPool) == 0) - prefix = &dtd.defaultPrefix; + if (poolLength(&parser->m_tempPool) == 0) + prefix = &dtd->defaultPrefix; else { - if (!poolAppendChar(&tempPool, XML_T('\0'))) - return 0; - prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&tempPool), sizeof(PREFIX)); - if (!prefix) - return 0; - if (prefix->name == poolStart(&tempPool)) { - prefix->name = poolCopyString(&dtd.pool, prefix->name); - if (!prefix->name) - return 0; + if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) + return XML_FALSE; + prefix + = (PREFIX *)lookup(parser, &dtd->prefixes, + poolStart(&parser->m_tempPool), sizeof(PREFIX)); + if (! prefix) + return XML_FALSE; + if (prefix->name == poolStart(&parser->m_tempPool)) { + prefix->name = poolCopyString(&dtd->pool, prefix->name); + if (! prefix->name) + return XML_FALSE; } - poolDiscard(&tempPool); + poolDiscard(&parser->m_tempPool); } - for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); context++) - if (!poolAppendChar(&tempPool, *context)) - return 0; - if (!poolAppendChar(&tempPool, XML_T('\0'))) - return 0; - if (!addBinding(parser, prefix, 0, poolStart(&tempPool), &inheritedBindings)) - return 0; - poolDiscard(&tempPool); + for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); + context++) + if (! poolAppendChar(&parser->m_tempPool, *context)) + return XML_FALSE; + if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) + return XML_FALSE; + if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool), + &parser->m_inheritedBindings) + != XML_ERROR_NONE) + return XML_FALSE; + poolDiscard(&parser->m_tempPool); if (*context != XML_T('\0')) ++context; s = context; - } - else { - if (!poolAppendChar(&tempPool, *s)) - return 0; + } else { + if (! poolAppendChar(&parser->m_tempPool, *s)) + return XML_FALSE; s++; } } - return 1; + return XML_TRUE; } - -static -void normalizePublicId(XML_Char *publicId) -{ +static void FASTCALL +normalizePublicId(XML_Char *publicId) { XML_Char *p = publicId; XML_Char *s; for (s = publicId; *s; s++) { @@ -3957,55 +6386,89 @@ void normalizePublicId(XML_Char *publicId) *p = XML_T('\0'); } -static int dtdInit(DTD *p, XML_Parser parser) -{ - XML_Memory_Handling_Suite *ms = &((Parser *) parser)->m_mem; +static DTD * +dtdCreate(const XML_Memory_Handling_Suite *ms) { + DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD)); + if (p == NULL) + return p; poolInit(&(p->pool), ms); + poolInit(&(p->entityValuePool), ms); hashTableInit(&(p->generalEntities), ms); hashTableInit(&(p->elementTypes), ms); hashTableInit(&(p->attributeIds), ms); hashTableInit(&(p->prefixes), ms); - p->complete = 1; - p->standalone = 0; #ifdef XML_DTD + p->paramEntityRead = XML_FALSE; hashTableInit(&(p->paramEntities), ms); #endif /* XML_DTD */ - p->defaultPrefix.name = 0; - p->defaultPrefix.binding = 0; + p->defaultPrefix.name = NULL; + p->defaultPrefix.binding = NULL; - p->in_eldecl = 0; - p->scaffIndex = 0; + p->in_eldecl = XML_FALSE; + p->scaffIndex = NULL; + p->scaffold = NULL; p->scaffLevel = 0; - p->scaffold = 0; - p->contentStringLen = 0; p->scaffSize = 0; p->scaffCount = 0; + p->contentStringLen = 0; - return 1; + p->keepProcessing = XML_TRUE; + p->hasParamEntityRefs = XML_FALSE; + p->standalone = XML_FALSE; + return p; } +static void +dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) { + HASH_TABLE_ITER iter; + hashTableIterInit(&iter, &(p->elementTypes)); + for (;;) { + ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); + if (! e) + break; + if (e->allocDefaultAtts != 0) + ms->free_fcn(e->defaultAtts); + } + hashTableClear(&(p->generalEntities)); #ifdef XML_DTD + p->paramEntityRead = XML_FALSE; + hashTableClear(&(p->paramEntities)); +#endif /* XML_DTD */ + hashTableClear(&(p->elementTypes)); + hashTableClear(&(p->attributeIds)); + hashTableClear(&(p->prefixes)); + poolClear(&(p->pool)); + poolClear(&(p->entityValuePool)); + p->defaultPrefix.name = NULL; + p->defaultPrefix.binding = NULL; -static void dtdSwap(DTD *p1, DTD *p2) -{ - DTD tem; - memcpy(&tem, p1, sizeof(DTD)); - memcpy(p1, p2, sizeof(DTD)); - memcpy(p2, &tem, sizeof(DTD)); -} + p->in_eldecl = XML_FALSE; -#endif /* XML_DTD */ + ms->free_fcn(p->scaffIndex); + p->scaffIndex = NULL; + ms->free_fcn(p->scaffold); + p->scaffold = NULL; + + p->scaffLevel = 0; + p->scaffSize = 0; + p->scaffCount = 0; + p->contentStringLen = 0; -static void dtdDestroy(DTD *p, XML_Parser parser) -{ + p->keepProcessing = XML_TRUE; + p->hasParamEntityRefs = XML_FALSE; + p->standalone = XML_FALSE; +} + +static void +dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) { HASH_TABLE_ITER iter; hashTableIterInit(&iter, &(p->elementTypes)); for (;;) { ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); - if (!e) + if (! e) break; if (e->allocDefaultAtts != 0) - FREE(e->defaultAtts); + ms->free_fcn(e->defaultAtts); } hashTableDestroy(&(p->generalEntities)); #ifdef XML_DTD @@ -4015,17 +6478,20 @@ static void dtdDestroy(DTD *p, XML_Parser parser) hashTableDestroy(&(p->attributeIds)); hashTableDestroy(&(p->prefixes)); poolDestroy(&(p->pool)); - if (p->scaffIndex) - FREE(p->scaffIndex); - if (p->scaffold) - FREE(p->scaffold); + poolDestroy(&(p->entityValuePool)); + if (isDocEntity) { + ms->free_fcn(p->scaffIndex); + ms->free_fcn(p->scaffold); + } + ms->free_fcn(p); } -/* Do a deep copy of the DTD. Return 0 for out of memory; non-zero otherwise. -The new DTD has already been initialized. */ - -static int dtdCopy(DTD *newDtd, const DTD *oldDtd, XML_Parser parser) -{ +/* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise. + The new DTD has already been initialized. +*/ +static int +dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, + const XML_Memory_Handling_Suite *ms) { HASH_TABLE_ITER iter; /* Copy the prefix table. */ @@ -4034,12 +6500,12 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd, XML_Parser parser) for (;;) { const XML_Char *name; const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter); - if (!oldP) + if (! oldP) break; name = poolCopyString(&(newDtd->pool), oldP->name); - if (!name) + if (! name) return 0; - if (!lookup(&(newDtd->prefixes), name, sizeof(PREFIX))) + if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX))) return 0; } @@ -4052,17 +6518,18 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd, XML_Parser parser) const XML_Char *name; const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter); - if (!oldA) + if (! oldA) break; /* Remember to allocate the scratch byte before the name. */ - if (!poolAppendChar(&(newDtd->pool), XML_T('\0'))) + if (! poolAppendChar(&(newDtd->pool), XML_T('\0'))) return 0; name = poolCopyString(&(newDtd->pool), oldA->name); - if (!name) + if (! name) return 0; ++name; - newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID)); - if (!newA) + newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name, + sizeof(ATTRIBUTE_ID)); + if (! newA) return 0; newA->maybeTokenized = oldA->maybeTokenized; if (oldA->prefix) { @@ -4070,7 +6537,8 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd, XML_Parser parser) if (oldA->prefix == &oldDtd->defaultPrefix) newA->prefix = &newDtd->defaultPrefix; else - newA->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldA->prefix->name, 0); + newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), + oldA->prefix->name, 0); } } @@ -4083,51 +6551,57 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd, XML_Parser parser) ELEMENT_TYPE *newE; const XML_Char *name; const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter); - if (!oldE) + if (! oldE) break; name = poolCopyString(&(newDtd->pool), oldE->name); - if (!name) + if (! name) return 0; - newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE)); - if (!newE) + newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name, + sizeof(ELEMENT_TYPE)); + if (! newE) return 0; if (oldE->nDefaultAtts) { - newE->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); - if (!newE->defaultAtts) + newE->defaultAtts = (DEFAULT_ATTRIBUTE *)ms->malloc_fcn( + oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); + if (! newE->defaultAtts) { return 0; + } } if (oldE->idAtt) - newE->idAtt = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->idAtt->name, 0); + newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), + oldE->idAtt->name, 0); newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts; if (oldE->prefix) - newE->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldE->prefix->name, 0); + newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), + oldE->prefix->name, 0); for (i = 0; i < newE->nDefaultAtts; i++) { - newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); + newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup( + oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata; if (oldE->defaultAtts[i].value) { - newE->defaultAtts[i].value = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value); - if (!newE->defaultAtts[i].value) + newE->defaultAtts[i].value + = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value); + if (! newE->defaultAtts[i].value) return 0; - } - else - newE->defaultAtts[i].value = 0; + } else + newE->defaultAtts[i].value = NULL; } } /* Copy the entity tables. */ - if (!copyEntityTable(&(newDtd->generalEntities), - &(newDtd->pool), - &(oldDtd->generalEntities), parser)) - return 0; + if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool), + &(oldDtd->generalEntities))) + return 0; #ifdef XML_DTD - if (!copyEntityTable(&(newDtd->paramEntities), - &(newDtd->pool), - &(oldDtd->paramEntities), parser)) - return 0; + if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool), + &(oldDtd->paramEntities))) + return 0; + newDtd->paramEntityRead = oldDtd->paramEntityRead; #endif /* XML_DTD */ - newDtd->complete = oldDtd->complete; + newDtd->keepProcessing = oldDtd->keepProcessing; + newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs; newDtd->standalone = oldDtd->standalone; /* Don't want deep copying for scaffolding */ @@ -4139,17 +6613,14 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd, XML_Parser parser) newDtd->scaffIndex = oldDtd->scaffIndex; return 1; -} /* End dtdCopy */ +} /* End dtdCopy */ -static int copyEntityTable(HASH_TABLE *newTable, - STRING_POOL *newPool, - const HASH_TABLE *oldTable, - XML_Parser parser) -{ +static int +copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable, + STRING_POOL *newPool, const HASH_TABLE *oldTable) { HASH_TABLE_ITER iter; - const XML_Char *cachedOldBase = 0; - const XML_Char *cachedNewBase = 0; - itkExpatUnused(parser); + const XML_Char *cachedOldBase = NULL; + const XML_Char *cachedNewBase = NULL; hashTableIterInit(&iter, oldTable); @@ -4157,17 +6628,17 @@ static int copyEntityTable(HASH_TABLE *newTable, ENTITY *newE; const XML_Char *name; const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter); - if (!oldE) + if (! oldE) break; name = poolCopyString(newPool, oldE->name); - if (!name) + if (! name) return 0; - newE = (ENTITY *)lookup(newTable, name, sizeof(ENTITY)); - if (!newE) + newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY)); + if (! newE) return 0; if (oldE->systemId) { const XML_Char *tem = poolCopyString(newPool, oldE->systemId); - if (!tem) + if (! tem) return 0; newE->systemId = tem; if (oldE->base) { @@ -4176,170 +6647,205 @@ static int copyEntityTable(HASH_TABLE *newTable, else { cachedOldBase = oldE->base; tem = poolCopyString(newPool, cachedOldBase); - if (!tem) + if (! tem) return 0; cachedNewBase = newE->base = tem; } } - } - else { - const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen); - if (!tem) + if (oldE->publicId) { + tem = poolCopyString(newPool, oldE->publicId); + if (! tem) + return 0; + newE->publicId = tem; + } + } else { + const XML_Char *tem + = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen); + if (! tem) return 0; newE->textPtr = tem; newE->textLen = oldE->textLen; } if (oldE->notation) { const XML_Char *tem = poolCopyString(newPool, oldE->notation); - if (!tem) + if (! tem) return 0; newE->notation = tem; } + newE->is_param = oldE->is_param; + newE->is_internal = oldE->is_internal; } return 1; } -#define INIT_SIZE 64 +#define INIT_POWER 6 -static -int keyeq(KEY s1, KEY s2) -{ +static XML_Bool FASTCALL +keyeq(KEY s1, KEY s2) { for (; *s1 == *s2; s1++, s2++) if (*s1 == 0) - return 1; - return 0; + return XML_TRUE; + return XML_FALSE; } -static -unsigned long hash(KEY s) -{ - unsigned long h = 0; - while (*s) - h = (h << 5) + h + (unsigned char)*s++; - return h; +static size_t +keylen(KEY s) { + size_t len = 0; + for (; *s; s++, len++) + ; + return len; +} + +static void +copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) { + key->k[0] = 0; + key->k[1] = get_hash_secret_salt(parser); } -static -NAMED *lookup(HASH_TABLE *table, KEY name, size_t createSize) -{ +static unsigned long FASTCALL +hash(XML_Parser parser, KEY s) { + struct siphash state; + struct sipkey key; + (void)sip24_valid; + copy_salt_to_sipkey(parser, &key); + sip24_init(&state, &key); + sip24_update(&state, s, keylen(s) * sizeof(XML_Char)); + return (unsigned long)sip24_final(&state); +} + +static NAMED * +lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { size_t i; if (table->size == 0) { size_t tsize; - - if (!createSize) - return 0; - tsize = INIT_SIZE * sizeof(NAMED *); - table->v = table->mem->malloc_fcn(tsize); - if (!table->v) - return 0; + if (! createSize) + return NULL; + table->power = INIT_POWER; + /* table->size is a power of 2 */ + table->size = (size_t)1 << INIT_POWER; + tsize = table->size * sizeof(NAMED *); + table->v = (NAMED **)table->mem->malloc_fcn(tsize); + if (! table->v) { + table->size = 0; + return NULL; + } memset(table->v, 0, tsize); - table->size = INIT_SIZE; - table->usedLim = INIT_SIZE / 2; - i = hash(name) & (table->size - 1); - } - else { - unsigned long h = hash(name); - for (i = h & (table->size - 1); - table->v[i]; - i == 0 ? i = table->size - 1 : --i) { + i = hash(parser, name) & ((unsigned long)table->size - 1); + } else { + unsigned long h = hash(parser, name); + unsigned long mask = (unsigned long)table->size - 1; + unsigned char step = 0; + i = h & mask; + while (table->v[i]) { if (keyeq(name, table->v[i]->name)) return table->v[i]; + if (! step) + step = PROBE_STEP(h, mask, table->power); + i < step ? (i += table->size - step) : (i -= step); } - if (!createSize) - return 0; - if (table->used == table->usedLim) { - /* check for overflow */ - size_t newSize = table->size * 2; + if (! createSize) + return NULL; + + /* check for overflow (table is half full) */ + if (table->used >> (table->power - 1)) { + unsigned char newPower = table->power + 1; + size_t newSize = (size_t)1 << newPower; + unsigned long newMask = (unsigned long)newSize - 1; size_t tsize = newSize * sizeof(NAMED *); - NAMED **newV = table->mem->malloc_fcn(tsize); - if (!newV) - return 0; + NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize); + if (! newV) + return NULL; memset(newV, 0, tsize); for (i = 0; i < table->size; i++) if (table->v[i]) { - size_t j; - for (j = hash(table->v[i]->name) & (newSize - 1); - newV[j]; - j == 0 ? j = newSize - 1 : --j) - ; + unsigned long newHash = hash(parser, table->v[i]->name); + size_t j = newHash & newMask; + step = 0; + while (newV[j]) { + if (! step) + step = PROBE_STEP(newHash, newMask, newPower); + j < step ? (j += newSize - step) : (j -= step); + } newV[j] = table->v[i]; } table->mem->free_fcn(table->v); table->v = newV; + table->power = newPower; table->size = newSize; - table->usedLim = newSize/2; - for (i = h & (table->size - 1); - table->v[i]; - i == 0 ? i = table->size - 1 : --i) - ; + i = h & newMask; + step = 0; + while (table->v[i]) { + if (! step) + step = PROBE_STEP(h, newMask, newPower); + i < step ? (i += newSize - step) : (i -= step); + } } } - table->v[i] = table->mem->malloc_fcn(createSize); - if (!table->v[i]) - return 0; + table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize); + if (! table->v[i]) + return NULL; memset(table->v[i], 0, createSize); table->v[i]->name = name; (table->used)++; return table->v[i]; } -static -void hashTableDestroy(HASH_TABLE *table) -{ +static void FASTCALL +hashTableClear(HASH_TABLE *table) { size_t i; for (i = 0; i < table->size; i++) { - NAMED *p = table->v[i]; - if (p) - table->mem->free_fcn(p); + table->mem->free_fcn(table->v[i]); + table->v[i] = NULL; } - if (table->v) - table->mem->free_fcn(table->v); + table->used = 0; +} + +static void FASTCALL +hashTableDestroy(HASH_TABLE *table) { + size_t i; + for (i = 0; i < table->size; i++) + table->mem->free_fcn(table->v[i]); + table->mem->free_fcn(table->v); } -static -void hashTableInit(HASH_TABLE *p, XML_Memory_Handling_Suite *ms) -{ +static void FASTCALL +hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) { + p->power = 0; p->size = 0; - p->usedLim = 0; p->used = 0; - p->v = 0; + p->v = NULL; p->mem = ms; } -static -void hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) -{ +static void FASTCALL +hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) { iter->p = table->v; - iter->end = iter->p + table->size; + iter->end = iter->p ? iter->p + table->size : NULL; } -static -NAMED *hashTableIterNext(HASH_TABLE_ITER *iter) -{ +static NAMED *FASTCALL +hashTableIterNext(HASH_TABLE_ITER *iter) { while (iter->p != iter->end) { NAMED *tem = *(iter->p)++; if (tem) return tem; } - return 0; + return NULL; } - -static -void poolInit(STRING_POOL *pool, XML_Memory_Handling_Suite *ms) -{ - pool->blocks = 0; - pool->freeBlocks = 0; - pool->start = 0; - pool->ptr = 0; - pool->end = 0; +static void FASTCALL +poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) { + pool->blocks = NULL; + pool->freeBlocks = NULL; + pool->start = NULL; + pool->ptr = NULL; + pool->end = NULL; pool->mem = ms; } -static -void poolClear(STRING_POOL *pool) -{ - if (!pool->freeBlocks) +static void FASTCALL +poolClear(STRING_POOL *pool) { + if (! pool->freeBlocks) pool->freeBlocks = pool->blocks; else { BLOCK *p = pool->blocks; @@ -4350,143 +6856,222 @@ void poolClear(STRING_POOL *pool) p = tem; } } - pool->blocks = 0; - pool->start = 0; - pool->ptr = 0; - pool->end = 0; + pool->blocks = NULL; + pool->start = NULL; + pool->ptr = NULL; + pool->end = NULL; } -static -void poolDestroy(STRING_POOL *pool) -{ +static void FASTCALL +poolDestroy(STRING_POOL *pool) { BLOCK *p = pool->blocks; while (p) { BLOCK *tem = p->next; pool->mem->free_fcn(p); p = tem; } - pool->blocks = 0; p = pool->freeBlocks; while (p) { BLOCK *tem = p->next; pool->mem->free_fcn(p); p = tem; } - pool->freeBlocks = 0; - pool->ptr = 0; - pool->start = 0; - pool->end = 0; } -static -XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, - const char *ptr, const char *end) -{ - if (!pool->ptr && !poolGrow(pool)) - return 0; +static XML_Char * +poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr, + const char *end) { + if (! pool->ptr && ! poolGrow(pool)) + return NULL; for (;;) { - XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); - if (ptr == end) + const enum XML_Convert_Result convert_res = XmlConvert( + enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); + if ((convert_res == XML_CONVERT_COMPLETED) + || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; - if (!poolGrow(pool)) - return 0; + if (! poolGrow(pool)) + return NULL; } return pool->start; } -static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s) -{ +static const XML_Char *FASTCALL +poolCopyString(STRING_POOL *pool, const XML_Char *s) { do { - if (!poolAppendChar(pool, *s)) - return 0; + if (! poolAppendChar(pool, *s)) + return NULL; } while (*s++); s = pool->start; poolFinish(pool); return s; } -static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) -{ - if (!pool->ptr && !poolGrow(pool)) - return 0; +static const XML_Char * +poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) { + if (! pool->ptr && ! poolGrow(pool)) { + /* The following line is unreachable given the current usage of + * poolCopyStringN(). Currently it is called from exactly one + * place to copy the text of a simple general entity. By that + * point, the name of the entity is already stored in the pool, so + * pool->ptr cannot be NULL. + * + * If poolCopyStringN() is used elsewhere as it well might be, + * this line may well become executable again. Regardless, this + * sort of check shouldn't be removed lightly, so we just exclude + * it from the coverage statistics. + */ + return NULL; /* LCOV_EXCL_LINE */ + } for (; n > 0; --n, s++) { - if (!poolAppendChar(pool, *s)) - return 0; - + if (! poolAppendChar(pool, *s)) + return NULL; } s = pool->start; poolFinish(pool); return s; } -static -const XML_Char *poolAppendString(STRING_POOL *pool, const XML_Char *s) -{ +static const XML_Char *FASTCALL +poolAppendString(STRING_POOL *pool, const XML_Char *s) { while (*s) { - if (!poolAppendChar(pool, *s)) - return 0; + if (! poolAppendChar(pool, *s)) + return NULL; s++; - } + } return pool->start; -} /* End poolAppendString */ +} -static -XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, - const char *ptr, const char *end) -{ - if (!poolAppend(pool, enc, ptr, end)) - return 0; - if (pool->ptr == pool->end && !poolGrow(pool)) - return 0; +static XML_Char * +poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr, + const char *end) { + if (! poolAppend(pool, enc, ptr, end)) + return NULL; + if (pool->ptr == pool->end && ! poolGrow(pool)) + return NULL; *(pool->ptr)++ = 0; return pool->start; } -static -int poolGrow(STRING_POOL *pool) -{ +static size_t +poolBytesToAllocateFor(int blockSize) { + /* Unprotected math would be: + ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char); + ** + ** Detect overflow, avoiding _signed_ overflow undefined behavior + ** For a + b * c we check b * c in isolation first, so that addition of a + ** on top has no chance of making us accept a small non-negative number + */ + const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */ + + if (blockSize <= 0) + return 0; + + if (blockSize > (int)(INT_MAX / stretch)) + return 0; + + { + const int stretchedBlockSize = blockSize * (int)stretch; + const int bytesToAllocate + = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize); + if (bytesToAllocate < 0) + return 0; + + return (size_t)bytesToAllocate; + } +} + +static XML_Bool FASTCALL +poolGrow(STRING_POOL *pool) { if (pool->freeBlocks) { if (pool->start == 0) { pool->blocks = pool->freeBlocks; pool->freeBlocks = pool->freeBlocks->next; - pool->blocks->next = 0; + pool->blocks->next = NULL; pool->start = pool->blocks->s; pool->end = pool->start + pool->blocks->size; pool->ptr = pool->start; - return 1; + return XML_TRUE; } if (pool->end - pool->start < pool->freeBlocks->size) { BLOCK *tem = pool->freeBlocks->next; pool->freeBlocks->next = pool->blocks; pool->blocks = pool->freeBlocks; pool->freeBlocks = tem; - memcpy(pool->blocks->s, pool->start, (pool->end - pool->start) * sizeof(XML_Char)); + memcpy(pool->blocks->s, pool->start, + (pool->end - pool->start) * sizeof(XML_Char)); pool->ptr = pool->blocks->s + (pool->ptr - pool->start); pool->start = pool->blocks->s; pool->end = pool->start + pool->blocks->size; - return 1; + return XML_TRUE; } } if (pool->blocks && pool->start == pool->blocks->s) { - int blockSize = (pool->end - pool->start)*2; - pool->blocks = pool->mem->realloc_fcn(pool->blocks, offsetof(BLOCK, s) + blockSize * sizeof(XML_Char)); - if (!pool->blocks) - return 0; + BLOCK *temp; + int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U); + size_t bytesToAllocate; + + /* NOTE: Needs to be calculated prior to calling `realloc` + to avoid dangling pointers: */ + const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start; + + if (blockSize < 0) { + /* This condition traps a situation where either more than + * INT_MAX/2 bytes have already been allocated. This isn't + * readily testable, since it is unlikely that an average + * machine will have that much memory, so we exclude it from the + * coverage statistics. + */ + return XML_FALSE; /* LCOV_EXCL_LINE */ + } + + bytesToAllocate = poolBytesToAllocateFor(blockSize); + if (bytesToAllocate == 0) + return XML_FALSE; + + temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks, + (unsigned)bytesToAllocate); + if (temp == NULL) + return XML_FALSE; + pool->blocks = temp; pool->blocks->size = blockSize; - pool->ptr = pool->blocks->s + (pool->ptr - pool->start); + pool->ptr = pool->blocks->s + offsetInsideBlock; pool->start = pool->blocks->s; pool->end = pool->start + blockSize; - } - else { + } else { BLOCK *tem; - int blockSize = pool->end - pool->start; + int blockSize = (int)(pool->end - pool->start); + size_t bytesToAllocate; + + if (blockSize < 0) { + /* This condition traps a situation where either more than + * INT_MAX bytes have already been allocated (which is prevented + * by various pieces of program logic, not least this one, never + * mind the unlikelihood of actually having that much memory) or + * the pool control fields have been corrupted (which could + * conceivably happen in an extremely buggy user handler + * function). Either way it isn't readily testable, so we + * exclude it from the coverage statistics. + */ + return XML_FALSE; /* LCOV_EXCL_LINE */ + } + if (blockSize < INIT_BLOCK_SIZE) blockSize = INIT_BLOCK_SIZE; - else + else { + /* Detect overflow, avoiding _signed_ overflow undefined behavior */ + if ((int)((unsigned)blockSize * 2U) < 0) { + return XML_FALSE; + } blockSize *= 2; - tem = pool->mem->malloc_fcn(offsetof(BLOCK, s) + blockSize * sizeof(XML_Char)); - if (!tem) - return 0; + } + + bytesToAllocate = poolBytesToAllocateFor(blockSize); + if (bytesToAllocate == 0) + return XML_FALSE; + + tem = (BLOCK *)pool->mem->malloc_fcn(bytesToAllocate); + if (! tem) + return XML_FALSE; tem->size = blockSize; tem->next = pool->blocks; pool->blocks = tem; @@ -4496,41 +7081,46 @@ int poolGrow(STRING_POOL *pool) pool->start = tem->s; pool->end = tem->s + blockSize; } - return 1; + return XML_TRUE; } -static int -nextScaffoldPart(XML_Parser parser) -{ - CONTENT_SCAFFOLD * me; +static int FASTCALL +nextScaffoldPart(XML_Parser parser) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ + CONTENT_SCAFFOLD *me; int next; - if (! dtd.scaffIndex) { - dtd.scaffIndex = MALLOC(groupSize * sizeof(int)); - if (! dtd.scaffIndex) + if (! dtd->scaffIndex) { + dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int)); + if (! dtd->scaffIndex) return -1; - dtd.scaffIndex[0] = 0; + dtd->scaffIndex[0] = 0; } - if (dtd.scaffCount >= dtd.scaffSize) { - if (dtd.scaffold) { - dtd.scaffSize *= 2; - dtd.scaffold = (CONTENT_SCAFFOLD *) REALLOC(dtd.scaffold, - dtd.scaffSize * sizeof(CONTENT_SCAFFOLD)); - } - else { - dtd.scaffSize = 32; - dtd.scaffold = (CONTENT_SCAFFOLD *) MALLOC(dtd.scaffSize * sizeof(CONTENT_SCAFFOLD)); + if (dtd->scaffCount >= dtd->scaffSize) { + CONTENT_SCAFFOLD *temp; + if (dtd->scaffold) { + temp = (CONTENT_SCAFFOLD *)REALLOC( + parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD)); + if (temp == NULL) + return -1; + dtd->scaffSize *= 2; + } else { + temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS + * sizeof(CONTENT_SCAFFOLD)); + if (temp == NULL) + return -1; + dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS; } - if (! dtd.scaffold) - return -1; + dtd->scaffold = temp; } - next = dtd.scaffCount++; - me = &dtd.scaffold[next]; - if (dtd.scaffLevel) { - CONTENT_SCAFFOLD *parent = &dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel - 1]]; + next = dtd->scaffCount++; + me = &dtd->scaffold[next]; + if (dtd->scaffLevel) { + CONTENT_SCAFFOLD *parent + = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]]; if (parent->lastchild) { - dtd.scaffold[parent->lastchild].nextsib = next; + dtd->scaffold[parent->lastchild].nextsib = next; } if (! parent->childcnt) parent->firstchild = next; @@ -4539,21 +7129,18 @@ nextScaffoldPart(XML_Parser parser) } me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0; return next; -} /* End nextScaffoldPart */ +} static void -build_node (XML_Parser parser, - int src_node, - XML_Content *dest, - XML_Content **contpos, - char **strpos) -{ - dest->type = dtd.scaffold[src_node].type; - dest->quant = dtd.scaffold[src_node].quant; +build_node(XML_Parser parser, int src_node, XML_Content *dest, + XML_Content **contpos, XML_Char **strpos) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ + dest->type = dtd->scaffold[src_node].type; + dest->quant = dtd->scaffold[src_node].quant; if (dest->type == XML_CTYPE_NAME) { - const char *src; + const XML_Char *src; dest->name = *strpos; - src = dtd.scaffold[src_node].name; + src = dtd->scaffold[src_node].name; for (;;) { *(*strpos)++ = *src; if (! *src) @@ -4561,62 +7148,833 @@ build_node (XML_Parser parser, src++; } dest->numchildren = 0; - dest->children = 0; - } - else { + dest->children = NULL; + } else { unsigned int i; int cn; - dest->numchildren = dtd.scaffold[src_node].childcnt; + dest->numchildren = dtd->scaffold[src_node].childcnt; dest->children = *contpos; *contpos += dest->numchildren; - for (i = 0, cn = dtd.scaffold[src_node].firstchild; - i < dest->numchildren; - i++, cn = dtd.scaffold[cn].nextsib) { + for (i = 0, cn = dtd->scaffold[src_node].firstchild; i < dest->numchildren; + i++, cn = dtd->scaffold[cn].nextsib) { build_node(parser, cn, &(dest->children[i]), contpos, strpos); } - dest->name = 0; + dest->name = NULL; } -} /* End build_node */ +} static XML_Content * -build_model (XML_Parser parser) -{ +build_model(XML_Parser parser) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ XML_Content *ret; XML_Content *cpos; - char * str; - int allocsize = dtd.scaffCount * sizeof(XML_Content) + dtd.contentStringLen; - - ret = MALLOC(allocsize); + XML_Char *str; + int allocsize = (dtd->scaffCount * sizeof(XML_Content) + + (dtd->contentStringLen * sizeof(XML_Char))); + + ret = (XML_Content *)MALLOC(parser, allocsize); if (! ret) - return 0; + return NULL; - str = (char *) (&ret[dtd.scaffCount]); + str = (XML_Char *)(&ret[dtd->scaffCount]); cpos = &ret[1]; build_node(parser, 0, ret, &cpos, &str); return ret; -} /* End build_model */ +} static ELEMENT_TYPE * -getElementType(XML_Parser parser, - const ENCODING *enc, - const char *ptr, - const char *end) -{ - const XML_Char *name = poolStoreString(&dtd.pool, enc, ptr, end); +getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr, + const char *end) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ + const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end); ELEMENT_TYPE *ret; if (! name) - return 0; - ret = (ELEMENT_TYPE *) lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE)); + return NULL; + ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name, + sizeof(ELEMENT_TYPE)); if (! ret) - return 0; + return NULL; if (ret->name != name) - poolDiscard(&dtd.pool); + poolDiscard(&dtd->pool); else { - poolFinish(&dtd.pool); - if (!setElementTypePrefix(parser, ret)) - return 0; + poolFinish(&dtd->pool); + if (! setElementTypePrefix(parser, ret)) + return NULL; } return ret; -} /* End getElementType */ +} + +static XML_Char * +copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) { + int charsRequired = 0; + XML_Char *result; + + /* First determine how long the string is */ + while (s[charsRequired] != 0) { + charsRequired++; + } + /* Include the terminator */ + charsRequired++; + + /* Now allocate space for the copy */ + result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char)); + if (result == NULL) + return NULL; + /* Copy the original into place */ + memcpy(result, s, charsRequired * sizeof(XML_Char)); + return result; +} + +#ifdef XML_DTD + +static float +accountingGetCurrentAmplification(XML_Parser rootParser) { + const XmlBigCount countBytesOutput + = rootParser->m_accounting.countBytesDirect + + rootParser->m_accounting.countBytesIndirect; + const float amplificationFactor + = rootParser->m_accounting.countBytesDirect + ? (countBytesOutput + / (float)(rootParser->m_accounting.countBytesDirect)) + : 1.0f; + assert(! rootParser->m_parentParser); + return amplificationFactor; +} + +static void +accountingReportStats(XML_Parser originParser, const char *epilog) { + const XML_Parser rootParser = getRootParserOf(originParser, NULL); + assert(! rootParser->m_parentParser); + + if (rootParser->m_accounting.debugLevel < 1) { + return; + } + + const float amplificationFactor + = accountingGetCurrentAmplification(rootParser); + fprintf(stderr, + "expat: Accounting(%p): Direct " EXPAT_FMT_ULL( + "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s", + (void *)rootParser, rootParser->m_accounting.countBytesDirect, + rootParser->m_accounting.countBytesIndirect, + (double)amplificationFactor, epilog); +} + +static void +accountingOnAbort(XML_Parser originParser) { + accountingReportStats(originParser, " ABORTING\n"); +} + +static void +accountingReportDiff(XML_Parser rootParser, + unsigned int levelsAwayFromRootParser, const char *before, + const char *after, ptrdiff_t bytesMore, int source_line, + enum XML_Account account) { + assert(! rootParser->m_parentParser); + + fprintf(stderr, + " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"", + bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP", + levelsAwayFromRootParser, source_line, 10, ""); + + const char ellipis[] = "[..]"; + const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1; + const unsigned int contextLength = 10; + + /* Note: Performance is of no concern here */ + const char *walker = before; + if ((rootParser->m_accounting.debugLevel >= 3) + || (after - before) + <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) { + for (; walker < after; walker++) { + fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); + } + } else { + for (; walker < before + contextLength; walker++) { + fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); + } + fprintf(stderr, ellipis); + walker = after - contextLength; + for (; walker < after; walker++) { + fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); + } + } + fprintf(stderr, "\"\n"); +} + +static XML_Bool +accountingDiffTolerated(XML_Parser originParser, int tok, const char *before, + const char *after, int source_line, + enum XML_Account account) { + /* Note: We need to check the token type *first* to be sure that + * we can even access variable , safely. + * E.g. for XML_TOK_NONE may hold an invalid pointer. */ + switch (tok) { + case XML_TOK_INVALID: + case XML_TOK_PARTIAL: + case XML_TOK_PARTIAL_CHAR: + case XML_TOK_NONE: + return XML_TRUE; + } + + if (account == XML_ACCOUNT_NONE) + return XML_TRUE; /* because these bytes have been accounted for, already */ + + unsigned int levelsAwayFromRootParser; + const XML_Parser rootParser + = getRootParserOf(originParser, &levelsAwayFromRootParser); + assert(! rootParser->m_parentParser); + + const int isDirect + = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser); + const ptrdiff_t bytesMore = after - before; + + XmlBigCount *const additionTarget + = isDirect ? &rootParser->m_accounting.countBytesDirect + : &rootParser->m_accounting.countBytesIndirect; + + /* Detect and avoid integer overflow */ + if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore) + return XML_FALSE; + *additionTarget += bytesMore; + + const XmlBigCount countBytesOutput + = rootParser->m_accounting.countBytesDirect + + rootParser->m_accounting.countBytesIndirect; + const float amplificationFactor + = accountingGetCurrentAmplification(rootParser); + const XML_Bool tolerated + = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes) + || (amplificationFactor + <= rootParser->m_accounting.maximumAmplificationFactor); + + if (rootParser->m_accounting.debugLevel >= 2) { + accountingReportStats(rootParser, ""); + accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after, + bytesMore, source_line, account); + } + + return tolerated; +} + +unsigned long long +testingAccountingGetCountBytesDirect(XML_Parser parser) { + if (! parser) + return 0; + return parser->m_accounting.countBytesDirect; +} + +unsigned long long +testingAccountingGetCountBytesIndirect(XML_Parser parser) { + if (! parser) + return 0; + return parser->m_accounting.countBytesIndirect; +} + +static void +entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity, + const char *action, int sourceLine) { + assert(! rootParser->m_parentParser); + if (rootParser->m_entity_stats.debugLevel < 1) + return; + +# if defined(XML_UNICODE) + const char *const entityName = "[..]"; +# else + const char *const entityName = entity->name; +# endif + + fprintf( + stderr, + "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n", + (void *)rootParser, rootParser->m_entity_stats.countEverOpened, + rootParser->m_entity_stats.currentDepth, + rootParser->m_entity_stats.maximumDepthSeen, + (rootParser->m_entity_stats.currentDepth - 1) * 2, "", + entity->is_param ? "%" : "&", entityName, action, entity->textLen, + sourceLine); +} + +static void +entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) { + const XML_Parser rootParser = getRootParserOf(originParser, NULL); + assert(! rootParser->m_parentParser); + + rootParser->m_entity_stats.countEverOpened++; + rootParser->m_entity_stats.currentDepth++; + if (rootParser->m_entity_stats.currentDepth + > rootParser->m_entity_stats.maximumDepthSeen) { + rootParser->m_entity_stats.maximumDepthSeen++; + } + + entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine); +} + +static void +entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) { + const XML_Parser rootParser = getRootParserOf(originParser, NULL); + assert(! rootParser->m_parentParser); + + entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine); + rootParser->m_entity_stats.currentDepth--; +} + +static XML_Parser +getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) { + XML_Parser rootParser = parser; + unsigned int stepsTakenUpwards = 0; + while (rootParser->m_parentParser) { + rootParser = rootParser->m_parentParser; + stepsTakenUpwards++; + } + assert(! rootParser->m_parentParser); + if (outLevelDiff != NULL) { + *outLevelDiff = stepsTakenUpwards; + } + return rootParser; +} + +const char * +unsignedCharToPrintable(unsigned char c) { + switch (c) { + case 0: + return "\\0"; + case 1: + return "\\x1"; + case 2: + return "\\x2"; + case 3: + return "\\x3"; + case 4: + return "\\x4"; + case 5: + return "\\x5"; + case 6: + return "\\x6"; + case 7: + return "\\x7"; + case 8: + return "\\x8"; + case 9: + return "\\t"; + case 10: + return "\\n"; + case 11: + return "\\xB"; + case 12: + return "\\xC"; + case 13: + return "\\r"; + case 14: + return "\\xE"; + case 15: + return "\\xF"; + case 16: + return "\\x10"; + case 17: + return "\\x11"; + case 18: + return "\\x12"; + case 19: + return "\\x13"; + case 20: + return "\\x14"; + case 21: + return "\\x15"; + case 22: + return "\\x16"; + case 23: + return "\\x17"; + case 24: + return "\\x18"; + case 25: + return "\\x19"; + case 26: + return "\\x1A"; + case 27: + return "\\x1B"; + case 28: + return "\\x1C"; + case 29: + return "\\x1D"; + case 30: + return "\\x1E"; + case 31: + return "\\x1F"; + case 32: + return " "; + case 33: + return "!"; + case 34: + return "\\\""; + case 35: + return "#"; + case 36: + return "$"; + case 37: + return "%"; + case 38: + return "&"; + case 39: + return "'"; + case 40: + return "("; + case 41: + return ")"; + case 42: + return "*"; + case 43: + return "+"; + case 44: + return ","; + case 45: + return "-"; + case 46: + return "."; + case 47: + return "/"; + case 48: + return "0"; + case 49: + return "1"; + case 50: + return "2"; + case 51: + return "3"; + case 52: + return "4"; + case 53: + return "5"; + case 54: + return "6"; + case 55: + return "7"; + case 56: + return "8"; + case 57: + return "9"; + case 58: + return ":"; + case 59: + return ";"; + case 60: + return "<"; + case 61: + return "="; + case 62: + return ">"; + case 63: + return "?"; + case 64: + return "@"; + case 65: + return "A"; + case 66: + return "B"; + case 67: + return "C"; + case 68: + return "D"; + case 69: + return "E"; + case 70: + return "F"; + case 71: + return "G"; + case 72: + return "H"; + case 73: + return "I"; + case 74: + return "J"; + case 75: + return "K"; + case 76: + return "L"; + case 77: + return "M"; + case 78: + return "N"; + case 79: + return "O"; + case 80: + return "P"; + case 81: + return "Q"; + case 82: + return "R"; + case 83: + return "S"; + case 84: + return "T"; + case 85: + return "U"; + case 86: + return "V"; + case 87: + return "W"; + case 88: + return "X"; + case 89: + return "Y"; + case 90: + return "Z"; + case 91: + return "["; + case 92: + return "\\\\"; + case 93: + return "]"; + case 94: + return "^"; + case 95: + return "_"; + case 96: + return "`"; + case 97: + return "a"; + case 98: + return "b"; + case 99: + return "c"; + case 100: + return "d"; + case 101: + return "e"; + case 102: + return "f"; + case 103: + return "g"; + case 104: + return "h"; + case 105: + return "i"; + case 106: + return "j"; + case 107: + return "k"; + case 108: + return "l"; + case 109: + return "m"; + case 110: + return "n"; + case 111: + return "o"; + case 112: + return "p"; + case 113: + return "q"; + case 114: + return "r"; + case 115: + return "s"; + case 116: + return "t"; + case 117: + return "u"; + case 118: + return "v"; + case 119: + return "w"; + case 120: + return "x"; + case 121: + return "y"; + case 122: + return "z"; + case 123: + return "{"; + case 124: + return "|"; + case 125: + return "}"; + case 126: + return "~"; + case 127: + return "\\x7F"; + case 128: + return "\\x80"; + case 129: + return "\\x81"; + case 130: + return "\\x82"; + case 131: + return "\\x83"; + case 132: + return "\\x84"; + case 133: + return "\\x85"; + case 134: + return "\\x86"; + case 135: + return "\\x87"; + case 136: + return "\\x88"; + case 137: + return "\\x89"; + case 138: + return "\\x8A"; + case 139: + return "\\x8B"; + case 140: + return "\\x8C"; + case 141: + return "\\x8D"; + case 142: + return "\\x8E"; + case 143: + return "\\x8F"; + case 144: + return "\\x90"; + case 145: + return "\\x91"; + case 146: + return "\\x92"; + case 147: + return "\\x93"; + case 148: + return "\\x94"; + case 149: + return "\\x95"; + case 150: + return "\\x96"; + case 151: + return "\\x97"; + case 152: + return "\\x98"; + case 153: + return "\\x99"; + case 154: + return "\\x9A"; + case 155: + return "\\x9B"; + case 156: + return "\\x9C"; + case 157: + return "\\x9D"; + case 158: + return "\\x9E"; + case 159: + return "\\x9F"; + case 160: + return "\\xA0"; + case 161: + return "\\xA1"; + case 162: + return "\\xA2"; + case 163: + return "\\xA3"; + case 164: + return "\\xA4"; + case 165: + return "\\xA5"; + case 166: + return "\\xA6"; + case 167: + return "\\xA7"; + case 168: + return "\\xA8"; + case 169: + return "\\xA9"; + case 170: + return "\\xAA"; + case 171: + return "\\xAB"; + case 172: + return "\\xAC"; + case 173: + return "\\xAD"; + case 174: + return "\\xAE"; + case 175: + return "\\xAF"; + case 176: + return "\\xB0"; + case 177: + return "\\xB1"; + case 178: + return "\\xB2"; + case 179: + return "\\xB3"; + case 180: + return "\\xB4"; + case 181: + return "\\xB5"; + case 182: + return "\\xB6"; + case 183: + return "\\xB7"; + case 184: + return "\\xB8"; + case 185: + return "\\xB9"; + case 186: + return "\\xBA"; + case 187: + return "\\xBB"; + case 188: + return "\\xBC"; + case 189: + return "\\xBD"; + case 190: + return "\\xBE"; + case 191: + return "\\xBF"; + case 192: + return "\\xC0"; + case 193: + return "\\xC1"; + case 194: + return "\\xC2"; + case 195: + return "\\xC3"; + case 196: + return "\\xC4"; + case 197: + return "\\xC5"; + case 198: + return "\\xC6"; + case 199: + return "\\xC7"; + case 200: + return "\\xC8"; + case 201: + return "\\xC9"; + case 202: + return "\\xCA"; + case 203: + return "\\xCB"; + case 204: + return "\\xCC"; + case 205: + return "\\xCD"; + case 206: + return "\\xCE"; + case 207: + return "\\xCF"; + case 208: + return "\\xD0"; + case 209: + return "\\xD1"; + case 210: + return "\\xD2"; + case 211: + return "\\xD3"; + case 212: + return "\\xD4"; + case 213: + return "\\xD5"; + case 214: + return "\\xD6"; + case 215: + return "\\xD7"; + case 216: + return "\\xD8"; + case 217: + return "\\xD9"; + case 218: + return "\\xDA"; + case 219: + return "\\xDB"; + case 220: + return "\\xDC"; + case 221: + return "\\xDD"; + case 222: + return "\\xDE"; + case 223: + return "\\xDF"; + case 224: + return "\\xE0"; + case 225: + return "\\xE1"; + case 226: + return "\\xE2"; + case 227: + return "\\xE3"; + case 228: + return "\\xE4"; + case 229: + return "\\xE5"; + case 230: + return "\\xE6"; + case 231: + return "\\xE7"; + case 232: + return "\\xE8"; + case 233: + return "\\xE9"; + case 234: + return "\\xEA"; + case 235: + return "\\xEB"; + case 236: + return "\\xEC"; + case 237: + return "\\xED"; + case 238: + return "\\xEE"; + case 239: + return "\\xEF"; + case 240: + return "\\xF0"; + case 241: + return "\\xF1"; + case 242: + return "\\xF2"; + case 243: + return "\\xF3"; + case 244: + return "\\xF4"; + case 245: + return "\\xF5"; + case 246: + return "\\xF6"; + case 247: + return "\\xF7"; + case 248: + return "\\xF8"; + case 249: + return "\\xF9"; + case 250: + return "\\xFA"; + case 251: + return "\\xFB"; + case 252: + return "\\xFC"; + case 253: + return "\\xFD"; + case 254: + return "\\xFE"; + case 255: + return "\\xFF"; + default: + assert(0); /* never gets here */ + return "dead code"; + } + assert(0); /* never gets here */ +} + +#endif /* XML_DTD */ + +static unsigned long +getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) { + const char *const valueOrNull = getenv(variableName); + if (valueOrNull == NULL) { + return defaultDebugLevel; + } + const char *const value = valueOrNull; + + errno = 0; + char *afterValue = (char *)value; + unsigned long debugLevel = strtoul(value, &afterValue, 10); + if ((errno != 0) || (afterValue[0] != '\0')) { + errno = 0; + return defaultDebugLevel; + } + + return debugLevel; +} diff --git a/Modules/ThirdParty/Expat/src/expat/xmlrole.c b/Modules/ThirdParty/Expat/src/expat/xmlrole.c index aabc8bcf4ae..08173b0fd54 100644 --- a/Modules/ThirdParty/Expat/src/expat/xmlrole.c +++ b/Modules/ThirdParty/Expat/src/expat/xmlrole.c @@ -1,17 +1,52 @@ /* -Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Greg Stein + Copyright (c) 2002-2006 Karl Waclawek + Copyright (c) 2002-2003 Fred L. Drake, Jr. + Copyright (c) 2005-2009 Steven Solie + Copyright (c) 2016-2021 Sebastian Pipping + Copyright (c) 2017 Rhodri James + Copyright (c) 2019 David Loffredo + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ -static char RCSId[] - = "Header"; -const char* GetVTKEXpatXMLRole_RCSId(void) -{ - return RCSId; -} +#include + +#ifdef _WIN32 +# include "winconfig.h" +#endif -#include "expatConfig.h" +#include +#include "expat_external.h" +#include "internal.h" #include "xmlrole.h" #include "ascii.h" @@ -22,77 +57,88 @@ const char* GetVTKEXpatXMLRole_RCSId(void) */ -static const char KW_ANY[] = { ASCII_A, ASCII_N, ASCII_Y, '\0' }; -static const char KW_ATTLIST[] = { ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' }; -static const char KW_CDATA[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; -static const char KW_DOCTYPE[] = { ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' }; -static const char KW_ELEMENT[] = { ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' }; -static const char KW_EMPTY[] = { ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' }; -static const char KW_ENTITIES[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' }; -static const char KW_ENTITY[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' }; -static const char KW_FIXED[] = { ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' }; -static const char KW_ID[] = { ASCII_I, ASCII_D, '\0' }; -static const char KW_IDREF[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' }; -static const char KW_IDREFS[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' }; -static const char KW_IGNORE[] = { ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' }; -static const char KW_IMPLIED[] = { ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' }; -static const char KW_INCLUDE[] = { ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' }; -static const char KW_NDATA[] = { ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; -static const char KW_NMTOKEN[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' }; -static const char KW_NMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' }; -static const char KW_NOTATION[] = { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, '\0' }; -static const char KW_PCDATA[] = { ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; -static const char KW_PUBLIC[] = { ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' }; -static const char KW_REQUIRED[] = { ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D, '\0' }; -static const char KW_SYSTEM[] = { ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' }; +static const char KW_ANY[] = {ASCII_A, ASCII_N, ASCII_Y, '\0'}; +static const char KW_ATTLIST[] + = {ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0'}; +static const char KW_CDATA[] + = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; +static const char KW_DOCTYPE[] + = {ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0'}; +static const char KW_ELEMENT[] + = {ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0'}; +static const char KW_EMPTY[] + = {ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0'}; +static const char KW_ENTITIES[] = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, + ASCII_I, ASCII_E, ASCII_S, '\0'}; +static const char KW_ENTITY[] + = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'}; +static const char KW_FIXED[] + = {ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0'}; +static const char KW_ID[] = {ASCII_I, ASCII_D, '\0'}; +static const char KW_IDREF[] + = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'}; +static const char KW_IDREFS[] + = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'}; +#ifdef XML_DTD +static const char KW_IGNORE[] + = {ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0'}; +#endif +static const char KW_IMPLIED[] + = {ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0'}; +#ifdef XML_DTD +static const char KW_INCLUDE[] + = {ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0'}; +#endif +static const char KW_NDATA[] + = {ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; +static const char KW_NMTOKEN[] + = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'}; +static const char KW_NMTOKENS[] = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, + ASCII_E, ASCII_N, ASCII_S, '\0'}; +static const char KW_NOTATION[] = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, + ASCII_I, ASCII_O, ASCII_N, '\0'}; +static const char KW_PCDATA[] + = {ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; +static const char KW_PUBLIC[] + = {ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0'}; +static const char KW_REQUIRED[] = {ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, + ASCII_R, ASCII_E, ASCII_D, '\0'}; +static const char KW_SYSTEM[] + = {ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0'}; #ifndef MIN_BYTES_PER_CHAR -#define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar) +# define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar) #endif #ifdef XML_DTD -#define setTopLevel(state) \ - ((state)->handler = ((state)->documentEntity \ - ? internalSubset \ - : externalSubset1)) +# define setTopLevel(state) \ + ((state)->handler \ + = ((state)->documentEntity ? internalSubset : externalSubset1)) #else /* not XML_DTD */ -#define setTopLevel(state) ((state)->handler = internalSubset) +# define setTopLevel(state) ((state)->handler = internalSubset) #endif /* not XML_DTD */ -typedef int PROLOG_HANDLER(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc); - -static PROLOG_HANDLER - prolog0, prolog1, prolog2, - doctype0, doctype1, doctype2, doctype3, doctype4, doctype5, - internalSubset, - entity0, entity1, entity2, entity3, entity4, entity5, entity6, - entity7, entity8, entity9, - notation0, notation1, notation2, notation3, notation4, - attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6, - attlist7, attlist8, attlist9, - element0, element1, element2, element3, element4, element5, element6, - element7, +typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state, int tok, + const char *ptr, const char *end, + const ENCODING *enc); + +static PROLOG_HANDLER prolog0, prolog1, prolog2, doctype0, doctype1, doctype2, + doctype3, doctype4, doctype5, internalSubset, entity0, entity1, entity2, + entity3, entity4, entity5, entity6, entity7, entity8, entity9, entity10, + notation0, notation1, notation2, notation3, notation4, attlist0, attlist1, + attlist2, attlist3, attlist4, attlist5, attlist6, attlist7, attlist8, + attlist9, element0, element1, element2, element3, element4, element5, + element6, element7, #ifdef XML_DTD - externalSubset0, externalSubset1, - condSect0, condSect1, condSect2, + externalSubset0, externalSubset1, condSect0, condSect1, condSect2, #endif /* XML_DTD */ - declClose, - error; - -static -int common(PROLOG_STATE *state, int tok); - -static -int prolog0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ + declClose, error; + +static int FASTCALL common(PROLOG_STATE *state, int tok); + +static int PTRCALL +prolog0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: state->handler = prolog1; @@ -102,19 +148,18 @@ int prolog0(PROLOG_STATE *state, return XML_ROLE_XML_DECL; case XML_TOK_PI: state->handler = prolog1; - return XML_ROLE_NONE; + return XML_ROLE_PI; case XML_TOK_COMMENT: state->handler = prolog1; + return XML_ROLE_COMMENT; case XML_TOK_BOM: return XML_ROLE_NONE; case XML_TOK_DECL_OPEN: - if (!XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, - KW_DOCTYPE)) + if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, + KW_DOCTYPE)) break; state->handler = doctype0; - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_INSTANCE_START: state->handler = error; return XML_ROLE_INSTANCE_START; @@ -122,28 +167,31 @@ int prolog0(PROLOG_STATE *state, return common(state, tok); } -static -int prolog1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +static int PTRCALL +prolog1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_PI: + return XML_ROLE_PI; case XML_TOK_COMMENT: + return XML_ROLE_COMMENT; case XML_TOK_BOM: - return XML_ROLE_NONE; + /* This case can never arise. To reach this role function, the + * parse must have passed through prolog0 and therefore have had + * some form of input, even if only a space. At that point, a + * byte order mark is no longer a valid character (though + * technically it should be interpreted as a non-breaking space), + * so will be rejected by the tokenizing stages. + */ + return XML_ROLE_NONE; /* LCOV_EXCL_LINE */ case XML_TOK_DECL_OPEN: - if (!XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, - KW_DOCTYPE)) + if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, + KW_DOCTYPE)) break; state->handler = doctype0; - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_INSTANCE_START: state->handler = error; return XML_ROLE_INSTANCE_START; @@ -151,22 +199,19 @@ int prolog1(PROLOG_STATE *state, return common(state, tok); } -static -int prolog2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +prolog2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_PI: + return XML_ROLE_PI; case XML_TOK_COMMENT: - return XML_ROLE_NONE; + return XML_ROLE_COMMENT; case XML_TOK_INSTANCE_START: state->handler = error; return XML_ROLE_INSTANCE_START; @@ -174,19 +219,15 @@ int prolog2(PROLOG_STATE *state, return common(state, tok); } -static -int doctype0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +doctype0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_NAME: case XML_TOK_PREFIXED_NAME: state->handler = doctype1; @@ -195,16 +236,12 @@ int doctype0(PROLOG_STATE *state, return common(state, tok); } -static -int doctype1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +static int PTRCALL +doctype1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_OPEN_BRACKET: state->handler = internalSubset; return XML_ROLE_DOCTYPE_INTERNAL_SUBSET; @@ -214,30 +251,26 @@ int doctype1(PROLOG_STATE *state, case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = doctype3; - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; } if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = doctype2; - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; } break; } return common(state, tok); } -static -int doctype2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +doctype2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_LITERAL: state->handler = doctype3; return XML_ROLE_DOCTYPE_PUBLIC_ID; @@ -245,19 +278,15 @@ int doctype2(PROLOG_STATE *state, return common(state, tok); } -static -int doctype3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +doctype3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_LITERAL: state->handler = doctype4; return XML_ROLE_DOCTYPE_SYSTEM_ID; @@ -265,19 +294,15 @@ int doctype3(PROLOG_STATE *state, return common(state, tok); } -static -int doctype4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +doctype4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_OPEN_BRACKET: state->handler = internalSubset; return XML_ROLE_DOCTYPE_INTERNAL_SUBSET; @@ -288,19 +313,15 @@ int doctype4(PROLOG_STATE *state, return common(state, tok); } -static -int doctype5(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +doctype5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_DECL_CLOSE: state->handler = prolog2; return XML_ROLE_DOCTYPE_CLOSE; @@ -308,53 +329,44 @@ int doctype5(PROLOG_STATE *state, return common(state, tok); } -static -int internalSubset(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +static int PTRCALL +internalSubset(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_DECL_OPEN: - if (XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_ENTITY)) { state->handler = entity0; - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; } - if (XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_ATTLIST)) { state->handler = attlist0; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; } - if (XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_ELEMENT)) { state->handler = element0; - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; } - if (XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_NOTATION)) { state->handler = notation0; - return XML_ROLE_NONE; + return XML_ROLE_NOTATION_NONE; } break; case XML_TOK_PI: + return XML_ROLE_PI; case XML_TOK_COMMENT: - return XML_ROLE_NONE; + return XML_ROLE_COMMENT; case XML_TOK_PARAM_ENTITY_REF: return XML_ROLE_PARAM_ENTITY_REF; case XML_TOK_CLOSE_BRACKET: state->handler = doctype5; + return XML_ROLE_DOCTYPE_NONE; + case XML_TOK_NONE: return XML_ROLE_NONE; } return common(state, tok); @@ -362,26 +374,18 @@ int internalSubset(PROLOG_STATE *state, #ifdef XML_DTD -static -int externalSubset0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +static int PTRCALL +externalSubset0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { state->handler = externalSubset1; if (tok == XML_TOK_XML_DECL) return XML_ROLE_TEXT_DECL; return externalSubset1(state, tok, ptr, end, enc); } -static -int externalSubset1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +static int PTRCALL +externalSubset1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_COND_SECT_OPEN: state->handler = condSect0; @@ -407,22 +411,18 @@ int externalSubset1(PROLOG_STATE *state, #endif /* XML_DTD */ -static -int entity0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +entity0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_PERCENT: state->handler = entity1; - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_NAME: state->handler = entity2; return XML_ROLE_GENERAL_ENTITY_NAME; @@ -430,19 +430,15 @@ int entity0(PROLOG_STATE *state, return common(state, tok); } -static -int entity1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +entity1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_NAME: state->handler = entity7; return XML_ROLE_PARAM_ENTITY_NAME; @@ -450,46 +446,39 @@ int entity1(PROLOG_STATE *state, return common(state, tok); } -static -int entity2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +static int PTRCALL +entity2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = entity4; - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; } if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = entity3; - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; } break; case XML_TOK_LITERAL: state->handler = declClose; + state->role_none = XML_ROLE_ENTITY_NONE; return XML_ROLE_ENTITY_VALUE; } return common(state, tok); } -static -int entity3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +entity3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_LITERAL: state->handler = entity4; return XML_ROLE_ENTITY_PUBLIC_ID; @@ -497,20 +486,15 @@ int entity3(PROLOG_STATE *state, return common(state, tok); } - -static -int entity4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +entity4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_LITERAL: state->handler = entity5; return XML_ROLE_ENTITY_SYSTEM_ID; @@ -518,89 +502,75 @@ int entity4(PROLOG_STATE *state, return common(state, tok); } -static -int entity5(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +static int PTRCALL +entity5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_DECL_CLOSE: setTopLevel(state); return XML_ROLE_ENTITY_COMPLETE; case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) { state->handler = entity6; - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; } break; } return common(state, tok); } -static -int entity6(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +entity6(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_NAME: state->handler = declClose; + state->role_none = XML_ROLE_ENTITY_NONE; return XML_ROLE_ENTITY_NOTATION_NAME; } return common(state, tok); } -static -int entity7(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +static int PTRCALL +entity7(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = entity9; - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; } if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = entity8; - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; } break; case XML_TOK_LITERAL: state->handler = declClose; + state->role_none = XML_ROLE_ENTITY_NONE; return XML_ROLE_ENTITY_VALUE; } return common(state, tok); } -static -int entity8(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +entity8(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_LITERAL: state->handler = entity9; return XML_ROLE_ENTITY_PUBLIC_ID; @@ -608,39 +578,47 @@ int entity8(PROLOG_STATE *state, return common(state, tok); } -static -int entity9(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +entity9(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_LITERAL: - state->handler = declClose; + state->handler = entity10; return XML_ROLE_ENTITY_SYSTEM_ID; } return common(state, tok); } -static -int notation0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +entity10(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; + case XML_TOK_DECL_CLOSE: + setTopLevel(state); + return XML_ROLE_ENTITY_COMPLETE; + } + return common(state, tok); +} + +static int PTRCALL +notation0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); + switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NOTATION_NONE; case XML_TOK_NAME: state->handler = notation1; return XML_ROLE_NOTATION_NAME; @@ -648,43 +626,35 @@ int notation0(PROLOG_STATE *state, return common(state, tok); } -static -int notation1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ +static int PTRCALL +notation1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_NOTATION_NONE; case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = notation3; - return XML_ROLE_NONE; + return XML_ROLE_NOTATION_NONE; } if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = notation2; - return XML_ROLE_NONE; + return XML_ROLE_NOTATION_NONE; } break; } return common(state, tok); } -static -int notation2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +notation2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_NOTATION_NONE; case XML_TOK_LITERAL: state->handler = notation4; return XML_ROLE_NOTATION_PUBLIC_ID; @@ -692,41 +662,35 @@ int notation2(PROLOG_STATE *state, return common(state, tok); } -static -int notation3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +notation3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_NOTATION_NONE; case XML_TOK_LITERAL: state->handler = declClose; + state->role_none = XML_ROLE_NOTATION_NONE; return XML_ROLE_NOTATION_SYSTEM_ID; } return common(state, tok); } -static -int notation4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +notation4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_NOTATION_NONE; case XML_TOK_LITERAL: state->handler = declClose; + state->role_none = XML_ROLE_NOTATION_NONE; return XML_ROLE_NOTATION_SYSTEM_ID; case XML_TOK_DECL_CLOSE: setTopLevel(state); @@ -735,19 +699,15 @@ int notation4(PROLOG_STATE *state, return common(state, tok); } -static -int attlist0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +attlist0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_NAME: case XML_TOK_PREFIXED_NAME: state->handler = attlist1; @@ -756,22 +716,18 @@ int attlist0(PROLOG_STATE *state, return common(state, tok); } -static -int attlist1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +attlist1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_DECL_CLOSE: setTopLevel(state); - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_NAME: case XML_TOK_PREFIXED_NAME: state->handler = attlist2; @@ -780,63 +736,45 @@ int attlist1(PROLOG_STATE *state, return common(state, tok); } -static -int attlist2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +attlist2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; - case XML_TOK_NAME: - { - static const char *types[] = { - KW_CDATA, - KW_ID, - KW_IDREF, - KW_IDREFS, - KW_ENTITY, - KW_ENTITIES, - KW_NMTOKEN, - KW_NMTOKENS, - }; - int i; - for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++) - if (XmlNameMatchesAscii(enc, ptr, end, types[i])) { - state->handler = attlist8; - return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i; - } - } + return XML_ROLE_ATTLIST_NONE; + case XML_TOK_NAME: { + static const char *const types[] = { + KW_CDATA, KW_ID, KW_IDREF, KW_IDREFS, + KW_ENTITY, KW_ENTITIES, KW_NMTOKEN, KW_NMTOKENS, + }; + int i; + for (i = 0; i < (int)(sizeof(types) / sizeof(types[0])); i++) + if (XmlNameMatchesAscii(enc, ptr, end, types[i])) { + state->handler = attlist8; + return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i; + } + } if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) { state->handler = attlist5; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; } break; case XML_TOK_OPEN_PAREN: state->handler = attlist3; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; } return common(state, tok); } -static -int attlist3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +attlist3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_NMTOKEN: case XML_TOK_NAME: case XML_TOK_PREFIXED_NAME: @@ -846,63 +784,50 @@ int attlist3(PROLOG_STATE *state, return common(state, tok); } -static -int attlist4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +attlist4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_CLOSE_PAREN: state->handler = attlist8; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_OR: state->handler = attlist3; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; } return common(state, tok); } -static -int attlist5(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +attlist5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_OPEN_PAREN: state->handler = attlist6; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; } return common(state, tok); } - -static -int attlist6(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +attlist6(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_NAME: state->handler = attlist7; return XML_ROLE_ATTRIBUTE_NOTATION_VALUE; @@ -910,64 +835,47 @@ int attlist6(PROLOG_STATE *state, return common(state, tok); } -static -int attlist7(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +attlist7(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_CLOSE_PAREN: state->handler = attlist8; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_OR: state->handler = attlist6; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; } return common(state, tok); } /* default value */ -static -int attlist8(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +attlist8(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_POUND_NAME: - if (XmlNameMatchesAscii(enc, - ptr + MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, KW_IMPLIED)) { state->handler = attlist1; return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE; } - if (XmlNameMatchesAscii(enc, - ptr + MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, KW_REQUIRED)) { state->handler = attlist1; return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE; } - if (XmlNameMatchesAscii(enc, - ptr + MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, KW_FIXED)) { state->handler = attlist9; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; } break; case XML_TOK_LITERAL: @@ -977,19 +885,15 @@ int attlist8(PROLOG_STATE *state, return common(state, tok); } -static -int attlist9(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +attlist9(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_LITERAL: state->handler = attlist1; return XML_ROLE_FIXED_ATTRIBUTE_VALUE; @@ -997,19 +901,15 @@ int attlist9(PROLOG_STATE *state, return common(state, tok); } -static -int element0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +element0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_NAME: case XML_TOK_PREFIXED_NAME: state->handler = element1; @@ -1018,26 +918,21 @@ int element0(PROLOG_STATE *state, return common(state, tok); } -static -int element1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +element1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) { state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; return XML_ROLE_CONTENT_EMPTY; } if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) { state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; return XML_ROLE_CONTENT_ANY; } break; @@ -1049,23 +944,14 @@ int element1(PROLOG_STATE *state, return common(state, tok); } -static -int element2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +element2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_POUND_NAME: - if (XmlNameMatchesAscii(enc, - ptr + MIN_BYTES_PER_CHAR(enc), - end, + if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, KW_PCDATA)) { state->handler = element3; return XML_ROLE_CONTENT_PCDATA; @@ -1092,45 +978,39 @@ int element2(PROLOG_STATE *state, return common(state, tok); } -static -int element3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +element3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_CLOSE_PAREN: state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; return XML_ROLE_GROUP_CLOSE; case XML_TOK_CLOSE_PAREN_ASTERISK: state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; return XML_ROLE_GROUP_CLOSE_REP; case XML_TOK_OR: state->handler = element4; - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; } return common(state, tok); } -static -int element4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +element4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_NAME: case XML_TOK_PREFIXED_NAME: state->handler = element5; @@ -1139,42 +1019,35 @@ int element4(PROLOG_STATE *state, return common(state, tok); } -static -int element5(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +element5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_CLOSE_PAREN_ASTERISK: state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; return XML_ROLE_GROUP_CLOSE_REP; case XML_TOK_OR: state->handler = element4; - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; } return common(state, tok); } -static -int element6(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +element6(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_OPEN_PAREN: state->level += 1; return XML_ROLE_GROUP_OPEN; @@ -1195,38 +1068,42 @@ int element6(PROLOG_STATE *state, return common(state, tok); } -static -int element7(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +element7(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_CLOSE_PAREN: state->level -= 1; - if (state->level == 0) + if (state->level == 0) { state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; + } return XML_ROLE_GROUP_CLOSE; case XML_TOK_CLOSE_PAREN_ASTERISK: state->level -= 1; - if (state->level == 0) + if (state->level == 0) { state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; + } return XML_ROLE_GROUP_CLOSE_REP; case XML_TOK_CLOSE_PAREN_QUESTION: state->level -= 1; - if (state->level == 0) + if (state->level == 0) { state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; + } return XML_ROLE_GROUP_CLOSE_OPT; case XML_TOK_CLOSE_PAREN_PLUS: state->level -= 1; - if (state->level == 0) + if (state->level == 0) { state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; + } return XML_ROLE_GROUP_CLOSE_PLUS; case XML_TOK_COMMA: state->handler = element6; @@ -1240,16 +1117,9 @@ int element7(PROLOG_STATE *state, #ifdef XML_DTD -static -int condSect0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +condSect0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; @@ -1267,16 +1137,12 @@ int condSect0(PROLOG_STATE *state, return common(state, tok); } -static -int condSect1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +condSect1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; @@ -1288,16 +1154,12 @@ int condSect1(PROLOG_STATE *state, return common(state, tok); } -static -int condSect2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +condSect2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; @@ -1310,88 +1172,80 @@ int condSect2(PROLOG_STATE *state, #endif /* XML_DTD */ -static -int declClose(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +static int PTRCALL +declClose(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return state->role_none; case XML_TOK_DECL_CLOSE: setTopLevel(state); - return XML_ROLE_NONE; + return state->role_none; } return common(state, tok); } -#if 0 - -static -int ignore(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); - switch (tok) { - case XML_TOK_DECL_CLOSE: - state->handler = internalSubset; - return 0; - default: - return XML_ROLE_NONE; - } - return common(state, tok); -} -#endif - -static -int error(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - itkExpatUnused(state); - itkExpatUnused(tok); - itkExpatUnused(ptr); - itkExpatUnused(end); - itkExpatUnused(enc); +/* This function will only be invoked if the internal logic of the + * parser has broken down. It is used in two cases: + * + * 1: When the XML prolog has been finished. At this point the + * processor (the parser level above these role handlers) should + * switch from prologProcessor to contentProcessor and reinitialise + * the handler function. + * + * 2: When an error has been detected (via common() below). At this + * point again the processor should be switched to errorProcessor, + * which will never call a handler. + * + * The result of this is that error() can only be called if the + * processor switch failed to happen, which is an internal error and + * therefore we shouldn't be able to provoke it simply by using the + * library. It is a necessary backstop, however, so we merely exclude + * it from the coverage statistics. + * + * LCOV_EXCL_START + */ +static int PTRCALL +error(PROLOG_STATE *state, int tok, const char *ptr, const char *end, + const ENCODING *enc) { + UNUSED_P(state); + UNUSED_P(tok); + UNUSED_P(ptr); + UNUSED_P(end); + UNUSED_P(enc); return XML_ROLE_NONE; } +/* LCOV_EXCL_STOP */ -static -int common(PROLOG_STATE *state, int tok) -{ +static int FASTCALL +common(PROLOG_STATE *state, int tok) { #ifdef XML_DTD - if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF) + if (! state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF) return XML_ROLE_INNER_PARAM_ENTITY_REF; +#else + UNUSED_P(tok); #endif state->handler = error; return XML_ROLE_ERROR; } -void XmlPrologStateInit(PROLOG_STATE *state) -{ +void +XmlPrologStateInit(PROLOG_STATE *state) { state->handler = prolog0; #ifdef XML_DTD state->documentEntity = 1; state->includeLevel = 0; + state->inEntityValue = 0; #endif /* XML_DTD */ } #ifdef XML_DTD -void XmlPrologStateInitExternalEntity(PROLOG_STATE *state) -{ +void +XmlPrologStateInitExternalEntity(PROLOG_STATE *state) { state->handler = externalSubset0; state->documentEntity = 0; state->includeLevel = 0; diff --git a/Modules/ThirdParty/Expat/src/expat/xmlrole.h b/Modules/ThirdParty/Expat/src/expat/xmlrole.h index 6f257930666..d6e1fa150a1 100644 --- a/Modules/ThirdParty/Expat/src/expat/xmlrole.h +++ b/Modules/ThirdParty/Expat/src/expat/xmlrole.h @@ -1,11 +1,47 @@ /* -Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Karl Waclawek + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef XmlRole_INCLUDED #define XmlRole_INCLUDED 1 +#ifdef __VMS +/* 0 1 2 3 0 1 2 3 + 1234567890123456789012345678901 1234567890123456789012345678901 */ +# define XmlPrologStateInitExternalEntity XmlPrologStateInitExternalEnt +#endif + #include "xmltok.h" #ifdef __cplusplus @@ -17,6 +53,7 @@ enum { XML_ROLE_NONE = 0, XML_ROLE_XML_DECL, XML_ROLE_INSTANCE_START, + XML_ROLE_DOCTYPE_NONE, XML_ROLE_DOCTYPE_NAME, XML_ROLE_DOCTYPE_SYSTEM_ID, XML_ROLE_DOCTYPE_PUBLIC_ID, @@ -24,11 +61,13 @@ enum { XML_ROLE_DOCTYPE_CLOSE, XML_ROLE_GENERAL_ENTITY_NAME, XML_ROLE_PARAM_ENTITY_NAME, + XML_ROLE_ENTITY_NONE, XML_ROLE_ENTITY_VALUE, XML_ROLE_ENTITY_SYSTEM_ID, XML_ROLE_ENTITY_PUBLIC_ID, XML_ROLE_ENTITY_COMPLETE, XML_ROLE_ENTITY_NOTATION_NAME, + XML_ROLE_NOTATION_NONE, XML_ROLE_NOTATION_NAME, XML_ROLE_NOTATION_SYSTEM_ID, XML_ROLE_NOTATION_NO_SYSTEM_ID, @@ -44,11 +83,13 @@ enum { XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS, XML_ROLE_ATTRIBUTE_ENUM_VALUE, XML_ROLE_ATTRIBUTE_NOTATION_VALUE, + XML_ROLE_ATTLIST_NONE, XML_ROLE_ATTLIST_ELEMENT_NAME, XML_ROLE_IMPLIED_ATTRIBUTE_VALUE, XML_ROLE_REQUIRED_ATTRIBUTE_VALUE, XML_ROLE_DEFAULT_ATTRIBUTE_VALUE, XML_ROLE_FIXED_ATTRIBUTE_VALUE, + XML_ROLE_ELEMENT_NONE, XML_ROLE_ELEMENT_NAME, XML_ROLE_CONTENT_ANY, XML_ROLE_CONTENT_EMPTY, @@ -64,6 +105,8 @@ enum { XML_ROLE_CONTENT_ELEMENT_REP, XML_ROLE_CONTENT_ELEMENT_OPT, XML_ROLE_CONTENT_ELEMENT_PLUS, + XML_ROLE_PI, + XML_ROLE_COMMENT, #ifdef XML_DTD XML_ROLE_TEXT_DECL, XML_ROLE_IGNORE_SECT, @@ -73,15 +116,14 @@ enum { }; typedef struct prolog_state { - int (*handler)(struct prolog_state *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc); + int(PTRCALL *handler)(struct prolog_state *state, int tok, const char *ptr, + const char *end, const ENCODING *enc); unsigned level; + int role_none; #ifdef XML_DTD unsigned includeLevel; int documentEntity; + int inEntityValue; #endif /* XML_DTD */ } PROLOG_STATE; @@ -90,8 +132,8 @@ void XmlPrologStateInit(PROLOG_STATE *); void XmlPrologStateInitExternalEntity(PROLOG_STATE *); #endif /* XML_DTD */ -#define XmlTokenRole(state, tok, ptr, end, enc) \ - (((state)->handler)(state, tok, ptr, end, enc)) +#define XmlTokenRole(state, tok, ptr, end, enc) \ + (((state)->handler)(state, tok, ptr, end, enc)) #ifdef __cplusplus } diff --git a/Modules/ThirdParty/Expat/src/expat/xmltok.c b/Modules/ThirdParty/Expat/src/expat/xmltok.c index 1a6c6538d19..f2b6b406067 100644 --- a/Modules/ThirdParty/Expat/src/expat/xmltok.c +++ b/Modules/ThirdParty/Expat/src/expat/xmltok.c @@ -1,134 +1,187 @@ /* -Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2001-2003 Fred L. Drake, Jr. + Copyright (c) 2002 Greg Stein + Copyright (c) 2002-2016 Karl Waclawek + Copyright (c) 2005-2009 Steven Solie + Copyright (c) 2016-2021 Sebastian Pipping + Copyright (c) 2016 Pascal Cuoq + Copyright (c) 2016 Don Lewis + Copyright (c) 2017 Rhodri James + Copyright (c) 2017 Alexander Bluhm + Copyright (c) 2017 Benbuck Nason + Copyright (c) 2017 José Gutiérrez de la Concha + Copyright (c) 2019 David Loffredo + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "expatConfig.h" +#include +#include /* memcpy */ +#include +#ifdef _WIN32 +# include "winconfig.h" +#endif + +#include + +#include "expat_external.h" +#include "internal.h" #include "xmltok.h" #include "nametab.h" -#if defined(__BORLANDC__) -#pragma warn -8008 // Disable "condition is always true" warning. -#pragma warn -8066 // Disable "unreachable code" warning. -#endif - #ifdef XML_DTD -#define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok) +# define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok) #else -#define IGNORE_SECTION_TOK_VTABLE /* as nothing */ +# define IGNORE_SECTION_TOK_VTABLE /* as nothing */ #endif -#define VTABLE1 \ - { PREFIX(prologTok), PREFIX(contentTok), \ - PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \ - { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \ - PREFIX(sameName), \ - PREFIX(nameMatchesAscii), \ - PREFIX(nameLength), \ - PREFIX(skipS), \ - PREFIX(getAtts), \ - PREFIX(charRefNumber), \ - PREFIX(predefinedEntityName), \ - PREFIX(updatePosition), \ - PREFIX(isPublicId) +#define VTABLE1 \ + {PREFIX(prologTok), PREFIX(contentTok), \ + PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE}, \ + {PREFIX(attributeValueTok), PREFIX(entityValueTok)}, \ + PREFIX(nameMatchesAscii), PREFIX(nameLength), PREFIX(skipS), \ + PREFIX(getAtts), PREFIX(charRefNumber), PREFIX(predefinedEntityName), \ + PREFIX(updatePosition), PREFIX(isPublicId) #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) -#define UCS2_GET_NAMING(pages, hi, lo) \ - (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) - -/* A 2 byte UTF-8 representation splits the characters 11 bits -between the bottom 5 and 6 bits of the bytes. -We need 8 bits to index into pages, 3 bits to add to that index and -5 bits to generate the mask. */ -#define UTF8_GET_NAMING2(pages, byte) \ - (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ - + ((((byte)[0]) & 3) << 1) \ - + ((((byte)[1]) >> 5) & 1)] \ - & (1 << (((byte)[1]) & 0x1F))) - -/* A 3 byte UTF-8 representation splits the characters 16 bits -between the bottom 4, 6 and 6 bits of the bytes. -We need 8 bits to index into pages, 3 bits to add to that index and -5 bits to generate the mask. */ -#define UTF8_GET_NAMING3(pages, byte) \ - (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \ - + ((((byte)[1]) >> 2) & 0xF)] \ - << 3) \ - + ((((byte)[1]) & 3) << 1) \ - + ((((byte)[2]) >> 5) & 1)] \ - & (1 << (((byte)[2]) & 0x1F))) - -#define UTF8_GET_NAMING(pages, p, n) \ - ((n) == 2 \ - ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \ - : ((n) == 3 \ - ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \ - : 0)) - -#define UTF8_INVALID3(p) \ - ((*p) == 0xED \ - ? (((p)[1] & 0x20) != 0) \ - : ((*p) == 0xEF \ - ? ((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \ - : 0)) - -#define UTF8_INVALID4(p) ((*p) == 0xF4 && ((p)[1] & 0x30) != 0) - -static -int isNever(const ENCODING *enc, const char *p) -{ - itkExpatUnused(enc); - itkExpatUnused(p); +#define UCS2_GET_NAMING(pages, hi, lo) \ + (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo)&0x1F))) + +/* A 2 byte UTF-8 representation splits the characters 11 bits between + the bottom 5 and 6 bits of the bytes. We need 8 bits to index into + pages, 3 bits to add to that index and 5 bits to generate the mask. +*/ +#define UTF8_GET_NAMING2(pages, byte) \ + (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ + + ((((byte)[0]) & 3) << 1) + ((((byte)[1]) >> 5) & 1)] \ + & (1u << (((byte)[1]) & 0x1F))) + +/* A 3 byte UTF-8 representation splits the characters 16 bits between + the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index + into pages, 3 bits to add to that index and 5 bits to generate the + mask. +*/ +#define UTF8_GET_NAMING3(pages, byte) \ + (namingBitmap \ + [((pages)[((((byte)[0]) & 0xF) << 4) + ((((byte)[1]) >> 2) & 0xF)] \ + << 3) \ + + ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)] \ + & (1u << (((byte)[2]) & 0x1F))) + +#define UTF8_GET_NAMING(pages, p, n) \ + ((n) == 2 \ + ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \ + : ((n) == 3 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) : 0)) + +/* Detection of invalid UTF-8 sequences is based on Table 3.1B + of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/ + with the additional restriction of not allowing the Unicode + code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE). + Implementation details: + (A & 0x80) == 0 means A < 0x80 + and + (A & 0xC0) == 0xC0 means A > 0xBF +*/ + +#define UTF8_INVALID2(p) \ + ((*p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0) + +#define UTF8_INVALID3(p) \ + (((p)[2] & 0x80) == 0 \ + || ((*p) == 0xEF && (p)[1] == 0xBF ? (p)[2] > 0xBD \ + : ((p)[2] & 0xC0) == 0xC0) \ + || ((*p) == 0xE0 \ + ? (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \ + : ((p)[1] & 0x80) == 0 \ + || ((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0))) + +#define UTF8_INVALID4(p) \ + (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 || ((p)[2] & 0x80) == 0 \ + || ((p)[2] & 0xC0) == 0xC0 \ + || ((*p) == 0xF0 \ + ? (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \ + : ((p)[1] & 0x80) == 0 \ + || ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) + +static int PTRFASTCALL +isNever(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + UNUSED_P(p); return 0; } -static -int utf8_isName2(const ENCODING *enc, const char *p) -{ - itkExpatUnused(enc); +static int PTRFASTCALL +utf8_isName2(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return UTF8_GET_NAMING2(namePages, (const unsigned char *)p); } -static -int utf8_isName3(const ENCODING *enc, const char *p) -{ - itkExpatUnused(enc); +static int PTRFASTCALL +utf8_isName3(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return UTF8_GET_NAMING3(namePages, (const unsigned char *)p); } #define utf8_isName4 isNever -static -int utf8_isNmstrt2(const ENCODING *enc, const char *p) -{ - itkExpatUnused(enc); +static int PTRFASTCALL +utf8_isNmstrt2(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p); } -static -int utf8_isNmstrt3(const ENCODING *enc, const char *p) -{ - itkExpatUnused(enc); +static int PTRFASTCALL +utf8_isNmstrt3(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p); } #define utf8_isNmstrt4 isNever -#define utf8_isInvalid2 isNever +static int PTRFASTCALL +utf8_isInvalid2(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + return UTF8_INVALID2((const unsigned char *)p); +} -static -int utf8_isInvalid3(const ENCODING *enc, const char *p) -{ - itkExpatUnused(enc); +static int PTRFASTCALL +utf8_isInvalid3(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return UTF8_INVALID3((const unsigned char *)p); } -static -int utf8_isInvalid4(const ENCODING *enc, const char *p) -{ - itkExpatUnused(enc); +static int PTRFASTCALL +utf8_isInvalid4(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return UTF8_INVALID4((const unsigned char *)p); } @@ -136,127 +189,124 @@ struct normal_encoding { ENCODING enc; unsigned char type[256]; #ifdef XML_MIN_SIZE - int (*byteType)(const ENCODING *, const char *); - int (*isNameMin)(const ENCODING *, const char *); - int (*isNmstrtMin)(const ENCODING *, const char *); - int (*byteToAscii)(const ENCODING *, const char *); - int (*charMatches)(const ENCODING *, const char *, int); + int(PTRFASTCALL *byteType)(const ENCODING *, const char *); + int(PTRFASTCALL *isNameMin)(const ENCODING *, const char *); + int(PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *); + int(PTRFASTCALL *byteToAscii)(const ENCODING *, const char *); + int(PTRCALL *charMatches)(const ENCODING *, const char *, int); #endif /* XML_MIN_SIZE */ - int (*isName2)(const ENCODING *, const char *); - int (*isName3)(const ENCODING *, const char *); - int (*isName4)(const ENCODING *, const char *); - int (*isNmstrt2)(const ENCODING *, const char *); - int (*isNmstrt3)(const ENCODING *, const char *); - int (*isNmstrt4)(const ENCODING *, const char *); - int (*isInvalid2)(const ENCODING *, const char *); - int (*isInvalid3)(const ENCODING *, const char *); - int (*isInvalid4)(const ENCODING *, const char *); + int(PTRFASTCALL *isName2)(const ENCODING *, const char *); + int(PTRFASTCALL *isName3)(const ENCODING *, const char *); + int(PTRFASTCALL *isName4)(const ENCODING *, const char *); + int(PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *); + int(PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *); + int(PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *); + int(PTRFASTCALL *isInvalid2)(const ENCODING *, const char *); + int(PTRFASTCALL *isInvalid3)(const ENCODING *, const char *); + int(PTRFASTCALL *isInvalid4)(const ENCODING *, const char *); }; +#define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *)(enc)) + #ifdef XML_MIN_SIZE -#define STANDARD_VTABLE(E) \ - E ## byteType, \ - E ## isNameMin, \ - E ## isNmstrtMin, \ - E ## byteToAscii, \ - E ## charMatches, +# define STANDARD_VTABLE(E) \ + E##byteType, E##isNameMin, E##isNmstrtMin, E##byteToAscii, E##charMatches, #else -#define STANDARD_VTABLE(E) /* as nothing */ +# define STANDARD_VTABLE(E) /* as nothing */ #endif -#define NORMAL_VTABLE(E) \ - E ## isName2, \ - E ## isName3, \ - E ## isName4, \ - E ## isNmstrt2, \ - E ## isNmstrt3, \ - E ## isNmstrt4, \ - E ## isInvalid2, \ - E ## isInvalid3, \ - E ## isInvalid4 +#define NORMAL_VTABLE(E) \ + E##isName2, E##isName3, E##isName4, E##isNmstrt2, E##isNmstrt3, \ + E##isNmstrt4, E##isInvalid2, E##isInvalid3, E##isInvalid4 -#define EMPTY_VTABLE(E) 0, 0, 0, 0, 0, 0, 0, 0, 0 +#define NULL_VTABLE \ + /* isName2 */ NULL, /* isName3 */ NULL, /* isName4 */ NULL, \ + /* isNmstrt2 */ NULL, /* isNmstrt3 */ NULL, /* isNmstrt4 */ NULL, \ + /* isInvalid2 */ NULL, /* isInvalid3 */ NULL, /* isInvalid4 */ NULL -static int checkCharRefNumber(int); +static int FASTCALL checkCharRefNumber(int); #include "xmltok_impl.h" #include "ascii.h" #ifdef XML_MIN_SIZE -#define sb_isNameMin isNever -#define sb_isNmstrtMin isNever +# define sb_isNameMin isNever +# define sb_isNmstrtMin isNever #endif #ifdef XML_MIN_SIZE -#define MINBPC(enc) ((enc)->minBytesPerChar) +# define MINBPC(enc) ((enc)->minBytesPerChar) #else /* minimum bytes per character */ -#define MINBPC(enc) 1 +# define MINBPC(enc) 1 #endif -#define SB_BYTE_TYPE(enc, p) \ +#define SB_BYTE_TYPE(enc, p) \ (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)]) #ifdef XML_MIN_SIZE -static -int sb_byteType(const ENCODING *enc, const char *p) -{ +static int PTRFASTCALL +sb_byteType(const ENCODING *enc, const char *p) { return SB_BYTE_TYPE(enc, p); } -#define BYTE_TYPE(enc, p) \ - (((const struct normal_encoding *)(enc))->byteType(enc, p)) +# define BYTE_TYPE(enc, p) (AS_NORMAL_ENCODING(enc)->byteType(enc, p)) #else -#define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p) +# define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p) #endif #ifdef XML_MIN_SIZE -#define BYTE_TO_ASCII(enc, p) \ - (((const struct normal_encoding *)(enc))->byteToAscii(enc, p)) -static -int sb_byteToAscii(const ENCODING *enc, const char *p) -{ +# define BYTE_TO_ASCII(enc, p) (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p)) +static int PTRFASTCALL +sb_byteToAscii(const ENCODING *enc, const char *p) { + UNUSED_P(enc); return *p; } #else -#define BYTE_TO_ASCII(enc, p) (*(p)) +# define BYTE_TO_ASCII(enc, p) (*(p)) #endif -#define IS_NAME_CHAR(enc, p, n) \ - (((const struct normal_encoding *)(enc))->isName ## n(enc, p)) -#define IS_NMSTRT_CHAR(enc, p, n) \ - (((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p)) -#define IS_INVALID_CHAR(enc, p, n) \ - (((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p)) +#define IS_NAME_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isName##n(enc, p)) +#define IS_NMSTRT_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isNmstrt##n(enc, p)) +#ifdef XML_MIN_SIZE +# define IS_INVALID_CHAR(enc, p, n) \ + (AS_NORMAL_ENCODING(enc)->isInvalid##n \ + && AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p)) +#else +# define IS_INVALID_CHAR(enc, p, n) \ + (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p)) +#endif #ifdef XML_MIN_SIZE -#define IS_NAME_CHAR_MINBPC(enc, p) \ - (((const struct normal_encoding *)(enc))->isNameMin(enc, p)) -#define IS_NMSTRT_CHAR_MINBPC(enc, p) \ - (((const struct normal_encoding *)(enc))->isNmstrtMin(enc, p)) +# define IS_NAME_CHAR_MINBPC(enc, p) \ + (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p)) +# define IS_NMSTRT_CHAR_MINBPC(enc, p) \ + (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p)) #else -#define IS_NAME_CHAR_MINBPC(enc, p) (0) -#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0) +# define IS_NAME_CHAR_MINBPC(enc, p) (0) +# define IS_NMSTRT_CHAR_MINBPC(enc, p) (0) #endif #ifdef XML_MIN_SIZE -#define CHAR_MATCHES(enc, p, c) \ - (((const struct normal_encoding *)(enc))->charMatches(enc, p, c)) -static -int sb_charMatches(const ENCODING *enc, const char *p, int c) -{ +# define CHAR_MATCHES(enc, p, c) \ + (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c)) +static int PTRCALL +sb_charMatches(const ENCODING *enc, const char *p, int c) { + UNUSED_P(enc); return *p == c; } #else /* c is an ASCII character */ -#define CHAR_MATCHES(enc, p, c) (*(p) == c) +# define CHAR_MATCHES(enc, p, c) (*(p) == c) #endif -#define PREFIX(ident) normal_ ## ident +#define PREFIX(ident) normal_##ident +#define XML_TOK_IMPL_C #include "xmltok_impl.c" +#undef XML_TOK_IMPL_C #undef MINBPC #undef BYTE_TYPE @@ -268,223 +318,301 @@ int sb_charMatches(const ENCODING *enc, const char *p, int c) #undef IS_NMSTRT_CHAR_MINBPC #undef IS_INVALID_CHAR -enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ - UTF8_cval1 = 0x00, - UTF8_cval2 = 0xc0, - UTF8_cval3 = 0xe0, - UTF8_cval4 = 0xf0 +enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ + UTF8_cval1 = 0x00, + UTF8_cval2 = 0xc0, + UTF8_cval3 = 0xe0, + UTF8_cval4 = 0xf0 }; -static -void utf8_toUtf8(const ENCODING *enc, - const char **fromP, const char *fromLim, - char **toP, const char *toLim) -{ - char *to; - const char *from; - itkExpatUnused(enc); - if (fromLim - *fromP > toLim - *toP) { - /* Avoid copying partial characters. */ - for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) - if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) +void +_INTERNAL_trim_to_complete_utf8_characters(const char *from, + const char **fromLimRef) { + const char *fromLim = *fromLimRef; + size_t walked = 0; + for (; fromLim > from; fromLim--, walked++) { + const unsigned char prev = (unsigned char)fromLim[-1]; + if ((prev & 0xf8u) + == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */ + if (walked + 1 >= 4) { + fromLim += 4 - 1; + break; + } else { + walked = 0; + } + } else if ((prev & 0xf0u) + == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */ + if (walked + 1 >= 3) { + fromLim += 3 - 1; + break; + } else { + walked = 0; + } + } else if ((prev & 0xe0u) + == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */ + if (walked + 1 >= 2) { + fromLim += 2 - 1; break; + } else { + walked = 0; + } + } else if ((prev & 0x80u) + == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */ + break; + } } - for (to = *toP, from = *fromP; from != fromLim; from++, to++) - *to = *from; - *fromP = from; - *toP = to; + *fromLimRef = fromLim; } -static -void utf8_toUtf16(const ENCODING *enc, - const char **fromP, const char *fromLim, - unsigned short **toP, const unsigned short *toLim) -{ +static enum XML_Convert_Result PTRCALL +utf8_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, + char **toP, const char *toLim) { + bool input_incomplete = false; + bool output_exhausted = false; + + /* Avoid copying partial characters (due to limited space). */ + const ptrdiff_t bytesAvailable = fromLim - *fromP; + const ptrdiff_t bytesStorable = toLim - *toP; + UNUSED_P(enc); + if (bytesAvailable > bytesStorable) { + fromLim = *fromP + bytesStorable; + output_exhausted = true; + } + + /* Avoid copying partial characters (from incomplete input). */ + { + const char *const fromLimBefore = fromLim; + _INTERNAL_trim_to_complete_utf8_characters(*fromP, &fromLim); + if (fromLim < fromLimBefore) { + input_incomplete = true; + } + } + + { + const ptrdiff_t bytesToCopy = fromLim - *fromP; + memcpy(*toP, *fromP, bytesToCopy); + *fromP += bytesToCopy; + *toP += bytesToCopy; + } + + if (output_exhausted) /* needs to go first */ + return XML_CONVERT_OUTPUT_EXHAUSTED; + else if (input_incomplete) + return XML_CONVERT_INPUT_INCOMPLETE; + else + return XML_CONVERT_COMPLETED; +} + +static enum XML_Convert_Result PTRCALL +utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, + unsigned short **toP, const unsigned short *toLim) { + enum XML_Convert_Result res = XML_CONVERT_COMPLETED; unsigned short *to = *toP; const char *from = *fromP; - while (from != fromLim && to != toLim) { + while (from < fromLim && to < toLim) { switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { case BT_LEAD2: - *to++ = ((from[0] & 0x1f) << 6) | (from[1] & 0x3f); + if (fromLim - from < 2) { + res = XML_CONVERT_INPUT_INCOMPLETE; + goto after; + } + *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); from += 2; break; case BT_LEAD3: - *to++ = ((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f); + if (fromLim - from < 3) { + res = XML_CONVERT_INPUT_INCOMPLETE; + goto after; + } + *to++ = (unsigned short)(((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) + | (from[2] & 0x3f)); from += 3; break; - case BT_LEAD4: - { - unsigned long n; - if (to + 1 == toLim) - break; - n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); - n -= 0x10000; - to[0] = (unsigned short)((n >> 10) | 0xD800); - to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); - to += 2; - from += 4; + case BT_LEAD4: { + unsigned long n; + if (toLim - to < 2) { + res = XML_CONVERT_OUTPUT_EXHAUSTED; + goto after; } - break; + if (fromLim - from < 4) { + res = XML_CONVERT_INPUT_INCOMPLETE; + goto after; + } + n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) + | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); + n -= 0x10000; + to[0] = (unsigned short)((n >> 10) | 0xD800); + to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); + to += 2; + from += 4; + } break; default: *to++ = *from++; break; } } + if (from < fromLim) + res = XML_CONVERT_OUTPUT_EXHAUSTED; +after: *fromP = from; *toP = to; + return res; } #ifdef XML_NS -static const struct normal_encoding utf8_encoding_ns = { - { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, - { -#include "asciitab.h" -#include "utf8tab.h" - }, - STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) -}; +static const struct normal_encoding utf8_encoding_ns + = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0}, + { +# include "asciitab.h" +# include "utf8tab.h" + }, + STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)}; #endif -static const struct normal_encoding utf8_encoding = { - { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, - { +static const struct normal_encoding utf8_encoding + = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0}, + { #define BT_COLON BT_NMSTRT #include "asciitab.h" #undef BT_COLON #include "utf8tab.h" - }, - STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) -}; + }, + STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)}; #ifdef XML_NS -static const struct normal_encoding internal_utf8_encoding_ns = { - { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, - { -#include "iasciitab.h" -#include "utf8tab.h" - }, - STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) -}; +static const struct normal_encoding internal_utf8_encoding_ns + = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0}, + { +# include "iasciitab.h" +# include "utf8tab.h" + }, + STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)}; #endif -static const struct normal_encoding internal_utf8_encoding = { - { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, - { +static const struct normal_encoding internal_utf8_encoding + = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0}, + { #define BT_COLON BT_NMSTRT #include "iasciitab.h" #undef BT_COLON #include "utf8tab.h" - }, - STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) -}; + }, + STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)}; -static -void latin1_toUtf8(const ENCODING *enc, - const char **fromP, const char *fromLim, - char **toP, const char *toLim) -{ - itkExpatUnused(enc); +static enum XML_Convert_Result PTRCALL +latin1_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, + char **toP, const char *toLim) { + UNUSED_P(enc); for (;;) { unsigned char c; if (*fromP == fromLim) - break; + return XML_CONVERT_COMPLETED; c = (unsigned char)**fromP; if (c & 0x80) { if (toLim - *toP < 2) - break; - *(*toP)++ = ((c >> 6) | UTF8_cval2); - *(*toP)++ = ((c & 0x3f) | 0x80); + return XML_CONVERT_OUTPUT_EXHAUSTED; + *(*toP)++ = (char)((c >> 6) | UTF8_cval2); + *(*toP)++ = (char)((c & 0x3f) | 0x80); (*fromP)++; - } - else { + } else { if (*toP == toLim) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; *(*toP)++ = *(*fromP)++; } } } -static -void latin1_toUtf16(const ENCODING *enc, - const char **fromP, const char *fromLim, - unsigned short **toP, const unsigned short *toLim) -{ - itkExpatUnused(enc); - while (*fromP != fromLim && *toP != toLim) +static enum XML_Convert_Result PTRCALL +latin1_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, + unsigned short **toP, const unsigned short *toLim) { + UNUSED_P(enc); + while (*fromP < fromLim && *toP < toLim) *(*toP)++ = (unsigned char)*(*fromP)++; + + if ((*toP == toLim) && (*fromP < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } #ifdef XML_NS -static const struct normal_encoding latin1_encoding_ns = { - { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, - { -#include "asciitab.h" -#include "latin1tab.h" - }, - STANDARD_VTABLE(sb_) EMPTY_VTABLE(sb_) -}; +static const struct normal_encoding latin1_encoding_ns + = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0}, + { +# include "asciitab.h" +# include "latin1tab.h" + }, + STANDARD_VTABLE(sb_) NULL_VTABLE}; #endif -static const struct normal_encoding latin1_encoding = { - { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, - { +static const struct normal_encoding latin1_encoding + = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0}, + { #define BT_COLON BT_NMSTRT #include "asciitab.h" #undef BT_COLON #include "latin1tab.h" - }, - STANDARD_VTABLE(sb_) EMPTY_VTABLE(sb_) -}; - -static -void ascii_toUtf8(const ENCODING *enc, - const char **fromP, const char *fromLim, - char **toP, const char *toLim) -{ - itkExpatUnused(enc); - while (*fromP != fromLim && *toP != toLim) + }, + STANDARD_VTABLE(sb_) NULL_VTABLE}; + +static enum XML_Convert_Result PTRCALL +ascii_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, + char **toP, const char *toLim) { + UNUSED_P(enc); + while (*fromP < fromLim && *toP < toLim) *(*toP)++ = *(*fromP)++; + + if ((*toP == toLim) && (*fromP < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } #ifdef XML_NS -static const struct normal_encoding ascii_encoding_ns = { - { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, - { -#include "asciitab.h" -/* BT_NONXML == 0 */ - }, - STANDARD_VTABLE(sb_) EMPTY_VTABLE(sb_) -}; +static const struct normal_encoding ascii_encoding_ns + = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0}, + { +# include "asciitab.h" + /* BT_NONXML == 0 */ + }, + STANDARD_VTABLE(sb_) NULL_VTABLE}; #endif -static const struct normal_encoding ascii_encoding = { - { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, - { +static const struct normal_encoding ascii_encoding + = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0}, + { #define BT_COLON BT_NMSTRT #include "asciitab.h" #undef BT_COLON -/* BT_NONXML == 0 */ - }, - STANDARD_VTABLE(sb_) EMPTY_VTABLE(sb_) -}; + /* BT_NONXML == 0 */ + }, + STANDARD_VTABLE(sb_) NULL_VTABLE}; -static int unicode_byte_type(char hi, char lo) -{ +static int PTRFASTCALL +unicode_byte_type(char hi, char lo) { switch ((unsigned char)hi) { - case 0xD8: case 0xD9: case 0xDA: case 0xDB: + /* 0xD800-0xDBFF first 16-bit code unit or high surrogate (W1) */ + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: return BT_LEAD4; - case 0xDC: case 0xDD: case 0xDE: case 0xDF: + /* 0xDC00-0xDFFF second 16-bit code unit or low surrogate (W2) */ + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: return BT_TRAIL; case 0xFF: switch ((unsigned char)lo) { - case 0xFF: - case 0xFE: + case 0xFF: /* noncharacter-FFFF */ + case 0xFE: /* noncharacter-FFFE */ return BT_NONXML; } break; @@ -492,87 +620,105 @@ static int unicode_byte_type(char hi, char lo) return BT_NONASCII; } -#define DEFINE_UTF16_TO_UTF8(E) \ -static \ -void E ## toUtf8(const ENCODING *enc, \ - const char **fromP, const char *fromLim, \ - char **toP, const char *toLim) \ -{ \ - const char *from; \ - itkExpatUnused(enc);\ - for (from = *fromP; from != fromLim; from += 2) { \ - int plane; \ - unsigned char lo2; \ - unsigned char lo = GET_LO(from); \ - unsigned char hi = GET_HI(from); \ - switch (hi) { \ - case 0: \ - if (lo < 0x80) { \ - if (*toP == toLim) { \ - *fromP = from; \ - return; \ - } \ - *(*toP)++ = lo; \ - break; \ - } \ - /* fall through */ \ - case 0x1: case 0x2: case 0x3: \ - case 0x4: case 0x5: case 0x6: case 0x7: \ - if (toLim - *toP < 2) { \ - *fromP = from; \ - return; \ - } \ - *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ - *(*toP)++ = ((lo & 0x3f) | 0x80); \ - break; \ - default: \ - if (toLim - *toP < 3) { \ - *fromP = from; \ - return; \ - } \ - /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ - *(*toP)++ = ((hi >> 4) | UTF8_cval3); \ - *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \ - *(*toP)++ = ((lo & 0x3f) | 0x80); \ - break; \ - case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ - if (toLim - *toP < 4) { \ - *fromP = from; \ - return; \ - } \ - plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ - *(*toP)++ = ((plane >> 2) | UTF8_cval4); \ - *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \ - from += 2; \ - lo2 = GET_LO(from); \ - *(*toP)++ = (((lo & 0x3) << 4) \ - | ((GET_HI(from) & 0x3) << 2) \ - | (lo2 >> 6) \ - | 0x80); \ - *(*toP)++ = ((lo2 & 0x3f) | 0x80); \ - break; \ - } \ - } \ - *fromP = from; \ -} +#define DEFINE_UTF16_TO_UTF8(E) \ + static enum XML_Convert_Result PTRCALL E##toUtf8( \ + const ENCODING *enc, const char **fromP, const char *fromLim, \ + char **toP, const char *toLim) { \ + const char *from = *fromP; \ + UNUSED_P(enc); \ + fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \ + for (; from < fromLim; from += 2) { \ + int plane; \ + unsigned char lo2; \ + unsigned char lo = GET_LO(from); \ + unsigned char hi = GET_HI(from); \ + switch (hi) { \ + case 0: \ + if (lo < 0x80) { \ + if (*toP == toLim) { \ + *fromP = from; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + } \ + *(*toP)++ = lo; \ + break; \ + } \ + /* fall through */ \ + case 0x1: \ + case 0x2: \ + case 0x3: \ + case 0x4: \ + case 0x5: \ + case 0x6: \ + case 0x7: \ + if (toLim - *toP < 2) { \ + *fromP = from; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + } \ + *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ + *(*toP)++ = ((lo & 0x3f) | 0x80); \ + break; \ + default: \ + if (toLim - *toP < 3) { \ + *fromP = from; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + } \ + /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ + *(*toP)++ = ((hi >> 4) | UTF8_cval3); \ + *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \ + *(*toP)++ = ((lo & 0x3f) | 0x80); \ + break; \ + case 0xD8: \ + case 0xD9: \ + case 0xDA: \ + case 0xDB: \ + if (toLim - *toP < 4) { \ + *fromP = from; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + } \ + if (fromLim - from < 4) { \ + *fromP = from; \ + return XML_CONVERT_INPUT_INCOMPLETE; \ + } \ + plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ + *(*toP)++ = (char)((plane >> 2) | UTF8_cval4); \ + *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \ + from += 2; \ + lo2 = GET_LO(from); \ + *(*toP)++ = (((lo & 0x3) << 4) | ((GET_HI(from) & 0x3) << 2) \ + | (lo2 >> 6) | 0x80); \ + *(*toP)++ = ((lo2 & 0x3f) | 0x80); \ + break; \ + } \ + } \ + *fromP = from; \ + if (from < fromLim) \ + return XML_CONVERT_INPUT_INCOMPLETE; \ + else \ + return XML_CONVERT_COMPLETED; \ + } -#define DEFINE_UTF16_TO_UTF16(E) \ -static \ -void E ## toUtf16(const ENCODING *enc, \ - const char **fromP, const char *fromLim, \ - unsigned short **toP, const unsigned short *toLim) \ -{ \ - itkExpatUnused(enc);\ - /* Avoid copying first half only of surrogate */ \ - if (fromLim - *fromP > ((toLim - *toP) << 1) \ - && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \ - fromLim -= 2; \ - for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \ - *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ -} +#define DEFINE_UTF16_TO_UTF16(E) \ + static enum XML_Convert_Result PTRCALL E##toUtf16( \ + const ENCODING *enc, const char **fromP, const char *fromLim, \ + unsigned short **toP, const unsigned short *toLim) { \ + enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \ + UNUSED_P(enc); \ + fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \ + /* Avoid copying first half only of surrogate */ \ + if (fromLim - *fromP > ((toLim - *toP) << 1) \ + && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \ + fromLim -= 2; \ + res = XML_CONVERT_INPUT_INCOMPLETE; \ + } \ + for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \ + *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ + if ((*toP == toLim) && (*fromP < fromLim)) \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + else \ + return res; \ + } -#define SET2(ptr, ch) \ - (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8))) +#define SET2(ptr, ch) (((ptr)[0] = ((ch)&0xff)), ((ptr)[1] = ((ch) >> 8))) #define GET_LO(ptr) ((unsigned char)(ptr)[0]) #define GET_HI(ptr) ((unsigned char)(ptr)[1]) @@ -583,8 +729,7 @@ DEFINE_UTF16_TO_UTF16(little2_) #undef GET_LO #undef GET_HI -#define SET2(ptr, ch) \ - (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF))) +#define SET2(ptr, ch) (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch)&0xFF))) #define GET_LO(ptr) ((unsigned char)(ptr)[1]) #define GET_HI(ptr) ((unsigned char)(ptr)[0]) @@ -595,314 +740,307 @@ DEFINE_UTF16_TO_UTF16(big2_) #undef GET_LO #undef GET_HI -#define LITTLE2_BYTE_TYPE(enc, p) \ - ((p)[1] == 0 \ - ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \ - : unicode_byte_type((p)[1], (p)[0])) -#define LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1) -#define LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == c) -#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) \ +#define LITTLE2_BYTE_TYPE(enc, p) \ + ((p)[1] == 0 ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \ + : unicode_byte_type((p)[1], (p)[0])) +#define LITTLE2_BYTE_TO_ASCII(p) ((p)[1] == 0 ? (p)[0] : -1) +#define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == c) +#define LITTLE2_IS_NAME_CHAR_MINBPC(p) \ UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0]) -#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) \ +#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(p) \ UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0]) #ifdef XML_MIN_SIZE -static -int little2_byteType(const ENCODING *enc, const char *p) -{ +static int PTRFASTCALL +little2_byteType(const ENCODING *enc, const char *p) { return LITTLE2_BYTE_TYPE(enc, p); } -static -int little2_byteToAscii(const ENCODING *enc, const char *p) -{ - return LITTLE2_BYTE_TO_ASCII(enc, p); +static int PTRFASTCALL +little2_byteToAscii(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + return LITTLE2_BYTE_TO_ASCII(p); } -static -int little2_charMatches(const ENCODING *enc, const char *p, int c) -{ - return LITTLE2_CHAR_MATCHES(enc, p, c); +static int PTRCALL +little2_charMatches(const ENCODING *enc, const char *p, int c) { + UNUSED_P(enc); + return LITTLE2_CHAR_MATCHES(p, c); } -static -int little2_isNameMin(const ENCODING *enc, const char *p) -{ - return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p); +static int PTRFASTCALL +little2_isNameMin(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + return LITTLE2_IS_NAME_CHAR_MINBPC(p); } -static -int little2_isNmstrtMin(const ENCODING *enc, const char *p) -{ - return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p); +static int PTRFASTCALL +little2_isNmstrtMin(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + return LITTLE2_IS_NMSTRT_CHAR_MINBPC(p); } -#undef VTABLE -#define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16 +# undef VTABLE +# define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16 #else /* not XML_MIN_SIZE */ -#undef PREFIX -#define PREFIX(ident) little2_ ## ident -#define MINBPC(enc) 2 +# undef PREFIX +# define PREFIX(ident) little2_##ident +# define MINBPC(enc) 2 /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */ -#define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p) -#define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p) -#define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c) -#define IS_NAME_CHAR(enc, p, n) 0 -#define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) -#define IS_NMSTRT_CHAR(enc, p, n) (0) -#define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) - -#include "xmltok_impl.c" - -#undef MINBPC -#undef BYTE_TYPE -#undef BYTE_TO_ASCII -#undef CHAR_MATCHES -#undef IS_NAME_CHAR -#undef IS_NAME_CHAR_MINBPC -#undef IS_NMSTRT_CHAR -#undef IS_NMSTRT_CHAR_MINBPC -#undef IS_INVALID_CHAR +# define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p) +# define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(p) +# define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(p, c) +# define IS_NAME_CHAR(enc, p, n) 0 +# define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(p) +# define IS_NMSTRT_CHAR(enc, p, n) (0) +# define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(p) + +# define XML_TOK_IMPL_C +# include "xmltok_impl.c" +# undef XML_TOK_IMPL_C + +# undef MINBPC +# undef BYTE_TYPE +# undef BYTE_TO_ASCII +# undef CHAR_MATCHES +# undef IS_NAME_CHAR +# undef IS_NAME_CHAR_MINBPC +# undef IS_NMSTRT_CHAR +# undef IS_NMSTRT_CHAR_MINBPC +# undef IS_INVALID_CHAR #endif /* not XML_MIN_SIZE */ #ifdef XML_NS -static const struct normal_encoding little2_encoding_ns = { - { VTABLE, 2, 0, -#if XML_BYTE_ORDER == 12 - 1 -#else - 0 -#endif - }, - { -#include "asciitab.h" -#include "latin1tab.h" - }, - STANDARD_VTABLE(little2_) EMPTY_VTABLE(little2_) -}; +static const struct normal_encoding little2_encoding_ns + = {{VTABLE, 2, 0, +# if BYTEORDER == 1234 + 1 +# else + 0 +# endif + }, + { +# include "asciitab.h" +# include "latin1tab.h" + }, + STANDARD_VTABLE(little2_) NULL_VTABLE}; #endif -static const struct normal_encoding little2_encoding = { - { VTABLE, 2, 0, -#if XML_BYTE_ORDER == 12 - 1 +static const struct normal_encoding little2_encoding + = {{VTABLE, 2, 0, +#if BYTEORDER == 1234 + 1 #else - 0 + 0 #endif - }, - { + }, + { #define BT_COLON BT_NMSTRT #include "asciitab.h" #undef BT_COLON #include "latin1tab.h" - }, - STANDARD_VTABLE(little2_) EMPTY_VTABLE(little2_) -}; + }, + STANDARD_VTABLE(little2_) NULL_VTABLE}; -#if XML_BYTE_ORDER != 21 +#if BYTEORDER != 4321 -#ifdef XML_NS +# ifdef XML_NS -static const struct normal_encoding internal_little2_encoding_ns = { - { VTABLE, 2, 0, 1 }, - { -#include "iasciitab.h" -#include "latin1tab.h" - }, - STANDARD_VTABLE(little2_) EMPTY_VTABLE(little2_) -}; +static const struct normal_encoding internal_little2_encoding_ns + = {{VTABLE, 2, 0, 1}, + { +# include "iasciitab.h" +# include "latin1tab.h" + }, + STANDARD_VTABLE(little2_) NULL_VTABLE}; -#endif +# endif -static const struct normal_encoding internal_little2_encoding = { - { VTABLE, 2, 0, 1 }, - { -#define BT_COLON BT_NMSTRT -#include "iasciitab.h" -#undef BT_COLON -#include "latin1tab.h" - }, - STANDARD_VTABLE(little2_) EMPTY_VTABLE(little2_) -}; +static const struct normal_encoding internal_little2_encoding + = {{VTABLE, 2, 0, 1}, + { +# define BT_COLON BT_NMSTRT +# include "iasciitab.h" +# undef BT_COLON +# include "latin1tab.h" + }, + STANDARD_VTABLE(little2_) NULL_VTABLE}; #endif - -#define BIG2_BYTE_TYPE(enc, p) \ - ((p)[0] == 0 \ - ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \ - : unicode_byte_type((p)[0], (p)[1])) -#define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1) -#define BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == c) -#define BIG2_IS_NAME_CHAR_MINBPC(enc, p) \ +#define BIG2_BYTE_TYPE(enc, p) \ + ((p)[0] == 0 \ + ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \ + : unicode_byte_type((p)[0], (p)[1])) +#define BIG2_BYTE_TO_ASCII(p) ((p)[0] == 0 ? (p)[1] : -1) +#define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == c) +#define BIG2_IS_NAME_CHAR_MINBPC(p) \ UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1]) -#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) \ +#define BIG2_IS_NMSTRT_CHAR_MINBPC(p) \ UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1]) #ifdef XML_MIN_SIZE -static -int big2_byteType(const ENCODING *enc, const char *p) -{ +static int PTRFASTCALL +big2_byteType(const ENCODING *enc, const char *p) { return BIG2_BYTE_TYPE(enc, p); } -static -int big2_byteToAscii(const ENCODING *enc, const char *p) -{ - return BIG2_BYTE_TO_ASCII(enc, p); +static int PTRFASTCALL +big2_byteToAscii(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + return BIG2_BYTE_TO_ASCII(p); } -static -int big2_charMatches(const ENCODING *enc, const char *p, int c) -{ - return BIG2_CHAR_MATCHES(enc, p, c); +static int PTRCALL +big2_charMatches(const ENCODING *enc, const char *p, int c) { + UNUSED_P(enc); + return BIG2_CHAR_MATCHES(p, c); } -static -int big2_isNameMin(const ENCODING *enc, const char *p) -{ - return BIG2_IS_NAME_CHAR_MINBPC(enc, p); +static int PTRFASTCALL +big2_isNameMin(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + return BIG2_IS_NAME_CHAR_MINBPC(p); } -static -int big2_isNmstrtMin(const ENCODING *enc, const char *p) -{ - return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p); +static int PTRFASTCALL +big2_isNmstrtMin(const ENCODING *enc, const char *p) { + UNUSED_P(enc); + return BIG2_IS_NMSTRT_CHAR_MINBPC(p); } -#undef VTABLE -#define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16 +# undef VTABLE +# define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16 #else /* not XML_MIN_SIZE */ -#undef PREFIX -#define PREFIX(ident) big2_ ## ident -#define MINBPC(enc) 2 +# undef PREFIX +# define PREFIX(ident) big2_##ident +# define MINBPC(enc) 2 /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */ -#define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p) -#define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p) -#define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c) -#define IS_NAME_CHAR(enc, p, n) 0 -#define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p) -#define IS_NMSTRT_CHAR(enc, p, n) (0) -#define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) - -#include "xmltok_impl.c" - -#undef MINBPC -#undef BYTE_TYPE -#undef BYTE_TO_ASCII -#undef CHAR_MATCHES -#undef IS_NAME_CHAR -#undef IS_NAME_CHAR_MINBPC -#undef IS_NMSTRT_CHAR -#undef IS_NMSTRT_CHAR_MINBPC -#undef IS_INVALID_CHAR +# define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p) +# define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(p) +# define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(p, c) +# define IS_NAME_CHAR(enc, p, n) 0 +# define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(p) +# define IS_NMSTRT_CHAR(enc, p, n) (0) +# define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(p) + +# define XML_TOK_IMPL_C +# include "xmltok_impl.c" +# undef XML_TOK_IMPL_C + +# undef MINBPC +# undef BYTE_TYPE +# undef BYTE_TO_ASCII +# undef CHAR_MATCHES +# undef IS_NAME_CHAR +# undef IS_NAME_CHAR_MINBPC +# undef IS_NMSTRT_CHAR +# undef IS_NMSTRT_CHAR_MINBPC +# undef IS_INVALID_CHAR #endif /* not XML_MIN_SIZE */ #ifdef XML_NS -static const struct normal_encoding big2_encoding_ns = { - { VTABLE, 2, 0, -#if XML_BYTE_ORDER == 21 - 1 -#else - 0 -#endif - }, - { -#include "asciitab.h" -#include "latin1tab.h" - }, - STANDARD_VTABLE(big2_) EMPTY_VTABLE(big2_) -}; +static const struct normal_encoding big2_encoding_ns + = {{VTABLE, 2, 0, +# if BYTEORDER == 4321 + 1 +# else + 0 +# endif + }, + { +# include "asciitab.h" +# include "latin1tab.h" + }, + STANDARD_VTABLE(big2_) NULL_VTABLE}; #endif -static const struct normal_encoding big2_encoding = { - { VTABLE, 2, 0, -#if XML_BYTE_ORDER == 21 - 1 +static const struct normal_encoding big2_encoding + = {{VTABLE, 2, 0, +#if BYTEORDER == 4321 + 1 #else - 0 + 0 #endif - }, - { + }, + { #define BT_COLON BT_NMSTRT #include "asciitab.h" #undef BT_COLON #include "latin1tab.h" - }, - STANDARD_VTABLE(big2_) EMPTY_VTABLE(big2_) -}; + }, + STANDARD_VTABLE(big2_) NULL_VTABLE}; -#if XML_BYTE_ORDER != 12 +#if BYTEORDER != 1234 -#ifdef XML_NS +# ifdef XML_NS -static const struct normal_encoding internal_big2_encoding_ns = { - { VTABLE, 2, 0, 1 }, - { -#include "iasciitab.h" -#include "latin1tab.h" - }, - STANDARD_VTABLE(big2_) EMPTY_VTABLE(big2_) -}; +static const struct normal_encoding internal_big2_encoding_ns + = {{VTABLE, 2, 0, 1}, + { +# include "iasciitab.h" +# include "latin1tab.h" + }, + STANDARD_VTABLE(big2_) NULL_VTABLE}; -#endif +# endif -static const struct normal_encoding internal_big2_encoding = { - { VTABLE, 2, 0, 1 }, - { -#define BT_COLON BT_NMSTRT -#include "iasciitab.h" -#undef BT_COLON -#include "latin1tab.h" - }, - STANDARD_VTABLE(big2_) EMPTY_VTABLE(big2_) -}; +static const struct normal_encoding internal_big2_encoding + = {{VTABLE, 2, 0, 1}, + { +# define BT_COLON BT_NMSTRT +# include "iasciitab.h" +# undef BT_COLON +# include "latin1tab.h" + }, + STANDARD_VTABLE(big2_) NULL_VTABLE}; #endif #undef PREFIX -static -int streqci(const char *s1, const char *s2) -{ +static int FASTCALL +streqci(const char *s1, const char *s2) { for (;;) { char c1 = *s1++; char c2 = *s2++; if (ASCII_a <= c1 && c1 <= ASCII_z) c1 += ASCII_A - ASCII_a; if (ASCII_a <= c2 && c2 <= ASCII_z) - c2 += ASCII_A - ASCII_a; + /* The following line will never get executed. streqci() is + * only called from two places, both of which guarantee to put + * upper-case strings into s2. + */ + c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */ if (c1 != c2) return 0; - if (!c1) + if (! c1) break; } return 1; } -static -void initUpdatePosition(const ENCODING *enc, const char *ptr, - const char *end, POSITION *pos) -{ - itkExpatUnused(enc); +static void PTRCALL +initUpdatePosition(const ENCODING *enc, const char *ptr, const char *end, + POSITION *pos) { + UNUSED_P(enc); normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); } -static -int toAscii(const ENCODING *enc, const char *ptr, const char *end) -{ +static int +toAscii(const ENCODING *enc, const char *ptr, const char *end) { char buf[1]; char *p = buf; XmlUtf8Convert(enc, &ptr, end, &p, p + 1); @@ -912,37 +1050,32 @@ int toAscii(const ENCODING *enc, const char *ptr, const char *end) return buf[0]; } -static -int isSpace(int c) -{ +static int FASTCALL +isSpace(int c) { switch (c) { case 0x20: case 0xD: case 0xA: - case 0x9: + case 0x9: return 1; } return 0; } -/* Return 1 if there's just optional white space -or there's an S followed by name=val. */ -static -int parsePseudoAttribute(const ENCODING *enc, - const char *ptr, - const char *end, - const char **namePtr, - const char **nameEndPtr, - const char **valPtr, - const char **nextTokPtr) -{ +/* Return 1 if there's just optional white space or there's an S + followed by name=val. +*/ +static int +parsePseudoAttribute(const ENCODING *enc, const char *ptr, const char *end, + const char **namePtr, const char **nameEndPtr, + const char **valPtr, const char **nextTokPtr) { int c; char open; if (ptr == end) { - *namePtr = 0; + *namePtr = NULL; return 1; } - if (!isSpace(toAscii(enc, ptr, end))) { + if (! isSpace(toAscii(enc, ptr, end))) { *nextTokPtr = ptr; return 0; } @@ -950,7 +1083,7 @@ int parsePseudoAttribute(const ENCODING *enc, ptr += enc->minBytesPerChar; } while (isSpace(toAscii(enc, ptr, end))); if (ptr == end) { - *namePtr = 0; + *namePtr = NULL; return 1; } *namePtr = ptr; @@ -991,19 +1124,16 @@ int parsePseudoAttribute(const ENCODING *enc, *nextTokPtr = ptr; return 0; } - open = c; + open = (char)c; ptr += enc->minBytesPerChar; *valPtr = ptr; for (;; ptr += enc->minBytesPerChar) { c = toAscii(enc, ptr, end); if (c == open) break; - if (!(ASCII_a <= c && c <= ASCII_z) - && !(ASCII_A <= c && c <= ASCII_Z) - && !(ASCII_0 <= c && c <= ASCII_9) - && c != ASCII_PERIOD - && c != ASCII_MINUS - && c != ASCII_UNDERSCORE) { + if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z) + && ! (ASCII_0 <= c && c <= ASCII_9) && c != ASCII_PERIOD + && c != ASCII_MINUS && c != ASCII_UNDERSCORE) { *nextTokPtr = ptr; return 0; } @@ -1012,66 +1142,52 @@ int parsePseudoAttribute(const ENCODING *enc, return 1; } -static const char KW_version[] = { - ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0' -}; +static const char KW_version[] + = {ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0'}; -static const char KW_encoding[] = { - ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, ASCII_i, ASCII_n, ASCII_g, '\0' -}; +static const char KW_encoding[] = {ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, + ASCII_i, ASCII_n, ASCII_g, '\0'}; -static const char KW_standalone[] = { - ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o, ASCII_n, ASCII_e, '\0' -}; +static const char KW_standalone[] + = {ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, + ASCII_l, ASCII_o, ASCII_n, ASCII_e, '\0'}; -static const char KW_yes[] = { - ASCII_y, ASCII_e, ASCII_s, '\0' -}; +static const char KW_yes[] = {ASCII_y, ASCII_e, ASCII_s, '\0'}; -static const char KW_no[] = { - ASCII_n, ASCII_o, '\0' -}; +static const char KW_no[] = {ASCII_n, ASCII_o, '\0'}; -static -int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, - const char *, - const char *), - int isGeneralTextEntity, - const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr, - const char **versionPtr, - const char **versionEndPtr, - const char **encodingName, - const ENCODING **encoding, - int *standalone) -{ - const char *val = 0; - const char *name = 0; - const char *nameEnd = 0; +static int +doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, const char *, + const char *), + int isGeneralTextEntity, const ENCODING *enc, const char *ptr, + const char *end, const char **badPtr, const char **versionPtr, + const char **versionEndPtr, const char **encodingName, + const ENCODING **encoding, int *standalone) { + const char *val = NULL; + const char *name = NULL; + const char *nameEnd = NULL; ptr += 5 * enc->minBytesPerChar; end -= 2 * enc->minBytesPerChar; - if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr) || !name) { + if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr) + || ! name) { *badPtr = ptr; return 0; } - if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) { - if (!isGeneralTextEntity) { + if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) { + if (! isGeneralTextEntity) { *badPtr = name; return 0; } - } - else { + } else { if (versionPtr) *versionPtr = val; if (versionEndPtr) *versionEndPtr = ptr; - if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { + if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { *badPtr = ptr; return 0; } - if (!name) { + if (! name) { if (isGeneralTextEntity) { /* a TextDecl must have an EncodingDecl */ *badPtr = ptr; @@ -1082,7 +1198,7 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, } if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) { int c = toAscii(enc, val, end); - if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z)) { + if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z)) { *badPtr = val; return 0; } @@ -1090,26 +1206,25 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, *encodingName = val; if (encoding) *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar); - if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { + if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { *badPtr = ptr; return 0; } - if (!name) + if (! name) return 1; } - if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone) || isGeneralTextEntity) { + if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone) + || isGeneralTextEntity) { *badPtr = name; return 0; } if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) { if (standalone) *standalone = 1; - } - else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) { + } else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) { if (standalone) *standalone = 0; - } - else { + } else { *badPtr = val; return 0; } @@ -1122,12 +1237,17 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, return 1; } -static -int checkCharRefNumber(int result) -{ +static int FASTCALL +checkCharRefNumber(int result) { switch (result >> 8) { - case 0xD8: case 0xD9: case 0xDA: case 0xDB: - case 0xDC: case 0xDD: case 0xDE: case 0xDF: + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: return -1; case 0: if (latin1_encoding.type[result] == BT_NONXML) @@ -1141,8 +1261,8 @@ int checkCharRefNumber(int result) return result; } -int XmlUtf8Encode(int c, char *buf) -{ +int FASTCALL +XmlUtf8Encode(int c, char *buf) { enum { /* minN is minimum legal resulting value for N byte sequence */ min2 = 0x80, @@ -1151,44 +1271,44 @@ int XmlUtf8Encode(int c, char *buf) }; if (c < 0) - return 0; + return 0; /* LCOV_EXCL_LINE: this case is always eliminated beforehand */ if (c < min2) { - buf[0] = (c | UTF8_cval1); + buf[0] = (char)(c | UTF8_cval1); return 1; } if (c < min3) { - buf[0] = ((c >> 6) | UTF8_cval2); - buf[1] = ((c & 0x3f) | 0x80); + buf[0] = (char)((c >> 6) | UTF8_cval2); + buf[1] = (char)((c & 0x3f) | 0x80); return 2; } if (c < min4) { - buf[0] = ((c >> 12) | UTF8_cval3); - buf[1] = (((c >> 6) & 0x3f) | 0x80); - buf[2] = ((c & 0x3f) | 0x80); + buf[0] = (char)((c >> 12) | UTF8_cval3); + buf[1] = (char)(((c >> 6) & 0x3f) | 0x80); + buf[2] = (char)((c & 0x3f) | 0x80); return 3; } if (c < 0x110000) { - buf[0] = ((c >> 18) | UTF8_cval4); - buf[1] = (((c >> 12) & 0x3f) | 0x80); - buf[2] = (((c >> 6) & 0x3f) | 0x80); - buf[3] = ((c & 0x3f) | 0x80); + buf[0] = (char)((c >> 18) | UTF8_cval4); + buf[1] = (char)(((c >> 12) & 0x3f) | 0x80); + buf[2] = (char)(((c >> 6) & 0x3f) | 0x80); + buf[3] = (char)((c & 0x3f) | 0x80); return 4; } - return 0; + return 0; /* LCOV_EXCL_LINE: this case too is eliminated before calling */ } -int XmlUtf16Encode(int charNum, unsigned short *buf) -{ +int FASTCALL +XmlUtf16Encode(int charNum, unsigned short *buf) { if (charNum < 0) return 0; if (charNum < 0x10000) { - buf[0] = charNum; + buf[0] = (unsigned short)charNum; return 1; } if (charNum < 0x110000) { charNum -= 0x10000; - buf[0] = (charNum >> 10) + 0xD800; - buf[1] = (charNum & 0x3FF) + 0xDC00; + buf[0] = (unsigned short)((charNum >> 10) + 0xD800); + buf[1] = (unsigned short)((charNum & 0x3FF) + 0xDC00); return 2; } return 0; @@ -1196,113 +1316,104 @@ int XmlUtf16Encode(int charNum, unsigned short *buf) struct unknown_encoding { struct normal_encoding normal; - int (*convert)(void *userData, const char *p); + CONVERTER convert; void *userData; unsigned short utf16[256]; char utf8[256][4]; }; -int XmlSizeOfUnknownEncoding(void) -{ +#define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *)(enc)) + +int +XmlSizeOfUnknownEncoding(void) { return sizeof(struct unknown_encoding); } -static -int unknown_isName(const ENCODING *enc, const char *p) -{ - int c = ((const struct unknown_encoding *)enc) - ->convert(((const struct unknown_encoding *)enc)->userData, p); +static int PTRFASTCALL +unknown_isName(const ENCODING *enc, const char *p) { + const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); + int c = uenc->convert(uenc->userData, p); if (c & ~0xFFFF) return 0; return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF); } -static -int unknown_isNmstrt(const ENCODING *enc, const char *p) -{ - int c = ((const struct unknown_encoding *)enc) - ->convert(((const struct unknown_encoding *)enc)->userData, p); +static int PTRFASTCALL +unknown_isNmstrt(const ENCODING *enc, const char *p) { + const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); + int c = uenc->convert(uenc->userData, p); if (c & ~0xFFFF) return 0; return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF); } -static -int unknown_isInvalid(const ENCODING *enc, const char *p) -{ - int c = ((const struct unknown_encoding *)enc) - ->convert(((const struct unknown_encoding *)enc)->userData, p); +static int PTRFASTCALL +unknown_isInvalid(const ENCODING *enc, const char *p) { + const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); + int c = uenc->convert(uenc->userData, p); return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; } -static -void unknown_toUtf8(const ENCODING *enc, - const char **fromP, const char *fromLim, - char **toP, const char *toLim) -{ +static enum XML_Convert_Result PTRCALL +unknown_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, + char **toP, const char *toLim) { + const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); char buf[XML_UTF8_ENCODE_MAX]; for (;;) { const char *utf8; int n; if (*fromP == fromLim) - break; - utf8 = ((const struct unknown_encoding *)enc)->utf8[(unsigned char)**fromP]; + return XML_CONVERT_COMPLETED; + utf8 = uenc->utf8[(unsigned char)**fromP]; n = *utf8++; if (n == 0) { - int c = ((const struct unknown_encoding *)enc) - ->convert(((const struct unknown_encoding *)enc)->userData, *fromP); + int c = uenc->convert(uenc->userData, *fromP); n = XmlUtf8Encode(c, buf); if (n > toLim - *toP) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; utf8 = buf; - *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP] - - (BT_LEAD2 - 2); - } - else { + *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] + - (BT_LEAD2 - 2)); + } else { if (n > toLim - *toP) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; (*fromP)++; } - do { - *(*toP)++ = *utf8++; - } while (--n != 0); + memcpy(*toP, utf8, n); + *toP += n; } } -static -void unknown_toUtf16(const ENCODING *enc, - const char **fromP, const char *fromLim, - unsigned short **toP, const unsigned short *toLim) -{ - while (*fromP != fromLim && *toP != toLim) { - unsigned short c - = ((const struct unknown_encoding *)enc)->utf16[(unsigned char)**fromP]; +static enum XML_Convert_Result PTRCALL +unknown_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, + unsigned short **toP, const unsigned short *toLim) { + const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); + while (*fromP < fromLim && *toP < toLim) { + unsigned short c = uenc->utf16[(unsigned char)**fromP]; if (c == 0) { - c = (unsigned short)((const struct unknown_encoding *)enc) - ->convert(((const struct unknown_encoding *)enc)->userData, *fromP); - *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP] - - (BT_LEAD2 - 2); - } - else + c = (unsigned short)uenc->convert(uenc->userData, *fromP); + *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] + - (BT_LEAD2 - 2)); + } else (*fromP)++; *(*toP)++ = c; } + + if ((*toP == toLim) && (*fromP < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } ENCODING * -XmlInitUnknownEncoding(void *mem, - int *table, - int (*convert)(void *userData, const char *p), - void *userData) -{ +XmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert, + void *userData) { int i; - struct unknown_encoding *e = mem; - for (i = 0; i < (int)sizeof(struct normal_encoding); i++) - ((char *)mem)[i] = ((char *)&latin1_encoding)[i]; + struct unknown_encoding *e = (struct unknown_encoding *)mem; + memcpy(mem, &latin1_encoding, sizeof(struct normal_encoding)); for (i = 0; i < 128; i++) if (latin1_encoding.type[i] != BT_OTHER - && latin1_encoding.type[i] != BT_NONXML - && table[i] != i) + && latin1_encoding.type[i] != BT_NONXML && table[i] != i) return 0; for (i = 0; i < 256; i++) { int c = table[i]; @@ -1312,32 +1423,30 @@ XmlInitUnknownEncoding(void *mem, e->utf16[i] = 0xFFFF; e->utf8[i][0] = 1; e->utf8[i][1] = 0; - } - else if (c < 0) { + } else if (c < 0) { if (c < -4) return 0; - e->normal.type[i] = BT_LEAD2 - (c + 2); + /* Multi-byte sequences need a converter function */ + if (! convert) + return 0; + e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2)); e->utf8[i][0] = 0; e->utf16[i] = 0; - } - else if (c < 0x80) { + } else if (c < 0x80) { if (latin1_encoding.type[c] != BT_OTHER - && latin1_encoding.type[c] != BT_NONXML - && c != i) + && latin1_encoding.type[c] != BT_NONXML && c != i) return 0; e->normal.type[i] = latin1_encoding.type[c]; e->utf8[i][0] = 1; e->utf8[i][1] = (char)c; - e->utf16[i] = c == 0 ? 0xFFFF : c; - } - else if (checkCharRefNumber(c) < 0) { + e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c); + } else if (checkCharRefNumber(c) < 0) { e->normal.type[i] = BT_NONXML; /* This shouldn't really get used. */ e->utf16[i] = 0xFFFF; e->utf8[i][0] = 1; e->utf8[i][1] = 0; - } - else { + } else { if (c > 0xFFFF) return 0; if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff)) @@ -1347,7 +1456,7 @@ XmlInitUnknownEncoding(void *mem, else e->normal.type[i] = BT_OTHER; e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1); - e->utf16[i] = c; + e->utf16[i] = (unsigned short)c; } } e->userData = userData; @@ -1382,70 +1491,57 @@ enum { NO_ENC }; -static const char KW_ISO_8859_1[] = { - ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9, ASCII_MINUS, ASCII_1, '\0' -}; -static const char KW_US_ASCII[] = { - ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I, '\0' -}; -static const char KW_UTF_8[] = { - ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0' -}; -static const char KW_UTF_16[] = { - ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0' -}; -static const char KW_UTF_16BE[] = { - ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E, '\0' -}; -static const char KW_UTF_16LE[] = { - ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E, '\0' -}; - -static -int getEncodingIndex(const char *name) -{ - static const char *encodingNames[] = { - KW_ISO_8859_1, - KW_US_ASCII, - KW_UTF_8, - KW_UTF_16, - KW_UTF_16BE, - KW_UTF_16LE, +static const char KW_ISO_8859_1[] + = {ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, + ASCII_5, ASCII_9, ASCII_MINUS, ASCII_1, '\0'}; +static const char KW_US_ASCII[] + = {ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, + ASCII_C, ASCII_I, ASCII_I, '\0'}; +static const char KW_UTF_8[] + = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'}; +static const char KW_UTF_16[] + = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'}; +static const char KW_UTF_16BE[] + = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, + ASCII_6, ASCII_B, ASCII_E, '\0'}; +static const char KW_UTF_16LE[] + = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, + ASCII_6, ASCII_L, ASCII_E, '\0'}; + +static int FASTCALL +getEncodingIndex(const char *name) { + static const char *const encodingNames[] = { + KW_ISO_8859_1, KW_US_ASCII, KW_UTF_8, KW_UTF_16, KW_UTF_16BE, KW_UTF_16LE, }; int i; - if (name == 0) + if (name == NULL) return NO_ENC; - for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++) + for (i = 0; i < (int)(sizeof(encodingNames) / sizeof(encodingNames[0])); i++) if (streqci(name, encodingNames[i])) return i; return UNKNOWN_ENC; } -/* For binary compatibility, we store the index of the encoding specified -at initialization in the isUtf16 member. */ +/* For binary compatibility, we store the index of the encoding + specified at initialization in the isUtf16 member. +*/ #define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16) #define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)i) -/* This is what detects the encoding. -encodingTable maps from encoding indices to encodings; -INIT_ENC_INDEX(enc) is the index of the external (protocol) specified encoding; -state is XML_CONTENT_STATE if we're parsing an external text entity, -and XML_PROLOG_STATE otherwise. +/* This is what detects the encoding. encodingTable maps from + encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of + the external (protocol) specified encoding; state is + XML_CONTENT_STATE if we're parsing an external text entity, and + XML_PROLOG_STATE otherwise. */ - -static -int initScan(const ENCODING **encodingTable, - const INIT_ENCODING *enc, - int state, - const char *ptr, - const char *end, - const char **nextTokPtr) -{ +static int +initScan(const ENCODING *const *encodingTable, const INIT_ENCODING *enc, + int state, const char *ptr, const char *end, const char **nextTokPtr) { const ENCODING **encPtr; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; encPtr = enc->encPtr; if (ptr + 1 == end) { @@ -1467,20 +1563,17 @@ int initScan(const ENCODING **encodingTable, case 0xFE: case 0xFF: case 0xEF: /* possibly first byte of UTF-8 BOM */ - if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC - && state == XML_CONTENT_STATE) + if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE) break; /* fall through */ case 0x00: case 0x3C: return XML_TOK_PARTIAL; } - } - else { + } else { switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) { case 0xFEFF: - if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC - && state == XML_CONTENT_STATE) + if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE) break; *nextTokPtr = ptr + 2; *encPtr = encodingTable[UTF_16BE_ENC]; @@ -1494,8 +1587,7 @@ int initScan(const ENCODING **encodingTable, *encPtr = encodingTable[UTF_16LE_ENC]; return XmlTok(*encPtr, state, ptr, end, nextTokPtr); case 0xFFFE: - if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC - && state == XML_CONTENT_STATE) + if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE) break; *nextTokPtr = ptr + 2; *encPtr = encodingTable[UTF_16LE_ENC]; @@ -1506,10 +1598,12 @@ int initScan(const ENCODING **encodingTable, of ISO-8859-1 or some flavour of UTF-16 and this is an external text entity, don't look for the BOM, - because it might be a legal data. */ + because it might be a legal data. + */ if (state == XML_CONTENT_STATE) { int e = INIT_ENC_INDEX(enc); - if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC || e == UTF_16_ENC) + if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC + || e == UTF_16_ENC) break; } if (ptr + 2 == end) @@ -1522,16 +1616,17 @@ int initScan(const ENCODING **encodingTable, break; default: if (ptr[0] == '\0') { - /* 0 isn't a legal data character. Furthermore a document entity can only - start with ASCII characters. So the only way this can fail to be big-endian - UTF-16 if it it's an external parsed general entity that's labelled as - UTF-16LE. */ + /* 0 isn't a legal data character. Furthermore a document + entity can only start with ASCII characters. So the only + way this can fail to be big-endian UTF-16 if it it's an + external parsed general entity that's labelled as + UTF-16LE. + */ if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC) break; *encPtr = encodingTable[UTF_16BE_ENC]; return XmlTok(*encPtr, state, ptr, end, nextTokPtr); - } - else if (ptr[1] == '\0') { + } else if (ptr[1] == '\0') { /* We could recover here in the case: - parsing an external entity - second byte is 0 @@ -1539,7 +1634,8 @@ int initScan(const ENCODING **encodingTable, - no encoding declaration by assuming UTF-16LE. But we don't, because this would mean when presented just with a single byte, we couldn't reliably determine - whether we needed further bytes. */ + whether we needed further bytes. + */ if (state == XML_CONTENT_STATE) break; *encPtr = encodingTable[UTF_16LE_ENC]; @@ -1552,29 +1648,29 @@ int initScan(const ENCODING **encodingTable, return XmlTok(*encPtr, state, ptr, end, nextTokPtr); } - #define NS(x) x #define ns(x) x +#define XML_TOK_NS_C #include "xmltok_ns.c" +#undef XML_TOK_NS_C #undef NS #undef ns #ifdef XML_NS -#define NS(x) x ## NS -#define ns(x) x ## _ns +# define NS(x) x##NS +# define ns(x) x##_ns -#include "xmltok_ns.c" +# define XML_TOK_NS_C +# include "xmltok_ns.c" +# undef XML_TOK_NS_C -#undef NS -#undef ns +# undef NS +# undef ns ENCODING * -XmlInitUnknownEncodingNS(void *mem, - int *table, - int (*convert)(void *userData, const char *p), - void *userData) -{ +XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert, + void *userData) { ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); if (enc) ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON; diff --git a/Modules/ThirdParty/Expat/src/expat/xmltok.h b/Modules/ThirdParty/Expat/src/expat/xmltok.h index 717305e3115..6f630c2f9ba 100644 --- a/Modules/ThirdParty/Expat/src/expat/xmltok.h +++ b/Modules/ThirdParty/Expat/src/expat/xmltok.h @@ -1,30 +1,64 @@ /* -Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2002-2005 Karl Waclawek + Copyright (c) 2016-2017 Sebastian Pipping + Copyright (c) 2017 Rhodri James + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef XmlTok_INCLUDED #define XmlTok_INCLUDED 1 -#include "itk_expat_mangle.h" #ifdef __cplusplus extern "C" { #endif /* The following token may be returned by XmlContentTok */ -#define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be start of - illegal ]]> sequence */ -/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */ -#define XML_TOK_NONE -4 /* The string to be scanned is empty */ -#define XML_TOK_TRAILING_CR -3 /* A CR at the end of the scan; - might be part of CRLF sequence */ +#define XML_TOK_TRAILING_RSQB \ + -5 /* ] or ]] at the end of the scan; might be \ + start of illegal ]]> sequence */ +/* The following tokens may be returned by both XmlPrologTok and + XmlContentTok. +*/ +#define XML_TOK_NONE -4 /* The string to be scanned is empty */ +#define XML_TOK_TRAILING_CR \ + -3 /* A CR at the end of the scan; \ + might be part of CRLF sequence */ #define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */ -#define XML_TOK_PARTIAL -1 /* only part of a token */ +#define XML_TOK_PARTIAL -1 /* only part of a token */ #define XML_TOK_INVALID 0 /* The following tokens are returned by XmlContentTok; some are also - returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok */ - + returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok. +*/ #define XML_TOK_START_TAG_WITH_ATTS 1 #define XML_TOK_START_TAG_NO_ATTS 2 #define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag */ @@ -34,22 +68,24 @@ extern "C" { #define XML_TOK_DATA_NEWLINE 7 #define XML_TOK_CDATA_SECT_OPEN 8 #define XML_TOK_ENTITY_REF 9 -#define XML_TOK_CHAR_REF 10 /* numeric character reference */ +#define XML_TOK_CHAR_REF 10 /* numeric character reference */ -/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */ -#define XML_TOK_PI 11 /* processing instruction */ +/* The following tokens may be returned by both XmlPrologTok and + XmlContentTok. +*/ +#define XML_TOK_PI 11 /* processing instruction */ #define XML_TOK_XML_DECL 12 /* XML decl or text decl */ #define XML_TOK_COMMENT 13 -#define XML_TOK_BOM 14 /* Byte order mark */ +#define XML_TOK_BOM 14 /* Byte order mark */ /* The following tokens are returned only by XmlPrologTok */ #define XML_TOK_PROLOG_S 15 -#define XML_TOK_DECL_OPEN 16 /* */ #define XML_TOK_NAME 18 #define XML_TOK_NMTOKEN 19 #define XML_TOK_POUND_NAME 20 /* #name */ -#define XML_TOK_OR 21 /* | */ +#define XML_TOK_OR 21 /* | */ #define XML_TOK_PERCENT 22 #define XML_TOK_OPEN_PAREN 23 #define XML_TOK_CLOSE_PAREN 24 @@ -60,14 +96,14 @@ extern "C" { #define XML_TOK_INSTANCE_START 29 /* The following occur only in element type declarations */ -#define XML_TOK_NAME_QUESTION 30 /* name? */ -#define XML_TOK_NAME_ASTERISK 31 /* name* */ -#define XML_TOK_NAME_PLUS 32 /* name+ */ -#define XML_TOK_COND_SECT_OPEN 33 /* */ +#define XML_TOK_NAME_QUESTION 30 /* name? */ +#define XML_TOK_NAME_ASTERISK 31 /* name* */ +#define XML_TOK_NAME_PLUS 32 /* name+ */ +#define XML_TOK_COND_SECT_OPEN 33 /* */ #define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */ #define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */ -#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */ +#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */ #define XML_TOK_COMMA 38 /* The following token is returned only by XmlAttributeValueTok */ @@ -76,25 +112,26 @@ extern "C" { /* The following token is returned only by XmlCdataSectionTok */ #define XML_TOK_CDATA_SECT_CLOSE 40 -/* With namespace processing this is returned by XmlPrologTok - for a name with a colon. */ +/* With namespace processing this is returned by XmlPrologTok for a + name with a colon. +*/ #define XML_TOK_PREFIXED_NAME 41 #ifdef XML_DTD -#define XML_TOK_IGNORE_SECT 42 +# define XML_TOK_IGNORE_SECT 42 #endif /* XML_DTD */ #ifdef XML_DTD -#define XML_N_STATES 4 +# define XML_N_STATES 4 #else /* not XML_DTD */ -#define XML_N_STATES 3 +# define XML_N_STATES 3 #endif /* not XML_DTD */ #define XML_PROLOG_STATE 0 #define XML_CONTENT_STATE 1 #define XML_CDATA_SECTION_STATE 2 #ifdef XML_DTD -#define XML_IGNORE_SECTION_STATE 3 +# define XML_IGNORE_SECTION_STATE 3 #endif /* XML_DTD */ #define XML_N_LITERAL_TYPES 2 @@ -108,8 +145,8 @@ extern "C" { typedef struct position { /* first line and first column are 0 not 1 */ - unsigned long lineNumber; - unsigned long columnNumber; + XML_Size lineNumber; + XML_Size columnNumber; } POSITION; typedef struct { @@ -122,128 +159,123 @@ typedef struct { struct encoding; typedef struct encoding ENCODING; +typedef int(PTRCALL *SCANNER)(const ENCODING *, const char *, const char *, + const char **); + +enum XML_Convert_Result { + XML_CONVERT_COMPLETED = 0, + XML_CONVERT_INPUT_INCOMPLETE = 1, + XML_CONVERT_OUTPUT_EXHAUSTED + = 2 /* and therefore potentially input remaining as well */ +}; + struct encoding { - int (*scanners[XML_N_STATES])(const ENCODING *, - const char *, - const char *, - const char **); - int (*literalScanners[XML_N_LITERAL_TYPES])(const ENCODING *, - const char *, - const char *, - const char **); - int (*sameName)(const ENCODING *, - const char *, const char *); - int (*nameMatchesAscii)(const ENCODING *, - const char *, const char *, const char *); - int (*nameLength)(const ENCODING *, const char *); - const char *(*skipS)(const ENCODING *, const char *); - int (*getAtts)(const ENCODING *enc, const char *ptr, - int attsMax, ATTRIBUTE *atts); - int (*charRefNumber)(const ENCODING *enc, const char *ptr); - int (*predefinedEntityName)(const ENCODING *, const char *, const char *); - void (*updatePosition)(const ENCODING *, - const char *ptr, - const char *end, - POSITION *); - int (*isPublicId)(const ENCODING *enc, const char *ptr, const char *end, - const char **badPtr); - void (*utf8Convert)(const ENCODING *enc, - const char **fromP, - const char *fromLim, - char **toP, - const char *toLim); - void (*utf16Convert)(const ENCODING *enc, - const char **fromP, - const char *fromLim, - unsigned short **toP, - const unsigned short *toLim); + SCANNER scanners[XML_N_STATES]; + SCANNER literalScanners[XML_N_LITERAL_TYPES]; + int(PTRCALL *nameMatchesAscii)(const ENCODING *, const char *, const char *, + const char *); + int(PTRFASTCALL *nameLength)(const ENCODING *, const char *); + const char *(PTRFASTCALL *skipS)(const ENCODING *, const char *); + int(PTRCALL *getAtts)(const ENCODING *enc, const char *ptr, int attsMax, + ATTRIBUTE *atts); + int(PTRFASTCALL *charRefNumber)(const ENCODING *enc, const char *ptr); + int(PTRCALL *predefinedEntityName)(const ENCODING *, const char *, + const char *); + void(PTRCALL *updatePosition)(const ENCODING *, const char *ptr, + const char *end, POSITION *); + int(PTRCALL *isPublicId)(const ENCODING *enc, const char *ptr, + const char *end, const char **badPtr); + enum XML_Convert_Result(PTRCALL *utf8Convert)(const ENCODING *enc, + const char **fromP, + const char *fromLim, char **toP, + const char *toLim); + enum XML_Convert_Result(PTRCALL *utf16Convert)(const ENCODING *enc, + const char **fromP, + const char *fromLim, + unsigned short **toP, + const unsigned short *toLim); int minBytesPerChar; char isUtf8; char isUtf16; }; -/* -Scan the string starting at ptr until the end of the next complete token, -but do not scan past eptr. Return an integer giving the type of token. +/* Scan the string starting at ptr until the end of the next complete + token, but do not scan past eptr. Return an integer giving the + type of token. -Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set. + Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set. -Return XML_TOK_PARTIAL when the string does not contain a complete token; -nextTokPtr will not be set. + Return XML_TOK_PARTIAL when the string does not contain a complete + token; nextTokPtr will not be set. -Return XML_TOK_INVALID when the string does not start a valid token; nextTokPtr -will be set to point to the character which made the token invalid. + Return XML_TOK_INVALID when the string does not start a valid + token; nextTokPtr will be set to point to the character which made + the token invalid. -Otherwise the string starts with a valid token; nextTokPtr will be set to point -to the character following the end of that token. + Otherwise the string starts with a valid token; nextTokPtr will be + set to point to the character following the end of that token. -Each data character counts as a single token, but adjacent data characters -may be returned together. Similarly for characters in the prolog outside -literals, comments and processing instructions. + Each data character counts as a single token, but adjacent data + characters may be returned together. Similarly for characters in + the prolog outside literals, comments and processing instructions. */ - -#define XmlTok(enc, state, ptr, end, nextTokPtr) \ +#define XmlTok(enc, state, ptr, end, nextTokPtr) \ (((enc)->scanners[state])(enc, ptr, end, nextTokPtr)) -#define XmlPrologTok(enc, ptr, end, nextTokPtr) \ - XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr) +#define XmlPrologTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr) -#define XmlContentTok(enc, ptr, end, nextTokPtr) \ - XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr) +#define XmlContentTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr) -#define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \ - XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr) +#define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr) #ifdef XML_DTD -#define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \ - XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr) +# define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr) #endif /* XML_DTD */ -/* This is used for performing a 2nd-level tokenization on -the content of a literal that has already been returned by XmlTok. */ - -#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \ +/* This is used for performing a 2nd-level tokenization on the content + of a literal that has already been returned by XmlTok. +*/ +#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \ (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr)) -#define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \ - XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr) - -#define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \ - XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr) +#define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \ + XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr) -#define XmlSameName(enc, ptr1, ptr2) (((enc)->sameName)(enc, ptr1, ptr2)) +#define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \ + XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr) -#define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \ +#define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \ (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2)) -#define XmlNameLength(enc, ptr) \ - (((enc)->nameLength)(enc, ptr)) +#define XmlNameLength(enc, ptr) (((enc)->nameLength)(enc, ptr)) -#define XmlSkipS(enc, ptr) \ - (((enc)->skipS)(enc, ptr)) +#define XmlSkipS(enc, ptr) (((enc)->skipS)(enc, ptr)) -#define XmlGetAttributes(enc, ptr, attsMax, atts) \ +#define XmlGetAttributes(enc, ptr, attsMax, atts) \ (((enc)->getAtts)(enc, ptr, attsMax, atts)) -#define XmlCharRefNumber(enc, ptr) \ - (((enc)->charRefNumber)(enc, ptr)) +#define XmlCharRefNumber(enc, ptr) (((enc)->charRefNumber)(enc, ptr)) -#define XmlPredefinedEntityName(enc, ptr, end) \ +#define XmlPredefinedEntityName(enc, ptr, end) \ (((enc)->predefinedEntityName)(enc, ptr, end)) -#define XmlUpdatePosition(enc, ptr, end, pos) \ +#define XmlUpdatePosition(enc, ptr, end, pos) \ (((enc)->updatePosition)(enc, ptr, end, pos)) -#define XmlIsPublicId(enc, ptr, end, badPtr) \ +#define XmlIsPublicId(enc, ptr, end, badPtr) \ (((enc)->isPublicId)(enc, ptr, end, badPtr)) -#define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \ +#define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \ (((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim)) -#define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \ +#define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \ (((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim)) typedef struct { @@ -251,48 +283,35 @@ typedef struct { const ENCODING **encPtr; } INIT_ENCODING; -int XmlParseXmlDecl(int isGeneralTextEntity, - const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr, - const char **versionPtr, - const char **versionEndPtr, - const char **encodingNamePtr, - const ENCODING **namedEncodingPtr, - int *standalonePtr); - -int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name); -const ENCODING *XmlGetUtf8InternalEncoding(void); -const ENCODING *XmlGetUtf16InternalEncoding(void); -int XmlUtf8Encode(int charNumber, char *buf); -int XmlUtf16Encode(int charNumber, unsigned short *buf); - -int XmlSizeOfUnknownEncoding(void); -ENCODING * -XmlInitUnknownEncoding(void *mem, - int *table, - int (*conv)(void *userData, const char *p), - void *userData); - -int XmlParseXmlDeclNS(int isGeneralTextEntity, - const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr, - const char **versionPtr, - const char **versionEndPtr, - const char **encodingNamePtr, - const ENCODING **namedEncodingPtr, - int *standalonePtr); -int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name); -const ENCODING *XmlGetUtf8InternalEncodingNS(void); -const ENCODING *XmlGetUtf16InternalEncodingNS(void); -ENCODING * -XmlInitUnknownEncodingNS(void *mem, - int *table, - int (*conv)(void *userData, const char *p), - void *userData); +int XmlParseXmlDecl(int isGeneralTextEntity, const ENCODING *enc, + const char *ptr, const char *end, const char **badPtr, + const char **versionPtr, const char **versionEndPtr, + const char **encodingNamePtr, + const ENCODING **namedEncodingPtr, int *standalonePtr); + +int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name); +const ENCODING *XmlGetUtf8InternalEncoding(void); +const ENCODING *XmlGetUtf16InternalEncoding(void); +int FASTCALL XmlUtf8Encode(int charNumber, char *buf); +int FASTCALL XmlUtf16Encode(int charNumber, unsigned short *buf); +int XmlSizeOfUnknownEncoding(void); + +typedef int(XMLCALL *CONVERTER)(void *userData, const char *p); + +ENCODING *XmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert, + void *userData); + +int XmlParseXmlDeclNS(int isGeneralTextEntity, const ENCODING *enc, + const char *ptr, const char *end, const char **badPtr, + const char **versionPtr, const char **versionEndPtr, + const char **encodingNamePtr, + const ENCODING **namedEncodingPtr, int *standalonePtr); + +int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name); +const ENCODING *XmlGetUtf8InternalEncodingNS(void); +const ENCODING *XmlGetUtf16InternalEncodingNS(void); +ENCODING *XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert, + void *userData); #ifdef __cplusplus } #endif diff --git a/Modules/ThirdParty/Expat/src/expat/xmltok_impl.c b/Modules/ThirdParty/Expat/src/expat/xmltok_impl.c index 963facec350..0430591b426 100644 --- a/Modules/ThirdParty/Expat/src/expat/xmltok_impl.c +++ b/Modules/ThirdParty/Expat/src/expat/xmltok_impl.c @@ -1,114 +1,163 @@ -/* -Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. +/* This file is included (from xmltok.c, 1-3 times depending on XML_MIN_SIZE)! + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2002-2016 Karl Waclawek + Copyright (c) 2016-2021 Sebastian Pipping + Copyright (c) 2017 Rhodri James + Copyright (c) 2018 Benjamin Peterson + Copyright (c) 2018 Anton Maklakov + Copyright (c) 2019 David Loffredo + Copyright (c) 2020 Boris Kolpackov + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef IS_INVALID_CHAR -#define IS_INVALID_CHAR(enc, ptr, n) (0) -#endif +#ifdef XML_TOK_IMPL_C -#ifndef INVALID_LEAD_CASE -#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ - case BT_LEAD ## n: \ - if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ - if (IS_INVALID_CHAR(enc, ptr, n)) { \ - *(nextTokPtr) = (ptr); \ - return XML_TOK_INVALID; \ - } \ - ptr += n; \ - break; -#endif +# ifndef IS_INVALID_CHAR // i.e. for UTF-16 and XML_MIN_SIZE not defined +# define IS_INVALID_CHAR(enc, ptr, n) (0) +# endif -#define INVALID_CASES(ptr, nextTokPtr) \ - INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ - INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ - INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ - case BT_NONXML: \ - case BT_MALFORM: \ - case BT_TRAIL: \ - *(nextTokPtr) = (ptr); \ +# define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ + case BT_LEAD##n: \ + if (end - ptr < n) \ + return XML_TOK_PARTIAL_CHAR; \ + if (IS_INVALID_CHAR(enc, ptr, n)) { \ + *(nextTokPtr) = (ptr); \ + return XML_TOK_INVALID; \ + } \ + ptr += n; \ + break; + +# define INVALID_CASES(ptr, nextTokPtr) \ + INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ + INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ + INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ + case BT_NONXML: \ + case BT_MALFORM: \ + case BT_TRAIL: \ + *(nextTokPtr) = (ptr); \ return XML_TOK_INVALID; -#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ - case BT_LEAD ## n: \ - if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ - if (!IS_NAME_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_INVALID; \ - } \ - ptr += n; \ - break; +# define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ + case BT_LEAD##n: \ + if (end - ptr < n) \ + return XML_TOK_PARTIAL_CHAR; \ + if (! IS_NAME_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ + ptr += n; \ + break; -#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ - case BT_NONASCII: \ - if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_INVALID; \ - } \ - case BT_NMSTRT: \ - case BT_HEX: \ - case BT_DIGIT: \ - case BT_NAME: \ - case BT_MINUS: \ - ptr += MINBPC(enc); \ - break; \ - CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ - CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ - CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) +# define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ + case BT_NONASCII: \ + if (! IS_NAME_CHAR_MINBPC(enc, ptr)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ + /* fall through */ \ + case BT_NMSTRT: \ + case BT_HEX: \ + case BT_DIGIT: \ + case BT_NAME: \ + case BT_MINUS: \ + ptr += MINBPC(enc); \ + break; \ + CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ + CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ + CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) -#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ - case BT_LEAD ## n: \ - if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ - if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_INVALID; \ - } \ - ptr += n; \ - break; +# define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ + case BT_LEAD##n: \ + if (end - ptr < n) \ + return XML_TOK_PARTIAL_CHAR; \ + if (! IS_NMSTRT_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ + ptr += n; \ + break; -#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ - case BT_NONASCII: \ - if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_INVALID; \ - } \ - case BT_NMSTRT: \ - case BT_HEX: \ - ptr += MINBPC(enc); \ - break; \ - CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ - CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ - CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) +# define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ + case BT_NONASCII: \ + if (! IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ + /* fall through */ \ + case BT_NMSTRT: \ + case BT_HEX: \ + ptr += MINBPC(enc); \ + break; \ + CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ + CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ + CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) -#ifndef PREFIX -#define PREFIX(ident) ident -#endif +# ifndef PREFIX +# define PREFIX(ident) ident +# endif + +# define HAS_CHARS(enc, ptr, end, count) (end - ptr >= count * MINBPC(enc)) + +# define HAS_CHAR(enc, ptr, end) HAS_CHARS(enc, ptr, end, 1) + +# define REQUIRE_CHARS(enc, ptr, end, count) \ + { \ + if (! HAS_CHARS(enc, ptr, end, count)) { \ + return XML_TOK_PARTIAL; \ + } \ + } + +# define REQUIRE_CHAR(enc, ptr, end) REQUIRE_CHARS(enc, ptr, end, 1) /* ptr points to character following " */ switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { - case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: + case BT_S: + case BT_CR: + case BT_LF: + case BT_PERCNT: *nextTokPtr = ptr; return XML_TOK_INVALID; } /* fall through */ - case BT_S: case BT_CR: case BT_LF: + case BT_S: + case BT_CR: + case BT_LF: *nextTokPtr = ptr; return XML_TOK_DECL_OPEN; case BT_NMSTRT: @@ -174,13 +225,13 @@ int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_PARTIAL; } -static -int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr) -{ +static int PTRCALL +PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, + int *tokPtr) { int upper = 0; - itkExpatUnused(enc); + UNUSED_P(enc); *tokPtr = XML_TOK_PI; - if (end - ptr != MINBPC(enc)*3) + if (end - ptr != MINBPC(enc) * 3) return 1; switch (BYTE_TO_ASCII(enc, ptr)) { case ASCII_x: @@ -219,36 +270,35 @@ int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, /* ptr points to character following "= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -320,14 +366,12 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en switch (BYTE_TYPE(enc, ptr)) { case BT_RSQB: ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) + REQUIRE_CHAR(enc, ptr, end); + if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { + REQUIRE_CHAR(enc, ptr, end); + if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; } @@ -335,8 +379,7 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en return XML_TOK_CDATA_SECT_CLOSE; case BT_CR: ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); *nextTokPtr = ptr; @@ -344,23 +387,25 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en case BT_LF: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_DATA_NEWLINE; - INVALID_CASES(ptr, nextTokPtr) + INVALID_CASES(ptr, nextTokPtr) default: ptr += MINBPC(enc); break; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_DATA_CHARS; \ - } \ - ptr += n; \ - break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_DATA_CHARS; \ + } \ + ptr += n; \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_NONXML: case BT_MALFORM: case BT_TRAIL: @@ -380,25 +425,27 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en /* ptr points to character following "= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -803,7 +838,7 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_CR: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); @@ -814,50 +849,52 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_DATA_NEWLINE; case BT_RSQB: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_RSQB; - if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) + if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_RSQB; - if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { + if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; } *nextTokPtr = ptr; return XML_TOK_INVALID; - INVALID_CASES(ptr, nextTokPtr) + INVALID_CASES(ptr, nextTokPtr) default: ptr += MINBPC(enc); break; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_DATA_CHARS; \ - } \ - ptr += n; \ - break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_DATA_CHARS; \ + } \ + ptr += n; \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_RSQB: - if (ptr + MINBPC(enc) != end) { - if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { - ptr += MINBPC(enc); - break; - } - if (ptr + 2*MINBPC(enc) != end) { - if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { - ptr += MINBPC(enc); - break; - } - *nextTokPtr = ptr + 2*MINBPC(enc); - return XML_TOK_INVALID; - } + if (HAS_CHARS(enc, ptr, end, 2)) { + if (! CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { + ptr += MINBPC(enc); + break; + } + if (HAS_CHARS(enc, ptr, end, 3)) { + if (! CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT)) { + ptr += MINBPC(enc); + break; + } + *nextTokPtr = ptr + 2 * MINBPC(enc); + return XML_TOK_INVALID; + } } /* fall through */ case BT_AMP: @@ -880,24 +917,25 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, /* ptr points to character following "%" */ -static -int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ - if (ptr == end) - return XML_TOK_PARTIAL; +static int PTRCALL +PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) - case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) + case BT_S: + case BT_LF: + case BT_CR: + case BT_PERCNT: *nextTokPtr = ptr; return XML_TOK_PERCENT; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_SEMI: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_PARAM_ENTITY_REF; @@ -909,23 +947,26 @@ int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_PARTIAL; } -static -int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ - if (ptr == end) - return XML_TOK_PARTIAL; +static int PTRCALL +PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) - case BT_CR: case BT_LF: case BT_S: - case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR: + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + case BT_CR: + case BT_LF: + case BT_S: + case BT_RPAR: + case BT_GT: + case BT_PERCNT: + case BT_VERBAR: *nextTokPtr = ptr; return XML_TOK_POUND_NAME; default: @@ -936,26 +977,28 @@ int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, return -XML_TOK_POUND_NAME; } -static -int PREFIX(scanLit)(int open, const ENCODING *enc, - const char *ptr, const char *end, - const char **nextTokPtr) -{ - while (ptr != end) { +static int PTRCALL +PREFIX(scanLit)(int open, const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { + while (HAS_CHAR(enc, ptr, end)) { int t = BYTE_TYPE(enc, ptr); switch (t) { - INVALID_CASES(ptr, nextTokPtr) + INVALID_CASES(ptr, nextTokPtr) case BT_QUOT: case BT_APOS: ptr += MINBPC(enc); if (t != open) break; - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return -XML_TOK_LITERAL; *nextTokPtr = ptr; switch (BYTE_TYPE(enc, ptr)) { - case BT_S: case BT_CR: case BT_LF: - case BT_GT: case BT_PERCNT: case BT_LSQB: + case BT_S: + case BT_CR: + case BT_LF: + case BT_GT: + case BT_PERCNT: + case BT_LSQB: return XML_TOK_LITERAL; default: return XML_TOK_INVALID; @@ -968,12 +1011,11 @@ int PREFIX(scanLit)(int open, const ENCODING *enc, return XML_TOK_PARTIAL; } -static -int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ +static int PTRCALL +PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { int tok; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -989,39 +1031,42 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_APOS: return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_LT: - { - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - switch (BYTE_TYPE(enc, ptr)) { - case BT_EXCL: - return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_QUEST: - return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_NMSTRT: - case BT_HEX: - case BT_NONASCII: - case BT_LEAD2: - case BT_LEAD3: - case BT_LEAD4: - *nextTokPtr = ptr - MINBPC(enc); - return XML_TOK_INSTANCE_START; - } - *nextTokPtr = ptr; - return XML_TOK_INVALID; + case BT_LT: { + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); + switch (BYTE_TYPE(enc, ptr)) { + case BT_EXCL: + return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); + case BT_QUEST: + return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); + case BT_NMSTRT: + case BT_HEX: + case BT_NONASCII: + case BT_LEAD2: + case BT_LEAD3: + case BT_LEAD4: + *nextTokPtr = ptr - MINBPC(enc); + return XML_TOK_INSTANCE_START; } + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } case BT_CR: - if (ptr + MINBPC(enc) == end) + if (ptr + MINBPC(enc) == end) { + *nextTokPtr = end; + /* indicate that this might be part of a CR/LF pair */ return -XML_TOK_PROLOG_S; + } /* fall through */ - case BT_S: case BT_LF: + case BT_S: + case BT_LF: for (;;) { ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) break; switch (BYTE_TYPE(enc, ptr)) { - case BT_S: case BT_LF: + case BT_S: + case BT_LF: break; case BT_CR: /* don't split CR/LF pair */ @@ -1045,13 +1090,12 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_OPEN_BRACKET; case BT_RSQB: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return -XML_TOK_CLOSE_BRACKET; if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { - if (ptr + MINBPC(enc) == end) - return XML_TOK_PARTIAL; + REQUIRE_CHARS(enc, ptr, end, 2); if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { - *nextTokPtr = ptr + 2*MINBPC(enc); + *nextTokPtr = ptr + 2 * MINBPC(enc); return XML_TOK_COND_SECT_CLOSE; } } @@ -1062,7 +1106,7 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_OPEN_PAREN; case BT_RPAR: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return -XML_TOK_CLOSE_PAREN; switch (BYTE_TYPE(enc, ptr)) { case BT_AST: @@ -1074,8 +1118,12 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_PLUS: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_CLOSE_PAREN_PLUS; - case BT_CR: case BT_LF: case BT_S: - case BT_GT: case BT_COMMA: case BT_VERBAR: + case BT_CR: + case BT_LF: + case BT_S: + case BT_GT: + case BT_COMMA: + case BT_VERBAR: case BT_RPAR: *nextTokPtr = ptr; return XML_TOK_CLOSE_PAREN; @@ -1090,24 +1138,26 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_DECL_CLOSE; case BT_NUM: return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ - if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ - ptr += n; \ - tok = XML_TOK_NAME; \ - break; \ - } \ - if (IS_NAME_CHAR(enc, ptr, n)) { \ - ptr += n; \ - tok = XML_TOK_NMTOKEN; \ - break; \ - } \ - *nextTokPtr = ptr; \ +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + if (end - ptr < n) \ + return XML_TOK_PARTIAL_CHAR; \ + if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ + ptr += n; \ + tok = XML_TOK_NAME; \ + break; \ + } \ + if (IS_NAME_CHAR(enc, ptr, n)) { \ + ptr += n; \ + tok = XML_TOK_NMTOKEN; \ + break; \ + } \ + *nextTokPtr = ptr; \ return XML_TOK_INVALID; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_NMSTRT: case BT_HEX: tok = XML_TOK_NAME; @@ -1116,9 +1166,9 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_DIGIT: case BT_NAME: case BT_MINUS: -#ifdef XML_NS +# ifdef XML_NS case BT_COLON: -#endif +# endif tok = XML_TOK_NMTOKEN; ptr += MINBPC(enc); break; @@ -1138,24 +1188,29 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) - case BT_GT: case BT_RPAR: case BT_COMMA: - case BT_VERBAR: case BT_LSQB: case BT_PERCNT: - case BT_S: case BT_CR: case BT_LF: + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + case BT_GT: + case BT_RPAR: + case BT_COMMA: + case BT_VERBAR: + case BT_LSQB: + case BT_PERCNT: + case BT_S: + case BT_CR: + case BT_LF: *nextTokPtr = ptr; return tok; -#ifdef XML_NS +# ifdef XML_NS case BT_COLON: ptr += MINBPC(enc); switch (tok) { case XML_TOK_NAME: - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); tok = XML_TOK_PREFIXED_NAME; switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) default: tok = XML_TOK_NMTOKEN; break; @@ -1166,23 +1221,23 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, break; } break; -#endif +# endif case BT_PLUS: - if (tok == XML_TOK_NMTOKEN) { + if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_PLUS; case BT_AST: - if (tok == XML_TOK_NMTOKEN) { + if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_ASTERISK; case BT_QUEST: - if (tok == XML_TOK_NMTOKEN) { + if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } @@ -1196,20 +1251,31 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return -tok; } -static -int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ +static int PTRCALL +PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { const char *start; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; + else if (! HAS_CHAR(enc, ptr, end)) { + /* This line cannot be executed. The incoming data has already + * been tokenized once, so incomplete characters like this have + * already been eliminated from the input. Retaining the paranoia + * check is still valuable, however. + */ + return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ + } start = ptr; - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: ptr += n; break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + ptr += n; \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_AMP: if (ptr == start) return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); @@ -1229,7 +1295,7 @@ int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char * case BT_CR: if (ptr == start) { ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); @@ -1254,20 +1320,31 @@ int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char * return XML_TOK_DATA_CHARS; } -static -int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ +static int PTRCALL +PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { const char *start; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; + else if (! HAS_CHAR(enc, ptr, end)) { + /* This line cannot be executed. The incoming data has already + * been tokenized once, so incomplete characters like this have + * already been eliminated from the input. Retaining the paranoia + * check is still valuable, however. + */ + return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ + } start = ptr; - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: ptr += n; break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + ptr += n; \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_AMP: if (ptr == start) return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); @@ -1275,8 +1352,7 @@ int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end return XML_TOK_DATA_CHARS; case BT_PERCNT: if (ptr == start) { - int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), - end, nextTokPtr); + int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; } *nextTokPtr = ptr; @@ -1291,7 +1367,7 @@ int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end case BT_CR: if (ptr == start) { ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); @@ -1309,12 +1385,11 @@ int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end return XML_TOK_DATA_CHARS; } -#ifdef XML_DTD +# ifdef XML_DTD -static -int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ +static int PTRCALL +PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { int level = 0; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -1323,15 +1398,15 @@ int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *e end = ptr + n; } } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - INVALID_CASES(ptr, nextTokPtr) + INVALID_CASES(ptr, nextTokPtr) case BT_LT: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { ++level; ptr += MINBPC(enc); @@ -1339,11 +1414,11 @@ int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *e } break; case BT_RSQB: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr += MINBPC(enc); if (level == 0) { @@ -1362,15 +1437,14 @@ int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *e return XML_TOK_PARTIAL; } -#endif /* XML_DTD */ +# endif /* XML_DTD */ -static -int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, - const char **badPtr) -{ +static int PTRCALL +PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, + const char **badPtr) { ptr += MINBPC(enc); end -= MINBPC(enc); - for (; ptr != end; ptr += MINBPC(enc)) { + for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: @@ -1390,9 +1464,9 @@ int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, case BT_AST: case BT_PERCNT: case BT_NUM: -#ifdef XML_NS +# ifdef XML_NS case BT_COLON: -#endif +# endif break; case BT_S: if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { @@ -1402,8 +1476,9 @@ int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, break; case BT_NAME: case BT_NMSTRT: - if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) + if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f)) break; + /* fall through */ default: switch (BYTE_TO_ASCII(enc, ptr)) { case 0x24: /* $ */ @@ -1419,14 +1494,14 @@ int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, return 1; } -/* This must only be called for a well-formed start-tag or empty element tag. -Returns the number of attributes. Pointers to the first attsMax attributes -are stored in atts. */ +/* This must only be called for a well-formed start-tag or empty + element tag. Returns the number of attributes. Pointers to the + first attsMax attributes are stored in atts. +*/ -static -int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, - int attsMax, ATTRIBUTE *atts) -{ +static int PTRCALL +PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax, + ATTRIBUTE *atts) { enum { other, inName, inValue } state = inName; int nAtts = 0; int open = 0; /* defined when state == inValue; @@ -1434,32 +1509,35 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { -#define START_NAME \ - if (state == other) { \ - if (nAtts < attsMax) { \ - atts[nAtts].name = ptr; \ - atts[nAtts].normalized = 1; \ - } \ - state = inName; \ - } -#define LEAD_CASE(n) \ - case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define START_NAME \ + if (state == other) { \ + if (nAtts < attsMax) { \ + atts[nAtts].name = ptr; \ + atts[nAtts].normalized = 1; \ + } \ + state = inName; \ + } +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + START_NAME ptr += (n - MINBPC(enc)); \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_NONASCII: case BT_NMSTRT: case BT_HEX: START_NAME break; -#undef START_NAME +# undef START_NAME case BT_QUOT: if (state != inValue) { if (nAtts < attsMax) atts[nAtts].valuePtr = ptr + MINBPC(enc); state = inValue; open = BT_QUOT; - } - else if (open == BT_QUOT) { + } else if (open == BT_QUOT) { state = other; if (nAtts < attsMax) atts[nAtts].valueEnd = ptr; @@ -1472,8 +1550,7 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, atts[nAtts].valuePtr = ptr + MINBPC(enc); state = inValue; open = BT_APOS; - } - else if (open == BT_APOS) { + } else if (open == BT_APOS) { state = other; if (nAtts < attsMax) atts[nAtts].valueEnd = ptr; @@ -1487,16 +1564,15 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, case BT_S: if (state == inName) state = other; - else if (state == inValue - && nAtts < attsMax - && atts[nAtts].normalized + else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized && (ptr == atts[nAtts].valuePtr || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) atts[nAtts].normalized = 0; break; - case BT_CR: case BT_LF: + case BT_CR: + case BT_LF: /* This case ensures that the first attribute name is counted Apart from that we could just change state on the quote. */ if (state == inName) @@ -1516,27 +1592,45 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, /* not reached */ } -static -int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) -{ +static int PTRFASTCALL +PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) { int result = 0; - itkExpatUnused(enc); /* skip &# */ - ptr += 2*MINBPC(enc); + UNUSED_P(enc); + ptr += 2 * MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_x)) { - for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { + for (ptr += MINBPC(enc); ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); + ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); switch (c) { - case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: - case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: + case ASCII_0: + case ASCII_1: + case ASCII_2: + case ASCII_3: + case ASCII_4: + case ASCII_5: + case ASCII_6: + case ASCII_7: + case ASCII_8: + case ASCII_9: result <<= 4; result |= (c - ASCII_0); break; - case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F: + case ASCII_A: + case ASCII_B: + case ASCII_C: + case ASCII_D: + case ASCII_E: + case ASCII_F: result <<= 4; result += 10 + (c - ASCII_A); break; - case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f: + case ASCII_a: + case ASCII_b: + case ASCII_c: + case ASCII_d: + case ASCII_e: + case ASCII_f: result <<= 4; result += 10 + (c - ASCII_a); break; @@ -1544,9 +1638,8 @@ int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) if (result >= 0x110000) return -1; } - } - else { - for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { + } else { + for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); result *= 10; result += (c - ASCII_0); @@ -1557,11 +1650,11 @@ int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) return checkCharRefNumber(result); } -static -int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end) -{ - itkExpatUnused(enc); - switch ((end - ptr)/MINBPC(enc)) { +static int PTRCALL +PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, + const char *end) { + UNUSED_P(enc); + switch ((end - ptr) / MINBPC(enc)) { case 2: if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { switch (BYTE_TO_ASCII(enc, ptr)) { @@ -1611,99 +1704,43 @@ int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const cha return 0; } -static -int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) -{ - for (;;) { - switch (BYTE_TYPE(enc, ptr1)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - if (*ptr1++ != *ptr2++) \ - return 0; - LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) -#undef LEAD_CASE - /* fall through */ - if (*ptr1++ != *ptr2++) - return 0; - break; - case BT_NONASCII: - case BT_NMSTRT: -#ifdef XML_NS - case BT_COLON: -#endif - case BT_HEX: - case BT_DIGIT: - case BT_NAME: - case BT_MINUS: - if (*ptr2++ != *ptr1++) - return 0; - if (MINBPC(enc) > 1) { - if (*ptr2++ != *ptr1++) - return 0; - if (MINBPC(enc) > 2) { - if (*ptr2++ != *ptr1++) - return 0; - if (MINBPC(enc) > 3) { - if (*ptr2++ != *ptr1++) - return 0; - } - } - } - break; - default: - if (MINBPC(enc) == 1 && *ptr1 == *ptr2) - return 1; - switch (BYTE_TYPE(enc, ptr2)) { - case BT_LEAD2: - case BT_LEAD3: - case BT_LEAD4: - case BT_NONASCII: - case BT_NMSTRT: -#ifdef XML_NS - case BT_COLON: -#endif - case BT_HEX: - case BT_DIGIT: - case BT_NAME: - case BT_MINUS: - return 0; - default: - return 1; - } - } - } - /* not reached */ -} - -static -int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, - const char *end1, const char *ptr2) -{ - itkExpatUnused(enc); +static int PTRCALL +PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, + const char *end1, const char *ptr2) { + UNUSED_P(enc); for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { - if (ptr1 == end1) - return 0; - if (!CHAR_MATCHES(enc, ptr1, *ptr2)) + if (end1 - ptr1 < MINBPC(enc)) { + /* This line cannot be executed. The incoming data has already + * been tokenized once, so incomplete characters like this have + * already been eliminated from the input. Retaining the + * paranoia check is still valuable, however. + */ + return 0; /* LCOV_EXCL_LINE */ + } + if (! CHAR_MATCHES(enc, ptr1, *ptr2)) return 0; } return ptr1 == end1; } -static -int PREFIX(nameLength)(const ENCODING *enc, const char *ptr) -{ +static int PTRFASTCALL +PREFIX(nameLength)(const ENCODING *enc, const char *ptr) { const char *start = ptr; for (;;) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: ptr += n; break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + ptr += n; \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_NONASCII: case BT_NMSTRT: -#ifdef XML_NS +# ifdef XML_NS case BT_COLON: -#endif +# endif case BT_HEX: case BT_DIGIT: case BT_NAME: @@ -1711,14 +1748,13 @@ int PREFIX(nameLength)(const ENCODING *enc, const char *ptr) ptr += MINBPC(enc); break; default: - return ptr - start; + return (int)(ptr - start); } } } -static -const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr) -{ +static const char *PTRFASTCALL +PREFIX(skipS)(const ENCODING *enc, const char *ptr) { for (;;) { switch (BYTE_TYPE(enc, ptr)) { case BT_LF: @@ -1732,44 +1768,46 @@ const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr) } } -static -void PREFIX(updatePosition)(const ENCODING *enc, - const char *ptr, - const char *end, - POSITION *pos) -{ - while (ptr != end) { +static void PTRCALL +PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end, + POSITION *pos) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - ptr += n; \ - break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + ptr += n; \ + pos->columnNumber++; \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_LF: - pos->columnNumber = (unsigned)-1; + pos->columnNumber = 0; pos->lineNumber++; ptr += MINBPC(enc); break; case BT_CR: pos->lineNumber++; ptr += MINBPC(enc); - if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) + if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); - pos->columnNumber = (unsigned)-1; + pos->columnNumber = 0; break; default: ptr += MINBPC(enc); + pos->columnNumber++; break; } - pos->columnNumber++; } } -#undef DO_LEAD_CASE -#undef MULTIBYTE_CASES -#undef INVALID_CASES -#undef CHECK_NAME_CASE -#undef CHECK_NAME_CASES -#undef CHECK_NMSTRT_CASE -#undef CHECK_NMSTRT_CASES +# undef DO_LEAD_CASE +# undef MULTIBYTE_CASES +# undef INVALID_CASES +# undef CHECK_NAME_CASE +# undef CHECK_NAME_CASES +# undef CHECK_NMSTRT_CASE +# undef CHECK_NMSTRT_CASES + +#endif /* XML_TOK_IMPL_C */ diff --git a/Modules/ThirdParty/Expat/src/expat/xmltok_impl.h b/Modules/ThirdParty/Expat/src/expat/xmltok_impl.h index da0ea60a657..c518aada013 100644 --- a/Modules/ThirdParty/Expat/src/expat/xmltok_impl.h +++ b/Modules/ThirdParty/Expat/src/expat/xmltok_impl.h @@ -1,46 +1,74 @@ /* -Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2017-2019 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ enum { - BT_NONXML, - BT_MALFORM, - BT_LT, - BT_AMP, - BT_RSQB, - BT_LEAD2, - BT_LEAD3, - BT_LEAD4, - BT_TRAIL, - BT_CR, - BT_LF, - BT_GT, - BT_QUOT, - BT_APOS, - BT_EQUALS, - BT_QUEST, - BT_EXCL, - BT_SOL, - BT_SEMI, - BT_NUM, - BT_LSQB, - BT_S, - BT_NMSTRT, - BT_COLON, - BT_HEX, - BT_DIGIT, - BT_NAME, - BT_MINUS, - BT_OTHER, /* known not to be a name or name start character */ + BT_NONXML, /* e.g. noncharacter-FFFF */ + BT_MALFORM, /* illegal, with regard to encoding */ + BT_LT, /* less than = "<" */ + BT_AMP, /* ampersand = "&" */ + BT_RSQB, /* right square bracket = "[" */ + BT_LEAD2, /* lead byte of a 2-byte UTF-8 character */ + BT_LEAD3, /* lead byte of a 3-byte UTF-8 character */ + BT_LEAD4, /* lead byte of a 4-byte UTF-8 character */ + BT_TRAIL, /* trailing unit, e.g. second 16-bit unit of a 4-byte char. */ + BT_CR, /* carriage return = "\r" */ + BT_LF, /* line feed = "\n" */ + BT_GT, /* greater than = ">" */ + BT_QUOT, /* quotation character = "\"" */ + BT_APOS, /* aposthrophe = "'" */ + BT_EQUALS, /* equal sign = "=" */ + BT_QUEST, /* question mark = "?" */ + BT_EXCL, /* exclamation mark = "!" */ + BT_SOL, /* solidus, slash = "/" */ + BT_SEMI, /* semicolon = ";" */ + BT_NUM, /* number sign = "#" */ + BT_LSQB, /* left square bracket = "[" */ + BT_S, /* white space, e.g. "\t", " "[, "\r"] */ + BT_NMSTRT, /* non-hex name start letter = "G".."Z" + "g".."z" + "_" */ + BT_COLON, /* colon = ":" */ + BT_HEX, /* hex letter = "A".."F" + "a".."f" */ + BT_DIGIT, /* digit = "0".."9" */ + BT_NAME, /* dot and middle dot = "." + chr(0xb7) */ + BT_MINUS, /* minus = "-" */ + BT_OTHER, /* known not to be a name or name start character */ BT_NONASCII, /* might be a name or name start character */ - BT_PERCNT, - BT_LPAR, - BT_RPAR, - BT_AST, - BT_PLUS, - BT_COMMA, - BT_VERBAR + BT_PERCNT, /* percent sign = "%" */ + BT_LPAR, /* left parenthesis = "(" */ + BT_RPAR, /* right parenthesis = "(" */ + BT_AST, /* asterisk = "*" */ + BT_PLUS, /* plus sign = "+" */ + BT_COMMA, /* comma = "," */ + BT_VERBAR /* vertical bar = "|" */ }; #include diff --git a/Modules/ThirdParty/Expat/src/expat/xmltok_ns.c b/Modules/ThirdParty/Expat/src/expat/xmltok_ns.c index fa78adffce1..5fd83922359 100644 --- a/Modules/ThirdParty/Expat/src/expat/xmltok_ns.c +++ b/Modules/ThirdParty/Expat/src/expat/xmltok_ns.c @@ -1,47 +1,83 @@ -const ENCODING *NS(XmlGetUtf8InternalEncoding)(void) -{ +/* This file is included! + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Greg Stein + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2002-2006 Karl Waclawek + Copyright (c) 2017 Sebastian Pipping + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#ifdef XML_TOK_NS_C + +const ENCODING * +NS(XmlGetUtf8InternalEncoding)(void) { return &ns(internal_utf8_encoding).enc; } -const ENCODING *NS(XmlGetUtf16InternalEncoding)(void) -{ -#if XML_BYTE_ORDER == 12 +const ENCODING * +NS(XmlGetUtf16InternalEncoding)(void) { +# if BYTEORDER == 1234 return &ns(internal_little2_encoding).enc; -#elif XML_BYTE_ORDER == 21 +# elif BYTEORDER == 4321 return &ns(internal_big2_encoding).enc; -#else +# else const short n = 1; - return *(const char *)&n ? &ns(internal_little2_encoding).enc : &ns(internal_big2_encoding).enc; -#endif + return (*(const char *)&n ? &ns(internal_little2_encoding).enc + : &ns(internal_big2_encoding).enc); +# endif } -static -const ENCODING *NS(encodings)[] = { - &ns(latin1_encoding).enc, - &ns(ascii_encoding).enc, - &ns(utf8_encoding).enc, - &ns(big2_encoding).enc, - &ns(big2_encoding).enc, - &ns(little2_encoding).enc, - &ns(utf8_encoding).enc /* NO_ENC */ +static const ENCODING *const NS(encodings)[] = { + &ns(latin1_encoding).enc, &ns(ascii_encoding).enc, + &ns(utf8_encoding).enc, &ns(big2_encoding).enc, + &ns(big2_encoding).enc, &ns(little2_encoding).enc, + &ns(utf8_encoding).enc /* NO_ENC */ }; -static -int NS(initScanProlog)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ - return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_PROLOG_STATE, ptr, end, nextTokPtr); +static int PTRCALL +NS(initScanProlog)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { + return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_PROLOG_STATE, + ptr, end, nextTokPtr); } -static -int NS(initScanContent)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ - return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_CONTENT_STATE, ptr, end, nextTokPtr); +static int PTRCALL +NS(initScanContent)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { + return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_CONTENT_STATE, + ptr, end, nextTokPtr); } -int NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, const char *name) -{ +int +NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, + const char *name) { int i = getEncodingIndex(name); if (i == UNKNOWN_ENC) return 0; @@ -54,10 +90,9 @@ int NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, const char *n return 1; } -static -const ENCODING *NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) -{ -#define ENCODING_MAX 128 +static const ENCODING * +NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) { +# define ENCODING_MAX 128 char buf[ENCODING_MAX]; char *p = buf; int i; @@ -73,26 +108,15 @@ const ENCODING *NS(findEncoding)(const ENCODING *enc, const char *ptr, const cha return NS(encodings)[i]; } -int NS(XmlParseXmlDecl)(int isGeneralTextEntity, - const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr, - const char **versionPtr, - const char **versionEndPtr, - const char **encodingName, - const ENCODING **encoding, - int *standalone) -{ - return doParseXmlDecl(NS(findEncoding), - isGeneralTextEntity, - enc, - ptr, - end, - badPtr, - versionPtr, - versionEndPtr, - encodingName, - encoding, - standalone); +int +NS(XmlParseXmlDecl)(int isGeneralTextEntity, const ENCODING *enc, + const char *ptr, const char *end, const char **badPtr, + const char **versionPtr, const char **versionEndPtr, + const char **encodingName, const ENCODING **encoding, + int *standalone) { + return doParseXmlDecl(NS(findEncoding), isGeneralTextEntity, enc, ptr, end, + badPtr, versionPtr, versionEndPtr, encodingName, + encoding, standalone); } + +#endif /* XML_TOK_NS_C */