From 6234b109866d64c304bbcd2dfc689126937bcd1a Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 28 Sep 2022 12:04:50 -0400 Subject: [PATCH 1/2] wip: use xmlsec's noxxe entity loader this segfaults with seed 12441 and I have not yet been able to track it down --- LICENSE-DEPENDENCIES.md | 30 +++++++++++++++++ ext/nokogiri/nokogiri.c | 2 ++ ext/nokogiri/nokogiri.h | 2 ++ ext/nokogiri/xml_document.c | 29 +++++++++++++++-- ext/nokogiri/xmlsec.c | 64 +++++++++++++++++++++++++++++++++++++ 5 files changed, 125 insertions(+), 2 deletions(-) create mode 100644 ext/nokogiri/xmlsec.c diff --git a/LICENSE-DEPENDENCIES.md b/LICENSE-DEPENDENCIES.md index d2d190dd52..e3ae9b6e4f 100644 --- a/LICENSE-DEPENDENCIES.md +++ b/LICENSE-DEPENDENCIES.md @@ -412,6 +412,36 @@ http://xmlsoft.org/libxslt/ ---------------------------------------------------------------------- +### xmlsec + +MIT + +https://www.aleksey.com/xmlsec/ + + Copyright (C) 2002-2016 Aleksey Sanin . All Rights Reserved. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is fur- + nished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT- + NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ALEKSEY SANIN BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CON- + NECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + Except as contained in this notice, the name of Aleksey Sanin shall not + be used in advertising or otherwise to promote the sale, use or other deal- + ings in this Software without prior written authorization from him. + + ### zlib zlib license diff --git a/ext/nokogiri/nokogiri.c b/ext/nokogiri/nokogiri.c index f2e39ab014..70c32df90e 100644 --- a/ext/nokogiri/nokogiri.c +++ b/ext/nokogiri/nokogiri.c @@ -47,6 +47,7 @@ void noko_init_html_entity_lookup(void); void noko_init_html_sax_parser_context(void); void noko_init_html_sax_push_parser(void); void noko_init_gumbo(void); +void noko_init_xmlsec(void); void noko_init_test_global_handlers(void); static ID id_read, id_write, id_external_encoding; @@ -250,6 +251,7 @@ Init_nokogiri() noko_init_xml_document(); noko_init_html_document(); noko_init_gumbo(); + noko_init_xmlsec(); noko_init_test_global_handlers(); diff --git a/ext/nokogiri/nokogiri.h b/ext/nokogiri/nokogiri.h index 63549ecec5..8d70319c91 100644 --- a/ext/nokogiri/nokogiri.h +++ b/ext/nokogiri/nokogiri.h @@ -225,6 +225,8 @@ NORETURN_DECL void Nokogiri_error_raise(void *ctx, xmlErrorPtr error); void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler, const char *function_name) ; +xmlParserInputPtr Nokogiri_xmlSecNoXxeExternalEntityLoader(const char *URL, const char *ID, xmlParserCtxtPtr ctxt); + static inline nokogiriSAXTuplePtr nokogiri_sax_tuple_new(xmlParserCtxtPtr ctxt, VALUE self) diff --git a/ext/nokogiri/xml_document.c b/ext/nokogiri/xml_document.c index 187ba1d86f..0063f4e9f9 100644 --- a/ext/nokogiri/xml_document.c +++ b/ext/nokogiri/xml_document.c @@ -271,18 +271,30 @@ read_io(VALUE klass, VALUE error_list = rb_ary_new(); VALUE document; xmlDocPtr doc; + int parse_options_int = (int)NUM2INT(rb_funcall(options, rb_intern("to_i"), 0)); + xmlExternalEntityLoader old_loader = 0; xmlResetLastError(); xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher); + if (parse_options_int & XML_PARSE_NONET) { + old_loader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader(Nokogiri_xmlSecNoXxeExternalEntityLoader); + } + doc = xmlReadIO( (xmlInputReadCallback)noko_io_read, (xmlInputCloseCallback)noko_io_close, (void *)io, c_url, c_enc, - (int)NUM2INT(options) + parse_options_int ); + + if (old_loader) { + xmlSetExternalEntityLoader(old_loader); + } + xmlSetStructuredErrorFunc(NULL, NULL); if (doc == NULL) { @@ -325,10 +337,23 @@ read_memory(VALUE klass, VALUE error_list = rb_ary_new(); VALUE document; xmlDocPtr doc; + int parse_options_int = (int)NUM2INT(rb_funcall(options, rb_intern("to_i"), 0)); + xmlExternalEntityLoader old_loader = 0; xmlResetLastError(); xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher); - doc = xmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options)); + + if (parse_options_int & XML_PARSE_NONET) { + old_loader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader(Nokogiri_xmlSecNoXxeExternalEntityLoader); + } + + doc = xmlReadMemory(c_buffer, len, c_url, c_enc, parse_options_int); + + if (old_loader) { + xmlSetExternalEntityLoader(old_loader); + } + xmlSetStructuredErrorFunc(NULL, NULL); if (doc == NULL) { diff --git a/ext/nokogiri/xmlsec.c b/ext/nokogiri/xmlsec.c new file mode 100644 index 0000000000..0753f71b07 --- /dev/null +++ b/ext/nokogiri/xmlsec.c @@ -0,0 +1,64 @@ +#include + +/* + * This file contains code originally written as part of the xmlsec library. + * + * https://www.aleksey.com/xmlsec/ + * + * Copyright (C) 2002-2022 Aleksey Sanin . All Rights Reserved. + */ + +/* --- taken from include/xmlsec/errors.h --- */ +/** + * xmlSecErrorsSafeString: + * @str: the string. + * + * Macro. Returns @str if it is not NULL or pointer to "NULL" otherwise. + */ +#define xmlSecErrorsSafeString(str) \ + (((str) != NULL) ? ((const char*)(str)) : (const char*)"NULL") + + +/* --- taken from src/xmlsec.c --- */ +/* + * Custom external entity handler, denies all files except the initial + * document we're parsing (input_id == 1) + */ +/* default external entity loader, pointer saved during xmlInit */ +static xmlExternalEntityLoader +xmlSecDefaultExternalEntityLoader = NULL; + +/* + * xmlSecNoXxeExternalEntityLoader: + * @URL: the URL for the entity to load + * @ID: public ID for the entity to load + * @ctxt: XML parser context, or NULL + * + * See libxml2's xmlLoadExternalEntity and xmlNoNetExternalEntityLoader. + * This function prevents any external (file or network) entities from being loaded. + */ +xmlParserInputPtr +Nokogiri_xmlSecNoXxeExternalEntityLoader( + const char *URL, + const char *ID, + xmlParserCtxtPtr ctxt +) +{ + if (ctxt == NULL) { + return (NULL); + } + if (ctxt->input_id == 1) { + return xmlSecDefaultExternalEntityLoader((const char *) URL, ID, ctxt); + } + xmlParserError(ctxt, "NONET disallows external entity '%s'\n", xmlSecErrorsSafeString(URL)); + return (NULL); +} + + +void +noko_init_xmlsec() +{ + if (!xmlSecDefaultExternalEntityLoader) { + xmlSecDefaultExternalEntityLoader = xmlGetExternalEntityLoader(); + } +} From b2d5f4d13beee9e7b63b30c040cebc6ee7117c6a Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 29 Sep 2022 08:38:34 -0400 Subject: [PATCH 2/2] wip - this seems to work around the segfault --- ext/nokogiri/html4_sax_parser_context.c | 1 + ext/nokogiri/nokogiri.h | 1 + ext/nokogiri/xmlsec.c | 5 +++++ 3 files changed, 7 insertions(+) diff --git a/ext/nokogiri/html4_sax_parser_context.c b/ext/nokogiri/html4_sax_parser_context.c index 54adca4cb4..28b5aacf6f 100644 --- a/ext/nokogiri/html4_sax_parser_context.c +++ b/ext/nokogiri/html4_sax_parser_context.c @@ -99,6 +99,7 @@ parse_with(VALUE self, VALUE sax_handler) ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler); xmlSetStructuredErrorFunc(NULL, NULL); + noko_xmlsec_reset_entity_loader(); rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt); diff --git a/ext/nokogiri/nokogiri.h b/ext/nokogiri/nokogiri.h index 8d70319c91..88775a00e5 100644 --- a/ext/nokogiri/nokogiri.h +++ b/ext/nokogiri/nokogiri.h @@ -226,6 +226,7 @@ void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr c const char *function_name) ; xmlParserInputPtr Nokogiri_xmlSecNoXxeExternalEntityLoader(const char *URL, const char *ID, xmlParserCtxtPtr ctxt); +void noko_xmlsec_reset_entity_loader(void); static inline nokogiriSAXTuplePtr diff --git a/ext/nokogiri/xmlsec.c b/ext/nokogiri/xmlsec.c index 0753f71b07..85546b2e6f 100644 --- a/ext/nokogiri/xmlsec.c +++ b/ext/nokogiri/xmlsec.c @@ -55,6 +55,11 @@ Nokogiri_xmlSecNoXxeExternalEntityLoader( } +void noko_xmlsec_reset_entity_loader(void) +{ + xmlSetExternalEntityLoader(xmlSecDefaultExternalEntityLoader); +} + void noko_init_xmlsec() {