From 0d9c75fd5eb20645d937f2deb22410eb2c72a7f0 Mon Sep 17 00:00:00 2001 From: Raul Metsma Date: Thu, 21 Mar 2024 14:23:11 +0200 Subject: [PATCH] Use libxml2 for ASiC parsing IB-7946 Signed-off-by: Raul Metsma --- etc/schema/OpenDocument_manifest.xsd | 63 ----------- etc/schema/OpenDocument_manifest_v1_2.xsd | 2 +- src/ASiC_E.cpp | 127 +++++++--------------- src/ASiC_E.h | 1 - src/ASiContainer.cpp | 8 +- src/ASiContainer.h | 2 +- src/CMakeLists.txt | 7 -- src/XMLDocument.h | 66 ++++++++--- 8 files changed, 98 insertions(+), 178 deletions(-) delete mode 100644 etc/schema/OpenDocument_manifest.xsd diff --git a/etc/schema/OpenDocument_manifest.xsd b/etc/schema/OpenDocument_manifest.xsd deleted file mode 100644 index f07e18ce5..000000000 --- a/etc/schema/OpenDocument_manifest.xsd +++ /dev/null @@ -1,63 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/etc/schema/OpenDocument_manifest_v1_2.xsd b/etc/schema/OpenDocument_manifest_v1_2.xsd index 9e5e86374..9fa8126f1 100644 --- a/etc/schema/OpenDocument_manifest_v1_2.xsd +++ b/etc/schema/OpenDocument_manifest_v1_2.xsd @@ -41,7 +41,7 @@ - + diff --git a/src/ASiC_E.cpp b/src/ASiC_E.cpp index 2482c0b11..a3ea553f5 100644 --- a/src/ASiC_E.cpp +++ b/src/ASiC_E.cpp @@ -22,18 +22,15 @@ #include "Conf.h" #include "DataFile_p.h" #include "SignatureXAdES_LTA.h" +#include "XMLDocument.h" #include "crypto/Digest.h" #include "crypto/Signer.h" #include "util/File.h" -#include "util/log.h" #include "util/ZipSerialize.h" -#include "xml/OpenDocument_manifest.hxx" -#include "xml/OpenDocument_manifest_v1_2.hxx" -#include "xml/SecureDOMParser.h" - -#include +#include #include +#include using namespace digidoc; using namespace digidoc::util; @@ -43,7 +40,7 @@ const string_view ASiC_E::ASIC_TM_PROFILE = "time-mark"; const string_view ASiC_E::ASIC_TS_PROFILE = "time-stamp"; const string_view ASiC_E::ASIC_TSA_PROFILE = "time-stamp-archive"; const string_view 
ASiC_E::ASIC_TMA_PROFILE = "time-mark-archive"; -const string ASiC_E::MANIFEST_NAMESPACE = "urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"; +constexpr string_view MANIFEST_NS {"urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"}; class ASiC_E::Private { @@ -166,7 +163,7 @@ unique_ptr ASiC_E::openInternal(const string &path) /** * Creates BDoc container manifest file and returns its path. - * + * * Note: If non-ascii characters are present in XML data, we depend on the LANG variable to be set properly * (see iconv --list for the list of supported encoding values for libiconv). * @@ -177,40 +174,25 @@ unique_ptr ASiC_E::openInternal(const string &path) void ASiC_E::createManifest(ostream &os) { DEBUG("ASiC_E::createManifest()"); - - try - { - manifest_1_2::Manifest manifest(manifest_1_2::Manifest::VersionType::cxx_1_2); - manifest.file_entry().push_back(make_unique("/", mediaType())); - for(const DataFile *file: dataFiles()) - manifest.file_entry().push_back(make_unique(file->fileName(), file->mediaType())); - - xml_schema::NamespaceInfomap map; - map["manifest"].name = ASiC_E::MANIFEST_NAMESPACE; - manifest_1_2::manifest(os, manifest, map, {}, xml_schema::Flags::dont_initialize); - if(os.fail()) - THROW("Failed to create manifest XML"); - } - catch(const xercesc::DOMException &e) - { - try { - string result = xsd::cxx::xml::transcode(e.getMessage()); - THROW("Failed to create manifest XML file. Error: %s", result.c_str()); - } catch(const xsd::cxx::xml::invalid_utf16_string & /* ex */) { - THROW("Failed to create manifest XML file."); - } - } - catch(const xml_schema::Exception &e) - { - THROW("Failed to create manifest XML file. 
Error: %s", e.what()); - } + auto doc = XMLDocument::create("manifest", MANIFEST_NS, "manifest"); + doc.setProperty("version", "1.2", MANIFEST_NS); + auto add = [&doc](string_view path, string_view mime) { + auto file = doc.addChild("file-entry", MANIFEST_NS); + file.setProperty("full-path", path, MANIFEST_NS); + file.setProperty("media-type", mime, MANIFEST_NS); + }; + add("/", mediaType()); + for(const DataFile *file: dataFiles()) + add(file->fileName(), file->mediaType()); + if(!doc.save(os)) + THROW("Failed to create manifest XML"); } /** * Parses manifest file and checks that files described in manifest exist, also * checks that no extra file do exist that are not described in manifest.xml. * - * Note: If non-ascii characters are present in XML data, we depend on the LANG variable to be set properly + * Note: If non-ascii characters are present in XML data, we depend on the LANG variable to be set properly * (see iconv --list for the list of supported encoding values for libiconv). * * @param path directory on disk of the BDOC container. 
@@ -231,45 +213,45 @@ void ASiC_E::parseManifestAndLoadFiles(const ZipSerialize &z) { stringstream manifestdata; z.extract("META-INF/manifest.xml", manifestdata); - xml_schema::Properties p; - p.schema_location(ASiC_E::MANIFEST_NAMESPACE, - File::fullPathUrl(Conf::instance()->xsdPath() + "/OpenDocument_manifest.xsd")); - unique_ptr doc = SecureDOMParser(p.schema_location(), true).parseIStream(manifestdata); - unique_ptr manifest = manifest::manifest(*doc, {}, p); - set manifestFiles; + set manifestFiles; bool mimeFound = false; - for(const manifest::File_entry &file: manifest->file_entry()) + auto doc = XMLDocument::openStream(manifestdata, "manifest", MANIFEST_NS); + if(!doc.validateSchema(File::path(Conf::instance()->xsdPath(), "OpenDocument_manifest_v1_2.xsd"))) + THROW("Failed to parse manifest XML"); + for(auto file = doc/"file-entry"; file; file++) { - DEBUG("full_path = '%s', media_type = '%s'", file.full_path().c_str(), file.media_type().c_str()); + auto full_path = file.property("full-path", MANIFEST_NS); + auto media_type = file.property("media-type", MANIFEST_NS); + DEBUG("full_path = '%s', media_type = '%s'", full_path.data(), media_type.data()); - if(manifestFiles.find(file.full_path()) != manifestFiles.end()) - THROW("Manifest multiple entries defined for file '%s'.", file.full_path().c_str()); + if(manifestFiles.find(full_path) != manifestFiles.end()) + THROW("Manifest multiple entries defined for file '%s'.", full_path.data()); // ODF does not specify that mimetype should be first in manifest - if(file.full_path() == "/") + if(full_path == "/") { - if(mediaType() != file.media_type()) - THROW("Manifest has incorrect container media type defined '%s', expecting '%s'.", file.media_type().c_str(), mediaType().c_str()); + if(mediaType() != media_type) + THROW("Manifest has incorrect container media type defined '%s', expecting '%s'.", media_type.data(), mediaType().c_str()); mimeFound = true; continue; } - if(file.full_path().back() == '/') // Skip 
Directory entries + if(full_path.back() == '/') // Skip Directory entries continue; - auto fcount = size_t(count(list.cbegin(), list.cend(), file.full_path())); + auto fcount = size_t(count(list.cbegin(), list.cend(), full_path)); if(fcount < 1) - THROW("File described in manifest '%s' does not exist in container.", file.full_path().c_str()); + THROW("File described in manifest '%s' does not exist in container.", full_path.data()); if(fcount > 1) - THROW("Found multiple references of file '%s' in zip container.", file.full_path().c_str()); + THROW("Found multiple references of file '%s' in zip container.", full_path.data()); - manifestFiles.insert(file.full_path()); + manifestFiles.insert(full_path); if(mediaType() == MIMETYPE_ADOC && - (file.full_path().compare(0, 9, "META-INF/") == 0 || - file.full_path().compare(0, 9, "metadata/") == 0)) - d->metadata.push_back(new DataFilePrivate(dataStream(file.full_path(), z), file.full_path(), file.media_type())); + (full_path.compare(0, 9, "META-INF/") == 0 || + full_path.compare(0, 9, "metadata/") == 0)) + d->metadata.push_back(new DataFilePrivate(dataStream(string(full_path), z), string(full_path), string(media_type))); else - addDataFilePrivate(dataStream(file.full_path(), z), file.full_path(), file.media_type()); + addDataFilePrivate(dataStream(string(full_path), z), string(full_path), string(media_type)); } if(!mimeFound) THROW("Manifest is missing mediatype file entry."); @@ -307,35 +289,6 @@ void ASiC_E::parseManifestAndLoadFiles(const ZipSerialize &z) THROW("File '%s' found in container is not described in manifest.", file.c_str()); } } - catch(const xercesc::DOMException &e) - { - try { - string result = xsd::cxx::xml::transcode(e.getMessage()); - THROW("Failed to create manifest XML file. 
Error: %s", result.c_str()); - } catch(const xsd::cxx::xml::invalid_utf16_string & /* ex */) { - THROW("Failed to create manifest XML file."); - } - } - catch(const xsd::cxx::xml::invalid_utf16_string &) - { - THROW("Failed to parse manifest XML: %s", Conf::instance()->xsdPath().c_str()); - } - catch(const xsd::cxx::xml::properties::argument & /* e */) - { - THROW("Failed to parse manifest XML: %s", Conf::instance()->xsdPath().c_str()); - } - catch(const xsd::cxx::tree::unexpected_element &e) - { - THROW("Failed to parse manifest XML: %s %s %s", Conf::instance()->xsdPath().c_str(), e.expected_name().c_str(), e.encountered_name().c_str()); - } - catch(const xml_schema::Exception& e) - { - THROW("Failed to parse manifest XML: %s (xsd path: %s)", e.what(), Conf::instance()->xsdPath().c_str()); - } - catch(const xercesc::OutOfMemoryException &) - { - THROW("Failed to parse manifest XML: out of memory"); - } catch(const Exception &e) { THROW_CAUSE(e, "Failed to parse manifest"); diff --git a/src/ASiC_E.h b/src/ASiC_E.h index d54ab69ba..0ed9b99fe 100644 --- a/src/ASiC_E.h +++ b/src/ASiC_E.h @@ -40,7 +40,6 @@ namespace digidoc static const std::string_view ASIC_TS_PROFILE; static const std::string_view ASIC_TMA_PROFILE; static const std::string_view ASIC_TSA_PROFILE; - static const std::string MANIFEST_NAMESPACE; ~ASiC_E() final; void save(const std::string &path = {}) final; diff --git a/src/ASiContainer.cpp b/src/ASiContainer.cpp index 6efcd1ffb..ba8628056 100644 --- a/src/ASiContainer.cpp +++ b/src/ASiContainer.cpp @@ -172,7 +172,7 @@ void ASiContainer::addDataFile(const string &path, const string &mediaType) ZipSerialize::Properties prop { appInfo(), File::modifiedTime(path), File::fileSize(path) }; bool useTempFile = prop.size > MAX_MEM_FILE; - zproperty(File::fileName(path), std::move(prop)); + zproperty(fileName, std::move(prop)); unique_ptr is; if(useTempFile) { @@ -185,7 +185,7 @@ void ASiContainer::addDataFile(const string &path, const string &mediaType) *data 
<< file.rdbuf(); is = std::move(data); } - addDataFilePrivate(std::move(is), fileName, mediaType); + addDataFilePrivate(std::move(is), std::move(fileName), mediaType); } void ASiContainer::addDataFile(unique_ptr is, const string &fileName, const string &mediaType) @@ -208,9 +208,9 @@ void ASiContainer::addDataFileChecks(const string &fileName, const string &media THROW("MediaType does not meet format requirements (RFC2045, section 5.1) '%s'.", mediaType.c_str()); } -void ASiContainer::addDataFilePrivate(unique_ptr is, const string &fileName, const string &mediaType) +void ASiContainer::addDataFilePrivate(unique_ptr is, string fileName, string mediaType) { - d->documents.push_back(new DataFilePrivate(std::move(is), fileName, mediaType)); + d->documents.push_back(new DataFilePrivate(std::move(is), std::move(fileName), std::move(mediaType))); } /** diff --git a/src/ASiContainer.h b/src/ASiContainer.h index bb7487738..de6fb098a 100644 --- a/src/ASiContainer.h +++ b/src/ASiContainer.h @@ -57,7 +57,7 @@ namespace digidoc protected: ASiContainer(const std::string &mimetype); - void addDataFilePrivate(std::unique_ptr is, const std::string &fileName, const std::string &mediaType); + void addDataFilePrivate(std::unique_ptr is, std::string fileName, std::string mediaType); Signature* addSignature(std::unique_ptr &&signature); std::unique_ptr dataStream(const std::string &path, const ZipSerialize &z) const; std::unique_ptr load(const std::string &path, bool requireMimetype, const std::set &supported); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 159becabf..3f8bacaa9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -14,12 +14,6 @@ configure_file( ${CMAKE_SOURCE_DIR}/etc/digidocpp.conf.cmake digidocpp.conf ) set(SCHEMA_DIR ${CMAKE_SOURCE_DIR}/etc/schema) set(XML_DIR ${CMAKE_CURRENT_BINARY_DIR}/xml) -XSD_SCHEMA( xsd_SRCS IGNORE ${XML_DIR} ${SCHEMA_DIR}/OpenDocument_manifest.xsd - --root-element manifest - --namespace-map 
urn:oasis:names:tc:opendocument:xmlns:manifest:1.0=digidoc::manifest ) -XSD_SCHEMA( xsd_SRCS IGNORE ${XML_DIR} ${SCHEMA_DIR}/OpenDocument_manifest_v1_2.xsd - --root-element manifest - --namespace-map urn:oasis:names:tc:opendocument:xmlns:manifest:1.0=digidoc::manifest_1_2 ) XSD_SCHEMA( xsd_SRCS XML_HEADER ${XML_DIR} ${SCHEMA_DIR}/xmldsig-core-schema.xsd --root-element-none --namespace-map http://www.w3.org/2000/09/xmldsig\#=digidoc::dsig @@ -82,7 +76,6 @@ file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/tslcerts.h "};\n}") set( SCHEMA_FILES ${SCHEMA_DIR}/conf.xsd - ${SCHEMA_DIR}/OpenDocument_manifest.xsd ${SCHEMA_DIR}/OpenDocument_manifest_v1_2.xsd ${SCHEMA_DIR}/xmldsig-core-schema.xsd ${SCHEMA_DIR}/XAdES01903v132-201601.xsd diff --git a/src/XMLDocument.h b/src/XMLDocument.h index cd02f4f82..06a25f2e1 100644 --- a/src/XMLDocument.h +++ b/src/XMLDocument.h @@ -25,6 +25,8 @@ #include #include +#include +#include namespace digidoc { @@ -55,10 +57,15 @@ struct XMLElem template constexpr static C find(C n, xmlElementType type) noexcept { - for(; n; n = n->next) - if(n->type == type) - return n; - return {}; + for(; n && n->type != type; n = n->next); + return n; + } + + template + constexpr static C find(C n, sv name, sv ns) noexcept + { + for(; n && (n.name() != name || n.ns() != ns); ++n); + return n; } constexpr static sv to_string_view(const xmlChar *str) noexcept @@ -87,6 +94,13 @@ struct XMLElem return *this; } + constexpr auto operator++(int) noexcept + { + auto c = *this; + d = find(operator++(), c.name(), c.ns()).d; + return c; + } + constexpr operator sv() const noexcept { auto text = find(d ? d->children : nullptr, XML_TEXT_NODE); @@ -98,10 +112,6 @@ struct XMLElem struct XMLNode: public XMLElem { - struct Name_NS { - sv name, ns; - }; - struct iterator: XMLElem { using iterator_category = std::forward_iterator_tag; @@ -137,12 +147,12 @@ struct XMLNode: public XMLElem constexpr sv property(sv name, sv ns = {}) const noexcept { - for(XMLElem a{d ? 
d->properties : nullptr}; a; ++a) - { - if(a.name() == name && a.ns() == ns) - return a; - } - return {}; + return find(XMLElem{d ? d->properties : nullptr}, name, ns); + } + + void setProperty(sv name, sv value, sv ns) const noexcept + { + setProperty(name, value, searchNS(ns)); } void setProperty(sv name, sv value, xmlNsPtr ns = {}) const noexcept @@ -168,6 +178,11 @@ struct XMLNode: public XMLElem xmlFree(content); return *this; } + + constexpr XMLNode operator/(sv name) const noexcept + { + return find(*begin(), name, ns()); + } }; struct XMLDocument: public unique_xml_t, public XMLNode @@ -186,6 +201,20 @@ struct XMLDocument: public unique_xml_t, public XMLNode : XMLDocument(xmlParseFile(path.data()), name) {} + static XMLDocument openStream(std::istream &is, std::string_view name = {}, std::string_view ns = {}) + { + auto ctxt = make_unique_ptr(xmlCreateIOParserCtxt(nullptr, nullptr, [](void *context, char *buffer, int len) -> int { + auto *is = static_cast<std::istream *>(context); + is->read(buffer, len); + return is->good() || is->eof() ? int(is->gcount()) : -1; + }, nullptr, &is, XML_CHAR_ENCODING_NONE), xmlFreeParserCtxt); + ctxt->linenumbers = 1; + auto result = xmlParseDocument(ctxt.get()); + if(result != 0 || !ctxt->wellFormed) + THROW("%s", ctxt->lastError.message); + return {ctxt->myDoc, name, ns}; + } + static XMLDocument create(std::string_view name = {}, std::string_view href = {}, std::string_view prefix = {}) noexcept { XMLDocument doc(xmlNewDoc(nullptr)); @@ -204,6 +233,15 @@ struct XMLDocument: public unique_xml_t, public XMLNode return xmlSaveFormatFileEnc(path.data(), get(), "UTF-8", 1) > 0; } + bool save(std::ostream &os) const noexcept + { + auto *buf = xmlOutputBufferCreateIO([](void *context, const char *buffer, int len) { + auto *os = static_cast<std::ostream *>(context); + return os->write(buffer, len) ? 
len : -1; + }, nullptr, &os, nullptr); + return xmlSaveFormatFileTo(buf, get(), "UTF-8", 1) > 0; + } + bool validateSchema(const std::string &schemaPath) const noexcept { auto parser = make_unique_ptr(xmlSchemaNewParserCtxt(schemaPath.c_str()), xmlSchemaFreeParserCtxt);