From 95aafb9d261be152b8281dfcb9166edd7f06ff38 Mon Sep 17 00:00:00 2001 From: Raul Metsma Date: Thu, 21 Mar 2024 14:23:11 +0200 Subject: [PATCH] Use libxml2 for DDoc parsing IB-7946 Signed-off-by: Raul Metsma --- src/SiVaContainer.cpp | 133 ++++++++++++------------------------------ src/XMLDocument.h | 101 ++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+), 97 deletions(-) diff --git a/src/SiVaContainer.cpp b/src/SiVaContainer.cpp index 250086d50..5b90e5225 100644 --- a/src/SiVaContainer.cpp +++ b/src/SiVaContainer.cpp @@ -27,35 +27,27 @@ #include "ASiContainer.h" #include "Conf.h" #include "DataFile_p.h" -#include "Signature.h" +#include "XMLDocument.h" #include "crypto/Connect.h" -#include "crypto/Digest.h" #include "util/File.h" -#include "util/log.h" -#include "util/ZipSerialize.h" -#include "xml/xml.hxx" -#include "xml/SecureDOMParser.h" #include "json.hpp" -#include -#include -#include - -#define XSD_CXX11 -#include -#include - -#include #include +#include using namespace digidoc; using namespace digidoc::util; using namespace std; -using namespace xercesc; using json = nlohmann::json; -static auto base64_decode(const XMLCh *in) { +template +constexpr T base64_enc_size(T n) noexcept +{ + return ((n + 2) / 3) << 2; +} + +static auto base64_decode(string_view data) { static constexpr array T{ 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, @@ -70,9 +62,8 @@ static auto base64_decode(const XMLCh *in) { auto out = make_unique(); int value = 0; int bits = -8; - for(; in; ++in) + for(auto c: data) { - const char c(*in); if(c == '\r' || c == '\n' || c == ' ') continue; uint8_t check = T[c]; @@ -197,7 +188,7 @@ SiVaContainer::SiVaContainer(const string &path, ContainerOpenCB *cb, bool useHa if(useHashCode && cb && !cb->validateOnline()) THROW("Online validation disabled"); - array buf{}; + array buf{}; string b64; is->clear(); is->seekg(0); @@ -207,11 +198,10 @@ SiVaContainer::SiVaContainer(const string &path, ContainerOpenCB *cb, bool useHa if(is->gcount() <= 0) break; - XMLSize_t size = 0; - XMLByte *out = Base64::encode(buf.data(), XMLSize_t(is->gcount()), &size); - if(out) - b64.append((char*)out, size); - delete out; + size_t pos = b64.size(); + b64.resize(b64.size() + base64_enc_size(buf.size())); + int size = EVP_EncodeBlock((unsigned char*)&b64[pos], buf.data(), int(is->gcount())); + b64.resize(pos + size); } ifs.reset(); @@ -257,13 +247,7 @@ SiVaContainer::SiVaContainer(const string &path, ContainerOpenCB *cb, bool useHa s->_tsTime = info.value("timestampCreationTime", {}); s->_ocspTime = info.value("ocspResponseCreationTime", {}); if(info.contains("timeAssertionMessageImprint")) - { - string base64 = info["timeAssertionMessageImprint"]; - XMLSize_t size = 0; - XMLByte *message = Base64::decode((const XMLByte*)base64.c_str(), &size); - s->_messageImprint.assign(message, message + size); - delete message; - } + s->_messageImprint = from_base64(info["timeAssertionMessageImprint"].get()); for(const json &signerRole: info.value("signerRole", {})) s->_signerRoles.push_back(signerRole["claimedRole"]); if(json signatureProductionPlace = info.value("signatureProductionPlace", {}); !signatureProductionPlace.is_null()) @@ -276,17 +260,15 @@ SiVaContainer::SiVaContainer(const string &path, ContainerOpenCB *cb, bool useHa } for(const json &certificate: signature.value("certificates", {})) { - XMLSize_t size = 0; - XMLByte *der = Base64::decode((const XMLByte*)certificate.value("content", {}).data(), &size); + auto der = from_base64(certificate.value("content", {})); if(certificate["type"] == "SIGNING") - s->_signingCertificate = X509Cert(der, size, X509Cert::Der); + s->_signingCertificate = X509Cert(der, X509Cert::Der); if(certificate["type"] == "REVOCATION") - s->_ocspCertificate = X509Cert(der, size, X509Cert::Der); + s->_ocspCertificate = X509Cert(der, X509Cert::Der); if(certificate["type"] == "SIGNATURE_TIMESTAMP") - s->_tsCertificate = X509Cert(der, size, X509Cert::Der); + s->_tsCertificate = X509Cert(der, X509Cert::Der); if(certificate["type"] == "ARCHIVE_TIMESTAMP") - s->_tsaCertificate = X509Cert(der, size, X509Cert::Der); - delete der; + s->_tsaCertificate = X509Cert(der, X509Cert::Der); } for(const json &error: signature.value("errors", {})) { @@ -363,76 +345,33 @@ unique_ptr SiVaContainer::openInternal(const string &path, ContainerO unique_ptr SiVaContainer::parseDDoc(bool useHashCode) { - namespace xml = xsd::cxx::xml; try { - unique_ptr dom(SecureDOMParser().parseIStream(*d->ddoc)); - DOMNodeList *nodeList = dom->getElementsByTagName(u"DataFile"); - for(XMLSize_t i = 0; i < nodeList->getLength(); ++i) + auto doc = XMLDocument::openStream(*d->ddoc); + for(auto dataFile = doc/"DataFile"; dataFile; dataFile++) { - auto *item = static_cast(nodeList->item(i)); - if(!item) - continue; - - if(XMLString::compareString(item->getAttribute(u"ContentType"), u"HASHCODE") == 0) + auto contentType = dataFile.property("ContentType"); + if(contentType == "HASHCODE") THROW("Currently supports only content types EMBEDDED_BASE64 for DDOC format"); - if(XMLString::compareString(item->getAttribute(u"ContentType"), u"EMBEDDED_BASE64") != 0) + if(contentType != "EMBEDDED_BASE64") continue; - - if(const XMLCh *b64 = item->getTextContent()) - { - d->dataFiles.push_back(new DataFilePrivate(base64_decode(b64), - xml::transcode(item->getAttribute(u"Filename")), - xml::transcode(item->getAttribute(u"MimeType")), - xml::transcode(item->getAttribute(u"Id")))); - } - + d->dataFiles.push_back(new DataFilePrivate(base64_decode(dataFile), + string(dataFile.property("Filename")), + string(dataFile.property("MimeType")), + string(dataFile.property("Id")))); if(!useHashCode) continue; Digest calc(URI_SHA1); - SecureDOMParser::calcDigestOnNode(&calc, "http://www.w3.org/TR/2001/REC-xml-c14n-20010315", item); - vector digest = calc.result(); - if(XMLSize_t size = 0; XMLByte *out = Base64::encode(digest.data(), XMLSize_t(digest.size()), &size)) - { - item->setAttribute(u"ContentType", u"HASHCODE"); - item->setAttribute(u"DigestType", u"sha1"); - xml::string outXMLCh(reinterpret_cast(out)); - item->setAttribute(u"DigestValue", outXMLCh.c_str()); - item->setTextContent(nullptr); - delete out; - } + doc.c14n(&calc, XMLDocument::C14D_ID_1_0, dataFile); + dataFile.setProperty("ContentType", "HASHCODE"); + dataFile.setProperty("DigestType", "sha1"); + dataFile.setProperty("DigestValue", to_base64(calc.result())); + dataFile = std::string_view{}; } - - DOMImplementation *pImplement = DOMImplementationRegistry::getDOMImplementation(u"LS"); - unique_ptr pDomLsOutput(pImplement->createLSOutput()); - unique_ptr pSerializer(pImplement->createLSSerializer()); auto result = make_unique(); - xml::dom::ostream_format_target out(*result); - pDomLsOutput->setByteStream(&out); - pSerializer->setNewLine(u"\n"); - pSerializer->write(dom.get(), pDomLsOutput.get()); + doc.save(*result); return result; } - catch(const XMLException& e) - { - try { - string result = xml::transcode(e.getMessage()); - THROW("Failed to parse DDoc XML: %s", result.c_str()); - } catch(const xml::invalid_utf16_string & /* ex */) { - THROW("Failed to parse DDoc XML."); - } - } - catch(const DOMException& e) - { - try { - string result = xml::transcode(e.getMessage()); - THROW("Failed to parse DDoc XML: %s", result.c_str()); - } catch(const xml::invalid_utf16_string & /* ex */) { - THROW("Failed to parse DDoc XML."); - } - } catch(const xml::invalid_utf16_string & /* ex */) { - THROW("Failed to parse DDoc XML."); - } catch(const Exception &) { throw; diff --git a/src/XMLDocument.h b/src/XMLDocument.h index 5b74cefe7..b89b7ea56 100644 --- a/src/XMLDocument.h +++ b/src/XMLDocument.h @@ -19,10 +19,14 @@ #pragma once +#include "crypto/Digest.h" #include "util/log.h" #include #include +#include // needs to be last to workaround old libxml2 errors + +#include #include #include @@ -46,6 +50,54 @@ constexpr std::unique_ptr make_unique_ptr(T *p, D d) noexcept return {p, d}; } +static std::vector from_base64(std::string_view data) +{ + static constexpr std::string_view whitespace {" \n\r\f\t\v"}; + std::vector result(EVP_DECODE_LENGTH(data.size()), 0); + size_t dataPos = 0; + int size = 0; + auto ctx = make_unique_ptr(EVP_ENCODE_CTX_new(), EVP_ENCODE_CTX_free); + EVP_DecodeInit(ctx.get()); + + for(auto pos = data.find_first_of(whitespace); + !data.empty(); + pos = data.find_first_of(whitespace), dataPos += size_t(size)) + { + auto sub = data.substr(0, pos); + if(pos == std::string_view::npos) + data = {}; + else + data.remove_prefix(pos + 1); + if(EVP_DecodeUpdate(ctx.get(), &result[dataPos], &size, (const unsigned char*)sub.data(), int(sub.size())) >= 0) + continue; + result.clear(); + return result; + } + + if(EVP_DecodeFinal(ctx.get(), &result[dataPos], &size) == 1) + result.resize(dataPos + size_t(size)); + else + result.clear(); + return result; +} + +static std::string to_base64(const std::vector &data) +{ + std::string result(EVP_ENCODE_LENGTH(data.size()), 0); + auto ctx = make_unique_ptr(EVP_ENCODE_CTX_new(), EVP_ENCODE_CTX_free); + EVP_EncodeInit(ctx.get()); + int size{}; + if(EVP_EncodeUpdate(ctx.get(), (unsigned char*)result.data(), &size, data.data(), int(data.size())) < 1) + { + result.clear(); + return result; + } + auto pos = size_t(size); + EVP_EncodeFinal(ctx.get(), (unsigned char*)&result[pos], &size); + result.resize(pos + size_t(size)); + return result; +} + template struct XMLElem { @@ -196,6 +248,13 @@ struct XMLNode: public XMLElem struct XMLDocument: public unique_xml_t, public XMLNode { + static constexpr std::string_view C14D_ID_1_0 {"http://www.w3.org/TR/2001/REC-xml-c14n-20010315"}; + static constexpr std::string_view C14D_ID_1_0_COM {"http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments"}; + static constexpr std::string_view C14D_ID_1_1 {"http://www.w3.org/2006/12/xml-c14n11"}; + static constexpr std::string_view C14D_ID_1_1_COM {"http://www.w3.org/2006/12/xml-c14n11#WithComments"}; + static constexpr std::string_view C14D_ID_1_0_EXC {"http://www.w3.org/2001/10/xml-exc-c14n#"}; + static constexpr std::string_view C14D_ID_1_0_EXC_COM {"http://www.w3.org/2001/10/xml-exc-c14n#WithComments"}; + using XMLNode::operator bool; XMLDocument(element_type *ptr, std::string_view _name = {}, std::string_view _ns = {}) noexcept @@ -234,6 +293,48 @@ struct XMLDocument: public unique_xml_t, public XMLNode return doc; } + void c14n(Digest *digest, std::string_view algo, XMLNode node) + { + xmlC14NMode mode = XML_C14N_1_0; + int with_comments = 0; + if(algo == C14D_ID_1_0) {} + else if(algo == C14D_ID_1_0_COM) + with_comments = 1; + else if(algo == C14D_ID_1_1) + mode = XML_C14N_1_1; + else if(algo == C14D_ID_1_1_COM) + { + mode = XML_C14N_1_1; + with_comments = 1; + } + else if(algo == C14D_ID_1_0_EXC) + mode = XML_C14N_EXCLUSIVE_1_0; + else if(algo == C14D_ID_1_0_EXC_COM) + { + mode = XML_C14N_EXCLUSIVE_1_0; + with_comments = 1; + } + else if(!algo.empty()) + THROW("Unsupported canonicalization method '%.*s'", int(algo.size()), algo.data()); + auto *buf = xmlOutputBufferCreateIO([](void *context, const char *buffer, int len) { + auto *digest = static_cast(context); + digest->update(pcxmlChar(buffer), size_t(len)); + return len; + }, nullptr, digest, nullptr); + int size = xmlC14NExecute(get(), [](void *root, xmlNodePtr node, xmlNodePtr parent) constexpr noexcept { + if(root == node) + return 1; + for(; parent; parent = parent->parent) + { + if(root == parent) + return 1; + } + return 0; + }, node.d, mode, nullptr, with_comments, buf); + if(size < 0) + THROW("Failed to canonicalizate input"); + } + bool save(std::string_view path) const noexcept { return xmlSaveFormatFileEnc(path.data(), get(), "UTF-8", 1) > 0;