Skip to content

Commit

Permalink
Use libxml2 for DDoc parsing
Browse files Browse the repository at this point in the history
IB-7946

Signed-off-by: Raul Metsma <raul@metsma.ee>
  • Loading branch information
metsma committed Jul 1, 2024
1 parent 3970c7c commit 0f7aa01
Show file tree
Hide file tree
Showing 2 changed files with 138 additions and 97 deletions.
133 changes: 36 additions & 97 deletions src/SiVaContainer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,35 +27,27 @@
#include "ASiContainer.h"
#include "Conf.h"
#include "DataFile_p.h"
#include "Signature.h"
#include "XMLDocument.h"
#include "crypto/Connect.h"
#include "crypto/Digest.h"
#include "util/File.h"
#include "util/log.h"
#include "util/ZipSerialize.h"
#include "xml/xml.hxx"
#include "xml/SecureDOMParser.h"

#include "json.hpp"

#include <xercesc/dom/DOM.hpp>
#include <xercesc/framework/MemBufFormatTarget.hpp>
#include <xercesc/util/Base64.hpp>

#define XSD_CXX11
#include <xsd/cxx/xml/string.hxx>
#include <xsd/cxx/xml/dom/serialization-source.hxx>

#include <algorithm>
#include <fstream>
#include <sstream>

using namespace digidoc;
using namespace digidoc::util;
using namespace std;
using namespace xercesc;
using json = nlohmann::json;

static auto base64_decode(const XMLCh *in) {
/// Returns the number of Base64 characters (padding included, no line breaks)
/// needed to encode @p n input bytes.
template <class Size>
constexpr Size base64_enc_size(Size n) noexcept
{
    // Every started group of three input bytes yields four output characters.
    const auto groups = (n + 2) / 3;
    return groups * 4;
}

static auto base64_decode(string_view data) {
static constexpr array<uint8_t, 128> T{
0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64,
0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64,
Expand All @@ -70,9 +62,8 @@ static auto base64_decode(const XMLCh *in) {
auto out = make_unique<stringstream>();
int value = 0;
int bits = -8;
for(; in; ++in)
for(auto c: data)
{
const char c(*in);
if(c == '\r' || c == '\n' || c == ' ')
continue;
uint8_t check = T[c];
Expand Down Expand Up @@ -197,7 +188,7 @@ SiVaContainer::SiVaContainer(const string &path, ContainerOpenCB *cb, bool useHa
if(useHashCode && cb && !cb->validateOnline())
THROW("Online validation disabled");

array<XMLByte, 4800> buf{};
array<unsigned char, 4800> buf{};
string b64;
is->clear();
is->seekg(0);
Expand All @@ -207,11 +198,10 @@ SiVaContainer::SiVaContainer(const string &path, ContainerOpenCB *cb, bool useHa
if(is->gcount() <= 0)
break;

XMLSize_t size = 0;
XMLByte *out = Base64::encode(buf.data(), XMLSize_t(is->gcount()), &size);
if(out)
b64.append((char*)out, size);
delete out;
size_t pos = b64.size();
b64.resize(b64.size() + base64_enc_size(buf.size()));
int size = EVP_EncodeBlock((unsigned char*)&b64[pos], buf.data(), int(is->gcount()));
b64.resize(pos + size);
}
ifs.reset();

Expand Down Expand Up @@ -257,13 +247,7 @@ SiVaContainer::SiVaContainer(const string &path, ContainerOpenCB *cb, bool useHa
s->_tsTime = info.value<string>("timestampCreationTime", {});
s->_ocspTime = info.value<string>("ocspResponseCreationTime", {});
if(info.contains("timeAssertionMessageImprint"))
{
string base64 = info["timeAssertionMessageImprint"];
XMLSize_t size = 0;
XMLByte *message = Base64::decode((const XMLByte*)base64.c_str(), &size);
s->_messageImprint.assign(message, message + size);
delete message;
}
s->_messageImprint = from_base64(info["timeAssertionMessageImprint"].get<string_view>());
for(const json &signerRole: info.value<json>("signerRole", {}))
s->_signerRoles.push_back(signerRole["claimedRole"]);
if(json signatureProductionPlace = info.value<json>("signatureProductionPlace", {}); !signatureProductionPlace.is_null())
Expand All @@ -276,17 +260,15 @@ SiVaContainer::SiVaContainer(const string &path, ContainerOpenCB *cb, bool useHa
}
for(const json &certificate: signature.value<json>("certificates", {}))
{
XMLSize_t size = 0;
XMLByte *der = Base64::decode((const XMLByte*)certificate.value<string_view>("content", {}).data(), &size);
auto der = from_base64(certificate.value<string_view>("content", {}));
if(certificate["type"] == "SIGNING")
s->_signingCertificate = X509Cert(der, size, X509Cert::Der);
s->_signingCertificate = X509Cert(der, X509Cert::Der);
if(certificate["type"] == "REVOCATION")
s->_ocspCertificate = X509Cert(der, size, X509Cert::Der);
s->_ocspCertificate = X509Cert(der, X509Cert::Der);
if(certificate["type"] == "SIGNATURE_TIMESTAMP")
s->_tsCertificate = X509Cert(der, size, X509Cert::Der);
s->_tsCertificate = X509Cert(der, X509Cert::Der);
if(certificate["type"] == "ARCHIVE_TIMESTAMP")
s->_tsaCertificate = X509Cert(der, size, X509Cert::Der);
delete der;
s->_tsaCertificate = X509Cert(der, X509Cert::Der);
}
for(const json &error: signature.value<json>("errors", {}))
{
Expand Down Expand Up @@ -363,76 +345,33 @@ unique_ptr<Container> SiVaContainer::openInternal(const string &path, ContainerO

unique_ptr<istream> SiVaContainer::parseDDoc(bool useHashCode)
{
namespace xml = xsd::cxx::xml;
try
{
unique_ptr<DOMDocument> dom(SecureDOMParser().parseIStream(*d->ddoc));
DOMNodeList *nodeList = dom->getElementsByTagName(u"DataFile");
for(XMLSize_t i = 0; i < nodeList->getLength(); ++i)
auto doc = XMLDocument::openStream(*d->ddoc);
for(auto dataFile = doc/"DataFile"; dataFile; dataFile++)
{
auto *item = static_cast<DOMElement*>(nodeList->item(i));
if(!item)
continue;

if(XMLString::compareString(item->getAttribute(u"ContentType"), u"HASHCODE") == 0)
auto contentType = dataFile.property("ContentType");
if(contentType == "HASHCODE")
THROW("Currently supports only content types EMBEDDED_BASE64 for DDOC format");
if(XMLString::compareString(item->getAttribute(u"ContentType"), u"EMBEDDED_BASE64") != 0)
if(contentType != "EMBEDDED_BASE64")
continue;

if(const XMLCh *b64 = item->getTextContent())
{
d->dataFiles.push_back(new DataFilePrivate(base64_decode(b64),
xml::transcode<char>(item->getAttribute(u"Filename")),
xml::transcode<char>(item->getAttribute(u"MimeType")),
xml::transcode<char>(item->getAttribute(u"Id"))));
}

d->dataFiles.push_back(new DataFilePrivate(base64_decode(dataFile),
string(dataFile.property("Filename")),
string(dataFile.property("MimeType")),
string(dataFile.property("Id"))));
if(!useHashCode)
continue;
Digest calc(URI_SHA1);
SecureDOMParser::calcDigestOnNode(&calc, "http://www.w3.org/TR/2001/REC-xml-c14n-20010315", item);
vector<unsigned char> digest = calc.result();
if(XMLSize_t size = 0; XMLByte *out = Base64::encode(digest.data(), XMLSize_t(digest.size()), &size))
{
item->setAttribute(u"ContentType", u"HASHCODE");
item->setAttribute(u"DigestType", u"sha1");
xml::string outXMLCh(reinterpret_cast<const char*>(out));
item->setAttribute(u"DigestValue", outXMLCh.c_str());
item->setTextContent(nullptr);
delete out;
}
doc.c14n(&calc, XMLDocument::C14D_ID_1_0, dataFile);
dataFile.setProperty("ContentType", "HASHCODE");
dataFile.setProperty("DigestType", "sha1");
dataFile.setProperty("DigestValue", to_base64(calc.result()));
dataFile = std::string_view{};
}

DOMImplementation *pImplement = DOMImplementationRegistry::getDOMImplementation(u"LS");
unique_ptr<DOMLSOutput> pDomLsOutput(pImplement->createLSOutput());
unique_ptr<DOMLSSerializer> pSerializer(pImplement->createLSSerializer());
auto result = make_unique<stringstream>();
xml::dom::ostream_format_target out(*result);
pDomLsOutput->setByteStream(&out);
pSerializer->setNewLine(u"\n");
pSerializer->write(dom.get(), pDomLsOutput.get());
doc.save(*result);
return result;
}
catch(const XMLException& e)
{
try {
string result = xml::transcode<char>(e.getMessage());
THROW("Failed to parse DDoc XML: %s", result.c_str());
} catch(const xml::invalid_utf16_string & /* ex */) {
THROW("Failed to parse DDoc XML.");
}
}
catch(const DOMException& e)
{
try {
string result = xml::transcode<char>(e.getMessage());
THROW("Failed to parse DDoc XML: %s", result.c_str());
} catch(const xml::invalid_utf16_string & /* ex */) {
THROW("Failed to parse DDoc XML.");
}
} catch(const xml::invalid_utf16_string & /* ex */) {
THROW("Failed to parse DDoc XML.");
}
catch(const Exception &)
{
throw;
Expand Down
102 changes: 102 additions & 0 deletions src/XMLDocument.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,14 @@

#pragma once

#include "crypto/Digest.h"
#include "util/log.h"

#include <libxml/parser.h>
#include <libxml/xmlschemas.h>
#include <libxml/c14n.h> // needs to be last to workaround old libxml2 errors

#include <openssl/evp.h>

#include <memory>
#include <istream>
Expand All @@ -46,6 +50,54 @@ constexpr std::unique_ptr<T, D> make_unique_ptr(T *p, D d) noexcept
return {p, d};
}

/**
 * Decodes Base64 text into raw bytes with OpenSSL's streaming decoder.
 *
 * The input is cut at every whitespace character (space, LF, CR, FF, TAB, VT)
 * and each fragment is handed to EVP_DecodeUpdate on its own, so the
 * separators themselves are never passed to the decoder.
 *
 * @param data Base64 encoded text.
 * @return the decoded bytes, or an empty vector when the decoder context
 *         cannot be allocated or OpenSSL reports a decoding error.
 */
static std::vector<unsigned char> from_base64(std::string_view data)
{
    static constexpr std::string_view whitespace {" \n\r\f\t\v"};
    auto ctx = make_unique_ptr(EVP_ENCODE_CTX_new(), EVP_ENCODE_CTX_free);
    // EVP_ENCODE_CTX_new() yields NULL on allocation failure; EVP_DecodeInit
    // must not be called with it.
    if(!ctx)
        return {};
    EVP_DecodeInit(ctx.get());

    std::vector<unsigned char> result(EVP_DECODE_LENGTH(data.size()), 0);
    size_t dataPos = 0; // number of decoded bytes already stored in result
    while(!data.empty())
    {
        const auto pos = data.find_first_of(whitespace);
        const auto sub = data.substr(0, pos);
        // Drop the fragment together with the separator that ended it.
        if(pos == std::string_view::npos)
            data = {};
        else
            data.remove_prefix(pos + 1);

        int size = 0;
        if(EVP_DecodeUpdate(ctx.get(), result.data() + dataPos, &size,
            reinterpret_cast<const unsigned char*>(sub.data()), int(sub.size())) < 0)
            return {};
        dataPos += size_t(size);
    }

    // Flush whatever the decoder still holds back.
    int size = 0;
    if(EVP_DecodeFinal(ctx.get(), result.data() + dataPos, &size) != 1)
        return {};
    result.resize(dataPos + size_t(size));
    return result;
}

/**
 * Encodes raw bytes as Base64 text with OpenSSL's streaming encoder.
 *
 * NOTE(review): EVP_EncodeUpdate/EVP_EncodeFinal are documented by OpenSSL to
 * produce line-wrapped output that ends with a newline - confirm that callers
 * expect those line breaks in the returned string.
 *
 * @param data bytes to encode.
 * @return the encoded text, or an empty string when the encoder context cannot
 *         be allocated or EVP_EncodeUpdate does not report success (which is
 *         also what happens for empty input).
 */
static std::string to_base64(const std::vector<unsigned char> &data)
{
    auto ctx = make_unique_ptr(EVP_ENCODE_CTX_new(), EVP_ENCODE_CTX_free);
    // EVP_ENCODE_CTX_new() yields NULL on allocation failure; EVP_EncodeInit
    // must not be called with it.
    if(!ctx)
        return {};
    EVP_EncodeInit(ctx.get());

    std::string result(EVP_ENCODE_LENGTH(data.size()), 0);
    auto *out = reinterpret_cast<unsigned char*>(result.data());
    int size{};
    if(EVP_EncodeUpdate(ctx.get(), out, &size, data.data(), int(data.size())) < 1)
        return {};

    // EVP_EncodeFinal appends the remaining buffered output right after the
    // bytes written by EVP_EncodeUpdate.
    const auto pos = size_t(size);
    EVP_EncodeFinal(ctx.get(), out + pos, &size);
    result.resize(pos + size_t(size));
    return result;
}

template<class T>
struct XMLElem
{
Expand Down Expand Up @@ -187,6 +239,13 @@ struct XMLNode: public XMLElem<xmlNode>

struct XMLDocument: public unique_xml_t<decltype(xmlFreeDoc)>, public XMLNode
{
// Canonicalization algorithm URIs recognized by c14n(): C14N 1.0, C14N 1.1 and
// exclusive C14N 1.0, each with a "#WithComments" variant (_COM suffix).
static constexpr std::string_view C14D_ID_1_0 {"http://www.w3.org/TR/2001/REC-xml-c14n-20010315"};
static constexpr std::string_view C14D_ID_1_0_COM {"http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments"};
static constexpr std::string_view C14D_ID_1_1 {"http://www.w3.org/2006/12/xml-c14n11"};
static constexpr std::string_view C14D_ID_1_1_COM {"http://www.w3.org/2006/12/xml-c14n11#WithComments"};
static constexpr std::string_view C14D_ID_1_0_EXC {"http://www.w3.org/2001/10/xml-exc-c14n#"};
static constexpr std::string_view C14D_ID_1_0_EXC_COM {"http://www.w3.org/2001/10/xml-exc-c14n#WithComments"};

using XMLNode::operator bool;

XMLDocument(element_type *ptr, std::string_view _name = {}, std::string_view _ns = {}) noexcept
Expand Down Expand Up @@ -228,6 +287,49 @@ struct XMLDocument: public unique_xml_t<decltype(xmlFreeDoc)>, public XMLNode
return doc;
}

/**
 * Canonicalizes the subtree rooted at @p node and streams the canonical
 * bytes into @p digest.
 *
 * @param digest receives every chunk of canonical output through update().
 * @param algo canonicalization algorithm URI (one of the C14D_ID_* constants);
 *        an empty value selects C14N 1.0 without comments.
 * @param node root of the subtree to canonicalize; only this node and its
 *        descendants are included in the output.
 *
 * Raises an error through THROW when @p algo is not a supported algorithm,
 * when the libxml2 output buffer cannot be created, or when
 * xmlC14NExecute reports a failure.
 */
void c14n(Digest *digest, std::string_view algo, XMLNode node)
{
    xmlC14NMode mode = XML_C14N_1_0;
    int with_comments = 0;
    if(algo == C14D_ID_1_0)
        mode = XML_C14N_1_0;
    else if(algo == C14D_ID_1_0_COM)
        with_comments = 1;
    else if(algo == C14D_ID_1_1)
        mode = XML_C14N_1_1;
    else if(algo == C14D_ID_1_1_COM)
    {
        mode = XML_C14N_1_1;
        with_comments = 1;
    }
    else if(algo == C14D_ID_1_0_EXC)
        mode = XML_C14N_EXCLUSIVE_1_0;
    else if(algo == C14D_ID_1_0_EXC_COM)
    {
        mode = XML_C14N_EXCLUSIVE_1_0;
        with_comments = 1;
    }
    else if(!algo.empty())
        THROW("Unsupported canonicalization method '%.*s'", int(algo.size()), algo.data());

    // The output buffer is owned here, so it has to be released with
    // xmlOutputBufferClose on every exit path, including the THROW below.
    auto buf = make_unique_ptr(xmlOutputBufferCreateIO([](void *context, const char *buffer, int len) {
        // Write callback: forward each chunk of canonical XML to the digest.
        auto *sink = static_cast<Digest *>(context);
        sink->update(pcxmlChar(buffer), size_t(len));
        return len;
    }, nullptr, digest, nullptr), xmlOutputBufferClose);
    if(!buf)
        THROW("Failed to create canonicalization output buffer");

    int size = xmlC14NExecute(get(), [](void *root, xmlNodePtr node, xmlNodePtr parent) constexpr noexcept {
        // Visibility callback: a node is visible when it is the requested
        // root or has the root among its ancestors.
        if(root == node)
            return 1;
        for(; parent; parent = parent->parent)
        {
            if(root == parent)
                return 1;
        }
        return 0;
    }, node.d, mode, nullptr, with_comments, buf.get());
    if(size < 0)
        THROW("Failed to canonicalizate input");
}

bool save(std::string_view path) const noexcept
{
return xmlSaveFormatFileEnc(path.data(), get(), "UTF-8", 1) > 0;
Expand Down

0 comments on commit 0f7aa01

Please sign in to comment.