Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add c14n for node and document #138

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions src/tree/c14n.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
//! Shared canonicalization logic and types.
//!
use std::ffi::c_int;

use crate::bindings::{
xmlC14NMode_XML_C14N_1_0, xmlC14NMode_XML_C14N_1_1, xmlC14NMode_XML_C14N_EXCLUSIVE_1_0,
};

/// Options for configuring how to canonicalize XML.
///
/// The derived [Default] picks the default [CanonicalizationMode], sets
/// `with_comments` to `false` and leaves `inclusive_ns_prefixes` empty.
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)]
pub struct CanonicalizationOptions {
/// Canonicalization specification to use
pub mode: CanonicalizationMode,
/// If true, keep `<!-- ... -->` comments in the output, otherwise remove them
pub with_comments: bool,
/// Namespace prefixes to keep even if they are unused. By default, in [CanonicalizationMode::ExclusiveCanonical1_0], unused namespaces are removed.
///
/// Doesn't apply to other canonicalization modes.
pub inclusive_ns_prefixes: Vec<String>,
}

/// Canonicalization specification to use.
///
/// The derived [Default] is [CanonicalizationMode::ExclusiveCanonical1_0].
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)]
pub enum CanonicalizationMode {
/// Original C14N 1.0 spec
Canonical1_0,
/// Exclusive C14N 1.0 spec (the default variant)
#[default]
ExclusiveCanonical1_0,
/// C14N 1.1 spec
Canonical1_1,
}

impl From<CanonicalizationMode> for c_int {
    /// Translate a [CanonicalizationMode] into the matching `xmlC14NMode_*`
    /// constant from the generated bindings, expressed as a C `int`.
    fn from(mode: CanonicalizationMode) -> Self {
        match mode {
            CanonicalizationMode::Canonical1_0 => c_int::from(xmlC14NMode_XML_C14N_1_0 as i32),
            CanonicalizationMode::ExclusiveCanonical1_0 => {
                c_int::from(xmlC14NMode_XML_C14N_EXCLUSIVE_1_0 as i32)
            }
            CanonicalizationMode::Canonical1_1 => c_int::from(xmlC14NMode_XML_C14N_1_1 as i32),
        }
    }
}
2 changes: 2 additions & 0 deletions src/tree/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,3 +343,5 @@ impl Document {
Ok(())
}
}

mod c14n;
111 changes: 111 additions & 0 deletions src/tree/document/c14n.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
//! Document canonicalization logic
//!
use std::ffi::{c_int, c_void, CString};
use std::os::raw;
use std::ptr::null_mut;

use crate::tree::c14n::*;

use super::{
xmlAllocOutputBuffer, xmlC14NExecute, xmlC14NIsVisibleCallback, xmlChar, xmlNodePtr,
xmlOutputBufferClose, xmlOutputBufferPtr, Document,
};

impl Document {
/// Canonicalize a document and return the results.
pub fn canonicalize(
&self,
options: CanonicalizationOptions,
callback: Option<(xmlNodePtr, xmlC14NIsVisibleCallback)>,
) -> Result<String, ()> {
let document = (*self.0).borrow().doc_ptr;

let mut ns_list_c = to_xml_string_vec(options.inclusive_ns_prefixes);
let inclusive_ns_prefixes = ns_list_c.as_mut_ptr();
let with_comments = c_int::from(options.with_comments);

let (is_visible_callback, user_data) = if let Some((node_ptr, visibility_callback)) = callback {
(visibility_callback, node_ptr as *mut _)
} else {
(None, null_mut())
};

let mode = options.mode.into();
unsafe {
let c_obuf = create_output_buffer();

let status = xmlC14NExecute(
document,
is_visible_callback,
user_data,
mode,
inclusive_ns_prefixes,
with_comments,
c_obuf,
);

let res = c_obuf_into_output(c_obuf);

if status < 0 {
Err(())
} else {
Ok(res)
}
}
}
}

/// Close `c_obuf` and return the text accumulated in its context.
///
/// The context pointer is detached from the buffer before closing, so any
/// write callback invoked while closing sees a null context and is a no-op.
/// A null buffer, or a buffer without a context, yields an empty string.
///
/// # Safety
///
/// `c_obuf` must be null or a live buffer returned by
/// [create_output_buffer] that has not been consumed yet.
unsafe fn c_obuf_into_output(c_obuf: xmlOutputBufferPtr) -> String {
    if c_obuf.is_null() {
        return String::new();
    }

    let ctx_ptr = (*c_obuf).context as *mut String;
    (*c_obuf).context = std::ptr::null_mut::<c_void>();

    xmlOutputBufferClose(c_obuf);

    if ctx_ptr.is_null() {
        String::new()
    } else {
        // Take back ownership of the string boxed in `create_output_buffer`.
        *Box::from_raw(ctx_ptr)
    }
}

/// Allocate a libxml2 output buffer whose writes are appended to a
/// heap-allocated `String` stored in the buffer's context.
///
/// Returns a null pointer when `xmlAllocOutputBuffer` fails; in that case no
/// string is allocated, so nothing is leaked.
///
/// # Safety
///
/// The returned buffer must be consumed with [c_obuf_into_output], which
/// reclaims the boxed string and closes the buffer.
unsafe fn create_output_buffer() -> xmlOutputBufferPtr {
    // No encoder: bytes are handed to the write callback unchanged.
    let buf = xmlAllocOutputBuffer(std::ptr::null_mut());
    if buf.is_null() {
        return buf;
    }

    (*buf).writecallback = Some(xml_write_io);
    (*buf).closecallback = Some(xml_close_io);
    (*buf).context = Box::into_raw(Box::new(String::new())) as _;

    buf
}

/// Close callback installed on the output buffer.
///
/// The context is not touched here; this callback only reports success.
unsafe extern "C" fn xml_close_io(_ctx: *mut raw::c_void) -> raw::c_int {
    const CLOSE_OK: raw::c_int = 0;
    CLOSE_OK
}

/// Write callback installed on the output buffer: appends `len` bytes from
/// `buffer` to the `String` that `io_ptr` points to.
///
/// Returns the number of bytes consumed (`len`), `0` when there is no context
/// or nothing to write, and `-1` when `len` is negative or `buffer` is null
/// while `len` is positive.
///
/// Bytes that are not valid UTF-8 are replaced via `String::from_utf8_lossy`.
/// NOTE(review): each call is decoded independently, so a multi-byte character
/// split across two calls would be replaced — confirm whether libxml2 can
/// split chunks that way.
///
/// # Safety
///
/// `io_ptr` must be null or point to a live `String` that is not accessed
/// elsewhere during the call; `buffer` must be valid for reads of `len` bytes
/// when `len` is positive.
unsafe extern "C" fn xml_write_io(
    io_ptr: *mut raw::c_void,
    buffer: *const raw::c_char,
    len: raw::c_int,
) -> raw::c_int {
    // A detached context or an empty chunk means there is nothing to do.
    if io_ptr.is_null() || len == 0 {
        return 0;
    }
    // Never build a slice from a null pointer or a negative length.
    if len < 0 || buffer.is_null() {
        return -1;
    }

    // The buffer is only read, so a shared slice is sufficient.
    let bytes = std::slice::from_raw_parts(buffer as *const u8, len as usize);
    let output = &mut *(io_ptr as *mut String);
    output.push_str(&String::from_utf8_lossy(bytes));

    len
}

/// Convert owned strings into a null-terminated array of C strings.
///
/// Every non-null entry is produced by [CString::into_raw], so ownership of
/// that allocation passes to the caller. The final entry is always a null
/// pointer marking the end of the list.
///
/// # Panics
///
/// Panics if any input string contains an interior NUL byte.
fn to_xml_string_vec(vec: Vec<String>) -> Vec<*mut xmlChar> {
    let mut raw_strings: Vec<*mut xmlChar> = Vec::with_capacity(vec.len() + 1);

    for prefix in vec {
        let c_prefix = CString::new(prefix).unwrap();
        raw_strings.push(c_prefix.into_raw() as *mut xmlChar);
    }

    // Terminating null entry.
    raw_strings.push(std::ptr::null_mut());
    raw_strings
}
1 change: 1 addition & 0 deletions src/tree/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! The tree functionality
//!

pub mod c14n;
pub mod document;
pub mod namespace;
pub mod node;
Expand Down
51 changes: 51 additions & 0 deletions src/tree/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1054,6 +1054,34 @@ impl Node {
context.findnodes(xpath, Some(self))
}

/// Evaluate the XPath expression `path` against this node and return only the
/// first match, if any.
///
/// Each `(prefix, href)` pair in `ns_binlings` is registered as a namespace on
/// the XPath context before the expression is evaluated. Returns `Ok(None)`
/// when nothing matches, and `Err(())` when creating the context, registering
/// a namespace or evaluating the expression fails.
pub fn at_xpath(&self, path: &str, ns_binlings: &[(&str, &str)]) -> Result<Option<Node>, ()> {
    let mut context = Context::from_node(self)?;
    ns_binlings
        .iter()
        .try_for_each(|(prefix, href)| context.register_namespace(prefix, href))?;

    let matches = context.findnodes(path, Some(self))?;
    Ok(matches.first().cloned())
}

/// Collect the ancestors of this node, nearest parent first.
///
/// Ancestor pointers that `ptr_as_option` maps to `None` are left out.
pub fn ancestors(&self) -> Vec<Node> {
    node_ancestors(self.node_ptr())
        .into_iter()
        .filter_map(|ancestor_ptr| self.ptr_as_option(ancestor_ptr))
        .collect()
}

/// find String values via xpath, at a specified node or the document root
pub fn findvalues(&self, xpath: &str) -> Result<Vec<String>, ()> {
let mut context = Context::from_node(self)?;
Expand Down Expand Up @@ -1100,3 +1128,26 @@ impl Node {
}
}
}

/// Walk the parent chain of `node_ptr` and return every ancestor pointer,
/// nearest parent first.
///
/// A null `node_ptr`, or a node without a parent, yields an empty list.
fn node_ancestors(node_ptr: xmlNodePtr) -> Vec<xmlNodePtr> {
    let mut chain = Vec::new();
    if node_ptr.is_null() {
        return chain;
    }

    let mut current = xmlGetParent(node_ptr);
    while !current.is_null() {
        chain.push(current);
        current = xmlGetParent(current);
    }

    chain
}

mod c14n;
58 changes: 58 additions & 0 deletions src/tree/node/c14n.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
//! Node canonicalization logic
//!
use std::ffi::c_void;

use crate::{
bindings::{xmlC14NIsVisibleCallback, xmlNodePtr},
c_helpers::xmlGetNodeType,
tree::{c14n::*, Node},
};

use super::node_ancestors;

impl Node {
    /// Canonicalize the subtree rooted at this node and return the results.
    ///
    /// The owning document is canonicalized with a visibility callback that
    /// only accepts this node and the nodes below it.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` when the owning document is no longer alive, when a
    /// mutable pointer to this node cannot be obtained, or when the document
    /// canonicalization itself fails.
    pub fn canonicalize(&mut self, options: CanonicalizationOptions) -> Result<String, ()> {
        // The node only holds a weak reference to its document; report a
        // dropped document as an error instead of panicking.
        let doc_ref = self.get_docref().upgrade().ok_or(())?;
        let document = crate::tree::Document(doc_ref);

        // Passed to the callback as user data so it knows the subtree root.
        let user_data = self.node_ptr_mut().map_err(|_| ())?;
        let callback: xmlC14NIsVisibleCallback = Some(callback_wrapper);

        document.canonicalize(options, Some((user_data, callback)))
    }
}

/// Visibility callback handed to the canonicalizer.
///
/// `c14n_root_ptr` is the user data: the node whose subtree is being
/// canonicalized. The node under test is `node_ptr` when its type is listed in
/// `NODE_TYPES`, otherwise `parent_ptr` stands in for it. Returns `1` when
/// that node is the root itself or has the root among its ancestors, and `0`
/// otherwise.
unsafe extern "C" fn callback_wrapper(
    c14n_root_ptr: *mut c_void,
    node_ptr: xmlNodePtr,
    parent_ptr: xmlNodePtr,
) -> ::std::os::raw::c_int {
    let root = c14n_root_ptr as xmlNodePtr;

    let target = if NODE_TYPES.contains(&xmlGetNodeType(node_ptr)) {
        node_ptr
    } else {
        parent_ptr
    };

    let inside_subtree = target == root || node_ancestors(target).contains(&root);
    ::std::os::raw::c_int::from(inside_subtree)
}

/// Node types for which `callback_wrapper` tests the node itself; for any
/// other node type it tests the parent pointer instead.
const NODE_TYPES: [u32; 7] = [
super::xmlElementType_XML_ELEMENT_NODE,
super::xmlElementType_XML_ATTRIBUTE_NODE,
super::xmlElementType_XML_DOCUMENT_TYPE_NODE,
super::xmlElementType_XML_TEXT_NODE,
super::xmlElementType_XML_DTD_NODE,
super::xmlElementType_XML_PI_NODE,
super::xmlElementType_XML_COMMENT_NODE,
];
Loading
Loading