From 55a8e2918f81d3ff0399c177cdac64efae718856 Mon Sep 17 00:00:00 2001 From: Matthew James Briggs Date: Mon, 23 Nov 2020 01:28:47 +0000 Subject: [PATCH] whitespace normalization and processing instructions (#75) Normalize whitespace of text nodes, ignores considerations for #55 and treats all nodes as if 'whitespace collapse' were in effect. Improve processing instructions #12 --- CHANGELOG.md | 4 +- README.md | 3 +- src/lib.rs | 3 +- src/parser/element.rs | 4 +- src/parser/mod.rs | 16 +- src/parser/pi.rs | 256 ++++++++---------- src/parser/string.rs | 22 +- src/xdoc/chars.rs | 10 - src/xdoc/mod.rs | 2 +- src/xdoc/pi.rs | 27 +- .../xmltestgen/ConfTest.java | 11 +- .../xmltestgen/ConfTestGenerator.java | 120 ++++++-- .../xmltestgen/ConfTestParser.java | 6 +- .../com/matthewjamesbriggs/xmltestgen/X.java | 18 +- .../xmltestgen/XEntityExpansion.java | 5 + .../xmltestgen/XNamespaces.java | 13 + .../matthewjamesbriggs/xmltestgen/XText.java | 58 ++++ .../generated/exile_doctypes_comments_pis.rs | 34 +-- tests/generated/exile_pi.rs | 8 +- .../exile_whitespace_normalization.rs | 41 +++ tests/generated/jclark_valid_sa_006.rs | 25 ++ tests/generated/jclark_valid_sa_007.rs | 24 ++ tests/generated/jclark_valid_sa_008.rs | 25 ++ tests/generated/jclark_valid_sa_009.rs | 24 ++ tests/generated/jclark_valid_sa_010.rs | 25 ++ tests/generated/jclark_valid_sa_011.rs | 26 ++ tests/generated/jclark_valid_sa_012.rs | 25 ++ tests/generated/jclark_valid_sa_013.rs | 25 ++ tests/generated/jclark_valid_sa_014.rs | 25 ++ tests/generated/jclark_valid_sa_015.rs | 25 ++ tests/generated/jclark_valid_sa_016.rs | 28 ++ tests/generated/jclark_valid_sa_017.rs | 28 ++ tests/generated/mod.rs | 13 + ...ile_whitespace_normalization.metadata.json | 6 + .../exile_whitespace_normalization.xml | 9 + tests/input_data/jclark_valid_sa_006.xml | 5 + tests/input_data/jclark_valid_sa_007.xml | 4 + tests/input_data/jclark_valid_sa_008.xml | 4 + tests/input_data/jclark_valid_sa_009.xml | 4 + 
tests/input_data/jclark_valid_sa_010.xml | 5 + tests/input_data/jclark_valid_sa_011.xml | 5 + tests/input_data/jclark_valid_sa_012.xml | 5 + tests/input_data/jclark_valid_sa_013.xml | 5 + tests/input_data/jclark_valid_sa_014.xml | 5 + tests/input_data/jclark_valid_sa_015.xml | 5 + tests/input_data/jclark_valid_sa_016.xml | 4 + tests/input_data/jclark_valid_sa_017.xml | 4 + 47 files changed, 802 insertions(+), 247 deletions(-) create mode 100644 testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/XEntityExpansion.java create mode 100644 testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/XNamespaces.java create mode 100644 testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/XText.java create mode 100644 tests/generated/exile_whitespace_normalization.rs create mode 100644 tests/generated/jclark_valid_sa_006.rs create mode 100644 tests/generated/jclark_valid_sa_007.rs create mode 100644 tests/generated/jclark_valid_sa_008.rs create mode 100644 tests/generated/jclark_valid_sa_009.rs create mode 100644 tests/generated/jclark_valid_sa_010.rs create mode 100644 tests/generated/jclark_valid_sa_011.rs create mode 100644 tests/generated/jclark_valid_sa_012.rs create mode 100644 tests/generated/jclark_valid_sa_013.rs create mode 100644 tests/generated/jclark_valid_sa_014.rs create mode 100644 tests/generated/jclark_valid_sa_015.rs create mode 100644 tests/generated/jclark_valid_sa_016.rs create mode 100644 tests/generated/jclark_valid_sa_017.rs create mode 100644 tests/input_data/exile_whitespace_normalization.metadata.json create mode 100644 tests/input_data/exile_whitespace_normalization.xml create mode 100644 tests/input_data/jclark_valid_sa_006.xml create mode 100644 tests/input_data/jclark_valid_sa_007.xml create mode 100644 tests/input_data/jclark_valid_sa_008.xml create mode 100644 tests/input_data/jclark_valid_sa_009.xml create mode 100644 tests/input_data/jclark_valid_sa_010.xml create mode 100644 
tests/input_data/jclark_valid_sa_011.xml create mode 100644 tests/input_data/jclark_valid_sa_012.xml create mode 100644 tests/input_data/jclark_valid_sa_013.xml create mode 100644 tests/input_data/jclark_valid_sa_014.xml create mode 100644 tests/input_data/jclark_valid_sa_015.xml create mode 100644 tests/input_data/jclark_valid_sa_016.xml create mode 100644 tests/input_data/jclark_valid_sa_017.xml diff --git a/CHANGELOG.md b/CHANGELOG.md index 43f26b1..6fccf2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,12 +8,14 @@ Currently we are using v0.0.x where every version can and will contain breaking ## [Unreleased] ## Added -- Nothing yet. +- Whitespace normalization of text nodes [#75] ### Changed - Make `Element` struct members private [#74] +- Improve processing instructions [#75] [#74]: https://github.com/webern/exile/pull/74 +[#75]: https://github.com/webern/exile/pull/75 ## [v0.0.2] - 2020-11-15 ### Added diff --git a/README.md b/README.md index ffd657e..a0cb152 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ Currently supported: - Text Nodes - Processing Instructions - UTF-8 +- Whitespace Normalization Not Supported: - Entities @@ -22,7 +23,7 @@ Not Supported: - Doctypes - Comment Parsing - Other Encodings -- Whitespace Preservation +- Whitespace Preservation: All text nodes are treated as if whitespace `collapse` were in effect. ## Example diff --git a/src/lib.rs b/src/lib.rs index 39e1eb1..0226584 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,6 +12,7 @@ Currently supported: - Text Nodes - Processing Instructions - UTF-8 +- Whitespace Normalization Not Supported: - Entities @@ -19,7 +20,7 @@ Not Supported: - Doctypes - Comment Parsing - Other Encodings -- Whitespace Preservation +- Whitespace Preservation: All text nodes are treated as if whitespace `collapse` were in effect. 
# Example diff --git a/src/parser/element.rs b/src/parser/element.rs index d48c9cc..260680b 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -135,7 +135,9 @@ fn parse_children(iter: &mut Iter<'_>, parent: &mut Element) -> Result<()> { } } else { let text = parse_text(iter)?; - parent.add_node(Node::Text(text)); + if !text.is_empty() { + parent.add_node(Node::Text(text)); + } } // some parsing functions may return with the iter pointing to the last thing that was part // of their construct, while others might advance the iter to the next char *after* the diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2f47e08..18b1821 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,5 +1,6 @@ use std::collections::HashMap; use std::iter::Peekable; +use std::path::Path; use std::str::Chars; use crate::error::{display_char, parse_err, Error, ParseError, Result, ThrowSite, XmlSite}; @@ -7,7 +8,6 @@ use crate::parser::chars::{is_name_char, is_name_start_char}; use crate::parser::element::parse_element; use crate::parser::pi::{parse_pi, parse_pi_logic}; use crate::{Declaration, Document, Encoding, Misc, Version}; -use std::path::Path; mod chars; mod element; @@ -320,20 +320,24 @@ fn parse_document(iter: &mut Iter<'_>, document: &mut Document) -> Result<()> { // the values found into the mutable document parameter fn parse_declaration_pi(iter: &mut Iter<'_>, document: &mut Document) -> Result<()> { state_must_be_before_declaration(iter)?; - let (target, instructions) = parse_pi_logic(iter)?; - document.set_declaration(parse_declaration(&target, &instructions)?); + let (target, data) = parse_pi_logic(iter)?; + document.set_declaration(parse_declaration(&target, &data)?); Ok(()) } -fn parse_declaration(target: &str, instructions: &[String]) -> Result { +fn parse_declaration(target: &str, data: &str) -> Result { let mut declaration = Declaration::default(); if target != "xml" { return raise!("pi_data.target != xml"); } + let instructions: Vec<&str> = 
data.split_whitespace().collect(); if instructions.len() > 2 { - return raise!(""); + return raise!( + "only able to parse xml declarations that include version and encoding. \ + a string split of the xml processing instruction data yielded more than two items." + ); } - let map = parse_as_map(instructions)?; + let map = parse_as_map(&instructions)?; if let Some(&val) = map.get("version") { match val { "1.0" => { diff --git a/src/parser/pi.rs b/src/parser/pi.rs index 804df0d..c47c861 100644 --- a/src/parser/pi.rs +++ b/src/parser/pi.rs @@ -2,173 +2,137 @@ use crate::error::Result; use crate::parser::Iter; use crate::PI; -use super::chars::{is_name_char, is_name_start_char}; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd)] -enum PIStatus { - BeforeTarget, - InsideTarget, - AfterTarget, - AfterInstruction, - QuestionMark, - Close, -} +/// The iter should be pointing to the opening `<` of a processing instruction. +pub(crate) fn parse_pi_logic(iter: &mut Iter<'_>) -> Result<(String, String)> { + expect!(iter, '<')?; + iter.advance_or_die()?; + expect!(iter, '?')?; + iter.advance_or_die()?; -impl Default for PIStatus { - fn default() -> Self { - PIStatus::BeforeTarget + // handle the special case + if iter.is('?') { + iter.advance_or_die()?; + expect!(iter, '>')?; + iter.advance(); + return Ok(("".into(), "".into())); } -} -#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Default)] -struct PIProcessor { - status: PIStatus, - target: String, - instructions: Vec, + let target = parse_pi_target(iter)?; + let mut data = String::new(); + loop { + if iter.is('?') && iter.peek_is('>') { + iter.advance_or_die()?; + iter.advance(); + break; + } + data.push(iter.st.c); + iter.advance_or_die()?; + } + Ok((target, data)) } -/// The iter should be pointing to the opening `<` of a processing instruction. 
-pub(crate) fn parse_pi_logic(iter: &mut Iter<'_>) -> Result<(String, Vec)> { - expect!(iter, '<')?; - iter.advance_or_die()?; - expect!(iter, '?')?; +/// Must be a valid name terminated by whitespace. +fn parse_pi_target(iter: &mut Iter<'_>) -> Result { + if !iter.is_name_start_char() { + return parse_err!(iter, "expected name start char, found '{}'", iter.st.c); + } + let mut name = String::new(); + name.push(iter.st.c); iter.advance_or_die()?; - let mut processor = PIProcessor::default(); loop { - take_processing_instruction_char(iter, &mut processor)?; - if processor.status == PIStatus::Close { + if iter.is_whitespace() { + iter.advance_or_die()?; + break; + } else if iter.is('?') { + // e.g. ) -> Result { - let (target, instructions) = parse_pi_logic(iter)?; - Ok(PI { - target, - instructions, - }) + let (target, data) = parse_pi_logic(iter)?; + Ok(PI { target, data }) } -fn take_processing_instruction_char( - iter: &mut Iter<'_>, - processor: &mut PIProcessor, -) -> Result<()> { - match processor.status { - PIStatus::BeforeTarget => { - if !is_name_start_char(iter.st.c) { - return parse_err!(iter); - } else { - processor.target.push(iter.st.c); - processor.status = PIStatus::InsideTarget; - } - } - PIStatus::InsideTarget => { - if iter.st.c.is_ascii_whitespace() { - processor.status = PIStatus::AfterTarget; - } else if !is_name_char(iter.st.c) { - return parse_err!(iter); - } else { - processor.target.push(iter.st.c); - } - } - PIStatus::AfterTarget | PIStatus::AfterInstruction => { - if iter.st.c == '?' 
{ - processor.status = PIStatus::QuestionMark; - } else if !iter.is_whitespace() { - let instruction = parse_pi_string(iter)?; - processor.instructions.push(instruction); - if iter.is('?') { - processor.status = PIStatus::QuestionMark; - } else if !iter.is_whitespace() { - return parse_err!(iter); - } else { - processor.status = PIStatus::AfterInstruction; - } - } - } - PIStatus::QuestionMark => { - if iter.st.c == '>' { - processor.status = PIStatus::Close; - } else { - return parse_err!(iter); - } - } - PIStatus::Close => { /* done */ } - } - Ok(()) +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#[test] +fn parse_pi_easy() { + let pi_str = ""; + let mut iter = Iter::new(pi_str).unwrap(); + let pi = parse_pi(&mut iter).unwrap(); + assert_eq!("target", pi.target); + assert_eq!("data", pi.data); + assert!(!iter.advance()); } -fn is_pi_close(iter: &mut Iter<'_>) -> Result { - Ok(iter.is('?') && iter.peek_or_die()? == '>') +#[test] +fn parse_pi_peasy() { + let pi_str = "X"; + let mut iter = Iter::new(pi_str).unwrap(); + let pi = parse_pi(&mut iter).unwrap(); + assert_eq!("target", pi.target); + assert_eq!("data", pi.data); + assert!(iter.is('X')); } -fn parse_pi_string(iter: &mut Iter<'_>) -> Result { - let mut buf = String::new(); - loop { - if iter.is_whitespace() || is_pi_close(iter)? 
{ - return Ok(buf); - } else { - buf.push(iter.st.c); - } - if !iter.advance() { - break; - } - } - Ok(buf) +#[test] +fn parse_pi_funky_1() { + let pi_str = ""; + let mut iter = Iter::new(pi_str).unwrap(); + let parse_result = parse_pi(&mut iter); + assert!(parse_result.is_err()); } #[test] -fn parse_pi_string_test() { - struct TestCase { - input: &'static str, - want: &'static str, - iter: char, - } - let test_cases = vec![ - TestCase { - input: "bloop bleep", - want: "bloop", - iter: ' ', - }, - TestCase { - input: "bloop?bleep", - want: "bloop?bleep", - iter: 'p', - }, - TestCase { - input: "bloop?>bleep", - want: "bloop", - iter: '?', - }, - TestCase { - input: "beer🍺🍺🍺 🍺🍺?>", - want: "beer🍺🍺🍺", - iter: ' ', - }, - TestCase { - input: "beer🍺🍺🍺🍺🍺", - want: "beer🍺🍺🍺🍺🍺", - iter: '🍺', - }, - ]; - for test_case in &test_cases { - let mut iter = Iter::new(test_case.input).unwrap(); - let got = parse_pi_string(&mut iter).unwrap(); - assert_eq!( - got.as_str(), - test_case.want, - "parse_pi_string(\"{}\") returned '{}', expected '{}'", - test_case.input, - got.as_str(), - test_case.want - ); - assert_eq!( - iter.st.c, test_case.iter, - "expected iter to be pointing at '{}', got '{}'", - test_case.iter, iter.st.c - ); - } +fn parse_pi_funky_2() { + let pi_str = ""; + let mut iter = Iter::new(pi_str).unwrap(); + let pi = parse_pi(&mut iter).unwrap(); + assert_eq!("", pi.target); + assert!(pi.data.is_empty()); +} + +#[test] +fn parse_pi_funky_3() { + // established as not-well-formed by jclark_not_wf_sa_003.xml + let pi_str = ""; + let mut iter = Iter::new(pi_str).unwrap(); + let parse_result = parse_pi(&mut iter); + assert!(parse_result.is_err()); +} + +#[test] +fn parse_pi_funky_4() { + let pi_str = "< ? ? 
>"; + let mut iter = Iter::new(pi_str).unwrap(); + let parse_result = parse_pi(&mut iter); + assert!(parse_result.is_err()); +} + +#[test] +fn parse_pi_funky_5() { + let pi_str = ""; + let mut iter = Iter::new(pi_str).unwrap(); + let pi = parse_pi(&mut iter).unwrap(); + assert_eq!("bones", pi.target); + assert!(pi.data.is_empty()); +} + +#[test] +fn parse_pi_funky_6() { + // this is from jclark_valid_sa_017.xml + let pi_str = " "; + let mut iter = Iter::new(pi_str).unwrap(); + let pi = parse_pi(&mut iter).unwrap(); + assert_eq!("pi", pi.target); + assert_eq!("some data ? > , string_type: StringType) -> Result { + let mut space_buffer = None; + let mut is_non_white_reached = false; let mut result = String::new(); while !is_end_char(iter, string_type) { if iter.st.c == '&' { let c = parse_escape(iter)?; - result.push(c); + if is_whitespace(c) { + if is_non_white_reached { + space_buffer = Some(' '); + } + } else { + is_non_white_reached = true; + result.push(c); + } } else if is_forbidden(iter, string_type) { return parse_err!(iter, "forbidden character in {:?} string", string_type); + } else if iter.is_whitespace() { + if is_non_white_reached { + space_buffer = Some(' '); + } } else { + is_non_white_reached = true; + if let Some(space) = space_buffer { + result.push(space); + space_buffer = None; + } result.push(iter.st.c); } + if !iter.advance() { return parse_err!( iter, diff --git a/src/xdoc/chars.rs b/src/xdoc/chars.rs index f39f5e8..32e87af 100644 --- a/src/xdoc/chars.rs +++ b/src/xdoc/chars.rs @@ -2,13 +2,3 @@ pub fn is_whitespace(c: char) -> bool { c == ' ' || c == '\t' || c == '\r' || c == '\n' } - -/// contains `(#x20 | #x9 | #xD | #xA)`, i.e. space, tab, carriage return, or line feed. 
-pub fn contains_whitespace>(s: S) -> bool { - for c in s.as_ref().chars() { - if is_whitespace(c) { - return true; - } - } - false -} diff --git a/src/xdoc/mod.rs b/src/xdoc/mod.rs index 6844106..5f36233 100644 --- a/src/xdoc/mod.rs +++ b/src/xdoc/mod.rs @@ -11,7 +11,7 @@ The public concepts in `xdoc` are re-exported by `exile`. #![warn(missing_docs)] -pub use chars::{contains_whitespace, is_whitespace}; +pub use chars::is_whitespace; pub use doc::Document; pub use doc::{Declaration, Encoding, Version}; pub use element::Element; diff --git a/src/xdoc/pi.rs b/src/xdoc/pi.rs index 63e282c..12e17ae 100644 --- a/src/xdoc/pi.rs +++ b/src/xdoc/pi.rs @@ -3,7 +3,7 @@ use std::fmt::{Display, Formatter}; use std::io::{Cursor, Write}; use crate::xdoc::error::Result; -use crate::xdoc::{contains_whitespace, WriteOpts}; +use crate::xdoc::WriteOpts; /// Represents a Processing Instruction (PI) in an XML document. /// @@ -29,8 +29,8 @@ use crate::xdoc::{contains_whitespace, WriteOpts}; pub struct PI { /// The processing instruction target. pub target: String, - /// The processing instructions. - pub instructions: Vec, + /// The processing instruction data. 
+ pub data: String, } impl PI { @@ -44,8 +44,8 @@ impl PI { if let Err(e) = write!(writer, " Result<()> { // TODO - check that the name is compliant - if self.target.is_empty() { - return raise!("Empty processing instruction target."); - } - for s in &self.instructions { - if s.contains("?>") { - return raise!("Processing instruction contains '?>'."); - } - if contains_whitespace(s) { - return raise!("Processing instruction contains whitespace."); - } + if self.data.contains("?>") { + return raise!("Processing instruction data contains '?>'."); } Ok(()) } @@ -91,8 +83,7 @@ impl Display for PI { fn pi_test_simple() { let mut pi = PI::default(); pi.target = "thetarget".into(); - pi.instructions.push("dat1".into()); - pi.instructions.push("dat2".into()); + pi.data = "dat1 dat2".into(); let got = pi.to_string(); let want = ""; assert_eq!(got, want); @@ -111,7 +102,7 @@ fn pi_test_empty() { fn pi_test_bad() { let mut pi = PI::default(); pi.target = "x".into(); - pi.instructions.push("da?>t1".into()); + pi.data = "da?>t1".into(); let got = pi.to_string(); let want = ""; assert_eq!(got, want); diff --git a/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/ConfTest.java b/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/ConfTest.java index f614fed..7120496 100644 --- a/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/ConfTest.java +++ b/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/ConfTest.java @@ -1,7 +1,6 @@ package com.matthewjamesbriggs.xmltestgen; import lombok.AllArgsConstructor; -import org.junit.Test; import org.w3c.dom.Element; import lombok.Getter; @@ -25,7 +24,7 @@ @Getter private final String sections; @Getter - private final boolean namespace; + private final XNamespaces namespaces; @Getter private final ConfType confType; @Getter @@ -44,7 +43,11 @@ id = X.getRequiredAttribute(element, "ID"); String entitiesStr = X.getOptionalAttribute(element, "ENTITIES"); // both, general, none, 
parameter, entities = Entities.fromString(entitiesStr); - namespace = X.getOptionalAttribute(element, "NAMESPACE").equals("yes"); + if (X.getOptionalAttribute(element, "NAMESPACE").equals("yes")) { + namespaces = XNamespaces.ON; + } else { + namespaces = XNamespaces.OFF; + } output = X.getOptionalAttribute(element, "OUTPUT"); String recommendationStr = X.getOptionalAttribute(element, "RECOMMENDATION"); // NS1.0, XML1.0, XML1.0-errata2e, XML1.1 @@ -72,7 +75,7 @@ public String toString() { entities.toString(), path.toString(), output, - namespace, + namespaces, confType.toString(), xmlVersion.toString(), sections); diff --git a/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/ConfTestGenerator.java b/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/ConfTestGenerator.java index 667b997..a5cc537 100644 --- a/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/ConfTestGenerator.java +++ b/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/ConfTestGenerator.java @@ -18,7 +18,7 @@ class ConfTestGenerator { /// The maximum number of W3C tests of ConfType.Valid that will be generated. - private static final int MAX_VALID = 5; + private static final int MAX_VALID = 17; /// The maximum number of W3C tests of ConfType.NotWellFormed that will be generated. private static final int MAX_NOT_WELL_FORMED = 5; /// The tests directory, e.g. 
exile_repo/exile/tests @@ -78,11 +78,12 @@ XmlVersion getVersion() throws TestGenException { } } + // TODO - get rid of this @AllArgsConstructor private static class PI { @Getter private final String target; @Getter - private final List instructions; + private final String data; } /** @@ -352,7 +353,7 @@ private static void writeExpectedFunction(ConfTest t, OutputStreamWriter os) throws TestGenException { F.writeln(os, "fn expected() -> Document {"); - Document doc = X.loadShallow(t.getPath().toFile()); + Document doc = X.loadShallow(t.getPath().toFile(), t.getNamespaces()); F.writeln(os, "let mut doc = Document::new();"); writeExpectedXmlDeclaration(foundDecl, os); List prelude = findPrelude(doc); @@ -399,25 +400,13 @@ private static void writeExpectedPostlude(List postlude, OutputStreamWrite private static PI parseProcessingInstruction(ProcessingInstruction pi) throws TestGenException { String target = pi.getTarget(); String data = pi.getData(); - String[] split = data.split("\\s"); - List instructions = new ArrayList<>(); - for (String s : split) { - String trimmed = s.trim(); - if (!trimmed.isEmpty()) { - instructions.add(trimmed); - } - } - return new PI(target, instructions); + return new PI(target, data); } private static void constructProcessingInstruction(PI pi, OutputStreamWriter os) throws TestGenException { F.writeln(os, "exile::PI {"); - F.writeln(os, "target: r#\"%s\"#.into(),", pi.getTarget()); - F.writeln(os, "instructions: vec!["); - for (String instruction : pi.getInstructions()) { - F.writeln(os, "r#\"%s\"#.to_owned(),", instruction); - } - F.writeln(os, "],"); + F.writeln(os, "target: %s.into(),", rustStringLiteral(pi.getTarget())); + F.writeln(os, "data: %s.into(),", rustStringLiteral(pi.getData())); F.writeln(os, "}"); } @@ -495,17 +484,94 @@ private static void writeTextChild(String parentVariableName, ConfTest t, Document doc, OutputStreamWriter os) throws TestGenException { - // TODO - if we start to support ignorable whitespace nodes or 
preserve directives, this will not work - if (child.isElementContentWhitespace()) { - return; + XText xtext = new XText(child); + // HACK: this is quite difficult. The DOM presents us with 'ignorable whitespace' but does not mark it as such + // unless the parser is in validation mode. In the presence of a doctype, when an element is specified as + // containing other elements and not PCDATA, then the DOM marks isElementContentWhitespace true. But sometimes + // we have no doctype and we essentially have to guess. + if (xtext.getDocType() == null && xtext.getData().trim().length() == 0) { + // Because there is no doctype and the text is nothing but whitespace, we are assuming that this is just the + // newlines and whitespace pretty-printing between elements. Not something we want to add to the exile DOM. + System.out.println("skipping what is likely element whitespace"); + } else if (!xtext.isElementContentWhitespace()) { + String data = xtext.getData(); + // this is a little bit scary. the exile parser will always treat whitespace as 'replace', which is what + // many(?) parsers do. but the Java parser is more correct than that. it only does so when validating. so + // here we hand-rolled the replacing and collapsing algs to view the string as exile intends to. + data = normalizeWhitespace(data); + if (data.isEmpty()) { + return; + } + data = rustStringLiteral(data); + F.writeln(os, "%s.add_text(%s);", parentVariableName, data); } - // HACK - this is a super-funky way of figuring out whether the text node is ignoreable whitespace - if (child.getWholeText().trim().isEmpty()) { - return; + } + + /** + * All occurrences of #x9 (tab), #xA (line feed) and #xD (carriage return) are replaced with #x20 (space). 
+ */ + private static boolean isWhite(int c) { + return (c == ' ') || (c == '\t') || (c == '\n') || (c == '\r'); + } + + /** + * Subsequent to the replacements specified above under replace, contiguous sequences of #x20s are collapsed to a + * single #x20, and initial and/or final #x20s are deleted. + */ + private static String normalizeWhitespace(String s) { + boolean hasNonWhite = false; + boolean spaceBuffer = false; + StringBuilder result = new StringBuilder(s.length()); + int l = s.length(); + for (int i = 0; i < l; i++) { + int c = s.codePointAt(i); + boolean isW = isWhite(c); + if (isW) { + if (!hasNonWhite) { + continue; + } + if (!spaceBuffer) { + spaceBuffer = true; + } + } else { + hasNonWhite = true; + if (spaceBuffer) { + result.append(' '); + spaceBuffer = false; + } + result.append((char) c); + } } - // TODO - this probably not work once we get into more complicated test cases (e.g. CData and entities, etc) - String text = child.getWholeText(); - F.writeln(os, "%s.add_text(r#\"%s\"#);", parentVariableName, text); + return result.toString(); + } + + private static String rustStringLiteral(String s) { + if (s.contains("\r") || + s.contains("\t") || + s.contains("\b") || + s.contains("\n") || + s.contains("\f") || + s.contains("\u00a0")) { + return String.format("\"%s\"", rustEscape(s)); + } else { + if (s.contains("\"#")) { + return String.format("r###\"%s\"###", s); + } else { + return String.format("r#\"%s\"#", s); + } + } + } + + private static String rustEscape(String s) { + s = s.replaceAll("\\\\", "\\\\"); + s = s.replaceAll("\"", "\\\""); + s = s.replaceAll("\n", "\\\\n"); + s = s.replaceAll("\r", "\\\\r"); + s = s.replaceAll("\t", "\\\\t"); + s = s.replaceAll("\b", "\\\\b"); + s = s.replaceAll("\f", "\\\\f"); + s = s.replaceAll("\u00a0", "\\\\u{00a0}"); + return s; } private static void writeElementChild(String parentVariableName, diff --git a/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/ConfTestParser.java 
b/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/ConfTestParser.java index 14e06ea..9c172c8 100644 --- a/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/ConfTestParser.java +++ b/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/ConfTestParser.java @@ -17,7 +17,7 @@ class ConfTestParser { static List parse(String w3cXmlFilepath) throws TestGenException { - Document doc = X.loadComplete(new File(w3cXmlFilepath)); + Document doc = X.loadComplete(new File(w3cXmlFilepath), XNamespaces.OFF); return parseDocument(doc); } @@ -170,7 +170,7 @@ private static ConfTest makeExileConfTest(ExileFiles location) throws TestGenExc String id = location.getCoreName(); Recommendation recommendation = metadata.getRecommendation(); final String sections = "N/A"; - boolean namespace = ExileTestMetadata.getNamespace(); + XNamespaces namespaces = XNamespaces.fromBoolean(ExileTestMetadata.getNamespace()); ConfType confType = metadata.getSyntax().getConfType(); XmlVersion xmlVersion = metadata.getXmlVersion(); String prefix = confTestCases.getPrefix(); @@ -189,7 +189,7 @@ private static ConfTest makeExileConfTest(ExileFiles location) throws TestGenExc null, recommendation, sections, - namespace, + namespaces, confType, xmlVersion, prefix, diff --git a/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/X.java b/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/X.java index 0f55d4d..5feae32 100644 --- a/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/X.java +++ b/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/X.java @@ -70,27 +70,33 @@ static String getOptionalAttribute(Element element, String key) { * @return * @throws TestGenException */ - static Document loadComplete(File file) throws TestGenException { - return load(file, true); + static Document loadComplete(File file, XNamespaces namespaces) throws TestGenException { + return load(file, XEntityExpansion.ON, 
namespaces); } - static Document loadShallow(File file) throws TestGenException { - return load(file, false); + static Document loadShallow(File file, XNamespaces namespaces) throws TestGenException { + return load(file, XEntityExpansion.OFF, namespaces); } - private static Document load(File file, boolean expandEntities) throws TestGenException { + private static Document load(File file, + XEntityExpansion entityEpansion, + XNamespaces namespace) throws TestGenException { file = F.canonicalize(file); F.checkFile(file); String uri = file.toPath().toUri().toString(); try { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); - if (!expandEntities) { + if (entityEpansion == XEntityExpansion.OFF) { // https://bugs.openjdk.java.net/browse/JDK-8217937 factory.setExpandEntityReferences(false); } + factory.setNamespaceAware(namespace == XNamespaces.ON); + factory.setIgnoringElementContentWhitespace(true); DocumentBuilder builder = factory.newDocumentBuilder(); Document document = builder.parse(uri); + // TODO - is this working? 
+ document.normalizeDocument(); return document; } catch (FactoryConfigurationError e) { throw new TestGenException("unable to get a document builder factory", e); diff --git a/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/XEntityExpansion.java b/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/XEntityExpansion.java new file mode 100644 index 0000000..7d93472 --- /dev/null +++ b/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/XEntityExpansion.java @@ -0,0 +1,5 @@ +package com.matthewjamesbriggs.xmltestgen; + +public enum XEntityExpansion { + ON, OFF +} diff --git a/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/XNamespaces.java b/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/XNamespaces.java new file mode 100644 index 0000000..90974b7 --- /dev/null +++ b/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/XNamespaces.java @@ -0,0 +1,13 @@ +package com.matthewjamesbriggs.xmltestgen; + +public enum XNamespaces { + ON, OFF; + + public static XNamespaces fromBoolean(boolean isNamespacesOn) { + if (isNamespacesOn) { + return ON; + } else { + return OFF; + } + } +} diff --git a/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/XText.java b/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/XText.java new file mode 100644 index 0000000..c6cd107 --- /dev/null +++ b/testgen/xmltestgen/src/main/java/com/matthewjamesbriggs/xmltestgen/XText.java @@ -0,0 +1,58 @@ +package com.matthewjamesbriggs.xmltestgen; + +import lombok.Getter; +import org.w3c.dom.DocumentType; +import org.w3c.dom.Node; +import org.w3c.dom.Text; + +/** + * Realizes all of the data fields of an XML text node so that we can see the values. 
+ */ +public class XText { + @Getter + private final String parentNodeName; + @Getter + private final String parentLocalName; + @Getter + private final XType parentNodeType; + @Getter + private final String data; + @Getter + private final String wholeText; + @Getter + private final String textContent; + @Getter + private final int numChildNodes; + @Getter + private final int length; + @Getter + private final XType xtype; + @Getter + private final String nodeValue; + @Getter + private final boolean isElementContentWhitespace; + @Getter + private final DocumentType docType; + + public XText(Text text) { + Node parent = text.getParentNode(); + if (parent != null) { + parentNodeName = parent.getNodeName(); + parentLocalName = parent.getLocalName(); + parentNodeType = XType.fromNode(parent); + } else { + parentNodeName = ""; + parentLocalName = ""; + parentNodeType = XType.Unknown; + } + data = text.getData(); + wholeText = text.getWholeText(); + textContent = text.getTextContent(); + numChildNodes = text.getChildNodes().getLength(); + length = text.getLength(); + xtype = XType.fromNode(text); + nodeValue = text.getNodeValue(); + isElementContentWhitespace = text.isElementContentWhitespace(); + docType = text.getOwnerDocument().getDoctype(); + } +} diff --git a/tests/generated/exile_doctypes_comments_pis.rs b/tests/generated/exile_doctypes_comments_pis.rs index 489593a..d54362b 100644 --- a/tests/generated/exile_doctypes_comments_pis.rs +++ b/tests/generated/exile_doctypes_comments_pis.rs @@ -20,33 +20,33 @@ fn expected() -> Document { // TODO - write doctype information doc.push_prolog_misc(exile::Misc::PI(exile::PI { target: r#"pi"#.into(), - instructions: vec![r#"before"#.to_owned(), r#"doctype"#.to_owned()], + data: r#"before doctype "#.into(), })); doc.push_prolog_misc(exile::Misc::PI(exile::PI { target: r#"pi"#.into(), - instructions: vec![r#"after"#.to_owned(), r#"doctype"#.to_owned()], + data: r#"after doctype "#.into(), })); let root = doc.root_mut(); 
root.set_name(r#"note"#); - let gen1n3 = root.add_new_child().unwrap(); - gen1n3.set_name(r#"to"#); - gen1n3.add_text(r#"Tove"#); - let gen1n5 = root.add_new_child().unwrap(); - gen1n5.set_name(r#"from"#); - gen1n5.add_text(r#"Jani"#); - gen1n5.add_pi(exile::PI { + let gen1n1 = root.add_new_child().unwrap(); + gen1n1.set_name(r#"to"#); + gen1n1.add_text(r#"Tove"#); + let gen1n2 = root.add_new_child().unwrap(); + gen1n2.set_name(r#"from"#); + gen1n2.add_text(r#"Jani"#); + gen1n2.add_pi(exile::PI { target: r#"pi"#.into(), - instructions: vec![r#"in"#.to_owned(), r#"element"#.to_owned()], + data: r#"in element "#.into(), }); - let gen1n7 = root.add_new_child().unwrap(); - gen1n7.set_name(r#"heading"#); - gen1n7.add_text(r#"Reminder"#); - let gen1n9 = root.add_new_child().unwrap(); - gen1n9.set_name(r#"body"#); - gen1n9.add_text(r#"Don't forget me this weekend"#); + let gen1n3 = root.add_new_child().unwrap(); + gen1n3.set_name(r#"heading"#); + gen1n3.add_text(r#"Reminder"#); + let gen1n4 = root.add_new_child().unwrap(); + gen1n4.set_name(r#"body"#); + gen1n4.add_text(r#"Don't forget me this weekend"#); doc.push_epilog_misc(exile::Misc::PI(exile::PI { target: r#"pi"#.into(), - instructions: vec![r#"at"#.to_owned(), r#"the"#.to_owned(), r#"end"#.to_owned()], + data: r#"at the end "#.into(), })); doc } diff --git a/tests/generated/exile_pi.rs b/tests/generated/exile_pi.rs index 1ec477d..4e4e691 100644 --- a/tests/generated/exile_pi.rs +++ b/tests/generated/exile_pi.rs @@ -19,23 +19,23 @@ fn expected() -> Document { }); doc.push_prolog_misc(exile::Misc::PI(exile::PI { target: r#"a"#.into(), - instructions: vec![r#"b"#.to_owned()], + data: r#"b"#.into(), })); let root = doc.root_mut(); root.set_name(r#"c"#); root.add_pi(exile::PI { target: r#"d"#.into(), - instructions: vec![r#"e"#.to_owned()], + data: r#"e"#.into(), }); let gen1n3 = root.add_new_child().unwrap(); gen1n3.set_name(r#"f"#); root.add_pi(exile::PI { target: r#"g"#.into(), - instructions: 
vec![r#"h"#.to_owned()], + data: r#"h"#.into(), }); doc.push_epilog_misc(exile::Misc::PI(exile::PI { target: r#"i"#.into(), - instructions: vec![r#"j"#.to_owned()], + data: r#"j"#.into(), })); doc } diff --git a/tests/generated/exile_whitespace_normalization.rs b/tests/generated/exile_whitespace_normalization.rs new file mode 100644 index 0000000..02dfc20 --- /dev/null +++ b/tests/generated/exile_whitespace_normalization.rs @@ -0,0 +1,41 @@ +// generated file, do not edit + +use crate::test_utils::run_parse_test; +use exile::{Declaration, Document, Encoding, Version}; + +const INPUT_FILE: &str = "exile_whitespace_normalization.xml"; + +#[test] +/// figure out how we handle whitespace in text elements +fn whitespace_normalization_parse() { + run_parse_test(INPUT_FILE, &expected()); +} + +fn expected() -> Document { + let mut doc = Document::new(); + doc.set_declaration(Declaration { + version: Some(Version::V10), + encoding: Some(Encoding::Utf8), + }); + let root = doc.root_mut(); + root.set_name(r#"root"#); + let gen1n1 = root.add_new_child().unwrap(); + gen1n1.set_name(r#"text"#); + gen1n1.add_text(r#"Hello World"#); + let gen1n3 = root.add_new_child().unwrap(); + gen1n3.set_name(r#"text"#); + gen1n3.add_text(r#"Hello World"#); + let gen1n5 = root.add_new_child().unwrap(); + gen1n5.set_name(r#"text"#); + gen1n5.add_text("\u{00a0}\u{00a0}Hello\u{00a0}\u{00a0}World\u{00a0}\u{00a0}"); + let gen1n7 = root.add_new_child().unwrap(); + gen1n7.set_name(r#"text"#); + gen1n7.add_text(r#"Hello World"#); + let gen1n9 = root.add_new_child().unwrap(); + gen1n9.set_name(r#"text"#); + gen1n9.add_text(r#"Hello World"#); + let gen1n11 = root.add_new_child().unwrap(); + gen1n11.set_name(r#"text"#); + gen1n11.add_text("\u{00a0}"); + doc +} diff --git a/tests/generated/jclark_valid_sa_006.rs b/tests/generated/jclark_valid_sa_006.rs new file mode 100644 index 0000000..bf473f7 --- /dev/null +++ b/tests/generated/jclark_valid_sa_006.rs @@ -0,0 +1,25 @@ +// generated file, do not edit + 
+use crate::test_utils::run_parse_test; +use exile::{Declaration, Document}; + +const INPUT_FILE: &str = "jclark_valid_sa_006.xml"; + +#[test] +/// A valid XML file from the W3C conformance test suite: valid-sa-006 +fn valid_sa_006_parse() { + run_parse_test(INPUT_FILE, &expected()); +} + +fn expected() -> Document { + let mut doc = Document::new(); + doc.set_declaration(Declaration { + version: None, + encoding: None, + }); + // TODO - write doctype information + let root = doc.root_mut(); + root.set_name(r#"doc"#); + root.add_attribute(r#"a1"#, r#"v1"#); + doc +} diff --git a/tests/generated/jclark_valid_sa_007.rs b/tests/generated/jclark_valid_sa_007.rs new file mode 100644 index 0000000..b22aa64 --- /dev/null +++ b/tests/generated/jclark_valid_sa_007.rs @@ -0,0 +1,24 @@ +// generated file, do not edit + +use crate::test_utils::run_parse_test; +use exile::{Declaration, Document}; + +const INPUT_FILE: &str = "jclark_valid_sa_007.xml"; + +#[test] +/// A valid XML file from the W3C conformance test suite: valid-sa-007 +fn valid_sa_007_parse() { + run_parse_test(INPUT_FILE, &expected()); +} + +fn expected() -> Document { + let mut doc = Document::new(); + doc.set_declaration(Declaration { + version: None, + encoding: None, + }); + // TODO - write doctype information + let root = doc.root_mut(); + root.set_name(r#"doc"#); + doc +} diff --git a/tests/generated/jclark_valid_sa_008.rs b/tests/generated/jclark_valid_sa_008.rs new file mode 100644 index 0000000..f7e95a3 --- /dev/null +++ b/tests/generated/jclark_valid_sa_008.rs @@ -0,0 +1,25 @@ +// generated file, do not edit + +use crate::test_utils::run_parse_test; +use exile::{Declaration, Document}; + +const INPUT_FILE: &str = "jclark_valid_sa_008.xml"; + +#[test] +/// A valid XML file from the W3C conformance test suite: valid-sa-008 +fn valid_sa_008_parse() { + run_parse_test(INPUT_FILE, &expected()); +} + +fn expected() -> Document { + let mut doc = Document::new(); + doc.set_declaration(Declaration { + version: 
None, + encoding: None, + }); + // TODO - write doctype information + let root = doc.root_mut(); + root.set_name(r#"doc"#); + root.add_text(r#"&<>"'"#); + doc +} diff --git a/tests/generated/jclark_valid_sa_009.rs b/tests/generated/jclark_valid_sa_009.rs new file mode 100644 index 0000000..9e36650 --- /dev/null +++ b/tests/generated/jclark_valid_sa_009.rs @@ -0,0 +1,24 @@ +// generated file, do not edit + +use crate::test_utils::run_parse_test; +use exile::{Declaration, Document}; + +const INPUT_FILE: &str = "jclark_valid_sa_009.xml"; + +#[test] +/// A valid XML file from the W3C conformance test suite: valid-sa-009 +fn valid_sa_009_parse() { + run_parse_test(INPUT_FILE, &expected()); +} + +fn expected() -> Document { + let mut doc = Document::new(); + doc.set_declaration(Declaration { + version: None, + encoding: None, + }); + // TODO - write doctype information + let root = doc.root_mut(); + root.set_name(r#"doc"#); + doc +} diff --git a/tests/generated/jclark_valid_sa_010.rs b/tests/generated/jclark_valid_sa_010.rs new file mode 100644 index 0000000..47e995b --- /dev/null +++ b/tests/generated/jclark_valid_sa_010.rs @@ -0,0 +1,25 @@ +// generated file, do not edit + +use crate::test_utils::run_parse_test; +use exile::{Declaration, Document}; + +const INPUT_FILE: &str = "jclark_valid_sa_010.xml"; + +#[test] +/// A valid XML file from the W3C conformance test suite: valid-sa-010 +fn valid_sa_010_parse() { + run_parse_test(INPUT_FILE, &expected()); +} + +fn expected() -> Document { + let mut doc = Document::new(); + doc.set_declaration(Declaration { + version: None, + encoding: None, + }); + // TODO - write doctype information + let root = doc.root_mut(); + root.set_name(r#"doc"#); + root.add_attribute(r#"a1"#, r#"v1"#); + doc +} diff --git a/tests/generated/jclark_valid_sa_011.rs b/tests/generated/jclark_valid_sa_011.rs new file mode 100644 index 0000000..d05580a --- /dev/null +++ b/tests/generated/jclark_valid_sa_011.rs @@ -0,0 +1,26 @@ +// generated file, do not 
edit + +use crate::test_utils::run_parse_test; +use exile::{Declaration, Document}; + +const INPUT_FILE: &str = "jclark_valid_sa_011.xml"; + +#[test] +/// A valid XML file from the W3C conformance test suite: valid-sa-011 +fn valid_sa_011_parse() { + run_parse_test(INPUT_FILE, &expected()); +} + +fn expected() -> Document { + let mut doc = Document::new(); + doc.set_declaration(Declaration { + version: None, + encoding: None, + }); + // TODO - write doctype information + let root = doc.root_mut(); + root.set_name(r#"doc"#); + root.add_attribute(r#"a1"#, r#"v1"#); + root.add_attribute(r#"a2"#, r#"v2"#); + doc +} diff --git a/tests/generated/jclark_valid_sa_012.rs b/tests/generated/jclark_valid_sa_012.rs new file mode 100644 index 0000000..558e14e --- /dev/null +++ b/tests/generated/jclark_valid_sa_012.rs @@ -0,0 +1,25 @@ +// generated file, do not edit + +use crate::test_utils::run_parse_test; +use exile::{Declaration, Document}; + +const INPUT_FILE: &str = "jclark_valid_sa_012.xml"; + +#[test] +/// A valid XML file from the W3C conformance test suite: valid-sa-012 +fn valid_sa_012_parse() { + run_parse_test(INPUT_FILE, &expected()); +} + +fn expected() -> Document { + let mut doc = Document::new(); + doc.set_declaration(Declaration { + version: None, + encoding: None, + }); + // TODO - write doctype information + let root = doc.root_mut(); + root.set_name(r#"doc"#); + root.add_attribute(r#":"#, r#"v1"#); + doc +} diff --git a/tests/generated/jclark_valid_sa_013.rs b/tests/generated/jclark_valid_sa_013.rs new file mode 100644 index 0000000..9cb1c7d --- /dev/null +++ b/tests/generated/jclark_valid_sa_013.rs @@ -0,0 +1,25 @@ +// generated file, do not edit + +use crate::test_utils::run_parse_test; +use exile::{Declaration, Document}; + +const INPUT_FILE: &str = "jclark_valid_sa_013.xml"; + +#[test] +/// A valid XML file from the W3C conformance test suite: valid-sa-013 +fn valid_sa_013_parse() { + run_parse_test(INPUT_FILE, &expected()); +} + +fn expected() -> 
Document { + let mut doc = Document::new(); + doc.set_declaration(Declaration { + version: None, + encoding: None, + }); + // TODO - write doctype information + let root = doc.root_mut(); + root.set_name(r#"doc"#); + root.add_attribute(r#"_.-0123456789"#, r#"v1"#); + doc +} diff --git a/tests/generated/jclark_valid_sa_014.rs b/tests/generated/jclark_valid_sa_014.rs new file mode 100644 index 0000000..3786d68 --- /dev/null +++ b/tests/generated/jclark_valid_sa_014.rs @@ -0,0 +1,25 @@ +// generated file, do not edit + +use crate::test_utils::run_parse_test; +use exile::{Declaration, Document}; + +const INPUT_FILE: &str = "jclark_valid_sa_014.xml"; + +#[test] +/// A valid XML file from the W3C conformance test suite: valid-sa-014 +fn valid_sa_014_parse() { + run_parse_test(INPUT_FILE, &expected()); +} + +fn expected() -> Document { + let mut doc = Document::new(); + doc.set_declaration(Declaration { + version: None, + encoding: None, + }); + // TODO - write doctype information + let root = doc.root_mut(); + root.set_name(r#"doc"#); + root.add_attribute(r#"abcdefghijklmnopqrstuvwxyz"#, r#"v1"#); + doc +} diff --git a/tests/generated/jclark_valid_sa_015.rs b/tests/generated/jclark_valid_sa_015.rs new file mode 100644 index 0000000..3e788d7 --- /dev/null +++ b/tests/generated/jclark_valid_sa_015.rs @@ -0,0 +1,25 @@ +// generated file, do not edit + +use crate::test_utils::run_parse_test; +use exile::{Declaration, Document}; + +const INPUT_FILE: &str = "jclark_valid_sa_015.xml"; + +#[test] +/// A valid XML file from the W3C conformance test suite: valid-sa-015 +fn valid_sa_015_parse() { + run_parse_test(INPUT_FILE, &expected()); +} + +fn expected() -> Document { + let mut doc = Document::new(); + doc.set_declaration(Declaration { + version: None, + encoding: None, + }); + // TODO - write doctype information + let root = doc.root_mut(); + root.set_name(r#"doc"#); + root.add_attribute(r#"ABCDEFGHIJKLMNOPQRSTUVWXYZ"#, r#"v1"#); + doc +} diff --git 
a/tests/generated/jclark_valid_sa_016.rs b/tests/generated/jclark_valid_sa_016.rs new file mode 100644 index 0000000..47906fa --- /dev/null +++ b/tests/generated/jclark_valid_sa_016.rs @@ -0,0 +1,28 @@ +// generated file, do not edit + +use crate::test_utils::run_parse_test; +use exile::{Declaration, Document}; + +const INPUT_FILE: &str = "jclark_valid_sa_016.xml"; + +#[test] +/// A valid XML file from the W3C conformance test suite: valid-sa-016 +fn valid_sa_016_parse() { + run_parse_test(INPUT_FILE, &expected()); +} + +fn expected() -> Document { + let mut doc = Document::new(); + doc.set_declaration(Declaration { + version: None, + encoding: None, + }); + // TODO - write doctype information + let root = doc.root_mut(); + root.set_name(r#"doc"#); + root.add_pi(exile::PI { + target: r#"pi"#.into(), + data: r#""#.into(), + }); + doc +} diff --git a/tests/generated/jclark_valid_sa_017.rs b/tests/generated/jclark_valid_sa_017.rs new file mode 100644 index 0000000..1964447 --- /dev/null +++ b/tests/generated/jclark_valid_sa_017.rs @@ -0,0 +1,28 @@ +// generated file, do not edit + +use crate::test_utils::run_parse_test; +use exile::{Declaration, Document}; + +const INPUT_FILE: &str = "jclark_valid_sa_017.xml"; + +#[test] +/// A valid XML file from the W3C conformance test suite: valid-sa-017 +fn valid_sa_017_parse() { + run_parse_test(INPUT_FILE, &expected()); +} + +fn expected() -> Document { + let mut doc = Document::new(); + doc.set_declaration(Declaration { + version: None, + encoding: None, + }); + // TODO - write doctype information + let root = doc.root_mut(); + root.set_name(r#"doc"#); + root.add_pi(exile::PI { + target: r#"pi"#.into(), + data: r#"some data ? 
> + + Hello World + Hello World +   Hello  World   + Hello World + Hello World +   + diff --git a/tests/input_data/jclark_valid_sa_006.xml b/tests/input_data/jclark_valid_sa_006.xml new file mode 100644 index 0000000..39a3463 --- /dev/null +++ b/tests/input_data/jclark_valid_sa_006.xml @@ -0,0 +1,5 @@ + + +]> + diff --git a/tests/input_data/jclark_valid_sa_007.xml b/tests/input_data/jclark_valid_sa_007.xml new file mode 100644 index 0000000..cc3dc53 --- /dev/null +++ b/tests/input_data/jclark_valid_sa_007.xml @@ -0,0 +1,4 @@ + +]> + diff --git a/tests/input_data/jclark_valid_sa_008.xml b/tests/input_data/jclark_valid_sa_008.xml new file mode 100644 index 0000000..b3370eb --- /dev/null +++ b/tests/input_data/jclark_valid_sa_008.xml @@ -0,0 +1,4 @@ + +]> +&<>"' diff --git a/tests/input_data/jclark_valid_sa_009.xml b/tests/input_data/jclark_valid_sa_009.xml new file mode 100644 index 0000000..0fa183e --- /dev/null +++ b/tests/input_data/jclark_valid_sa_009.xml @@ -0,0 +1,4 @@ + +]> + diff --git a/tests/input_data/jclark_valid_sa_010.xml b/tests/input_data/jclark_valid_sa_010.xml new file mode 100644 index 0000000..eb64d18 --- /dev/null +++ b/tests/input_data/jclark_valid_sa_010.xml @@ -0,0 +1,5 @@ + + +]> + diff --git a/tests/input_data/jclark_valid_sa_011.xml b/tests/input_data/jclark_valid_sa_011.xml new file mode 100644 index 0000000..4cac44b --- /dev/null +++ b/tests/input_data/jclark_valid_sa_011.xml @@ -0,0 +1,5 @@ + + +]> + diff --git a/tests/input_data/jclark_valid_sa_012.xml b/tests/input_data/jclark_valid_sa_012.xml new file mode 100644 index 0000000..6ce2a3e --- /dev/null +++ b/tests/input_data/jclark_valid_sa_012.xml @@ -0,0 +1,5 @@ + + +]> + diff --git a/tests/input_data/jclark_valid_sa_013.xml b/tests/input_data/jclark_valid_sa_013.xml new file mode 100644 index 0000000..2f4aae4 --- /dev/null +++ b/tests/input_data/jclark_valid_sa_013.xml @@ -0,0 +1,5 @@ + + +]> + diff --git a/tests/input_data/jclark_valid_sa_014.xml 
b/tests/input_data/jclark_valid_sa_014.xml new file mode 100644 index 0000000..47f1f72 --- /dev/null +++ b/tests/input_data/jclark_valid_sa_014.xml @@ -0,0 +1,5 @@ + + +]> + diff --git a/tests/input_data/jclark_valid_sa_015.xml b/tests/input_data/jclark_valid_sa_015.xml new file mode 100644 index 0000000..861df8a --- /dev/null +++ b/tests/input_data/jclark_valid_sa_015.xml @@ -0,0 +1,5 @@ + + +]> + diff --git a/tests/input_data/jclark_valid_sa_016.xml b/tests/input_data/jclark_valid_sa_016.xml new file mode 100644 index 0000000..66b1973 --- /dev/null +++ b/tests/input_data/jclark_valid_sa_016.xml @@ -0,0 +1,4 @@ + +]> + diff --git a/tests/input_data/jclark_valid_sa_017.xml b/tests/input_data/jclark_valid_sa_017.xml new file mode 100644 index 0000000..827ba96 --- /dev/null +++ b/tests/input_data/jclark_valid_sa_017.xml @@ -0,0 +1,4 @@ + +]> +