Skip to content

Commit

Permalink
fix: bom file
Browse files Browse the repository at this point in the history
  • Loading branch information
bokuweb committed Feb 13, 2020
1 parent a49a343 commit 2230efb
Show file tree
Hide file tree
Showing 21 changed files with 141 additions and 18 deletions.
2 changes: 1 addition & 1 deletion docx-core/examples/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::fs::*;
use std::io::Read;

pub fn main() {
let mut file = File::open("./run.docx").unwrap();
let mut file = File::open("./10.docx").unwrap();
let mut buf = vec![];
file.read_to_end(&mut buf).unwrap();
dbg!(read_docx(&buf).unwrap().json());
Expand Down
2 changes: 1 addition & 1 deletion docx-core/src/documents/content_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ impl FromXML for ContentTypes {
Ok(XmlEvent::EndElement { .. }) => {
depth -= 1;
}
Err(_) => return Err(ReaderError::XMLReadError),
Err(_) => {}
_ => {}
}
}
Expand Down
4 changes: 2 additions & 2 deletions docx-core/src/reader/document_rels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ pub fn read_document_rels(
.ok_or(ReaderError::DocumentRelsNotFoundError)?;
let p = find_rels_filename(&main_path)?;
let p = p.to_str().ok_or(ReaderError::DocumentRelsNotFoundError)?;
let rels_xml = archive.by_name(&p)?;
let rels = read_rels_xml(rels_xml, dir)?;
let data = read_zip(archive, &p)?;
let rels = read_rels_xml(&data[..], dir)?;
Ok(rels)
}

Expand Down
35 changes: 25 additions & 10 deletions docx-core/src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ mod level;
mod numbering_property;
mod numberings;
mod paragraph;
mod read_zip;
mod rels;
mod run;
mod style;
Expand All @@ -27,6 +28,7 @@ pub use attributes::*;
pub use document_rels::*;
pub use errors::ReaderError;
pub use from_xml::*;
pub use read_zip::*;
pub use xml_element::*;

const DOC_RELATIONSHIP_TYPE: &str =
Expand All @@ -41,36 +43,49 @@ pub fn read_docx(buf: &[u8]) -> Result<Docx, ReaderError> {
let mut archive = zip::ZipArchive::new(cur)?;
// First, the content type for relationship parts and the Main Document part
// (the only required part) must be defined (physically located at /[Content_Types].xml in the package)
let content_types_xml = archive.by_name("[Content_Types].xml")?;
let _content_types = ContentTypes::from_xml(content_types_xml)?;
let _content_types = {
let data = read_zip(&mut archive, "[Content_Types].xml")?;
ContentTypes::from_xml(&data[..])?
};

// Next, the single required relationship (the package-level relationship to the Main Document part)
// must be defined (physically located at /_rels/.rels in the package)
let rels_xml = archive.by_name("_rels/.rels")?;
let rels = Rels::from_xml(rels_xml)?;
let rels = {
let data = read_zip(&mut archive, "_rels/.rels")?;
Rels::from_xml(&data[..])?
};
// Finally, the minimum content for the Main Document part must be defined
// (physically located at /document.xml in the package):
let main_rel = rels
.find_target(DOC_RELATIONSHIP_TYPE)
.ok_or(ReaderError::DocumentNotFoundError)?;
let document_xml = archive.by_name(&main_rel.2)?;
let document = Document::from_xml(document_xml)?;
let document = {
let data = read_zip(&mut archive, &main_rel.2)?;
Document::from_xml(&data[..])?
};
let mut docx = Docx::new().document(document);
// Read document relationships
let rels = read_document_rels(&mut archive, &main_rel.2)?;

// Read styles
let style_path = rels.find_target_path(STYLE_RELATIONSHIP_TYPE);
if let Some(style_path) = style_path {
let styles_xml = archive.by_name(style_path.to_str().expect("should have styles"))?;
let styles = Styles::from_xml(styles_xml)?;
let data = read_zip(
&mut archive,
style_path.to_str().expect("should have styles"),
)?;
let styles = Styles::from_xml(&data[..])?;
docx = docx.styles(styles);
}

// Read numberings
let num_path = rels.find_target_path(NUMBERING_RELATIONSHIP_TYPE);
if let Some(num_path) = num_path {
let num_xml = archive.by_name(num_path.to_str().expect("should have numberings"))?;
let nums = Numberings::from_xml(num_xml)?;
let data = read_zip(
&mut archive,
num_path.to_str().expect("should have numberings"),
)?;
let nums = Numberings::from_xml(&data[..])?;
docx = docx.numberings(nums);
}

Expand Down
24 changes: 24 additions & 0 deletions docx-core/src/reader/read_zip.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
use std::io::{Cursor, Read};
use zip;

use super::ReaderError;

/// Reads one entry of the zip archive fully into memory.
///
/// A single leading `/` in `name` is dropped before the archive lookup, and a
/// leading UTF-8 byte order mark (`EF BB BF`) is stripped from the returned
/// bytes so callers can hand the data straight to an XML parser.
///
/// # Errors
///
/// Returns a `ReaderError` (converted from the underlying zip error) when the
/// entry cannot be found or when its contents cannot be read.
pub fn read_zip(
    archive: &mut zip::read::ZipArchive<Cursor<&[u8]>>,
    name: &str,
) -> Result<Vec<u8>, ReaderError> {
    const UTF8_BOM: [u8; 3] = [0xef, 0xbb, 0xbf];

    // Look the entry up without a leading slash. '/' is one byte, so slicing
    // at index 1 always lands on a character boundary.
    let entry_name = if name.starts_with('/') {
        &name[1..]
    } else {
        name
    };
    let mut entry = archive.by_name(entry_name)?;

    let mut data = Vec::new();
    // Surface read failures (e.g. a corrupt entry) as an error instead of
    // panicking; routing through the zip error type reuses the conversion
    // already relied on by `by_name(..)?` above.
    entry
        .read_to_end(&mut data)
        .map_err(zip::result::ZipError::Io)?;

    // Remove BOM. `starts_with` is length-safe, so entries shorter than three
    // bytes (including empty ones) are returned untouched rather than
    // triggering an out-of-bounds panic.
    if data.starts_with(&UTF8_BOM) {
        data.drain(..UTF8_BOM.len());
    }
    Ok(data)
}
3 changes: 1 addition & 2 deletions docx-core/src/reader/styles.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ mod tests {

#[test]
fn test_from_xml() {
let xml =
r#"<w:styles xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
let xml = r#"<w:styles xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:style w:type="character" w:styleId="FootnoteTextChar">
<w:name w:val="Footnote Text Char"></w:name>
<w:rPr>
Expand Down
7 changes: 6 additions & 1 deletion docx-core/src/types/alignment_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ pub enum AlignmentType {
Center,
Left,
Right,
Both,
Justified,
Unsupported,
}

impl fmt::Display for AlignmentType {
Expand All @@ -19,7 +21,9 @@ impl fmt::Display for AlignmentType {
AlignmentType::Center => write!(f, "center"),
AlignmentType::Left => write!(f, "left"),
AlignmentType::Right => write!(f, "right"),
AlignmentType::Both => write!(f, "both"),
AlignmentType::Justified => write!(f, "justified"),
_ => write!(f, "unsupported"),
}
}
}
Expand All @@ -31,8 +35,9 @@ impl FromStr for AlignmentType {
"left" => Ok(AlignmentType::Left),
"right" => Ok(AlignmentType::Right),
"center" => Ok(AlignmentType::Center),
"both" => Ok(AlignmentType::Both),
"justified" => Ok(AlignmentType::Justified),
_ => Err(errors::TypeError::FromStrError),
_ => Ok(AlignmentType::Unsupported),
}
}
}
3 changes: 3 additions & 0 deletions docx-core/src/types/style_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ pub enum StyleType {
Paragraph,
Character,
Numbering,
Table,
Unsupported,
}

Expand All @@ -21,6 +22,7 @@ impl fmt::Display for StyleType {
StyleType::Paragraph => write!(f, "paragraph"),
StyleType::Character => write!(f, "character"),
StyleType::Numbering => write!(f, "numbering"),
StyleType::Table => write!(f, "table"),
StyleType::Unsupported => write!(f, "unsupported"),
}
}
Expand All @@ -33,6 +35,7 @@ impl FromStr for StyleType {
"paragraph" => Ok(StyleType::Paragraph),
"character" => Ok(StyleType::Character),
"numbering" => Ok(StyleType::Numbering),
"table" => Ok(StyleType::Table),
_ => Ok(StyleType::Unsupported),
}
}
Expand Down
15 changes: 15 additions & 0 deletions docx-core/tests/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,18 @@ pub fn read_table_merged_libre_office() {
file.write_all(json.as_bytes()).unwrap();
file.flush().unwrap();
}

/// Parses the `../fixtures/bom/bom.docx` fixture, snapshots the resulting JSON,
/// and also writes that JSON to `./tests/output/bom.json`.
#[test]
pub fn read_bom() {
    // Load the whole fixture into memory before handing it to the reader.
    let mut docx_bytes = Vec::new();
    File::open("../fixtures/bom/bom.docx")
        .unwrap()
        .read_to_end(&mut docx_bytes)
        .unwrap();

    // NOTE: the binding is intentionally called `json`; the stored snapshot
    // records the asserted expression as `&json`.
    let json = read_docx(&docx_bytes).unwrap().json();
    assert_debug_snapshot!(&json);

    // Persist the JSON alongside the other test outputs.
    let out_path = std::path::Path::new("./tests/output/bom.json");
    let mut out_file = std::fs::File::create(&out_path).unwrap();
    out_file.write_all(json.as_bytes()).unwrap();
    out_file.flush().unwrap();
}
5 changes: 5 additions & 0 deletions docx-core/tests/snapshots/lib__reader__read_bom.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: docx-core/tests/reader.rs
expression: "&json"
---
"{\n \"contentType\": {\n \"types\": {\n \"/_rels/.rels\": \"application/vnd.openxmlformats-package.relationships+xml\",\n \"/docProps/app.xml\": \"application/vnd.openxmlformats-officedocument.extended-properties+xml\",\n \"/docProps/core.xml\": \"application/vnd.openxmlformats-package.core-properties+xml\",\n \"/word/_rels/document.xml.rels\": \"application/vnd.openxmlformats-package.relationships+xml\",\n \"/word/comments.xml\": \"application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml\",\n \"/word/document.xml\": \"application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml\",\n \"/word/fontTable.xml\": \"application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml\",\n \"/word/numbering.xml\": \"application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml\",\n \"/word/settings.xml\": \"application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml\",\n \"/word/styles.xml\": \"application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml\"\n }\n },\n \"rels\": {\n \"rels\": [\n [\n \"http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties\",\n \"rId1\",\n \"docProps/core.xml\"\n ],\n [\n \"http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties\",\n \"rId2\",\n \"docProps/app.xml\"\n ],\n [\n \"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument\",\n \"rId3\",\n \"word/document.xml\"\n ]\n ]\n },\n \"documentRels\": {\n \"hasComments\": false,\n \"hasNumberings\": false\n },\n \"docProps\": {\n \"app\": {},\n \"core\": {\n \"config\": {\n \"created\": null,\n \"creator\": null,\n \"description\": null,\n \"language\": null,\n \"lastModifiedBy\": null,\n \"modified\": null,\n \"revision\": null,\n \"subject\": null,\n \"title\": null\n }\n }\n },\n \"styles\": {\n \"docDefaults\": {\n \"runPropertyDefault\": {\n \"runProperty\": {\n \"sz\": null,\n \"szCs\": null,\n 
\"color\": null,\n \"highlight\": null,\n \"underline\": null,\n \"bold\": null,\n \"boldCs\": null,\n \"italic\": null,\n \"italicCs\": null,\n \"vanish\": null\n }\n }\n },\n \"styles\": [\n {\n \"styleId\": \"Normal\",\n \"name\": \"Normal\",\n \"styleType\": \"paragraph\",\n \"runProperty\": {\n \"sz\": null,\n \"szCs\": null,\n \"color\": null,\n \"highlight\": null,\n \"underline\": null,\n \"bold\": null,\n \"boldCs\": null,\n \"italic\": null,\n \"italicCs\": null,\n \"vanish\": null\n },\n \"paragraphProperty\": {\n \"runProperty\": {\n \"sz\": null,\n \"szCs\": null,\n \"color\": null,\n \"highlight\": null,\n \"underline\": null,\n \"bold\": null,\n \"boldCs\": null,\n \"italic\": null,\n \"italicCs\": null,\n \"vanish\": null\n },\n \"style\": \"Normal\",\n \"numberingProperty\": null,\n \"alignment\": \"both\",\n \"indent\": null\n }\n },\n {\n \"styleId\": \"DefaultParagraphFont\",\n \"name\": \"Default Paragraph Font\",\n \"styleType\": \"character\",\n \"runProperty\": {\n \"sz\": null,\n \"szCs\": null,\n \"color\": null,\n \"highlight\": null,\n \"underline\": null,\n \"bold\": null,\n \"boldCs\": null,\n \"italic\": null,\n \"italicCs\": null,\n \"vanish\": null\n },\n \"paragraphProperty\": {\n \"runProperty\": {\n \"sz\": null,\n \"szCs\": null,\n \"color\": null,\n \"highlight\": null,\n \"underline\": null,\n \"bold\": null,\n \"boldCs\": null,\n \"italic\": null,\n \"italicCs\": null,\n \"vanish\": null\n },\n \"style\": \"Normal\",\n \"numberingProperty\": null,\n \"alignment\": null,\n \"indent\": null\n }\n },\n {\n \"styleId\": \"TableNormal\",\n \"name\": \"Normal Table\",\n \"styleType\": \"table\",\n \"runProperty\": {\n \"sz\": null,\n \"szCs\": null,\n \"color\": null,\n \"highlight\": null,\n \"underline\": null,\n \"bold\": null,\n \"boldCs\": null,\n \"italic\": null,\n \"italicCs\": null,\n \"vanish\": null\n },\n \"paragraphProperty\": {\n \"runProperty\": {\n \"sz\": null,\n \"szCs\": null,\n \"color\": null,\n \"highlight\": 
null,\n \"underline\": null,\n \"bold\": null,\n \"boldCs\": null,\n \"italic\": null,\n \"italicCs\": null,\n \"vanish\": null\n },\n \"style\": \"Normal\",\n \"numberingProperty\": null,\n \"alignment\": null,\n \"indent\": null\n }\n },\n {\n \"styleId\": \"NoList\",\n \"name\": \"No List\",\n \"styleType\": \"numbering\",\n \"runProperty\": {\n \"sz\": null,\n \"szCs\": null,\n \"color\": null,\n \"highlight\": null,\n \"underline\": null,\n \"bold\": null,\n \"boldCs\": null,\n \"italic\": null,\n \"italicCs\": null,\n \"vanish\": null\n },\n \"paragraphProperty\": {\n \"runProperty\": {\n \"sz\": null,\n \"szCs\": null,\n \"color\": null,\n \"highlight\": null,\n \"underline\": null,\n \"bold\": null,\n \"boldCs\": null,\n \"italic\": null,\n \"italicCs\": null,\n \"vanish\": null\n },\n \"style\": \"Normal\",\n \"numberingProperty\": null,\n \"alignment\": null,\n \"indent\": null\n }\n }\n ]\n },\n \"document\": {\n \"children\": [\n {\n \"type\": \"paragraph\",\n \"data\": {\n \"children\": [\n {\n \"type\": \"unsupported\"\n },\n {\n \"type\": \"unsupported\"\n },\n {\n \"type\": \"run\",\n \"data\": {\n \"runProperty\": {\n \"sz\": null,\n \"szCs\": null,\n \"color\": null,\n \"highlight\": null,\n \"underline\": null,\n \"bold\": null,\n \"boldCs\": null,\n \"italic\": null,\n \"italicCs\": null,\n \"vanish\": null\n },\n \"children\": [\n {\n \"type\": \"text\",\n \"data\": {\n \"preserveSpace\": true,\n \"text\": \"Hello\"\n }\n }\n ]\n }\n }\n ],\n \"property\": {\n \"runProperty\": {\n \"sz\": null,\n \"szCs\": null,\n \"color\": null,\n \"highlight\": null,\n \"underline\": null,\n \"bold\": null,\n \"boldCs\": null,\n \"italic\": null,\n \"italicCs\": null,\n \"vanish\": null\n },\n \"style\": \"Normal\",\n \"numberingProperty\": null,\n \"alignment\": null,\n \"indent\": null\n },\n \"hasNumbering\": false,\n \"attrs\": []\n }\n }\n ],\n \"sectionProperty\": {\n \"pageSize\": {\n \"w\": 11906,\n \"h\": 16838\n },\n \"pageMargin\": {\n \"top\": 
1985,\n \"left\": 1701,\n \"bottom\": 1701,\n \"right\": 1701,\n \"header\": 851,\n \"footer\": 992,\n \"gutter\": 0\n },\n \"columns\": 425,\n \"documentGrid\": 360\n },\n \"hasNumbering\": false\n },\n \"comments\": {\n \"comments\": []\n },\n \"numberings\": {\n \"abstractNums\": [],\n \"numberings\": []\n },\n \"settings\": {\n \"defaultTabStop\": 709,\n \"zoom\": 100\n },\n \"fontTable\": {}\n}"

Large diffs are not rendered by default.

Loading

0 comments on commit 2230efb

Please sign in to comment.