Skip to content

Commit

Permalink
Merge pull request #175 from openpreserve/feat/detect-extended-conf
Browse files Browse the repository at this point in the history
FEAT: Detect extended conformance
  • Loading branch information
carlwilson authored Aug 23, 2024
2 parents fed724e + 8570087 commit 4a58004
Show file tree
Hide file tree
Showing 17 changed files with 129 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ public ParsingHandler() {
}

public ParseResult getResult(final boolean isWellFormed, final List<Message> messages) {
return ParseResultImpl.of(isWellFormed, this.rootNamespace, this.declaredNamespaces, this.usedNamespaces, this.rootPrefix,
return ParseResultImpl.of(isWellFormed, this.rootNamespace, this.declaredNamespaces, this.usedNamespaces,
this.rootPrefix,
this.rootLocalName, this.attributes, messages);
}

Expand All @@ -37,6 +38,10 @@ public void startElement(String uri, String localName, String qName, Attributes
} else {
this.usedNamespaces.add(NamespaceImpl.of(uri, splitNamespace(qName)));
}
for (int index = 0; index < attributes.getLength(); index++) {
this.usedNamespaces
.add(NamespaceImpl.of(attributes.getURI(index), splitNamespace(attributes.getQName(index))));
}
}

private static final String splitNamespace(final String qName) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,4 +223,11 @@ public interface OdfPackage {
* otherwise <code>false</code>.
*/
public boolean hasDsigEntries();

/**
* Discover if the file uses any namespaces outside of the ODF specification.
*
* @return <code>true</code> if the file uses any namespaces outside of the
*         ODF specification, otherwise <code>false</code>.
*/
public boolean isExtended();
}
Original file line number Diff line number Diff line change
Expand Up @@ -328,4 +328,14 @@ public boolean hasDsigEntries() {
}
return false;
}

/**
 * Reports whether any document held in this package's document map has an
 * XML document that is flagged as extended.
 *
 * @return true as soon as one mapped document's XML document reports
 *         {@code isExtended()}, false if none do (including an empty map)
 */
@Override
public boolean isExtended() {
    // anyMatch short-circuits on the first extended document, like the early return it replaces.
    return this.documentMap.values().stream()
            .anyMatch(packageDoc -> packageDoc.getXmlDocument().isExtended());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.openpreservation.messages.MessageFactory;
import org.openpreservation.messages.Messages;
import org.openpreservation.odf.document.Documents;
import org.openpreservation.odf.document.OdfDocument;
import org.openpreservation.odf.fmt.OdfFormats;
import org.openpreservation.odf.pkg.FileEntry;
import org.openpreservation.odf.pkg.Manifest;
Expand All @@ -31,6 +32,7 @@
import org.openpreservation.odf.pkg.PackageParser;
import org.openpreservation.odf.xml.Namespaces;
import org.openpreservation.odf.xml.OdfSchemaFactory;
import org.openpreservation.odf.xml.OdfXmlDocuments;
import org.openpreservation.odf.xml.Version;
import org.openpreservation.utils.Checks;
import org.xml.sax.SAXException;
Expand Down Expand Up @@ -120,6 +122,10 @@ private final List<Message> validateOdfXmlDocument(final OdfPackage odfPackage,
final ParseResult parseResult) {
List<Message> messageList = new ArrayList<>();
Namespaces ns = Namespaces.fromId(parseResult.getRootNamespace().getId());
if (OdfXmlDocuments.odfXmlDocumentOf(parseResult).isExtended()) {
messageList.add(FACTORY.getError("DOC-8", xmlPath));
return messageList;
}
Schema schema = (ns == null) ? null
: SCHEMA_FACTORY.getSchema(ns,
getVersionFromPath(odfPackage, xmlPath));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,15 @@ public ValidationReport validateSpreadsheet(final File toValidate) throws ParseE
return validateSingleFormat(toValidate, Formats.ODS);
}

public ValidationReport validateSingleFormat(final File toValidate, final Formats legal) throws ParseException, FileNotFoundException {
/**
 * Validates a file against a single expected format by delegating to the
 * {@code Path} based overload.
 *
 * @param toValidate the file to validate, must not be null
 * @param legal      the format passed through to the {@code Path} overload, must not be null
 * @return the validation report produced by the {@code Path} based overload
 * @throws NullPointerException  if either argument is null
 * @throws ParseException        declared by the delegated validation call
 * @throws FileNotFoundException declared by the delegated validation call
 */
public ValidationReport validateSingleFormat(final File toValidate, final Formats legal)
        throws ParseException, FileNotFoundException {
    // Arguments are checked in declaration order so the first null one is the one reported.
    final String nullFileMessage = String.format(Checks.NOT_NULL, "File", TO_VAL_STRING);
    Objects.requireNonNull(toValidate, nullFileMessage);
    final String nullFormatsMessage = String.format(Checks.NOT_NULL, "Formats", "legal");
    Objects.requireNonNull(legal, nullFormatsMessage);
    final Path pathToValidate = toValidate.toPath();
    return validateSingleFormat(pathToValidate, legal);
}

public ValidationReport validateSingleFormat(final Path toValidate, final Formats legal) throws ParseException, FileNotFoundException {
public ValidationReport validateSingleFormat(final Path toValidate, final Formats legal)
throws ParseException, FileNotFoundException {
Objects.requireNonNull(toValidate, String.format(Checks.NOT_NULL, "Path", TO_VAL_STRING));
Objects.requireNonNull(legal, String.format(Checks.NOT_NULL, "Formats", "legal"));
Checks.existingFileCheck(toValidate);
Expand Down Expand Up @@ -87,7 +89,7 @@ public ValidationReport validate(final Path toValidate)
} else if (OdfXmlDocuments.isXml(toValidate)) {
return validateOpenDocumentXml(toValidate);
}
} catch (IOException| ParserConfigurationException | SAXException e) {
} catch (IOException | ParserConfigurationException | SAXException e) {
throw new ParseException("Exception thrown when validating ODF document.", e);
}

Expand All @@ -100,21 +102,26 @@ private static final ValidationReport notOdf(final Path toValidate) {
return report;
}

private ValidationReport validatePackage(final Path toValidate) throws ParserConfigurationException, SAXException, ParseException, FileNotFoundException {
ValidatingParser parser = Validators.getValidatingParser();
OdfPackage pckg = parser.parsePackage(toValidate);
return parser.validatePackage(pckg);
/**
 * Parses the package at the given path and validates it, using one
 * validating parser for both steps.
 *
 * @param toValidate path of the package to parse and validate
 * @return the report returned by the parser's package validation
 */
private ValidationReport validatePackage(final Path toValidate)
        throws ParserConfigurationException, SAXException, ParseException, FileNotFoundException {
    final ValidatingParser validatingParser = Validators.getValidatingParser();
    // Parse first, then hand the parsed package straight back to the same parser.
    return validatingParser.validatePackage(validatingParser.parsePackage(toValidate));
}

private ValidationReport validateOpenDocumentXml(final Path toValidate) throws ParserConfigurationException, SAXException, IOException {
private ValidationReport validateOpenDocumentXml(final Path toValidate)
throws ParserConfigurationException, SAXException, IOException {
final XmlParser checker = new XmlParser();
ParseResult parseResult = checker.parse(toValidate);
final ValidationReport report = (parseResult.isWellFormed()) ? ValidationReport.of(toValidate.toString(), Documents.openDocumentOf(Documents.odfDocumentOf(parseResult))) : ValidationReport.of(toValidate.toString());
final ValidationReport report = (parseResult.isWellFormed())
? ValidationReport.of(toValidate.toString(),
Documents.openDocumentOf(Documents.odfDocumentOf(parseResult)))
: ValidationReport.of(toValidate.toString());
if (parseResult.isWellFormed()) {
Version version = Version.ODF_13;
final OdfXmlDocument doc = OdfXmlDocuments.odfXmlDocumentOf(parseResult);
final XmlValidator validator = new XmlValidator();
if (parseResult.isRootName(TAG_DOC)) {
final OdfXmlDocument doc = OdfXmlDocuments.odfXmlDocumentOf(parseResult);
version = Version.fromVersion(doc.getVersion());
report.add(toValidate.toString(), FACTORY.getInfo("DOC-2", doc.getVersion()));
if (Formats.fromMime(doc.getMimeType()).isPackage()) {
Expand All @@ -123,8 +130,12 @@ private ValidationReport validateOpenDocumentXml(final Path toValidate) throws P
report.add(toValidate.toString(), FACTORY.getError("DOC-4", doc.getMimeType()));
}
}
Schema schema = new OdfSchemaFactory().getSchema(Namespaces.OFFICE, version);
parseResult = validator.validate(parseResult, Files.newInputStream(toValidate), schema);
if (doc.isExtended()) {
report.add(toValidate.toString(), FACTORY.getError("DOC-8"));
} else {
Schema schema = new OdfSchemaFactory().getSchema(Namespaces.OFFICE, version);
parseResult = validator.validate(parseResult, Files.newInputStream(toValidate), schema);
}
} else {
report.add(toValidate.toString(), FACTORY.getError("DOC-1"));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ public enum Namespaces {
XLINK("http://www.w3.org/1999/xlink",
"The XLink namespace (see https://docs.oasis-open.org/office/OpenDocument/v1.3/os/part3-schema/OpenDocument-v1.3-os-part3-schema.html#nref-xlink)."),
ODF("http://docs.oasis-open.org/ns/office/1.2/meta/odf#",
"RDF node property and node elements for OpenDocument package entities.");
"RDF node property and node elements for OpenDocument package entities."),
OOO("http://openoffice.org/2004/office",
"OpenOffice.org namespace.");

public static final Namespaces fromPrefix(final String prefix) {
Objects.requireNonNull(prefix, String.format(Checks.NOT_NULL, "String", "prefix"));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.openpreservation.odf.xml;

import java.util.Set;

import org.openpreservation.format.xml.Namespace;
import org.openpreservation.format.xml.ParseResult;

Expand Down Expand Up @@ -54,6 +56,12 @@ public interface OdfXmlDocument {
*/
public String getVersion();

/**
* Establish whether the document uses a namespace outside of the declared ODF namespaces.
*
* @return true if the document uses a namespace outside of the declared ODF
*         namespaces, false otherwise
*/
public boolean isExtended();
/**
* Get the parse result of the XML document
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Objects;

import javax.xml.parsers.ParserConfigurationException;
Expand All @@ -13,6 +14,8 @@
import org.xml.sax.SAXException;

final class OdfXmlDocumentImpl implements OdfXmlDocument {
static final String[] extendedDocTypes = { "office:document-content", "office:document-styles", "office:styles" };

static final OdfXmlDocumentImpl of(final ParseResult parseResult) {
Objects.requireNonNull(parseResult, String.format(Checks.NOT_NULL, "parseResult", "ParseResult"));
final String version = parseResult.getRootAttributeValue("office:version") != null
Expand Down Expand Up @@ -77,7 +80,12 @@ public String getLocalRootName() {

@Override
public String getRootName() {
return String.format("%s:%s", this.getRootNamespace().getPrefix(), this.getLocalRootName());
return getQualifedName(this.getRootNamespace().getPrefix(), this.getLocalRootName());
}

/**
 * Delegates to {@link #usesExtendedNamespace(ParseResult)} using the parse
 * result held by this document.
 *
 * @return the result of the extended-namespace check for this document's
 *         parse result
 */
@Override
public boolean isExtended() {
    return OdfXmlDocumentImpl.usesExtendedNamespace(this.parseResult);
}

@Override
Expand Down Expand Up @@ -128,4 +136,24 @@ public String toString() {
+ mimeType
+ ", version=" + version + "]";
}

/**
 * Decides whether a parse result counts as using an extended namespace.
 *
 * The check only applies when the qualified root name (root prefix and root
 * name joined by a colon) is accepted by {@link #isExtendedRoot(String)};
 * for any other root the answer is false. For an accepted root, the result
 * is true if at least one used namespace has an id that
 * {@code Namespaces.fromId} does not resolve (returns null for).
 *
 * @param parseResult the parse result to inspect
 * @return true if the root is an accepted type and an unresolved used
 *         namespace exists, false otherwise
 */
static boolean usesExtendedNamespace(final ParseResult parseResult) {
    final String qualifiedRoot = getQualifedName(parseResult.getRootPrefix(), parseResult.getRootName());
    // The && keeps the original guard: namespaces are only examined for accepted roots.
    return isExtendedRoot(qualifiedRoot)
            && parseResult.getUsedNamespaces().stream()
                    .anyMatch(used -> Namespaces.fromId(used.getId()) == null);
}

/**
 * Tests whether a qualified root name is one of the entries in
 * {@code extendedDocTypes}.
 *
 * @param rootName the qualified root element name to look up
 * @return true if {@code rootName} equals one of the listed types, false
 *         otherwise
 */
static boolean isExtendedRoot(final String rootName) {
    for (final String candidateType : extendedDocTypes) {
        if (rootName.equals(candidateType)) {
            return true;
        }
    }
    return false;
}

/**
 * Joins a prefix and a name with a single colon, giving {@code prefix:name}.
 *
 * @param prefix the part placed before the colon
 * @param name   the part placed after the colon
 * @return the two parts separated by a colon
 */
private static final String getQualifedName(final String prefix, final String name) {
    return prefix + ":" + name;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ DOC-4 = Invalid MIMETYPE declaration %s detected.
DOC-5 = No MIMETYPE declaration detected.
DOC-6 = OpenDocument document SHALL be format %s, no format was detected.
DOC-7 = OpenDocument document SHALL be format %s, but format %s was detected.
DOC-8 = This XML OpenDocument uses extended namespaces.
PKG-1 = An OpenDocument Package SHALL be a well formed Zip Archive.
PKG-2 = All files contained in the Zip file shall be non compressed (STORED) or compressed using the "deflate" (DEFLATED) algorithm. Zip entry %s is compressed with an unknown algorithm.
PKG-3 = An OpenDocument Package SHALL contain a file "META-INF/manifest.xml".
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,6 @@ public void testParseLoExtExtended()
assertEquals("Parse result should have a root attributes", 1, result.getRootAttributes().size());
assertEquals("Parse result should have multiple namespace declarations", 35,
result.getDeclaredNamespaces().size());
assertEquals("Parse result should have multiple used namespaces", 6, result.getUsedNamespaces().size());
assertEquals("Parse result should have multiple used namespaces", 10, result.getUsedNamespaces().size());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ public class TestFiles {
final static String VERSION_TEST_ROOT = PKG_TEST_ROOT + "version/";
final static String EMBEDDED_TEST_ROOT = PKG_TEST_ROOT + "embedded/";
final static String ENCRYPTED_TEST_ROOT = PKG_TEST_ROOT + "encrypted/";
final static String EXTENDED_TEST_ROOT = PKG_TEST_ROOT + "extended/";
final static String DSIG_TEST_ROOT = PKG_TEST_ROOT + "dsigs/";
final static String INVALID_PKG_ROOT = PKG_TEST_ROOT + "invalid/";
final static String RULES_ROOT = PKG_TEST_ROOT + "rules/";
Expand Down Expand Up @@ -60,6 +61,8 @@ public class TestFiles {
.getSystemResource(EMBEDDED_TEST_ROOT + "WithEmbeddedwordDoc.ods");
public final static URL ENCRYPTED_PASSWORDS = ClassLoader
.getSystemResource(ENCRYPTED_TEST_ROOT + "WithPassword.ods");
public final static URL EXTENDED_SPREADSHEET = ClassLoader
.getSystemResource(EXTENDED_TEST_ROOT + "lo_ext.ods");

/**
* OpenDocument XML test files
Expand All @@ -69,7 +72,8 @@ public class TestFiles {
public final static URL FLAT_NOT_VALID = ClassLoader.getSystemResource(XML_TEST_ROOT + "flat_not_valid.fods");
public final static URL FLAT_NO_VERSION = ClassLoader.getSystemResource(XML_TEST_ROOT + "no_version.fods");
public final static URL FLAT_NO_MIME = ClassLoader.getSystemResource(XML_TEST_ROOT + "no_mimetype.fods");
public final static URL LOEXT_EXTENDED_CONFORMANCE = ClassLoader.getSystemResource(XML_TEST_ROOT + "loext_ext_cnfrm.xml");
public final static URL LOEXT_EXTENDED_CONFORMANCE = ClassLoader
.getSystemResource(XML_TEST_ROOT + "loext_ext_cnfrm.xml");

public final static URL EMPTY = ClassLoader.getSystemResource(FILE_TEST_ROOT + "empty.file");
public final static URL STYLES_ONLY_DOC = ClassLoader.getSystemResource(DOCUMENT_TEST_ROOT + "styles_only_doc.ods");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ public void testValidPackage() throws ParserConfigurationException, SAXException
ValidatingParser parser = Validators.getValidatingParser();
OdfPackage pkg = parser.parsePackage(TestFiles.EMPTY_ODS.openStream(), TestFiles.EMPTY_ODS.toString());
ValidationReport report = parser.validatePackage(pkg);
assertTrue("Empty ODS should be valid", report.isValid());
assertFalse("Empty ODS IS invalid", report.isValid());
}

@Test
Expand Down Expand Up @@ -302,7 +302,6 @@ public void testNoThumbnail() throws ParserConfigurationException, SAXException,
ValidatingParser parser = Validators.getValidatingParser();
OdfPackage pkg = parser.parsePackage(TestFiles.NO_THUMBNAIL_ODS.openStream(), TestFiles.NO_THUMBNAIL_ODS.toString());
ValidationReport report = parser.validatePackage(pkg);
assertTrue("NO_THUMBNAIL_ODS should be valid", report.isValid());
assertTrue(report.getMessages().stream().filter(m -> m.getId().equals("PKG-7")).count() > 0);
}

Expand All @@ -311,7 +310,7 @@ public void testNoEmbeddedWord() throws ParserConfigurationException, SAXExcepti
ValidatingParser parser = Validators.getValidatingParser();
OdfPackage pkg = parser.parsePackage(TestFiles.EMBEDDED_WORD.openStream(), TestFiles.EMBEDDED_WORD.toString());
ValidationReport report = parser.validatePackage(pkg);
assertTrue("EMBEDDED_WORD should be valid", report.isValid());
assertFalse("EMBEDDED_WORD isn't valid", report.isValid());
}

@Test
Expand All @@ -329,7 +328,7 @@ public void testDsigValid() throws ParserConfigurationException, SAXException, I
InputStream is = TestFiles.DSIG_VALID.openStream();
OdfPackage pkg = parser.parsePackage(is, TestFiles.DSIG_VALID.toString());
ValidationReport report = parser.validatePackage(pkg);
assertTrue("Package should be be valid" , report.isValid());
assertFalse("Package is not valid" , report.isValid());
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public void validateDirPath() throws ParserConfigurationException, IOException,
public void validatePath() throws ParseException, IOException, URISyntaxException {
Validator validator = new Validator();
ValidationReport report = validator.validate(new File(TestFiles.EMPTY_ODS.toURI()).toPath());
assertTrue("Package should be valid.", report.isValid());
assertFalse("Package should be valid.", report.isValid());
}

@Test
Expand All @@ -77,7 +77,7 @@ public void validateNullFile() throws ParseException, IOException, URISyntaxExce
public void validateFile() throws ParseException, IOException, URISyntaxException {
Validator validator = new Validator();
ValidationReport report = validator.validate(new File(TestFiles.EMPTY_ODS.toURI()));
assertTrue("Package should be valid.", report.isValid());
assertFalse("Package should be valid.", report.isValid());
}

@Test
Expand Down Expand Up @@ -134,7 +134,7 @@ public void validateSpreadsheetNullPath() throws ParseException, URISyntaxExcept
public void validateSpreadsheetPath() throws ParseException, IOException, URISyntaxException {
Validator validator = new Validator();
ValidationReport report = validator.validateSpreadsheet(new File(TestFiles.EMPTY_ODS.toURI()).toPath());
assertTrue("Package should be valid.", report.isValid());
assertFalse("Package should be valid.", report.isValid());
}

@Test
Expand All @@ -152,7 +152,7 @@ public void validateSpreadsheetNullFile() throws ParserConfigurationException {
public void validateSpreadsheetFile() throws ParseException, IOException, URISyntaxException {
Validator validator = new Validator();
ValidationReport report = validator.validateSpreadsheet(new File(TestFiles.EMPTY_ODS.toURI()));
assertTrue("Package should be valid.", report.isValid());
assertFalse("Package should be valid.", report.isValid());
}

@Test
Expand Down Expand Up @@ -203,4 +203,13 @@ public void validateSpreadsheetDocNotWellFormedXml() throws ParseException, IOEx
assertEquals(1, report.getMessages().stream().filter(m -> m.getId().equals("DOC-1")).count());
assertEquals(1, report.getMessages().stream().filter(m -> m.getId().equals("XML-3")).count());
}

/**
 * Validates the EXTENDED_SPREADSHEET test file as a spreadsheet and checks
 * that the report is invalid, carries exactly two DOC-8 messages and no
 * XML-3 messages.
 */
@Test
public void validateExtendedSpreadsheet() throws ParseException, IOException, URISyntaxException {
    final File extendedSheet = new File(TestFiles.EXTENDED_SPREADSHEET.toURI());
    final ValidationReport report = new Validator().validateSpreadsheet(extendedSheet.toPath());

    assertFalse("Document should NOT be valid.", report.isValid());

    final long doc8Count = report.getMessages().stream().filter(m -> m.getId().equals("DOC-8")).count();
    assertEquals(2, doc8Count);

    final long xml3Count = report.getMessages().stream().filter(m -> m.getId().equals("XML-3")).count();
    assertEquals(0, xml3Count);
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package org.openpreservation.odf.validation.rules;

import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.File;
import java.io.IOException;
Expand All @@ -25,8 +25,8 @@ public void testCheck() throws IOException, URISyntaxException, ParseException {
OdfPackage pkg = parser.parsePackage(Paths.get(new File(TestFiles.EMPTY_ODS.toURI()).getAbsolutePath()));
ProfileResult result = profile.check(pkg);
assertNotNull(result);
assertTrue(result.getValidationReport().isValid());
assertTrue (result.isValid());
assertFalse(result.getValidationReport().isValid());
assertFalse (result.isValid());
}

@Test
Expand Down
Loading

0 comments on commit 4a58004

Please sign in to comment.