diff --git a/src/main/java/gov/nasa/pds/tools/label/LabelValidator.java b/src/main/java/gov/nasa/pds/tools/label/LabelValidator.java index 7514a39de..c6c08e8e2 100644 --- a/src/main/java/gov/nasa/pds/tools/label/LabelValidator.java +++ b/src/main/java/gov/nasa/pds/tools/label/LabelValidator.java @@ -19,10 +19,14 @@ import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.nio.file.Paths; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.xml.XMLConstants; @@ -81,6 +85,7 @@ import gov.nasa.pds.tools.validate.ValidationProblem; import gov.nasa.pds.validate.constants.Constants; import net.sf.saxon.om.DocumentInfo; +import net.sf.saxon.trans.XPathException; /** * This class is responsible for providing utility functions for validating PDS XML Labels. @@ -120,8 +125,6 @@ public class LabelValidator { private SchematronTransformer schematronTransformer; private XPathFactory xPathFactory; - private String labelExtension; - private long filesProcessed = 0; private double totalTimeElapsed = 0.0; @@ -179,8 +182,6 @@ public LabelValidator() throws ParserConfigurationException, TransformerConfigur // The parser doesn't validate - we use a Validator instead. saxParserFactory.setValidating(false); - setLabelExtension(Constants.DEFAULT_LABEL_EXTENSION); - // Don't add xml:base attributes to xi:include content, or it messes up // PDS4 validation. 
try { @@ -328,7 +329,6 @@ public synchronized void validate(ProblemHandler handler, URL url) throws SAXExc public synchronized void validate(ProblemHandler handler, URL url, String labelExtension) throws SAXException, IOException, ParserConfigurationException, TransformerException, MissingLabelSchemaException { - this.setLabelExtension(labelExtension); this.parseAndValidate(handler, url); } @@ -383,6 +383,76 @@ private Boolean determineSchematronValidationFlag(URL url) { return (validateAgainstSchematronFlag); } + private void checkSchemaSchematronVersions(ProblemHandler handler, URL url) { + try { + List specifiedSchema = null, specifiedSchematron = null; + XMLExtractor sourceXML = new XMLExtractor(url); + String xmlns = sourceXML.getSchemaLocation(); + specifiedSchematron = sourceXML.getXmlModels(); + if (xmlns != null && 0 < xmlns.strip().length()) + specifiedSchema = Arrays.asList(xmlns.split("\\s")); + if (specifiedSchema == null || specifiedSchema.size() == 0) { + handler.addProblem(new ValidationProblem(new ProblemDefinition(ExceptionType.WARNING, + ProblemType.SCHEMA_WARNING, "Cannot check versioning because no Schema given."), url)); + } else if (specifiedSchematron == null || specifiedSchematron.size() == 0) { + handler.addProblem(new ValidationProblem(new ProblemDefinition(ExceptionType.WARNING, + ProblemType.SCHEMATRON_WARNING, "Cannot check versioning because no Schematron given."), + url)); + } else { + Set schemas = new HashSet(); + Set schematrons = new HashSet(); + for (String schema : specifiedSchema) { + try { + URL schemaURL = new URL(schema); + String schemaName = + Paths.get(schemaURL.getPath()).getFileName().toString().toLowerCase(); + if (schemaName.endsWith(".xsd")) { + schemaName = schemaName.substring(0, schemaName.length() - 4); + schemas.add(schemaName); + } + } catch (MalformedURLException e) { + // Ignore the error: XMLExtractor() was able to read it, so this should be alright + } + } + for (String model : specifiedSchematron) { + for
(String part : model.split("\\s")) { + if (part.toLowerCase().startsWith("href")) { + String modelHREF = part.substring(part.indexOf('"') + 1, part.lastIndexOf('"')); + try { + URL schematronURL = new URL(modelHREF); + String schematronName = + Paths.get(schematronURL.getPath()).getFileName().toString().toLowerCase(); + if (schematronName.endsWith(".sch")) { + schematronName = schematronName.substring(0, schematronName.length() - 4); + schematrons.add(schematronName); + } + } catch (MalformedURLException e) { + // Ignore the error: XMLExtractor() was able to read it, so this should be alright + } + } + } + } + Set uniqueSchema = new HashSet(schemas); + Set uniqueSchematron = new HashSet(schematrons); + uniqueSchema.removeAll(schematrons); + uniqueSchematron.removeAll(schemas); + if (0 < uniqueSchema.size() + uniqueSchematron.size()) { + handler.addProblem(new ValidationProblem( + new ProblemDefinition(ExceptionType.WARNING, ProblemType.SCHEMA_WARNING, + "The schema version(s) " + uniqueSchema.toString().toUpperCase() + + " does/do not match the schematron version(s) " + + uniqueSchematron.toString().toUpperCase() + "."), + url)); + } + } + } catch (XPathException | XPathExpressionException e) { + handler.addProblem(new ValidationProblem( + new ProblemDefinition(ExceptionType.WARNING, ProblemType.MISSING_REQUIRED_RESOURCE, + "Cannot check versioning because XML could not be parsed."), + url)); + } + } + /** * Parses and validates a label against the schema and Schematron files, and returns the parsed * XML. @@ -415,6 +485,7 @@ public synchronized Document parseAndValidate(ProblemHandler handler, URL url) // Are we perfoming schema validation? if (performsSchemaValidation()) { createParserIfNeeded(handler); + checkSchemaSchematronVersions(handler, url); // Do we need this to clear the cache? 
@@ -970,10 +1041,6 @@ public void setCachedLSResourceResolver(CachedLSResourceResolver resolver) { this.cachedLSResolver = resolver; } - public void setLabelExtension(String extension) { - this.labelExtension = extension; - } - public Pattern getBundleLabelPattern() { return bundleLabelPattern; } @@ -1007,7 +1074,6 @@ public static void main(String[] args) throws Exception { */ private static class DOMLocator implements Locator { - private URL url; private int lineNumber; private int columnNumber; private String systemId; @@ -1018,7 +1084,6 @@ private static class DOMLocator implements Locator { * @param url the URL of the source document */ public DOMLocator(URL url) { - this.url = url; this.systemId = url.toString(); } diff --git a/src/test/resources/features/validate.feature b/src/test/resources/features/validate.feature index fdc7ed646..ac34d31bb 100644 --- a/src/test/resources/features/validate.feature +++ b/src/test/resources/features/validate.feature @@ -9,6 +9,9 @@ Scenario Outline: Execute validate command for tests below. Examples: | testName | testDir | messageCount | messageText | problemEnum | resourceDir | reportDir | commandArgs | refOutputValue | +# Validate#628 +|"NASA-PDS/validate#628 Warning Version Mismatch" | "github628" | 1 | "1 warnings expected" | "totalWarnings" | "src/test/resources" | "target/test" | "-r {reportDir}/report_github628.json -s json --skip-content-validation -t {resourceDir}/github628/mp2_flat_20061109.xml" | "report_github628.json" | + # Validate#616 |"NASA-PDS/validate#616 Success Multiple Tables One File" | "github616" | 0 | "0 errors expected" | "totalErrors" | "src/test/resources" | "target/test" | "-r {reportDir}/report_github616.json -s json --skip-context-validation -t {resourceDir}/github616/mre_cal_sc_ttcp_delay_schulte_01s_2021069.xml" | "report_github616.json" | @@ -90,7 +93,7 @@ Scenario Outline: Execute validate command for tests below. 
# So, the 1st error is the file name contains spaces from FileAndDirectoryNamingRule and the 2nd error comes from function validateFileStandardConformity(). |"NASA-PDS/validate#153 1" | "github153" | 2 | "2 error messages expected." | "totalErrors" | "src/test/resources" | "target/test" | "--skip-context-validation -R pds4.label -r {reportDir}/report_github153_invalid.json -s json -t {resourceDir}/github153/iue_asteroid_spectra/document/3juno_lwr01896_ines_fits_headers.pdfa%.xml" | "report_github153_invalid.json" | |"NASA-PDS/validate#153 2" | "github153" | 0 | "No error messages expected." | "totalErrors" | "src/test/resources" | "target/test" | "--skip-context-validation -R pds4.label -r {reportDir}/report_github153_valid.json -s json -t {resourceDir}/github153/iue_asteroid_spectra/document/collection_iue_asteroid_spectra_document.xml" | "report_github153_valid.json" | - |"NASA-PDS/validate#17 1" | "github17" | 2 | "2 warning expected." | "totalWarnings" | "src/test/resources" | "target/test" | "--skip-context-validation -R pds4.label -r {reportDir}/report_github17_invalid.json -s json -t {resourceDir}/github17/delimited_table_bad.xml" | "report_github17_invalid.json" | + |"NASA-PDS/validate#17 1" | "github17" | 3 | "3 warning expected." | "totalWarnings" | "src/test/resources" | "target/test" | "--skip-context-validation -R pds4.label -r {reportDir}/report_github17_invalid.json -s json -t {resourceDir}/github17/delimited_table_bad.xml" | "report_github17_invalid.json" | |"NASA-PDS/validate#17 2" | "github17" | 0 | "No error messages expected." | "totalErrors" | "src/test/resources" | "target/test" | "--skip-context-validation -R pds4.label -r {reportDir}/report_github17_valid.json -s json -t {resourceDir}/github17/delimited_table_good.xml" | "report_github17_valid.json" | |"NASA-PDS/validate#230 1" | "github230" | 2 | "2 errors expected for MISSING_VERSION." 
| "MISSING_VERSION" | "src/test/resources" | "target/test" | "--skip-content-validation -R pds4.bundle -r {reportDir}/report_github230_invalid.json -s json -t {resourceDir}/github230/invalid/cocirs_c2h4abund/bundle_cocirs_c2h4abund.xml" | "report_github230_invalid.json" | diff --git a/src/test/resources/github628/mp2_flat_20061109.xml b/src/test/resources/github628/mp2_flat_20061109.xml new file mode 100644 index 000000000..eefa04718 --- /dev/null +++ b/src/test/resources/github628/mp2_flat_20061109.xml @@ -0,0 +1,93 @@ + + + + + + + urn:nasa:pds:nh_mvic:calibration_files:mp2_flat + 1.0 + Flat Field for the New Horizons MVIC Instrument Panchromatic CCD #2 in Time Delay Integration (TDI) Mode, 2006-11-09 version + 1.18.0.0 + Product_Ancillary + + + 2022-11 + 1.0 + + A. Raugh: Migrated from PDS3. Note that there are later version + of this file, used to produce MVIC collections later + in the mission lifetime. Labels for calibrated MVIC + data should reference the specific version used in + producing that product. + + + + + + + + Calibration + Derived + Flat Field + + Visible + Imaging + + + + + New Horizons Kuiper Belt Extended Mission 1 + Mission + + urn:nasa:pds:context:investigation:mission.new_horizons_kem1 + ancillary_to_investigation + + + + + + Multispectral Visible Imaging Camera (MVIC) for New Horizons + Instrument + + urn:nasa:pds:context:instrument:nh.mvic + is_instrument + + + + + + + + mp2_flat_20061109.fit + + This file contains the flat field used in calibrating the MVIC + panchromatic CCD #2 image data. + + The digits in the file name indicate the approximate date (YYYYMMDD) + of the delivery of the file for use in pipelines. + + +
+ 0 + 2880 + FITS 3.0 +
+ + FlatField + 2880 + 1 + Last Index Fastest + + IEEE754MSBDouble + + + Sample + 5024 + 1 + + +
+ +