-
Notifications
You must be signed in to change notification settings - Fork 11
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add WARNING for schema/schematron version mismatch #634
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -19,10 +19,14 @@ | |||||||||
import java.io.IOException; | ||||||||||
import java.net.MalformedURLException; | ||||||||||
import java.net.URL; | ||||||||||
import java.nio.file.Paths; | ||||||||||
import java.util.ArrayList; | ||||||||||
import java.util.Arrays; | ||||||||||
import java.util.HashMap; | ||||||||||
import java.util.HashSet; | ||||||||||
import java.util.List; | ||||||||||
import java.util.Map; | ||||||||||
import java.util.Set; | ||||||||||
import java.util.regex.Matcher; | ||||||||||
import java.util.regex.Pattern; | ||||||||||
import javax.xml.XMLConstants; | ||||||||||
|
@@ -81,6 +85,7 @@ | |||||||||
import gov.nasa.pds.tools.validate.ValidationProblem; | ||||||||||
import gov.nasa.pds.validate.constants.Constants; | ||||||||||
import net.sf.saxon.om.DocumentInfo; | ||||||||||
import net.sf.saxon.trans.XPathException; | ||||||||||
|
||||||||||
/** | ||||||||||
* This class is responsible for providing utility functions for validating PDS XML Labels. | ||||||||||
|
@@ -120,8 +125,6 @@ public class LabelValidator { | |||||||||
private SchematronTransformer schematronTransformer; | ||||||||||
private XPathFactory xPathFactory; | ||||||||||
|
||||||||||
private String labelExtension; | ||||||||||
|
||||||||||
private long filesProcessed = 0; | ||||||||||
private double totalTimeElapsed = 0.0; | ||||||||||
|
||||||||||
|
@@ -179,8 +182,6 @@ public LabelValidator() throws ParserConfigurationException, TransformerConfigur | |||||||||
// The parser doesn't validate - we use a Validator instead. | ||||||||||
saxParserFactory.setValidating(false); | ||||||||||
|
||||||||||
setLabelExtension(Constants.DEFAULT_LABEL_EXTENSION); | ||||||||||
|
||||||||||
// Don't add xml:base attributes to xi:include content, or it messes up | ||||||||||
// PDS4 validation. | ||||||||||
try { | ||||||||||
|
@@ -328,7 +329,6 @@ public synchronized void validate(ProblemHandler handler, URL url) throws SAXExc | |||||||||
public synchronized void validate(ProblemHandler handler, URL url, String labelExtension) | ||||||||||
throws SAXException, IOException, ParserConfigurationException, TransformerException, | ||||||||||
MissingLabelSchemaException { | ||||||||||
this.setLabelExtension(labelExtension); | ||||||||||
this.parseAndValidate(handler, url); | ||||||||||
|
||||||||||
} | ||||||||||
|
@@ -383,6 +383,76 @@ private Boolean determineSchematronValidationFlag(URL url) { | |||||||||
return (validateAgainstSchematronFlag); | ||||||||||
} | ||||||||||
|
||||||||||
private void checkSchemaSchematronVersions(ProblemHandler handler, URL url) { | ||||||||||
try { | ||||||||||
List<String> specifiedSchema = null, specifiedSchematron = null; | ||||||||||
XMLExtractor sourceXML = new XMLExtractor(url); | ||||||||||
String xmlns = sourceXML.getSchemaLocation(); | ||||||||||
specifiedSchematron = sourceXML.getXmlModels(); | ||||||||||
if (xmlns != null && 0 < xmlns.strip().length()) | ||||||||||
specifiedSchema = Arrays.asList(xmlns.split("\\s")); | ||||||||||
if (specifiedSchema == null || specifiedSchema.size() == 0) { | ||||||||||
handler.addProblem(new ValidationProblem(new ProblemDefinition(ExceptionType.WARNING, | ||||||||||
ProblemType.SCHEMA_WARNING, "Cannot check versioning because no Schema given."), url)); | ||||||||||
} else if (specifiedSchematron == null || specifiedSchematron.size() == 0) { | ||||||||||
handler.addProblem(new ValidationProblem(new ProblemDefinition(ExceptionType.WARNING, | ||||||||||
ProblemType.SCHEMATRON_WARNING, "Cannot check versioning because no Schematron given."), | ||||||||||
url)); | ||||||||||
} else { | ||||||||||
Set<String> schemas = new HashSet<String>(); | ||||||||||
Set<String> schematrons = new HashSet<String>(); | ||||||||||
for (String schema : specifiedSchema) { | ||||||||||
try { | ||||||||||
URL schemaURL = new URL(schema); | ||||||||||
String schemaName = | ||||||||||
Paths.get(schemaURL.getPath()).getFileName().toString().toLowerCase(); | ||||||||||
if (schemaName.endsWith(".xsd")) { | ||||||||||
schemaName = schemaName.substring(0, schemaName.length() - 4); | ||||||||||
schemas.add(schemaName); | ||||||||||
} | ||||||||||
} catch (MalformedURLException e) { | ||||||||||
// Ignore error since XMLExtractor() was able to read it this should be alright | ||||||||||
} | ||||||||||
} | ||||||||||
for (String model : specifiedSchematron) { | ||||||||||
for (String part : model.split("\\s")) { | ||||||||||
if (part.toLowerCase().startsWith("href")) { | ||||||||||
String modelHREF = part.substring(part.indexOf('"') + 1, part.lastIndexOf('"')); | ||||||||||
try { | ||||||||||
URL schematronURL = new URL(modelHREF); | ||||||||||
String schematronName = | ||||||||||
Paths.get(schematronURL.getPath()).getFileName().toString().toLowerCase(); | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. PATH_TRAVERSAL_IN: This API (java/nio/file/Paths.get(Ljava/lang/String;[Ljava/lang/String;)Ljava/nio/file/Path;) reads a file whose location might be specified by user input ℹ️ Expand to see all @sonatype-lift commandsYou can reply with the following commands. For example, reply with @sonatype-lift ignoreall to leave out all findings.
Note: When talking to LiftBot, you need to refresh the page to see its response. |
||||||||||
if (schematronName.endsWith(".sch")) { | ||||||||||
schematronName = schematronName.substring(0, schematronName.length() - 4); | ||||||||||
schematrons.add(schematronName); | ||||||||||
} | ||||||||||
} catch (MalformedURLException e) { | ||||||||||
// Ignore error since XMLExtractor() was able to it this should be alright | ||||||||||
} | ||||||||||
} | ||||||||||
} | ||||||||||
} | ||||||||||
Set<String> uniqueSchema = new HashSet<String>(schemas); | ||||||||||
Set<String> uniqueSchematron = new HashSet<String>(schematrons); | ||||||||||
uniqueSchema.removeAll(schematrons); | ||||||||||
uniqueSchematron.removeAll(schemas); | ||||||||||
if (0 < uniqueSchema.size() + uniqueSchematron.size()) { | ||||||||||
handler.addProblem(new ValidationProblem( | ||||||||||
new ProblemDefinition(ExceptionType.WARNING, ProblemType.SCHEMA_WARNING, | ||||||||||
"The schema version(s) " + uniqueSchema.toString().toUpperCase() | ||||||||||
+ " does/do not match the schematron version(s) " | ||||||||||
+ uniqueSchematron.toString().toUpperCase() + "."), | ||||||||||
url)); | ||||||||||
} | ||||||||||
} | ||||||||||
} catch (XPathException | XPathExpressionException e) { | ||||||||||
handler.addProblem(new ValidationProblem( | ||||||||||
new ProblemDefinition(ExceptionType.WARNING, ProblemType.MISSING_REQUIRED_RESOURCE, | ||||||||||
"Cannot check versioning because XML could not be parsed."), | ||||||||||
url)); | ||||||||||
} | ||||||||||
} | ||||||||||
|
||||||||||
/** | ||||||||||
* Parses and validates a label against the schema and Schematron files, and returns the parsed | ||||||||||
* XML. | ||||||||||
|
@@ -415,6 +485,7 @@ public synchronized Document parseAndValidate(ProblemHandler handler, URL url) | |||||||||
// Are we performing schema validation? | ||||||||||
if (performsSchemaValidation()) { | ||||||||||
createParserIfNeeded(handler); | ||||||||||
checkSchemaSchematronVersions(handler, url); | ||||||||||
|
||||||||||
// Do we need this to clear the cache? | ||||||||||
|
||||||||||
|
@@ -970,10 +1041,6 @@ public void setCachedLSResourceResolver(CachedLSResourceResolver resolver) { | |||||||||
this.cachedLSResolver = resolver; | ||||||||||
} | ||||||||||
|
||||||||||
public void setLabelExtension(String extension) { | ||||||||||
this.labelExtension = extension; | ||||||||||
} | ||||||||||
|
||||||||||
public Pattern getBundleLabelPattern() { | ||||||||||
return bundleLabelPattern; | ||||||||||
} | ||||||||||
|
@@ -1007,7 +1074,6 @@ public static void main(String[] args) throws Exception { | |||||||||
*/ | ||||||||||
private static class DOMLocator implements Locator { | ||||||||||
|
||||||||||
private URL url; | ||||||||||
private int lineNumber; | ||||||||||
private int columnNumber; | ||||||||||
private String systemId; | ||||||||||
|
@@ -1018,7 +1084,6 @@ private static class DOMLocator implements Locator { | |||||||||
* @param url the URL of the source document | ||||||||||
*/ | ||||||||||
public DOMLocator(URL url) { | ||||||||||
this.url = url; | ||||||||||
this.systemId = url.toString(); | ||||||||||
} | ||||||||||
|
||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<?xml-model href="https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_1I00.sch" schematypens="http://purl.oclc.org/dsdl/schematron"?> | ||
|
||
<Product_Ancillary xmlns="http://pds.nasa.gov/pds4/pds/v1" | ||
xmlns:pds="http://pds.nasa.gov/pds4/pds/v1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="http://pds.nasa.gov/pds4/pds/v1 https://pds.nasa.gov/pds4/pds/v1/PDS4_PDS_1J00.xsd"> | ||
|
||
<Identification_Area> | ||
<logical_identifier>urn:nasa:pds:nh_mvic:calibration_files:mp2_flat</logical_identifier> | ||
<version_id>1.0</version_id> | ||
<title>Flat Field for the New Horizons MVIC Instrument Panchromatic CCD #2 in Time Delay Integration (TDI) Mode, 2006-11-09 version </title> | ||
<information_model_version>1.18.0.0</information_model_version> | ||
<product_class>Product_Ancillary</product_class> | ||
<Modification_History> | ||
<Modification_Detail> | ||
<modification_date>2022-11</modification_date> | ||
<version_id>1.0</version_id> | ||
<description> | ||
A. Raugh: Migrated from PDS3. Note that there are later version | ||
of this file, used to produce MVIC collections later | ||
in the mission lifetime. Labels for calibrated MVIC | ||
data should reference the specific version used in | ||
producing that product. | ||
</description> | ||
</Modification_Detail> | ||
</Modification_History> | ||
</Identification_Area> | ||
|
||
<Context_Area> | ||
<Primary_Result_Summary> | ||
<purpose>Calibration</purpose> | ||
<processing_level>Derived</processing_level> | ||
<description>Flat Field</description> | ||
<Science_Facets> | ||
<wavelength_range>Visible</wavelength_range> | ||
<discipline_name>Imaging</discipline_name> | ||
</Science_Facets> | ||
</Primary_Result_Summary> | ||
|
||
<Investigation_Area> | ||
<name>New Horizons Kuiper Belt Extended Mission 1</name> | ||
<type>Mission</type> | ||
<Internal_Reference> | ||
<lid_reference>urn:nasa:pds:context:investigation:mission.new_horizons_kem1</lid_reference> | ||
<reference_type>ancillary_to_investigation</reference_type> | ||
</Internal_Reference> | ||
</Investigation_Area> | ||
|
||
<Observing_System> | ||
<Observing_System_Component> | ||
<name>Multispectral Visible Imaging Camera (MVIC) for New Horizons</name> | ||
<type>Instrument</type> | ||
<Internal_Reference> | ||
<lid_reference>urn:nasa:pds:context:instrument:nh.mvic</lid_reference> | ||
<reference_type>is_instrument</reference_type> | ||
</Internal_Reference> | ||
</Observing_System_Component> | ||
</Observing_System> | ||
</Context_Area> | ||
|
||
<File_Area_Ancillary> | ||
<File> | ||
<file_name>mp2_flat_20061109.fit</file_name> | ||
<comment> | ||
This file contains the flat field used in calibrating the MVIC | ||
panchromatic CCD #2 image data. | ||
|
||
The digits in the file name indicate the approximate date (YYYYMMDD) | ||
of the delivery of the file for use in pipelines. | ||
</comment> | ||
</File> | ||
<Header> | ||
<offset unit="byte">0</offset> | ||
<object_length unit="byte">2880</object_length> | ||
<parsing_standard_id>FITS 3.0</parsing_standard_id> | ||
</Header> | ||
<Array_1D> | ||
<local_identifier>FlatField</local_identifier> | ||
<offset unit="byte">2880</offset> | ||
<axes>1</axes> | ||
<axis_index_order>Last Index Fastest</axis_index_order> | ||
<Element_Array> | ||
<data_type>IEEE754MSBDouble</data_type> | ||
</Element_Array> | ||
<Axis_Array> | ||
<axis_name>Sample</axis_name> | ||
<elements>5024</elements> | ||
<sequence_number>1</sequence_number> | ||
</Axis_Array> | ||
</Array_1D> | ||
</File_Area_Ancillary> | ||
|
||
</Product_Ancillary> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
PATH_TRAVERSAL_IN: This API (java/nio/file/Paths.get(Ljava/lang/String;[Ljava/lang/String;)Ljava/nio/file/Path;) reads a file whose location might be specified by user input
ℹ️ Expand to see all @sonatype-lift commands
You can reply with the following commands. For example, reply with @sonatype-lift ignoreall to leave out all findings.
@sonatype-lift ignore
@sonatype-lift ignoreall
@sonatype-lift exclude <file|issue|path|tool>
file|issue|path|tool
from Lift findings by updating your config.toml file.
Note: When talking to LiftBot, you need to refresh the page to see its response.
Click here to add LiftBot to another repo.