Skip to content

Commit

Permalink
Finished minimal working implementation of tdwg/bdq#123 VALIDATION_CL…
Browse files Browse the repository at this point in the history
…ASSIFICATION_CONSISTENT with GBIF and WoRMS authorities. Includes minimal integration test.
  • Loading branch information
chicoreus committed Aug 23, 2022
1 parent 342d0d6 commit 47864c2
Show file tree
Hide file tree
Showing 3 changed files with 190 additions and 104 deletions.
210 changes: 141 additions & 69 deletions src/main/java/org/filteredpush/qc/sciname/DwCSciNameDQ.java
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@
* #82 VALIDATION_SCIENTIFICNAME_NOTEMPTY 7c4b9498-a8d9-4ebb-85f1-9f200c788595
* #120 VALIDATION_TAXONID_NOTEMPTY 401bf207-9a55-4dff-88a5-abcd58ad97fa
* #161 VALIDATION_TAXONRANK_NOTEMPTY 14da5b87-8304-4b2b-911d-117e3c29e890
* #105 VALIDATION_TAXON_NOTEMPTY ** needs work **
* #105 VALIDATION_TAXON_NOTEMPTY 06851339-843f-4a43-8422-4e61b9a00e75
* #101 VALIDATION_POLYNOMIAL_CONSISTENT 17f03f1f-f74d-40c0-8071-2927cfc9487b
*
* #123 VALIDATION_CLASSIFICATION_CONSISTENT 78640f09-8353-411a-800e-9b6d498fb1c9
* #81 VALIDATION_KINGDOM_FOUND 125b5493-052d-4a0d-a3e1-ed5bf792689e
* #22 VALIDATION_PHYLUM_FOUND eaad41c5-1d46-4917-a08b-4fd1d7ff5c0f
* #77 VALIDATION_CLASS_FOUND 2cd6884e-3d14-4476-94f7-1191cfff309b
Expand All @@ -76,9 +76,6 @@
* a variant of #57 that allows existing taxonID values to be conformed to a specified sourceAuthority
* based on a lookup of the taxon terms on that authority.
*
* Incomplete:
* #123 VALIDATION_CLASSIFICATION_UNAMBIGUOUS 78640f09-8353-411a-800e-9b6d498fb1c9
*
* @author mole
*
*/
Expand Down Expand Up @@ -2090,10 +2087,11 @@ public static DQResponse<ComplianceValue> validateHigherTaxonAtRank(String taxon
return result;
}


/**
* Can the combination of higher classification taxonomic terms be unambiguously resolved using bdq:sourceAuthority?
*
* Provides: VALIDATION_CLASSIFICATION_UNAMBIGUOUS
* Provides: VALIDATION_CLASSIFICATION_CONSISTENT
*
* @param class the provided dwc:class to evaluate
* @param phylum the provided dwc:phylum to evaluate
Expand All @@ -2103,7 +2101,7 @@ public static DQResponse<ComplianceValue> validateHigherTaxonAtRank(String taxon
* @param sourceAuthority in which to look up the taxon
* @return DQResponse the response of type ComplianceValue to return
*/
@Validation(label="VALIDATION_CLASSIFICATION_UNAMBIGUOUS", description="Can the combination of higher classification taxonomic terms be unambiguously resolved using bdq:sourceAuthority?")
@Validation(label="VALIDATION_CLASSIFICATION_CONSISTENT", description="Can the combination of higher classification taxonomic terms be unambiguously resolved using bdq:sourceAuthority?")
@Provides("78640f09-8353-411a-800e-9b6d498fb1c9")
public static DQResponse<ComplianceValue> validationClassificationUnambiguous(
@ActedUpon("dwc:kingdom") String kingdom,
Expand All @@ -2113,70 +2111,144 @@ public static DQResponse<ComplianceValue> validationClassificationUnambiguous(
@ActedUpon("dwc:family") String family,
@Parameter(name="bdq:sourceAuthority") SciNameSourceAuthority sourceAuthority
) {
DQResponse<ComplianceValue> result = new DQResponse<ComplianceValue>();

//TODO: Implement specification
// EXTERNAL_PREREQUISITES_NOT_MET if the bdq:sourceAuthority
// is not available; INTERNAL_PREREQUISITES_NOT_MET if all
// of the fields dwc:kingdom dwc:phylum, dwc:class, dwc:order,
// dwc:family are EMPTY; COMPLIANT if the combination of values
// of higher classification taxonomic terms (dwc:kingdom, dwc:phylum,
// dwc:class, dwc:order, dwc:family) can be unambiguously resolved
// by the bdq:sourceAuthority; otherwise NOT_COMPLIANT bdq:sourceAuthority
// default = "GBIF Backbone Taxonomy" [https://doi.org/10.15468/39omei],
// "API endpoint" [https://api.gbif.org/v1/species?datasetKey=d7dddbf4-2cf0-4f39-9b2a-bb099caae36c&name=]
//
DQResponse<ComplianceValue> result = new DQResponse<ComplianceValue>();

//TODO: Implement specification
// EXTERNAL_PREREQUISITES_NOT_MET if the bdq:sourceAuthority
// is not available; INTERNAL_PREREQUISITES_NOT_MET if all
// of the fields dwc:kingdom dwc:phylum, dwc:class, dwc:order,
// dwc:family are EMPTY; COMPLIANT if the combination of values
// of higher classification taxonomic terms (dwc:kingdom, dwc:phylum,
// dwc:class, dwc:order, dwc:family) can be unambiguously resolved
// by the bdq:sourceAuthority; otherwise NOT_COMPLIANT bdq:sourceAuthority
// default = "GBIF Backbone Taxonomy" [https://doi.org/10.15468/39omei],
// "API endpoint" [https://api.gbif.org/v1/species?datasetKey=d7dddbf4-2cf0-4f39-9b2a-bb099caae36c&name=]
//

// Parameters. This test is defined as parameterized.
// bdq:sourceAuthority default="GBIF Backbone Taxonomy"

if (sourceAuthority==null) {
sourceAuthority = new SciNameSourceAuthority();
}

String lowestRankingTaxon = null;
String lowestRank = null;
if (!SciNameUtils.isEmpty(family)) {
lowestRankingTaxon = family;
lowestRank = "Family";
}
if (lowestRankingTaxon == null && !SciNameUtils.isEmpty(family)) {
lowestRankingTaxon = family;
lowestRank = "Family";
}
if (lowestRankingTaxon == null && !SciNameUtils.isEmpty(order)) {
lowestRankingTaxon = order;
lowestRank = "Order";
}
if (lowestRankingTaxon == null && !SciNameUtils.isEmpty(taxonomic_class)) {
lowestRankingTaxon = taxonomic_class;
lowestRank = "Class";
}
if (lowestRankingTaxon == null && !SciNameUtils.isEmpty(phylum)) {
lowestRankingTaxon = phylum;
lowestRank = "Phylum";
}
if (lowestRankingTaxon == null && !SciNameUtils.isEmpty(kingdom)) {
lowestRankingTaxon = kingdom;
lowestRank = "Kingdom";
}

if (SciNameUtils.isEmpty(lowestRankingTaxon)) {
result.addComment("No value provided for kingdom, phylum, class, order, or family.");
result.setResultState(ResultState.INTERNAL_PREREQUISITES_NOT_MET);
} else {
logger.debug("looking up " + lowestRankingTaxon + " at rank " + lowestRank);
DQResponse<ComplianceValue> lowestLookup = validateHigherTaxonAtRank(lowestRankingTaxon,lowestRank,sourceAuthority);
if (lowestLookup.getResultState()==ResultState.RUN_HAS_RESULT && lowestLookup.getValue().equals(ComplianceValue.COMPLIANT)) {
result.addComment(lowestRank + " " + lowestRankingTaxon + " found in " + sourceAuthority.getName());

try {
List<NameUsage> lookupResult = null;
if (sourceAuthority.isGBIFChecklist()) {
lookupResult = GBIFService.lookupTaxonAtRank(lowestRankingTaxon, sourceAuthority.getAuthoritySubDataset(), lowestRank, 10);
} else if (sourceAuthority.getAuthority().equals(EnumSciNameSourceAuthority.WORMS)) {
lookupResult = WoRMSService.lookupTaxonAtRank(lowestRankingTaxon, lowestRank);
} else {
throw new UnsupportedSourceAuthorityException("Authority " + sourceAuthority.getName() + " Not implemented.");
}
boolean hasMatch = false;
Iterator<NameUsage> i = lookupResult.iterator();
while (i.hasNext()) {
NameUsage aResult = i.next();
logger.debug(aResult.getCanonicalName());
logger.debug(aResult.getKingdom());
logger.debug(aResult.getPhylum());
logger.debug(aResult.getClazz());
logger.debug(aResult.getOrder());
logger.debug(aResult.getFamily());
if (lowestRank.equalsIgnoreCase("Kingdom")) {
hasMatch=true;
} else if (lowestRank.equalsIgnoreCase("Phylum")) {
if (aResult.getKingdom().equals(kingdom)) {
hasMatch=true;
} else if (SciNameUtils.isEmpty(kingdom) || SciNameUtils.isEmpty(aResult.getKingdom())) {
hasMatch=true;
}
} else if (lowestRank.equalsIgnoreCase("Class")) {
if (aResult.getKingdom().equals(kingdom) || SciNameUtils.isEmpty(kingdom) || SciNameUtils.isEmpty(aResult.getKingdom())) {
if (aResult.getPhylum().equals(phylum) || SciNameUtils.isEmpty(phylum) || SciNameUtils.isEmpty(aResult.getPhylum())) {
hasMatch=true;
}
}
} else if (lowestRank.equalsIgnoreCase("Order")) {
if (aResult.getKingdom().equals(kingdom) || SciNameUtils.isEmpty(kingdom) || SciNameUtils.isEmpty(aResult.getKingdom())) {
if (aResult.getPhylum().equals(phylum) || SciNameUtils.isEmpty(phylum) || SciNameUtils.isEmpty(aResult.getPhylum())) {
if (aResult.getClass().equals(taxonomic_class) || SciNameUtils.isEmpty(taxonomic_class) || SciNameUtils.isEmpty(aResult.getClazz())) {
hasMatch=true;
}
}
}
} else if (lowestRank.equalsIgnoreCase("Family")) {
if (aResult.getKingdom().equals(kingdom) || SciNameUtils.isEmpty(kingdom) || SciNameUtils.isEmpty(aResult.getKingdom())) {
if (aResult.getPhylum().equals(phylum) || SciNameUtils.isEmpty(phylum) || SciNameUtils.isEmpty(aResult.getPhylum())) {
if (aResult.getClass().equals(taxonomic_class) || SciNameUtils.isEmpty(taxonomic_class) || SciNameUtils.isEmpty(aResult.getClazz())) {
if (aResult.getOrder().equals(order) || SciNameUtils.isEmpty(order) || SciNameUtils.isEmpty(aResult.getOrder())) {
hasMatch=true;
}
}
}
}
}
if (hasMatch) {
result.addComment("Matching classification found in " + sourceAuthority.getAuthority().getName());
result.setResultState(ResultState.RUN_HAS_RESULT);
result.setValue(ComplianceValue.COMPLIANT);
} else {
result.addComment("No matching classification found in " + sourceAuthority.getAuthority().getName());
result.setResultState(ResultState.RUN_HAS_RESULT);
result.setValue(ComplianceValue.NOT_COMPLIANT);
}
}
} catch (IOException e) {
result.addComment("Error looking up taxon in " + sourceAuthority.getAuthority().getName());
result.setResultState(ResultState.EXTERNAL_PREREQUISITES_NOT_MET);
} catch (UnsupportedSourceAuthorityException e) {
result.addComment("Lookup in source Authority " + sourceAuthority.getAuthority().getName() + " Not implemented");
result.setResultState(ResultState.EXTERNAL_PREREQUISITES_NOT_MET);
} catch (ApiException e) {
result.addComment("Error looking up taxon in " + sourceAuthority.getAuthority().getName());
result.setResultState(ResultState.EXTERNAL_PREREQUISITES_NOT_MET);
}

// Parameters. This test is defined as parameterized.
// bdq:sourceAuthority default="GBIF Backbone Taxonomy"
if (sourceAuthority==null) {
sourceAuthority = new SciNameSourceAuthority();
}

String lowestRankingTaxon = null;
String lowestRank = null;
if (!SciNameUtils.isEmpty(family)) {
lowestRankingTaxon = family;
lowestRank = "Family";
}
if (lowestRankingTaxon == null && !SciNameUtils.isEmpty(family)) {
lowestRankingTaxon = family;
lowestRank = "Family";
}
if (lowestRankingTaxon == null && !SciNameUtils.isEmpty(order)) {
lowestRankingTaxon = order;
lowestRank = "Order";
}
if (lowestRankingTaxon == null && !SciNameUtils.isEmpty(taxonomic_class)) {
lowestRankingTaxon = taxonomic_class;
lowestRank = "Class";
}
if (lowestRankingTaxon == null && !SciNameUtils.isEmpty(phylum)) {
lowestRankingTaxon = phylum;
lowestRank = "Phylum";
}
if (lowestRankingTaxon == null && !SciNameUtils.isEmpty(kingdom)) {
lowestRankingTaxon = kingdom;
lowestRank = "Kingdom";
}

if (SciNameUtils.isEmpty(lowestRankingTaxon)) {
result.addComment("No value provided for kingdom, phylum, class, order, or family.");
result.setResultState(ResultState.INTERNAL_PREREQUISITES_NOT_MET);
} else {
DQResponse<ComplianceValue> lowestLookup = validateHigherTaxonAtRank(lowestRankingTaxon,"lowestRank",sourceAuthority);
if (lowestLookup.getResultState()==ResultState.RUN_HAS_RESULT && lowestLookup.getValue().equals(ComplianceValue.COMPLIANT)) {
result.addComment(lowestRank + " " + lowestRankingTaxon + " found in " + sourceAuthority.getName());


} else {
result.addComment("Value provided for " + lowestRank + " [" + lowestRankingTaxon + "] not found in " + sourceAuthority.getName());
result.setResultState(ResultState.RUN_HAS_RESULT);
result.setValue(ComplianceValue.NOT_COMPLIANT);
}
} else {
result.addComment("Value provided for " + lowestRank + " [" + lowestRankingTaxon + "] not found in " + sourceAuthority.getName());
result.setResultState(ResultState.RUN_HAS_RESULT);
result.setValue(ComplianceValue.NOT_COMPLIANT);
}

}
return result;
}
return result;
}


Expand Down
34 changes: 0 additions & 34 deletions src/main/java/org/filteredpush/qc/sciname/DwCSciNameDQ_stubs.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,40 +11,6 @@
public class DwCSciNameDQ_stubs {

    /**
     * Stub for the test asking whether the combination of higher classification
     * taxonomic terms can be unambiguously resolved using bdq:sourceAuthority.
     * None of the arguments are examined; a newly constructed, unpopulated
     * response is returned.
     *
     * Provides: VALIDATION_CLASSIFICATION_UNAMBIGUOUS
     *
     * @param taxonmic_class the provided dwc:class to evaluate
     * @param phylum the provided dwc:phylum to evaluate
     * @param kingdom the provided dwc:kingdom to evaluate
     * @param family the provided dwc:family to evaluate
     * @param order the provided dwc:order to evaluate
     * @return DQResponse the response of type ComplianceValue to return
     */
    @Validation(label="VALIDATION_CLASSIFICATION_UNAMBIGUOUS", description="Can the combination of higher classification taxonomic terms be unambiguously resolved using bdq:sourceAuthority?")
    @Provides("78640f09-8353-411a-800e-9b6d498fb1c9")
    public DQResponse<ComplianceValue> validationClassificationUnambiguous(
            @ActedUpon("dwc:class") String taxonmic_class,
            @ActedUpon("dwc:phylum") String phylum,
            @ActedUpon("dwc:kingdom") String kingdom,
            @ActedUpon("dwc:family") String family,
            @ActedUpon("dwc:order") String order) {

        //TODO: Implement specification
        // EXTERNAL_PREREQUISITES_NOT_MET if the bdq:sourceAuthority
        // is not available; INTERNAL_PREREQUISITES_NOT_MET if all
        // of the fields dwc:kingdom dwc:phylum, dwc:class, dwc:order,
        // dwc:family are EMPTY; COMPLIANT if the combination of values
        // of higher classification taxonomic terms (dwc:kingdom, dwc:phylum,
        // dwc:class, dwc:order, dwc:family) can be unambiguously resolved
        // by the bdq:sourceAuthority; otherwise NOT_COMPLIANT bdq:sourceAuthority
        // default = "GBIF Backbone Taxonomy" [https://doi.org/10.15468/39omei],
        // "API endpoint" [https://api.gbif.org/v1/species?datasetKey=d7dddbf4-2cf0-4f39-9b2a-bb099caae36c&name=]
        //

        //TODO: Parameters. This test is defined as parameterized.
        // bdq:sourceAuthority default="GBIF Backbone Taxonomy"

        // Nothing is evaluated yet: hand back a response with no state, value or comment set here.
        return new DQResponse<ComplianceValue>();
    }

}
50 changes: 49 additions & 1 deletion src/test/java/org/filteredpush/qc/sciname/DwCSciNameDQ_IT.java
Original file line number Diff line number Diff line change
Expand Up @@ -1017,7 +1017,55 @@ public void testValidationGenusFound() {
*/
@Test
public void testValidationClassificationAmbiguous() {
    // Every call passes null for the source authority, leaving the choice of
    // authority to the method under test.

    // Scenario 1: no classification terms at all.
    DQResponse<ComplianceValue> allEmpty =
            DwCSciNameDQ.validationClassificationUnambiguous("", "", "", "", "", null);
    logger.debug(allEmpty.getComment());
    assertEquals(ResultState.INTERNAL_PREREQUISITES_NOT_MET.getLabel(), allEmpty.getResultState().getLabel());

    // Scenario 2: only kingdom and phylum supplied.
    DQResponse<ComplianceValue> kingdomAndPhylum =
            DwCSciNameDQ.validationClassificationUnambiguous("Plantae", "Magnoliophyta", "", "", "", null);
    logger.debug(kingdomAndPhylum.getComment());
    assertEquals(ResultState.RUN_HAS_RESULT.getLabel(), kingdomAndPhylum.getResultState().getLabel());
    assertEquals(ComplianceValue.COMPLIANT.getLabel(), kingdomAndPhylum.getValue().getLabel());

    // Scenario 3: terms taken from unrelated groups.
    DQResponse<ComplianceValue> mixedUp =
            DwCSciNameDQ.validationClassificationUnambiguous("Plantae", "Magnoliophyta", "Mamalia", "Carnivora", "Muricidae", null);
    logger.debug(mixedUp.getComment());
    assertEquals(ResultState.RUN_HAS_RESULT.getLabel(), mixedUp.getResultState().getLabel());
    assertEquals(ComplianceValue.NOT_COMPLIANT.getLabel(), mixedUp.getValue().getLabel());

    // Scenario 4: a full classification from kingdom to family.
    DQResponse<ComplianceValue> fullClassification =
            DwCSciNameDQ.validationClassificationUnambiguous("Animalia", "Arthropoda", "Insecta", "Coleoptera", "Curculionidae", null);
    logger.debug(fullClassification.getComment());
    assertEquals(ResultState.RUN_HAS_RESULT.getLabel(), fullClassification.getResultState().getLabel());
    assertEquals(ComplianceValue.COMPLIANT.getLabel(), fullClassification.getValue().getLabel());

    // Scenario 5: only class and order supplied.
    DQResponse<ComplianceValue> classAndOrder =
            DwCSciNameDQ.validationClassificationUnambiguous("", "", "Magnoliopsida", "Myrtales", "", null);
    logger.debug(classAndOrder.getComment());
    assertEquals(ResultState.RUN_HAS_RESULT.getLabel(), classAndOrder.getResultState().getLabel());
    assertEquals(ComplianceValue.COMPLIANT.getLabel(), classAndOrder.getValue().getLabel());
}


Expand Down

0 comments on commit 47864c2

Please sign in to comment.