From df786a07a892cac32fe0e683a152d6dbb8fc8760 Mon Sep 17 00:00:00 2001 From: Adam Collins Date: Wed, 1 Mar 2023 14:20:58 +1000 Subject: [PATCH 1/4] match species lists webservices --- README.md | 2 +- .../ala/sds/model/SDSSpeciesListItemDTO.java | 32 +++++++-- .../au/org/ala/sds/util/AUWorkarounds.java | 2 +- .../au/org/ala/sds/util/Configuration.java | 2 +- .../sds/util/SensitiveSpeciesXmlBuilder.java | 8 +-- .../au/org/ala/sds/util/SpeciesListUtil.java | 65 ++++++++++++++----- 6 files changed, 81 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index d5772fad..701ff9f1 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ data service. The disadvantage to this is you will not automatically get the new * cache-data - the location in which to cache the sensitive species data, by default /data/sds/species-cache.ser - * spatial-layer-list - the list of spatial layer ids that will be used for spatial layer processing. The default value for the property is `cl932,cl927,cl23,cl937,cl941,cl938,cl939,cl936,cl940,cl963,cl962,cl961,cl960,cl964,cl965,cl22,cl10925` and can be configured using the property key `sds.spatial.layers`. + * spatial-layer-list - the list of spatial layer ids that will be used for spatial layer processing. The default value for the property is `cl932,cl927,cl959,cl937,cl941,cl938,cl939,cl936,cl940,cl963,cl962,cl961,cl960,cl964,cl965,cl22,cl10925` and can be configured using the property key `sds.spatial.layers`. * spatial-layer-ws - the URL to test for intersection of spatial layers. This is used by the SDS if the required layer values are not provided in the data. The biocache will always provide the layer information to prevent WS bottleneck. The default diff --git a/src/main/java/au/org/ala/sds/model/SDSSpeciesListItemDTO.java b/src/main/java/au/org/ala/sds/model/SDSSpeciesListItemDTO.java index 4e8943b8..93d6c4d8 100644 --- a/src/main/java/au/org/ala/sds/model/SDSSpeciesListItemDTO.java +++ b/src/main/java/au/org/ala/sds/model/SDSSpeciesListItemDTO.java @@ -15,6 +15,7 @@ package au.org.ala.sds.model; import com.google.common.collect.Lists; +import org.codehaus.jackson.annotate.JsonIgnore; import java.util.List; import java.util.Map; @@ -31,6 +32,7 @@ public class SDSSpeciesListItemDTO { private String dataResourceUid; private List> kvpValues; public static final List commonNameLabels= Lists.newArrayList("commonname","vernacularname"); + private String commonName; public String getGuid() { return guid; @@ -40,6 +42,19 @@ public void setGuid(String guid) { this.guid = guid; } + public String getCommonName() { + return commonName; + } + + public void setCommonName(String commonName) { + this.commonName = commonName; + } + + // Added for the change data structure returned by lists + public void setLsid(String lsid) { + this.guid = lsid; + } + public String getName() { return name; } @@ -70,28 +85,35 @@ public void setFamily(String family) { public void setKvpValues(List> kvpValues) { this.kvpValues = kvpValues; + + // family has moved to kvpValues + for(Map pair: kvpValues){ + if("family".equalsIgnoreCase(pair.get("key"))){ + setFamily(pair.get("value")); + } + } } public String getKVPValue(String key){ for(Map pair: kvpValues){ - if(key.equals(pair.get("key"))){ + if(key.equalsIgnoreCase(pair.get("key").replaceAll("[^a-zA-Z]", ""))){ return pair.get("value"); } } return null; } - public String getKVPValue(List keys){ + public String getKVPValueCommonName(){ for(Map pair: kvpValues){ - if(keys.contains(pair.get("key").toLowerCase().replaceAll(" ", ""))){ + if(commonNameLabels.contains(pair.get("key").toLowerCase().replaceAll(" ", ""))){ return pair.get("value"); } } - return null; + return commonName; } @Override public String toString() { return "SDSSpeciesListItemDTO{" + - "guid='" + guid + '\'' + + "lsid='" + guid + '\'' + ", name='" + name + '\'' + ", dataResourceUid='" + dataResourceUid + '\'' + ", kvpValues=" + kvpValues + diff --git a/src/main/java/au/org/ala/sds/util/AUWorkarounds.java b/src/main/java/au/org/ala/sds/util/AUWorkarounds.java index cba60bb7..6222982e 100644 --- a/src/main/java/au/org/ala/sds/util/AUWorkarounds.java +++ b/src/main/java/au/org/ala/sds/util/AUWorkarounds.java @@ -13,7 +13,7 @@ public class AUWorkarounds { public final static String COASTAL_WATERS_LAYER = "cl927"; - public final static String LGA_BOUNDARIES_LAYER = "cl23"; + public final static String LGA_BOUNDARIES_LAYER = "cl959"; final static String TSPZ_LAYER = "cl937"; final static String TSSQZ_LAYER = "cl941"; final static String FFEZ_TRI_STATE_LAYER = "cl938"; diff --git a/src/main/java/au/org/ala/sds/util/Configuration.java b/src/main/java/au/org/ala/sds/util/Configuration.java index 900ae03c..d685d70e 100644 --- a/src/main/java/au/org/ala/sds/util/Configuration.java +++ b/src/main/java/au/org/ala/sds/util/Configuration.java @@ -54,7 +54,7 @@ private Configuration() throws Exception { spatialLayers = new ArrayList(); String configList = config.getProperty("sds.spatial.layers", - "cl932,cl927,cl23,cl937,cl941,cl938,cl939,cl936,cl940,cl963,cl962,cl961,cl960,cl964,cl965,cl22,cl10925"); + "cl932,cl927,cl959,cl937,cl941,cl938,cl939,cl936,cl940,cl963,cl962,cl961,cl960,cl964,cl965,cl22,cl10925"); for(String layerId : configList.split(",")){ spatialLayers.add(layerId.trim()); } diff --git a/src/main/java/au/org/ala/sds/util/SensitiveSpeciesXmlBuilder.java b/src/main/java/au/org/ala/sds/util/SensitiveSpeciesXmlBuilder.java index 8cf2b7d6..3367f56a 100644 --- a/src/main/java/au/org/ala/sds/util/SensitiveSpeciesXmlBuilder.java +++ b/src/main/java/au/org/ala/sds/util/SensitiveSpeciesXmlBuilder.java @@ -71,7 +71,7 @@ public static boolean generateFromWebservices(OutputStream out, Date lastGenerat } //Step 1: get all of the items that have a guid - List guidItems = SpeciesListUtil.getSDSListItems(true); + List guidItems = SpeciesListUtil.getSDSListItems(sdsLists.keySet(), true); if(sdsLists.isEmpty() || guidItems == null || guidItems.isEmpty()) { return false; } @@ -102,7 +102,7 @@ public static boolean generateFromWebservices(OutputStream out, Date lastGenerat } sensitiveSpecies.setAttribute("guid", item.getGuid()); sensitiveSpecies.setAttribute("rank", rank); - String commonName = item.getKVPValue(item.commonNameLabels); + String commonName = item.getKVPValueCommonName(); sensitiveSpecies.setAttribute("commonName", commonName != null ? commonName : ""); doc.getRootElement().addContent(sensitiveSpecies); currentGuid = item.getGuid(); @@ -121,7 +121,7 @@ public static boolean generateFromWebservices(OutputStream out, Date lastGenerat } //Step 2: get all the items that could NOT be matched to the current species list - List unmatchedItems = SpeciesListUtil.getSDSListItems(false); + List unmatchedItems = SpeciesListUtil.getSDSListItems(sdsLists.keySet(), false); String currentName = ""; sensitiveSpecies = null; instances = null; @@ -143,7 +143,7 @@ public static boolean generateFromWebservices(OutputStream out, Date lastGenerat logger.error("Unable to get rank for " + item.getName(), e); } sensitiveSpecies.setAttribute("rank", rank); - String commonName = item.getKVPValue(item.commonNameLabels); + String commonName = item.getKVPValueCommonName(); sensitiveSpecies.setAttribute("commonName", commonName != null ? commonName : ""); //sensitiveSpecies.setAttribute("commonName", st.getCommonName() != null ? st.getCommonName() : ""); doc.getRootElement().addContent(sensitiveSpecies); diff --git a/src/main/java/au/org/ala/sds/util/SpeciesListUtil.java b/src/main/java/au/org/ala/sds/util/SpeciesListUtil.java index 8e3936cc..337d130a 100644 --- a/src/main/java/au/org/ala/sds/util/SpeciesListUtil.java +++ b/src/main/java/au/org/ala/sds/util/SpeciesListUtil.java @@ -17,6 +17,7 @@ import au.org.ala.sds.model.SDSSpeciesListDTO; import au.org.ala.sds.model.SDSSpeciesListItemDTO; +import org.apache.commons.lang3.StringUtils; import org.apache.log4j.Logger; import org.codehaus.jackson.map.DeserializationConfig; import org.codehaus.jackson.map.ObjectMapper; @@ -25,8 +26,7 @@ import java.io.InputStream; import java.net.URL; import java.net.URLConnection; -import java.util.List; -import java.util.Map; +import java.util.*; /** * Provides utility methods required to interface with the species list tool. Allows the SDS @@ -83,21 +83,50 @@ public static Map getSDSLists(){ * Retrieves the "isSDS" species list items ordering them by guid/scientific name * @return */ - public static List getSDSListItems(boolean hasMatch){ + public static List getSDSListItems(Collection dataResourceUids, boolean hasMatch){ try{ - String suffix = hasMatch ? "&guid=isNotNull:guid&sort=guid" : "&guid=isNull:guid&sort=rawScientificName"; - URL url = new URL(Configuration.getInstance().getListToolUrl() + "/ws/speciesListItems?isSDS=eq:true" + suffix); - ObjectMapper mapper = new ObjectMapper(); - mapper.configure(DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES, false); - URLConnection connection = url.openConnection(); - logger.debug("Looking up location using " + url); - InputStream inStream = connection.getInputStream(); + String suffix = hasMatch ? "&sort=guid" : "&sort=rawScientificName"; + List values = new ArrayList(); - java.util.List values = mapper.readValue( - inStream, - new TypeReference>(){} - ); - logger.debug(values); + String drUids = StringUtils.join(dataResourceUids, ","); + int offset = 0; + int max = 400; + boolean moreRecords = true; + while(moreRecords) { + URL url = new URL(Configuration.getInstance().getListToolUrl() + "/ws/speciesListItems?isSDS=eq:true" + suffix + "&druid=" + drUids + "&includeKVP=true&max=" + max + "&offset=" + offset); + offset += max; + + ObjectMapper mapper = new ObjectMapper(); + mapper.configure(DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES, false); + URLConnection connection = url.openConnection(); + logger.error("Looking up location using " + url); + InputStream inStream = connection.getInputStream(); + + java.util.List drValues = mapper.readValue( + inStream, + new TypeReference>() { + } + ); + + if (!hasMatch) { + // include only records without an LSID + for (SDSSpeciesListItemDTO item : drValues) { + if (item.getGuid() == null) { + values.add(item); + } + } + } else { + // include only records with an LSID + for (SDSSpeciesListItemDTO item : drValues) { + if (item.getGuid() != null) { + values.add(item); + } + } + } + + moreRecords = drValues.size() == max; + } + logger.error(values); return values; } catch(Exception e){ logger.error("Unable to get the list items. ", e); @@ -106,8 +135,8 @@ public static List getSDSListItems(boolean hasMatch){ } public static void main(String[] args){ - getSDSLists(); - getSDSListItems(true); - getSDSListItems(false); + Map sdsLists = getSDSLists(); + getSDSListItems(sdsLists.keySet(), true); + getSDSListItems(sdsLists.keySet(), false); } } From 6d450d222ec40a53a3e0d9f7caa9effac42f5879 Mon Sep 17 00:00:00 2001 From: Adam Collins Date: Thu, 2 Mar 2023 09:06:41 +1000 Subject: [PATCH 2/4] revert to cl23 --- README.md | 2 +- src/main/java/au/org/ala/sds/util/AUWorkarounds.java | 2 +- src/main/java/au/org/ala/sds/util/Configuration.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 701ff9f1..d5772fad 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ data service. The disadvantage to this is you will not automatically get the new * cache-data - the location in which to cache the sensitive species data, by default /data/sds/species-cache.ser - * spatial-layer-list - the list of spatial layer ids that will be used for spatial layer processing. The default value for the property is `cl932,cl927,cl959,cl937,cl941,cl938,cl939,cl936,cl940,cl963,cl962,cl961,cl960,cl964,cl965,cl22,cl10925` and can be configured using the property key `sds.spatial.layers`. + * spatial-layer-list - the list of spatial layer ids that will be used for spatial layer processing. The default value for the property is `cl932,cl927,cl23,cl937,cl941,cl938,cl939,cl936,cl940,cl963,cl962,cl961,cl960,cl964,cl965,cl22,cl10925` and can be configured using the property key `sds.spatial.layers`. * spatial-layer-ws - the URL to test for intersection of spatial layers. This is used by the SDS if the required layer values are not provided in the data. The biocache will always provide the layer information to prevent WS bottleneck. The default diff --git a/src/main/java/au/org/ala/sds/util/AUWorkarounds.java b/src/main/java/au/org/ala/sds/util/AUWorkarounds.java index 6222982e..cba60bb7 100644 --- a/src/main/java/au/org/ala/sds/util/AUWorkarounds.java +++ b/src/main/java/au/org/ala/sds/util/AUWorkarounds.java @@ -13,7 +13,7 @@ public class AUWorkarounds { public final static String COASTAL_WATERS_LAYER = "cl927"; - public final static String LGA_BOUNDARIES_LAYER = "cl959"; + public final static String LGA_BOUNDARIES_LAYER = "cl23"; final static String TSPZ_LAYER = "cl937"; final static String TSSQZ_LAYER = "cl941"; final static String FFEZ_TRI_STATE_LAYER = "cl938"; diff --git a/src/main/java/au/org/ala/sds/util/Configuration.java b/src/main/java/au/org/ala/sds/util/Configuration.java index d685d70e..900ae03c 100644 --- a/src/main/java/au/org/ala/sds/util/Configuration.java +++ b/src/main/java/au/org/ala/sds/util/Configuration.java @@ -54,7 +54,7 @@ private Configuration() throws Exception { spatialLayers = new ArrayList(); String configList = config.getProperty("sds.spatial.layers", - "cl932,cl927,cl959,cl937,cl941,cl938,cl939,cl936,cl940,cl963,cl962,cl961,cl960,cl964,cl965,cl22,cl10925"); + "cl932,cl927,cl23,cl937,cl941,cl938,cl939,cl936,cl940,cl963,cl962,cl961,cl960,cl964,cl965,cl22,cl10925"); for(String layerId : configList.split(",")){ spatialLayers.add(layerId.trim()); } From 6e7b0118c4fd2a3735bf9a9d6411221dc90bba72 Mon Sep 17 00:00:00 2001 From: pal155 Date: Thu, 9 Mar 2023 08:59:14 +1100 Subject: [PATCH 3/4] Bump development version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 30462082..83af3410 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ 4.0.0 au.org.ala sds - 1.4.7 + 1.4.8-SNAPSHOT GitHub https://github.com/AtlasOfLivingAustralia/sds/issues From 31f050912c5a3fed7ab89bb9dc3a10a1be3dcfd7 Mon Sep 17 00:00:00 2001 From: pal155 Date: Thu, 23 Mar 2023 15:17:35 +1100 Subject: [PATCH 4/4] Release 1.4.8 Contains fixes for #46 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 83af3410..36b6b1f6 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ 4.0.0 au.org.ala sds - 1.4.8-SNAPSHOT + 1.4.8 GitHub https://github.com/AtlasOfLivingAustralia/sds/issues @@ -56,7 +56,7 @@ au.org.ala ala-name-matching-search - 4.2 + 4.3 compile