Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding GeoCoordinates queries to address #100 #125

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/main/resources/application-context-json-ld.xml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@
<ref bean="schema_org_geoShape_box_west" />
<ref bean="schema_org_geoShape_box_north" />
<ref bean="schema_org_geoShape_box_east" />
<ref bean="schema_org_geoCoordinates_south" />
<ref bean="schema_org_geoCoordinates_west" />
<ref bean="schema_org_geoCoordinates_north" />
<ref bean="schema_org_geoCoordinates_east" />
<ref bean="schema_org_geohash_1" />
<ref bean="schema_org_geohash_2" />
<ref bean="schema_org_geohash_3" />
Expand Down
104 changes: 104 additions & 0 deletions src/main/resources/application-context-schema-org.xml
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,110 @@
<property name="converter" ref="solrLongitudeConverter" />
</bean>

<!-- Extract the south bounding coordinate from the 'SO:latitude' of a 'SO:GeoCoordinates' point given as the 'SO:geo' of a 'SO:spatialCoverage' place. -->
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You added four beans such as schema_org_geoCoordinates_east. But they are not referenced by any place. They should be referenced in the application-context-json-ld.xml file.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added in 355cbec.

<bean id="schema_org_geoCoordinates_south" class="org.dataone.cn.indexer.annotation.SparqlField">
<!-- Field name handed to SparqlField; it matches the variable selected by the query below. -->
<constructor-arg name="name" value="southBoundCoord" />
<!-- The query walks SO:Dataset, SO:spatialCoverage (typed SO:Place), SO:geo (typed
     SO:GeoCoordinates) and returns SO:latitude. The north bean
     (schema_org_geoCoordinates_north) reads the same SO:latitude property.
     NOTE(review): LIMIT 1 is applied without ORDER BY; if a document lists several points,
     which one is returned is presumably up to the SPARQL engine. Confirm that the
     south/west/north/east beans end up describing the same point. -->
<constructor-arg name="query">
<value>
<![CDATA[
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX SO: <http://schema.org/>

SELECT ?southBoundCoord
WHERE {
?datasetId rdf:type SO:Dataset .
?datasetId SO:spatialCoverage ?spatial .
?spatial rdf:type SO:Place .
?spatial SO:geo ?geo .
?geo rdf:type SO:GeoCoordinates .
?geo SO:latitude ?southBoundCoord .
}
LIMIT 1
]]>
</value>
</constructor-arg>
<!-- The selected value is passed through the solrLatitudeConverter bean (defined outside this excerpt). -->
<property name="converter" ref="solrLatitudeConverter" />
</bean>

<!-- Extract the west bounding coordinate from the 'SO:longitude' of a 'SO:GeoCoordinates' point
     given as the 'SO:geo' of a 'SO:spatialCoverage' place. The east bean
     (schema_org_geoCoordinates_east) reads the same SO:longitude property.
     NOTE(review): LIMIT 1 is applied without ORDER BY; if a document lists several points,
     which one is returned is presumably up to the SPARQL engine. Confirm that the
     south/west/north/east beans end up describing the same point. -->
<bean id="schema_org_geoCoordinates_west" class="org.dataone.cn.indexer.annotation.SparqlField">
<constructor-arg name="name" value="westBoundCoord" />
<constructor-arg name="query">
<value>
<![CDATA[
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX SO: <http://schema.org/>

SELECT ?westBoundCoord
WHERE {
?datasetId rdf:type SO:Dataset .
?datasetId SO:spatialCoverage ?spatial .
?spatial rdf:type SO:Place .
?spatial SO:geo ?geo .
?geo rdf:type SO:GeoCoordinates .
?geo SO:longitude ?westBoundCoord .
}
LIMIT 1
]]>
</value>
</constructor-arg>
<!-- The selected value is passed through the solrLongitudeConverter bean (defined outside this excerpt). -->
<property name="converter" ref="solrLongitudeConverter" />
</bean>

<!-- Extract the north bounding coordinate from the 'SO:latitude' of a 'SO:GeoCoordinates' point
     given as the 'SO:geo' of a 'SO:spatialCoverage' place. The south bean
     (schema_org_geoCoordinates_south) reads the same SO:latitude property.
     NOTE(review): LIMIT 1 is applied without ORDER BY; if a document lists several points,
     which one is returned is presumably up to the SPARQL engine. Confirm that the
     south/west/north/east beans end up describing the same point. -->
<bean id="schema_org_geoCoordinates_north" class="org.dataone.cn.indexer.annotation.SparqlField">
<constructor-arg name="name" value="northBoundCoord" />
<constructor-arg name="query">
<value>
<![CDATA[
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX SO: <http://schema.org/>

SELECT ?northBoundCoord
WHERE {
?datasetId rdf:type SO:Dataset .
?datasetId SO:spatialCoverage ?spatial .
?spatial rdf:type SO:Place .
?spatial SO:geo ?geo .
?geo rdf:type SO:GeoCoordinates .
?geo SO:latitude ?northBoundCoord .
}
LIMIT 1
]]>
</value>
</constructor-arg>
<!-- The selected value is passed through the solrLatitudeConverter bean (defined outside this excerpt). -->
<property name="converter" ref="solrLatitudeConverter" />
</bean>

<!-- Extract the east bounding coordinate from the 'SO:longitude' of a 'SO:GeoCoordinates' point
     given as the 'SO:geo' of a 'SO:spatialCoverage' place. The west bean
     (schema_org_geoCoordinates_west) reads the same SO:longitude property.
     NOTE(review): LIMIT 1 is applied without ORDER BY; if a document lists several points,
     which one is returned is presumably up to the SPARQL engine. Confirm that the
     south/west/north/east beans end up describing the same point. -->
<bean id="schema_org_geoCoordinates_east" class="org.dataone.cn.indexer.annotation.SparqlField">
<constructor-arg name="name" value="eastBoundCoord" />
<constructor-arg name="query">
<value>
<![CDATA[
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX SO: <http://schema.org/>

SELECT ?eastBoundCoord
WHERE {
?datasetId rdf:type SO:Dataset .
?datasetId SO:spatialCoverage ?spatial .
?spatial rdf:type SO:Place .
?spatial SO:geo ?geo .
?geo rdf:type SO:GeoCoordinates .
?geo SO:longitude ?eastBoundCoord .
}
LIMIT 1
]]>
</value>
</constructor-arg>
<!-- The selected value is passed through the solrLongitudeConverter bean (defined outside this excerpt). -->
<property name="converter" ref="solrLongitudeConverter" />
</bean>

<!-- Extract 'SO:spatialCoverage'. This property can have several different forms, for different
types of spatial coverage representations. if the value is simple text (i.e. not SO:Place),
then it should be assigned to Solr 'namedLocation' field,
Expand Down
37 changes: 37 additions & 0 deletions src/test/java/org/dataone/cn/index/JsonLdSubprocessorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ public class JsonLdSubprocessorTest extends DataONESolrJettyTestBase {
private String schemaOrgTestDocDryad2Pid = "doi.org_10.5061_dryad.41sk145.jsonld";
private Resource schemaOrgTesHakaiDeep;
private String schemaOrgTesHakaiDeepPid = "hakai-deep-schema.jsonld";
private Resource schemaOrgTestGeoCoordinates;
private String schemaOrgTestGeoCoordinatesPid = "geocoordinates.jsonld";

/* An instance of the RDF/XML Subprocessor */
private JsonLdSubprocessor jsonLdSubprocessor;
Expand Down Expand Up @@ -122,6 +124,7 @@ public void setUp() throws Exception {
schemaOrgTestDocDryad1 = (Resource) context.getBean("schemaOrgTestDryad1");
schemaOrgTestDocDryad2 = (Resource) context.getBean("schemaOrgTestDryad2");
schemaOrgTesHakaiDeep = (Resource) context.getBean("schemaOrgTesHakaiDeep");
schemaOrgTestGeoCoordinates = (Resource) context.getBean("schemaOrgTestGeoCoordinates");

// instantiate the subprocessor
jsonLdSubprocessor = (JsonLdSubprocessor) context.getBean("jsonLdSubprocessor");
Expand Down Expand Up @@ -533,4 +536,38 @@ public void testHakaiDeep() throws Exception {
assertTrue(compareFieldValue(id, "licenseUrl", license));
}

/**
 * Test that the JsonLdSubprocessor indexes a JSON-LD Dataset whose spatialCoverage is a single
 * GeoCoordinates point. A point has one latitude and one longitude, so the south and north
 * bounding coordinates must both equal the latitude, and the west and east bounding
 * coordinates must both equal the longitude.
 *
 * @throws Exception if indexing fails, the wait is interrupted, or the document never
 *                   appears in the Solr index
 */
@Test
public void testGeoCoordinates() throws Exception {
    String id = schemaOrgTestGeoCoordinatesPid;
    indexObjectToSolr(id, schemaOrgTestGeoCoordinates);

    // Indexing is asynchronous: poll until the document shows up, for at most
    // maxAttempts * pollIntervalMillis milliseconds (same budget as before: 36 * 500 ms).
    final int maxAttempts = 2 * 8 + 10 * 2;
    final long pollIntervalMillis = 500;
    boolean indexed = false;
    for (int attempt = 0; attempt < maxAttempts && !indexed; attempt++) {
        // Sleep outside the try block so an interrupt is propagated instead of swallowed.
        Thread.sleep(pollIntervalMillis);
        try {
            assertPresentInSolrIndex(id);
            indexed = true;
        } catch (Throwable notIndexedYet) {
            // Expected while the document is still being indexed; retry on the next pass.
        }
    }
    // If polling timed out, fail here with the real lookup error rather than with an
    // unrelated field-comparison failure further down.
    assertPresentInSolrIndex(id);

    String[] latitude = {"36.7016"};
    String[] longitude = {"-121.90504"};
    assertTrue(compareFieldValue(id, "southBoundCoord", latitude));
    assertTrue(compareFieldValue(id, "westBoundCoord", longitude));
    assertTrue(compareFieldValue(id, "northBoundCoord", latitude));
    assertTrue(compareFieldValue(id, "eastBoundCoord", longitude));
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
{
"@context": {
"@vocab": "https://schema.org/",
"datacite": "http://purl.org/spar/datacite/"
},
"@id": "https://doi.org/10.26022/IEDA/111561",
"@type": "Dataset",
"author": {
"@list": [
{
"@type": "Role",
"author": [
{
"@type": "Person",
"familyName": "Carlin",
"givenName": "Joseph",
"name": "Joseph A Carlin"
}
],
"roleName": "Lead Author"
},
{}
]
},
"citation": [
"https://doi.org/10.3389/feart.2019.00113 "
],
"dateCreated": "2020-06-01",
"description": "This dataset contains sedimentological and geochronological data from 5 sediment cores collected from Monterey Bay, CA, USA. The purpose of this data was to investigate changes in sedimentation on the Monterey Bay shelf over decadal and centennial time scales. All cores were collected from continental shelf areas, in water depths of ~100 m or less, and the cores were collected in 2014 and 2017. Four of the cores were collected using a multi-corer and were less than ~35 cm in length, the fifth core was collected using a gravity corer and was ~90 cm in length. The data include grain size data, 210Pb activities, 137Cs activities, and 14C dates. ",
"distribution": {
"@type": "DataDownload",
"contentUrl": "https://ecl.earthchem.org/view.php?id=1561",
"datePublished": "2020-06-01 00:00:00",
"encodingFormat": "application/vnd.ms-excel"
},
"funding": [
{
"@id": "http://www.nsf.gov/awardsearch/showAward.do?AwardNumber=9530299",
"@type": "MonetaryGrant",
"funder": {
"@type": "Organization",
"name": "National Science Foundation"
},
"identifier": "9530299",
"url": "http://www.nsf.gov/awardsearch/showAward.do?AwardNumber=9530299"
},
{
"@type": "MonetaryGrant",
"funder": {
"@type": "Organization",
"name": "American Chemical Society Petroleum Research Fund"
},
"identifier": "57363-UNI8"
}
],
"identifier": {
"@type": "PropertyValue",
"propertyID": "https://registry.identifiers.org/registry/doi",
"url": "https://doi.org/10.26022/IEDA/111561",
"value": "doi:10.26022/IEDA/111561"
},
"inLanguage": "English",
"isAccessibleForFree": true,
"isBasedOn": [],
"keywords": [
"Monterey Bay",
"California Coast",
"Pacific Ocean",
"Regional (Continents, Oceans)",
"sediment",
"continental shelf",
"grain size",
"210Pb",
"137Cs",
"14C"
],
"license": "https://spdx.org/licenses/CC-BY-SA-4.0",
"name": "Variability in Shelf Sedimentation in Response to Fluvial Sediment Supply and Coastal Erosion over the Past 1,000 Years in Monterey Bay, CA, USA. ",
"provider": {
"@type": "Organization",
"name": "EarthChem Library"
},
"publisher": {
"@id": "https://www.earthchem.org",
"@type": "Organization",
"contactPoint": {
"@type": "ContactPoint",
"contactType": "Customer Service",
"email": "info@earthchem.org",
"name": "Information Desk",
"url": "https://www.earthchem.org/contact/"
},
"name": "EarthChem Library",
"url": "https://www.earthchem.org/library"
},
"sameAs": "https://ecl.earthchem.org/view.php?id=1561",
"spatialCoverage": {
"@type": "Place",
"geo": [
{
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

May we add another part ( or file) having bounding box?

Copy link
Author

@iannesbitt iannesbitt Oct 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@taojing2002 This file is meant to test singular coordinate values. There are two test files that implicitly test the bounding box format (example below):

      "spatialCoverage": {
        "@type": "Place",
        "geo": {
          "@type": "GeoShape",
          "box": "-28.09816 -32.95731 41.000022722222 1.71098"
        }
      },

"@type": "GeoCoordinates",
"latitude": "36.7016",
"longitude": "-121.90504"
}
]
},
"url": "https://doi.org/10.26022/IEDA/111561"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version='1.0' encoding='UTF-8'?>
<!-- System metadata fixture for the test object 'geocoordinates.jsonld', declared with the
     schema.org Dataset JSON-LD format id. The object is readable by dataone_public_user and
     writable by dataone_integration_test_user. -->
<ns1:systemMetadata xmlns:ns1="http://ns.dataone.org/service/types/v2.0">
<serialVersion>1</serialVersion>
<identifier>geocoordinates.jsonld</identifier>
<formatId>science-on-schema.org/Dataset;ld+json</formatId>
<size>4207</size>
<!-- NOTE(review): the MD5 checksum value is left empty; confirm that nothing loading this
     fixture validates the checksum. -->
<checksum algorithm="MD5"></checksum>
<submitter>dataone_integration_test_user</submitter>
<rightsHolder>dataone_integration_test_user</rightsHolder>
<accessPolicy>
<allow>
<subject>dataone_public_user</subject>
<permission>read</permission>
</allow>
<allow>
<subject>dataone_integration_test_user</subject>
<permission>write</permission>
</allow>
</accessPolicy>
<replicationPolicy replicationAllowed="true"/>
<dateUploaded>2024-08-17T14:59:47.171874</dateUploaded>
<dateSysMetadataModified>2024-08-17T14:59:47.173344</dateSysMetadataModified>
<originMemberNode>test_documents</originMemberNode>
<authoritativeMemberNode>test_documents</authoritativeMemberNode>
</ns1:systemMetadata>

5 changes: 5 additions & 0 deletions src/test/resources/org/dataone/cn/index/test-context.xml
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,11 @@ xmlns:context="http://www.springframework.org/schema/context"
value="org/dataone/cn/index/resources/d1_testdocs/json-ld/doi.org_10.5061_dryad.41sk145/doi.org_10.5061_dryad.41sk145.jsonld"/>
</bean>

<!-- Classpath resource for the 'geocoordinates.jsonld' test document; JsonLdSubprocessorTest
     looks this bean up by id and indexes it in testGeoCoordinates. -->
<bean id="schemaOrgTestGeoCoordinates" class="org.springframework.core.io.ClassPathResource" >
<constructor-arg type="java.lang.String"
value="org/dataone/cn/index/resources/d1_testdocs/json-ld/geocoordinates/geocoordinates.jsonld"/>
</bean>

<bean id="emlWithDataTableTestDoc" class="org.springframework.core.io.ClassPathResource" >
<constructor-arg type="java.lang.String"
value="org/dataone/cn/index/resources/d1_testdocs/eml220/eml2.2.0testdatatable/eml2.2.0testdatatable.xml"/>
Expand Down