diff --git a/doc/release-notes/9276-doc-cvoc-index-in.md b/doc/release-notes/9276-doc-cvoc-index-in.md new file mode 100644 index 00000000000..78289201511 --- /dev/null +++ b/doc/release-notes/9276-doc-cvoc-index-in.md @@ -0,0 +1,18 @@ +## Release Highlights + +### Updates on Support for External Vocabulary Services + +Multiple extensions of the External Vocabulary mechanism have been added. These extensions allow interaction with services based on the OntoPortal software and are expected to be generally useful for other service types. + +These changes include: + +#### Improved Indexing with Compound Fields + +When using an external vocabulary service with compound fields, you can now specify which field(s) will include additional indexed information, such as translations of an entry into other languages. This is done by adding the `indexIn` parameter to the corresponding entry in `retrieval-filtering`. (#10505) +For more information, please check the [GDCC/dataverse-external-vocab-support documentation](https://github.com/gdcc/dataverse-external-vocab-support/tree/main/docs). + +#### Broader Support for Indexing Service Responses + +Indexing of the results from `retrieval-filtering` responses can now handle additional formats, including JSON arrays of strings and values from arbitrary keys within a JSON object. (#10505) + +**** This documentation must be merged with 9276-allow-flexible-params-in-retrievaluri-cvoc.md (#10404) \ No newline at end of file diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index 00d1e8a7831..e70cf0e0897 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -579,9 +579,9 @@ In general, the external vocabulary support mechanism may be a better choice for The specifics of the user interface for entering/selecting a vocabulary term and how that term is then displayed are managed by third-party Javascripts. 
The initial Javascripts that have been created provide auto-completion, displaying a list of choices that match what the user has typed so far, but other interfaces, such as displaying a tree of options for a hierarchical vocabulary, are possible. Similarly, existing scripts do relatively simple things for displaying a term - showing the term's name in the appropriate language and providing a link to an external URL with more information, but more sophisticated displays are possible. -Scripts supporting use of vocabularies from services supporting the SKOMOS protocol (see https://skosmos.org), retrieving ORCIDs (from https://orcid.org), and using ROR (https://ror.org/) are available https://github.com/gdcc/dataverse-external-vocab-support. (Custom scripts can also be used and community members are encouraged to share new scripts through the dataverse-external-vocab-support repository.) +Scripts supporting use of vocabularies from services supporting the Skosmos protocol (see https://skosmos.org) or based on the OntoPortal software (see https://ontoportal.org/), retrieving ORCIDs (from https://orcid.org), and using ROR (https://ror.org/) are available at https://github.com/gdcc/dataverse-external-vocab-support. (Custom scripts can also be used and community members are encouraged to share new scripts through the dataverse-external-vocab-support repository.) -Configuration involves specifying which fields are to be mapped, whether free-text entries are allowed, which vocabulary(ies) should be used, what languages those vocabulary(ies) are available in, and several service protocol and service instance specific parameters, including the ability to send HTTP headers on calls to the service. 
+Configuration involves specifying which fields are to be mapped, to which Solr field they should be indexed, whether free-text entries are allowed, which vocabulary(ies) should be used, what languages those vocabulary(ies) are available in, and several service protocol and service instance specific parameters, including the ability to send HTTP headers on calls to the service. These are all defined in the :ref:`:CVocConf <:CVocConf>` setting as a JSON array. Details about the required elements as well as example JSON arrays are available at https://github.com/gdcc/dataverse-external-vocab-support, along with an example metadata block that can be used for testing. The scripts required can be hosted locally or retrieved dynamically from https://gdcc.github.io/ (similar to how dataverse-previewers work). diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java index bd40dab5af6..14d1f564ee6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java @@ -42,6 +42,7 @@ import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.httpclient.HttpException; +import org.apache.commons.lang3.StringUtils; import org.apache.http.HttpResponse; import org.apache.http.HttpResponseInterceptor; import org.apache.http.client.methods.HttpGet; @@ -322,14 +323,15 @@ public Map getCVocConf(boolean byTermUriField){ + jo.getString("term-uri-field")); } } - if (jo.containsKey("child-fields")) { - JsonArray childFields = jo.getJsonArray("child-fields"); - for (JsonString elm : childFields.getValuesAs(JsonString.class)) { - dft = findByNameOpt(elm.getString()); - logger.info("Found: " + dft.getName()); + if (jo.containsKey("managed-fields")) { + JsonObject managedFields = jo.getJsonObject("managed-fields"); + for (String s : managedFields.keySet()) { + dft = 
findByNameOpt(managedFields.getString(s)); if (dft == null) { logger.warning("Ignoring External Vocabulary setting for non-existent child field: " - + elm.getString()); + + managedFields.getString(s)); + } else { + logger.fine("Found: " + dft.getName()); } } } @@ -346,7 +348,7 @@ public Map getCVocConf(boolean byTermUriField){ * @param df - the primitive/parent compound field containing a newly saved value */ public void registerExternalVocabValues(DatasetField df) { - DatasetFieldType dft =df.getDatasetFieldType(); + DatasetFieldType dft = df.getDatasetFieldType(); logger.fine("Registering for field: " + dft.getName()); JsonObject cvocEntry = getCVocConf(true).get(dft.getId()); if (dft.isPrimitive()) { @@ -371,38 +373,48 @@ public void registerExternalVocabValues(DatasetField df) { } } } - + /** - * Retrieves indexable strings from a cached externalvocabularyvalue entry. - * - * This method assumes externalvocabularyvalue entries have been filtered and - * the externalvocabularyvalue entry contain a single JsonObject whose "personName" or "termName" values - * are either Strings or an array of objects with "lang" and ("value" or "content") keys. The - * string, or the "value/content"s for each language are added to the set. - * + * Retrieves indexable strings from a cached externalvocabularyvalue entry filtered through retrieval-filtering configuration. + *

+ * This method assumes externalvocabularyvalue entries have been filtered and that they contain a single JsonObject. + * Cases handled: a String, an Array of Strings, an Array of Objects with "value" or "content" keys, or an Object with one or more entries that have String values or Array values with a set of String values. + * The string(s), or the "value"/"content" strings for each language, are added to the set. + * Retrieved string values are indexed in the term-uri-field (parameter defined in the CVOC configuration) by default, or in the field specified by an optional "indexIn" parameter in the retrieval-filtering defined in the CVOC configuration. + *

* Any parsing error results in no entries (there can be unfiltered entries with * unknown structure - getting some strings from such an entry could give fairly * random info that would be bad to addd for searches, etc.) - * - * @param termUri + * + * @param termUri unique identifier to search in database + * @param cvocEntry related cvoc configuration + * @param indexingField name of solr field that will be filled with getStringsFor while indexing * @return - a set of indexable strings */ - public Set getStringsFor(String termUri) { - Set strings = new HashSet(); + public Set getIndexableStringsByTermUri(String termUri, JsonObject cvocEntry, String indexingField) { + Set strings = new HashSet<>(); JsonObject jo = getExternalVocabularyValue(termUri); + JsonObject filtering = cvocEntry.getJsonObject("retrieval-filtering"); + String termUriField = cvocEntry.getJsonString("term-uri-field").getString(); if (jo != null) { try { for (String key : jo.keySet()) { - if (key.equals("termName") || key.equals("personName")) { + String indexIn = filtering.getJsonObject(key).getString("indexIn", null); + // Either we are in mapping mode so indexingField (solr field) equals indexIn (cvoc config) + // Or we are in default mode indexingField is termUriField, indexIn is not defined then only termName and personName keys are used + if (indexingField.equals(indexIn) || + (indexIn == null && termUriField.equals(indexingField) && (key.equals("termName")) || key.equals("personName"))) { JsonValue jv = jo.get(key); if (jv.getValueType().equals(JsonValue.ValueType.STRING)) { logger.fine("adding " + jo.getString(key) + " for " + termUri); strings.add(jo.getString(key)); - } else { - if (jv.getValueType().equals(JsonValue.ValueType.ARRAY)) { - JsonArray jarr = jv.asJsonArray(); - for (int i = 0; i < jarr.size(); i++) { + } else if (jv.getValueType().equals(JsonValue.ValueType.ARRAY)) { + JsonArray jarr = jv.asJsonArray(); + for (int i = 0; i < jarr.size(); i++) { + if 
(jarr.get(i).getValueType().equals(JsonValue.ValueType.STRING)) { + strings.add(jarr.getString(i)); + } else if (jarr.get(i).getValueType().equals(ValueType.OBJECT)) { // This condition handles SKOMOS format like [{"lang": "en","value": "non-apis bee"},{"lang": "fr","value": "abeille non apis"}] JsonObject entry = jarr.getJsonObject(i); if (entry.containsKey("value")) { logger.fine("adding " + entry.getString("value") + " for " + termUri); @@ -414,6 +426,22 @@ public Set getStringsFor(String termUri) { } } } + } else if (jv.getValueType().equals(JsonValue.ValueType.OBJECT)) { + JsonObject joo = jv.asJsonObject(); + for (Map.Entry entry : joo.entrySet()) { + if (entry.getValue().getValueType().equals(JsonValue.ValueType.STRING)) { // This condition handles format like { "fr": "association de quartier", "en": "neighborhood associations"} + logger.fine("adding " + joo.getString(entry.getKey()) + " for " + termUri); + strings.add(joo.getString(entry.getKey())); + } else if (entry.getValue().getValueType().equals(ValueType.ARRAY)) { // This condition handles format like {"en": ["neighbourhood societies"]} + JsonArray jarr = entry.getValue().asJsonArray(); + for (int i = 0; i < jarr.size(); i++) { + if (jarr.get(i).getValueType().equals(JsonValue.ValueType.STRING)) { + logger.fine("adding " + jarr.getString(i) + " for " + termUri); + strings.add(jarr.getString(i)); + } + } + } + } } } } @@ -425,7 +453,7 @@ public Set getStringsFor(String termUri) { } logger.fine("Returning " + String.join(",", strings) + " for " + termUri); return strings; - } + } /** * Perform a query to retrieve a cached value from the externalvocabularvalue table @@ -461,10 +489,11 @@ public void registerExternalTerm(JsonObject cvocEntry, String term, List langs = settingsService.getConfiguredLanguages(); Map cvocMap = datasetFieldService.getCVocConf(true); + Map> cvocManagedFieldMap = new HashMap<>(); + for (Map.Entry cvocEntry : cvocMap.entrySet()) { + 
if(cvocEntry.getValue().containsKey("managed-fields")) { + JsonObject managedFields = cvocEntry.getValue().getJsonObject("managed-fields"); + Set managedFieldValues = new HashSet<>(); + for (String s : managedFields.keySet()) { + managedFieldValues.add(managedFields.getString(s)); + } + cvocManagedFieldMap.put(cvocEntry.getKey(), managedFieldValues); + } + } + + + Set metadataBlocksWithValue = new HashSet<>(); for (DatasetField dsf : datasetVersion.getFlatDatasetFields()) { @@ -988,19 +1002,39 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set vals = dsf.getValues_nondisplay(); - Set searchStrings = new HashSet(); + Set searchStrings = new HashSet<>(); for (String val: vals) { searchStrings.add(val); - searchStrings.addAll(datasetFieldService.getStringsFor(val)); + // Try to get string values from externalvocabularyvalue using val as termUri + searchStrings.addAll(datasetFieldService.getIndexableStringsByTermUri(val, cvocMap.get(dsfType.getId()), dsfType.getName())); + + if(dsfType.getParentDatasetFieldType()!=null) { + List childDatasetFields = dsf.getParentDatasetFieldCompoundValue().getChildDatasetFields(); + for (DatasetField df : childDatasetFields) { + if(cvocManagedFieldMap.get(dsfType.getId()).contains(df.getDatasetFieldType().getName())) { + String solrManagedFieldSearchable = df.getDatasetFieldType().getSolrField().getNameSearchable(); + // Try to get string values from externalvocabularyvalue but for a managed fields of the CVOCConf + Set stringsForManagedField = datasetFieldService.getIndexableStringsByTermUri(val, cvocMap.get(dsfType.getId()), df.getDatasetFieldType().getName()); + logger.fine(solrManagedFieldSearchable + " filled with externalvocabularyvalue : " + stringsForManagedField); + //.addField works as addition of value not a replace of value + // it allows to add mapped values by CVOCConf before or after indexing real DatasetField value(s) of solrManagedFieldSearchable + 
solrInputDocument.addField(solrManagedFieldSearchable, stringsForManagedField); + } + } + } } + logger.fine(solrFieldSearchable + " filled with externalvocabularyvalue : " + searchStrings); solrInputDocument.addField(solrFieldSearchable, searchStrings); if (dsfType.getSolrField().isFacetable()) { + logger.fine(solrFieldFacetable + " gets " + vals); solrInputDocument.addField(solrFieldFacetable, vals); } } + if (dsfType.isControlledVocabulary()) { /** If the cvv list is empty but the dfv list is not then it is assumed this was harvested * from an installation that had controlled vocabulary entries that don't exist in our this db diff --git a/src/test/java/edu/harvard/iq/dataverse/DatasetFieldServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/DatasetFieldServiceBeanTest.java new file mode 100644 index 00000000000..873d417131d --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/DatasetFieldServiceBeanTest.java @@ -0,0 +1,179 @@ +package edu.harvard.iq.dataverse; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Collections; +import java.util.Set; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.AdditionalMatchers; +import org.mockito.Mockito; + +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import jakarta.json.Json; +import jakarta.json.JsonObject; + +public class DatasetFieldServiceBeanTest { + + private DatasetFieldServiceBean datasetFieldServiceBean; + + static String getCvocJson(String pathToJsonFile) throws IOException { + final File datasetVersionJson = new File(pathToJsonFile); + return new String(Files.readAllBytes(Paths.get(datasetVersionJson.getAbsolutePath()))); + } + + @BeforeEach + void setUp() { + this.datasetFieldServiceBean = Mockito.spy(new DatasetFieldServiceBean()); + } + + @AfterEach 
+ void tearDown() { + this.datasetFieldServiceBean = null; + } + + @Test + void getIndexableStringsByTermUriSkosmos() throws IOException { + String fieldName = "keyword"; + String termURI = "http://aims.fao.org/aos/agrovoc/c_2389"; + + JsonObject cvocEntry = prepare(fieldName, "src/test/resources/json/cvoc-skosmos.json"); + + JsonObject getExtVocabValueReturnedValue = Json.createObjectBuilder() + .add("@id", termURI) + .add("termName", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("lang", "fr") + .add("value", "faux bourdon")) + .add(Json.createObjectBuilder() + .add("lang", "en") + .add("value", "drone (insects)"))) + .add("vocabularyUri", "http://aims.fao.org/aos/agrovoc") + .add("synonyms", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("lang", "fr") + .add("value", "Abeille mâle")) + .add(Json.createObjectBuilder() + .add("lang", "en") + .add("value", "drone honey bees"))) + .add("genericTerm", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("lang", "fr") + .add("value", "Colonie d'abeilles")) + .add(Json.createObjectBuilder() + .add("lang", "en") + .add("value", "bee colonies"))) + .build(); + Mockito.doReturn(getExtVocabValueReturnedValue).when(datasetFieldServiceBean).getExternalVocabularyValue(termURI); + Mockito.doReturn(null).when(datasetFieldServiceBean).getExternalVocabularyValue(AdditionalMatchers.not(Mockito.eq(termURI))); + + // keywordTermURL + Set result = datasetFieldServiceBean.getIndexableStringsByTermUri(termURI, cvocEntry, "keywordTermURL"); + assertEquals(Set.of("faux bourdon", "drone (insects)"), result); + + // keywordValue + result = datasetFieldServiceBean.getIndexableStringsByTermUri(termURI, cvocEntry, "keywordValue"); + assertEquals(Collections.emptySet(), result, "Only 'keywordTermURL' must return values for Skosmos"); + + // Any others field + result = datasetFieldServiceBean.getIndexableStringsByTermUri(termURI, cvocEntry, ""); + assertEquals(Collections.emptySet(), result, 
"Only 'keywordTermURL' must return values for Skosmos"); + + // Another termURI not in database + result = datasetFieldServiceBean.getIndexableStringsByTermUri("http://example.org/uuid", cvocEntry, "keywordTermURL"); + assertEquals(Collections.emptySet(), result); + } + + @Test + void getIndexableStringsByTermUriAgroportal() throws IOException { + String fieldName = "keyword"; + String termURI = "http://aims.fao.org/aos/agrovoc/c_50265"; + + JsonObject cvocEntry = prepare(fieldName, "src/test/resources/json/cvoc-agroportal.json"); + + JsonObject getExtVocabValueReturnedValue = Json.createObjectBuilder() + .add("@id", termURI) + .add("termName", Json.createObjectBuilder() + .add("fr", "association de quartier") + .add("en", "neighborhood associations")) + .add("vocabularyName", "https://data.agroportal.lirmm.fr/ontologies/AGROVOC") + .add("vocabularyUri", "https://data.agroportal.lirmm.fr/ontologies/AGROVOC") + .add("synonyms", Json.createObjectBuilder() + .add("en", Json.createArrayBuilder().add("neighborhood societies"))) + .build(); + Mockito.doReturn(getExtVocabValueReturnedValue).when(datasetFieldServiceBean).getExternalVocabularyValue(termURI); + Mockito.doReturn(null).when(datasetFieldServiceBean).getExternalVocabularyValue(AdditionalMatchers.not(Mockito.eq(termURI))); + + // keywordValue + Set result = datasetFieldServiceBean.getIndexableStringsByTermUri(termURI, cvocEntry, "keywordValue"); + assertEquals(Set.of("association de quartier", "neighborhood associations", "neighborhood societies"), result); + + // keywordTermURL + result = datasetFieldServiceBean.getIndexableStringsByTermUri(termURI, cvocEntry, "keywordTermURL"); + assertEquals(Collections.emptySet(), result, "Only 'keywordValue' must return values for Agroportal"); + + // Any others field + result = datasetFieldServiceBean.getIndexableStringsByTermUri(termURI, cvocEntry, ""); + assertEquals(Collections.emptySet(), result, "Only 'keywordValue' must return values for Agroportal"); + + // Another 
termURI not in database + result = datasetFieldServiceBean.getIndexableStringsByTermUri("http://example.org/uuid", cvocEntry, "keywordValue"); + assertEquals(Collections.emptySet(), result); + } + + @Test + void getIndexableStringsByTermUriOrcid() throws IOException { + String fieldName = "creator"; + String termURI = "https://orcid.org/0000-0003-4217-153X"; + + JsonObject cvocEntry = prepare(fieldName, "src/test/resources/json/cvoc-orcid.json"); + + JsonObject getExtVocabValueReturnedValue = Json.createObjectBuilder() + .add("@id", termURI) + .add("scheme", "ORCID") + .add("@type", "https://schema.org/Person") + .add("personName", "Doe, John") + .build(); + Mockito.doReturn(getExtVocabValueReturnedValue).when(datasetFieldServiceBean).getExternalVocabularyValue(termURI); + Mockito.doReturn(null).when(datasetFieldServiceBean).getExternalVocabularyValue(AdditionalMatchers.not(Mockito.eq(termURI))); + + // ORCID match with "personName" field into "getIndexableStringsByTermUri" method + Set result = datasetFieldServiceBean.getIndexableStringsByTermUri(termURI, cvocEntry, ""); + assertEquals(Set.of("Doe, John"), result); + + // Another termURI not in database + result = datasetFieldServiceBean.getIndexableStringsByTermUri("http://example.org/uuid", cvocEntry, fieldName); + assertEquals(Collections.emptySet(), result); + } + + /** + * Prepare unit tests with mock methods. + * + * @param fieldName "field-name" into cvoc configuration file + * @param jsonPath path of the JSON configuration file: src/test/resources/json/... 
+ * @return {@link JsonObject} representing the configuration file + * @throws IOException in case on read error on configuration file + */ + JsonObject prepare(String fieldName, String jsonPath) throws IOException { + Long dftId = Long.parseLong("1"); + // DatasetFieldType name corresponding to "field-name" into cvoc configuration file + DatasetFieldType dft = new DatasetFieldType(fieldName, DatasetFieldType.FieldType.NONE, true); + dft.setId(dftId); + + Mockito.doReturn(dft).when(datasetFieldServiceBean).findByNameOpt(fieldName); + Mockito.doReturn(null).when(datasetFieldServiceBean).findByNameOpt(AdditionalMatchers.not(Mockito.eq(fieldName))); + + SettingsServiceBean settingsService = Mockito.mock(SettingsServiceBean.class); + Mockito.when(settingsService.getValueForKey(SettingsServiceBean.Key.CVocConf)).thenReturn(getCvocJson(jsonPath)); + datasetFieldServiceBean.settingsService = settingsService; + + return datasetFieldServiceBean.getCVocConf(false).get(dftId); + } + +} diff --git a/src/test/resources/json/cvoc-agroportal.json b/src/test/resources/json/cvoc-agroportal.json new file mode 100644 index 00000000000..03c9e2f4d07 --- /dev/null +++ b/src/test/resources/json/cvoc-agroportal.json @@ -0,0 +1,76 @@ +[ + { + "field-name": "keyword", + "term-uri-field": "keywordTermURL", + "cvoc-url": "https://data.agroportal.lirmm.fr/", + "js-url": "https://domain.tld/assets/cvoc/ontoportal.js", + "headers": { + "Authorization": "apikey token=XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX" + }, + "protocol": "ontoportal", + "retrieval-uri": "https://data.agroportal.lirmm.fr/ontologies/{keywordVocabulary}/classes/{encodeUrl:keywordTermURL}?language=en,fr", + "term-parent-uri": "", + "allow-free-text": true, + "languages": "en, fr", + "vocabs": { + "AGROVOC": { + "vocabularyUri": "https://data.agroportal.lirmm.fr/ontologies/AGROVOC", + "uriSpace": "http" + }, + "ONTOBIOTOPE": { + "vocabularyUri": "https://data.agroportal.lirmm.fr/ontologies/ONTOBIOTOPE", + "uriSpace": "http" + }, + 
"CROPUSAGE": { + "vocabularyUri": "https://data.agroportal.lirmm.fr/ontologies/CROPUSAGE", + "uriSpace": "http" + } + }, + "managed-fields": { + "vocabularyName": "keywordVocabulary", + "termName": "keywordValue", + "vocabularyUri": "keywordVocabularyURI" + }, + "retrieval-filtering": { + "@context": { + "termName": "https://schema.org/name", + "vocabularyName": "https://dataverse.org/schema/vocabularyName", + "vocabularyUri": "https://dataverse.org/schema/vocabularyUri", + "lang": "@language", + "value": "@value" + }, + "@id": { + "pattern": "{0}", + "params": [ + "@id" + ] + }, + "termName": { + "pattern": "{0}", + "params": [ + "/prefLabel" + ], + "indexIn": "keywordValue" + }, + "vocabularyName": { + "pattern": "{0}", + "params": [ + "/links/ontology" + ] + }, + "vocabularyUri": { + "pattern": "{0}", + "params": [ + "/links/ontology" + ] + }, + "synonyms": { + "pattern": "{0}", + "params": [ + "/synonym" + ], + "indexIn": "keywordValue" + } + } + } +] diff --git a/src/test/resources/json/cvoc-orcid.json b/src/test/resources/json/cvoc-orcid.json new file mode 100644 index 00000000000..6b904aefc3f --- /dev/null +++ b/src/test/resources/json/cvoc-orcid.json @@ -0,0 +1,43 @@ +[ + { + "field-name": "creator", + "term-uri-field": "creator", + "js-url": "https://gdcc.github.io/dataverse-external-vocab-support/scripts/people.js", + "protocol": "orcid", + "retrieval-uri": "https://pub.orcid.org/v3.0/{0}/person", + "allow-free-text": true, + "prefix": "https://orcid.org/", + "managed-fields": {}, + "languages": "", + "vocabs": { + "orcid": { + "uriSpace": "https://orcid.org/" + } + }, + "retrieval-filtering": { + "@context": { + "personName": "https://schema.org/name", + "scheme": "http://www.w3.org/2004/02/skos/core#inScheme" + }, + "personName": { + "pattern": "{0}, {1}", + "params": [ + "/name/family-name/value", + "/name/given-names/value" + ] + }, + "@id": { + "pattern": "{0}", + "params": [ + "@id" + ] + }, + "scheme": { + "pattern": "ORCID" + }, + "@type": { + 
"pattern": "https://schema.org/Person" + } + } + } +] diff --git a/src/test/resources/json/cvoc-skosmos.json b/src/test/resources/json/cvoc-skosmos.json new file mode 100644 index 00000000000..6d32b29f054 --- /dev/null +++ b/src/test/resources/json/cvoc-skosmos.json @@ -0,0 +1,69 @@ +[ + { + "field-name": "keyword", + "term-uri-field": "keywordTermURL", + "cvoc-url": "https://demo.skosmos.org/", + "js-url": "https://github.com/gdcc/dataverse-external-vocab-support/blob/main/scripts/skosmos.js", + "protocol": "skosmos", + "retrieval-uri": "https://demo.skosmos.org/rest/v1/data?uri={0}", + "term-parent-uri": "", + "allow-free-text": true, + "languages": "en, fr", + "vocabs": { + "agrovoc": { + "vocabularyUri": "http://aims.fao.org/vest-registry/kos/agrovoc", + "uriSpace": "http://aims.fao.org/aos/agrovoc/" + } + }, + "managed-fields": { + "vocabularyName": "keywordVocabulary", + "termName": "keywordValue", + "vocabularyUri": "keywordVocabularyURI" + }, + "retrieval-filtering": { + "@context": { + "termName": "https://schema.org/name", + "vocabularyName": "https://dataverse.org/schema/vocabularyName", + "vocabularyUri": "https://dataverse.org/schema/vocabularyUri", + "lang": "@language", + "value": "@value" + }, + "@id": { + "pattern": "{0}", + "params": [ + "@id" + ] + }, + "termName": { + "pattern": "{0}", + "params": [ + "/graph/uri=@id/prefLabel" + ] + }, + "vocabularyName": { + "pattern": "{0}", + "params": [ + "/graph/type=skos:ConceptScheme/prefLabel" + ] + }, + "vocabularyUri": { + "pattern": "{0}", + "params": [ + "/graph/type=skos:ConceptScheme/uri" + ] + }, + "synonyms": { + "pattern": "{0}", + "params": [ + "/graph/uri=@id/altLabel" + ] + }, + "genericTerm": { + "pattern": "{0}", + "params": [ + "/graph/type=skos:Concept/prefLabel" + ] + } + } + } +]