From 6e6eb04b2b3552c5c29f339a810f14d442d0d8cb Mon Sep 17 00:00:00 2001 From: Wojciech Rygielski Date: Fri, 7 Oct 2016 11:59:14 +0200 Subject: [PATCH] New methods for searching and retrieving HEI attributes Fix https://github.com/erasmus-without-paper/ewp-registry-client/issues/1 --- CHANGELOG.md | 13 ++ .../registryclient/CatalogueDocument.java | 121 +++++++++++++++--- .../registryclient/ClientImpl.java | 29 +++++ .../registryclient/HeiEntry.java | 51 ++++++++ .../registryclient/HeiEntryImpl.java | 113 ++++++++++++++++ .../registryclient/RegistryClient.java | 86 ++++++++++++- .../registryclient/Utils.java | 12 +- .../registryclient/ClientImplBasicTests.java | 44 +++++++ src/test/resources/test-files/catalogue1.xml | 1 + 9 files changed, 449 insertions(+), 21 deletions(-) create mode 100644 src/main/java/eu/erasmuswithoutpaper/registryclient/HeiEntry.java create mode 100644 src/main/java/eu/erasmuswithoutpaper/registryclient/HeiEntryImpl.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 1892794..ea61e74 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,19 @@ Release notes Unreleased ---------- + * `RegistryClient` interface (and its implementation) has been extended with + new methods for searching and retrieving HEI attributes (as requested + [here](https://github.com/erasmus-without-paper/ewp-registry-client/issues/1)). + + The following methods were added (details in javadocs): + + - `HeiEntry findHei(String id)` + - `HeiEntry findHei(String type, String value)` + - `Collection findHeis(ApiSearchConditions conditions)` + - `Collection getAllHeis()` + + * New `HeiEntry` interface was added. + * New `setApiClassRequired(namespaceUri, localName, version)` method in `ApiSearchConditions` class. 
This is just a shorthand which allows you to call `setApiClassRequired(namespaceUri, localName)` and diff --git a/src/main/java/eu/erasmuswithoutpaper/registryclient/CatalogueDocument.java b/src/main/java/eu/erasmuswithoutpaper/registryclient/CatalogueDocument.java index eacf86f..7b9010d 100644 --- a/src/main/java/eu/erasmuswithoutpaper/registryclient/CatalogueDocument.java +++ b/src/main/java/eu/erasmuswithoutpaper/registryclient/CatalogueDocument.java @@ -192,6 +192,16 @@ static boolean doesVersionXMatchMinimumRequiredVersionY(String apiVersion, */ private final Map> heiIdMaps; + /** + * "heiId -> HeiEntry" index for {@link #doc}. + * + *

+ * This field is final, but its data is still mutable (and thus, not thread-safe). Unmodifiable + * views need to be used before its values are exposed outside. + *

+ */ + private final Map heiEntries; + /** * "Unique API ID -> API entry elements" index for {@link #doc}. * @@ -292,6 +302,7 @@ public Iterator getPrefixes(String namespaceUri) { this.hostHeis = new HashMap<>(); this.heiIdMaps = new HashMap<>(); this.apiIndex = new HashMap<>(); + this.heiEntries = new HashMap<>(); // Create indexes. @@ -325,6 +336,12 @@ public Iterator getPrefixes(String namespaceUri) { mapForType.put(getCanonicalId(value), heiId); } + for (Element heiElem : Utils.asElementList( + (NodeList) xpath.evaluate("r:institutions/r:hei", root, XPathConstants.NODESET))) { + String id = heiElem.getAttribute("id"); + HeiEntry hei = new HeiEntryImpl(id, heiElem); + this.heiEntries.put(id, hei); + } for (Element apiElem : Utils.asElementList( (NodeList) xpath.evaluate("r:host/r:apis-implemented/*", root, XPathConstants.NODESET))) { @@ -439,22 +456,7 @@ Element findApi(ApiSearchConditions conditions) { Collection findApis(ApiSearchConditions conditions) { // First, determine the minimum set of elements we need to look through. - List> lookupBase = new ArrayList<>(); - if (conditions.getRequiredNamespaceUri() != null && conditions.getRequiredLocalName() != null) { - - // We can make use of our namespaceUri+localName index in this case. - - List match = this.apiIndex.get( - getApiIndexKey(conditions.getRequiredNamespaceUri(), conditions.getRequiredLocalName())); - if (match != null) { - lookupBase.add(match); - } - } else { - - // We do not have such an index. We'll need to browse through all entries. - - lookupBase.addAll(this.apiIndex.values()); - } + List> lookupBase = this.getApiLookupBase(conditions); // Then, iterate through all the elements and filter the ones that match. @@ -471,6 +473,26 @@ Collection findApis(ApiSearchConditions conditions) { return results; } + /** + * This implements {@link RegistryClient#findHei(String)}, but only for this particular version of + * the catalogue document. 
+ */ + HeiEntry findHei(String id) { + return this.heiEntries.get(id); + } + + /** + * This implements {@link RegistryClient#findHei(String, String)}, but only for this particular + * version of the catalogue document. + */ + HeiEntry findHei(String type, String value) { + String heiId = this.findHeiId(type, value); + if (heiId == null) { + return null; + } + return this.findHei(heiId); + } + /** * This implements {@link RegistryClient#findHeiId(String, String)}, but only for this particular * version of the catalogue document. @@ -485,6 +507,73 @@ String findHeiId(String type, String value) { return mapForType.get(value); } + /** + * This implements {@link RegistryClient#findHeis(ApiSearchConditions)}, but only for this + * particular version of the catalogue document. + */ + Collection findHeis(ApiSearchConditions conditions) { + + // First, determine the minimum set of elements we need to look through. + + List> lookupBase = this.getApiLookupBase(conditions); + + // Then, find all elements which include the matched APIs. + + Set hostElems = new HashSet<>(); + for (List lst : lookupBase) { + for (Element apiElem : lst) { + if (this.doesElementMatchConditions(apiElem, conditions)) { + Element hostElem = (Element) apiElem.getParentNode().getParentNode(); + hostElems.add(hostElem); + } + } + } + + // Finally, collect the unique HEI entries covered by these hosts. + + Set results = new HashSet<>(); + for (Element hostElem : hostElems) { + Set heiIds = this.hostHeis.get(hostElem); + for (String heiId : heiIds) { + HeiEntry hei = this.heiEntries.get(heiId); + if (hei == null) { + // Should not happen, but just in case. + continue; + } + results.add(hei); + } + } + return results; + } + + /** + * This implements {@link RegistryClient#getAllHeis()}, but only for this particular version of + * the catalogue document. 
+ */ + Collection getAllHeis() { + return Collections.unmodifiableCollection(this.heiEntries.values()); + } + + List> getApiLookupBase(ApiSearchConditions conditions) { + List> lookupBase = new ArrayList<>(); + if (conditions.getRequiredNamespaceUri() != null && conditions.getRequiredLocalName() != null) { + + // We can make use of our namespaceUri+localName index in this case. + + List match = this.apiIndex.get( + getApiIndexKey(conditions.getRequiredNamespaceUri(), conditions.getRequiredLocalName())); + if (match != null) { + lookupBase.add(match); + } + } else { + + // We do not have such an index. We'll need to browse through all entries. + + lookupBase.addAll(this.apiIndex.values()); + } + return lookupBase; + } + /** * @return ETag of this document. */ diff --git a/src/main/java/eu/erasmuswithoutpaper/registryclient/ClientImpl.java b/src/main/java/eu/erasmuswithoutpaper/registryclient/ClientImpl.java index 8880753..525d3a5 100644 --- a/src/main/java/eu/erasmuswithoutpaper/registryclient/ClientImpl.java +++ b/src/main/java/eu/erasmuswithoutpaper/registryclient/ClientImpl.java @@ -315,6 +315,20 @@ public Collection findApis(ApiSearchConditions conditions) { return this.doc.findApis(conditions); } + @Override + public HeiEntry findHei(String id) throws UnacceptableStalenessException { + // Since expiry date can only be extended, there is no need to synchronize. + this.assertAcceptableStaleness(); + return this.doc.findHei(id); + } + + @Override + public HeiEntry findHei(String type, String value) throws UnacceptableStalenessException { + // Since expiry date can only be extended, there is no need to synchronize. + this.assertAcceptableStaleness(); + return this.doc.findHei(type, value); + } + @Override public String findHeiId(String type, String value) { // Since expiry date can only be extended, there is no need to synchronize. 
@@ -322,6 +336,21 @@ public String findHeiId(String type, String value) { return this.doc.findHeiId(type, value); } + @Override + public Collection findHeis(ApiSearchConditions conditions) + throws UnacceptableStalenessException { + // Since expiry date can only be extended, there is no need to synchronize. + this.assertAcceptableStaleness(); + return this.doc.findHeis(conditions); + } + + @Override + public Collection getAllHeis() throws UnacceptableStalenessException { + // Since expiry date can only be extended, there is no need to synchronize. + this.assertAcceptableStaleness(); + return this.doc.getAllHeis(); + } + @Override public Date getExpiryDate() { // No need to synchronize. Simply get the expiry date of the currently held doc. diff --git a/src/main/java/eu/erasmuswithoutpaper/registryclient/HeiEntry.java b/src/main/java/eu/erasmuswithoutpaper/registryclient/HeiEntry.java new file mode 100644 index 0000000..aa21bc0 --- /dev/null +++ b/src/main/java/eu/erasmuswithoutpaper/registryclient/HeiEntry.java @@ -0,0 +1,51 @@ +package eu.erasmuswithoutpaper.registryclient; + +import java.util.Collection; + + +/** + * Describes a single HEI entry, as found in the EWP Registry's catalogue. + * + *

+ * Note, that the EWP Registry keeps only the very important attributes of each HEI (identifiers + * and name). If you are looking for more information on HEIs, then you should make use of the + * Institutions + * API. + *

+ * + * @since 1.2.0 + */ +public interface HeiEntry { + + /** + * @return SCHAC ID of this HEI. + */ + String getId(); + + /** + * Get the name of this HEI. + * + * @return We will try to return the name in English. If we cannot find it, we will return the + * name in any other language. If we fail this too, we will return the HEI's ID, so you + * will never get null here. + */ + String getName(); + + /** + * Get a name in the given language. + * + * @param langCode An ISO 639-1 code of the language (2 lower-case letters). + * @return String (if the name was found), or null (if it hasn't). + */ + String getName(String langCode); + + /** + * Retrieve all <other-id> values of certain type. + * + * @param type type identifier, see {@link RegistryClient#findHei(String, String)} for more + * information on these types. + * @return A collection of all matched values for the given type. In no matches were found, an + * empty collection will be returned. + */ + Collection getOtherIds(String type); +} diff --git a/src/main/java/eu/erasmuswithoutpaper/registryclient/HeiEntryImpl.java b/src/main/java/eu/erasmuswithoutpaper/registryclient/HeiEntryImpl.java new file mode 100644 index 0000000..2996236 --- /dev/null +++ b/src/main/java/eu/erasmuswithoutpaper/registryclient/HeiEntryImpl.java @@ -0,0 +1,113 @@ +package eu.erasmuswithoutpaper.registryclient; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import javax.xml.XMLConstants; + +import org.w3c.dom.Element; +import org.w3c.dom.Node; + + +class HeiEntryImpl implements HeiEntry { + + private static class Extras { + private final String primaryName; + private final Map allNames; + private final Map> otherIds; + + private Extras(HeiEntryImpl hei) { + this.allNames = new HashMap<>(); + this.otherIds = new HashMap<>(); + for (Node node : Utils.asNodeList(hei.elem.getChildNodes())) { + if (node.getNodeType() != 
Node.ELEMENT_NODE) { + continue; + } + Element elem = (Element) node; + String value = elem.getTextContent(); + switch (elem.getTagName()) { + case "name": + String lang = elem.getAttributeNS(XMLConstants.XML_NS_URI, "lang"); + if (value.length() > 0) { + this.allNames.put(lang, value); + } + break; + + case "other-id": + String idType = elem.getAttribute("type"); + List lst = this.otherIds.get(idType); + if (lst == null) { + lst = new ArrayList<>(); + this.otherIds.put(idType, lst); + } + lst.add(value); + break; + + default: + // Ingore. + } + } + String primaryName = this.allNames.get("en"); + if (primaryName == null) { + // No English name found. We'll use any name we have. + Collection names = this.allNames.values(); + if (names.size() > 0) { + primaryName = names.iterator().next(); + } else { + // No name at all! + primaryName = hei.id; + } + } + this.primaryName = primaryName; + } + } + + private final String id; + private final Element elem; + + private volatile Extras extras = null; + + HeiEntryImpl(String id, Element heiElem) { + this.id = id; + this.elem = heiElem; + } + + @Override + public String getId() { + return this.id; + } + + @Override + public String getName() { + return this.getExtras().primaryName; + } + + @Override + public String getName(String langCode) { + return this.getExtras().allNames.get(langCode); + } + + @Override + public Collection getOtherIds(String type) { + List values = this.getExtras().otherIds.get(type); + if (values == null) { + return Collections.emptyList(); + } + return Collections.unmodifiableCollection(values); + } + + private Extras getExtras() { + if (this.extras == null) { + synchronized (this) { + if (this.extras == null) { + this.extras = new Extras(this); + } + } + } + return this.extras; + } +} diff --git a/src/main/java/eu/erasmuswithoutpaper/registryclient/RegistryClient.java b/src/main/java/eu/erasmuswithoutpaper/registryclient/RegistryClient.java index 1135782..a92ac98 100644 --- 
a/src/main/java/eu/erasmuswithoutpaper/registryclient/RegistryClient.java +++ b/src/main/java/eu/erasmuswithoutpaper/registryclient/RegistryClient.java @@ -269,12 +269,26 @@ Collection findApis(ApiSearchConditions conditions) throws UnacceptableStalenessException; /** - * Find HEI's SCHAC ID by providing other type of ID. + * Retrieve a {@link HeiEntry} for a given HEI SCHAC ID. + * + * @param id HEI's SCHAC ID. If you don't have a SCHAC ID, then take a look at + * {@link #findHei(String, String)} and {@link #findHeiId(String, String)}. + * @return {@link HeiEntry}, or null if no such HEI has been found. + * @throws UnacceptableStalenessException if the catalogue copy is "too old". See + * {@link UnacceptableStalenessException} for more information. + * @since 1.2.0 + */ + HeiEntry findHei(String id) throws UnacceptableStalenessException; + + /** + * Find {@link HeiEntry} by other (non-SCHAC) ID. * *

- * Registry Service keeps a mapping of various popular HEI IDs and allows you to translate them to - * SCHAC IDs used within the EWP Network. You can use this method, for example, to periodically - * populate your database fields with SCHAC IDs. + * EWP Network uses SCHAC IDs as primary HEI IDs (if you know a SCHAC ID, then you should use the + * {@link #findHei(String)} method instead of this one). However, Registry Service also keeps a + * mapping of various other popular types of HEI IDs and allows you to translate them to SCHAC + * IDs. (You can use this method, for example, to periodically populate your database fields with + * SCHAC IDs.) *

* * @param type This can be any string, but in most cases you will use "pic", @@ -286,6 +300,22 @@ Collection findApis(ApiSearchConditions conditions) * argument, then this should be the PIC code of the HEI being searched for). Note, that * {@link RegistryClient} implementations are allowed to transform your input slightly * (e.g. remove whitespace, or ignore the case) before the matching occurs. + * @return {@link HeiEntry}, or null if no matching HEI has been found. + * @throws UnacceptableStalenessException if the catalogue copy is "too old". See + * {@link UnacceptableStalenessException} for more information. + * @since 1.2.0 + */ + HeiEntry findHei(String type, String value) throws UnacceptableStalenessException; + + /** + * Find the HEI's SCHAC ID by providing an other (non-SCHAC) type of ID. + * + *

+ * This is equivalent to calling {@link #findHei(String, String)} and then retrieving its ID. + *

+ * + * @param type as in {@link #findHei(String, String)}. + * @param value as in {@link #findHei(String, String)}. * @return Either String or null. String with a valid SCHAC ID of this HEI is returned, if * a matching HEI was found. If no match was found, null is returned. * @throws UnacceptableStalenessException if the catalogue copy is "too old". See @@ -293,6 +323,54 @@ Collection findApis(ApiSearchConditions conditions) */ String findHeiId(String type, String value) throws UnacceptableStalenessException; + /** + * Find HEIs for which a particular API has been implemented. + * + *

Example

+ * + *

+ * The following call will return all HEIs which have implemented EWP's Echo API in version + * 1.0.1 or later: + *

+ * + *
+   * ApiSearchConditions myEchoConditions = new ApiSearchConditions();
+   * String ns = "https://github.com/erasmus-without-paper/"
+   *     + "ewp-specs-api-echo/blob/stable-v1/manifest-entry.xsd";
+   * myEchoConditions.setApiClassRequired(ns, "echo", "1.0.1");
+   * Collection&lt;HeiEntry&gt; heis = client.findHeis(myEchoConditions);
+   * 
+ * + *

+ * The above gives you HEIs, but not Echo API URLs. In order to get those, you will need to call + * {@link #findApi(ApiSearchConditions)} later on (with revised {@link ApiSearchConditions}). + *

+ * + * @param conditions Describes the conditions which at least one of the HEIs' APIs must + * meet. + * @return A list of matching {@link HeiEntry} objects. + * @throws UnacceptableStalenessException if the catalogue copy is "too old". See + * {@link UnacceptableStalenessException} for more information. + * @since 1.2.0 + */ + Collection findHeis(ApiSearchConditions conditions) + throws UnacceptableStalenessException; + + /** + * Retrieve a list of all HEIs described in the Registry's catalogue. + * + *

+ * Note, that this list may contain HEIs which don't implement any API. If you want to find HEIs + * which implement a particular API, then use {@link #findHeis(ApiSearchConditions)} instead. + *

+ * + * @return A list of {@link HeiEntry} objects. + * @throws UnacceptableStalenessException if the catalogue copy is "too old". See + * {@link UnacceptableStalenessException} for more information. + * @since 1.2.0 + */ + Collection getAllHeis() throws UnacceptableStalenessException; + /** * Get the expiry date of the currently held copy of the catalogue. * diff --git a/src/main/java/eu/erasmuswithoutpaper/registryclient/Utils.java b/src/main/java/eu/erasmuswithoutpaper/registryclient/Utils.java index 7e515b5..4eba264 100644 --- a/src/main/java/eu/erasmuswithoutpaper/registryclient/Utils.java +++ b/src/main/java/eu/erasmuswithoutpaper/registryclient/Utils.java @@ -36,7 +36,7 @@ public int size() { } /** - * Transform a {@link NodeList} into a list of {@link Element}s. + * Transform a {@link NodeList} into a {@link List} of {@link Element}s. * * @param list a {@link NodeList}. It MUST contain {@link Element}s only. * @return a list of {@link Element}s. @@ -46,4 +46,14 @@ static List asElementList(NodeList list) { return (List) (list.getLength() == 0 ? Collections.emptyList() : new NodeListWrapper(list)); } + + /** + * Transform a {@link NodeList} into a {@link List} of {@link Node}s. + * + * @param list a {@link NodeList}. + * @return a list of {@link Node}s. + */ + static List asNodeList(NodeList list) { + return list.getLength() == 0 ? 
Collections.emptyList() : new NodeListWrapper(list); + } } diff --git a/src/test/java/eu/erasmuswithoutpaper/registryclient/ClientImplBasicTests.java b/src/test/java/eu/erasmuswithoutpaper/registryclient/ClientImplBasicTests.java index e826a77..ce58397 100644 --- a/src/test/java/eu/erasmuswithoutpaper/registryclient/ClientImplBasicTests.java +++ b/src/test/java/eu/erasmuswithoutpaper/registryclient/ClientImplBasicTests.java @@ -245,6 +245,31 @@ public void testFindApis() { assertThat(cli.findApis(conds)).hasSize(0); } + @Test + public void testFindHeiById() { + HeiEntry hei = cli.findHei("bob.example.com"); + assertThat(hei).isNotNull(); + assertThat(hei.getId()).isEqualTo("bob.example.com"); + assertThat(hei.getName()).isIn("Bob's University", "University of the Bob"); + assertThat(hei.getName("en")).isIn("Bob's University", "University of the Bob"); + assertThat(hei.getName("es")).isEqualTo("Universidad de Bob"); + assertThat(hei.getName("pl")).isNull(); + assertThat(hei.getOtherIds("erasmus")).containsExactlyInAnyOrder("BOB01"); + assertThat(hei.getOtherIds("previous-schac")).containsExactlyInAnyOrder("bob.com", "bob.org"); + hei = cli.findHei("nonexistent"); + assertThat(hei).isNull(); + } + + @Test + public void testFindHeiByOtherId() { + HeiEntry hei1 = cli.findHei("bob.example.com"); + HeiEntry hei2 = cli.findHei("previous-schac", "bob.org"); + assertThat(hei1).isNotNull(); + assertThat(hei2).isNotNull(); + assertThat(hei1).isSameAs(hei2); + assertThat(cli.findHei("nonexistent", "nonexistent")).isNull(); + } + @Test public void testFindHeiId() { assertThat(cli.findHeiId("a", "b")).isNull(); @@ -254,6 +279,25 @@ public void testFindHeiId() { assertThat(cli.findHeiId("erasmus", " Bob 01 ")).isNull(); assertThat(cli.findHeiId("erasmus", " Bob02 ")).isNull(); assertThat(cli.findHeiId("pic", "12346")).isEqualTo("john.example.com"); + assertThat(cli.findHeiId("previous-schac", "bob.com")).isEqualTo("bob.example.com"); + assertThat(cli.findHeiId("previous-schac", 
"bob.org")).isEqualTo("bob.example.com"); + } + + @Test + public void testFindHeis() { + ApiSearchConditions conds = new ApiSearchConditions(); + conds.setApiClassRequired("urn:other", "other-api", "1.1.6"); + assertThat(cli.findHeis(conds)).containsExactlyInAnyOrder(cli.findHei("john.example.com"), + cli.findHei("fred.example.com")); + conds.setMinVersionRequired("7.0.0"); + assertThat(cli.findHeis(conds)).isEmpty(); + } + + @Test + public void testGetAllHeis() { + assertThat(cli.getAllHeis()).containsExactlyInAnyOrder(cli.findHei("bob.example.com"), + cli.findHei("john.example.com"), cli.findHei("fred.example.com"), + cli.findHei("weird.example.com")); } @Test diff --git a/src/test/resources/test-files/catalogue1.xml b/src/test/resources/test-files/catalogue1.xml index eff5bb1..5a2789d 100644 --- a/src/test/resources/test-files/catalogue1.xml +++ b/src/test/resources/test-files/catalogue1.xml @@ -121,6 +121,7 @@ BOB01 12345 bob.com + bob.org Bob's University University of the Bob Universidad de Bob