From 99183e1680fc7dfaa7b3b3f1628fa1331a041594 Mon Sep 17 00:00:00 2001
From: Christoph
Date: Wed, 18 Mar 2020 20:28:36 +0100
Subject: [PATCH] Add APS Fetcher (refactored) (#6143)

* Add APS fetcher

* Fix case sensitivity bug

* Refactor ApsFetcher

* Add note about APS fetcher

* Refactor findFulltext()

* Refactor getId()

* Parameterize ApsFetcherTest

* Add link to APS changelog entry

* Refactor APS Fetcher

* make separate tests

Co-authored-by: August Janse
---
 CHANGELOG.md                                  |  1 +
 .../jabref/logic/importer/WebFetchers.java    |  2 +
 .../logic/importer/fetcher/ApsFetcher.java    | 99 +++++++++++++++++++
 .../importer/fetcher/ApsFetcherTest.java      | 48 +++++++++
 4 files changed, 150 insertions(+)
 create mode 100644 src/main/java/org/jabref/logic/importer/fetcher/ApsFetcher.java
 create mode 100644 src/test/java/org/jabref/logic/importer/fetcher/ApsFetcherTest.java

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ed063a01f04..32c7ef06b39 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -40,6 +40,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
 - Filenames of external files can no longer contain curly braces. [#5926](https://github.com/JabRef/jabref/pull/5926)
 - We made the filters more easily accessible in the integrity check dialog. [#5955](https://github.com/JabRef/jabref/pull/5955)
 - We reimplemented and improved the dialog "Customize entry types". [#4719](https://github.com/JabRef/jabref/issues/4719)
+- We added an [American Physical Society](https://journals.aps.org/) fetcher. [#818](https://github.com/JabRef/jabref/issues/818)
 
 ### Fixed
 
diff --git a/src/main/java/org/jabref/logic/importer/WebFetchers.java b/src/main/java/org/jabref/logic/importer/WebFetchers.java
index f901ea0588a..72c3ace87ab 100644
--- a/src/main/java/org/jabref/logic/importer/WebFetchers.java
+++ b/src/main/java/org/jabref/logic/importer/WebFetchers.java
@@ -8,6 +8,7 @@
 import java.util.TreeSet;
 
 import org.jabref.logic.importer.fetcher.ACS;
+import org.jabref.logic.importer.fetcher.ApsFetcher;
 import org.jabref.logic.importer.fetcher.ArXiv;
 import org.jabref.logic.importer.fetcher.AstrophysicsDataSystem;
 import org.jabref.logic.importer.fetcher.CiteSeer;
@@ -159,6 +160,7 @@ public static Set<FulltextFetcher> getFullTextFetchers(ImportFormatPreferences i
         fetchers.add(new ACS());
         fetchers.add(new ArXiv(importFormatPreferences));
         fetchers.add(new IEEE(importFormatPreferences));
+        fetchers.add(new ApsFetcher());
         // Meta search
         fetchers.add(new GoogleScholar(importFormatPreferences));
         fetchers.add(new OpenAccessDoi());
diff --git a/src/main/java/org/jabref/logic/importer/fetcher/ApsFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/ApsFetcher.java
new file mode 100644
index 00000000000..a0716f7cfe6
--- /dev/null
+++ b/src/main/java/org/jabref/logic/importer/fetcher/ApsFetcher.java
@@ -0,0 +1,99 @@
+package org.jabref.logic.importer.fetcher;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.Objects;
+import java.util.Optional;
+
+import org.jabref.logic.importer.FulltextFetcher;
+import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.field.StandardField;
+import org.jabref.model.entry.identifier.DOI;
+
+import kong.unirest.Unirest;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * FulltextFetcher implementation that attempts to find a PDF URL at APS. Also see the API, although it isn't
+ * currently used.
+ */
+public class ApsFetcher implements FulltextFetcher {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(ApsFetcher.class);
+
+    // The actual API needs either an API key or a header. This is a workaround.
+    private static final String DOI_URL = "https://www.doi.org/";
+    private static final String PDF_URL = "https://journals.aps.org/prl/pdf/";
+
+    /**
+     * Tries to find the full-text PDF of the given entry at APS.
+     *
+     * @param entry the entry whose DOI field is used for the lookup, must not be null
+     * @return the PDF URL if APS answers a HEAD request for it with status 200, otherwise empty
+     */
+    @Override
+    public Optional<URL> findFullText(BibEntry entry) throws IOException {
+        Objects.requireNonNull(entry);
+
+        Optional<DOI> doi = entry.getField(StandardField.DOI).flatMap(DOI::parse);
+
+        if (!doi.isPresent()) {
+            return Optional.empty();
+        }
+
+        Optional<String> id = getId(doi.get().getDOI());
+
+        if (id.isPresent()) {
+
+            String pdfRequestUrl = PDF_URL + id.get();
+            int code = Unirest.head(pdfRequestUrl).asJson().getStatus();
+
+            if (code == 200) {
+                LOGGER.info("Fulltext PDF found @ APS.");
+                try {
+                    return Optional.of(new URL(pdfRequestUrl));
+                } catch (MalformedURLException e) {
+                    LOGGER.warn("APS returned malformed URL, cannot find PDF.", e);
+                }
+            }
+        }
+        return Optional.empty();
+    }
+
+    @Override
+    public TrustLevel getTrustLevel() {
+        return TrustLevel.PUBLISHER;
+    }
+
+    /**
+     * Convert a DOI into an appropriate APS id.
+     *
+     * @param doi A case insensitive DOI
+     * @return A DOI cased as APS likes it, or empty if the resolved URL cannot be split at "abstract/" or the request fails
+     */
+    private Optional<String> getId(String doi) {
+        // DOI is not case sensitive, but the id for the PDF URL is,
+        // so we follow DOI.org redirects to get the proper id.
+        // https://stackoverflow.com/a/5270162/1729441
+
+        String doiRequest = DOI_URL + doi;
+
+        try {
+            URLConnection con = new URL(doiRequest).openConnection();
+            con.connect();
+            // Requesting the response makes the connection follow the redirects; the stream is closed
+            // right away because only the final URL is needed, not the body.
+            con.getInputStream().close();
+            String[] urlParts = con.getURL().toString().split("abstract/");
+            if (urlParts.length == 2) {
+                return Optional.of(urlParts[1]);
+            }
+        } catch (IOException e) {
+            LOGGER.warn("Error connecting to APS", e);
+        }
+        return Optional.empty();
+    }
+}
diff --git a/src/test/java/org/jabref/logic/importer/fetcher/ApsFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/ApsFetcherTest.java
new file mode 100644
index 00000000000..0e545a15064
--- /dev/null
+++ b/src/test/java/org/jabref/logic/importer/fetcher/ApsFetcherTest.java
@@ -0,0 +1,48 @@
+package org.jabref.logic.importer.fetcher;
+
+import java.net.URL;
+import java.util.Optional;
+
+import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.field.StandardField;
+import org.jabref.testutils.category.FetcherTest;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+@FetcherTest
+class ApsFetcherTest {
+
+    private ApsFetcher finder;
+
+    @BeforeEach
+    void setUp() {
+        finder = new ApsFetcher();
+    }
+
+    @Test
+    void findFullTextFromDoi() throws Exception {
+        BibEntry entry = new BibEntry().withField(StandardField.DOI, "10.1103/PhysRevLett.116.061102");
+        assertEquals(Optional.of(new URL("https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.116.061102")), finder.findFullText(entry));
+    }
+
+    @Test
+    void findFullTextFromLowercaseDoi() throws Exception {
+        BibEntry entry = new BibEntry().withField(StandardField.DOI, "10.1103/physrevlett.124.029002");
+        assertEquals(Optional.of(new URL("https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.124.029002")), finder.findFullText(entry));
+    }
+
+    @Test
+    void notFindFullTextForUnauthorized() throws Exception {
+        BibEntry entry = new BibEntry().withField(StandardField.DOI, "10.1103/PhysRevLett.89.127401");
+        assertEquals(Optional.empty(), finder.findFullText(entry));
+    }
+
+    @Test
+    void notFindFullTextForUnknownEntry() throws Exception {
+        BibEntry entry = new BibEntry().withField(StandardField.DOI, "10.1016/j.aasri.2014.0559.002");
+        assertEquals(Optional.empty(), finder.findFullText(entry));
+    }
+}