From 3bc1f0236cdfef81cb36ebce23f9350b1615928e Mon Sep 17 00:00:00 2001 From: Christoph Date: Tue, 14 Apr 2020 09:37:25 +0200 Subject: [PATCH] Fix inspire fetcher (#6258) * Fix inspire fetcher Use application/x-bibtex header Fixes #6229 * update changelog * extract urldownload method for easier overwriting revert not related changes Co-authored-by: Tobias Diez --- CHANGELOG.md | 3 +- .../importer/SearchBasedParserFetcher.java | 13 ++++- .../importer/fetcher/INSPIREFetcher.java | 52 ++++++------------- .../importer/fetcher/INSPIREFetcherTest.java | 43 ++++++++------- 4 files changed, 52 insertions(+), 59 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f589696a6e..89324b0615c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,8 +34,9 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve - We fixed an issue with inconsistent capitalization of file extensions when downloading files. [#6115](https://github.com/JabRef/jabref/issues/6115) - We fixed the display of language and encoding in the preferences dialog. [#6130](https://github.com/JabRef/jabref/pull/6130) - We fixed an issue where search full-text documents downloaded files with same name, overwriting existing files. [#6174](https://github.com/JabRef/jabref/pull/6174) -- We fixed an issue where when importing into current library an erroneous message "import cancelled" is displayed even though import is successful. [#6266](https://github.com/JabRef/jabref/issues/6266) +- We fixed an issue when importing into current library an erroneous message "import cancelled" is displayed even though import is successful. [#6266](https://github.com/JabRef/jabref/issues/6266) - We fixed an issue where custom jstyles for Open/LibreOffice where not saved correctly. 
[#6170](https://github.com/JabRef/jabref/issues/6170) +- We fixed an issue where the INSPIRE fetcher was no longer working [#6229](https://github.com/JabRef/jabref/issues/6229) ### Removed diff --git a/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java b/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java index 6a65eabe21d..d5dd7d05364 100644 --- a/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java +++ b/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java @@ -49,13 +49,24 @@ default void doPostCleanup(BibEntry entry) { // Do nothing by default } + /** + * Gets the {@link URLDownload} object for downloading content. Overwrite, if you need to send additional headers for the download + * @param query The search query + * @throws MalformedURLException + * @throws FetcherException + * @throws URISyntaxException + */ + default URLDownload getUrlDownload(String query) throws MalformedURLException, FetcherException, URISyntaxException { + return new URLDownload(getURLForQuery(query)); + } + @Override default List performSearch(String query) throws FetcherException { if (StringUtil.isBlank(query)) { return Collections.emptyList(); } - try (InputStream stream = new URLDownload(getURLForQuery(query)).asInputStream()) { + try (InputStream stream = getUrlDownload(query).asInputStream()) { List fetchedEntries = getParser().parseEntries(stream); // Post-cleanup diff --git a/src/main/java/org/jabref/logic/importer/fetcher/INSPIREFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/INSPIREFetcher.java index 5497e49c120..e8b953531d7 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/INSPIREFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/INSPIREFetcher.java @@ -1,14 +1,9 @@ package org.jabref.logic.importer.fetcher; -import java.io.BufferedReader; -import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URISyntaxException; import 
java.net.URL; -import java.util.ArrayList; -import java.util.List; import java.util.Optional; -import java.util.stream.Collectors; import org.jabref.logic.formatter.bibtexfields.ClearFormatter; import org.jabref.logic.formatter.bibtexfields.RemoveBracesFormatter; @@ -18,7 +13,8 @@ import org.jabref.logic.importer.Parser; import org.jabref.logic.importer.SearchBasedParserFetcher; import org.jabref.logic.importer.fileformat.BibtexParser; -import org.jabref.logic.util.OS; +import org.jabref.logic.importer.util.MediaTypes; +import org.jabref.logic.net.URLDownload; import org.jabref.model.cleanup.FieldFormatterCleanup; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.StandardField; @@ -26,24 +22,19 @@ import org.jabref.model.util.DummyFileUpdateMonitor; import org.apache.http.client.utils.URIBuilder; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; /** * Fetches data from the INSPIRE database. 
* - * @implNote We just use the normal search interface since it provides direct BibTeX export while the API (http://inspirehep.net/info/hep/api) currently only supports JSON and XML */ public class INSPIREFetcher implements SearchBasedParserFetcher { - private static final String INSPIRE_HOST = "https://inspirehep.net/search"; + private static final String INSPIRE_HOST = "https://inspirehep.net/api/literature/"; - private final ImportFormatPreferences preferences; + private final ImportFormatPreferences importFormatPreferences; public INSPIREFetcher(ImportFormatPreferences preferences) { - this.preferences = preferences; + this.importFormatPreferences = preferences; } @Override @@ -59,33 +50,15 @@ public Optional getHelpPage() { @Override public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException { URIBuilder uriBuilder = new URIBuilder(INSPIRE_HOST); - uriBuilder.addParameter("p", query); // Query - // uriBuilder.addParameter("jrec", "1"); // Start index (not needed at the moment) - uriBuilder.addParameter("rg", "100"); // Should return up to 100 items (instead of default 25) - uriBuilder.addParameter("of", "hx"); // BibTeX format + uriBuilder.addParameter("q", query); // Query return uriBuilder.build().toURL(); } @Override - public Parser getParser() { - // Inspire returns the BibTeX result embedded in HTML - // So we extract the BibTeX string from the
<pre>bibtex</pre>
tags and pass the content to the BibTeX parser - return inputStream -> { - String response = new BufferedReader(new InputStreamReader(inputStream)).lines().collect(Collectors.joining(OS.NEWLINE)); - - List entries = new ArrayList<>(); - - Document doc = Jsoup.parse(response); - Elements preElements = doc.getElementsByTag("pre"); - - for (Element elem : preElements) { - // We have to use a new instance here, because otherwise only the first entry gets parsed - BibtexParser bibtexParser = new BibtexParser(preferences, new DummyFileUpdateMonitor()); - List entry = bibtexParser.parseEntries(elem.text()); - entries.addAll(entry); - } - return entries; - }; + public URLDownload getUrlDownload(String query) throws MalformedURLException, FetcherException, URISyntaxException { + URLDownload download = new URLDownload(getURLForQuery(query)); + download.addHeader("Accept", MediaTypes.APPLICATION_BIBTEX); + return download; } @Override @@ -96,4 +69,9 @@ public void doPostCleanup(BibEntry entry) { // Remove braces around content of "title" field new FieldFormatterCleanup(StandardField.TITLE, new RemoveBracesFormatter()).cleanup(entry); } + + @Override + public Parser getParser() { + return new BibtexParser(importFormatPreferences, new DummyFileUpdateMonitor()); + } } diff --git a/src/test/java/org/jabref/logic/importer/fetcher/INSPIREFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/INSPIREFetcherTest.java index 34e900d7d1e..744ed17ac8f 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/INSPIREFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/INSPIREFetcherTest.java @@ -1,6 +1,6 @@ package org.jabref.logic.importer.fetcher; -import java.util.Arrays; +import java.util.Collections; import java.util.List; import org.jabref.logic.bibtex.FieldContentFormatterPreferences; @@ -32,24 +32,6 @@ void setUp() { @Test void searchByQueryFindsEntry() throws Exception { - BibEntry phd = new BibEntry(StandardEntryType.PhdThesis); - 
phd.setCiteKey("Diez:2019pkg"); - phd.setField(StandardField.AUTHOR, "Diez, Tobias"); - phd.setField(StandardField.TITLE, "Normal Form of Equivariant Maps and Singular Symplectic Reduction in Infinite Dimensions with Applications to Gauge Field Theory"); - phd.setField(StandardField.YEAR, "2019"); - phd.setField(StandardField.EPRINT, "1909.00744"); - phd.setField(new UnknownField("reportnumber"), "urn:nbn:de:bsz:15-qucosa2-352179"); - phd.setField(StandardField.ARCHIVEPREFIX, "arXiv"); - phd.setField(StandardField.PRIMARYCLASS, "math.SG"); - - BibEntry article = new BibEntry(StandardEntryType.Article); - article.setCiteKey("Diez:2018gjz"); - article.setField(StandardField.AUTHOR, "Diez, Tobias and Rudolph, Gerd"); - article.setField(StandardField.TITLE, "Singular symplectic cotangent bundle reduction of gauge field theory"); - article.setField(StandardField.YEAR, "2018"); - article.setField(StandardField.EPRINT, "1812.04707"); - article.setField(StandardField.ARCHIVEPREFIX, "arXiv"); - article.setField(StandardField.PRIMARYCLASS, "math-ph"); BibEntry master = new BibEntry(StandardEntryType.MastersThesis); master.setCiteKey("Diez:2014ppa"); @@ -63,6 +45,27 @@ void searchByQueryFindsEntry() throws Exception { List fetchedEntries = fetcher.performSearch("Fr\\'echet group actions field"); - assertEquals(Arrays.asList(phd, article, master), fetchedEntries); + assertEquals(Collections.singletonList(master), fetchedEntries); + } + + @Test + public void searchByIdentifierFindsEntry() throws Exception { + BibEntry article = new BibEntry(StandardEntryType.Article); + article.setCiteKey("Melnikov:1998pr"); + article.setField(StandardField.AUTHOR, "Melnikov, Kirill and Yelkhovsky, Alexander"); + article.setField(StandardField.TITLE, "Top quark production at threshold with O(alpha-s**2) accuracy"); + article.setField(StandardField.DOI, "10.1016/S0550-3213(98)00348-4"); + article.setField(StandardField.JOURNAL, "Nucl.\\ Phys.\\ B"); + article.setField(StandardField.PAGES, 
"59--72"); + article.setField(StandardField.VOLUME, "528"); + article.setField(StandardField.YEAR, "1998"); + article.setField(StandardField.EPRINT, "hep-ph/9802379"); + article.setField(StandardField.ARCHIVEPREFIX, "arXiv"); + article.setField(new UnknownField("reportnumber"), "BUDKER-INP-1998-7, TTP-98-10"); + + List fetchedEntries = fetcher.performSearch("hep-ph/9802379"); + + assertEquals(Collections.singletonList(article), fetchedEntries); + } }