From 3bc1f0236cdfef81cb36ebce23f9350b1615928e Mon Sep 17 00:00:00 2001
From: Christoph <siedlerkiller@gmail.com>
Date: Tue, 14 Apr 2020 09:37:25 +0200
Subject: [PATCH] Fix inspire fetcher (#6258)

* Fix inspire fetcher

Send application/x-bibtex as the Accept header
Fixes #6229

* update changelog

* extract getUrlDownload method for easier overriding
revert unrelated changes

Co-authored-by: Tobias Diez <tobiasdiez@gmx.de>
---
 CHANGELOG.md                                  |  3 +-
 .../importer/SearchBasedParserFetcher.java    | 13 ++++-
 .../importer/fetcher/INSPIREFetcher.java      | 52 ++++++-------------
 .../importer/fetcher/INSPIREFetcherTest.java  | 43 ++++++++-------
 4 files changed, 52 insertions(+), 59 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9f589696a6e..89324b0615c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -34,8 +34,9 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
 - We fixed an issue with inconsistent capitalization of file extensions when downloading files. [#6115](https://github.com/JabRef/jabref/issues/6115)
 - We fixed the display of language and encoding in the preferences dialog. [#6130](https://github.com/JabRef/jabref/pull/6130)
 - We fixed an issue where search full-text documents downloaded files with same name, overwriting existing files. [#6174](https://github.com/JabRef/jabref/pull/6174)
-- We fixed an issue where when importing into current library an erroneous message "import cancelled" is displayed even though import is successful. [#6266](https://github.com/JabRef/jabref/issues/6266)
+- We fixed an issue where an erroneous "import cancelled" message was displayed when importing into the current library, even though the import was successful. [#6266](https://github.com/JabRef/jabref/issues/6266)
 - We fixed an issue where custom jstyles for Open/LibreOffice where not saved correctly. [#6170](https://github.com/JabRef/jabref/issues/6170)
+- We fixed an issue where the INSPIRE fetcher was no longer working. [#6229](https://github.com/JabRef/jabref/issues/6229)
 
 
 ### Removed
diff --git a/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java b/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java
index 6a65eabe21d..d5dd7d05364 100644
--- a/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java
+++ b/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java
@@ -49,13 +49,24 @@ default void doPostCleanup(BibEntry entry) {
         // Do nothing by default
     }
 
+    /**
+     * Creates the {@link URLDownload} used to fetch the results for the given query. Override this method if you need to send additional headers with the download.
+     * @param query The search query
+     * @throws MalformedURLException may be thrown by {@link #getURLForQuery(String)} while building the URL
+     * @throws FetcherException may be thrown by {@link #getURLForQuery(String)} while building the URL
+     * @throws URISyntaxException may be thrown by {@link #getURLForQuery(String)} while building the URL
+     */
+    default URLDownload getUrlDownload(String query) throws MalformedURLException, FetcherException, URISyntaxException {
+        return new URLDownload(getURLForQuery(query));
+    }
+
     @Override
     default List<BibEntry> performSearch(String query) throws FetcherException {
         if (StringUtil.isBlank(query)) {
             return Collections.emptyList();
         }
 
-        try (InputStream stream = new URLDownload(getURLForQuery(query)).asInputStream()) {
+        try (InputStream stream = getUrlDownload(query).asInputStream()) {
             List<BibEntry> fetchedEntries = getParser().parseEntries(stream);
 
             // Post-cleanup
diff --git a/src/main/java/org/jabref/logic/importer/fetcher/INSPIREFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/INSPIREFetcher.java
index 5497e49c120..e8b953531d7 100644
--- a/src/main/java/org/jabref/logic/importer/fetcher/INSPIREFetcher.java
+++ b/src/main/java/org/jabref/logic/importer/fetcher/INSPIREFetcher.java
@@ -1,14 +1,9 @@
 package org.jabref.logic.importer.fetcher;
 
-import java.io.BufferedReader;
-import java.io.InputStreamReader;
 import java.net.MalformedURLException;
 import java.net.URISyntaxException;
 import java.net.URL;
-import java.util.ArrayList;
-import java.util.List;
 import java.util.Optional;
-import java.util.stream.Collectors;
 
 import org.jabref.logic.formatter.bibtexfields.ClearFormatter;
 import org.jabref.logic.formatter.bibtexfields.RemoveBracesFormatter;
@@ -18,7 +13,8 @@
 import org.jabref.logic.importer.Parser;
 import org.jabref.logic.importer.SearchBasedParserFetcher;
 import org.jabref.logic.importer.fileformat.BibtexParser;
-import org.jabref.logic.util.OS;
+import org.jabref.logic.importer.util.MediaTypes;
+import org.jabref.logic.net.URLDownload;
 import org.jabref.model.cleanup.FieldFormatterCleanup;
 import org.jabref.model.entry.BibEntry;
 import org.jabref.model.entry.field.StandardField;
@@ -26,24 +22,19 @@
 import org.jabref.model.util.DummyFileUpdateMonitor;
 
 import org.apache.http.client.utils.URIBuilder;
-import org.jsoup.Jsoup;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
 
 /**
  * Fetches data from the INSPIRE database.
  *
- * @implNote We just use the normal search interface since it provides direct BibTeX export while the API (http://inspirehep.net/info/hep/api) currently only supports JSON and XML
  */
 public class INSPIREFetcher implements SearchBasedParserFetcher {
 
-    private static final String INSPIRE_HOST = "https://inspirehep.net/search";
+    private static final String INSPIRE_HOST = "https://inspirehep.net/api/literature/";
 
-    private final ImportFormatPreferences preferences;
+    private final ImportFormatPreferences importFormatPreferences;
 
     public INSPIREFetcher(ImportFormatPreferences preferences) {
-        this.preferences = preferences;
+        this.importFormatPreferences = preferences;
     }
 
     @Override
@@ -59,33 +50,15 @@ public Optional<HelpFile> getHelpPage() {
     @Override
     public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException {
         URIBuilder uriBuilder = new URIBuilder(INSPIRE_HOST);
-        uriBuilder.addParameter("p", query); // Query
-        // uriBuilder.addParameter("jrec", "1"); // Start index (not needed at the moment)
-        uriBuilder.addParameter("rg", "100"); // Should return up to 100 items (instead of default 25)
-        uriBuilder.addParameter("of", "hx"); // BibTeX format
+        uriBuilder.addParameter("q", query); // Query
         return uriBuilder.build().toURL();
     }
 
     @Override
-    public Parser getParser() {
-        // Inspire returns the BibTeX result embedded in HTML
-        // So we extract the BibTeX string from the <pre>bibtex</pre> tags and pass the content to the BibTeX parser
-        return inputStream -> {
-            String response = new BufferedReader(new InputStreamReader(inputStream)).lines().collect(Collectors.joining(OS.NEWLINE));
-
-            List<BibEntry> entries = new ArrayList<>();
-
-            Document doc = Jsoup.parse(response);
-            Elements preElements = doc.getElementsByTag("pre");
-
-            for (Element elem : preElements) {
-                // We have to use a new instance here, because otherwise only the first entry gets parsed
-                BibtexParser bibtexParser = new BibtexParser(preferences, new DummyFileUpdateMonitor());
-                List<BibEntry> entry = bibtexParser.parseEntries(elem.text());
-                entries.addAll(entry);
-            }
-            return entries;
-        };
+    public URLDownload getUrlDownload(String query) throws MalformedURLException, FetcherException, URISyntaxException {
+        URLDownload download = new URLDownload(getURLForQuery(query));
+        download.addHeader("Accept", MediaTypes.APPLICATION_BIBTEX); // Request the result in BibTeX format
+        return download;
     }
 
     @Override
@@ -96,4 +69,9 @@ public void doPostCleanup(BibEntry entry) {
         // Remove braces around content of "title" field
         new FieldFormatterCleanup(StandardField.TITLE, new RemoveBracesFormatter()).cleanup(entry);
     }
+
+    @Override
+    public Parser getParser() {
+        return new BibtexParser(importFormatPreferences, new DummyFileUpdateMonitor()); // getUrlDownload asks for BibTeX, so the response is expected to be parseable as-is
+    }
 }
diff --git a/src/test/java/org/jabref/logic/importer/fetcher/INSPIREFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/INSPIREFetcherTest.java
index 34e900d7d1e..744ed17ac8f 100644
--- a/src/test/java/org/jabref/logic/importer/fetcher/INSPIREFetcherTest.java
+++ b/src/test/java/org/jabref/logic/importer/fetcher/INSPIREFetcherTest.java
@@ -1,6 +1,6 @@
 package org.jabref.logic.importer.fetcher;
 
-import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
 
 import org.jabref.logic.bibtex.FieldContentFormatterPreferences;
@@ -32,24 +32,6 @@ void setUp() {
 
     @Test
     void searchByQueryFindsEntry() throws Exception {
-        BibEntry phd = new BibEntry(StandardEntryType.PhdThesis);
-        phd.setCiteKey("Diez:2019pkg");
-        phd.setField(StandardField.AUTHOR, "Diez, Tobias");
-        phd.setField(StandardField.TITLE, "Normal Form of Equivariant Maps and Singular Symplectic Reduction in Infinite Dimensions with Applications to Gauge Field Theory");
-        phd.setField(StandardField.YEAR, "2019");
-        phd.setField(StandardField.EPRINT, "1909.00744");
-        phd.setField(new UnknownField("reportnumber"), "urn:nbn:de:bsz:15-qucosa2-352179");
-        phd.setField(StandardField.ARCHIVEPREFIX, "arXiv");
-        phd.setField(StandardField.PRIMARYCLASS, "math.SG");
-
-        BibEntry article = new BibEntry(StandardEntryType.Article);
-        article.setCiteKey("Diez:2018gjz");
-        article.setField(StandardField.AUTHOR, "Diez, Tobias and Rudolph, Gerd");
-        article.setField(StandardField.TITLE, "Singular symplectic cotangent bundle reduction of gauge field theory");
-        article.setField(StandardField.YEAR, "2018");
-        article.setField(StandardField.EPRINT, "1812.04707");
-        article.setField(StandardField.ARCHIVEPREFIX, "arXiv");
-        article.setField(StandardField.PRIMARYCLASS, "math-ph");
 
         BibEntry master = new BibEntry(StandardEntryType.MastersThesis);
         master.setCiteKey("Diez:2014ppa");
@@ -63,6 +45,27 @@ void searchByQueryFindsEntry() throws Exception {
 
         List<BibEntry> fetchedEntries = fetcher.performSearch("Fr\\'echet group actions field");
 
-        assertEquals(Arrays.asList(phd, article, master), fetchedEntries);
+        assertEquals(Collections.singletonList(master), fetchedEntries);
+    }
+
+    // Searching by eprint identifier must return exactly the one matching article.
+    @Test
+    void searchByIdentifierFindsEntry() throws Exception {
+        BibEntry article = new BibEntry(StandardEntryType.Article);
+        article.setCiteKey("Melnikov:1998pr");
+        article.setField(StandardField.AUTHOR, "Melnikov, Kirill and Yelkhovsky, Alexander");
+        article.setField(StandardField.TITLE, "Top quark production at threshold with O(alpha-s**2) accuracy");
+        article.setField(StandardField.DOI, "10.1016/S0550-3213(98)00348-4");
+        article.setField(StandardField.JOURNAL, "Nucl.\\ Phys.\\ B");
+        article.setField(StandardField.PAGES, "59--72");
+        article.setField(StandardField.VOLUME, "528");
+        article.setField(StandardField.YEAR, "1998");
+        article.setField(StandardField.EPRINT, "hep-ph/9802379");
+        article.setField(StandardField.ARCHIVEPREFIX, "arXiv");
+        article.setField(new UnknownField("reportnumber"), "BUDKER-INP-1998-7, TTP-98-10");
+
+        List<BibEntry> fetchedEntries = fetcher.performSearch("hep-ph/9802379");
+
+        assertEquals(Collections.singletonList(article), fetchedEntries);
+    }
 }