Skip to content

Commit

Permalink
Fix inspire fetcher (#6258)
Browse files Browse the repository at this point in the history
* Fix inspire fetcher

Use application/x-bibtex header
Fixes #6229

* update changelog

* extract urldownload method for easier overwriting
revert not related changes

Co-authored-by: Tobias Diez <tobiasdiez@gmx.de>
  • Loading branch information
Siedlerchr and tobiasdiez authored Apr 14, 2020
1 parent 940ef9d commit 3bc1f02
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 59 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We fixed an issue with inconsistent capitalization of file extensions when downloading files. [#6115](https://github.com/JabRef/jabref/issues/6115)
- We fixed the display of language and encoding in the preferences dialog. [#6130](https://github.com/JabRef/jabref/pull/6130)
- We fixed an issue where search full-text documents downloaded files with same name, overwriting existing files. [#6174](https://github.com/JabRef/jabref/pull/6174)
- We fixed an issue where when importing into current library an erroneous message "import cancelled" is displayed even though import is successful. [#6266](https://github.com/JabRef/jabref/issues/6266)
- We fixed an issue where an erroneous message "import cancelled" was displayed when importing into the current library, even though the import was successful. [#6266](https://github.com/JabRef/jabref/issues/6266)
- We fixed an issue where custom jstyles for Open/LibreOffice were not saved correctly. [#6170](https://github.com/JabRef/jabref/issues/6170)
- We fixed an issue where the INSPIRE fetcher was no longer working. [#6229](https://github.com/JabRef/jabref/issues/6229)


### Removed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,24 @@ default void doPostCleanup(BibEntry entry) {
// Do nothing by default
}

/**
* Gets the {@link URLDownload} object used for downloading the search result.
* <p>
* The default implementation wraps the URL returned by {@code getURLForQuery(query)} without any further
* configuration. Override this method if you need to send additional headers with the download.
*
* @param query The search query
* @return a new {@link URLDownload} for the URL built from the query
* @throws MalformedURLException if raised while building the URL for the query (e.g. by {@code getURLForQuery})
* @throws FetcherException if raised while building the URL for the query (e.g. by {@code getURLForQuery})
* @throws URISyntaxException if raised while building the URL for the query (e.g. by {@code getURLForQuery})
*/
default URLDownload getUrlDownload(String query) throws MalformedURLException, FetcherException, URISyntaxException {
return new URLDownload(getURLForQuery(query));
}

@Override
default List<BibEntry> performSearch(String query) throws FetcherException {
if (StringUtil.isBlank(query)) {
return Collections.emptyList();
}

try (InputStream stream = new URLDownload(getURLForQuery(query)).asInputStream()) {
try (InputStream stream = getUrlDownload(query).asInputStream()) {
List<BibEntry> fetchedEntries = getParser().parseEntries(stream);

// Post-cleanup
Expand Down
52 changes: 15 additions & 37 deletions src/main/java/org/jabref/logic/importer/fetcher/INSPIREFetcher.java
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
package org.jabref.logic.importer.fetcher;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

import org.jabref.logic.formatter.bibtexfields.ClearFormatter;
import org.jabref.logic.formatter.bibtexfields.RemoveBracesFormatter;
Expand All @@ -18,32 +13,28 @@
import org.jabref.logic.importer.Parser;
import org.jabref.logic.importer.SearchBasedParserFetcher;
import org.jabref.logic.importer.fileformat.BibtexParser;
import org.jabref.logic.util.OS;
import org.jabref.logic.importer.util.MediaTypes;
import org.jabref.logic.net.URLDownload;
import org.jabref.model.cleanup.FieldFormatterCleanup;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.field.UnknownField;
import org.jabref.model.util.DummyFileUpdateMonitor;

import org.apache.http.client.utils.URIBuilder;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
* Fetches data from the INSPIRE database.
*
* @implNote We query the INSPIRE literature API (https://inspirehep.net/api/literature/) and request BibTeX through the {@code Accept} header, so the response can be passed directly to the BibTeX parser.
*/
public class INSPIREFetcher implements SearchBasedParserFetcher {

private static final String INSPIRE_HOST = "https://inspirehep.net/search";
private static final String INSPIRE_HOST = "https://inspirehep.net/api/literature/";

private final ImportFormatPreferences preferences;
private final ImportFormatPreferences importFormatPreferences;

public INSPIREFetcher(ImportFormatPreferences preferences) {
this.preferences = preferences;
this.importFormatPreferences = preferences;
}

@Override
Expand All @@ -59,33 +50,15 @@ public Optional<HelpFile> getHelpPage() {
@Override
public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException {
URIBuilder uriBuilder = new URIBuilder(INSPIRE_HOST);
uriBuilder.addParameter("p", query); // Query
// uriBuilder.addParameter("jrec", "1"); // Start index (not needed at the moment)
uriBuilder.addParameter("rg", "100"); // Should return up to 100 items (instead of default 25)
uriBuilder.addParameter("of", "hx"); // BibTeX format
uriBuilder.addParameter("q", query); // Query
return uriBuilder.build().toURL();
}

@Override
public Parser getParser() {
// Inspire returns the BibTeX result embedded in HTML
// So we extract the BibTeX string from the <pre>bibtex</pre> tags and pass the content to the BibTeX parser
return inputStream -> {
String response = new BufferedReader(new InputStreamReader(inputStream)).lines().collect(Collectors.joining(OS.NEWLINE));

List<BibEntry> entries = new ArrayList<>();

Document doc = Jsoup.parse(response);
Elements preElements = doc.getElementsByTag("pre");

for (Element elem : preElements) {
// We have to use a new instance here, because otherwise only the first entry gets parsed
BibtexParser bibtexParser = new BibtexParser(preferences, new DummyFileUpdateMonitor());
List<BibEntry> entry = bibtexParser.parseEntries(elem.text());
entries.addAll(entry);
}
return entries;
};
public URLDownload getUrlDownload(String query) throws MalformedURLException, FetcherException, URISyntaxException {
    // Build the request URL for the query first, then wrap it in a download.
    final URL searchUrl = getURLForQuery(query);
    final URLDownload bibtexRequest = new URLDownload(searchUrl);

    // Ask the server for a BibTeX response by setting the Accept header.
    bibtexRequest.addHeader("Accept", MediaTypes.APPLICATION_BIBTEX);
    return bibtexRequest;
}

@Override
Expand All @@ -96,4 +69,9 @@ public void doPostCleanup(BibEntry entry) {
// Remove braces around content of "title" field
new FieldFormatterCleanup(StandardField.TITLE, new RemoveBracesFormatter()).cleanup(entry);
}

@Override
public Parser getParser() {
    // A plain BibTeX parser configured with this fetcher's import preferences.
    DummyFileUpdateMonitor fileMonitor = new DummyFileUpdateMonitor();
    return new BibtexParser(importFormatPreferences, fileMonitor);
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package org.jabref.logic.importer.fetcher;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import org.jabref.logic.bibtex.FieldContentFormatterPreferences;
Expand Down Expand Up @@ -32,24 +32,6 @@ void setUp() {

@Test
void searchByQueryFindsEntry() throws Exception {
BibEntry phd = new BibEntry(StandardEntryType.PhdThesis);
phd.setCiteKey("Diez:2019pkg");
phd.setField(StandardField.AUTHOR, "Diez, Tobias");
phd.setField(StandardField.TITLE, "Normal Form of Equivariant Maps and Singular Symplectic Reduction in Infinite Dimensions with Applications to Gauge Field Theory");
phd.setField(StandardField.YEAR, "2019");
phd.setField(StandardField.EPRINT, "1909.00744");
phd.setField(new UnknownField("reportnumber"), "urn:nbn:de:bsz:15-qucosa2-352179");
phd.setField(StandardField.ARCHIVEPREFIX, "arXiv");
phd.setField(StandardField.PRIMARYCLASS, "math.SG");

BibEntry article = new BibEntry(StandardEntryType.Article);
article.setCiteKey("Diez:2018gjz");
article.setField(StandardField.AUTHOR, "Diez, Tobias and Rudolph, Gerd");
article.setField(StandardField.TITLE, "Singular symplectic cotangent bundle reduction of gauge field theory");
article.setField(StandardField.YEAR, "2018");
article.setField(StandardField.EPRINT, "1812.04707");
article.setField(StandardField.ARCHIVEPREFIX, "arXiv");
article.setField(StandardField.PRIMARYCLASS, "math-ph");

BibEntry master = new BibEntry(StandardEntryType.MastersThesis);
master.setCiteKey("Diez:2014ppa");
Expand All @@ -63,6 +45,27 @@ void searchByQueryFindsEntry() throws Exception {

List<BibEntry> fetchedEntries = fetcher.performSearch("Fr\\'echet group actions field");

assertEquals(Arrays.asList(phd, article, master), fetchedEntries);
assertEquals(Collections.singletonList(master), fetchedEntries);
}

@Test
public void searchByIdentifierFindsEntry() throws Exception {
    // Entry the fetcher is expected to return for the eprint identifier below.
    BibEntry expected = new BibEntry(StandardEntryType.Article);
    expected.setCiteKey("Melnikov:1998pr");
    expected.setField(StandardField.AUTHOR, "Melnikov, Kirill and Yelkhovsky, Alexander");
    expected.setField(StandardField.TITLE, "Top quark production at threshold with O(alpha-s**2) accuracy");
    expected.setField(StandardField.DOI, "10.1016/S0550-3213(98)00348-4");
    expected.setField(StandardField.JOURNAL, "Nucl.\\ Phys.\\ B");
    expected.setField(StandardField.PAGES, "59--72");
    expected.setField(StandardField.VOLUME, "528");
    expected.setField(StandardField.YEAR, "1998");
    expected.setField(StandardField.EPRINT, "hep-ph/9802379");
    expected.setField(StandardField.ARCHIVEPREFIX, "arXiv");
    expected.setField(new UnknownField("reportnumber"), "BUDKER-INP-1998-7, TTP-98-10");

    // Search by identifier; exactly this one entry should come back.
    List<BibEntry> actual = fetcher.performSearch("hep-ph/9802379");
    List<BibEntry> expectedResult = Collections.singletonList(expected);

    assertEquals(expectedResult, actual);
}
}

0 comments on commit 3bc1f02

Please sign in to comment.