-
-
Notifications
You must be signed in to change notification settings - Fork 2.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
222 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
136 changes: 136 additions & 0 deletions
136
src/main/java/org/jabref/logic/importer/fetcher/JstorFetcher.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
package org.jabref.logic.importer.fetcher; | ||
|
||
import java.io.IOException; | ||
import java.net.MalformedURLException; | ||
import java.net.URISyntaxException; | ||
import java.net.URL; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.Optional; | ||
|
||
import org.jabref.logic.importer.FetcherException; | ||
import org.jabref.logic.importer.FulltextFetcher; | ||
import org.jabref.logic.importer.ImportFormatPreferences; | ||
import org.jabref.logic.importer.ParseException; | ||
import org.jabref.logic.importer.Parser; | ||
import org.jabref.logic.importer.SearchBasedParserFetcher; | ||
import org.jabref.logic.importer.fileformat.BibtexParser; | ||
import org.jabref.logic.net.URLDownload; | ||
import org.jabref.model.entry.BibEntry; | ||
import org.jabref.model.entry.field.StandardField; | ||
import org.jabref.model.util.DummyFileUpdateMonitor; | ||
|
||
import org.apache.http.client.utils.URIBuilder; | ||
import org.jsoup.Jsoup; | ||
import org.jsoup.nodes.Document; | ||
import org.jsoup.nodes.Element; | ||
|
||
/** | ||
* Fetcher for jstor.org | ||
**/ | ||
public class JstorFetcher implements SearchBasedParserFetcher, FulltextFetcher { | ||
|
||
private static final String HOST = "https://www.jstor.org"; | ||
private static final String SEARCH_HOST = HOST + "/open/search"; | ||
private static final String CITE_HOST = HOST + "/citation/text"; | ||
|
||
private final ImportFormatPreferences importFormatPreferences; | ||
|
||
public JstorFetcher(ImportFormatPreferences importFormatPreferences) { | ||
this.importFormatPreferences = importFormatPreferences; | ||
} | ||
|
||
@Override | ||
public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException { | ||
URIBuilder uriBuilder = new URIBuilder(SEARCH_HOST); | ||
uriBuilder.addParameter("Query", query); | ||
return uriBuilder.build().toURL(); | ||
} | ||
|
||
@Override | ||
public URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery) throws URISyntaxException, MalformedURLException, FetcherException { | ||
URIBuilder uriBuilder = new URIBuilder(SEARCH_HOST); | ||
StringBuilder stringBuilder = new StringBuilder(); | ||
if (!complexSearchQuery.getDefaultFieldPhrases().isEmpty()) { | ||
stringBuilder.append(complexSearchQuery.getDefaultFieldPhrases()); | ||
} | ||
if (!complexSearchQuery.getAuthors().isEmpty()) { | ||
for (String author : complexSearchQuery.getAuthors()) { | ||
stringBuilder.append("au:").append(author); | ||
} | ||
} | ||
if (!complexSearchQuery.getTitlePhrases().isEmpty()) { | ||
for (String title : complexSearchQuery.getTitlePhrases()) { | ||
stringBuilder.append("ti:").append(title); | ||
} | ||
} | ||
if (complexSearchQuery.getJournal().isPresent()) { | ||
stringBuilder.append("pt:").append(complexSearchQuery.getJournal().get()); | ||
} | ||
if (complexSearchQuery.getSingleYear().isPresent()) { | ||
uriBuilder.addParameter("sd", String.valueOf(complexSearchQuery.getSingleYear().get())); | ||
uriBuilder.addParameter("ed", String.valueOf(complexSearchQuery.getSingleYear().get())); | ||
} | ||
if (complexSearchQuery.getFromYear().isPresent()) { | ||
uriBuilder.addParameter("sd", String.valueOf(complexSearchQuery.getFromYear().get())); | ||
} | ||
if (complexSearchQuery.getToYear().isPresent()) { | ||
uriBuilder.addParameter("ed", String.valueOf(complexSearchQuery.getToYear().get())); | ||
} | ||
|
||
uriBuilder.addParameter("Query", stringBuilder.toString()); | ||
return uriBuilder.build().toURL(); | ||
} | ||
|
||
@Override | ||
public Parser getParser() { | ||
return inputStream -> { | ||
List<BibEntry> entries; | ||
try { | ||
Document doc = Jsoup.parse(inputStream, null, HOST); | ||
List<Element> elements = doc.body().getElementsByClass("cite-this-item"); | ||
StringBuilder stringBuilder = new StringBuilder(); | ||
for (Element element : elements) { | ||
String id = element.attr("href").replace("citation/info/", ""); | ||
|
||
String data = new URLDownload(CITE_HOST + id).asString(); | ||
stringBuilder.append(data); | ||
} | ||
BibtexParser parser = new BibtexParser(importFormatPreferences, new DummyFileUpdateMonitor()); | ||
entries = new ArrayList<>(parser.parseEntries(stringBuilder.toString())); | ||
} catch (IOException e) { | ||
throw new ParseException("Could not download data from jstor.org", e); | ||
} | ||
return entries; | ||
}; | ||
} | ||
|
||
@Override | ||
public String getName() { | ||
return "JSTOR"; | ||
} | ||
|
||
@Override | ||
public Optional<URL> findFullText(BibEntry entry) throws IOException, FetcherException { | ||
if (entry.getField(StandardField.URL).isEmpty()) { | ||
return Optional.empty(); | ||
} | ||
|
||
String page = new URLDownload(entry.getField(StandardField.URL).get()).asString(); | ||
|
||
Document doc = Jsoup.parse(page); | ||
|
||
List<Element> elements = doc.getElementsByAttribute("data-doi"); | ||
if (elements.size() != 1) { | ||
return Optional.empty(); | ||
} | ||
|
||
String url = elements.get(0).attr("href"); | ||
return Optional.of(new URL(url)); | ||
} | ||
|
||
@Override | ||
public TrustLevel getTrustLevel() { | ||
return TrustLevel.META_SEARCH; | ||
} | ||
} |
81 changes: 81 additions & 0 deletions
81
src/test/java/org/jabref/logic/importer/fetcher/JstorFetcherTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
package org.jabref.logic.importer.fetcher; | ||
|
||
import java.io.IOException; | ||
import java.net.URL; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.Optional; | ||
|
||
import org.jabref.logic.importer.FetcherException; | ||
import org.jabref.logic.importer.ImportFormatPreferences; | ||
import org.jabref.logic.importer.SearchBasedFetcher; | ||
import org.jabref.model.entry.BibEntry; | ||
import org.jabref.model.entry.field.StandardField; | ||
import org.jabref.model.entry.types.StandardEntryType; | ||
import org.jabref.testutils.category.FetcherTest; | ||
|
||
import org.junit.jupiter.api.Disabled; | ||
import org.junit.jupiter.api.Test; | ||
import org.mockito.Answers; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
import static org.mockito.Mockito.mock; | ||
|
||
@FetcherTest | ||
public class JstorFetcherTest implements SearchBasedFetcherCapabilityTest { | ||
|
||
private final JstorFetcher fetcher = new JstorFetcher(mock(ImportFormatPreferences.class, Answers.RETURNS_DEEP_STUBS)); | ||
|
||
private final BibEntry bibEntry = new BibEntry(StandardEntryType.Article) | ||
.withCitationKey("10.2307/90002164") | ||
.withField(StandardField.AUTHOR, "Yang Yanxia") | ||
.withField(StandardField.TITLE, "Test Anxiety Analysis of Chinese College Students in Computer-based Spoken English Test") | ||
.withField(StandardField.ISSN, "11763647, 14364522") | ||
.withField(StandardField.JOURNAL, "Journal of Educational Technology & Society") | ||
.withField(StandardField.ABSTRACT, "ABSTRACT Test anxiety was a commonly known or assumed factor that could greatly influence performance of test takers. With the employment of designed questionnaires and computer-based spoken English test, this paper explored test anxiety manifestation of Chinese college students from both macro and micro aspects, and found out that the major anxiety in computer-based spoken English test was spoken English test anxiety, which consisted of test anxiety and communication apprehension. Regard to proximal test anxiety, the causes listed in proper order as low spoken English abilities, lack of speaking techniques, anxiety from the evaluative process and inadaptability with computer-based spoken English test format. As to distal anxiety causes, attitude toward learning spoken English and self-evaluation of speaking abilities were significantly negatively correlated with test anxiety. Besides, as test anxiety significantly associated often with test performance, a look at pedagogical implications has been discussed in this paper.") | ||
.withField(StandardField.PUBLISHER, "International Forum of Educational Technology & Society") | ||
.withField(StandardField.NUMBER, "2") | ||
.withField(StandardField.PAGES, "63--73") | ||
.withField(StandardField.VOLUME, "20") | ||
.withField(StandardField.URL, "http://www.jstor.org/stable/90002164") | ||
.withField(StandardField.YEAR, "2017"); | ||
|
||
@Test | ||
void searchByTitle() throws Exception { | ||
List<BibEntry> entries = fetcher.performSearch("ti: \"Test Anxiety Analysis of Chinese College Students in Computer-based Spoken English Test\""); | ||
assertEquals(Collections.singletonList(bibEntry), entries); | ||
} | ||
|
||
@Test | ||
void fetchPDF() throws IOException, FetcherException { | ||
Optional<URL> url = fetcher.findFullText(bibEntry); | ||
assertEquals(Optional.of(new URL("https://www.jstor.org/stable/pdf/90002164.pdf")), url); | ||
} | ||
|
||
@Override | ||
public SearchBasedFetcher getFetcher() { | ||
return fetcher; | ||
} | ||
|
||
@Override | ||
public List<String> getTestAuthors() { | ||
return List.of("Haman", "Medlin"); | ||
} | ||
|
||
@Override | ||
public String getTestJournal() { | ||
return "Test"; | ||
} | ||
|
||
@Disabled("jstor does not support search only based on year") | ||
@Override | ||
public void supportsYearRangeSearch() throws Exception { | ||
|
||
} | ||
|
||
@Disabled("jstor does not support search only based on year") | ||
@Override | ||
public void supportsYearSearch() throws Exception { | ||
|
||
} | ||
} |