-
-
Notifications
You must be signed in to change notification settings - Fork 2.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added option to import CFF files #7946
Merged
Merged
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
32f7297
Added option to import CFF files
AidanM11 4ddc414
Added tests, also minor fixes
AidanM11 119c943
Improved author handling, added identifier support
AidanM11 b9d3da4
Support for unmapped fields and dataset type, also more tests
AidanM11 44dd028
Simplified tests, minor fixes
AidanM11 78eb210
CHANGELOG.md update
AidanM11 f4bb7e4
Merge branch 'main' into addCffImporter
AidanM11 33329c8
code style fixes
AidanM11 4a9d13d
Fixed minor style issues
calixtus File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
201 changes: 201 additions & 0 deletions
201
src/main/java/org/jabref/logic/importer/fileformat/CffImporter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
package org.jabref.logic.importer.fileformat; | ||
|
||
import java.io.BufferedReader; | ||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.stream.Collectors; | ||
|
||
import org.jabref.logic.importer.Importer; | ||
import org.jabref.logic.importer.ParserResult; | ||
import org.jabref.logic.util.StandardFileType; | ||
import org.jabref.model.entry.Author; | ||
import org.jabref.model.entry.AuthorList; | ||
import org.jabref.model.entry.BibEntry; | ||
import org.jabref.model.entry.field.Field; | ||
import org.jabref.model.entry.field.StandardField; | ||
import org.jabref.model.entry.field.UnknownField; | ||
import org.jabref.model.entry.types.StandardEntryType; | ||
|
||
import com.fasterxml.jackson.annotation.JsonAnySetter; | ||
import com.fasterxml.jackson.annotation.JsonProperty; | ||
import com.fasterxml.jackson.databind.ObjectMapper; | ||
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; | ||
|
||
public class CffImporter extends Importer { | ||
|
||
@Override | ||
public String getName() { | ||
return "CFF"; | ||
} | ||
|
||
@Override | ||
public StandardFileType getFileType() { | ||
return StandardFileType.CFF; | ||
} | ||
|
||
@Override | ||
public String getId() { | ||
return "cff"; | ||
} | ||
|
||
@Override | ||
public String getDescription() { | ||
return "Importer for the CFF format. Is only used to cite software, one entry per file."; | ||
} | ||
|
||
// POJO classes for yaml data | ||
private static class CffFormat { | ||
private final HashMap<String, String> values = new HashMap<>(); | ||
|
||
@JsonProperty("authors") | ||
private List<CffAuthor> authors; | ||
|
||
@JsonProperty("identifiers") | ||
private List<CffIdentifier> ids; | ||
|
||
public CffFormat() { | ||
} | ||
|
||
@JsonAnySetter | ||
private void setValues(String key, String value) { | ||
values.put(key, value); | ||
} | ||
} | ||
|
||
private static class CffAuthor { | ||
private final HashMap<String, String> values = new HashMap<>(); | ||
|
||
public CffAuthor() { | ||
} | ||
|
||
@JsonAnySetter | ||
private void setValues(String key, String value) { | ||
values.put(key, value); | ||
} | ||
|
||
} | ||
|
||
private static class CffIdentifier { | ||
@JsonProperty("type") | ||
private String type; | ||
@JsonProperty("value") | ||
private String value; | ||
|
||
public CffIdentifier() { | ||
} | ||
} | ||
|
||
@Override | ||
public ParserResult importDatabase(BufferedReader reader) throws IOException { | ||
ObjectMapper mapper = new ObjectMapper(new YAMLFactory()); | ||
CffFormat citation = mapper.readValue(reader, CffFormat.class); | ||
HashMap<Field, String> entryMap = new HashMap<>(); | ||
StandardEntryType entryType = StandardEntryType.Software; | ||
|
||
// Map CFF fields to JabRef Fields | ||
HashMap<String, StandardField> fieldMap = getFieldMappings(); | ||
for (Map.Entry<String, String> property : citation.values.entrySet()) { | ||
if (fieldMap.containsKey(property.getKey())) { | ||
entryMap.put(fieldMap.get(property.getKey()), property.getValue()); | ||
} else if (property.getKey().equals("type")) { | ||
if (property.getValue().equals("dataset")) { | ||
entryType = StandardEntryType.Dataset; | ||
} | ||
} else if (getUnmappedFields().contains(property.getKey())) { | ||
entryMap.put(new UnknownField(property.getKey()), property.getValue()); | ||
} | ||
} | ||
|
||
// Translate CFF author format to JabRef author format | ||
String authorStr = citation.authors.stream() | ||
.map((author) -> author.values) | ||
.map((vals) -> vals.get("name") != null ? | ||
new Author(vals.get("name"), "", "", "", "") : | ||
new Author(vals.get("given-names"), null, vals.get("name-particle"), | ||
vals.get("family-names"), vals.get("name-suffix"))) | ||
.collect(AuthorList.collect()) | ||
.getAsFirstLastNamesWithAnd(); | ||
entryMap.put(StandardField.AUTHOR, authorStr); | ||
|
||
// Select DOI to keep | ||
if (entryMap.get(StandardField.DOI) == null && citation.ids != null) { | ||
List<CffIdentifier> doiIds = citation.ids.stream() | ||
.filter(id -> id.type.equals("doi")) | ||
.collect(Collectors.toList()); | ||
if (doiIds.size() == 1) { | ||
entryMap.put(StandardField.DOI, doiIds.get(0).value); | ||
} | ||
} | ||
|
||
// Select SWHID to keep | ||
if (citation.ids != null) { | ||
List<String> swhIds = citation.ids.stream() | ||
.filter(id -> id.type.equals("swh")) | ||
.map(id -> id.value) | ||
.collect(Collectors.toList()); | ||
|
||
if (swhIds.size() == 1) { | ||
entryMap.put(StandardField.SWHID, swhIds.get(0)); | ||
} else if (swhIds.size() > 1) { | ||
List<String> relSwhIds = swhIds.stream() | ||
.filter(id -> id.split(":").length > 3) // quick filter for invalid swhids | ||
.filter(id -> id.split(":")[2].equals("rel")) | ||
.collect(Collectors.toList()); | ||
if (relSwhIds.size() == 1) { | ||
entryMap.put(StandardField.SWHID, relSwhIds.get(0)); | ||
} | ||
} | ||
} | ||
|
||
BibEntry entry = new BibEntry(entryType); | ||
entry.setField(entryMap); | ||
|
||
List<BibEntry> entriesList = new ArrayList<>(); | ||
entriesList.add(entry); | ||
|
||
return new ParserResult(entriesList); | ||
} | ||
|
||
@Override | ||
public boolean isRecognizedFormat(BufferedReader reader) throws IOException { | ||
|
||
ObjectMapper mapper = new ObjectMapper(new YAMLFactory()); | ||
CffFormat citation; | ||
|
||
try { | ||
citation = mapper.readValue(reader, CffFormat.class); | ||
return citation != null && citation.values.get("title") != null; | ||
} catch (IOException e) { | ||
return false; | ||
} | ||
} | ||
|
||
private HashMap<String, StandardField> getFieldMappings() { | ||
HashMap<String, StandardField> fieldMappings = new HashMap<>(); | ||
fieldMappings.put("title", StandardField.TITLE); | ||
fieldMappings.put("version", StandardField.VERSION); | ||
fieldMappings.put("doi", StandardField.DOI); | ||
fieldMappings.put("license", StandardField.LICENSE); | ||
fieldMappings.put("repository", StandardField.REPOSITORY); | ||
fieldMappings.put("url", StandardField.URL); | ||
fieldMappings.put("abstract", StandardField.ABSTRACT); | ||
fieldMappings.put("message", StandardField.COMMENT); | ||
fieldMappings.put("date-released", StandardField.DATE); | ||
fieldMappings.put("keywords", StandardField.KEYWORDS); | ||
return fieldMappings; | ||
} | ||
|
||
private List<String> getUnmappedFields() { | ||
List<String> fields = new ArrayList<>(); | ||
|
||
fields.add("commit"); | ||
fields.add("license-url"); | ||
fields.add("repository-code"); | ||
fields.add("repository-artifact"); | ||
|
||
return fields; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
165 changes: 165 additions & 0 deletions
165
src/test/java/org/jabref/logic/importer/fileformat/CffImporterTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
package org.jabref.logic.importer.fileformat; | ||
|
||
import java.io.IOException; | ||
import java.net.URISyntaxException; | ||
import java.nio.charset.StandardCharsets; | ||
import java.nio.file.Path; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
|
||
import org.jabref.logic.util.StandardFileType; | ||
import org.jabref.model.entry.BibEntry; | ||
import org.jabref.model.entry.field.StandardField; | ||
import org.jabref.model.entry.field.UnknownField; | ||
import org.jabref.model.entry.types.StandardEntryType; | ||
|
||
import org.junit.jupiter.api.BeforeEach; | ||
import org.junit.jupiter.api.Test; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
import static org.junit.jupiter.api.Assertions.assertFalse; | ||
import static org.junit.jupiter.api.Assertions.assertTrue; | ||
|
||
public class CffImporterTest { | ||
|
||
private CffImporter importer; | ||
|
||
@BeforeEach | ||
public void setUp() { | ||
importer = new CffImporter(); | ||
} | ||
|
||
@Test | ||
public void testGetFormatName() { | ||
assertEquals("CFF", importer.getName()); | ||
} | ||
|
||
@Test | ||
public void testGetCLIId() { | ||
assertEquals("cff", importer.getId()); | ||
} | ||
|
||
@Test | ||
public void testsGetExtensions() { | ||
assertEquals(StandardFileType.CFF, importer.getFileType()); | ||
} | ||
|
||
@Test | ||
public void testGetDescription() { | ||
assertEquals("Importer for the CFF format. Is only used to cite software, one entry per file.", | ||
importer.getDescription()); | ||
} | ||
|
||
@Test | ||
public void testIsRecognizedFormat() throws IOException, URISyntaxException { | ||
Path file = Path.of(CffImporterTest.class.getResource("CffImporterTestValid.cff").toURI()); | ||
assertTrue(importer.isRecognizedFormat(file, StandardCharsets.UTF_8)); | ||
} | ||
|
||
@Test | ||
public void testIsRecognizedFormatReject() throws IOException, URISyntaxException { | ||
List<String> list = Arrays.asList("CffImporterTestInvalid1.cff", "CffImporterTestInvalid2.cff"); | ||
|
||
for (String string : list) { | ||
Path file = Path.of(CffImporterTest.class.getResource(string).toURI()); | ||
assertFalse(importer.isRecognizedFormat(file, StandardCharsets.UTF_8)); | ||
} | ||
} | ||
|
||
@Test | ||
public void testImportEntriesBasic() throws IOException, URISyntaxException { | ||
Path file = Path.of(CffImporterTest.class.getResource("CffImporterTestValid.cff").toURI()); | ||
List<BibEntry> bibEntries = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries(); | ||
BibEntry entry = bibEntries.get(0); | ||
|
||
BibEntry expected = getPopulatedEntry().withField(StandardField.AUTHOR, "Joe van Smith"); | ||
|
||
assertEquals(entry, expected); | ||
} | ||
|
||
@Test | ||
public void testImportEntriesMultipleAuthors() throws IOException, URISyntaxException { | ||
Path file = Path.of(CffImporterTest.class.getResource("CffImporterTestValidMultAuthors.cff").toURI()); | ||
List<BibEntry> bibEntries = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries(); | ||
BibEntry entry = bibEntries.get(0); | ||
|
||
BibEntry expected = getPopulatedEntry(); | ||
|
||
assertEquals(entry, expected); | ||
|
||
} | ||
|
||
@Test | ||
public void testImportEntriesSwhIdSelect1() throws IOException, URISyntaxException { | ||
Path file = Path.of(CffImporterTest.class.getResource("CffImporterTestValidSwhIdSelect1.cff").toURI()); | ||
List<BibEntry> bibEntries = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries(); | ||
BibEntry entry = bibEntries.get(0); | ||
|
||
BibEntry expected = getPopulatedEntry().withField(StandardField.SWHID, "swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f"); | ||
|
||
assertEquals(entry, expected); | ||
} | ||
|
||
@Test | ||
public void testImportEntriesSwhIdSelect2() throws IOException, URISyntaxException { | ||
Path file = Path.of(CffImporterTest.class.getResource("CffImporterTestValidSwhIdSelect2.cff").toURI()); | ||
List<BibEntry> bibEntries = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries(); | ||
BibEntry entry = bibEntries.get(0); | ||
|
||
BibEntry expected = getPopulatedEntry().withField(StandardField.SWHID, "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"); | ||
|
||
assertEquals(entry, expected); | ||
} | ||
|
||
@Test | ||
public void testImportEntriesDataset() throws IOException, URISyntaxException { | ||
Path file = Path.of(CffImporterTest.class.getResource("CffImporterTestDataset.cff").toURI()); | ||
List<BibEntry> bibEntries = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries(); | ||
BibEntry entry = bibEntries.get(0); | ||
|
||
BibEntry expected = getPopulatedEntry(); | ||
expected.setType(StandardEntryType.Dataset); | ||
|
||
assertEquals(entry, expected); | ||
} | ||
|
||
@Test | ||
public void testImportEntriesDoiSelect() throws IOException, URISyntaxException { | ||
Path file = Path.of(CffImporterTest.class.getResource("CffImporterTestDoiSelect.cff").toURI()); | ||
List<BibEntry> bibEntries = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries(); | ||
BibEntry entry = bibEntries.get(0); | ||
|
||
BibEntry expected = getPopulatedEntry(); | ||
|
||
assertEquals(entry, expected); | ||
} | ||
|
||
@Test | ||
public void testImportEntriesUnknownFields() throws IOException, URISyntaxException { | ||
Path file = Path.of(CffImporterTest.class.getResource("CffImporterTestUnknownFields.cff").toURI()); | ||
List<BibEntry> bibEntries = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries(); | ||
BibEntry entry = bibEntries.get(0); | ||
|
||
BibEntry expected = getPopulatedEntry().withField(new UnknownField("commit"), "10ad"); | ||
|
||
assertEquals(entry, expected); | ||
} | ||
|
||
public BibEntry getPopulatedEntry() { | ||
BibEntry entry = new BibEntry(); | ||
entry.setType(StandardEntryType.Software); | ||
|
||
entry.setField(StandardField.AUTHOR, "Joe van Smith and Bob Jones, Jr."); | ||
entry.setField(StandardField.TITLE, "Test"); | ||
entry.setField(StandardField.URL, "www.google.com"); | ||
entry.setField(StandardField.REPOSITORY, "www.github.com"); | ||
entry.setField(StandardField.DOI, "10.0000/TEST"); | ||
entry.setField(StandardField.DATE, "2000-07-02"); | ||
entry.setField(StandardField.COMMENT, "Test entry."); | ||
entry.setField(StandardField.ABSTRACT, "Test abstract."); | ||
entry.setField(StandardField.LICENSE, "MIT"); | ||
entry.setField(StandardField.VERSION, "1.0"); | ||
|
||
return entry; | ||
} | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
And here you can also use `findFirst` again, as above.