Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Review date handling with header consolidation and date serialization in references #807

Merged
merged 10 commits into from
Aug 18, 2021
178 changes: 81 additions & 97 deletions grobid-core/src/main/java/org/grobid/core/data/BiblioItem.java
Original file line number Diff line number Diff line change
Expand Up @@ -906,6 +906,10 @@ public void setNormalizedPublicationDate(Date theDate) {
this.normalized_publication_date = theDate;
}

/**
 * Merge the given date into the current normalized publication date, following
 * the rules of {@code Date.merge} (the current date is the first argument, the
 * given date the second one).
 *
 * If no normalized publication date is set yet, a copy of the given date is used.
 * A null argument leaves the current value untouched.
 *
 * @param theDate the date to merge into the current normalized publication date, may be null
 */
public void mergeNormalizedPublicationDate(Date theDate) {
    if (theDate == null) {
        // nothing to merge, keep the current value
        return;
    }
    if (this.normalized_publication_date == null) {
        // no current date to merge into: take a copy of the given one
        this.normalized_publication_date = new Date(theDate);
        return;
    }
    this.normalized_publication_date = Date.merge(this.normalized_publication_date, theDate);
}

/**
 * Set the editors string, after passing it through {@code StringUtils.normalizeSpace}.
 *
 * @param theEditors the raw editors string
 */
public void setEditors(String theEditors) {
    final String normalizedEditors = StringUtils.normalizeSpace(theEditors);
    this.editors = normalizedEditors;
}
Expand Down Expand Up @@ -1982,11 +1986,14 @@ public String toBibTeX(String id, GrobidAnalysisConfig config) {

// dates
if (normalized_publication_date != null) {
String isoDate = TEIFormatter.toISOString(normalized_publication_date);
String isoDate = Date.toISOString(normalized_publication_date);
if (isoDate != null) {
bibtex.add(" date = {" + isoDate + "}");
}
}
if (publication_date != null) {
bibtex.add(" year = {" + publication_date + "}");
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the date field is preferred over the year field, and if year is used, it should consist only of a numeric year.
I see two options here:

  1. Put this in an else branch of the preceding if and write it to the date field.
    That is, if a normalized_publication_date exists, use it as an ISO string; otherwise, if the publication_date exists, use that.
  2. Don't format the normalized_publication_date as ISO string but use the year, month and day fields instead. This would ensure that year, month and day are numeric and the (more detailed, with day ranges) publication_date could be put as a string in the date field. If both are given, it is up to the user to decide which to keep.


// address
if (location != null) {
Expand Down Expand Up @@ -2475,40 +2482,22 @@ else if (bookTitle == null) {
}

if (normalized_publication_date != null) {
if ((normalized_publication_date.getDay() != -1) ||
(normalized_publication_date.getMonth() != -1) ||
(normalized_publication_date.getYear() != -1)) {
int year = normalized_publication_date.getYear();
int month = normalized_publication_date.getMonth();
int day = normalized_publication_date.getDay();

if (year != -1) {
String when = "";
if (year <= 9)
when += "000" + year;
else if (year <= 99)
when += "00" + year;
else if (year <= 999)
when += "0" + year;
else
when += year;
if (month != -1) {
if (month <= 9)
when += "-0" + month;
else
when += "-" + month;
if (day != -1) {
if (day <= 9)
when += "-0" + day;
else
when += "-" + day;
}
}
if (normalized_publication_date.getYear() != -1) {
String when = Date.toISOString(normalized_publication_date);
if (when != null) {
for (int i = 0; i < indent + 3; i++) {
tei.append("\t");
}
tei.append("<date type=\"published\" when=\"");
tei.append(when + "\" />\n");
tei.append(when + "\"");

if (publication_date != null && publication_date.length() > 0) {
tei.append(">");
tei.append(TextUtilities.HTMLEncode(publication_date) );
tei.append("</date>\n");
} else {
tei.append(" />\n");
}
}
} else if (this.getYear() != null) {
String when = "";
Expand Down Expand Up @@ -2537,7 +2526,15 @@ else if (this.getYear().length() == 4)
tei.append("\t");
}
tei.append("<date type=\"published\" when=\"");
tei.append(when + "\" />\n");
tei.append(when + "\"");

if (publication_date != null && publication_date.length() > 0) {
tei.append(">");
tei.append(TextUtilities.HTMLEncode(publication_date) );
tei.append("</date>\n");
} else {
tei.append(" />\n");
}
} else {
for (int i = 0; i < indent + 3; i++) {
tei.append("\t");
Expand Down Expand Up @@ -2688,40 +2685,22 @@ else if (this.getYear().length() == 4)

// date
if (normalized_publication_date != null) {
if ((normalized_publication_date.getDay() != -1) ||
(normalized_publication_date.getMonth() != -1) ||
(normalized_publication_date.getYear() != -1)) {
int year = normalized_publication_date.getYear();
int month = normalized_publication_date.getMonth();
int day = normalized_publication_date.getDay();

if (year != -1) {
String when = "";
if (year <= 9)
when += "000" + year;
else if (year <= 99)
when += "00" + year;
else if (year <= 999)
when += "0" + year;
else
when += year;
if (month != -1) {
if (month <= 9)
when += "-0" + month;
else
when += "-" + month;
if (day != -1) {
if (day <= 9)
when += "-0" + day;
else
when += "-" + day;
}
}
if (normalized_publication_date.getYear() != -1) {
String when = Date.toISOString(normalized_publication_date);
if (when != null) {
for (int i = 0; i < indent + 3; i++) {
tei.append("\t");
}
tei.append("<date type=\"published\" when=\"");
tei.append(when + "\" />\n");
tei.append(when + "\"");

if (publication_date != null && publication_date.length() > 0) {
tei.append(">");
tei.append(TextUtilities.HTMLEncode(publication_date) );
tei.append("</date>\n");
} else {
tei.append(" />\n");
}
}
} else if (this.getYear() != null) {
String when = "";
Expand Down Expand Up @@ -2750,7 +2729,15 @@ else if (this.getYear().length() == 4)
tei.append("\t");
}
tei.append("<date type=\"published\" when=\"");
tei.append(when + "\" />\n");
tei.append(when + "\"");

if (publication_date != null && publication_date.length() > 0) {
tei.append(">");
tei.append(TextUtilities.HTMLEncode(publication_date) );
tei.append("</date>\n");
} else {
tei.append(" />\n");
}
} else {
for (int i = 0; i < indent + 3; i++) {
tei.append("\t");
Expand Down Expand Up @@ -2819,40 +2806,22 @@ else if (this.getYear().length() == 4)
}
// date
if (normalized_publication_date != null) {
if ((normalized_publication_date.getDay() != -1) |
(normalized_publication_date.getMonth() != -1) |
(normalized_publication_date.getYear() != -1)) {
int year = normalized_publication_date.getYear();
int month = normalized_publication_date.getMonth();
int day = normalized_publication_date.getDay();

if (year != -1) {
String when = "";
if (year <= 9)
when += "000" + year;
else if (year <= 99)
when += "00" + year;
else if (year <= 999)
when += "0" + year;
else
when += year;
if (month != -1) {
if (month <= 9)
when += "-0" + month;
else
when += "-" + month;
if (day != -1) {
if (day <= 9)
when += "-0" + day;
else
when += "-" + day;
}
}
if (normalized_publication_date.getYear() != -1) {
String when = Date.toISOString(normalized_publication_date);
if (when != null) {
for (int i = 0; i < indent + 3; i++) {
tei.append("\t");
}
tei.append("<date type=\"published\" when=\"");
tei.append(when + "\" />\n");
tei.append(when + "\"");

if (publication_date != null && publication_date.length() > 0) {
tei.append(">");
tei.append(TextUtilities.HTMLEncode(publication_date) );
tei.append("</date>\n");
} else {
tei.append(" />\n");
}
}
} else if (this.getYear() != null) {
String when = "";
Expand Down Expand Up @@ -2881,7 +2850,15 @@ else if (this.getYear().length() == 4)
tei.append("\t");
}
tei.append("<date type=\"published\" when=\"");
tei.append(when + "\" />\n");
tei.append(when + "\"");

if (publication_date != null && publication_date.length() > 0) {
tei.append(">");
tei.append(TextUtilities.HTMLEncode(publication_date) );
tei.append("</date>\n");
} else {
tei.append(" />\n");
}
} else {
for (int i = 0; i < indent + 3; i++) {
tei.append("\t");
Expand Down Expand Up @@ -4390,10 +4367,17 @@ public static void correct(BiblioItem bib, BiblioItem bibo) {
bib.setSubmissionDate(bibo.getSubmissionDate());
if (bibo.getDownloadDate() != null)
bib.setDownloadDate(bibo.getDownloadDate());
if (bibo.getYear() != null)

if (bibo.getNormalizedPublicationDate() != null) {
if (bib.getNormalizedPublicationDate() != null) {
bib.mergeNormalizedPublicationDate(bibo.getNormalizedPublicationDate());
}
else {
bib.setNormalizedPublicationDate(bibo.getNormalizedPublicationDate());
}
}
if (bibo.getYear() != null)
bib.setYear(bibo.getYear());
if (bibo.getNormalizedPublicationDate() != null)
bib.setNormalizedPublicationDate(bibo.getNormalizedPublicationDate());
if (bibo.getMonth() != null)
bib.setMonth(bibo.getMonth());
if (bibo.getDay() != null)
Expand Down
78 changes: 78 additions & 0 deletions grobid-core/src/main/java/org/grobid/core/data/Date.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,19 @@ public class Date implements Comparable {
private String monthString = null;
private String yearString = null;

/**
 * Create a date without setting any component: every field keeps the default
 * value given at its declaration.
 * NOTE(review): the numeric components presumably default to -1 ("not set"),
 * which is the value tested by toISOString and merge - confirm against the
 * field declarations.
 */
public Date() {
}

/**
 * Copy constructor: create a new date instance holding the same raw form,
 * string components and numeric components as the given date.
 *
 * @param fromDate the date to copy, must not be null
 */
public Date(Date fromDate) {
    // raw form and string components
    this.rawDate = fromDate.rawDate;
    this.yearString = fromDate.yearString;
    this.monthString = fromDate.monthString;
    this.dayString = fromDate.dayString;

    // numeric components
    this.year = fromDate.year;
    this.month = fromDate.month;
    this.day = fromDate.day;
}

/**
 * @return the numeric day component of this date; the value -1 is treated as
 *         "not set" by toISOString and merge
 */
public int getDay() {
    return this.day;
}
Expand Down Expand Up @@ -144,6 +157,71 @@ public boolean isAmbiguous() {
return false;
}

/**
 * Serialize a date as an ISO 8601 calendar date string, with a precision limited
 * to the components that are set: "YYYY", "YYYY-MM" or "YYYY-MM-DD".
 *
 * A component equal to -1 is considered as not set. The month is only written
 * when the year is set, and the day is only written when the month is set.
 * The year is left-padded with zeros to 4 digits, month and day to 2 digits.
 *
 * @param date the date to serialize, may be null
 * @return the ISO string, an empty string when the year is not set, or null
 *         when the given date is null
 */
public static String toISOString(Date date) {
    if (date == null) {
        // nothing to serialize; callers test the result against null
        return null;
    }

    int year = date.getYear();
    if (year == -1) {
        // without a year, no ISO date can be produced
        return "";
    }

    StringBuilder when = new StringBuilder();

    // left-pad the year to 4 digits
    if (year <= 9) {
        when.append("000");
    } else if (year <= 99) {
        when.append("00");
    } else if (year <= 999) {
        when.append("0");
    }
    when.append(year);

    int month = date.getMonth();
    if (month != -1) {
        when.append(month <= 9 ? "-0" : "-").append(month);

        // a day is only meaningful when a month is present
        int day = date.getDay();
        if (day != -1) {
            when.append(day <= 9 ? "-0" : "-").append(day);
        }
    }

    return when.toString();
}

/**
 * Return a new date instance by merging the date information from a first date with
 * the date information from a second date.
 * The merging follows the year, month, day sequence: the second date is only used
 * when it agrees with the first date on the coarser components and brings a finer
 * component that the first date does not have. If the years for instance clash,
 * the merging is stopped and the first date is kept.
 *
 * Examples of merging:
 * "2010" "2010-10" -> "2010-10"
 * "2010" "2010-10-27" -> "2010-10-27"
 * "2010-10" "2010-10-27" -> "2010-10-27"
 * "2010-10-27" "2010-10" -> "2010-10-27"
 * "2011-10" "2010-10-27" -> "2011-10"
 * "2010" "2016-10-27" -> "2010"
 * "2011" "2010" -> "2011"
 *
 * Null handling: if one of the two dates is null, a copy of the other one is
 * returned; if both are null, null is returned.
 *
 * @param date1 the first date, which has priority in case of clash, may be null
 * @param date2 the second date, used to complete the first one, may be null
 * @return a new date instance holding the merged information, or null if both
 *         dates are null
 */
public static Date merge(Date date1, Date date2) {
    if (date1 == null) {
        return (date2 == null) ? null : new Date(date2);
    }
    if (date2 == null) {
        return new Date(date1);
    }

    // no year in the first date: nothing to preserve, take the second date
    if (date1.getYear() == -1) {
        return new Date(date2);
    }

    if (date1.getYear() == date2.getYear()) {
        // same year: the second date wins if it brings the missing month
        if (date1.getMonth() == -1 && date2.getMonth() != -1) {
            return new Date(date2);
        }
        if (date1.getMonth() == date2.getMonth()) {
            // same month: the second date wins if it brings the missing day
            if (date1.getDay() == -1 && date2.getDay() != -1) {
                return new Date(date2);
            }
        }
    }

    // clash, or nothing finer to gain from the second date
    return new Date(date1);
}

public String toString() {
String theDate = "";
if (day != -1) {
Expand Down
Loading