Skip to content

Commit

Permalink
Merge pull request #8732 from tcoupin/tc-citationdate-harvested-dataset
Browse files Browse the repository at this point in the history
Use production date in citation for harvested dataset
  • Loading branch information
sekmiller authored Oct 12, 2022
2 parents 9686ab8 + 9d0ea68 commit f6c677b
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 15 deletions.
7 changes: 7 additions & 0 deletions doc/release-notes/8732-date-in-citation-harvested-datasets.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Fix the year displayed in the citation for harvested datasets, especially for the oai_dc format.

For normal datasets, the date used is the "citation date" which is by default the publication date (the first release date) (https://guides.dataverse.org/en/latest/api/native-api.html?highlight=citationdate#set-citation-date-field-type-for-a-dataset).

But for a harvested dataset, the distribution date is used instead, and this date is not always present in the harvested metadata. With the oai_dc format, the date tag is used as the production date.

Now, for harvested datasets, the production date is used first, and the distribution date is used as a fallback when the production date is missing or cannot be parsed.
30 changes: 15 additions & 15 deletions src/main/java/edu/harvard/iq/dataverse/DataCitation.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import javax.xml.stream.XMLStreamWriter;

import edu.harvard.iq.dataverse.util.BundleUtil;
import edu.harvard.iq.dataverse.util.DateUtil;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;

Expand Down Expand Up @@ -713,25 +714,24 @@ private String flattenHtml(String html) {

private Date getDateFrom(DatasetVersion dsv) {
Date citationDate = null;
SimpleDateFormat sdf = new SimpleDateFormat("yyyy");
if (!dsv.getDataset().isHarvested()) {
citationDate = dsv.getCitationDate();

if (dsv.getDataset().isHarvested()) {
citationDate = DateUtil.parseDate(dsv.getProductionDate());
if (citationDate == null) {
if (dsv.getDataset().getCitationDate() != null) {
citationDate = dsv.getDataset().getCitationDate();
} else { // for drafts
citationDate = dsv.getLastUpdateTime();
}
citationDate = DateUtil.parseDate(dsv.getDistributionDate());
}
} else {
try {
citationDate= sdf.parse(dsv.getDistributionDate());
} catch (ParseException ex) {
// ignore
} catch (Exception ex) {
// ignore
}

if (citationDate == null) {
if (dsv.getCitationDate() != null) {
citationDate = dsv.getCitationDate();
} else if (dsv.getDataset().getCitationDate() != null) {
citationDate = dsv.getDataset().getCitationDate();
} else { // for drafts
citationDate = dsv.getLastUpdateTime();
}
}

if (citationDate == null) {
//As a last resort, pick the current date
logger.warning("Unable to find citation date for datasetversion: " + dsv.getId());
Expand Down
38 changes: 38 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/util/DateUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
*/
public class DateUtil {

// Date patterns in java.text.SimpleDateFormat syntax, ordered from least to most precise.
// Declared final so that no caller can reassign them and silently change what
// parseDate(String) accepts.

/** Pattern for a four-digit year only, e.g. {@code 2022}. */
public static final String YEAR_PATTERN = "yyyy";
/** Pattern for a year and month separated by a dash, e.g. {@code 2022-10}. */
public static final String YEAR_DASH_MONTH_PATTERN = "yyyy-MM";
/** Pattern for a full calendar date separated by dashes, e.g. {@code 2022-10-12}. */
public static final String YEAR_DASH_MONTH_DASH_DAY_PATTERN = "yyyy-MM-dd";

public static String formatDate(Date dateToformat) {
String formattedDate;
Expand Down Expand Up @@ -63,4 +65,40 @@ public static String formatDate(Timestamp datetimeToformat) {
}
}

/**
 * Parses a date string by trying the supported patterns from most to least
 * precise: {@code yyyy-MM-dd}, then {@code yyyy-MM}, then {@code yyyy}.
 * The first pattern that yields a date wins.
 *
 * @param dateString the text to parse; may be {@code null}
 * @return the parsed date, or {@code null} if {@code dateString} is
 *         {@code null} or matches none of the supported patterns
 */
public static Date parseDate(String dateString) {
    if (dateString == null) {
        // Nothing to parse; skip the per-pattern attempts entirely.
        return null;
    }

    // Order matters: the most specific pattern must be tried first, because the
    // coarser patterns would also accept the leading part of a more precise date.
    String[] patterns = {
        YEAR_DASH_MONTH_DASH_DAY_PATTERN, // yyyy-MM-dd
        YEAR_DASH_MONTH_PATTERN,          // yyyy-MM
        YEAR_PATTERN                      // yyyy
    };

    for (String pattern : patterns) {
        Date date = parseDate(dateString, pattern);
        if (date != null) {
            return date;
        }
    }
    return null;
}

/**
 * Parses {@code dateString} with a {@link SimpleDateFormat} built from
 * {@code format}. This is a best-effort helper: it never throws and reports
 * every failure as {@code null}.
 *
 * <p>The formatter is left in its default (lenient) mode, and
 * {@link SimpleDateFormat#parse(String)} may stop before the end of the text,
 * so a string that merely starts with a matching date is accepted.
 *
 * @param dateString the text to parse; may be {@code null}
 * @param format     a {@link SimpleDateFormat} pattern; may be {@code null}
 * @return the parsed date, or {@code null} if either argument is {@code null},
 *         the pattern is invalid, or the text does not match the pattern
 */
public static Date parseDate(String dateString, String format) {
    // Explicit null checks instead of relying on a swallowed NullPointerException.
    if (dateString == null || format == null) {
        return null;
    }

    try {
        // A new formatter per call: SimpleDateFormat instances must not be shared between threads.
        SimpleDateFormat sdf = new SimpleDateFormat(format);
        return sdf.parse(dateString);
    } catch (ParseException | IllegalArgumentException ignored) {
        // ParseException: the text does not match the pattern.
        // IllegalArgumentException: the pattern itself is invalid.
        // Both mean "no date available", which callers see as null.
        return null;
    }
}

}

0 comments on commit f6c677b

Please sign in to comment.