From ff223d7a4bd360315cf61bfc7144885bd359444e Mon Sep 17 00:00:00 2001 From: Thibault Coupin Date: Tue, 24 May 2022 17:19:29 +0200 Subject: [PATCH 1/3] Use production date in citation for harvested dataset --- .../harvard/iq/dataverse/DataCitation.java | 38 +++++++++++++------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java index 9027be1350b..325533fb03b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java @@ -713,25 +713,39 @@ private String flattenHtml(String html) { private Date getDateFrom(DatasetVersion dsv) { Date citationDate = null; - SimpleDateFormat sdf = new SimpleDateFormat("yyyy"); - if (!dsv.getDataset().isHarvested()) { - citationDate = dsv.getCitationDate(); - if (citationDate == null) { - if (dsv.getDataset().getCitationDate() != null) { - citationDate = dsv.getDataset().getCitationDate(); - } else { // for drafts - citationDate = dsv.getLastUpdateTime(); - } - } - } else { + + if (dsv.getDataset().isHarvested()) { try { - citationDate= sdf.parse(dsv.getDistributionDate()); + SimpleDateFormat sdffull = new SimpleDateFormat("yyyy-MM-dd"); + citationDate = sdffull.parse(dsv.getProductionDate()); } catch (ParseException ex) { // ignore } catch (Exception ex) { // ignore } + + if (citationDate == null) { + try { + SimpleDateFormat sdfshort = new SimpleDateFormat("yyyy"); + citationDate = sdfshort.parse(dsv.getDistributionDate()); + } catch (ParseException ex) { + // ignore + } catch (Exception ex) { + // ignore + } + } } + + if (citationDate == null) { + if (dsv.getCitationDate() != null) { + citationDate = dsv.getCitationDate(); + } else if (dsv.getDataset().getCitationDate() != null) { + citationDate = dsv.getDataset().getCitationDate(); + } else { // for drafts + citationDate = dsv.getLastUpdateTime(); + } + } + if (citationDate == null) { 
//As a last resort, pick the current date logger.warning("Unable to find citation date for datasetversion: " + dsv.getId()); From ddbd3e5dbeba25a0aa1c78f34b5fef74a15817d0 Mon Sep 17 00:00:00 2001 From: Thibault Coupin Date: Mon, 30 May 2022 09:16:15 +0200 Subject: [PATCH 2/3] add release note --- .../8732-date-in-citation-harvested-datasets.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 doc/release-notes/8732-date-in-citation-harvested-datasets.md diff --git a/doc/release-notes/8732-date-in-citation-harvested-datasets.md b/doc/release-notes/8732-date-in-citation-harvested-datasets.md new file mode 100644 index 00000000000..85f2d24a8a9 --- /dev/null +++ b/doc/release-notes/8732-date-in-citation-harvested-datasets.md @@ -0,0 +1,7 @@ +Fix the year displayed in the citation for harvested datasets, especially for the oai_dc format. + +For normal datasets, the date used is the "citation date" which is by default the publication date (the first release date) (https://guides.dataverse.org/en/latest/api/native-api.html?highlight=citationdate#set-citation-date-field-type-for-a-dataset). + +But for a harvested dataset, the distribution date is used instead and this date is not always present in the harvested metadata. With the oai_dc format, the date tag is used as the production date. + +Now, the production date is used for harvested datasets in addition to the distribution date. 
\ No newline at end of file From 7254c0cd28ec0fbf7606090f2cf138674e950c13 Mon Sep 17 00:00:00 2001 From: Thibault Coupin Date: Mon, 12 Sep 2022 15:19:36 +0200 Subject: [PATCH 3/3] improve date parsing --- .../harvard/iq/dataverse/DataCitation.java | 20 ++-------- .../harvard/iq/dataverse/util/DateUtil.java | 38 +++++++++++++++++++ 2 files changed, 41 insertions(+), 17 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java index 325533fb03b..abe3cc3e6d7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java @@ -33,6 +33,7 @@ import javax.xml.stream.XMLStreamWriter; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.DateUtil; import org.apache.commons.text.StringEscapeUtils; import org.apache.commons.lang3.StringUtils; @@ -715,24 +716,9 @@ private Date getDateFrom(DatasetVersion dsv) { Date citationDate = null; if (dsv.getDataset().isHarvested()) { - try { - SimpleDateFormat sdffull = new SimpleDateFormat("yyyy-MM-dd"); - citationDate = sdffull.parse(dsv.getProductionDate()); - } catch (ParseException ex) { - // ignore - } catch (Exception ex) { - // ignore - } - + citationDate = DateUtil.parseDate(dsv.getProductionDate()); if (citationDate == null) { - try { - SimpleDateFormat sdfshort = new SimpleDateFormat("yyyy"); - citationDate = sdfshort.parse(dsv.getDistributionDate()); - } catch (ParseException ex) { - // ignore - } catch (Exception ex) { - // ignore - } + citationDate = DateUtil.parseDate(dsv.getDistributionDate()); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/DateUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/DateUtil.java index d6f22471f68..669780b9436 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/DateUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/DateUtil.java @@ -16,7 +16,9 @@ */ public class DateUtil { + 
public static String YEAR_PATTERN = "yyyy"; public static String YEAR_DASH_MONTH_PATTERN = "yyyy-MM"; + public static String YEAR_DASH_MONTH_DASH_DAY_PATTERN = "yyyy-MM-dd"; public static String formatDate(Date dateToformat) { String formattedDate; @@ -63,4 +65,40 @@ public static String formatDate(Timestamp datetimeToformat) { } } + /** Parses dateString by trying YEAR_DASH_MONTH_DASH_DAY_PATTERN, then YEAR_DASH_MONTH_PATTERN, then YEAR_PATTERN, and returns the first non-null result; returns null when none of the three patterns yields a date. */ public static Date parseDate(String dateString) { + SimpleDateFormat sdf; /* NOTE(review): this local is declared but never used in this method */ + Date date; + + // YYYY-MM-DD + date = parseDate(dateString, YEAR_DASH_MONTH_DASH_DAY_PATTERN); + if (date != null) { + return date; + } + + // YYYY-MM + date = parseDate(dateString, YEAR_DASH_MONTH_PATTERN); + if (date != null) { + return date; + } + + // YYYY + date = parseDate(dateString, YEAR_PATTERN); + return date; + + } + + /** Parses dateString with a new SimpleDateFormat built from the given format; returns the parsed Date, or null when construction or parsing throws (every exception is swallowed). */ public static Date parseDate(String dateString, String format) { + + try { + SimpleDateFormat sdf = new SimpleDateFormat(format); + Date date = sdf.parse(dateString); + return date; + } catch (ParseException ex) { + // ignore: input does not match this format, fall through to return null + } catch (Exception ex) { + // ignore: any other failure also falls through to return null + } + return null; + } + }