Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DOI Validation #1484

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/1484.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added validation to the `digital_object_identifier` Dataset field.
2 changes: 2 additions & 0 deletions ckanext/canada/plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -859,6 +859,8 @@ def get_validators(self):
validators.canada_output_none,
'protect_registry_access':
validators.protect_registry_access,
'digital_object_identifier':
validators.digital_object_identifier,
}


Expand Down
3 changes: 2 additions & 1 deletion ckanext/canada/schemas/presets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2329,6 +2329,7 @@ presets:


# Field = Digital Object Identifier (DOI).
# TODO: fix broken help link!!!
# {The Digital Object Identifier assigned to the dataset. For more information visit: http://cisti-icist.nrc-cnrc.gc.ca/eng/services/cisti/datacite-canada/index.html}
- preset_name: canada_digital_object_identifier
values:
Expand All @@ -2339,7 +2340,7 @@ presets:
help_text:
en: "The Digital Object Identifier assigned to the dataset. For more information visit: http://cisti-icist.nrc-cnrc.gc.ca/eng/services/cisti/datacite-canada/index.html"
fr: "L'identificateur d'objet numérique assigné au jeu de données. Pour obtenir de plus amples renseignements, veuillez consulter le site http://cisti-icist.nrc-cnrc.gc.ca/fra/services/icist/datacite-canada/index.html"
validators: scheming_required string_safe
validators: scheming_required string_safe digital_object_identifier

# Field = Jurisdiction.
# Default displayed value = Federal.
Expand Down
75 changes: 75 additions & 0 deletions ckanext/canada/tests/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,81 @@ def test_validation_options(self):
resource = self.sysadmin_action.resource_create(**resource_data)
assert 'validation_options' not in resource or resource['validation_options'] == None

def test_digital_object_identifier(self):
    "DOI values should conform to standard DOI practices."
    pkg = self.sysadmin_action.package_create(**self.complete_pkg)

    # Well-formed DOIs: the update must succeed and hand the value
    # back exactly as it was submitted.
    accepted = (
        '10.1000/xyz-123',
        '10.1016.12.31/nature.S0735-1097(98)2000/12/31/34:7-7',
        '10.1002/(SICI)1522-2594(199911)42:5<952::AID-MRM16>3.0.CO;2-S',
    )
    for doi in accepted:
        pkg['digital_object_identifier'] = doi
        up_pkg = self.sysadmin_action.package_update(**pkg)
        assert up_pkg['digital_object_identifier'] == doi

    # Malformed DOIs: each update must be refused with the validator's
    # error message attached to the DOI field.
    rejected = (
        '7.0182/(TBS-SCT)1522-2594',
        '10.01.02/(TBS-SCT)1522-2594',
        '10.1016/(TBS-SCT)152???2-2594',
        '10.1016/(TBS-SCT)152&&&2-2594',
        '10.1016/(TBS-SCT)152"""2-2594',
        "10.1016/(TBS-SCT)152'''2-2594",
        "This is extremely not a DOI number",
    )
    for doi in rejected:
        pkg['digital_object_identifier'] = doi
        with pytest.raises(ValidationError) as ve:
            self.normal_action.package_update(**pkg)
        err = ve.value.error_dict
        assert 'digital_object_identifier' in err
        assert err['digital_object_identifier'] == ['Invalid value for a digital object identifier.']


class TestSysadminUpdate(CanadaTestBase):
@classmethod
Expand Down
42 changes: 42 additions & 0 deletions ckanext/canada/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,39 @@
MIN_TAG_LENGTH = 2
MAX_TAG_LENGTH = 140 # because twitter

# DOI Validation
#
# See: https://www.doi.org/the-identifier/resources/handbook/
# As of 2012, assigned registrant codes can be 3 or 4 digits long
# (the pattern below accepts three or more digits).
# NOTE: this does not support ShortDOI (https://shortdoi.org/)
#
# <prefix>/<suffix>
#
# prefix: refers to the DOI namespace (a namespace is allocated to a given service provider).
#         The prefix can contain only numeric values and the "." character which is used to
#         delimit a hierarchical level in the namespace allocation: a one-delimiter prefix
#         (for example, "10.1000") derives from a zero-delimiter prefix ("10").
#         The prefix 10 is allocated to the DOI Foundation.
#
# suffix: is a unique local name in the namespace. Any Unicode 2.0 character can be used
#         in the suffix (there is no practical limitation on the length of a DOI name).
#         This unique string may be an existing identifier, or any unique string chosen
#         by the Registration Agency or the referent owner (registrant). Some limitations
#         should be applied due to DOI HTTPS Links.
#
#         https limitations: double quotes (")
#                            single quotes (')
#                            &
#                            ?
#
# Examples: 10.1000/xyz-123
#           10.1109/5.771073
#           10.231/JIM.0b013e31820bab4c
#           10.1016.12.31/nature.S0735-1097(98)2000/12/31/34:7-7
#           10.1002/(SICI)1522-2594(199911)42:5<952::AID-MRM16>3.0.CO;2-S
#
# The pattern deliberately has no trailing \b: a word boundary does not exist
# at end-of-string after a non-word character, so a trailing \b would stop the
# match one character short for a suffix ending in punctuation such as
# "10.1000/abc(1)", even though such a suffix is allowed by the rules above.
#
doi_match = re.compile(r'\b(10[.][0-9]{3,}(?:[.][0-9]+)*/(?:(?!["&\'\?])\S)+)')
JVickery-TBS marked this conversation as resolved.
Show resolved Hide resolved


def protect_portal_release_date(key, data, errors, context):
"""
Expand Down Expand Up @@ -531,3 +564,12 @@ def protect_registry_access(key, data, errors, context):
" from '%s' to '%s'. This field is read-only." %
(original, value)))
raise StopOnError


def digital_object_identifier(value, context):
    """
    Validator that only lets well-formed Digital Object Identifiers through.

    A falsy value, or the ``missing`` marker, is handed back untouched.
    Any other value is returned unchanged when ``doi_match`` matches it
    from its first character through its last; otherwise ``Invalid`` is
    raised with a translated message.

    ``context`` is part of the validator signature and is not used here.
    """
    if not value or value is missing:
        return value

    found = doi_match.match(value)
    # A match that covers only a leading portion of the value means the
    # remainder contains something the pattern does not allow.
    if found is not None and found.group(0) == value:
        return value

    raise Invalid(_('Invalid value for a digital object identifier.'))
Loading