Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Construct DSS JSON #781

Merged
merged 1 commit into from
Sep 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions app/models/dspace_metadata.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Collects the Dublin Core (DC) metadata for a thesis and serializes it as the JSON file required for
# submission to DSS.
#
# Every field is gathered into the @dc hash (DC field name => value) when the object is instantiated. Fields are
# added in a fixed order, and optional fields (abstract, ORCIDs, rights URI) are omitted entirely when there is
# nothing to record.
class DspaceMetadata
  # Builds the full metadata hash from the thesis and its associated records.
  #
  # thesis - object responding to title, abstract, grad_date, users, advisors, departments, degrees, copyright,
  #          license and files.
  def initialize(thesis)
    @dc = {}
    @dc['dc.publisher'] = 'Massachusetts Institute of Technology'
    @dc['dc.type'] = 'Thesis'
    title(thesis)
    contributors(thesis.users, thesis.advisors)
    departments(thesis.departments)
    degrees(thesis.degrees)
    copyright(thesis.copyright, thesis.license)
    date_transferred(thesis.files)
  end

  # Generates JSON metadata file required for submission to DSS.
  #
  # Returns a JSON string of the form {"metadata": [{"key": <field>, "value": <value>}, ...]}, with one entry per
  # field in @dc, in insertion order.
  def serialize_dss_metadata
    { 'metadata' => @dc.map { |k, v| { 'key' => k, 'value' => v } } }.to_json
  end

  # Records the title, the abstract (only when one exists) and the issue date, formatted as YYYY-MM from the
  # thesis graduation date.
  def title(thesis)
    @dc['dc.title'] = thesis.title
    @dc['dc.description.abstract'] = thesis.abstract if thesis.abstract
    @dc['dc.date.issued'] = thesis.grad_date.strftime('%Y-%m')
  end

  # Records authors (by preferred name), their ORCIDs (only when at least one author has one) and advisors.
  def contributors(thesis_users, thesis_advisors)
    @dc['dc.contributor.author'] = thesis_users.map(&:preferred_name)

    # Compute the ORCID list once and reuse it for both the presence check and the value.
    orcids = parse_orcids(thesis_users)
    @dc['dc.identifier.orcid'] = orcids if orcids

    @dc['dc.contributor.advisor'] = thesis_advisors.map(&:name)
  end

  # We don't care about the order of the ORCIDs because DSpace can't assign them to a specific user.
  #
  # Returns the non-nil ORCIDs of the given users, or nil when none of them has one.
  def parse_orcids(thesis_users)
    orcids = thesis_users.map(&:orcid).compact
    orcids unless orcids.empty?
  end

  # Records the DSpace name of each department.
  def departments(thesis_depts)
    @dc['dc.contributor.department'] = thesis_depts.map(&:name_dspace)
  end

  # Records degree abbreviations, DSpace degree names and degree types.
  def degrees(thesis_degrees)
    @dc['dc.description.degree'] = thesis_degrees.map(&:abbreviation)
    @dc['thesis.degree.name'] = thesis_degrees.map(&:name_dspace)

    # Degree types should not be repeated if they are the same type.
    types = thesis_degrees.map { |degree| degree.degree_type.name }.uniq
    @dc['mit.thesis.degree'] = types
  end

  # Records the rights statement and, when a URL is available, the rights URI.
  #
  # thesis_copyright - object responding to holder, statement_dspace and url.
  # thesis_license   - object responding to license_type and url, or nil when no license was provided.
  def copyright(thesis_copyright, thesis_license)
    if thesis_copyright.holder != 'Author' # copyright holder is anyone but author
      # NOTE(review): this emits the literal text "U+00A9", not the copyright character itself -- confirm that
      # this is what DSS expects.
      @dc['dc.rights'] = [thesis_copyright.statement_dspace, "U+00A9 #{thesis_copyright.holder}"]
      @dc['dc.rights.uri'] = thesis_copyright.url if thesis_copyright.url
    elsif thesis_license # author holds copyright and provides a license
      @dc['dc.rights'] = [thesis_license.license_type, 'Copyright retained by author(s)']

      # Theoretically both license and copyright URLs are required for publication, but there are no constraints on
      # the models, and we want to future-proof this.
      @dc['dc.rights.uri'] = thesis_license.url if thesis_license.url
    else # author holds copyright and no license provided
      @dc['dc.rights'] = [thesis_copyright.statement_dspace, 'Copyright retained by author(s)']
      @dc['dc.rights.uri'] = thesis_copyright.url if thesis_copyright.url
    end
  end

  # Records the creation time of the blob behind the first file whose purpose is 'thesis_pdf'.
  #
  # Raises NoMethodError when none of the files has that purpose, because there is then no blob to read.
  def date_transferred(files)
    thesis_pdf = files.find { |file| file.purpose == 'thesis_pdf' }
    @dc['dc.date.submitted'] = thesis_pdf.blob.created_at
  end
end
4 changes: 4 additions & 0 deletions test/fixtures/users.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ yo:
given_name: 'Yo'
surname: 'Yobot'
display_name: 'Yo Yobot'
orcid: '0001'

admin:
uid: 'admin_id'
Expand Down Expand Up @@ -58,6 +59,7 @@ basic:
given_name: 'Basic'
surname: 'Robot'
display_name: 'Basic Robot'
orcid: '0002'

second:
uid: 'second'
Expand All @@ -66,6 +68,7 @@ second:
given_name: 'Second'
surname: 'Student'
display_name: 'Second Student'
preferred_name: 'Student, Second'

third:
uid: 'third'
Expand All @@ -74,6 +77,7 @@ third:
given_name: 'Third'
surname: 'Student'
display_name: 'Third Student'
preferred_name: 'Student, Third'

processor:
uid: 'processor_id'
Expand Down
210 changes: 210 additions & 0 deletions test/models/dspace_metadata_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
require 'test_helper'

# Tests for DspaceMetadata: each test builds the metadata for a fixture thesis and checks the DC fields that were
# generated, or the JSON produced by serialize_dss_metadata.
class DspaceMetadataTest < ActiveSupport::TestCase
  # Adding some properties that are not included in our fixtures
  def dss_friendly_thesis(thesis)
    degree = thesis.degrees.first
    degree.degree_type_id = degree_types(:bachelor).id
    degree.save
    file = Rails.root.join('test', 'fixtures', 'files', 'a_pdf.pdf')
    thesis.files.attach(io: File.open(file), filename: 'a_pdf.pdf')
    thesis.files.first.description = 'My thesis'
    thesis.files.first.purpose = 'thesis_pdf'
    thesis.save
  end

  # Builds a fresh DspaceMetadata for the thesis and returns its internal @dc hash, so tests can inspect the
  # generated fields directly.
  def dspace_dc(thesis)
    DspaceMetadata.new(thesis).instance_variable_get(:@dc)
  end

  test 'parses thesis data as DSpace DC' do
    t = theses(:one)
    dss_friendly_thesis(t)
    dc = dspace_dc(t)
    assert_equal 'MyString', dc['dc.title']
    assert_equal '2017-09', dc['dc.date.issued']
    assert_equal 'MyText', dc['dc.description.abstract']

    # No abstract (optional for undergraduate theses)
    t.abstract = nil
    t.save
    dc = dspace_dc(t)
    assert_nil dc['dc.description.abstract']
  end

  test 'parses author data as DSpace DC' do
    # One author
    t = Thesis.create(title: 'Who cares', graduation_year: '2021', graduation_month: 'February',
                      advisors: [advisors(:first)], users: [users(:second)], degrees: [degrees(:one)],
                      departments: [departments(:one)], copyright: copyrights(:mit))
    dss_friendly_thesis(t)
    dc = dspace_dc(t)
    assert_equal ['Student, Second'], dc['dc.contributor.author']

    # More than one author
    t.users = [users(:second), users(:third)]
    t.save
    dc = dspace_dc(t)
    assert_equal ['Student, Second', 'Student, Third'], dc['dc.contributor.author']
  end

  test 'parses ORCIDs' do
    # One author and one ORCID
    t1 = theses(:one)
    dss_friendly_thesis(t1)
    dc = dspace_dc(t1)
    assert_equal ['0001'], dc['dc.identifier.orcid']

    # Multiple authors and multiple ORCIDs
    t2 = theses(:two)
    dss_friendly_thesis(t2)

    # Since theses(:two) has no advisors, we need to attach one as DspaceMetadata expects this. This can be
    # removed once we update the Thesis validations to reflect publication requirements.
    t2.advisors = [advisors(:first)]
    t2.save
    dc = dspace_dc(t2)
    assert_equal %w[0002 0001], dc['dc.identifier.orcid']

    # Multiple authors and only one ORCID
    t2.users.second.orcid = nil
    t2.users.second.save
    dc = dspace_dc(t2)
    assert_equal ['0002'], dc['dc.identifier.orcid']

    # One author and no ORCID
    t1.users.first.orcid = nil
    t1.users.first.save
    dc = dspace_dc(t1)
    assert_nil dc['dc.identifier.orcid']
  end

  test 'parses advisor data as DSpace DC' do
    t = theses(:one)
    dss_friendly_thesis(t)
    dc = dspace_dc(t)

    # One advisor
    assert_equal ['Addy McAdvisor'], dc['dc.contributor.advisor']

    # More than one advisor
    t.advisors = [advisors(:first), advisors(:second)]
    t.save
    dc = dspace_dc(t)
    assert_equal ['Addy McAdvisor', 'Viola McAdvisor'], dc['dc.contributor.advisor']
  end

  test 'parses copyright data as DSpace DC' do
    # Author holds copyright and license is present
    t = theses(:downloaded)
    t.copyright = copyrights(:author)
    t.license = licenses(:ccby)
    t.users = [users(:yo)]
    t.advisors = [advisors(:first)]
    t.save
    dss_friendly_thesis(t)
    dc = dspace_dc(t)
    assert_equal ['Attribution 4.0 International (CC BY 4.0)', 'Copyright retained by author(s)'],
                 dc['dc.rights']
    assert_equal 'https://creativecommons.org/licenses/by/4.0/', dc['dc.rights.uri']

    # No URI
    t.license = licenses(:nocc)
    t.save
    dc = dspace_dc(t)
    assert_nil dc['dc.rights.uri']

    # Author holds copyright and no license
    t.license = nil
    t.save
    dc = dspace_dc(t)
    assert_equal ['In Copyright', 'Copyright retained by author(s)'], dc['dc.rights']
    assert_equal 'https://rightsstatements.org/page/InC/1.0/', dc['dc.rights.uri']

    # Any other copyright holder
    t.copyright = copyrights(:mit)
    t.save
    dc = dspace_dc(t)
    assert_equal ['In Copyright - Educational Use Permitted', 'U+00A9 MIT'], dc['dc.rights']
    assert_equal 'http://rightsstatements.org/page/InC-EDU/1.0/', dc['dc.rights.uri']
  end

  test 'parses department data as DSpace DC' do
    # One department
    t = theses(:one)
    dss_friendly_thesis(t)
    dc = dspace_dc(t)
    assert_equal ['Massachusetts Institute of Technology. Department of Aeronautics and Astronautics'],
                 dc['dc.contributor.department']

    # Multiple departments
    t.departments = [departments(:one), departments(:two)]
    t.save
    dc = dspace_dc(t)
    assert_equal ['Massachusetts Institute of Technology. Department of Aeronautics and Astronautics',
                  'MIT Anthropology Program'], dc['dc.contributor.department']
  end

  test 'parses degree data as DSpace DC' do
    # One degree
    t = theses(:one)
    dss_friendly_thesis(t)
    d1 = degrees(:one)
    d1.degree_type_id = degree_types(:bachelor).id
    d1.save
    dc = dspace_dc(t)
    assert_equal ['MFA'], dc['dc.description.degree']
    assert_equal ['Master of Fine Arts'], dc['thesis.degree.name']
    assert_equal ['Bachelor'], dc['mit.thesis.degree']

    # Multiple degrees
    d2 = degrees(:two)
    d2.degree_type_id = degree_types(:master).id
    d2.save
    t.degrees = [d1, d2]
    t.save
    dc = dspace_dc(t)
    assert_equal %w[MFA JD], dc['dc.description.degree']
    assert_equal ['Master of Fine Arts', 'Master of Fine Arts'], dc['thesis.degree.name']
    assert_equal %w[Bachelor Master], dc['mit.thesis.degree']

    # Does not repeat degree types
    d2.degree_type_id = degree_types(:bachelor).id
    d2.save
    dc = dspace_dc(t)
    assert_equal ['Bachelor'], dc['mit.thesis.degree']
  end

  test 'parses file data as DSpace DC' do
    t = theses(:one)
    dss_friendly_thesis(t)
    dc = dspace_dc(t)
    blob = t.files.first.blob
    assert_equal blob.created_at, dc['dc.date.submitted']
  end

  test 'compiles constituent DC metadata on instantiation' do
    t = theses(:one)
    dss_friendly_thesis(t)
    dc = dspace_dc(t)
    assert_equal 'Massachusetts Institute of Technology', dc['dc.publisher']
    assert_equal 'Thesis', dc['dc.type']

    # Checking for presence of keys instead of exact values, as we verify values in other tests.
    assert_equal ['dc.publisher', 'dc.type', 'dc.title', 'dc.description.abstract', 'dc.date.issued',
                  'dc.contributor.author', 'dc.identifier.orcid', 'dc.contributor.advisor', 'dc.contributor.department',
                  'dc.description.degree', 'thesis.degree.name', 'mit.thesis.degree', 'dc.rights', 'dc.rights.uri',
                  'dc.date.submitted'], dc.keys
  end

  test 'metadata file is structured as expected for DSS' do
    t = theses(:one)
    dss_friendly_thesis(t)
    serialized = DspaceMetadata.new(t).serialize_dss_metadata

    # Make sure the JSON is actually serialized
    assert_instance_of String, serialized

    # Unserialize the JSON so we can check that it's well-formed
    unserialized = JSON.parse(serialized)
    assert_equal ['metadata'], unserialized.keys

    # Expected value goes first so that a failure message labels expected and actual correctly.
    assert_equal({ 'key' => 'dc.publisher', 'value' => 'Massachusetts Institute of Technology' },
                 unserialized['metadata'].first)
  end
end