# Compiles the DSpace Dublin Core metadata for a thesis and serializes it for submission to DSS.
#
# All fields are collected into the @dc hash when the object is instantiated. Keys are metadata field names
# (e.g. 'dc.title'); values are strings, arrays of strings, or a timestamp. Insertion order of the keys is
# preserved, so the serialized output lists fields in the order they are built in #initialize.
class DspaceMetadata
  # Second rights statement used whenever the author holds the copyright.
  AUTHOR_COPYRIGHT_NOTICE = 'Copyright retained by author(s)'.freeze

  # thesis - an object responding to title, abstract, grad_date, users, advisors, departments, degrees,
  #          copyright, license and files.
  def initialize(thesis)
    @dc = {}
    @dc['dc.publisher'] = 'Massachusetts Institute of Technology'
    @dc['dc.type'] = 'Thesis'
    title(thesis)
    contributors(thesis.users, thesis.advisors)
    departments(thesis.departments)
    degrees(thesis.degrees)
    copyright(thesis.copyright, thesis.license)
    date_transferred(thesis.files)
  end

  # Generates JSON metadata file required for submission to DSS.
  #
  # Returns a JSON string of the form { "metadata": [{ "key": ..., "value": ... }, ...] }.
  def serialize_dss_metadata
    { 'metadata' => @dc.map { |k, v| { 'key' => k, 'value' => v } } }.to_json
  end

  # Sets the title, the abstract (only when one exists) and the issue date, formatted as YYYY-MM.
  def title(thesis)
    @dc['dc.title'] = thesis.title
    @dc['dc.description.abstract'] = thesis.abstract if thesis.abstract
    @dc['dc.date.issued'] = thesis.grad_date.strftime('%Y-%m')
  end

  # Sets author names, author ORCIDs (only when at least one author has one) and advisor names.
  def contributors(thesis_users, thesis_advisors)
    @dc['dc.contributor.author'] = thesis_users.map(&:preferred_name)

    # Look the ORCIDs up once and reuse the result rather than parsing the users twice.
    orcids = parse_orcids(thesis_users)
    @dc['dc.identifier.orcid'] = orcids if orcids
    @dc['dc.contributor.advisor'] = thesis_advisors.map(&:name)
  end

  # We don't care about the order of the ORCIDs because DSpace can't assign them to a specific user.
  #
  # Returns an array of the non-nil ORCIDs, or nil when no user has one.
  def parse_orcids(thesis_users)
    orcids = thesis_users.map(&:orcid).compact
    orcids unless orcids.empty?
  end

  # Sets the DSpace names of the thesis departments.
  def departments(thesis_depts)
    @dc['dc.contributor.department'] = thesis_depts.map(&:name_dspace)
  end

  # Sets degree abbreviations, degree names and degree types.
  def degrees(thesis_degrees)
    @dc['dc.description.degree'] = thesis_degrees.map(&:abbreviation)
    @dc['thesis.degree.name'] = thesis_degrees.map(&:name_dspace)

    # Degree types should not be repeated if they are the same type.
    @dc['mit.thesis.degree'] = thesis_degrees.map { |degree| degree.degree_type.name }.uniq
  end

  # Sets the rights statements and, when available, the rights URI.
  #
  # thesis_copyright - responds to holder, statement_dspace and url.
  # thesis_license   - responds to license_type and url; may be nil.
  def copyright(thesis_copyright, thesis_license)
    if thesis_copyright.holder != 'Author' # copyright holder is anyone but author
      # NOTE(review): 'U+00A9' is emitted as literal text rather than the copyright sign; the existing tests
      # assert this exact string, so it is kept as is. Confirm that this is what DSpace should receive.
      assign_rights(thesis_copyright.statement_dspace, "U+00A9 #{thesis_copyright.holder}", thesis_copyright.url)
    elsif thesis_license # author holds copyright and provides a license
      # Theoretically both license and copyright URLs are required for publication, but there are no constraints on
      # the models, and we want to future-proof this.
      assign_rights(thesis_license.license_type, AUTHOR_COPYRIGHT_NOTICE, thesis_license.url)
    else # author holds copyright and no license provided
      assign_rights(thesis_copyright.statement_dspace, AUTHOR_COPYRIGHT_NOTICE, thesis_copyright.url)
    end
  end

  # Sets the submission date to the creation time of the blob behind the first file whose purpose is
  # 'thesis_pdf'. The field is left out when no such file exists, instead of failing on nil.
  def date_transferred(files)
    thesis_pdf = files.find { |file| file.purpose == 'thesis_pdf' }
    @dc['dc.date.submitted'] = thesis_pdf.blob.created_at if thesis_pdf
  end

  private

  # Writes the two rights statements, plus the rights URI when one is given.
  def assign_rights(statement, notice, uri)
    @dc['dc.rights'] = [statement, notice]
    @dc['dc.rights.uri'] = uri if uri
  end
end
require 'test_helper'

# Tests for DspaceMetadata.
#
# These tests rely on the users fixtures (test/fixtures/users.yml): users(:yo) has ORCID '0001', users(:basic) has
# ORCID '0002', and users(:second) / users(:third) have the preferred names 'Student, Second' / 'Student, Third'.
class DspaceMetadataTest < ActiveSupport::TestCase
  # Adding some properties that are not included in our fixtures
  #
  # Assigns the bachelor degree type to the thesis' first degree and attaches a PDF with the 'thesis_pdf' purpose.
  # Returns the result of saving the thesis.
  def dss_friendly_thesis(thesis)
    degree = thesis.degrees.first
    degree.degree_type_id = degree_types(:bachelor).id
    degree.save
    file = Rails.root.join('test', 'fixtures', 'files', 'a_pdf.pdf')

    # The block form closes the file handle once we are done with it. The handle stays open until the thesis has
    # been saved, in case the attachment is only read at that point.
    File.open(file) do |io|
      thesis.files.attach(io: io, filename: 'a_pdf.pdf')
      thesis.files.first.description = 'My thesis'
      thesis.files.first.purpose = 'thesis_pdf'
      thesis.save
    end
  end

  # Builds the metadata for a thesis and returns the compiled hash of DC fields.
  def dc_for(thesis)
    DspaceMetadata.new(thesis).instance_variable_get(:@dc)
  end

  test 'parses thesis data as DSpace DC' do
    t = theses(:one)
    dss_friendly_thesis(t)
    dc = dc_for(t)
    assert_equal 'MyString', dc['dc.title']
    assert_equal '2017-09', dc['dc.date.issued']
    assert_equal 'MyText', dc['dc.description.abstract']

    # No abstract (optional for undergraduate theses)
    t.abstract = nil
    t.save
    dc = dc_for(t)
    assert_nil dc['dc.description.abstract']
  end

  test 'parses author data as DSpace DC' do
    # One author
    t = Thesis.create(title: 'Who cares', graduation_year: '2021', graduation_month: 'February',
                      advisors: [advisors(:first)], users: [users(:second)], degrees: [degrees(:one)],
                      departments: [departments(:one)], copyright: copyrights(:mit))
    dss_friendly_thesis(t)
    dc = dc_for(t)
    assert_equal ['Student, Second'], dc['dc.contributor.author']

    # More than one author
    t.users = [users(:second), users(:third)]
    t.save
    dc = dc_for(t)
    assert_equal ['Student, Second', 'Student, Third'], dc['dc.contributor.author']
  end

  test 'parses ORCIDs' do
    # One author and one ORCID
    t1 = theses(:one)
    dss_friendly_thesis(t1)
    dc = dc_for(t1)
    assert_equal ['0001'], dc['dc.identifier.orcid']

    # Multiple authors and multiple ORCIDs
    t2 = theses(:two)
    dss_friendly_thesis(t2)

    # Since theses(:two) has no advisors, we need to attach one as DspaceMetadata expects this. This can be
    # removed once we update the Thesis validations to reflect publication requirements.
    t2.advisors = [advisors(:first)]
    t2.save
    dc = dc_for(t2)

    # The order of the ORCIDs is not significant, so compare them sorted.
    assert_equal %w[0001 0002], dc['dc.identifier.orcid'].sort

    # Multiple authors and only one ORCID
    t2.users.second.orcid = nil
    t2.users.second.save
    dc = dc_for(t2)
    assert_equal ['0002'], dc['dc.identifier.orcid']

    # One author and no ORCID
    t1.users.first.orcid = nil
    t1.users.first.save
    dc = dc_for(t1)
    assert_nil dc['dc.identifier.orcid']
  end

  test 'parses advisor data as DSpace DC' do
    t = theses(:one)
    dss_friendly_thesis(t)
    dc = dc_for(t)

    # One advisor
    assert_equal ['Addy McAdvisor'], dc['dc.contributor.advisor']

    # More than one advisor
    t.advisors = [advisors(:first), advisors(:second)]
    t.save
    dc = dc_for(t)
    assert_equal ['Addy McAdvisor', 'Viola McAdvisor'], dc['dc.contributor.advisor']
  end

  test 'parses copyright data as DSpace DC' do
    # Author holds copyright and license is present
    t = theses(:downloaded)
    t.copyright = copyrights(:author)
    t.license = licenses(:ccby)
    t.users = [users(:yo)]
    t.advisors = [advisors(:first)]
    t.save
    dss_friendly_thesis(t)
    dc = dc_for(t)
    assert_equal ['Attribution 4.0 International (CC BY 4.0)', 'Copyright retained by author(s)'],
                 dc['dc.rights']
    assert_equal 'https://creativecommons.org/licenses/by/4.0/', dc['dc.rights.uri']

    # No URI
    t.license = licenses(:nocc)
    t.save
    dc = dc_for(t)
    assert_nil dc['dc.rights.uri']

    # Author holds copyright and no license
    t.license = nil
    t.save
    dc = dc_for(t)
    assert_equal ['In Copyright', 'Copyright retained by author(s)'], dc['dc.rights']
    assert_equal 'https://rightsstatements.org/page/InC/1.0/', dc['dc.rights.uri']

    # Any other copyright holder
    t.copyright = copyrights(:mit)
    t.save
    dc = dc_for(t)
    assert_equal ['In Copyright - Educational Use Permitted', 'U+00A9 MIT'], dc['dc.rights']
    assert_equal 'http://rightsstatements.org/page/InC-EDU/1.0/', dc['dc.rights.uri']
  end

  test 'parses department data as DSpace DC' do
    # One department
    t = theses(:one)
    dss_friendly_thesis(t)
    dc = dc_for(t)
    assert_equal ['Massachusetts Institute of Technology. Department of Aeronautics and Astronautics'],
                 dc['dc.contributor.department']

    # Multiple departments
    t.departments = [departments(:one), departments(:two)]
    t.save
    dc = dc_for(t)
    assert_equal ['Massachusetts Institute of Technology. Department of Aeronautics and Astronautics',
                  'MIT Anthropology Program'], dc['dc.contributor.department']
  end

  test 'parses degree data as DSpace DC' do
    # One degree
    t = theses(:one)
    dss_friendly_thesis(t)
    d1 = degrees(:one)
    d1.degree_type_id = degree_types(:bachelor).id
    d1.save
    dc = dc_for(t)
    assert_equal ['MFA'], dc['dc.description.degree']
    assert_equal ['Master of Fine Arts'], dc['thesis.degree.name']
    assert_equal ['Bachelor'], dc['mit.thesis.degree']

    # Multiple degrees
    d2 = degrees(:two)
    d2.degree_type_id = degree_types(:master).id
    d2.save
    t.degrees = [d1, d2]
    t.save
    dc = dc_for(t)
    assert_equal %w[MFA JD], dc['dc.description.degree']
    assert_equal ['Master of Fine Arts', 'Master of Fine Arts'], dc['thesis.degree.name']
    assert_equal %w[Bachelor Master], dc['mit.thesis.degree']

    # Does not repeat degree types
    d2.degree_type_id = degree_types(:bachelor).id
    d2.save
    dc = dc_for(t)
    assert_equal ['Bachelor'], dc['mit.thesis.degree']
  end

  test 'parses file data as DSpace DC' do
    t = theses(:one)
    dss_friendly_thesis(t)
    dc = dc_for(t)
    blob = t.files.first.blob
    assert_equal blob.created_at, dc['dc.date.submitted']
  end

  test 'compiles constituent DC metadata on instantiation' do
    t = theses(:one)
    dss_friendly_thesis(t)
    dc = dc_for(t)
    assert_equal 'Massachusetts Institute of Technology', dc['dc.publisher']
    assert_equal 'Thesis', dc['dc.type']

    # Checking for presence of keys instead of exact values, as we verify values in other tests.
    assert_equal ['dc.publisher', 'dc.type', 'dc.title', 'dc.description.abstract', 'dc.date.issued',
                  'dc.contributor.author', 'dc.identifier.orcid', 'dc.contributor.advisor', 'dc.contributor.department',
                  'dc.description.degree', 'thesis.degree.name', 'mit.thesis.degree', 'dc.rights', 'dc.rights.uri',
                  'dc.date.submitted'], dc.keys
  end

  test 'metadata file is structured as expected for DSS' do
    t = theses(:one)
    dss_friendly_thesis(t)
    serialized = DspaceMetadata.new(t).serialize_dss_metadata

    # Make sure the JSON is actually serialized
    assert_instance_of String, serialized

    # Unserialize the JSON so we can check that it's well-formed
    unserialized = JSON.parse(serialized)
    assert_equal ['metadata'], unserialized.keys

    # Expected value goes first so that a failure message labels the two sides correctly.
    assert_equal({ 'key' => 'dc.publisher', 'value' => 'Massachusetts Institute of Technology' },
                 unserialized['metadata'].first)
  end
end