Skip to content

Commit

Permalink
Theses not publishable with duplicate filenames
Browse files Browse the repository at this point in the history
Why are these changes being introduced:

* we cannot send theses to preservation with duplicate filenames
* preservation is a step after publication, so doing this check before
  publication will ensure we don't publish things we can't preserve

Relevant ticket(s):

* https://mitlibraries.atlassian.net/browse/ETD-648

How does this address that need:

* Moves the baggable? logic to it's own module
* Inlcudes that module in both SIP and Thesis models
* Uses the unique_filenames? check as part of the thesis publication
  checks
* Adds a visual display to the processor checklist to indicate if
  filenames are unique

Document any side effects to this change:

* There is no direct mechanism in the application to allow a processor
  to resolve duplicate filename issues
  • Loading branch information
JPrevost committed Oct 4, 2023
1 parent 1a92aa6 commit 2f3faf3
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 29 deletions.
19 changes: 19 additions & 0 deletions app/models/baggable.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
module Baggable
# Before we try to bag anything, we need to check if it meets a few conditions. All published theses should have
# at least one file attached, no duplicate filenames, a handle pointing to its DSpace record, and an accession number.
def baggable_thesis?(thesis)
return false unless thesis

thesis.files.any? && thesis.dspace_handle.present? && unique_filenames?(thesis) && thesis.copyright.present? \
&& thesis.accession_number.present?
end

def unique_filenames?(thesis)
!duplicate_filenames?(thesis)
end

def duplicate_filenames?(thesis)
filenames = thesis.files.map { |f| f.filename.to_s }
filenames.select { |f| filenames.count(f) > 1 }.uniq.any?
end
end
21 changes: 6 additions & 15 deletions app/models/submission_information_package.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,14 @@
# Note: instances of this class are invalid without an associated thesis that has a DSpace handle, a copyright, and
# at least one attached file with no duplicate filenames.
class SubmissionInformationPackage < ApplicationRecord
include Baggable
include Checksums

has_paper_trail
belongs_to :thesis
has_one_attached :bag

validates :baggable_thesis?, presence: true
validates :baggable?, presence: true

before_create :set_metadata, :set_bag_declaration, :set_manifest, :set_bag_name

Expand All @@ -43,6 +44,10 @@ def data

private

def baggable?
baggable_thesis?(thesis)
end

def set_metadata
self.metadata = ArchivematicaMetadata.new(thesis).to_csv
end
Expand All @@ -68,18 +73,4 @@ def set_bag_name
safe_handle = thesis.dspace_handle.gsub('/', '_')
self.bag_name = "#{safe_handle}-thesis-#{thesis.submission_information_packages.count + 1}"
end

# Before we try to bag anything, we need to check if it meets a few conditions. All published theses should have
# at least one file attached, no duplicate filenames, a handle pointing to its DSpace record, and an accession number.
def baggable_thesis?
return false unless thesis

thesis.files.any? && thesis.dspace_handle.present? && !duplicate_filenames? && thesis.copyright.present? \
&& thesis.accession_number.present?
end

def duplicate_filenames?
filenames = thesis.files.map { |f| f.filename.to_s }
filenames.select { |f| filenames.count(f) > 1 }.uniq.any?
end
end
5 changes: 4 additions & 1 deletion app/models/thesis.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
#

class Thesis < ApplicationRecord
include Baggable

has_paper_trail

belongs_to :copyright, optional: true
Expand Down Expand Up @@ -203,7 +205,8 @@ def evaluate_status
authors_graduated?,
departments_have_dspace_name?,
degrees_have_types?,
accession_number.present?
accession_number.present?,
unique_filenames?(self)
].all?
end

Expand Down
7 changes: 7 additions & 0 deletions app/views/thesis/process_theses.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,13 @@
label: false %>
</fieldset>
</li>
<li>
<%= f.input :files_have_unique_names?, as: :string,
readonly: true,
label_html: { style: 'width: 50%' },
input_html: { class: 'disabled', style: 'width: 40%', value: f.object.unique_filenames?(f.object)? 'Yes' : 'No' },
label: 'All files have unique names' %>
</li>
</ul>
</div>
</div>
Expand Down
55 changes: 42 additions & 13 deletions test/models/thesis_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -598,19 +598,20 @@ def attach_file_with_purpose_to(thesis, purpose = 'thesis_pdf')
thesis.save
thesis.reload
# Fixture meets all conditions
assert_equal true, thesis.valid?
assert_equal true, thesis.files?
assert_equal true, thesis.files_have_purpose?
assert_equal true, thesis.files_complete
assert_equal true, thesis.metadata_complete
assert_equal false, thesis.issues_found
assert_equal true, thesis.no_issues_found?
assert_equal true, thesis.authors_graduated?
assert_equal false, thesis.active_holds?
assert_equal true, thesis.no_active_holds?
assert_equal true, thesis.departments_have_dspace_name?
assert_equal true, thesis.degrees_have_types?
assert_equal true, thesis.accession_number.present?
assert thesis.valid?
assert thesis.files?
assert thesis.files_have_purpose?
assert thesis.files_complete
assert thesis.metadata_complete
refute thesis.issues_found
assert thesis.no_issues_found?
assert thesis.authors_graduated?
refute thesis.active_holds?
assert thesis.no_active_holds?
assert thesis.departments_have_dspace_name?
assert thesis.degrees_have_types?
assert thesis.accession_number.present?
assert thesis.unique_filenames?(thesis)
assert_equal 'Publication review', thesis.publication_status
# Attempting to set a different status will be overwritten by the update_status method
thesis.publication_status = 'Not ready for publication'
Expand Down Expand Up @@ -1519,4 +1520,32 @@ def attach_file_with_purpose_to(thesis, purpose = 'thesis_pdf')
assert_nil t.accession_number
assert_not_equal 'Publication review', t.publication_status
end

test 'master theses cannot be put into publication review without unique filenames' do
t = theses(:master)
t.save
t.reload
assert_equal 'Publication review', t.publication_status
assert t.unique_filenames?(t)

attach_file_with_purpose_to(t, purpose = 'signature_page')
t.save
t.reload
assert_not_equal 'Publication review', t.publication_status
refute t.unique_filenames?(t)
end

test 'doctoral theses cannot be put into publication review without unique filenames' do
t = theses(:doctor)
t.save
t.reload
assert_equal 'Publication review', t.publication_status
assert t.unique_filenames?(t)

attach_file_with_purpose_to(t, purpose = 'signature_page')
t.save
t.reload
assert_not_equal 'Publication review', t.publication_status
refute t.unique_filenames?(t)
end
end

0 comments on commit 2f3faf3

Please sign in to comment.