From cbb18a68e74d6fde04570da801ab122ae822cee6 Mon Sep 17 00:00:00 2001 From: Mark Bussey Date: Sat, 13 Mar 2021 12:55:15 -0600 Subject: [PATCH] Refactor the GraduationService for improved performance The current GraduationService takes over 30 minutes to run against production data. Analysis indicated that the current query for candidate ETD works takes over 10 minutes to process and returns over 6000 ETDs that have already been processed by the service and are already in a 'published' state. This commit queries only for ETDs that are 'approved' which is the only group that are eligible for graduation and publication. Querying in this way is much faster and also eliminates the need for additional error handling code, thereby simplifying the overall service. The commit also removes dead code for the GraduationdateService. --- app/services/graduation_service.rb | 17 +++++++---------- app/services/graduationdate_service.rb | 6 ------ 2 files changed, 7 insertions(+), 16 deletions(-) delete mode 100644 app/services/graduationdate_service.rb diff --git a/app/services/graduation_service.rb b/app/services/graduation_service.rb index 5515e678c..8fe58a7c1 100644 --- a/app/services/graduation_service.rb +++ b/app/services/graduation_service.rb @@ -27,22 +27,19 @@ def self.load_data(path_to_data) @registrar_data = JSON.parse(File.read(path_to_data)) end - # Find all Etds in the 'approved' workflow state that do not yet have a degree_awarded value + # Find all Etds in the 'approved' workflow state that are eligible for graduation # @return [Array] An Array of ETD objects def self.graduation_eligible_works eligible_works = [] - problem_works = [] - no_degree_yet = Etd.where(degree_awarded: nil).to_a - no_degree_yet.each do |etd| - begin - eligible_works << etd if etd.to_sipity_entity.workflow_state_name == 'approved' - rescue - problem_works << etd.id + # Use #search_in_batches to avoid timeouts in the case where there are a large number of ETDs + # that have been approved and 
are pending graduation (i.e. publication) + Etd.search_in_batches({ workflow_state_name_ssim: 'approved' }, batch_size: 50) do |batch| + batch.each do |doc| + eligible_works << Etd.find(doc['id']) end end + Rails.logger.warn "Graduation service: There were #{eligible_works.count} ETDs eligible for graduation" - Rails.logger.warn "Graduation service: There were #{problem_works.count} where the workflow status could not be queried." - Rails.logger.error "Graduation service: Could not query workflow status for these works: #{problem_works.inspect}" if problem_works.count > 0 eligible_works end diff --git a/app/services/graduationdate_service.rb b/app/services/graduationdate_service.rb deleted file mode 100644 index dc4965bcd..000000000 --- a/app/services/graduationdate_service.rb +++ /dev/null @@ -1,6 +0,0 @@ -# services/graduationdate_service.rb -class GraduationdateService < Hyrax::LaevigataAuthorityService - def initialize - super('graduation_dates') - end -end