Skip to content

Commit

Permalink
Merge pull request #310 from internetee/119627029-legal_doc_dublicates-2
Browse files Browse the repository at this point in the history
119627029 legal doc dublicates 2
  • Loading branch information
vohmar authored Dec 23, 2016
2 parents afc65b3 + 01a352e commit c1cb598
Show file tree
Hide file tree
Showing 7 changed files with 327 additions and 47 deletions.
5 changes: 5 additions & 0 deletions app/models/domain_cron.rb
Original file line number Diff line number Diff line change
Expand Up @@ -142,4 +142,9 @@ def self.destroy_with_message(domain)
)
end

# Runs the `legal_doc:remove_duplicates` Rake task.
#
# The task is re-enabled before being invoked so that it is executed again
# even when it has already run earlier in the same process.
def self.delete_legal_doc_duplicates
  task = Rake::Task['legal_doc:remove_duplicates']
  task.reenable
  task.invoke
end

end
79 changes: 70 additions & 9 deletions app/models/legal_document.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
class LegalDocument < ActiveRecord::Base
cattr_accessor :explicitly_write_file
include EppErrors
MIN_BODY_SIZE = (1.37 * 3.kilobytes).ceil

Expand All @@ -16,7 +17,7 @@ class LegalDocument < ActiveRecord::Base
validate :val_body_length, if: ->(file){ file.path.blank? && !Rails.env.staging?}

before_create :add_creator
before_save :save_to_filesystem
before_save :save_to_filesystem, if: :body

def epp_code_map
{
Expand All @@ -32,22 +33,82 @@ def val_body_length


# Decodes the Base64 +body+ and stores it as a new file under
# ENV['legal_documents_dir']/YYYY/MM/DD, then records the file location in
# +path+ and the SHA1 hex digest of the decoded content in +checksum+.
#
# The file name is "<timestamp>_<4 digit suffix>.<document_type>"; a new
# suffix is drawn until the name does not collide with an existing file.
# In the test environment the file is only written when
# self.class.explicitly_write_file is set.
def save_to_filesystem
  binary = Base64.decode64(body)
  digest = Digest::SHA1.new.update(binary).to_s

  loop do
    # Last four characters of a random float's string form; retry when they
    # are not a non-zero four character value.
    suffix = SecureRandom.random_number.to_s.last(4)
    next if suffix.to_i.zero? || suffix.length < 4

    # Sample the clock once so the directory and the file name agree.
    now = Time.zone.now
    dir = "#{ENV['legal_documents_dir']}/#{now.strftime('%Y/%m/%d')}"
    FileUtils.mkdir_p(dir, mode: 0o775)
    self.path = "#{dir}/#{now.to_formatted_s(:number)}_#{suffix}.#{document_type}"
    break unless File.file?(path)
  end

  if !Rails.env.test? || self.class.explicitly_write_file
    File.open(path, 'wb') { |f| f.write(binary) }
  end
  self.checksum = digest
end

# Returns the SHA1 hex digest of the binary content of the file at +path+.
def calc_checksum
  Digest::SHA1.hexdigest(File.binread(path))
end

# Stores the current PaperTrail whodunnit value in +creator_str+.
#
# Always returns true, regardless of the value assigned.
# NOTE(review): presumably so that a nil whodunnit cannot halt the callback
# chain this method is registered in - confirm.
def add_creator
  whodunnit = ::PaperTrail.whodunnit
  self.creator_str = whodunnit
  true
end


# Collapses legal documents with identical content onto a single file.
#
# For every Domain legal document that has a checksum and whose file exists
# on disk (the first document seen per checksum), documents with the same
# checksum but a different path are re-pointed at that document's path and
# their own file is deleted. Candidates are:
#   * other legal documents of the same domain, and
#   * legal documents of contacts whose ids are collected from the domain's
#     DomainVersion rows.
#
# Progress lines and a final summary are written to stdout.
def self.remove_duplicates
  start = Time.zone.now.to_f
  puts '-----> Removing legal documents duplicates'
  count = 0
  seen_checksums = {}

  # Points +duplicate+ at the original's file and removes its own file.
  # The old path is captured first so the log names the file that was
  # actually deleted, and the file is only deleted once the row was updated.
  relink = lambda do |duplicate, original, owner|
    removed_path = duplicate.path
    next unless duplicate.update(path: original.path)

    File.delete(removed_path) if File.exist?(removed_path)
    count += 1
    puts "File #{removed_path} has been removed by #{owner} #{duplicate.documentable_id}. Document id: #{duplicate.id}"
  end

  LegalDocument.where(documentable_type: "Domain").where.not(checksum: [nil, ""]).find_each do |original|
    next if seen_checksums.key?(original.checksum)
    next unless original.path.present? && File.exist?(original.path)

    seen_checksums[original.checksum] = true

    LegalDocument.where(documentable_type: "Domain", documentable_id: original.documentable_id).
      where(checksum: original.checksum).
      where.not(id: original.id).where.not(path: original.path).each do |duplicate|
      relink.call(duplicate, original, 'Domain')
    end

    raw_ids = DomainVersion.where(item_id: original.documentable_id).distinct.
              pluck("object->>'registrant_id'", "object_changes->>'registrant_id'",
                    "children->>'tech_contacts'", "children->>'admin_contacts'",
                    "tech_contact_ids", "admin_contact_ids").flatten.uniq

    # Plucked values may be hashes, JSON-encoded strings or plain values;
    # strings that are not valid JSON are coerced with to_i.
    contact_ids = raw_ids.map do |value|
      case value
      when Hash
        value["id"]
      when String
        begin
          JSON.parse(value)
        rescue JSON::ParserError
          value.to_i
        end
      else
        value
      end
    end.flatten.compact.uniq

    LegalDocument.where(documentable_type: "Contact", documentable_id: contact_ids).
      where(checksum: original.checksum).where.not(path: original.path).each do |duplicate|
      relink.call(duplicate, original, 'Contact')
    end
  end

  puts "-----> Duplicates fixed for #{count} rows in #{(Time.zone.now.to_f - start).round(2)} seconds"
end
end
6 changes: 6 additions & 0 deletions db/migrate/20160629114503_add_hash_to_legal_doc.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Adds an indexed string column +checksum+ to the +legal_documents+ table.
class AddHashToLegalDoc < ActiveRecord::Migration
  def change
    change_table :legal_documents do |t|
      t.string :checksum
      t.index :checksum
    end
  end
end
Loading

0 comments on commit c1cb598

Please sign in to comment.