-
Notifications
You must be signed in to change notification settings - Fork 1
/
aims_ingestor.rb
executable file
·69 lines (51 loc) · 1.57 KB
/
aims_ingestor.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env ruby
# Walks every entry under +obj+ and, for each page image named
# "<prefix>-<num>.jp2", runs pdftotext on page <num> of every PDF that sits in
# the same directory, writing the text to "page-<num>.txt" in that directory.

# The library file is lowercase; require 'FileUtils' raises LoadError on
# case-sensitive filesystems.
require 'fileutils'

# Root of the tree that is scanned.
obj = '/tmp/Full_House'

# Returns the page number (as a String of digits) encoded in a
# "<prefix>-<num>.jp2" path, or nil when the path does not end in ".jp2" or
# carries no numeric part after the first "-".
def jp2_page_number(path)
  # Check the real extension; a substring test would also accept
  # names such as "page-3.jp2.txt".
  return nil unless File.extname(path) == '.jp2'

  num = File.basename(path, '.jp2').split('-')[1]
  num if num && num =~ /\A\d+\z/
end

# Builds the pdftotext argument vector that extracts page +num+ of +pdf_path+
# into "page-<num>.txt" inside the PDF's own directory.
def pdftotext_command(pdf_path, num)
  txt_path = File.join(File.dirname(pdf_path), "page-#{num}.txt")
  ['pdftotext', '-f', num, '-l', num, '-layout', pdf_path, txt_path]
end

Dir.glob(File.join(obj, '**', '*')).each do |f|
  # --- Disabled conversion steps, preserved from the original script for reference ---
  #
  # # convert html file into PS file
  # if f.include?('.htm')
  #   fout = File.join(File.dirname(f), File.basename(f, '.htm'))
  #   s = system("perl /usr/local/bin/html2ps -f /Users/cfitz/Downloads/html2ps-1.0b7/sample -o #{fout}.ps #{f}")
  #   puts s
  # end
  #
  # # convert ps file into jp2000
  # if f.include?('.ps')
  #   fout = File.join(File.dirname(f), "page.jp2")
  #   s = system("convert #{f} #{fout}")
  # end
  #
  # # ImageMagick starts with 0 for pages, which we don't want...
  # if f.include?('.jp2')
  #   fbase = File.basename(f, '.jp2')
  #   fparts = fbase.split('-')
  #   num = fparts[1].to_i + 1
  #   newname = fparts[0] + '-' + num.to_s + '.jp2'
  #   FileUtils.mkdir_p(File.join('/tmp', File.dirname(f)))
  #   fout = File.join('/tmp', File.dirname(f), newname)
  #   FileUtils.move f, fout, :verbose => true
  # end
  #
  # # convert ps to pdf
  # if f.include?('.ps')
  #   fname = File.basename(f, '.ps') + ".pdf"
  #   fout = File.join(File.dirname(f), fname)
  #   s = system("ps2pdf #{f} #{fout}")
  # end

  # Active step: extract the text of the matching PDF page for each page image.
  num = jp2_page_number(f)
  unless num
    # A .jp2 without a usable page number would otherwise yield a malformed
    # pdftotext command line, so report it and move on.
    warn "skipping #{f}: no page number in file name" if File.extname(f) == '.jp2'
    next
  end

  # NOTE: when a directory holds several PDFs they all write to the same
  # page-<num>.txt, so the last one processed wins.
  Dir.glob(File.join(File.dirname(f), '*.pdf')).each do |pdf_path|
    # Argument-vector form bypasses the shell, so paths containing spaces or
    # shell metacharacters are passed through untouched.
    output = IO.popen(pdftotext_command(pdf_path, num), &:read)
    puts output
    warn "pdftotext failed for #{pdf_path} (page #{num})" unless $?.success?
  end
end
# this moves the files back into the directory