-
Notifications
You must be signed in to change notification settings - Fork 1
/
BibleApi.rb
executable file
·204 lines (167 loc) · 6.48 KB
/
BibleApi.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
#!/usr/bin/env ruby
# using bundler http://gembundler.com/
require 'rubygems'
require 'bundler/setup'
Bundler.require(:default)
# all the other things we want to use
require 'pp' # prettyprint (for errors and testing)
# other includes
require './api-key.rb' # BIBLE_KEY
require './models/BibleSearch.rb' # Bible Search
require './models/Pack.rb' # Pack model
require './models/Verse.rb' # Verse model
require './models/VerseBase.rb' # VerseBase model
# Fetches Bible passages for a pack of verse references through BibleSearch
# and turns each returned passage into a Verse (persisted through the Verse
# model) or a plain VerseBase, depending on the :useMongo option.
class BibleApi
  # Constants
  # amount of time (in seconds) to sleep between API calls
  SLEEP_TIME = 0.1

  # @param opts [Hash] overrides for the defaults below
  # @option opts [Boolean] :useMongo (true) look verses up in / save verses
  #   through the Mongo-backed Verse model
  # @option opts [Boolean] :overwrite (false) re-fetch verses even when they
  #   are already stored
  # @option opts [Array<String>] :translations (['eng-ESV']) translations
  #   handed to BibleSearch and used to build cache keys
  # @option opts [String] :mongoHost ('localhost') Mongo server host
  # @option opts [Integer] :mongoPort (27017) Mongo server port
  # @option opts [String] :mongoDatabase ('versemachine') Mongo database name
  def initialize(opts = {})
    @options = {
      :useMongo => true,
      :overwrite => false,
      :translations => ['eng-ESV'],
      :mongoHost => 'localhost',
      :mongoPort => 27017,
      :mongoDatabase => 'versemachine'
    }.merge(opts)

    return unless @options[:useMongo]

    # TODO add error handling when server is unreachable
    MongoMapper.connection = Mongo::Connection.new(@options[:mongoHost], @options[:mongoPort])
    MongoMapper.database = @options[:mongoDatabase]
  end

  # Works out which references of the pack still have to be fetched and
  # fetches them.
  #
  # @param pack [#title, #verses] pack whose references should be resolved
  # @return [Array] the verses built from the fetched passages
  def get_pack_data(pack)
    get_verses(get_verses_needed(pack))
  end

  # Determines which references of the pack have to be requested.
  #
  # With Mongo enabled only the references reported by check_for_verses are
  # returned. Without Mongo there is nothing to look verses up in, so every
  # (unique) reference of the pack is needed.
  #
  # @param pack [#title, #verses]
  # @return [Array] references to request; empty (never nil) when there is
  #   nothing to fetch
  def get_verses_needed(pack)
    unless @options[:useMongo]
      needed = pack.verses.to_a.uniq
      puts "Verses needed for #{pack.title}: #{needed.to_s}"
      return needed
    end

    # only get the verses we don't already have in Mongo
    needed = check_for_verses(pack)
    if needed.empty?
      puts "Pack #{pack.title}: all verses found in MongoDB"
      return []
    end

    puts "Verses needed for #{pack.title}: #{needed.to_s}"
    needed
  end

  # Requests every reference individually and distills the returned passages
  # into verses.
  #
  # TODO optimize for multiple verses
  # using single-verse query for now in order to
  # be able to map a single reference_requested
  # to a single passage (easier to check for
  # existence in mongo already if we know this)
  #
  # @param versesNeeded [Array, nil] references to request
  # @return [Array] one verse per passage returned; empty when versesNeeded
  #   is nil or empty
  def get_verses(versesNeeded)
    # nothing to do: skip building the search client altogether
    return [] if versesNeeded.nil? || versesNeeded.empty?

    verses = []
    search = BibleSearch.new(@options[:translations])

    # TODO find a way to minimize API calls and still keep
    # track of the original reference requested
    puts "Verses needed: #{versesNeeded.to_s}"
    versesNeeded.each do |reference|
      url = search.get_search_url(reference)
      # log before the request so the URL is visible even if the request fails
      puts "Retrieving from URL: #{url}"
      data = search.get_search_result(url)

      # distill the verses from the results
      get_passages(data).each do |passage|
        verses.push(distill(passage, reference))
      end

      # be nice: don't flood the service
      sleep(SLEEP_TIME)
    end

    verses
  end

  # Checks which references of the pack are missing from the verse store.
  #
  # A reference is needed when :overwrite is set, or when
  # Verse.find_by_cache_key returns nil for at least one of the configured
  # translations. The cache key has the form
  # "<translation downcased>::<reference downcased, whitespace removed>".
  #
  # @param pack [#verses]
  # @return [Array] needed references, each listed once, in pack order
  def check_for_verses(pack)
    needed = pack.verses.select do |reference|
      # if the verse is missing or we're overwriting it,
      # add it to the list to retrieve
      @options[:overwrite] || @options[:translations].any? do |translation|
        cache_key = translation.downcase + '::' + reference.downcase.gsub(/\s+/, "")
        Verse.find_by_cache_key(cache_key).nil?
      end
    end

    # We only want each reference once.
    # When we search, we grab results for all the translations desired.
    needed.uniq
  end

  # Parses the XML search result and selects its passage nodes.
  #
  # @param data [String] raw XML as returned by BibleSearch#get_search_result
  # @return [Nokogiri::XML::NodeSet] nodes matching 'passages passage'
  def get_passages(data)
    # prepare to parse
    doc = Nokogiri::XML(data) do |config|
      config.nocdata
    end

    # grab passages
    doc.css('passages passage')
  end

  # Builds a verse from a single passage node.
  #
  # Removes the <sup> elements from the passage (the node is modified in
  # place), then reads its version, display, copyright, path and text
  # children. NOTE(review): each of those children is assumed to be present;
  # a missing one raises NoMethodError on nil.
  #
  # @param passage [Nokogiri::XML::Node] passage node from get_passages
  # @param reference_requested [String] reference that produced this passage
  # @return [Object] the verse returned by create_verse
  def distill(passage, reference_requested)
    passage.css('sup').remove

    translation = passage.at_css('version').content
    reference = passage.at_css('display').content
    copyright = passage.at_css('copyright').content
    path = passage.at_css('path').content
    text = clean_text(passage.at_css('text').content)

    verse_options = {}
    if @options[:useMongo]
      verse_options = {
        'path' => path,
        'reference_requested' => reference_requested
      }
    end

    create_verse(reference, text, translation, copyright, verse_options)
  end

  # Reduces the HTML of a passage to plain, normalized text.
  #
  # @param passageText [String] HTML content of the passage's text node
  # @return [String] text without <sup>/<h3> elements, passed through cleanser
  def clean_text(passageText)
    html = Nokogiri::HTML(passageText)
    html.xpath("//sup").remove
    html.xpath("//h3").remove
    cleanser(html.content)
  end

  # Entity-encodes the text, trims leading/trailing whitespace and collapses
  # every run of whitespace (line breaks and tabs included) into one space.
  #
  # @param text [String]
  # @return [String] a new string; the argument is not modified
  def cleanser(text)
    # non-bang strip/gsub always return a string, so they can be chained
    # (strip!/gsub! return nil when nothing changed)
    HTMLEntities.new.encode(text).strip.gsub(/\s+/, ' ')
  end

  # Creates the verse object for a passage.
  #
  # With :useMongo the verse is created through the Verse model and saved,
  # with 'reference_requested' and 'path' added to its attributes; otherwise
  # a VerseBase is instantiated from the four basic attributes.
  #
  # @param reference [String]
  # @param text [String]
  # @param translation [String]
  # @param copyright [String]
  # @param verse_options [Hash] 'reference_requested' and 'path'; only read
  #   when :useMongo is set
  # @return [Verse, VerseBase]
  def create_verse(reference, text, translation, copyright, verse_options = {})
    # this is where the verse factory could be handy
    settings = {
      :reference => reference,
      :text => text,
      :translation => translation,
      :copyright => copyright
    }

    return VerseBase.new(settings) unless @options[:useMongo]

    # info needed to make retrieving from Mongo easier
    settings['reference_requested'] = verse_options['reference_requested']
    settings['path'] = verse_options['path']

    # TODO when using overwrite mode, check for existence of verse here
    # before writing to it, and if it's there, replace the old one.
    # As it is, we can end up with two copies of the same cache_key
    # (this can be worked around by setting
    # db.members.ensureIndex( { "cache_key": 1 }, { unique: true } )
    # in Mongo, but it isn't perfect and doesn't help us update the db.
    verse = Verse.create(settings)
    verse.save!
    verse
  end
end