-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfetcher.rb
42 lines (36 loc) · 1.16 KB
/
fetcher.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
require 'date'
require 'open-uri'
require 'pathname'
# Be nice to the host and sleep between http requests.
# Number of seconds, read from the SLEEP_TIME environment variable (converted
# with to_i); 3 when the variable is not set.
SLEEP_TIME = ENV.fetch('SLEEP_TIME', 3).to_i
# Mixin that downloads URLs through open-uri and keeps the response bodies in
# an on-disk cache, so a URL whose cache file already exists is not fetched
# again.
module Fetcher
  # Directory under which cache files are placed by default: `.cache/<yymmdd>`,
  # built from today's date on first use and memoized in @cache_dir.
  #
  # @return [Pathname]
  def cache_dir
    @cache_dir ||= Pathname.new(".cache/#{Date.today.strftime('%y%m%d')}")
  end

  # Returns the cache file for +url+ together with its contents, downloading
  # and caching the body first when the cache file does not exist yet.
  #
  # @param url [String, URI::HTTP] address to fetch; anything that is not
  #   already a URI::HTTP is run through URI.parse
  # @param cache_file [Pathname, String, nil] explicit cache location; when
  #   nil it is derived from the URL path (and query, if any) below #cache_dir
  # @return [Array(Pathname, String)] the cache file and the data
  # @raise whatever URI.open or the file operations raise; a failed download
  #   does not create the cache file
  def _get_cache_file_and_data(url, cache_file=nil)
    url = URI.parse(url) unless url.is_a?(URI::HTTP)

    unless cache_file
      # Mirror the URL below the cache directory: the path without its leading
      # slash, then the query string (verbatim) as one more path component.
      cache_file = cache_dir.join(url.path[1..-1])
      cache_file = cache_file.join(url.query) if url.query
    end
    # Accept plain String paths too; an existing Pathname is returned as-is.
    cache_file = Pathname(cache_file)
    cache_file.dirname.mkpath unless cache_file.dirname.exist?

    if cache_file.exist?
      STDERR.puts "Reading data from #{cache_file} ..."
      return [cache_file, cache_file.read]
    end

    # Download BEFORE opening the cache file for writing, so an error here
    # cannot leave an empty file behind that later calls would treat as a
    # valid cached response. URI.open is used because Kernel#open no longer
    # handles URIs on Ruby 3.0+; the block form closes the response IO.
    data = URI.open(url, &:read)

    STDERR.puts "Caching data to #{cache_file} ..."
    cache_file.open('w') { |f| f.write(data) }

    # Only real downloads are throttled; cache hits returned above.
    STDERR.puts "Sleeping for #{SLEEP_TIME} seconds ..."
    sleep SLEEP_TIME

    [cache_file, data]
  end

  # Makes sure +url+ is cached (fetching it if necessary) and returns the
  # cache file.
  #
  # @param url [String, URI::HTTP]
  # @param cache_file [Pathname, String, nil] see #_get_cache_file_and_data
  # @return [Pathname]
  def get_cache_file(url, cache_file=nil)
    _get_cache_file_and_data(url, cache_file)[0]
  end

  # Returns the body for +url+, read from the cache file when it exists and
  # downloaded (then cached) otherwise.
  #
  # @param url [String, URI::HTTP]
  # @param cache_file [Pathname, String, nil] see #_get_cache_file_and_data
  # @return [String]
  def get_data(url, cache_file=nil)
    _get_cache_file_and_data(url, cache_file)[1]
  end
end