-
Notifications
You must be signed in to change notification settings - Fork 2
/
index.rb
69 lines (64 loc) · 1.88 KB
/
index.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/ruby
require 'rubygems'
require 'indextank'
require 'json'
require 'yaml'
# Load runtime settings. The keys read below are 'api_url', 'index_name'
# and 'categories'. YAML.load_file opens and closes config.yaml itself, so
# no file handle is left open for the rest of the run.
config = YAML.load_file('config.yaml')
api = IndexTank::Client.new config['api_url']

# 'categories' is a '|'-separated list whose entries are comma-separated,
# e.g. "a,b|c,d" becomes [["a", "b"], ["c", "d"]].
categories = config['categories'].split('|').map { |entry| entry.split(',') }
idxname = config['index_name']
index = api.indexes idxname

# Create the index if it does not exist yet, then poll every half second
# until it reports that it is running.
unless index.exists?
  index.add({:public_search => true})
  until index.running?
    sleep 0.5
    printf "waiting for index %s to be ready...\n", idxname
  end
end
printf "Ready.\n"
# Index every configured category.
#
# For each category pair c = [description, directory]:
#   * data/<directory>.js holds a JSON list of records, each with a 'permalink';
#   * data/<directory>/<permalink>.js holds the JSON properties of one record.
# Each readable record becomes one document; documents are sent with
# index.batch_insert in groups of at most batch_size.

# Builds the name field: "first last" for a person, the 'name' property
# otherwise. Absent name parts are left out instead of raising on nil.
def display_name(description, props)
  return props['name'] unless description == 'person'
  [props['first_name'], props['last_name']].compact.join(' ')
end

# Builds the document variables hash:
#   0 => number of relationships, 1 => number of products,
#   2 => log(1 + overview length).
# Only companies get non-zero counts, and their overview score is doubled,
# which makes companies more likely to be a top result.
def scoring_variables(description, props)
  overview = Math.log(1 + (props['overview'] || '').length)
  return { 0 => 0, 1 => 0, 2 => overview } unless description == 'company'
  { 0 => (props['relationships'] || []).length,
    1 => (props['products'] || []).length,
    2 => overview * 2 }
end

# Returns the thumbnail URL: the last element of the first entry in
# image['available_sizes'] appended to the crunchbase host, or a generic
# gravatar placeholder when the record has no usable image entry.
def thumbnail_url(props)
  image = props['image']
  sizes = image && image['available_sizes']
  first_size = sizes && sizes[0]
  path = first_size && first_size[-1]
  return 'http://www.gravatar.com/avatar/00000000000000000000000000000000?default=mm' unless path
  'http://www.crunchbase.com/' + path
end

batch_size = 3000 # maximum number of documents per batch_insert call
batches = 0

# Sends one batch to the index and reports the running batch count and
# the size of the batch just sent.
flush = lambda do |batch|
  index.batch_insert(batch)
  batches += 1
  puts "batches: ", batches, batch.length
end

categories.each do |c|
  description = c[0]
  directory = c[1]
  things = JSON.parse(File.read("data/#{directory}.js"))
  docs = []

  things.each do |x|
    f = "data/#{directory}/#{x['permalink']}.js"
    begin
      props = JSON.parse(File.read(f))
    rescue StandardError => e
      # Best effort: a missing or malformed record file must not stop the
      # run, but say which one was skipped.
      warn "skipping #{f}: #{e.message}"
      next
    end

    url = props['crunchbase_url']
    docs << { :docid => url,
              :fields => { :name => display_name(description, props),
                           :thumbnail => thumbnail_url(props),
                           :url => url,
                           :description => description },
              :variables => scoring_variables(description, props) }

    next if docs.length < batch_size
    flush.call(docs)
    docs = []
  end

  # Send whatever is left. Doing this after the loop (rather than on the
  # last index inside it) keeps the final partial batch from being dropped
  # when the last record of the category is skipped.
  flush.call(docs) unless docs.empty?
end