-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathparse.rb
47 lines (40 loc) · 1.3 KB
/
parse.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
require 'csv'
# Build ./cities.txt as CSV: first one row per country taken from
# countries.tsv, then one row per city taken from the geonames dump.
countries_file = "./countries.tsv"
output_file = "./cities.txt"

CSV.open(output_file, 'w') do |writer|
  # import countries
  # Maps country code => country name. It is filled while the country rows
  # are copied to the output, so countries.tsv is read exactly once.
  countries = {}
  # Options are given as keyword arguments: the second positional parameter
  # of CSV.foreach is the file mode, so a positional options Hash breaks on
  # Ruby 3+, where hashes are no longer implicitly converted to keywords.
  CSV.foreach(countries_file,
              encoding: "UTF-8", col_sep: "\t",
              headers: true, header_converters: :symbol) do |row|
    code = row[:country]
    name = row[:name]
    countries[code] = name
    writer << [name, code, row[:latitude], row[:longitude]]
  end

  # parse cities15000.txt from geonames, header added manually
  input_file = "../cities15000.txt"
  # NOTE(review): quote_char is ";" presumably so that double quotes inside
  # the tab-separated fields are read as literal text -- confirm.
  CSV.foreach(input_file,
              encoding: "UTF-8", col_sep: "\t", quote_char: ";",
              headers: true, header_converters: :symbol) do |row|
    name = row[:name]
    country_code = row[:country]
    country = countries[country_code]
    # Kralendijk (code BQ) is always labelled "Bonaire", whatever name the
    # countries table holds for that code.
    country = "Bonaire" if country_code == 'BQ' && name == 'Kralendijk'

    if country.nil?
      # Cities whose country code is unknown are reported and skipped.
      puts "#{name}, #{country_code} -- country code not found"
    else
      writer << ["#{name}, #{country}", country_code, row[:lat], row[:long]]
    end
  end
end