This repository has been archived by the owner on Jun 28, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
taxonomy_mapper.py
48 lines (41 loc) · 1.95 KB
/
taxonomy_mapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# -*- coding: utf-8 -*-
import json
import os
import urllib
# Term-mapper lookup URL; the two %s slots are filled, in order, with the
# mapped term and the source name.
MAPPER_TEMPLATE = (
    "http://im.govwizely.com/api/terms.json"
    "?mapped_term=%s&source=%s&log_failed=true"
)
# Taxonomy search URL (limited to one result of type "Countries"); the two
# %s slots are filled, in order, with the API key and the query text.
TAXONOMY_TEMPLATE = (
    "https://api.trade.gov/ita_taxonomies/search.json"
    "?size=1&types=Countries&api_key=%s&q=%s"
)
class TaxonomyMapper(object):
    """Fill taxonomy-derived fields on entry dicts from two JSON web services.

    For every configured ``starting_field`` / ``desired_field`` pair the value
    of the starting field is sent to the term-mapper service
    (``MAPPER_TEMPLATE``). Depending on the desired field, the result is
    either the mapped country name or a list of world regions; the latter may
    require a second lookup against the taxonomy service
    (``TAXONOMY_TEMPLATE``). Parsed responses are cached per instance by URL.
    """

    def __init__(self, options):
        """Store the field configuration and start with an empty cache.

        Args:
            options: mapping with the keys ``'config'`` (iterable of dicts,
                each providing ``'starting_field'`` and ``'desired_field'``)
                and ``'mapper_source'`` (sent as ``source`` to the mapper).

        Raises:
            KeyError: if either key is missing from ``options``.
        """
        self.config_array = options['config']
        self.mapper_source = options['mapper_source']
        # Parsed JSON responses keyed by request URL.
        self.request_cache = {}

    def add_taxonomy_fields(self, entry):
        """Populate every configured desired field on ``entry`` in place.

        Each desired field is first reset to ``''`` and only overwritten when
        the mapper returns at least one term that yields a value.

        Args:
            entry: dict holding the starting fields; it is mutated.

        Returns:
            The same ``entry`` object.

        Raises:
            KeyError: if ``entry`` lacks a configured starting field.
        """
        for config in self.config_array:
            desired_field = config['desired_field']
            # Read the lookup value BEFORE resetting the target: when both
            # config keys name the same field, resetting first would always
            # send an empty term to the mapper.
            lookup_value = entry[config['starting_field']]
            entry[desired_field] = ''

            url = self._build_url(MAPPER_TEMPLATE, lookup_value, self.mapper_source)
            mapper_response = self.cached_response_for(url)
            if not mapper_response:
                continue

            # Only the first mapped term decides which value is stored.
            term = mapper_response[0]
            if self.country_should_be_added(term, config):
                entry[desired_field] = term["name"]
            elif self.world_regions_should_be_added(term, config):
                regions = self.add_world_region(term, mapper_response)
                # Keep the '' default rather than storing None when no
                # region could be determined.
                if regions is not None:
                    entry[desired_field] = regions
        return entry

    def add_world_region(self, term, mapper_response):
        """Return the world regions for ``term``, or ``None`` if unknown.

        Args:
            term: first mapped term, a dict with ``"name"`` and
                ``"taxonomies"``.
            mapper_response: the full list of mapped terms.

        Returns:
            * For a term in the "Countries" taxonomy: the ``world_regions``
              value of the first taxonomy-service result for that country,
              or ``None`` when the service returns no usable result.
            * For a term in the "World Regions" taxonomy: the names of all
              terms in ``mapper_response``.
            * ``None`` otherwise.

        Raises:
            KeyError: if the ``API_KEY`` environment variable is not set and
                a country lookup is required.
        """
        taxonomies = term["taxonomies"]
        if "Countries" in taxonomies:
            url = self._build_url(TAXONOMY_TEMPLATE, os.environ['API_KEY'], term["name"])
            taxonomy_response = self.cached_response_for(url)
            results = taxonomy_response.get("results") or []
            if not results:
                # Previously an IndexError; an unknown country simply has
                # no regions.
                return None
            related_terms = results[0].get("related_terms") or {}
            return related_terms.get("world_regions")
        if "World Regions" in taxonomies:
            # Distinct loop name so the ``term`` argument is not shadowed.
            return [mapped["name"] for mapped in mapper_response]
        return None

    def country_should_be_added(self, term, config):
        """Return True when a country name is wanted and ``term`` is a country."""
        return (
            term is not None
            and config['desired_field'] == "country"
            and "Countries" in term["taxonomies"]
        )

    def world_regions_should_be_added(self, term, config):
        """Return True when ``term`` exists and the desired field is world_region."""
        return term is not None and config['desired_field'] == "world_region"

    def cached_response_for(self, url):
        """Return the parsed JSON body for ``url``, fetching it at most once.

        Args:
            url: fully built request URL; also used as the cache key.

        Returns:
            The decoded JSON document.
        """
        if url in self.request_cache:
            return self.request_cache[url]

        # Function-scope import keeps the block self-contained and lets it
        # run on both Python 3 and the Python 2 runtime it was written for.
        try:
            from urllib.request import urlopen  # Python 3
        except ImportError:
            from urllib import urlopen  # Python 2

        response = urlopen(url)
        try:
            body = response.read()
        finally:
            # Always release the connection, even if reading fails.
            response.close()

        parsed = json.loads(body.decode("utf-8"))
        self.request_cache[url] = parsed
        return parsed

    @staticmethod
    def _build_url(template, *params):
        """Fill ``template``'s ``%s`` slots with percent-encoded ``params``.

        Encoding every value keeps characters such as ``&``, ``#``, spaces
        or non-ASCII letters from corrupting the query string.
        """
        try:
            from urllib.parse import quote  # Python 3
        except ImportError:
            from urllib import quote  # Python 2

        text_type = type(u"")
        encoded = []
        for param in params:
            if isinstance(param, text_type):
                # Encode text explicitly so Python 2 unicode values work too.
                param = param.encode("utf-8")
            elif not isinstance(param, bytes):
                # Same stringification "%s" formatting would have applied.
                param = str(param)
            encoded.append(quote(param, safe=""))
        return template % tuple(encoded)