Skip to content

Commit

Permalink
Don't try to geocode placeholder values like "missing" or "unknown"
Browse files Browse the repository at this point in the history
  • Loading branch information
ktmeaton committed May 8, 2020
1 parent 1c8210b commit 91c168d
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions scripts/geocode_NextStrain.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
--in-tsv ../metadata_assembly_nextstrain_edit_name.tsv \
--loc-col BioSampleGeographicLocation \
--out-tsv ../metadata_assembly_nextstrain_edit_name_geocode.tsv \
--out-lat-lon metadata_assembly_nextstrain_edit_name_lat_lon.tsv
--out-lat-lon ../metadata_assembly_nextstrain_edit_name_lat_lon.tsv
"""

# This program should only be called from the command-line
Expand Down Expand Up @@ -88,6 +88,7 @@
DELIM = "\t"
# Values with no data will be replaced by this character
NO_DATA_CHAR = "?"
MISSING_DATA_LIST = ["missing", "unknown"]

# Dictionary to store latitude and longitude
geo_loc_dict = {} # {'Location String' : {latitude: float, longitude: float, address_dict}}
Expand Down Expand Up @@ -150,7 +151,7 @@
if geo_loc not in geo_loc_dict:
# Store a deep copy of the blank address dictionary, not a reference to it!
geo_loc_dict[geo_loc] = copy.deepcopy(address_dict)
if geo_loc != NO_DATA_CHAR:
if geo_loc != NO_DATA_CHAR and geo_loc.lower() not in MISSING_DATA_LIST:
# Geocode the string location
location = geolocator.geocode(geo_loc, language='en')
if location:
Expand Down

0 comments on commit 91c168d

Please sign in to comment.