Skip to content

Commit

Permalink
Replace mongoimport for importing sample cases with API-based import #…
Browse files Browse the repository at this point in the history
  • Loading branch information
iamleeg committed May 5, 2022
1 parent d06706d commit 6a0b309
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 1 deletion.
91 changes: 91 additions & 0 deletions data-serving/scripts/setup-db/import-sample-data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#!/usr/bin/env python3

import copy
import json
import os
import sys
from datetime import datetime, timedelta

import requests

def convert_dict_to_float(aDict):
    """Unwrap a MongoDB Extended JSON number wrapper into a Python float.

    Canonical-mode exports wrap every number in a single-key dict whose key
    names the stored BSON type, e.g. ``{"$numberDouble": "51.5"}``. A value
    stored as an int32, int64 or decimal128 uses ``$numberInt``,
    ``$numberLong`` or ``$numberDecimal`` instead, so all four are accepted.

    Args:
        aDict: mapping holding exactly one of the wrapper keys above.

    Returns:
        The wrapped value converted with ``float()``.

    Raises:
        KeyError: if none of the recognised wrapper keys is present.
        ValueError: if the wrapped value cannot be parsed by ``float()``.
    """
    for wrapper_key in ('$numberDouble', '$numberInt', '$numberLong', '$numberDecimal'):
        if wrapper_key in aDict:
            return float(aDict[wrapper_key])
    # Same exception (and key) a plain lookup of '$numberDouble' would raise.
    raise KeyError('$numberDouble')

def convert_dict_to_date(aDict):
    """Unwrap a MongoDB Extended JSON ``$date`` wrapper into a plain date value.

    Handled shapes:

    * ``{"$date": {"$numberLong": "<ms since epoch>"}}`` becomes a string
      formatted as ``%Y-%m-%dT%H:%M:%S.%f`` (UTC, no offset suffix).
    * ``{"$date": "<string>"}`` returns the inner value unchanged.
    * anything that is not a dict (an already-plain string, ``None``) is
      returned unchanged, so dates that need no conversion are not lost.
    * a dict with no (or an empty) ``$date`` entry returns ``None``.

    Args:
        aDict: the raw date value taken from an exported case document.

    Returns:
        The unwrapped date value, or ``None`` when there is nothing to unwrap.

    Raises:
        TypeError: if ``$date`` is a dict without a ``$numberLong`` entry.
        ValueError: if ``$numberLong`` is not an integer literal.
        OverflowError: if the timestamp is outside the range ``datetime`` supports.
    """
    if not isinstance(aDict, dict):
        # Already plain: hand it back instead of replacing it with None.
        return aDict
    date_value = aDict.get('$date')
    if not date_value:
        return None
    if not isinstance(date_value, dict):
        return date_value
    millis = int(date_value.get('$numberLong'))
    # Integer arithmetic from the epoch avoids the deprecated, platform-limited
    # utcfromtimestamp(), which can reject pre-1970 values on some platforms.
    moment = datetime(1970, 1, 1) + timedelta(milliseconds=millis)
    return moment.strftime('%Y-%m-%dT%H:%M:%S.%f')


def convert_case(case):
    """Return a copy of *case* with mongoexport type wrappers replaced by plain values.

    Some cases in the sample file are in a mongoexport format that spells out
    number and date types as nested dicts. This strips that information so the
    case is compatible with the API's expectation. The fields visited are:

    * ``location.geometry`` latitude/longitude,
    * each ``travelHistory.travel[]`` entry's location geometry and ``dateRange``,
    * each ``genomeSequences[].sampleCollectionDate``,
    * each ``events[].dateRange``.

    The input is deep-copied first, so *case* itself is never modified. Fields
    that are absent are skipped rather than raising.

    Args:
        case: one case document as loaded from the sample JSON file.

    Returns:
        A new dict with the same structure and the wrappers unwrapped.
    """
    # dict(case) would share every nested dict/list with the caller's object.
    converted = copy.deepcopy(case)

    _convert_geometry(converted.get("location"))

    travel_history = converted.get("travelHistory")
    if travel_history:
        for travel in travel_history.get("travel") or []:
            _convert_geometry(travel.get("location"))
            _convert_date_range(travel)

    for sequence in converted.get("genomeSequences") or []:
        if "sampleCollectionDate" in sequence:
            sequence["sampleCollectionDate"] = convert_dict_to_date(sequence["sampleCollectionDate"])

    for event in converted.get("events") or []:
        _convert_date_range(event)

    return converted


def _convert_geometry(location):
    """Unwrap dict-valued latitude/longitude inside *location*'s geometry, in place."""
    if not isinstance(location, dict):
        return
    geometry = location.get("geometry")
    if not isinstance(geometry, dict):
        return
    for axis in ("latitude", "longitude"):
        if isinstance(geometry.get(axis), dict):
            geometry[axis] = convert_dict_to_float(geometry[axis])


def _convert_date_range(holder):
    """Unwrap the start/end values of *holder*'s ``dateRange``, in place."""
    date_range = holder.get("dateRange")
    if not isinstance(date_range, dict):
        return
    for bound in ("start", "end"):
        if bound in date_range:
            date_range[bound] = convert_dict_to_date(date_range[bound])

def main():
    """Import sample data from ../../samples/cases.json into the G.h instance at $GH_BASE_URL
    using the curator API key at $GH_API_KEY. This is done through the curator-service
    rather than loading directly into the database via mongoimport because the links to
    other collections, particularly the age buckets, need setting up in the application.

    The sample file path is resolved against the current working directory.
    Every case is passed through ``convert_case`` and the whole set is sent in
    a single POST to ``<base_url>/api/cases/batchUpsert``.

    Exits with status 0 when the API returns a success response (any per-case
    errors in the response are printed), and with status 1 when an environment
    variable is missing, the request cannot be completed, or the API returns
    an error status. Failure messages are written to stderr.
    """
    if not (base_url := os.getenv('GH_BASE_URL')):
        print("Define the Global.health API location in $GH_BASE_URL.", file=sys.stderr)
        sys.exit(1)
    if not (api_key := os.getenv('GH_API_KEY')):
        print("Supply a Global.health API key in $GH_API_KEY.", file=sys.stderr)
        sys.exit(1)

    # Tolerate a trailing slash in the configured URL so the path has no '//'.
    base_url = base_url.rstrip('/')
    batch_upsert_endpoint = f"{base_url}/api/cases/batchUpsert"

    # Explicit encoding: don't depend on the platform's default locale.
    with open("../../samples/cases.json", encoding="utf-8") as f:
        sample_cases = json.load(f)

    request_body = {"cases": [convert_case(c) for c in sample_cases]}
    request_headers = {'X-Api-Key': api_key}

    try:
        # (connect, read) timeouts: without them requests waits indefinitely.
        response = requests.post(
            batch_upsert_endpoint,
            json=request_body,
            headers=request_headers,
            timeout=(10, 300),
        )
    except requests.RequestException as err:
        print("Unsuccessful in importing sample cases", file=sys.stderr)
        print(err, file=sys.stderr)
        sys.exit(1)

    if not response.ok:
        print("Unsuccessful in importing sample cases", file=sys.stderr)
        print(response.text, file=sys.stderr)
        sys.exit(1)

    report = response.json()
    print(f"Success response from API. {report['numCreated']} cases created, {report['numUpdated']} updated.")
    if errors := report.get('errors'):
        print(f"Errors: {errors}")
    sys.exit(0)

# Run the import only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
2 changes: 1 addition & 1 deletion data-serving/scripts/setup-db/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"scripts": {
"build": "tsc",
"lint": "tsc --noEmit && eslint '*/**/*.{js,ts,tsx}' --quiet --fix",
"import-sample-data": "mongoimport --uri=$CONN --db $DB --collection cases --jsonArray --file ../../samples/cases.json",
"import-sample-data": "python3 ./import-sample-data.py",
"migrate": "npm ci && migrate-mongo up",
"delete-all-cases": "mongo $CONN --eval 'db.collection.deleteMany({})'"
},
Expand Down

0 comments on commit 6a0b309

Please sign in to comment.