Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add composite_trip Migration Scripts #1009

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions bin/historical/migrations/_common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import os
import subprocess
import importlib
import logging
import tempfile
import time

import emission.core.get_database as edb

DB_HOST_TEMPLATE = os.environ.get('DB_HOST_TEMPLATE', "mongodb://localhost:27017/openpath_prod_REPLACEME")

if 'PROD_LIST' in os.environ:
PROD_LIST=os.environ['PROD_LIST'].split(",")
else:
with tempfile.TemporaryDirectory() as tmpdirname:
print(f"created {tmpdirname=} to find list of configs")
os.chdir(tmpdirname)
proc = subprocess.run(
f"git clone https://github.com/e-mission/nrel-openpath-deploy-configs.git", shell=True)
filenames = os.listdir(f"nrel-openpath-deploy-configs/configs/")

PROD_LIST = [
fname.split(".")[0]
for fname in filenames
if fname and 'dev-' not in fname and 'stage-' not in fname
]
print(f"PROD_LIST: {PROD_LIST}")

def run_on_all_deployments(fn_to_run):
"""
Run the given function on the database for each deployment by setting the
DB_HOST environment variable in between each function call.
The list of deployments (PROD_LIST) is retrieved from the
nrel-openpath-deploy-configs repo upon initialization of this module.
"""
for prod in PROD_LIST:
prod_db_name = prod.replace("-", "_")
print(f"Running {fn_to_run.__name__} for {prod} on DB {prod_db_name}")
os.environ['DB_HOST'] = DB_HOST_TEMPLATE.replace(
"REPLACEME", prod_db_name)
importlib.reload(edb)
fn_to_run()
8 changes: 8 additions & 0 deletions bin/historical/migrations/all_deployments_template.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import emission.core.get_database as edb

from _common import run_on_all_deployments

def print_connect_url():
print("Connecting to database URL"+edb.url)

run_on_all_deployments(print_connect_url)
52 changes: 52 additions & 0 deletions bin/historical/migrations/trim_fluff_from_composite_trips.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import emission.core.get_database as edb

from _common import run_on_all_deployments


def trim_fluff_from_composite_trips():
"""
Trim unnecessary fields from composite trips in the analysis_timeseries_db.
The shape of the remaining fields is unchanged.
"""
print("Trimming fluff from composite trips")
analysis_ts = edb.get_analysis_timeseries_db()
for ct in analysis_ts.find({'metadata.key': 'analysis/composite_trip'}):
# print(f"Trimming {ct['_id']}, {ct['data'].get('start_ts')} - {ct['data'].get('end_ts')}")
for l in ct['data'].get('locations', []):
trim_entry(l, {
'metadata': [],
'data': ['loc', 'ts'],
})

for s in ct['data'].get('sections', []):
trim_entry(s, {
'metadata': [],
'data': ['start_ts', 'end_ts', 'sensed_mode', 'sensed_mode_str',
'ble_sensed_mode', 'distance', 'duration'],
})

for key in ['start_confirmed_place', 'end_confirmed_place']:
trim_entry(ct['data'].get(key), {
'_id': True,
'metadata': ['key'],
'data': ['enter_ts', 'exit_ts', 'location', 'duration',
'user_input', 'additions'],
})

analysis_ts.update_one(
{'_id': ct['_id']},
{'$set': {'data': ct['data']}}
)


def trim_entry(entry, fields_to_keep):
if entry is None:
return
for key in list(entry):
if key not in fields_to_keep:
del entry[key]
elif isinstance(entry[key], dict) and isinstance(fields_to_keep, dict):
trim_entry(entry[key], fields_to_keep[key])


run_on_all_deployments(trim_fluff_from_composite_trips)
Loading