Skip to content

Commit

Permalink
Add manual migration helper for ageRange to ageBuckets
Browse files Browse the repository at this point in the history
Fixes: #2914
  • Loading branch information
abhidg committed Oct 26, 2022
1 parent d95a90b commit e311bb8
Show file tree
Hide file tree
Showing 8 changed files with 560 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# `python-base` sets up all our shared environment variables
# Both later stages (`builder-base` and `development`) start FROM this stage,
# so everything exported here is visible in them.
FROM python:3.10-slim as python-base

# send python output straight to the container log instead of buffering it
ENV PYTHONUNBUFFERED=1 \
# prevents python creating .pyc files
PYTHONDONTWRITEBYTECODE=1 \
\
# pip settings
# NOTE(review): per pip's configuration docs a *falsy* value is what switches
# `--no-cache-dir` on, so `off` presumably disables the cache here - confirm
# before "fixing" this to `on`
PIP_NO_CACHE_DIR=off \
PIP_DISABLE_PIP_VERSION_CHECK=on \
PIP_DEFAULT_TIMEOUT=100 \
\
# https://python-poetry.org/docs/configuration/#using-environment-variables
POETRY_VERSION=1.2.2 \
# make poetry install to this location
POETRY_HOME="/opt/poetry" \
# make poetry create the virtual environment in the project's root
# it gets named `.venv`
POETRY_VIRTUALENVS_IN_PROJECT=true \
# do not ask any interactive question
POETRY_NO_INTERACTION=1 \
\
# this is where our requirements + virtual environment will live
PYSETUP_PATH="/opt/pysetup" \
# must stay in sync with PYSETUP_PATH: it is that directory's `.venv`
VENV_PATH="/opt/pysetup/.venv"

# prepend poetry and venv to path
# (a separate ENV instruction so that $POETRY_HOME and $VENV_PATH, defined in
# the instruction above, are already set when this line is expanded)
ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH"

# `builder-base` stage is used to build deps + create our virtual environment
FROM python-base as builder-base
# curl is only needed to download the poetry installer; the apt package lists
# are removed in the same layer so they are not baked into the image
RUN apt-get update \
    && apt-get install --no-install-recommends -y curl \
    && rm -rf /var/lib/apt/lists/*

# install poetry - respects $POETRY_VERSION & $POETRY_HOME
RUN curl -sSL https://install.python-poetry.org | python3 -

# copy project requirement files here to ensure they will be cached.
WORKDIR $PYSETUP_PATH
COPY poetry.lock pyproject.toml ./

# install runtime deps - uses $POETRY_VIRTUALENVS_IN_PROJECT internally
# `--without dev` is the replacement for `--no-dev`, which is deprecated in
# the Poetry version pinned by $POETRY_VERSION
RUN poetry install --without dev

# `development` image is used during development / testing
FROM python-base as development

# NOTE(review): nothing visible in `python-base` installs curl, and this runs
# `upgrade` rather than `install` - confirm whether `apt-get install` was
# intended, or whether curl is needed in this stage at all
RUN apt-get update && apt-get upgrade -y curl

WORKDIR $PYSETUP_PATH

# copy in our built poetry + venv
COPY --from=builder-base $POETRY_HOME $POETRY_HOME
COPY --from=builder-base $PYSETUP_PATH $PYSETUP_PATH

# quicker install as runtime deps are already installed
# (no dev-exclusion flag here, so presumably this adds the dev dependencies
# such as pytest on top of the copied virtual environment - confirm)
RUN poetry install

# will become mountpoint of our code
WORKDIR /app

# copy the whole build context into /app
COPY ./ ./

# default command: run the test script from /app
CMD ["./test.sh"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
version: "3.7"

services:
  # Runs the image's default command (the test script) in the image built
  # from Dockerfile-test.
  test:
    build:
      context: .
      dockerfile: Dockerfile-test
    # The test suite connects to the database at host "mongo", so make the
    # start order explicit instead of relying on client-side retries.
    depends_on:
      - mongo
  # Database the tests run against; also published on the host port 27017.
  mongo:
    image: mongo:5.0
    ports:
      - "27017:27017"

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Poetry project definition for the ageRange -> ageBuckets migration helper
[tool.poetry]
name = "age-buckets-transition"
version = "0.1.0"
description = "Manual migration script for age buckets transition"
authors = ["Global.health maintainers <info@global.health>"]
license = "MIT"

# Runtime dependencies of the migration script
[tool.poetry.dependencies]
python = "^3.10"
# NOTE(review): the "srv" extra is presumably there so that mongodb+srv://
# connection strings work - confirm
pymongo = {extras = ["srv"], version = "^4.3.2"}
# progress bar shown while iterating over the cases
tqdm = "^4.64.1"

# Only needed to run the test suite
[tool.poetry.dev-dependencies]
pytest = "^7.1.3"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""
Manual migration script for age bucket transition
This script deploys the data transition from
demographics.ageRange.{start,end} to demographics.ageBuckets. The age
buckets are defined in an ageBuckets collection in the DB that is created
using an automatic migration. While this defines the age buckets, it does
not alter the currently existing information in the database which still
uses ageRange. To manually transition the data, this reads in each case and
uses the ageBuckets collection to figure out the buckets and write them to
the database.
"""

import os
import logging
from typing import Hashable

import pymongo
from tqdm import tqdm

# Database name used when the DB_NAME environment variable is not set
DEFAULT_DB = "covid19"


def find_age_buckets(
    start: int, end: int, age_buckets: dict[Hashable, tuple[int, int]]
) -> list[Hashable]:
    """Return the keys of all age buckets touched by the range start..end.

    A bucket is selected when it contains ``start``, when it contains
    ``end``, or when it lies strictly between the two. Bucket bounds are
    inclusive on both sides.

    Args:
        start: Lower limit of the age range.
        end: Upper limit of the age range.
        age_buckets: Mapping of bucket key to its ``(start, end)`` bounds.

    Returns:
        The selected bucket keys, in the iteration order of ``age_buckets``.
    """
    selected: list[Hashable] = []
    for key, (low, high) in age_buckets.items():
        if low <= start <= high or low <= end <= high:
            # one of the range limits falls inside this bucket
            selected.append(key)
        elif low > start and high < end:
            # the bucket sits entirely inside the range
            selected.append(key)
    return selected


def migrate_age_buckets(db, collection: str = "cases"):
    """Replace demographics.ageRange with demographics.ageBuckets on cases.

    Bucket definitions are read from ``db.ageBuckets``. Every document in
    ``db[collection]`` that has ``list: True`` and a
    ``demographics.ageRange`` field is updated in one operation:
    ``demographics.ageBuckets`` is set to the bucket ids returned by
    ``find_age_buckets`` for the range's ``start`` and ``end`` (converted
    with ``int``), and ``demographics.ageRange`` is removed.

    Args:
        db: Database handle that can be indexed by collection name and has
            an ``ageBuckets`` collection (the script entry point passes a
            pymongo database).
        collection: Name of the collection holding the cases to migrate.

    Raises:
        AssertionError: If ``db.ageBuckets`` contains no documents.
    """
    age_buckets = {
        record["_id"]: (record["start"], record["end"])
        for record in db.ageBuckets.find()
    }
    # Raised explicitly rather than with an `assert` statement so the guard
    # is not stripped under `python -O`. Without it, every case would be
    # given an empty bucket list and lose its ageRange.
    if not age_buckets:
        raise AssertionError(
            "ageBuckets collection is empty; refusing to migrate cases"
        )

    cases = db[collection]
    pending = cases.find({"list": True, "demographics.ageRange": {"$exists": True}})
    for case in tqdm(pending):
        age_range = case["demographics"]["ageRange"]
        cases.find_one_and_update(
            {"_id": case["_id"]},
            {
                "$set": {
                    "demographics.ageBuckets": find_age_buckets(
                        int(age_range["start"]),
                        int(age_range["end"]),
                        age_buckets,
                    )
                },
                "$unset": {"demographics.ageRange": ""},
            },
        )


if __name__ == "__main__":
    # Use the connection string from $CONN when it is set and non-empty;
    # otherwise build the client with no arguments. Any error raised while
    # doing so is logged and then propagated.
    try:
        CONN = os.getenv("CONN")
        client = pymongo.MongoClient(CONN) if CONN else pymongo.MongoClient()
    except Exception as e:
        logging.error(e)
        raise

    # $DB_NAME picks the database, falling back to DEFAULT_DB.
    db_name = os.getenv("DB_NAME", DEFAULT_DB)
    db = client[db_name]
    migrate_age_buckets(db)
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash

# Stop on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline.
set -euo pipefail

# DOCKERIZED is the variable the test module checks before running its
# database-backed tests; it is set for the pytest process only.
DOCKERIZED=1 poetry run pytest .
echo "Tests and code quality checks passed"
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash

set -eo pipefail

# Work from the directory that contains this script so the compose file is
# found regardless of the caller's working directory. "$0" is quoted inside
# the command substitution so a path containing spaces is not word-split.
pushd "$(dirname "$0")"

# Stop the compose services, remove their containers, volumes and orphans,
# then return to the caller's directory.
function cleanup() {
    docker compose -f docker-compose-test.yml stop
    docker compose -f docker-compose-test.yml down -v --remove-orphans
    popd
}

# Run the cleanup whenever the script exits, whether the tests pass or fail.
trap cleanup EXIT

# The script's exit status is taken from the `test` service.
docker compose -f docker-compose-test.yml up --build --exit-code-from test
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import os

import pytest
import pymongo

import run

# Bucket documents inserted into db.ageBuckets by the tests: a single-year
# bucket for age 0, followed by five-year buckets 1-5, 6-10, ..., 116-120.
AGE_BUCKETS = [{"_id": "0", "start": 0, "end": 0}] + [
    {"_id": f"{low}-{low + 4}", "start": low, "end": low + 4}
    for low in range(1, 117, 5)
]

# (start, end) age ranges of the cases inserted by the setup_cases fixture.
AGES = [(60, 60), (72, 80), (70, 79), (130, 140)]


@pytest.fixture
def db():
    """Yield the ``covid19`` database on host ``mongo``.

    The client is closed once the test using the fixture has finished, so
    connections are not left open from one test to the next.
    """
    client = pymongo.MongoClient(host="mongo")
    try:
        yield client.covid19
    finally:
        client.close()


@pytest.fixture
def age_buckets(db):
    """Reset db.ageBuckets to AGE_BUCKETS and return it as a mapping.

    Returns:
        Dict of bucket ``_id`` to its ``(start, end)`` tuple, built from the
        documents read back from the collection.
    """
    buckets = db.ageBuckets
    buckets.drop()
    buckets.insert_many(AGE_BUCKETS)

    bounds_by_id = {}
    for doc in buckets.find():
        bounds_by_id[doc["_id"]] = (doc["start"], doc["end"])
    return bounds_by_id


@pytest.fixture
def setup_cases(db):
    """Replace db.cases with one listed case per (start, end) pair in AGES."""
    cases = db.cases
    cases.drop()

    documents = []
    for low, high in AGES:
        age_range = {"start": low, "end": high}
        documents.append({"list": True, "demographics": {"ageRange": age_range}})
    cases.insert_many(documents)


@pytest.mark.skipif(
    os.getenv("DOCKERIZED") is None,
    reason="Test disabled outside dockerized environment",
)
@pytest.mark.parametrize(
    "age_limits,expected",
    [
        ((60, 60), ["56-60"]),
        ((72, 80), ["71-75", "76-80"]),
        ((70, 79), ["66-70", "71-75", "76-80"]),
        ((130, 140), []),
    ],
)
def test_find_age_buckets(age_buckets, age_limits, expected):
    """Each (start, end) pair maps to the expected list of bucket ids."""
    lower, upper = age_limits
    found = run.find_age_buckets(lower, upper, age_buckets)
    assert found == expected


@pytest.mark.skipif(
    os.getenv("DOCKERIZED") is None,
    reason="Test disabled outside dockerized environment",
)
def test_migrate_age_buckets(db, age_buckets, setup_cases):
    """Migrating the fixture cases swaps ageRange for the expected buckets.

    The ``age_buckets`` fixture is requested for its side effect of
    populating ``db.ageBuckets``, which the migration reads. Without it
    this test only passes when another test has filled the collection
    first.
    """
    run.migrate_age_buckets(db)
    # no demographics.ageRange should be present
    assert not list(
        db.cases.find({"list": True, "demographics.ageRange": {"$exists": True}})
    )
    assert [case["demographics"]["ageBuckets"] for case in db.cases.find()] == [
        ["56-60"],
        ["71-75", "76-80"],
        ["66-70", "71-75", "76-80"],
        [],
    ]

0 comments on commit e311bb8

Please sign in to comment.