Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Seeding] Create dummy csv generator #341

Merged
merged 41 commits into from
Nov 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
0a8cba5
move up hba1c targe
anchit-chandran Oct 29, 2024
7eaf58d
init create csv
anchit-chandran Oct 29, 2024
44cb3b7
working but need to confirm headers
anchit-chandran Oct 29, 2024
1dac498
output to csv
anchit-chandran Oct 29, 2024
3782eb0
leave local data folder empty so can git ignore contents
anchit-chandran Oct 29, 2024
af264fe
add `local_generated_data/` to gitignore
anchit-chandran Oct 29, 2024
f87d55a
height should be weight in clean_weight
eatyourpeas Oct 30, 2024
cf74e5e
Merge remote-tracking branch 'origin/live' into anchit/kpis/single-pt
anchit-chandran Nov 1, 2024
4e1a186
change gitignore to ignoring CONTENTS of \local_genreated_data\
anchit-chandran Nov 1, 2024
5e227ef
different method to try keeping the empty folder
anchit-chandran Nov 1, 2024
bc8291c
basic upload test (error: `ValueError: Missing column provided to 'pa…
anchit-chandran Nov 1, 2024
ea30989
add docstrings
anchit-chandran Nov 1, 2024
c4e90c0
rm mocking session data as think not needed
anchit-chandran Nov 1, 2024
6b42613
Merge remote-tracking branch 'origin/live' into anchit/kpis/single-pt
anchit-chandran Nov 2, 2024
5a80aad
adds local clean docker setup script
anchit-chandran Nov 2, 2024
1829ecc
adds input age_range
anchit-chandran Nov 2, 2024
85f9021
rm csv data file from tracking
anchit-chandran Nov 2, 2024
3115149
update pattern for local data folder gitignoring
anchit-chandran Nov 2, 2024
7b7b1f4
further specify pattern
anchit-chandran Nov 2, 2024
4ed9125
make our constant header name same as template (add comma)
anchit-chandran Nov 2, 2024
220b48b
ensure date formatting is as we need
anchit-chandran Nov 2, 2024
4cf51ea
refactor age ranges and hb target inputs to take multiple
anchit-chandran Nov 2, 2024
64865de
Revert "refactor age ranges and hb target inputs to take multiple"
anchit-chandran Nov 2, 2024
b190d3c
formatting print info
anchit-chandran Nov 2, 2024
89da124
adds build and coalesce flags, implements build
anchit-chandran Nov 3, 2024
2d481c1
make print info prettier
anchit-chandran Nov 3, 2024
249d830
formatting
anchit-chandran Nov 3, 2024
9a6a0b7
formatting / tidying
anchit-chandran Nov 3, 2024
c12e71c
implements coalesce flag
anchit-chandran Nov 3, 2024
1f87da1
skip upload test for now
anchit-chandran Nov 3, 2024
0e4188c
set dtypes to being same as `dummy_csv`
anchit-chandran Nov 3, 2024
28de203
adds docstrings on --build and --coalesce with example cmd
anchit-chandran Nov 3, 2024
a4c29e7
setting dtypes
anchit-chandran Nov 3, 2024
5231a7d
update docstrings
anchit-chandran Nov 3, 2024
ed8fab9
formatting
anchit-chandran Nov 4, 2024
d4ab147
first step define dtypes
eatyourpeas Nov 9, 2024
9beb65a
introduce dtypes to read_csv method. This removes dtype collision lat…
eatyourpeas Nov 9, 2024
8229cae
fix csv_summarize call in submissions listview
eatyourpeas Nov 9, 2024
2b3a8c9
Merge branch 'live' into anchit/kpis/single-pt
eatyourpeas Nov 9, 2024
484a622
fix tests by fixing missing dtypes
eatyourpeas Nov 9, 2024
da8e06f
fix closed_loop randomisation to constrain to available choices rathe…
eatyourpeas Nov 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,7 @@ node_modules
build_info.json

media/submissions/csv/*
*.crt
*.crt

project/npda/dummy_sheets/local_generated_data/*
!project/npda/dummy_sheets/local_generated_data/.gitkeep
3 changes: 2 additions & 1 deletion envs/example.env
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ NHS_ODS_API_URL_SUBSCRIPTION_KEY=##########

NHS_SPINE_SERVICES_URL="https://uat.directory.spineservices.nhs.uk/ORD/2-0-0"

POSTCODE_API_BASE_URL="https://findthatpostcode.uk/"
POSTCODES_IO_API_URL="https://api.postcodes.io/" # RCPCH host their own instance of postcodes.io; this is the open-source project
POSTCODES_IO_API_KEY="XXXXXX"

# DJANGO POSTGRES DATABASE CONNECTION
NPDA_POSTGRES_DB_HOST="postgis"
Expand Down
45 changes: 44 additions & 1 deletion project/constants/csv_headings.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pandas as pd

CSV_HEADINGS = (
# Patient
{"heading": "NHS Number", "model_field": "nhs_number", "model": "Patient"},
Expand Down Expand Up @@ -128,7 +130,7 @@
"model": "Visit",
},
{
"heading": "At time of or following measurement of thyroid function, was the patient prescribed any thyroid treatment?",
"heading": "At time of, or following measurement of thyroid function, was the patient prescribed any thyroid treatment?",
"model_field": "thyroid_treatment_status",
"model": "Visit",
},
Expand Down Expand Up @@ -282,3 +284,44 @@
("hospital_admission_date", "Start date (Hospital Provider Spell)"),
("hospital_discharge_date", "Discharge date (Hospital provider spell)"),
]

# Maps each non-date CSV column heading to the pandas dtype string used when
# reading the CSV. Date columns are deliberately absent (hence "MINUS_DATES").
# Capitalised "Int8"/"Int16" are pandas' nullable integer dtypes, so empty
# cells are kept as <NA> instead of forcing the column to float.
CSV_DATA_TYPES_MINUS_DATES = {
    "NHS Number": "str",
    "Postcode of usual address": "str",
    "Stated gender": "Int8",
    "Ethnic Category": "str",  # choices are all capital letters
    "Diabetes Type": "Int8",
    "Reason for leaving service": "Int8",
    "GP Practice Code": "str",
    "PDU Number": "str",
    "Patient Height (cm)": "float32",
    "Patient Weight (kg)": "float32",
    "Hba1c Value": "float32",
    "HbA1c result format": "Int8",
    "Diabetes Treatment at time of Hba1c measurement": "Int8",
    "If treatment included insulin pump therapy (i.e. option 3 or 6 selected), was this part of a closed loop system?": "Int8",
    "At the time of HbA1c measurement, in addition to standard blood glucose monitoring (SBGM), was the patient using any other method of glucose monitoring?": "str",
    # Blood pressure is a measurement in mmHg, not a small choice code: Int8
    # tops out at 127, which a raised systolic reading exceeds, so these two
    # columns need a wider integer type.
    "Systolic Blood Pressure": "Int16",
    "Diastolic Blood pressure": "Int16",
    "Retinal Screening Result": "Int8",
    "Urinary Albumin Level (ACR)": "float64",
    "Albuminuria Stage": "Int8",
    "Total Cholesterol Level (mmol/l)": "float64",
    "At time of, or following measurement of thyroid function, was the patient prescribed any thyroid treatment?": "Int8",
    "Has the patient been recommended a Gluten-free diet?": "Int8",
    "Was the patient assessed as requiring additional psychological/CAMHS support outside of MDT clinics?": "Int8",
    "Does the patient smoke?": "Int8",
    "Was the patient offered an additional appointment with a paediatric dietitian?": "Int8",
    "Was the patient using (or trained to use) blood ketone testing equipment at time of visit?": "Int8",
    "Reason for admission": "Int8",
    "Only complete if DKA selected in previous question: During this DKA admission did the patient receive any of the following therapies?": "Int8",
    "Only complete if OTHER selected: Reason for admission (free text)": "str",
}

# CSV column headings grouped under the "non-null" label.
# NOTE(review): presumably these are the columns that must always be populated
# in an uploaded row - confirm against the code that consumes this list.
NONNULL_FIELDS = [
    # patient-level identifiers
    "NHS Number", "Date of Birth", "Diabetes Type", "PDU Number",
    # visit-level identifier
    "Visit/Appointment Date",
]
5 changes: 5 additions & 0 deletions project/constants/postcodes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
"""
Constants for 'unknown' postcodes
These are Office for National Statistics (ONS) codes for where a postcode is not known
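ZZ99 3VZ No fixed abode
ZZ99 3CZ England/U.K. not otherwise specified
ZZ99 3GZ Wales not otherwise specified
ZZ99 1WZ Scotland not otherwise specified
ZZ99 2WZ Northern Ireland not otherwise specified
ZZ99 3WZ Address not known
NOTE(review): the list below contains ZZ993WZ but not ZZ991WZ or ZZ992WZ - confirm whether the Scotland and Northern Ireland codes are omitted on purpose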
"""

UNKNOWN_POSTCODES_NO_SPACES = ["ZZ993CZ", "ZZ993GZ", "ZZ993WZ", "ZZ993VZ"]
Empty file.
Original file line number Diff line number Diff line change
@@ -1 +1 @@
NHS Number,Date of Birth,Postcode of usual address,Stated gender,Ethnic Category,Diabetes Type,Date of Diabetes Diagnosis,Date of leaving service,Reason for leaving service,Death Date,GP Practice Code,PDU Number,Visit/Appointment Date,Patient Height (cm),Patient Weight (kg),Observation Date (Height and weight),Hba1c Value,HbA1c result format,Observation Date: Hba1c Value,Diabetes Treatment at time of Hba1c measurement,"If treatment included insulin pump therapy (i.e. option 3 or 6 selected), was this part of a closed loop system?","At the time of HbA1c measurement, in addition to standard blood glucose monitoring (SBGM), was the patient using any other method of glucose monitoring?",Systolic Blood Pressure,Diastolic Blood pressure,Observation Date (Blood Pressure),Foot Assessment / Examination Date,Retinal Screening date,Retinal Screening Result,Urinary Albumin Level (ACR),Observation Date: Urinary Albumin Level,Albuminuria Stage,Total Cholesterol Level (mmol/l),Observation Date: Total Cholesterol Level,Observation Date: Thyroid Function ,"At time of, or following measurement of thyroid function, was the patient prescribed any thyroid treatment?",Observation Date: Coeliac Disease Screening,Has the patient been recommended a Gluten-free diet?,Observation Date - Psychological Screening Assessment,Was the patient assessed as requiring additional psychological/CAMHS support outside of MDT clinics?,Does the patient smoke?,Date of offer of referral to smoking cessation service (if patient is a current smoker),Date of Level 3 carbohydrate counting education received,Was the patient offered an additional appointment with a paediatric dietitian?,Date of additional appointment with dietitian,Was the patient using (or trained to use) blood ketone testing equipment at time of visit?,Date that influenza immunisation was recommended,Date of provision of advice ('sick-day rules') about managing diabetes during intercurrent illness or episodes of hyperglycaemia,Start date 
(Hospital Provider Spell),Discharge date (Hospital provider spell),Reason for admission,Only complete if DKA selected in previous question: During this DKA admission did the patient receive any of the following therapies?,Only complete if OTHER selected: Reason for admission (free text)
NHS Number,Date of Birth,Postcode of usual address,Stated gender,Ethnic Category,Diabetes Type,Date of Diabetes Diagnosis,Date of leaving service,Reason for leaving service,Death Date,GP Practice Code,PDU Number,Visit/Appointment Date,Patient Height (cm),Patient Weight (kg),Observation Date (Height and weight),Hba1c Value,HbA1c result format,Observation Date: Hba1c Value,Diabetes Treatment at time of Hba1c measurement,"If treatment included insulin pump therapy (i.e. option 3 or 6 selected), was this part of a closed loop system?","At the time of HbA1c measurement, in addition to standard blood glucose monitoring (SBGM), was the patient using any other method of glucose monitoring?",Systolic Blood Pressure,Diastolic Blood pressure,Observation Date (Blood Pressure),Foot Assessment / Examination Date,Retinal Screening date,Retinal Screening Result,Urinary Albumin Level (ACR),Observation Date: Urinary Albumin Level,Albuminuria Stage,Total Cholesterol Level (mmol/l),Observation Date: Total Cholesterol Level,Observation Date: Thyroid Function,"At time of, or following measurement of thyroid function, was the patient prescribed any thyroid treatment?",Observation Date: Coeliac Disease Screening,Has the patient been recommended a Gluten-free diet?,Observation Date - Psychological Screening Assessment,Was the patient assessed as requiring additional psychological/CAMHS support outside of MDT clinics?,Does the patient smoke?,Date of offer of referral to smoking cessation service (if patient is a current smoker),Date of Level 3 carbohydrate counting education received,Was the patient offered an additional appointment with a paediatric dietitian?,Date of additional appointment with dietitian,Was the patient using (or trained to use) blood ketone testing equipment at time of visit?,Date that influenza immunisation was recommended,Date of provision of advice ('sick-day rules') about managing diabetes during intercurrent illness or episodes of hyperglycaemia,Start date (Hospital 
Provider Spell),Discharge date (Hospital provider spell),Reason for admission,Only complete if DKA selected in previous question: During this DKA admission did the patient receive any of the following therapies?,Only complete if OTHER selected: Reason for admission (free text)
90 changes: 57 additions & 33 deletions project/npda/forms/external_patient_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
from django.core.exceptions import ValidationError
from httpx import HTTPError, AsyncClient

from ..general_functions import (gp_details_for_ods_code,
gp_ods_code_for_postcode,
validate_postcode,
imd_for_postcode)
from ..general_functions import (
gp_details_for_ods_code,
gp_ods_code_for_postcode,
validate_postcode,
imd_for_postcode,
)


logger = logging.getLogger(__name__)
Expand All @@ -21,46 +23,47 @@ class PatientExternalValidationResult:
postcode: str | ValidationError | None
gp_practice_ods_code: str | ValidationError | None
gp_practice_postcode: str | ValidationError | None
index_of_multiple_deprivation_quintile: str | None
index_of_multiple_deprivation_quintile: str | None


async def _validate_postcode(
    postcode: str | None, async_client: AsyncClient
) -> str | None:
    """Look up ``postcode`` and return its normalised form.

    Returns ``None`` when no postcode is supplied, or when the lookup fails
    with an ``HTTPError`` (the failure is logged as a warning, not raised).

    Raises:
        ValidationError: when the lookup completes but yields a falsy result.
    """
    if not postcode:
        return None

    try:
        normalised = await validate_postcode(postcode, async_client)
    except HTTPError as err:
        # Lookup failure is best-effort: record it and carry on without a value.
        logger.warning(f"Error validating postcode {err}")
        return None

    if normalised:
        return normalised

    raise ValidationError(
        "Invalid postcode %(postcode)s", params={"postcode": postcode}
    )


async def _imd_for_postcode(
    postcode: str | None, async_client: AsyncClient
) -> str | None:
    """Return the result of ``imd_for_postcode`` for ``postcode``.

    Returns ``None`` when no postcode is supplied, or when the lookup fails
    with an ``HTTPError`` (the failure is logged as a warning, not raised).
    """
    if not postcode:
        return None

    try:
        return await imd_for_postcode(postcode, async_client)
    except HTTPError as err:
        logger.warning(f"Cannot calculate deprivation score for {postcode} {err}")
        return None


async def _gp_details_from_ods_code(ods_code: str | None, async_client: AsyncClient) -> tuple[str, str] | None:
async def _gp_details_from_ods_code(
ods_code: str | None, async_client: AsyncClient
) -> tuple[str, str] | None:
try:
result = await gp_details_for_ods_code(ods_code, async_client)

if not result:
raise ValidationError(
"Could not find GP practice with ODS code %(ods_code)s",
params={"ods_code":ods_code}
params={"ods_code": ods_code},
)
else:
postcode = result["GeoLoc"]["Location"]["PostCode"]
Expand All @@ -69,15 +72,19 @@ async def _gp_details_from_ods_code(ods_code: str | None, async_client: AsyncCli
logger.warning(f"Error looking up GP practice by ODS code {err}")


async def _gp_details_from_postcode(gp_practice_postcode: str, async_client: AsyncClient) -> tuple[str, str] | None:
async def _gp_details_from_postcode(
gp_practice_postcode: str, async_client: AsyncClient
) -> tuple[str, str] | None:
try:
normalised_postcode = await validate_postcode(gp_practice_postcode, async_client)
normalised_postcode = await validate_postcode(
gp_practice_postcode, async_client
)
ods_code = await gp_ods_code_for_postcode(normalised_postcode, async_client)

if not ods_code:
raise ValidationError(
"Could not find GP practice with postcode %(postcode)s",
params={"postcode":gp_practice_postcode}
params={"postcode": gp_practice_postcode},
)
else:
return [ods_code, normalised_postcode]
Expand All @@ -86,7 +93,12 @@ async def _gp_details_from_postcode(gp_practice_postcode: str, async_client: Asy


# Run lookups to external APIs asynchronously to speed up CSV upload by processing patients in parallel
async def validate_patient_async(postcode: str, gp_practice_ods_code: str | None, gp_practice_postcode: str | None, async_client: AsyncClient) -> PatientExternalValidationResult:
async def validate_patient_async(
postcode: str,
gp_practice_ods_code: str | None,
gp_practice_postcode: str | None,
async_client: AsyncClient,
) -> PatientExternalValidationResult:
ret = PatientExternalValidationResult(None, None, None, None)

validate_postcode_task = _validate_postcode(postcode, async_client)
Expand All @@ -99,25 +111,32 @@ async def validate_patient_async(postcode: str, gp_practice_ods_code: str | None
else:
gp_details_task = asyncio.Future()
gp_details_task.set_result(None)

# This is the Python equivalent of Promise.allSettled
# Run all the lookups in parallel but retain exceptions per job rather than returning the first one
[postcode, index_of_multiple_deprivation_quintile, gp_details] = await asyncio.gather(
validate_postcode_task,
imd_for_postcode_task,
gp_details_task,
return_exceptions=True
[postcode, index_of_multiple_deprivation_quintile, gp_details] = (
await asyncio.gather(
validate_postcode_task,
imd_for_postcode_task,
gp_details_task,
return_exceptions=True,
)
)

if isinstance(postcode, Exception) and not type(postcode) is ValidationError:
raise postcode
else:
ret.postcode = postcode

if isinstance(index_of_multiple_deprivation_quintile, Exception) and not type(index_of_multiple_deprivation_quintile) is ValidationError:

if (
isinstance(index_of_multiple_deprivation_quintile, Exception)
and not type(index_of_multiple_deprivation_quintile) is ValidationError
):
raise index_of_multiple_deprivation_quintile
else:
ret.index_of_multiple_deprivation_quintile = index_of_multiple_deprivation_quintile
ret.index_of_multiple_deprivation_quintile = (
index_of_multiple_deprivation_quintile
)

if type(gp_details) is ValidationError:
if gp_practice_ods_code:
Expand All @@ -135,10 +154,15 @@ async def validate_patient_async(postcode: str, gp_practice_ods_code: str | None

return ret

def validate_patient_sync(
    postcode: str, gp_practice_ods_code: str | None, gp_practice_postcode: str | None
) -> PatientExternalValidationResult:
    """Synchronous entry point for ``validate_patient_async``.

    Opens a fresh ``AsyncClient`` for the duration of the call, runs the async
    validation with it, and returns that result via ``async_to_sync``.
    """

    async def _run_with_client() -> PatientExternalValidationResult:
        # The client is scoped to this single call and closed on exit.
        async with AsyncClient() as http_client:
            return await validate_patient_async(
                postcode, gp_practice_ods_code, gp_practice_postcode, http_client
            )

    return async_to_sync(_run_with_client)()
Loading