diff --git a/.gitignore b/.gitignore index aa182b88..acb6ce24 100644 --- a/.gitignore +++ b/.gitignore @@ -17,4 +17,7 @@ node_modules build_info.json media/submissions/csv/* -*.crt \ No newline at end of file +*.crt + +project/npda/dummy_sheets/local_generated_data/* +!project/npda/dummy_sheets/local_generated_data/.gitkeep diff --git a/envs/example.env b/envs/example.env index a71fba5d..9a2a15c1 100644 --- a/envs/example.env +++ b/envs/example.env @@ -10,7 +10,8 @@ NHS_ODS_API_URL_SUBSCRIPTION_KEY=########## NHS_SPINE_SERVICES_URL="https://uat.directory.spineservices.nhs.uk/ORD/2-0-0" -POSTCODE_API_BASE_URL="https://findthatpostcode.uk/" +POSTCODES_IO_API_URL="https://api.postcodes.io/" #RCPCH host their own instance of postcodes io: this is the opensource project +POSTCODES_IO_API_KEY="XXXXXX" # DJANGO POSTGRES DATABASE CONNECTION NPDA_POSTGRES_DB_HOST="postgis" diff --git a/project/constants/csv_headings.py b/project/constants/csv_headings.py index b7a14a31..20e2e32f 100644 --- a/project/constants/csv_headings.py +++ b/project/constants/csv_headings.py @@ -1,3 +1,5 @@ +import pandas as pd + CSV_HEADINGS = ( # Patient {"heading": "NHS Number", "model_field": "nhs_number", "model": "Patient"}, @@ -128,7 +130,7 @@ "model": "Visit", }, { - "heading": "At time of or following measurement of thyroid function, was the patient prescribed any thyroid treatment?", + "heading": "At time of, or following measurement of thyroid function, was the patient prescribed any thyroid treatment?", "model_field": "thyroid_treatment_status", "model": "Visit", }, @@ -282,3 +284,44 @@ ("hospital_admission_date", "Start date (Hospital Provider Spell)"), ("hospital_discharge_date", "Discharge date (Hospital provider spell)"), ] + +CSV_DATA_TYPES_MINUS_DATES = { + "NHS Number": "str", + "Postcode of usual address": "str", + "Stated gender": "Int8", + "Ethnic Category": "str", # choices are all capital letters + "Diabetes Type": "Int8", + "Reason for leaving service": "Int8", + "GP Practice 
Code": "str", + "PDU Number": "str", + "Patient Height (cm)": "float32", + "Patient Weight (kg)": "float32", + "Hba1c Value": "float32", + "HbA1c result format": "Int8", + "Diabetes Treatment at time of Hba1c measurement": "Int8", + "If treatment included insulin pump therapy (i.e. option 3 or 6 selected), was this part of a closed loop system?": "Int8", + "At the time of HbA1c measurement, in addition to standard blood glucose monitoring (SBGM), was the patient using any other method of glucose monitoring?": "str", + "Systolic Blood Pressure": "Int8", + "Diastolic Blood pressure": "Int8", + "Retinal Screening Result": "Int8", + "Urinary Albumin Level (ACR)": "float64", + "Albuminuria Stage": "Int8", + "Total Cholesterol Level (mmol/l)": "float64", + "At time of, or following measurement of thyroid function, was the patient prescribed any thyroid treatment?": "Int8", + "Has the patient been recommended a Gluten-free diet?": "Int8", + "Was the patient assessed as requiring additional psychological/CAMHS support outside of MDT clinics?": "Int8", + "Does the patient smoke?": "Int8", + "Was the patient offered an additional appointment with a paediatric dietitian?": "Int8", + "Was the patient using (or trained to use) blood ketone testing equipment at time of visit?": "Int8", + "Reason for admission": "Int8", + "Only complete if DKA selected in previous question: During this DKA admission did the patient receive any of the following therapies?": "Int8", + "Only complete if OTHER selected: Reason for admission (free text)": "str", +} + +NONNULL_FIELDS = [ + "NHS Number", + "Date of Birth", + "Diabetes Type", + "PDU Number", + "Visit/Appointment Date", +] diff --git a/project/constants/postcodes.py b/project/constants/postcodes.py index 83db87de..71c55ed3 100644 --- a/project/constants/postcodes.py +++ b/project/constants/postcodes.py @@ -1,6 +1,11 @@ """ Constants for 'unknown' postcodes These are Office for National Statistics (ONS) codes for where a postcode is not 
known +ZZ99 3VZ No fixed abode +ZZ99 3CZ England/U.K not otherwise specified +ZZ99 3GZ Wales not otherwise specified +ZZ99 1WZ Scotland not otherwise specified +ZZ99 2WZ Northern Ireland not otherwise specified """ UNKNOWN_POSTCODES_NO_SPACES = ["ZZ993CZ", "ZZ993GZ", "ZZ993WZ", "ZZ993VZ"] diff --git a/project/npda/dummy_sheets/local_generated_data/.gitkeep b/project/npda/dummy_sheets/local_generated_data/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/project/npda/dummy_sheets/npda_csv_submission_template_for_use_from_april_2021.csv b/project/npda/dummy_sheets/npda_csv_submission_template_for_use_from_april_2021.csv index 05ce275d..92f9002b 100644 --- a/project/npda/dummy_sheets/npda_csv_submission_template_for_use_from_april_2021.csv +++ b/project/npda/dummy_sheets/npda_csv_submission_template_for_use_from_april_2021.csv @@ -1 +1 @@ -NHS Number,Date of Birth,Postcode of usual address,Stated gender,Ethnic Category,Diabetes Type,Date of Diabetes Diagnosis,Date of leaving service,Reason for leaving service,Death Date,GP Practice Code,PDU Number,Visit/Appointment Date,Patient Height (cm),Patient Weight (kg),Observation Date (Height and weight),Hba1c Value,HbA1c result format,Observation Date: Hba1c Value,Diabetes Treatment at time of Hba1c measurement,"If treatment included insulin pump therapy (i.e. 
option 3 or 6 selected), was this part of a closed loop system?","At the time of HbA1c measurement, in addition to standard blood glucose monitoring (SBGM), was the patient using any other method of glucose monitoring?",Systolic Blood Pressure,Diastolic Blood pressure,Observation Date (Blood Pressure),Foot Assessment / Examination Date,Retinal Screening date,Retinal Screening Result,Urinary Albumin Level (ACR),Observation Date: Urinary Albumin Level,Albuminuria Stage,Total Cholesterol Level (mmol/l),Observation Date: Total Cholesterol Level,Observation Date: Thyroid Function ,"At time of, or following measurement of thyroid function, was the patient prescribed any thyroid treatment?",Observation Date: Coeliac Disease Screening,Has the patient been recommended a Gluten-free diet?,Observation Date - Psychological Screening Assessment,Was the patient assessed as requiring additional psychological/CAMHS support outside of MDT clinics?,Does the patient smoke?,Date of offer of referral to smoking cessation service (if patient is a current smoker),Date of Level 3 carbohydrate counting education received,Was the patient offered an additional appointment with a paediatric dietitian?,Date of additional appointment with dietitian,Was the patient using (or trained to use) blood ketone testing equipment at time of visit?,Date that influenza immunisation was recommended,Date of provision of advice ('sick-day rules') about managing diabetes during intercurrent illness or episodes of hyperglycaemia,Start date (Hospital Provider Spell),Discharge date (Hospital provider spell),Reason for admission,Only complete if DKA selected in previous question: During this DKA admission did the patient receive any of the following therapies?,Only complete if OTHER selected: Reason for admission (free text) +NHS Number,Date of Birth,Postcode of usual address,Stated gender,Ethnic Category,Diabetes Type,Date of Diabetes Diagnosis,Date of leaving service,Reason for leaving service,Death Date,GP 
Practice Code,PDU Number,Visit/Appointment Date,Patient Height (cm),Patient Weight (kg),Observation Date (Height and weight),Hba1c Value,HbA1c result format,Observation Date: Hba1c Value,Diabetes Treatment at time of Hba1c measurement,"If treatment included insulin pump therapy (i.e. option 3 or 6 selected), was this part of a closed loop system?","At the time of HbA1c measurement, in addition to standard blood glucose monitoring (SBGM), was the patient using any other method of glucose monitoring?",Systolic Blood Pressure,Diastolic Blood pressure,Observation Date (Blood Pressure),Foot Assessment / Examination Date,Retinal Screening date,Retinal Screening Result,Urinary Albumin Level (ACR),Observation Date: Urinary Albumin Level,Albuminuria Stage,Total Cholesterol Level (mmol/l),Observation Date: Total Cholesterol Level,Observation Date: Thyroid Function,"At time of, or following measurement of thyroid function, was the patient prescribed any thyroid treatment?",Observation Date: Coeliac Disease Screening,Has the patient been recommended a Gluten-free diet?,Observation Date - Psychological Screening Assessment,Was the patient assessed as requiring additional psychological/CAMHS support outside of MDT clinics?,Does the patient smoke?,Date of offer of referral to smoking cessation service (if patient is a current smoker),Date of Level 3 carbohydrate counting education received,Was the patient offered an additional appointment with a paediatric dietitian?,Date of additional appointment with dietitian,Was the patient using (or trained to use) blood ketone testing equipment at time of visit?,Date that influenza immunisation was recommended,Date of provision of advice ('sick-day rules') about managing diabetes during intercurrent illness or episodes of hyperglycaemia,Start date (Hospital Provider Spell),Discharge date (Hospital provider spell),Reason for admission,Only complete if DKA selected in previous question: During this DKA admission did the patient receive any of 
the following therapies?,Only complete if OTHER selected: Reason for admission (free text) diff --git a/project/npda/forms/external_patient_validators.py b/project/npda/forms/external_patient_validators.py index 29612911..9070cee0 100644 --- a/project/npda/forms/external_patient_validators.py +++ b/project/npda/forms/external_patient_validators.py @@ -7,10 +7,12 @@ from django.core.exceptions import ValidationError from httpx import HTTPError, AsyncClient -from ..general_functions import (gp_details_for_ods_code, - gp_ods_code_for_postcode, - validate_postcode, - imd_for_postcode) +from ..general_functions import ( + gp_details_for_ods_code, + gp_ods_code_for_postcode, + validate_postcode, + imd_for_postcode, +) logger = logging.getLogger(__name__) @@ -21,46 +23,47 @@ class PatientExternalValidationResult: postcode: str | ValidationError | None gp_practice_ods_code: str | ValidationError | None gp_practice_postcode: str | ValidationError | None - index_of_multiple_deprivation_quintile: str | None + index_of_multiple_deprivation_quintile: str | None -async def _validate_postcode(postcode: str | None, async_client: AsyncClient) -> str | None: +async def _validate_postcode( + postcode: str | None, async_client: AsyncClient +) -> str | None: if postcode: try: normalised_postcode = await validate_postcode(postcode, async_client) if not normalised_postcode: raise ValidationError( - "Invalid postcode %(postcode)s", params={"postcode":postcode} + "Invalid postcode %(postcode)s", params={"postcode": postcode} ) - return normalised_postcode except HTTPError as err: logger.warning(f"Error validating postcode {err}") -async def _imd_for_postcode(postcode: str | None, async_client: AsyncClient) -> str | None: +async def _imd_for_postcode( + postcode: str | None, async_client: AsyncClient +) -> str | None: if postcode: try: - imd = await imd_for_postcode( - postcode, async_client - ) + imd = await imd_for_postcode(postcode, async_client) return imd except HTTPError as err: - 
logger.warning( - f"Cannot calculate deprivation score for {postcode} {err}" - ) + logger.warning(f"Cannot calculate deprivation score for {postcode} {err}") -async def _gp_details_from_ods_code(ods_code: str | None, async_client: AsyncClient) -> tuple[str, str] | None: +async def _gp_details_from_ods_code( + ods_code: str | None, async_client: AsyncClient +) -> tuple[str, str] | None: try: result = await gp_details_for_ods_code(ods_code, async_client) if not result: raise ValidationError( "Could not find GP practice with ODS code %(ods_code)s", - params={"ods_code":ods_code} + params={"ods_code": ods_code}, ) else: postcode = result["GeoLoc"]["Location"]["PostCode"] @@ -69,15 +72,19 @@ async def _gp_details_from_ods_code(ods_code: str | None, async_client: AsyncCli logger.warning(f"Error looking up GP practice by ODS code {err}") -async def _gp_details_from_postcode(gp_practice_postcode: str, async_client: AsyncClient) -> tuple[str, str] | None: +async def _gp_details_from_postcode( + gp_practice_postcode: str, async_client: AsyncClient +) -> tuple[str, str] | None: try: - normalised_postcode = await validate_postcode(gp_practice_postcode, async_client) + normalised_postcode = await validate_postcode( + gp_practice_postcode, async_client + ) ods_code = await gp_ods_code_for_postcode(normalised_postcode, async_client) if not ods_code: raise ValidationError( "Could not find GP practice with postcode %(postcode)s", - params={"postcode":gp_practice_postcode} + params={"postcode": gp_practice_postcode}, ) else: return [ods_code, normalised_postcode] @@ -86,7 +93,12 @@ async def _gp_details_from_postcode(gp_practice_postcode: str, async_client: Asy # Run lookups to external APIs asynchronously to speed up CSV upload by processing patients in parallel -async def validate_patient_async(postcode: str, gp_practice_ods_code: str | None, gp_practice_postcode: str | None, async_client: AsyncClient) -> PatientExternalValidationResult: +async def validate_patient_async( + 
postcode: str, + gp_practice_ods_code: str | None, + gp_practice_postcode: str | None, + async_client: AsyncClient, +) -> PatientExternalValidationResult: ret = PatientExternalValidationResult(None, None, None, None) validate_postcode_task = _validate_postcode(postcode, async_client) @@ -99,25 +111,32 @@ async def validate_patient_async(postcode: str, gp_practice_ods_code: str | None else: gp_details_task = asyncio.Future() gp_details_task.set_result(None) - + # This is the Python equivalent of Promise.allSettled # Run all the lookups in parallel but retain exceptions per job rather than returning the first one - [postcode, index_of_multiple_deprivation_quintile, gp_details] = await asyncio.gather( - validate_postcode_task, - imd_for_postcode_task, - gp_details_task, - return_exceptions=True + [postcode, index_of_multiple_deprivation_quintile, gp_details] = ( + await asyncio.gather( + validate_postcode_task, + imd_for_postcode_task, + gp_details_task, + return_exceptions=True, + ) ) if isinstance(postcode, Exception) and not type(postcode) is ValidationError: raise postcode else: ret.postcode = postcode - - if isinstance(index_of_multiple_deprivation_quintile, Exception) and not type(index_of_multiple_deprivation_quintile) is ValidationError: + + if ( + isinstance(index_of_multiple_deprivation_quintile, Exception) + and not type(index_of_multiple_deprivation_quintile) is ValidationError + ): raise index_of_multiple_deprivation_quintile else: - ret.index_of_multiple_deprivation_quintile = index_of_multiple_deprivation_quintile + ret.index_of_multiple_deprivation_quintile = ( + index_of_multiple_deprivation_quintile + ) if type(gp_details) is ValidationError: if gp_practice_ods_code: @@ -135,10 +154,15 @@ async def validate_patient_async(postcode: str, gp_practice_ods_code: str | None return ret -def validate_patient_sync(postcode: str, gp_practice_ods_code: str | None, gp_practice_postcode: str | None) -> PatientExternalValidationResult: + +def 
validate_patient_sync( + postcode: str, gp_practice_ods_code: str | None, gp_practice_postcode: str | None +) -> PatientExternalValidationResult: async def wrapper(): async with AsyncClient() as client: - ret = await validate_patient_async(postcode, gp_practice_ods_code, gp_practice_postcode, client) + ret = await validate_patient_async( + postcode, gp_practice_ods_code, gp_practice_postcode, client + ) return ret - return async_to_sync(wrapper)() \ No newline at end of file + return async_to_sync(wrapper)() diff --git a/project/npda/general_functions/csv_upload.py b/project/npda/general_functions/csv_upload.py index a72d2b33..738931be 100644 --- a/project/npda/general_functions/csv_upload.py +++ b/project/npda/general_functions/csv_upload.py @@ -15,9 +15,7 @@ import httpx # RCPCH imports -from ...constants import ( - ALL_DATES, -) +from ...constants import ALL_DATES, CSV_DATA_TYPES_MINUS_DATES, NONNULL_FIELDS # Logging setup logger = logging.getLogger(__name__) @@ -27,14 +25,26 @@ def read_csv(csv_file): - df = pd.read_csv(csv_file) + """ + Read the csv file and return a pandas dataframe + Assigns the correct data types to the columns + Parses the dates in the columns to the correct format + """ + # Parse the dates in the columns to the correct format first + df = pd.read_csv(csv_file) # Remove leading and trailing whitespace on column names # The template published on the RCPCH website has trailing spaces on 'Observation Date: Thyroid Function ' df.columns = df.columns.str.strip() for column in ALL_DATES: - df[column] = pd.to_datetime(df[column], format="%d/%m/%Y") + if column in df.columns: + df[column] = pd.to_datetime(df[column], format="%d/%m/%Y", errors="coerce") + + # Apply the dtype to non-date columns + for column, dtype in CSV_DATA_TYPES_MINUS_DATES.items(): + df[column] = df[column].astype(dtype) + df[column] = df[column].where(pd.notnull(df[column]), 0) return df @@ -42,7 +52,8 @@ def read_csv(csv_file): async def csv_upload(user, dataframe, csv_file, 
pdu_pz_code): """ Processes standardised NPDA csv file and persists results in NPDA tables - Returns the empty dict if successful, otherwise ValidationErrors indexed by the row they occurred at + Returns the empty dict if successful, otherwise ValidationErrors indexed by the row they occurred at + Also return the dataframe for later summary purposes """ Patient = apps.get_model("npda", "Patient") Transfer = apps.get_model("npda", "Transfer") @@ -81,12 +92,14 @@ async def csv_upload(user, dataframe, csv_file, pdu_pz_code): if csv_file: # save the csv file with a custom name - new_filename = f"{pdu.pz_code}_{timezone.now().strftime('%Y%m%d_%H%M%S')}.csv" + new_filename = ( + f"{pdu.pz_code}_{timezone.now().strftime('%Y%m%d_%H%M%S')}.csv" + ) # save=False so it doesn't try to save the parent, which would cause an error in an async context # we save immediately after this anyway new_submission.csv_file.save(new_filename, csv_file, save=False) - + await new_submission.asave() except Exception as e: @@ -101,7 +114,9 @@ async def csv_upload(user, dataframe, csv_file, pdu_pz_code): # now can delete all patients and visits from the previous active submission if original_submission: try: - original_submission_patient_count = await Patient.objects.filter(submissions=original_submission).acount() + original_submission_patient_count = await Patient.objects.filter( + submissions=original_submission + ).acount() print( f"Deleting patients from previous submission: {original_submission_patient_count}" ) @@ -139,9 +154,9 @@ def csv_value_to_model_value(model_field, value): if isinstance(value, pd.Timestamp): return value.to_pydatetime().date() - if model_field.choices: - # If the model field has choices, we need to convert the value to the correct type otherwise 1, 2 will be saved as booleans - return model_field.to_python(value) + # if model_field.choices: + # # If the model field has choices, we need to convert the value to the correct type otherwise 1, 2 will be saved as 
booleans + # return model_field.to_python(value) return value @@ -180,13 +195,13 @@ async def validate_patient_using_form(row, async_client): "death_date": "Death Date", }, ) - + form = PatientForm(fields) form.async_validation_results = await validate_patient_async( - postcode=fields["postcode"], - gp_practice_ods_code=fields["gp_practice_ods_code"], - gp_practice_postcode=None, - async_client=async_client + postcode=fields["postcode"], + gp_practice_ods_code=fields["gp_practice_ods_code"], + gp_practice_postcode=None, + async_client=async_client, ) return form @@ -250,7 +265,10 @@ async def validate_rows(rows, async_client): patient_form = await validate_patient_using_form(first_row, async_client) visits = rows.apply( - lambda row: (validate_visit_using_form(patient_form.instance, row), row["row_index"]), + lambda row: ( + validate_visit_using_form(patient_form.instance, row), + row["row_index"], + ), axis=1, ) @@ -292,9 +310,16 @@ async def validate_rows_in_parallel(rows_by_patient, async_client): errors_to_return = collections.defaultdict(lambda: collections.defaultdict(list)) async with httpx.AsyncClient() as async_client: - validation_results_by_patient = await validate_rows_in_parallel(visits_by_patient, async_client) + validation_results_by_patient = await validate_rows_in_parallel( + visits_by_patient, async_client + ) - for (patient_form, transfer_fields, patient_row_index, visits) in validation_results_by_patient: + for ( + patient_form, + transfer_fields, + patient_row_index, + visits, + ) in validation_results_by_patient: for field, error in patient_form.errors.as_data().items(): errors_to_return[patient_row_index][field].append(error) @@ -302,7 +327,9 @@ async def validate_rows_in_parallel(rows_by_patient, async_client): patient = create_instance(Patient, patient_form) # We don't call PatientForm.save as there's no async version so we have to set this manually - patient.index_of_multiple_deprivation_quintile = 
patient_form.async_validation_results.index_of_multiple_deprivation_quintile + patient.index_of_multiple_deprivation_quintile = ( + patient_form.async_validation_results.index_of_multiple_deprivation_quintile + ) await patient.asave() @@ -316,7 +343,7 @@ async def validate_rows_in_parallel(rows_by_patient, async_client): # We don't know what field caused the error so add to __all__ errors_to_return[patient_row_index]["__all__"].append(error) - for (visit_form, visit_row_index) in visits: + for visit_form, visit_row_index in visits: for field, error in visit_form.errors.as_data().items(): errors_to_return[visit_row_index][field].append(error) @@ -326,5 +353,5 @@ async def validate_rows_in_parallel(rows_by_patient, async_client): await visit.asave() except Exception as error: errors_to_return[visit_row_index]["__all__"].append(error) - + return errors_to_return diff --git a/project/npda/general_functions/data_generator_extended.py b/project/npda/general_functions/data_generator_extended.py index 51955f39..e56fc3cd 100644 --- a/project/npda/general_functions/data_generator_extended.py +++ b/project/npda/general_functions/data_generator_extended.py @@ -28,6 +28,7 @@ SMOKING_STATUS, HOSPITAL_ADMISSION_REASONS, DKA_ADDITIONAL_THERAPIES, + CLOSED_LOOP_TYPES, ) @@ -147,13 +148,9 @@ def build_fake_visits( # q1 visit_types[0:n_visits_per_quarter], # q2 - visit_types[ - n_visits_per_quarter : n_visits_per_quarter * 2 - ], + visit_types[n_visits_per_quarter : n_visits_per_quarter * 2], # q3 - visit_types[ - n_visits_per_quarter * 2 : n_visits_per_quarter * 3 - ], + visit_types[n_visits_per_quarter * 2 : n_visits_per_quarter * 3], # q4 - all remaining visit_types[n_visits_per_quarter * 3 :], ] @@ -170,9 +167,7 @@ def build_fake_visits( # For each visit, randomly assign a date within quarter for visit_type in visits_in_q: - visit_date = get_random_date( - quarter_start_date, quarter_end_date - ) + visit_date = get_random_date(quarter_start_date, quarter_end_date) # Get the 
correct kwarg measurements for the visit type # These will be fed into this VisitFactory's.build() call @@ -306,9 +301,7 @@ def _clinic_measures( - BP """ height, weight, height_weight_observation_date = ( - self._height_weight_observations( - age_range=age_range, visit_date=visit_date - ) + self._height_weight_observations(age_range=age_range, visit_date=visit_date) ) hba1c, hba1c_format, hba1c_date = self._hba1c_observations( @@ -378,23 +371,23 @@ def _annual_review_measures(self, visit_date: date): albumin_creatinine_ratio_date, albuminuria_stage, ) = self._acr_observations(visit_date=visit_date) - total_cholesterol, total_cholesterol_date = ( - self._cholesterol_observations(visit_date=visit_date) + total_cholesterol, total_cholesterol_date = self._cholesterol_observations( + visit_date=visit_date ) - thyroid_function_date, thyroid_treatment_status = ( - self._thyroid_observations(visit_date=visit_date) + thyroid_function_date, thyroid_treatment_status = self._thyroid_observations( + visit_date=visit_date ) coeliac_screen_date, gluten_free_diet = self._coeliac_observations( visit_date=visit_date ) - smoking_status, smoking_cessation_referral_date = ( - self._smoking_observations(visit_date=visit_date) + smoking_status, smoking_cessation_referral_date = self._smoking_observations( + visit_date=visit_date ) carbohydrate_counting_level_three_education_date = ( self._carbohydrate_counting_observations(visit_date=visit_date) ) - flu_immunisation_recommended_date = ( - self._flu_immunisation_observations(visit_date=visit_date) + flu_immunisation_recommended_date = self._flu_immunisation_observations( + visit_date=visit_date ) ketone_meter_training = self._ketone_meter_observations() sick_day_rules_training_date = self._sick_day_rules_observations( @@ -431,9 +424,7 @@ def _dietician_observations(self, visit_date: date): dietician_additional_appointment_offered: int dietician_additional_appointment_date: date """ - dietician_additional_appointment_offered = 
random.choice( - YES_NO_UNKNOWN - )[0] + dietician_additional_appointment_offered = random.choice(YES_NO_UNKNOWN)[0] dietician_additional_appointment_date = visit_date return { "dietician_additional_appointment_offered": dietician_additional_appointment_offered, @@ -450,9 +441,7 @@ def _psychological_observations(self, visit_date: date): psychological_additional_support_status: int """ psychological_screening_assessment_date = visit_date - psychological_additional_support_status = random.choice( - YES_NO_UNKNOWN - )[0] + psychological_additional_support_status = random.choice(YES_NO_UNKNOWN)[0] return { "psychological_screening_assessment_date": psychological_screening_assessment_date, "psychological_additional_support_status": psychological_additional_support_status, @@ -470,9 +459,7 @@ def _hospital_admission_observations( """ hospital_admission_date = visit_date hospital_discharge_date = visit_date - hospital_admission_reason = random.choice(HOSPITAL_ADMISSION_REASONS)[ - 0 - ] + hospital_admission_reason = random.choice(HOSPITAL_ADMISSION_REASONS)[0] dka_additional_therapies = random.choice(DKA_ADDITIONAL_THERAPIES)[0] hospital_admission_other = None return { @@ -506,8 +493,8 @@ def _height_weight_observations( AgeRange.AGE_20_25: (170, 190, 60, 90), } - height_min, height_max, weight_min, weight_max = ( - height_weight_observations.get(AgeRange(age_range.value)) + height_min, height_max, weight_min, weight_max = height_weight_observations.get( + AgeRange(age_range.value) ) height = round(random.uniform(height_min, height_max), 2) @@ -570,16 +557,14 @@ def _treatment_observations(self, diabetes_type: int): closed_loop_system: """ if diabetes_type == 1: - treatment = random.choice(TREATMENT_TYPES[0:6])[ - 0 - ] # MDI or pump options + treatment = random.choice(TREATMENT_TYPES[0:6])[0] # MDI or pump options else: treatment = random.choice( [1, 2, 4, 5, 7, 8, 9] ) # insulin or non-insulin options compatible with type 2 diabetes if diabetes_type == 1: - 
closed_loop_system = random.choice([True, False]) + closed_loop_system = random.choice(CLOSED_LOOP_TYPES)[0] else: closed_loop_system = YES_NO_UNKNOWN[0][0] # No diff --git a/project/npda/management/commands/create_csv.py b/project/npda/management/commands/create_csv.py new file mode 100644 index 00000000..49e3e8c9 --- /dev/null +++ b/project/npda/management/commands/create_csv.py @@ -0,0 +1,642 @@ +"""TODO: + - [ ] Move constants to a separate file. Currently importing from `seed_submission.py`. + - [ ] Generalise the parsing of inputs and share between this and `seed_submission.py`. + +Generate a CSV using data generator. + +Default behavior of data generator is creating VALID visits - CSV. + +Example use: + + python manage.py create_csv \ + --pts=5 \ + --visits="CDCD DHPC ACDC CDCD" \ + --hb_target=T \ + --age_range=11_15 + + Will generate 1 csv file with 5 patients, each with 12 visits, with the visit encoding provided. + The HbA1c target range for each visit will be set to 'TARGET'. + The resulting csv will have 5 * 12 = 60 rows (one for each visit). + + ## Building multiple larger csv files + + This can be used to create a spread of data with different ages, visits, hb_targets etc. + + Using the `--build` flag will generate a `build` csv file, the same as above, but with + a `build_` filename prefix. The `--coalesce` flag can be used to combine all the build files + into a single csv file. + + python manage.py create_csv \ + --pts=5 \ + --visits="CDCD DHPC ACDC CDCD" \ + --hb_target=T \ + --age_range=11_15 \ + --build \ + && python manage.py create_csv \ + --pts=5 \ + --visits="CDCCD DDCC CACC" \ + --hb_target=A \ + --age_range=16_19 \ + --build \ + && python manage.py create_csv \ + --pts=5 \ + --visits="CDC ACDC CDCD" \ + --hb_target=T \ + --age_range=0_4 \ + --build \ + && python manage.py create_csv \ + --coalesce + + + Options: + + --pts (int, required): + The number of pts to seed for this csv file. 
(NOTE: resulting rows will be pts * visits) + + --visits (str, required): + A string encoding the VisitTypes each patient should have. Use + visit type abbreviations. Can use whitespace (ignored) + (e.g., "CDCD DHPC ACDC CDCD"). + Each patient will have associated Visits in the sequence provided, + evenly spread throughout the audit year's quarters, randomly within + each quarter. + + Visit type options: + - C (CLINIC) + - A (ANNUAL_REVIEW) + - D (DIETICIAN) + - P (PSYCHOLOGY) + - H (HOSPITAL_ADMISSION) + + --hb_target (str, required): + Character setting for HbA1c target range per visit: + - T (TARGET) + - A (ABOVE) + - W (WELL_ABOVE) + + --age_range (str, optional): + The possible age range for the patients to be seeded. + Defaults to 11_15. + - 0_4 + - 5_10 + - 11_15 + - 16_19 + - 20_25 + + --submission_date (str, optional): + The submission date in YYYY-MM-DD format. Defaults to today. This + date is used to set the audit period's start and end dates, and visit + values e.g. diabetes diagnosis date. + + --output_path (str, optional): + Path to save the csv. Defaults to `project/npda/dummy_sheets/local_generated_data`. + +Implementation notes: + + We can use the `FakePatientCreator`'s `.build()` methods to generate Python object stubs of Patients and Visits. We then use pandas to concatenate these values into the csv. + + Factory `.build()` will not create related objects (but is significantly quicker). At the end, + need to additionally add the `Transfer` column values manually. 
+""" + +from collections import defaultdict +from datetime import datetime +import os +import random +import sys + +from django.utils import timezone +from django.core.management.base import BaseCommand +import pandas as pd + +from project.constants.csv_headings import ALL_DATES, CSV_HEADINGS +from project.npda.general_functions.audit_period import ( + get_audit_period_for_date, +) +from project.npda.general_functions.data_generator_extended import ( + AgeRange, + FakePatientCreator, + HbA1cTargetRange, +) +from project.npda.management.commands.seed_submission import ( + letter_name_map, + hb_target_map, + age_range_map, + CYAN, + RESET, + GREEN, +) + +PZ_CODE = "PZ999" +GP_ODS_CODES = [ + "A81001", + "A81002", + "A81004", + "A81005", + "A81006", + "A81007", + "A81009", + "A81011", + "A81012", + "A81013", + "A81014", + "A81016", + "A81017", + "A81018", + "A81019", + "A81020", + "A81021", + "A81022", + "A81023", + "A81025", +] +TEMPLATE_HEADERS = pd.read_csv( + "project/npda/dummy_sheets/npda_csv_submission_template_for_use_from_april_2021.csv" +).columns + + +class Command(BaseCommand): + help = "Creates a csv file that can be uploaded to the NPDA platform." + + def print_success(self, message: str): + self.stdout.write(self.style.SUCCESS(message)) + + def print_info(self, message: str): + self.stdout.write(self.style.WARNING(message)) + + def print_error(self, message: str): + self.stdout.write(self.style.ERROR(f"ERROR: {message}")) + + def add_arguments(self, parser): + # Primary parser for standard arguments + parser.add_argument( + "--pts", + type=int, + help="Number of patients to seed.", + required="--coalesce" + not in sys.argv, # Set required only if --coalesce is not used + ) + parser.add_argument( + "--visits", + type=str, + help="Visit types (e.g., 'CDCD DHPC ACDC CDCD'). 
Can have whitespaces, these will be ignored.", + required="--coalesce" + not in sys.argv, # Set required only if --coalesce is not used + ) + parser.add_argument( + "--hb_target", + type=str, + choices=["T", "A", "W"], + help="HBA1C Target range for visit seeding.", + required="--coalesce" + not in sys.argv, # Set required only if --coalesce is not used + ) + parser.add_argument( + "--submission_date", + type=str, + help="Submission date in YYYY-MM-DD format (optional, defaults to today).", + ) + parser.add_argument( + "--output_path", + type=str, + help="Path to save the csv", + default="project/npda/dummy_sheets/local_generated_data", + ) + parser.add_argument( + "--age_range", + type=str, + default="11_15", + choices=["0_4", "5_10", "11_15", "16_19", "20_25"], + help="Age range for patients to be seeded.", + ) + + # Mutually exclusive group for --build and --coalesce + mutex_group = parser.add_mutually_exclusive_group() + mutex_group.add_argument( + "--build", + action="store_true", + help="Outputs a build csv file.", + ) + mutex_group.add_argument( + "--coalesce", + action="store_true", + help="Coalesces build csv files.", + ) + + def handle(self, *args, **options): + + # If --coalesce is provided, ignore other options + if options["coalesce"]: + # Only coalesce, ignoring all other arguments + self._run_coalesce(**options) + return + + if not (parsed_values := self._parse_values_from_options(**options)): + return + + audit_start_date = parsed_values["audit_start_date"] + audit_end_date = parsed_values["audit_end_date"] + n_pts_to_seed = parsed_values["n_pts_to_seed"] + hba1c_target = parsed_values["hba1c_target"] + visits = parsed_values["visits"] + visit_types = parsed_values["visit_types"] + submission_date = parsed_values["submission_date"] + age_range = parsed_values["age_range"] + output_path = parsed_values["output_path"] + build_flag = parsed_values["build_flag"] + + # PRINT INFORMATION + # Header + self.print_info(f"{CYAN}--- Build Information 
---{RESET}\n") + + # Build information table + build_info = [ + ["Build Mode", "ON" if build_flag else "OFF"], + ["Submission Date", submission_date], + ["Audit Start Date", audit_start_date], + ["Audit End Date", audit_end_date], + ] + for item in build_info: + self.print_info(f"{CYAN}{item[0]:<30}{RESET} {item[1]}") + + # Seeding information table + seeding_info = [ + ["Number of Patients to Seed", n_pts_to_seed], + ["Number of Visits per Patient", len(visit_types)], + ["Total Rows in Resulting CSV", n_pts_to_seed * len(visit_types)], + ["HbA1c Target Range", hba1c_target.name], + ["Age Range", f"{age_range.name}"], + ] + self.print_info("-" * 45) + for item in seeding_info: + self.print_info(f"{CYAN}{item[0]:<30}{RESET} {item[1]}") + # Visit types table + + self.print_info(f"\n--- Visit Types Provided ---\n") + + # Divide the list into chunks of 4 for a compact table + visit_types_chunks = [ + visit_types[i : i + 4] for i in range(0, len(visit_types), 4) + ] + for chunk in visit_types_chunks: + self.print_info( + " ".join(f"{CYAN}{visit}{RESET}" for visit in chunk) + ) + print() + + self.generate_csv( + audit_start_date, + audit_end_date, + n_pts_to_seed, + age_range, + hba1c_target, + visits, + visit_types, + output_path, + build_flag, + ) + self.print_success( + f"✨ CSV generated successfully at {self.csv_name}.\n" + ) + if build_flag: + self.print_info( + f"Coalesce the build csv files using the --coalesce flag." 
+ ) + + def generate_csv( + self, + audit_start_date, + audit_end_date, + n_pts_to_seed, + age_range, + hba1c_target, + visits, + visit_types, + output_path, + build_flag, + ): + + # Start csv logic + + # First initialise FakePatientCreator object + fake_patient_creator = FakePatientCreator( + audit_start_date=audit_start_date, + audit_end_date=audit_end_date, + ) + + # Build pt stubs + new_pts = fake_patient_creator.build_fake_patients( + n=n_pts_to_seed, + age_range=age_range, + ) + + # For each pt, add visits + new_visits = fake_patient_creator.build_fake_visits( + patients=new_pts, + age_range=age_range, + hb1ac_target_range=hba1c_target, + visit_types=visit_types, + ) + + # `CSV_HEADINGS` is a tuple for csv headings and model fields + # Create a map = { + # model : { + # model_field : csv_heading + # } + # } + csv_map = self._get_map_model_csv_heading_field() + + # Initialise data list, where each item is a dict relating to a row in the csv + # Each dict will have keys as csv headings and values as the data + data = [] + + # We're using the build method so Patients and Visits are separate objects + # Need to manually iterate and join the data + N_VISIT_TYPES = len(visit_types) + for ix, pt in enumerate(new_pts): + gp_ods_code = random.choice(GP_ODS_CODES) + visit_start_idx = ix * N_VISIT_TYPES + visit_end_idx = visit_start_idx + N_VISIT_TYPES + for visit in new_visits[visit_start_idx:visit_end_idx]: + visit_dict = {} + + for model, field_heading_mappings in csv_map.items(): + for ( + model_field, + csv_heading, + ) in field_heading_mappings.items(): + if model == "Visit": + visit_dict[csv_heading] = getattr( + visit, model_field + ) + elif model == "Patient": + # Foreign key so need to manually set the value + if model_field == "pdu": + visit_dict[csv_heading] = PZ_CODE + continue + if model_field == "gp_ods_code": + visit_dict[csv_heading] = gp_ods_code + continue + + visit_dict[csv_heading] = getattr(pt, model_field) + + # date of leaving service + # & 
reason for leaving service. Ignore for now + elif model == "Transfer": + visit_dict[csv_heading] = None + + data.append(visit_dict) + + df = self._set_valid_dtypes(pd.DataFrame(data)) + + self.csv_name = self._get_file_name( + n_pts_to_seed=n_pts_to_seed, + visits=visits, + build=build_flag, + output_path=output_path, + age_range=age_range, + hb_target=hba1c_target, + ) + df.to_csv( + self.csv_name, + index=False, + ) + + def _run_coalesce(self, **options): + self.print_info("Coalescing build csv files...") + + # Get the existing build files + existing_build_files = [ + f + for f in os.listdir(options["output_path"]) + if f.startswith("build") + ] + if not existing_build_files: + self.print_error( + f"No build files to coalesce in {options['output_path']}/" + ) + return + self.print_info(f"{CYAN}Existing build files: {RESET}\n") + for file in existing_build_files: + self.print_info(f"\t{file}") + print() + # Coalesce the build files + + # First read all the files into a list of dataframes + dfs = [] + for file in existing_build_files: + dfs.append(pd.read_csv(os.path.join(options["output_path"], file))) + + # Make sure the columns are the same + seen_cols = set(dfs[0].columns) + for i, df in enumerate(dfs[1:], 1): + if set(df.columns) != seen_cols: + self.print_error( + f"Column mismatch in {existing_build_files[i]} compared to {existing_build_files[0]}" + ) + + # Concatenate the dataframes + df = pd.concat(dfs, axis=0, join="outer").reset_index(drop=True) + df = self._set_valid_dtypes(df) + + df.info() + + csv_file_name = ( + f"coalesced_{datetime.now().strftime('%Y%m%d%H%M%S')}.csv" + ) + full_csv_path = os.path.join(options["output_path"], csv_file_name) + df.to_csv( + full_csv_path, + index=False, + ) + + self.print_success( + f"\n✨ CSV coalesced successfully at {full_csv_path}.\n" + ) + + # PRINT OUT DIFFERENCE IN DATA TYPES + comparison_csv = "dummy_sheet_invalid.csv" + orig = pd.read_csv(f"project/npda/dummy_sheets/{comparison_csv}") + # Get data types for 
both DataFrames + orig_dtypes = orig.dtypes + new_dtypes = df.dtypes + # Identify columns with differing data types + mismatched_dtypes = {} + for col in orig_dtypes.index: + if col in new_dtypes.index and orig_dtypes[col] != new_dtypes[col]: + mismatched_dtypes[col] = (orig_dtypes[col], new_dtypes[col]) + + # Print out mismatched columns and their respective data types + self.print_error( + f"Columns with differing data types from {comparison_csv}:" + ) + self.print_info("NOTE: columns with Nan are cast to float") + for col, (orig_type, new_type) in mismatched_dtypes.items(): + print( + f"{col}: original type = {GREEN}{orig_type}{RESET}, coalesced type = {CYAN}{new_type}{RESET}" + ) + print( + f"Value in original: {GREEN}{orig[col].iloc[0]}{RESET}, value in coalesced: {CYAN}{df[col].iloc[0]}{RESET}\n" + ) + + return + + def _set_valid_dtypes(self, df: pd.DataFrame) -> pd.DataFrame: + """Sets the correct data types for the dataframe, making them same as original + dummy_sheet_invalid.csv file (to ensure we handle errors). 
+ """ + + df = ( + df.assign( + # Convert each date column in ALL_DATES to the desired format, preserving null values as NaT + **{ + date_header: lambda x, date_header=date_header: pd.to_datetime( + x[date_header], format="%d/%m/%Y", errors="coerce" + ) + for date_header in ALL_DATES + }, + **{ + # Convert mismatched columns to the correct data type + "NHS Number": lambda x: x["NHS Number"].astype(str), + + }, + ) + # Reorder columns + [TEMPLATE_HEADERS] + ) + + # Ensure the formatting is right for validation + for date_header in ALL_DATES: + df[date_header] = df[date_header].dt.strftime("%d/%m/%Y") + + return df + + def _parse_values_from_options(self, **options): + + # Handle submission_date with default to today's date if not provided + submission_date_str = options.get("submission_date") + if submission_date_str: + try: + submission_date = timezone.make_aware( + datetime.strptime(submission_date_str, "%Y-%m-%d") + ).date() + except ValueError: + self.print_error( + "Invalid submission_date format. Use YYYY-MM-DD." 
+ ) + return + else: + submission_date = timezone.now().date() + + audit_start_date, audit_end_date = get_audit_period_for_date( + submission_date + ) + + # Number of patients to seed (pts) + n_pts_to_seed = options["pts"] + + # Visit types + visits: str = options["visits"] + # Map to actual VisitType + # NOTE: `_map_visit_type_letters_to_names` already did some basic validation + visit_types = list( + map( + lambda letter: letter_name_map[letter], + visits.replace(" ", ""), + ) + ) + + # hba1c target + hba1c_target = hb_target_map[options["hb_target"]] + + # age range + age_range = age_range_map[options["age_range"]] + + # output path + output_path = options["output_path"] + + # flags + build_flag = options["build"] + + return { + "n_pts_to_seed": n_pts_to_seed, + "audit_start_date": audit_start_date, + "audit_end_date": audit_end_date, + "hba1c_target": hba1c_target, + "visits": visits, + "visit_types": visit_types, + "submission_date": submission_date, + "output_path": output_path, + "age_range": age_range, + "build_flag": build_flag, + } + + def _get_file_name( + self, + n_pts_to_seed: str, + visits: str, + output_path: str, + age_range: AgeRange, + hb_target: HbA1cTargetRange, + build: bool = False, + ) -> str: + + building_str = "" + if build: + # First count the number of existing files to use this as filename prefix + existing_files = [ + f for f in os.listdir(output_path) if f.startswith("build") + ] + + # Set the building string filename prefix + building_str = f"build__{len(existing_files) + 1}_" + + output_path = os.path.join( + output_path, + f"{building_str}{datetime.now().strftime("%Y%m%d%H%M%S")}-npda-seed-data-{n_pts_to_seed}pts-{age_range.name}-{hb_target.name}-{visits.replace(' ', '')}.csv", + ) + return output_path + + def _map_visit_type_letters_to_names(self, vt_letters: str) -> str: + rendered_vt_names: list[str] = [] + + for letter in vt_letters: + if letter == " ": + rendered_vt_names.append("\n\t") + continue + if letter.upper() not in 
"CADPH": + self.print_error("INVALID VISIT TYPE LETTER: " + letter) + + rendered_vt_names.append(f"\n\t{letter_name_map[letter]}") + + return "".join(rendered_vt_names) + + def _get_map_model_csv_heading_field(self) -> dict: + """Generates dict that looks like: + + { + 'Patient': { + 'nhs_number': 'NHS Number', + 'date_of_birth': 'Date of Birth', + ... + }, + 'Visit': { + 'visit_type': 'Visit Type', + 'visit_date': 'Visit/Appointment Date', + ... + }, + 'Transfer': { + 'date_leaving_service': 'Date Leaving Service', + ... + } + """ + + map_model_csv_heading_field = defaultdict(dict) + + for item in CSV_HEADINGS: + model = item["model"] + csv_heading = item["heading"] + model_field = item["model_field"] + map_model_csv_heading_field[model][model_field] = csv_heading + + return map_model_csv_heading_field diff --git a/project/npda/management/commands/seed_submission.py b/project/npda/management/commands/seed_submission.py index 3a7affdb..40d8f52a 100644 --- a/project/npda/management/commands/seed_submission.py +++ b/project/npda/management/commands/seed_submission.py @@ -38,6 +38,16 @@ - T (TARGET) - A (ABOVE) - W (WELL_ABOVE) + + --age_range (str, optional): + The possible age range for the patients to be seeded. + Defaults to 11_15. + - 0_4 + - 5_10 + - 11_15 + - 16_19 + - 20_25 + --user_pk (int, optional): The primary key of the user for whom the submission is created. Defaults to the seeded SuperuserAda. Note that Submission.pdu is set @@ -87,11 +97,18 @@ "A": HbA1cTargetRange.ABOVE, "W": HbA1cTargetRange.WELL_ABOVE, } +age_range_map = { + "0_4": AgeRange.AGE_0_4, + "5_10": AgeRange.AGE_5_10, + "11_15": AgeRange.AGE_11_15, + "16_19": AgeRange.AGE_16_19, + "20_25": AgeRange.AGE_20_25, +} # ANSI Colour Codes CYAN = "\033[96m" RESET = "\033[0m" - +GREEN = "\033[92m" class Command(BaseCommand): help = "Seeds submission with specific user, submission date, number of patients, and visit types." 
@@ -136,6 +153,13 @@ def add_arguments(self, parser): choices=["T", "A", "W"], help="HBA1C Target range for visit seeding.", ) + parser.add_argument( + "--age_range", + type=str, + default="11_15", + choices=["0_4", "5_10", "11_15", "16_19", "20_25"], + help="Age range for patients to be seeded.", + ) def handle(self, *args, **options): @@ -151,6 +175,7 @@ def handle(self, *args, **options): user_pk = parsed_values["user_pk"] submission_by = parsed_values["submission_by"] submission_date = parsed_values["submission_date"] + age_range = age_range_map[options["age_range"]] # Associate submission's PDU with user primary_pdu_for_user = ( @@ -183,6 +208,9 @@ def handle(self, *args, **options): ).split("\n") ) self.print_info(f"Visit types provided:\n {formatted_visits}\n") + # Now create the submission + self.print_info(f"HbA1c target: {CYAN}{hba1c_target.name}{RESET}\n") + self.print_info(f"Age range: {CYAN}{age_range.name}{RESET}\n") # Start seeding logic @@ -193,14 +221,13 @@ def handle(self, *args, **options): ) new_pts = fake_patient_creator.create_and_save_fake_patients( n=n_pts_to_seed, - age_range=AgeRange.AGE_11_15, + age_range=age_range, hb1ac_target_range=hba1c_target, visit_types=visit_types, visit_kwargs={"is_valid": True}, ) - # Now create the submission - self.print_info(f"HbA1c target: {CYAN}{hba1c_target.name}{RESET}\n") + # Need a mock csv with open("project/npda/dummy_sheets/dummy_sheet.csv", "rb") as f: @@ -277,6 +304,9 @@ def _parse_values_from_options(self, **options): # hba1c target hba1c_target = hb_target_map[options["hb_target"]] + + # Age range + age_range = age_range_map[options["age_range"]] return { "n_pts_to_seed": n_pts_to_seed, @@ -288,6 +318,7 @@ def _parse_values_from_options(self, **options): "submission_by": submission_by, "user_pk": user_pk, "submission_date": submission_date, + "age_range": age_range, } def _map_visit_type_letters_to_names(self, vt_letters: str) -> str: diff --git a/project/npda/tests/test_csv_upload.py 
b/project/npda/tests/test_csv_upload.py index 3e6b3a0e..8b474e76 100644 --- a/project/npda/tests/test_csv_upload.py +++ b/project/npda/tests/test_csv_upload.py @@ -17,24 +17,36 @@ from project.npda.general_functions.csv_upload import csv_upload, read_csv from project.npda.models import NPDAUser, Patient, Visit from project.npda.tests.factories.patient_factory import ( - INDEX_OF_MULTIPLE_DEPRIVATION_QUINTILE, TODAY, VALID_FIELDS) -from project.npda.forms.external_patient_validators import PatientExternalValidationResult + INDEX_OF_MULTIPLE_DEPRIVATION_QUINTILE, + TODAY, + VALID_FIELDS, +) +from project.npda.forms.external_patient_validators import ( + PatientExternalValidationResult, +) MOCK_EXTERNAL_VALIDATION_RESULT = PatientExternalValidationResult( postcode=VALID_FIELDS["postcode"], gp_practice_ods_code=VALID_FIELDS["gp_practice_ods_code"], gp_practice_postcode=None, - index_of_multiple_deprivation_quintile=INDEX_OF_MULTIPLE_DEPRIVATION_QUINTILE + index_of_multiple_deprivation_quintile=INDEX_OF_MULTIPLE_DEPRIVATION_QUINTILE, ) + def mock_external_validation_result(**kwargs): - return AsyncMock(return_value=dataclasses.replace(MOCK_EXTERNAL_VALIDATION_RESULT, **kwargs)) + return AsyncMock( + return_value=dataclasses.replace(MOCK_EXTERNAL_VALIDATION_RESULT, **kwargs) + ) + # We don't want to call remote services in unit tests @pytest.fixture(autouse=True) def mock_remote_calls(): - with patch("project.npda.general_functions.csv_upload.validate_patient_async", AsyncMock(return_value=MOCK_EXTERNAL_VALIDATION_RESULT)): + with patch( + "project.npda.general_functions.csv_upload.validate_patient_async", + AsyncMock(return_value=MOCK_EXTERNAL_VALIDATION_RESULT), + ): yield None @@ -43,59 +55,72 @@ def mock_remote_calls(): @pytest.fixture def valid_df(dummy_sheets_folder): - return read_csv(dummy_sheets_folder / 'dummy_sheet.csv') + return read_csv(dummy_sheets_folder / "dummy_sheet.csv") + @pytest.fixture def single_row_valid_df(dummy_sheets_folder): - df = 
read_csv(dummy_sheets_folder / 'dummy_sheet.csv').head(1) - assert(len(df) == 1) + df = read_csv(dummy_sheets_folder / "dummy_sheet.csv").head(1) + assert len(df) == 1 return df + @pytest.fixture def one_patient_two_visits(dummy_sheets_folder): - df = read_csv(dummy_sheets_folder / 'dummy_sheet.csv').head(2) + df = read_csv(dummy_sheets_folder / "dummy_sheet.csv").head(2) - assert(len(df) == 2) - assert(df["NHS Number"][0] == df["NHS Number"][1]) + assert len(df) == 2 + assert df["NHS Number"][0] == df["NHS Number"][1] return df + @pytest.fixture def two_patients_first_with_two_visits_second_with_one(dummy_sheets_folder): - df = read_csv(dummy_sheets_folder / 'dummy_sheet.csv').head(3) + df = read_csv(dummy_sheets_folder / "dummy_sheet.csv").head(3) - assert(len(df) == 3) - assert(df["NHS Number"][0] == df["NHS Number"][1]) - assert(df["NHS Number"][2] != df["NHS Number"][0]) + assert len(df) == 3 + assert df["NHS Number"][0] == df["NHS Number"][1] + assert df["NHS Number"][2] != df["NHS Number"][0] return df + @pytest.fixture def two_patients_with_one_visit_each(dummy_sheets_folder): - df = read_csv(dummy_sheets_folder / 'dummy_sheet.csv').drop([0]).head(2).reset_index(drop=True) + df = ( + read_csv(dummy_sheets_folder / "dummy_sheet.csv") + .drop([0]) + .head(2) + .reset_index(drop=True) + ) - assert(len(df) == 2) - assert(df["NHS Number"][1] != df["NHS Number"][0]) + assert len(df) == 2 + assert df["NHS Number"][1] != df["NHS Number"][0] return df + @pytest.fixture def test_user(seed_groups_fixture, seed_users_fixture): return NPDAUser.objects.filter( organisation_employers__pz_code=ALDER_HEY_PZ_CODE ).first() + @sync_to_async def async_get_all(query_set_fn): return list(query_set_fn()) + # The database is not rolled back if we used the built in async support for pytest # https://github.com/pytest-dev/pytest-asyncio/issues/226 @async_to_sync async def csv_upload_sync(user, dataframe, csv_file, pdu_pz_code): return await csv_upload(user, dataframe, csv_file, 
pdu_pz_code) + def read_csv_from_str(contents): with tempfile.NamedTemporaryFile() as f: f.write(contents.encode()) @@ -109,11 +134,16 @@ def test_create_patient(test_user, single_row_valid_df): csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE) patient = Patient.objects.first() - assert(patient.nhs_number == nhs_number.standardise_format(single_row_valid_df["NHS Number"][0])) - assert(patient.date_of_birth == single_row_valid_df["Date of Birth"][0].date()) - assert(patient.diabetes_type == single_row_valid_df["Diabetes Type"][0]) - assert(patient.diagnosis_date == single_row_valid_df["Date of Diabetes Diagnosis"][0].date()) - assert(patient.death_date is None) + assert patient.nhs_number == nhs_number.standardise_format( + single_row_valid_df["NHS Number"][0] + ) + assert patient.date_of_birth == single_row_valid_df["Date of Birth"][0].date() + assert patient.diabetes_type == single_row_valid_df["Diabetes Type"][0] + assert ( + patient.diagnosis_date + == single_row_valid_df["Date of Diabetes Diagnosis"][0].date() + ) + assert patient.death_date is None @pytest.mark.django_db @@ -124,140 +154,164 @@ def test_create_patient_with_death_date(test_user, single_row_valid_df): csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE) patient = Patient.objects.first() - assert(patient.death_date == single_row_valid_df["Death Date"][0].date()) + assert patient.death_date == single_row_valid_df["Death Date"][0].date() @pytest.mark.django_db -def test_multiple_patients(test_user, two_patients_first_with_two_visits_second_with_one): +def test_multiple_patients( + test_user, two_patients_first_with_two_visits_second_with_one +): df = two_patients_first_with_two_visits_second_with_one - assert(df["NHS Number"][0] == df["NHS Number"][1]) - assert(df["NHS Number"][0] != df["NHS Number"][2]) + assert df["NHS Number"][0] == df["NHS Number"][1] + assert df["NHS Number"][0] != df["NHS Number"][2] csv_upload_sync(test_user, df, None, 
ALDER_HEY_PZ_CODE) - assert(Patient.objects.count() == 2) + assert Patient.objects.count() == 2 [first_patient, second_patient] = Patient.objects.all() - assert(Visit.objects.filter(patient=first_patient).count() == 2) - assert(Visit.objects.filter(patient=second_patient).count() == 1) - - assert(first_patient.nhs_number == nhs_number.standardise_format(df["NHS Number"][0])) - assert(first_patient.date_of_birth == df["Date of Birth"][0].date()) - assert(first_patient.diabetes_type == df["Diabetes Type"][0]) - assert(first_patient.diagnosis_date == df["Date of Diabetes Diagnosis"][0].date()) - - assert(second_patient.nhs_number == nhs_number.standardise_format(df["NHS Number"][2])) - assert(second_patient.date_of_birth == df["Date of Birth"][2].date()) - assert(second_patient.diabetes_type == df["Diabetes Type"][2]) - assert(second_patient.diagnosis_date == df["Date of Diabetes Diagnosis"][2].date()) - - -@pytest.mark.parametrize("column,model_field", [ - pytest.param("NHS Number", "nhs_number"), - pytest.param("Date of Birth", "date_of_birth"), - pytest.param("Diabetes Type", "diabetes_type"), - pytest.param("Date of Diabetes Diagnosis", "diagnosis_date") -]) + assert Visit.objects.filter(patient=first_patient).count() == 2 + assert Visit.objects.filter(patient=second_patient).count() == 1 + + assert first_patient.nhs_number == nhs_number.standardise_format( + df["NHS Number"][0] + ) + assert first_patient.date_of_birth == df["Date of Birth"][0].date() + assert first_patient.diabetes_type == df["Diabetes Type"][0] + assert first_patient.diagnosis_date == df["Date of Diabetes Diagnosis"][0].date() + + assert second_patient.nhs_number == nhs_number.standardise_format( + df["NHS Number"][2] + ) + assert second_patient.date_of_birth == df["Date of Birth"][2].date() + assert second_patient.diabetes_type == df["Diabetes Type"][2] + assert second_patient.diagnosis_date == df["Date of Diabetes Diagnosis"][2].date() + + +@pytest.mark.parametrize( + "column,model_field", + [ 
+ pytest.param("NHS Number", "nhs_number"), + pytest.param("Date of Birth", "date_of_birth"), + pytest.param("Diabetes Type", "diabetes_type"), + pytest.param("Date of Diabetes Diagnosis", "diagnosis_date"), + ], +) @pytest.mark.django_db def test_missing_mandatory_field(test_user, valid_df, column, model_field): valid_df.loc[0, column] = None with transaction.atomic(): errors = csv_upload_sync(test_user, valid_df, None, ALDER_HEY_PZ_CODE) - - assert(model_field in errors[0]) + + assert model_field in errors[0] # Catastrophic - we can't save this patient at all so we won't save any of the patients in the submission - assert(Patient.objects.count() == 0) + assert Patient.objects.count() == 0 @pytest.mark.django_db def test_error_in_single_visit(test_user, single_row_valid_df): - single_row_valid_df.loc[0, 'Diabetes Treatment at time of Hba1c measurement'] = 45 + single_row_valid_df.loc[0, "Diabetes Treatment at time of Hba1c measurement"] = 45 errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE) - assert("treatment" in errors[0]) + assert "treatment" in errors[0] visit = Visit.objects.first() - assert(visit.treatment == 45) - assert("treatment" in visit.errors) + assert visit.treatment == 45 + assert "treatment" in visit.errors @pytest.mark.django_db def test_error_in_multiple_visits(test_user, one_patient_two_visits): df = one_patient_two_visits - df.loc[0, 'Diabetes Treatment at time of Hba1c measurement'] = 45 + df.loc[0, "Diabetes Treatment at time of Hba1c measurement"] = 45 errors = csv_upload_sync(test_user, df, None, ALDER_HEY_PZ_CODE) - assert("treatment" in errors[0]) + assert "treatment" in errors[0] - assert(Visit.objects.count() == 2) + assert Visit.objects.count() == 2 - [first_visit, second_visit] = Visit.objects.all().order_by('visit_date') + [first_visit, second_visit] = Visit.objects.all().order_by("visit_date") - assert(first_visit.treatment == 45) - assert("treatment" in first_visit.errors) + assert 
first_visit.treatment == 45 + assert "treatment" in first_visit.errors - assert(second_visit.treatment == df["Diabetes Treatment at time of Hba1c measurement"][1]) - assert(second_visit.errors is None) + assert ( + second_visit.treatment + == df["Diabetes Treatment at time of Hba1c measurement"][1] + ) + assert second_visit.errors is None @pytest.mark.django_db -def test_multiple_patients_where_one_has_visit_errors_and_the_other_does_not(test_user, two_patients_first_with_two_visits_second_with_one): +def test_multiple_patients_where_one_has_visit_errors_and_the_other_does_not( + test_user, two_patients_first_with_two_visits_second_with_one +): df = two_patients_first_with_two_visits_second_with_one - assert(df["NHS Number"][0] == df["NHS Number"][1]) - assert(df["NHS Number"][0] != df["NHS Number"][2]) + assert df["NHS Number"][0] == df["NHS Number"][1] + assert df["NHS Number"][0] != df["NHS Number"][2] - df.loc[0, 'Diabetes Treatment at time of Hba1c measurement'] = 45 + df.loc[0, "Diabetes Treatment at time of Hba1c measurement"] = 45 errors = csv_upload_sync(test_user, df, None, ALDER_HEY_PZ_CODE) - assert("treatment" in errors[0]) + assert "treatment" in errors[0] [patient_one, patient_two] = Patient.objects.all() - assert(Visit.objects.count() == 3) + assert Visit.objects.count() == 3 - [first_visit_for_first_patient, second_visit_for_first_patient] = Visit.objects.filter(patient=patient_one).order_by('visit_date') + [first_visit_for_first_patient, second_visit_for_first_patient] = ( + Visit.objects.filter(patient=patient_one).order_by("visit_date") + ) [visit_for_second_patient] = Visit.objects.filter(patient=patient_two) - assert(first_visit_for_first_patient.treatment == 45) - assert("treatment" in first_visit_for_first_patient.errors) + assert first_visit_for_first_patient.treatment == 45 + assert "treatment" in first_visit_for_first_patient.errors - assert(second_visit_for_first_patient.treatment == df["Diabetes Treatment at time of Hba1c 
measurement"][1]) - assert(second_visit_for_first_patient.errors is None) + assert ( + second_visit_for_first_patient.treatment + == df["Diabetes Treatment at time of Hba1c measurement"][1] + ) + assert second_visit_for_first_patient.errors is None - assert(visit_for_second_patient.treatment == df["Diabetes Treatment at time of Hba1c measurement"][2]) - assert(visit_for_second_patient.errors is None) + assert ( + visit_for_second_patient.treatment + == df["Diabetes Treatment at time of Hba1c measurement"][2] + ) + assert visit_for_second_patient.errors is None @pytest.mark.django_db -def test_multiple_patients_with_visit_errors(test_user, two_patients_with_one_visit_each): +def test_multiple_patients_with_visit_errors( + test_user, two_patients_with_one_visit_each +): df = two_patients_with_one_visit_each - df.loc[0, 'Diabetes Treatment at time of Hba1c measurement'] = 45 - df.loc[1, 'Diabetes Treatment at time of Hba1c measurement'] = 45 + df.loc[0, "Diabetes Treatment at time of Hba1c measurement"] = 45 + df.loc[1, "Diabetes Treatment at time of Hba1c measurement"] = 45 errors = csv_upload_sync(test_user, df, None, ALDER_HEY_PZ_CODE) - - assert("treatment" in errors[0]) - assert("treatment" in errors[1]) + + assert "treatment" in errors[0] + assert "treatment" in errors[1] [patient_one, patient_two] = Patient.objects.all() - assert(Visit.objects.count() == 2) + assert Visit.objects.count() == 2 visit_for_first_patient = Visit.objects.filter(patient=patient_one).first() visit_for_second_patient = Visit.objects.filter(patient=patient_two).first() - assert(visit_for_first_patient.treatment == 45) - assert("treatment" in visit_for_first_patient.errors) + assert visit_for_first_patient.treatment == 45 + assert "treatment" in visit_for_first_patient.errors - assert(visit_for_second_patient.treatment == 45) - assert("treatment" in visit_for_second_patient.errors) + assert visit_for_second_patient.treatment == 45 + assert "treatment" in visit_for_second_patient.errors 
@pytest.mark.django_db @@ -266,15 +320,15 @@ def test_invalid_nhs_number(test_user, single_row_valid_df): single_row_valid_df["NHS Number"] = invalid_nhs_number errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE) - assert("nhs_number" in errors[0]) + assert "nhs_number" in errors[0] # Not catastrophic - error saved in model and raised back to caller patient = Patient.objects.first() - assert(patient.nhs_number == invalid_nhs_number) + assert patient.nhs_number == invalid_nhs_number # TODO MRB: create a ValidationError model field (https://github.com/rcpch/national-paediatric-diabetes-audit/issues/332) - assert("nhs_number" in patient.errors) + assert "nhs_number" in patient.errors @pytest.mark.django_db @@ -283,32 +337,32 @@ def test_future_date_of_birth(test_user, single_row_valid_df): single_row_valid_df["Date of Birth"] = pd.to_datetime(date_of_birth) errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE) - assert("date_of_birth" in errors[0]) + assert "date_of_birth" in errors[0] patient = Patient.objects.first() - assert(patient.date_of_birth == date_of_birth) - assert("date_of_birth" in patient.errors) + assert patient.date_of_birth == date_of_birth + assert "date_of_birth" in patient.errors - error_message = patient.errors["date_of_birth"][0]['message'] - assert(error_message == "Cannot be in the future") + error_message = patient.errors["date_of_birth"][0]["message"] + assert error_message == "Cannot be in the future" @pytest.mark.django_db def test_over_25(test_user, single_row_valid_df): - date_of_birth = TODAY + - relativedelta(years=25, days=1) + date_of_birth = TODAY + -relativedelta(years=25, days=1) single_row_valid_df["Date of Birth"] = pd.to_datetime(date_of_birth) errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE) - assert("date_of_birth" in errors[0]) + assert "date_of_birth" in errors[0] patient = Patient.objects.first() - assert(patient.date_of_birth == 
date_of_birth) - assert("date_of_birth" in patient.errors) + assert patient.date_of_birth == date_of_birth + assert "date_of_birth" in patient.errors - error_message = patient.errors["date_of_birth"][0]['message'] - assert(error_message == "NPDA patients cannot be 25+ years old. This patient is 25") + error_message = patient.errors["date_of_birth"][0]["message"] + assert error_message == "NPDA patients cannot be 25+ years old. This patient is 25" @pytest.mark.django_db @@ -316,12 +370,12 @@ def test_invalid_diabetes_type(test_user, single_row_valid_df): single_row_valid_df["Diabetes Type"] = 45 errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE) - assert("diabetes_type" in errors[0]) + assert "diabetes_type" in errors[0] patient = Patient.objects.first() - assert(patient.diabetes_type == 45) - assert("diabetes_type" in patient.errors) + assert patient.diabetes_type == 45 + assert "diabetes_type" in patient.errors @pytest.mark.django_db @@ -330,35 +384,38 @@ def test_future_diagnosis_date(test_user, single_row_valid_df): single_row_valid_df["Date of Diabetes Diagnosis"] = pd.to_datetime(diagnosis_date) errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE) - assert("diagnosis_date" in errors[0]) + assert "diagnosis_date" in errors[0] patient = Patient.objects.first() - assert(patient.diagnosis_date == diagnosis_date) - assert("diagnosis_date" in patient.errors) + assert patient.diagnosis_date == diagnosis_date + assert "diagnosis_date" in patient.errors - error_message = patient.errors["diagnosis_date"][0]['message'] - assert(error_message == "Cannot be in the future") + error_message = patient.errors["diagnosis_date"][0]["message"] + assert error_message == "Cannot be in the future" @pytest.mark.django_db def test_diagnosis_date_before_date_of_birth(test_user, single_row_valid_df): - date_of_birth = VALID_FIELDS["date_of_birth"], + date_of_birth = (VALID_FIELDS["date_of_birth"],) diagnosis_date = 
VALID_FIELDS["date_of_birth"] - relativedelta(years=1) single_row_valid_df["Date of Diabetes Diagnosis"] = pd.to_datetime(diagnosis_date) errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE) - assert("diagnosis_date" in errors[0]) + assert "diagnosis_date" in errors[0] patient = Patient.objects.first() - assert(patient.diagnosis_date == diagnosis_date) - assert("diagnosis_date" in patient.errors) + assert patient.diagnosis_date == diagnosis_date + assert "diagnosis_date" in patient.errors - error_message = patient.errors["diagnosis_date"][0]['message'] + error_message = patient.errors["diagnosis_date"][0]["message"] # TODO MRB: why does this have entity encoding issues? (https://github.com/rcpch/national-paediatric-diabetes-audit/issues/333) - assert(error_message == "'Date of Diabetes Diagnosis' cannot be before 'Date of Birth'") + assert ( + error_message + == "'Date of Diabetes Diagnosis' cannot be before 'Date of Birth'" + ) @pytest.mark.django_db @@ -366,12 +423,12 @@ def test_invalid_sex(test_user, single_row_valid_df): single_row_valid_df["Stated gender"] = 45 errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE) - assert("sex" in errors[0]) + assert "sex" in errors[0] patient = Patient.objects.first() - assert(patient.sex == 45) - assert("sex" in patient.errors) + assert patient.sex == 45 + assert "sex" in patient.errors @pytest.mark.django_db @@ -379,12 +436,12 @@ def test_invalid_ethnicity(test_user, single_row_valid_df): single_row_valid_df["Ethnic Category"] = "45" errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE) - assert("ethnicity" in errors[0]) + assert "ethnicity" in errors[0] patient = Patient.objects.first() - assert(patient.ethnicity == "45") - assert("ethnicity" in patient.errors) + assert patient.ethnicity == "45" + assert "ethnicity" in patient.errors @pytest.mark.django_db @@ -392,106 +449,125 @@ def test_missing_gp_ods_code(test_user, 
single_row_valid_df): single_row_valid_df["GP Practice Code"] = None errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE) - assert("gp_practice_ods_code" in errors[0]) + assert "gp_practice_ods_code" in errors[0] patient = Patient.objects.first() - assert("gp_practice_ods_code" in patient.errors) + assert "gp_practice_ods_code" in patient.errors - error_message = patient.errors["gp_practice_ods_code"][0]['message'] + error_message = patient.errors["gp_practice_ods_code"][0]["message"] # TODO MRB: why does this have entity encoding issues? (https://github.com/rcpch/national-paediatric-diabetes-audit/issues/333) - assert(error_message == "'GP Practice ODS code' and 'GP Practice postcode' cannot both be empty") - + assert ( + error_message + == "'GP Practice ODS code' and 'GP Practice postcode' cannot both be empty" + ) @pytest.mark.django_db def test_future_death_date(test_user, single_row_valid_df): - death_date = TODAY + relativedelta(days = 1) + death_date = TODAY + relativedelta(days=1) single_row_valid_df["Death Date"] = pd.to_datetime(death_date) errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE) - assert("death_date" in errors[0]) + assert "death_date" in errors[0] patient = Patient.objects.first() - assert(patient.death_date == death_date) - assert("death_date" in patient.errors) + assert patient.death_date == death_date + assert "death_date" in patient.errors - error_message = patient.errors["death_date"][0]['message'] - assert(error_message == "Cannot be in the future") + error_message = patient.errors["death_date"][0]["message"] + assert error_message == "Cannot be in the future" @pytest.mark.django_db def test_death_date_before_date_of_birth(test_user, single_row_valid_df): - date_of_birth = VALID_FIELDS["date_of_birth"], + date_of_birth = (VALID_FIELDS["date_of_birth"],) death_date = VALID_FIELDS["date_of_birth"] - relativedelta(years=1) single_row_valid_df["Death Date"] = 
pd.to_datetime(death_date) errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE) - assert("death_date" in errors[0]) + assert "death_date" in errors[0] patient = Patient.objects.first() - assert(patient.death_date == death_date) - assert("death_date" in patient.errors) + assert patient.death_date == death_date + assert "death_date" in patient.errors - error_message = patient.errors["death_date"][0]['message'] + error_message = patient.errors["death_date"][0]["message"] # TODO MRB: why does this have entity encoding issues? (https://github.com/rcpch/national-paediatric-diabetes-audit/issues/333) - assert(error_message == "'Death Date' cannot be before 'Date of Birth'") + assert ( + error_message + == "'Death Date' cannot be before 'Date of Birth'" + ) @pytest.mark.django_db -@patch("project.npda.general_functions.csv_upload.validate_patient_async", mock_external_validation_result(postcode=ValidationError("Invalid postcode"))) +@patch( + "project.npda.general_functions.csv_upload.validate_patient_async", + mock_external_validation_result(postcode=ValidationError("Invalid postcode")), +) def test_invalid_postcode(test_user, single_row_valid_df): single_row_valid_df["Postcode of usual address"] = "not a postcode" errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE) - assert("postcode" in errors[0]) + assert "postcode" in errors[0] patient = Patient.objects.first() - assert(patient.postcode == "not a postcode") - assert("postcode" in patient.errors) + assert patient.postcode == "not a postcode" + assert "postcode" in patient.errors @pytest.mark.django_db -@patch("project.npda.general_functions.csv_upload.validate_patient_async", mock_external_validation_result(postcode=None)) +@patch( + "project.npda.general_functions.csv_upload.validate_patient_async", + mock_external_validation_result(postcode=None), +) def test_error_validating_postcode(test_user, single_row_valid_df): single_row_valid_df["Postcode of usual 
address"] = "WC1X 8SH" errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE) - assert(len(errors) == 0) + assert len(errors) == 0 patient = Patient.objects.first() - assert(patient.postcode == "WC1X8SH") + assert patient.postcode == "WC1X8SH" @pytest.mark.django_db -@patch("project.npda.general_functions.csv_upload.validate_patient_async", mock_external_validation_result(gp_practice_ods_code=ValidationError("Invalid ODS code"))) +@patch( + "project.npda.general_functions.csv_upload.validate_patient_async", + mock_external_validation_result( + gp_practice_ods_code=ValidationError("Invalid ODS code") + ), +) def test_invalid_gp_ods_code(test_user, single_row_valid_df): single_row_valid_df["GP Practice Code"] = "not a GP code" errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE) - assert("gp_practice_ods_code" in errors[0]) + assert "gp_practice_ods_code" in errors[0] patient = Patient.objects.first() - assert(patient.gp_practice_ods_code == "not a GP code") - assert("gp_practice_ods_code" in patient.errors) + assert patient.gp_practice_ods_code == "not a GP code" + assert "gp_practice_ods_code" in patient.errors @pytest.mark.django_db -@patch("project.npda.general_functions.csv_upload.validate_patient_async", mock_external_validation_result(postcode=None)) +@patch( + "project.npda.general_functions.csv_upload.validate_patient_async", + mock_external_validation_result(postcode=None), +) def test_error_validating_gp_ods_code(test_user, single_row_valid_df): single_row_valid_df["GP Practice Code"] = "G85023" errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE) - assert(len(errors) == 0) + assert len(errors) == 0 patient = Patient.objects.first() - assert(patient.gp_practice_ods_code == "G85023") + assert patient.gp_practice_ods_code == "G85023" @pytest.mark.django_db @@ -499,16 +575,22 @@ def test_lookup_index_of_multiple_deprivation(test_user, single_row_valid_df): 
csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE) patient = Patient.objects.first() - assert(patient.index_of_multiple_deprivation_quintile == INDEX_OF_MULTIPLE_DEPRIVATION_QUINTILE) + assert ( + patient.index_of_multiple_deprivation_quintile + == INDEX_OF_MULTIPLE_DEPRIVATION_QUINTILE + ) @pytest.mark.django_db -@patch("project.npda.general_functions.csv_upload.validate_patient_async", mock_external_validation_result(index_of_multiple_deprivation_quintile=None)) +@patch( + "project.npda.general_functions.csv_upload.validate_patient_async", + mock_external_validation_result(index_of_multiple_deprivation_quintile=None), +) def test_error_looking_up_index_of_multiple_deprivation(test_user, single_row_valid_df): csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE) patient = Patient.objects.first() - assert(patient.index_of_multiple_deprivation_quintile is None) + assert patient.index_of_multiple_deprivation_quintile is None @pytest.mark.django_db @@ -516,12 +598,12 @@ def test_strip_first_spaces_in_column_name(test_user, dummy_sheet_csv): csv = dummy_sheet_csv.replace("NHS Number", " NHS Number") df = read_csv_from_str(csv) - assert(df.columns[0] == "NHS Number") + assert df.columns[0] == "NHS Number" csv_upload_sync(test_user, df, None, ALDER_HEY_PZ_CODE) patient = Patient.objects.first() - assert(patient.nhs_number == nhs_number.standardise_format(df["NHS Number"][0])) + assert patient.nhs_number == nhs_number.standardise_format(df["NHS Number"][0]) @pytest.mark.django_db @@ -529,12 +611,12 @@ def test_strip_last_spaces_in_column_name(test_user, dummy_sheet_csv): csv = dummy_sheet_csv.replace("NHS Number", "NHS Number ") df = read_csv_from_str(csv) - assert(df.columns[0] == "NHS Number") + assert df.columns[0] == "NHS Number" csv_upload_sync(test_user, df, None, ALDER_HEY_PZ_CODE) patient = Patient.objects.first() - assert(patient.nhs_number == nhs_number.standardise_format(df["NHS Number"][0])) + assert patient.nhs_number 
== nhs_number.standardise_format(df["NHS Number"][0]) # Originally found in https://github.com/rcpch/national-paediatric-diabetes-audit/actions/runs/11627684066/job/32381466250 @@ -547,4 +629,4 @@ def test_spaces_in_date_column_name(test_user, dummy_sheet_csv): csv_upload_sync(test_user, df, None, ALDER_HEY_PZ_CODE) patient = Patient.objects.first() - assert(patient.date_of_birth == df["Date of Birth"][0].date()) + assert patient.date_of_birth == df["Date of Birth"][0].date() diff --git a/project/npda/tests/view_tests/test_upload.py b/project/npda/tests/view_tests/test_upload.py new file mode 100644 index 00000000..b87436ca --- /dev/null +++ b/project/npda/tests/view_tests/test_upload.py @@ -0,0 +1,84 @@ +from datetime import datetime +import os +import pytest +from django.urls import reverse +from django.core.files.uploadedfile import SimpleUploadedFile + +from project.npda.general_functions.data_generator_extended import ( + AgeRange, + HbA1cTargetRange, + VisitType, +) +from project.npda.models.npda_user import NPDAUser +from project.npda.tests.model_tests.test_submissions import ALDER_HEY_PZ_CODE +from project.npda.tests.utils import login_and_verify_user +from project.npda.management.commands.create_csv import Command as GenerateCSVCommand + +@pytest.mark.skip(reason="CSV upload validation errors") +@pytest.mark.django_db +def test_csv_upload_view( + seed_groups_fixture, + seed_users_fixture, + client, + tmpdir, +): + """Use the generate csv function to assert basic behaviors for uploading + csv. 
+ """ + + # Get a user + ah_user = NPDAUser.objects.filter( + organisation_employers__pz_code=ALDER_HEY_PZ_CODE + ).first() + client = login_and_verify_user(client, ah_user) + + # Define parameters for CSV generation + audit_start_date = datetime(2024, 4, 1) + audit_end_date = datetime(2025, 3, 31) + n_pts_to_seed = 5 + age_range = AgeRange.AGE_11_15 + hba1c_target = HbA1cTargetRange.TARGET + visits = "CDCD DHPC ACDC CDCD" + visit_types = [ + VisitType.CLINIC, + VisitType.DIETICIAN, + VisitType.CLINIC, + VisitType.DIETICIAN, + VisitType.DIETICIAN, + VisitType.HOSPITAL_ADMISSION, + VisitType.PSYCHOLOGY, + VisitType.CLINIC, + VisitType.ANNUAL_REVIEW, + VisitType.CLINIC, + VisitType.DIETICIAN, + VisitType.CLINIC, + VisitType.CLINIC, + VisitType.DIETICIAN, + VisitType.CLINIC, + VisitType.DIETICIAN, + ] + output_path = tmpdir.mkdir("csv_output") + + # Generate CSV + file_path = os.path.join(output_path, f"npda_seed_data-{n_pts_to_seed}-{visits.replace(' ', '')}.csv") + GenerateCSVCommand().generate_csv( + audit_start_date=audit_start_date, + audit_end_date=audit_end_date, + n_pts_to_seed=n_pts_to_seed, + age_range=age_range, + hba1c_target=hba1c_target, + visits=visits, + visit_types=visit_types, + output_path=str(output_path), + ) + + # Read the generated CSV for upload + with open(file_path, "rb") as f: + csv_file = SimpleUploadedFile(f.name, f.read(), content_type="text/csv") + + # Send POST request with CSV file + url = reverse("home") + response = client.post(url, {"csv_upload": csv_file}) + + # Assert the response to ensure no error + assert response.status_code == 302 diff --git a/project/npda/views/home.py b/project/npda/views/home.py index b9a900db..79dd05c3 100644 --- a/project/npda/views/home.py +++ b/project/npda/views/home.py @@ -86,6 +86,7 @@ async def home(request): message=f"You have do not have permission to upload csvs for {pz_code}.", ) form = UploadFileForm() + else: form = UploadFileForm() diff --git a/project/npda/views/submissions.py 
b/project/npda/views/submissions.py index c0a1ee83..3eba3698 100644 --- a/project/npda/views/submissions.py +++ b/project/npda/views/submissions.py @@ -75,10 +75,13 @@ def get_context_data(self, **kwargs: Any) -> dict: paediatric_diabetes_unit__pz_code=self.request.session.get("pz_code"), ).first() # there can be only one of these if latest_active_submission: - # If a submission exists, summarize the csv data - if latest_active_submission.csv_file: - # If the submission has a csv file, summarize it + # If a submission exists and it was created by uploading a csv, summarize the csv data + if self.request.session.get( + "can_upload_csv" + ): + # check if the user has permission to upload csv (note: this function is not available in this branch but is in live) + context["data"] = csv_summarize(latest_active_submission.csv_file) + # Get some summary data about the patients in the submission... context["patients"] = Patient.objects.filter( submissions=latest_active_submission diff --git a/s/local-clean-reset b/s/local-clean-reset new file mode 100755 index 00000000..66dfc5be --- /dev/null +++ b/s/local-clean-reset @@ -0,0 +1,17 @@ +#!/bin/bash -e + +# This script stops and cleans up our Docker Compose setup defined in `docker-compose.yml`. +# +# Specifically, it performs the following actions: +# 1. Stops all running containers associated with the Docker Compose setup. +# 2. Removes the containers, any named or anonymous volumes, and images used by this Compose file. +# 3. Then, it runs `s/up` to start the Docker Compose setup again. +# +# Flags: +# - `--volumes` removes both named and anonymous volumes created by the Docker Compose setup. +# - `--rmi all` removes all images used by the Compose setup, including pulled and locally built images. +# +# Note: Be cautious, as this will remove images and data volumes, which could result in data loss. + +docker compose -f docker-compose.yml down --volumes --rmi all +s/up \ No newline at end of file