Commit
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Jun 20, 2023
1 parent e719bf9 commit 9c217cc
Showing 34 changed files with 2 additions and 35 deletions.
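
All 35 deletions below follow one pattern: a blank line sitting between a block header (a function signature, for, or if line) and the first line of its body is removed; the 2 additions insert a blank line before a nested helper definition. These are the whitespace normalisations that pre-commit.ci auto-fix commits typically apply via a formatter such as black, though this page does not record which hook ran, so that attribution is an assumption. A minimal before/after sketch of the deletion pattern, using a function touched by this commit (the _before/_after suffixes and the placeholder return are illustrative only):

from pyspark.sql import DataFrame


# Before the auto-fix: a stray blank line separates the signature from the docstring.
def assign_completion_status_before(
    df: DataFrame,
    column_name_to_assign: str,
) -> DataFrame:

    """Assign a completion status for PHM questionnaire responses."""
    return df  # placeholder body for illustration


# After the auto-fix: the blank line is removed; the docstring follows the signature directly.
def assign_completion_status_after(
    df: DataFrame,
    column_name_to_assign: str,
) -> DataFrame:
    """Assign a completion status for PHM questionnaire responses."""
    return df  # placeholder body for illustration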
2 changes: 0 additions & 2 deletions cishouseholds/derive.py
@@ -454,7 +454,6 @@ def assign_datetime_from_coalesced_columns_and_log_source(
     reference_datetime_days_offset_value: int = -2,
     final_fallback_column: str = None,
 ):
-
     """
     Assign a timestamp column from coalesced list of columns with a default timestamp if timestamp missing in column
@@ -1827,7 +1826,6 @@ def assign_completion_status(
     df: DataFrame,
     column_name_to_assign: str,
 ) -> DataFrame:
-
     """
     Function to assign a completion status equivalent for PHM
     questionnaire responses from pre-defined variables
1 change: 0 additions & 1 deletion cishouseholds/phm/json_decode.py
@@ -21,7 +21,6 @@ def decode_phm_json(json_str: Union[str, bytes]) -> List[Tuple]:
     # table = json_dict["submission"]
     answers_list = []
     for table in json_list:
-
         meta = table.pop("survey_metadata")
         data = table.pop("data")
         meta.update(table)
1 change: 0 additions & 1 deletion cishouseholds/pipeline/pipeline_stages.py
@@ -861,7 +861,6 @@ def update_vaccine_types(input_survey_table: str, output_survey_table: str, vacc
     df = extract_from_table(input_survey_table)
     lookup_df = extract_from_table(vaccine_type_lookup)
     for vaccine_number in range(0, 7):
-
         vaccine_type_other_col = "cis_covid_vaccine_type_other"
         vaccine_type_col = "cis_covid_vaccine_type"
         vaccine_date_col = "cis_covid_vaccine_date"
1 change: 0 additions & 1 deletion cishouseholds/pipeline/run.py
@@ -48,7 +48,6 @@ def check_conditions(stage_responses: dict, stage_config: dict):
 
 
 def check_dependencies(stages_to_run, stages_config): # TODO: ensure check in order. look before current stage only
-
     available_tables = []
     for stage in stages_to_run: # generate available and required tables from stage config
         required_tables = stages_config[stage].get("input_tables", {})
1 change: 0 additions & 1 deletion cishouseholds/pipeline/translate.py
@@ -53,7 +53,6 @@ def translate_welsh_survey_responses(df: DataFrame) -> DataFrame:
     translation_lookup_path_in_pipeline_config = translation_lookup_path != "inactive"
 
     if translation_settings_in_pipeline_config:
-
         translation_directory = translation_settings.get("translation_directory", None)
         translation_lookup_directory = translation_settings.get("translation_lookup_directory", None)
         translation_backup_directory = translation_settings.get("translation_backup_directory", None)
1 change: 0 additions & 1 deletion cishouseholds/pipeline/vaccine_transformations.py
@@ -138,7 +138,6 @@ def deduplication(df: DataFrame):
 
 
 def first_second_doses(df: DataFrame):
-
     # assign first dose based on visit
     df = assign_nth_dose(
         df=df,
@@ -11,7 +11,6 @@
 
 
 def clean_survey_responses_version_1(df: DataFrame) -> DataFrame:
-
     health_care_area_map = {
         "Primary care for example in a GP or dentist": "Primary",
         "Secondary care for example in a hospital": "Secondary",
@@ -125,7 +125,6 @@ def derive_work_status_columns(df: DataFrame) -> DataFrame:
 
 
 def clean_survey_responses_version_2(df: DataFrame) -> DataFrame:
-
     # Map to digital from raw V2 values, before editing them to V1 below
     df = assign_from_map(
         df,
1 change: 0 additions & 1 deletion cishouseholds/pipeline/visit_transformations.py
@@ -19,7 +19,6 @@ def visit_transformations(df: DataFrame):
 
 
 def visit_derivations(df: DataFrame):
-
     df = assign_fake_id(df, "ordered_household_id_new", "ons_household_id")
     df = assign_visit_order(
         df=df,
1 change: 0 additions & 1 deletion cishouseholds/validate.py
@@ -405,7 +405,6 @@ def validate_config_stages(pipeline_stage_functions: Dict, stages_to_run: List[s
             not in str(inspect.signature(pipeline_stage_functions[function_name]).parameters[arg])
         ]
         if not (set(function_config_other_params) == set(input_arguments_needed)):
-
             list_not_passed_arg = [x for x in input_arguments_needed if x not in function_config_other_params]
             list_of_unrecognised_arg = [
                 x
1 change: 0 additions & 1 deletion dummy_data_generation/helpers.py
@@ -162,7 +162,6 @@ class Meta:
         name = "custom_random"
 
     def __init__(self, *args: Any, **kwargs: Any) -> None:
-
         super().__init__(*args, **kwargs)
 
     def random_date(self, start, end, format="%d/%m/%Y"):
2 changes: 0 additions & 2 deletions dummy_data_generation/helpers_weight.py
@@ -22,7 +22,6 @@ class Meta:
         name = "base_stats"
 
     def __init__(self, *args: Any, **kwargs: Any) -> None:
-
         super().__init__(*args, **kwargs)
 
         np.random.seed(self.seed)
@@ -106,7 +105,6 @@ class Meta:
         name = "distribution"
 
     def __init__(self, *args: Any, **kwargs: Any) -> None:
-
         super().__init__(*args, **kwargs)
 
     def generic_distribution(self, func: Callable, null_prop: float = 0, null_value: Any = None, **kwargs: Any) -> Any:
1 change: 0 additions & 1 deletion tests/compare/test_prepare_for_union.py
@@ -4,7 +4,6 @@
 
 
 def test_prepare_for_union(spark_session):
-
     example_ref = spark_session.createDataFrame(
         data=[
             ("ABC123", 1, "ABC7673", 1, 2, 2),
1 change: 0 additions & 1 deletion tests/derive/test_aggregated_output.py
@@ -6,7 +6,6 @@
 
 
 def test_aggregated_output(spark_session):
-
     input_df = spark_session.createDataFrame(
         data=[
             # fmt: off
1 change: 0 additions & 1 deletion tests/derive/test_assign_column_to_date_string.py
@@ -10,7 +10,6 @@
     [("1966-07-30 15:00:00", "1966-07-30"), (None, None)],
 )
 def test_convert_to_date(spark_session, expected_data):
-
     expected_schema = "time_example string, date_example string"
 
     expected_df = (
1 change: 0 additions & 1 deletion tests/derive/test_assign_school_year.py
@@ -80,7 +80,6 @@ def school_year_lookup(spark_session):
     # fmt: on
 )
 def test_assign_school_year(spark_session, expected_data, school_year_lookup):
-
     expected_schema = "visit_date string, dob string, country string, school_year integer"
 
     expected_df = (
1 change: 0 additions & 1 deletion tests/derive/test_at_childcare_coding.py
@@ -35,7 +35,6 @@
 
 
 def test_childcare_identifier(prepare_regex_test_cases, spark_session):
-
     test_cases = prepare_regex_test_cases(test_data)
 
     expected_df = spark_session.createDataFrame(test_cases, schema="test_case string, in_childcare boolean")
1 change: 0 additions & 1 deletion tests/derive/test_at_school_coding.py
@@ -48,7 +48,6 @@
 
 
 def test_add_at_school_identifier(prepare_regex_test_cases, spark_session):
-
     at_school_test_cases = prepare_regex_test_cases(test_data)
 
     expected_df = spark_session.createDataFrame(at_school_test_cases, schema="test_case string, at_school boolean")
1 change: 0 additions & 1 deletion tests/derive/test_at_university_coding.py
@@ -42,7 +42,6 @@
 
 
 def test_add_at_university_identifier(prepare_regex_test_cases, spark_session):
-
     at_university_test_cases = prepare_regex_test_cases(test_data)
 
     expected_df = spark_session.createDataFrame(
1 change: 0 additions & 1 deletion tests/derive/test_derive_from_list.py
@@ -4,7 +4,6 @@
 
 
 def test_assign_isin_list(spark_session):
-
     column_names = "pattern string, outcome integer"
 
     expected_df = spark_session.createDataFrame(
1 change: 0 additions & 1 deletion tests/derive/test_derive_regex_match.py
@@ -9,7 +9,6 @@
     [("A1", True), ("AA", False), ("11", False), ("", False), ("?", False), (None, None)],
 )
 def test_derive_regex_match(spark_session, expected_data):
-
     expected_schema = "reference_column string, match boolean"
     expected_df = spark_session.createDataFrame([expected_data], schema=expected_schema)
 
1 change: 0 additions & 1 deletion tests/derive/test_flag_records_patient_facing_rules.py
@@ -43,7 +43,6 @@
 
 
 def test_patient_facing_identifier(prepare_regex_test_cases, spark_session):
-
     test_cases = prepare_regex_test_cases(test_data)
 
     expected_df = spark_session.createDataFrame(test_cases, schema="test_case string, patient_facing boolean")
1 change: 0 additions & 1 deletion tests/derive/test_furlough_coding.py
@@ -30,7 +30,6 @@
 
 
 def test_furloughed_identifier(prepare_regex_test_cases, spark_session):
-
     test_cases = prepare_regex_test_cases(test_data)
 
     expected_df = spark_session.createDataFrame(test_cases, schema="test_case string, furloughed boolean")
@@ -31,7 +31,6 @@
 
 
 def test_add_at_school_identifier(prepare_regex_test_cases, spark_session):
-
     test_cases = prepare_regex_test_cases(test_data)
 
     expected_df = spark_session.createDataFrame(
1 change: 0 additions & 1 deletion tests/derive/test_not_working_coding.py
@@ -52,7 +52,6 @@
 
 
 def test_add_not_working_identifier(prepare_regex_test_cases, spark_session):
-
     test_cases = prepare_regex_test_cases(test_data)
 
     expected_df = spark_session.createDataFrame(test_cases, schema="test_case string, is_not_working boolean")
1 change: 0 additions & 1 deletion tests/derive/test_retired_coding.py
@@ -34,7 +34,6 @@
 
 
 def test_add_retired_identifier(prepare_regex_test_cases, spark_session):
-
     retired_cases = prepare_regex_test_cases(test_data)
 
     expected_df = spark_session.createDataFrame(retired_cases, schema="test_case string, is_retired boolean")
1 change: 0 additions & 1 deletion tests/derive/test_self_employed_coding.py
@@ -23,7 +23,6 @@
 
 
 def test_add_self_employed_identifier(prepare_regex_test_cases, spark_session):
-
     test_cases = prepare_regex_test_cases(test_data)
 
     expected_df = spark_session.createDataFrame(test_cases, schema="test_case string, is_self_employed boolean")
1 change: 0 additions & 1 deletion tests/derive/test_working_from_home_coding.py
@@ -38,7 +38,6 @@
 
 
 def test_add_work_from_home_identifier(prepare_regex_test_cases, spark_session):
-
     wfh_cases = prepare_regex_test_cases(test_data)
 
     expected_df = spark_session.createDataFrame(wfh_cases, schema="test_case string, is_working_from_home boolean")
1 change: 0 additions & 1 deletion tests/edit/test_apply_lower_case_to_columns.py
@@ -4,7 +4,6 @@
 
 
 def test_update_strings_to_sentence_case(spark_session):
-
     input_df = spark_session.createDataFrame(
         data=[
             ("good MORning", "word soup"),
1 change: 0 additions & 1 deletion tests/edit/test_cast_columns_from_string.py
@@ -4,7 +4,6 @@
 
 
 def test_re_cast_columns_from_string(spark_session):
-
     schema_input_df = """col_to_cast_1 string,
                         col_to_cast_2 string,
                         col_to_cast_3 string"""
1 change: 0 additions & 1 deletion tests/edit/test_clean_work_main_job_role.py
@@ -4,7 +4,6 @@
 
 
 def test_clean_work_main_job_role(spark_session):
-
     input_df = spark_session.createDataFrame(
         data=[
             (1, "good&MORning "),
1 change: 0 additions & 1 deletion tests/edit/test_convert_columns_to_timestamps.py
@@ -18,7 +18,6 @@
     ],
 )
 def test_convert_to_timestamp(spark_session, input_data, column_map):
-
     input_schema = "time_example string, ID string, second_time_example string"
 
     expected_schema = StructType(
1 change: 0 additions & 1 deletion tests/edit/test_translate_column_regex_replace.py
@@ -4,7 +4,6 @@
 
 
 def test_translate_column_regex_replace(spark_session):
-
     expected_df = spark_session.createDataFrame(
         data=[
             # fmt: off
2 changes: 2 additions & 0 deletions tests/validate/test_sparkvalidate.py
@@ -74,6 +74,7 @@ def test_sparkvalidate(spark_session):
         "non_existent_col": {"contains": "a"},
     }
     validate_df.validate_column(operations=validation_checks_dict)
+
     # user defined function external definition
     def function_add_up_to(error_message, column_1, column_2):
         return (F.col(column_1) + F.col(column_2)) < 10, error_message
@@ -119,6 +120,7 @@ def test_sparkvalidate_multiple_column_checks(spark_session):
     )
     df_input = df_expected.drop("error")
     validate_df = SparkValidate(df_input, "error") # initialise dataframe
+
     # user defined function external definition
    def function_add_up_to(error_message, column_1, column_2):
         return (F.col(column_1) + F.col(column_2)) < 10, error_message
