From 4e8bca29215f990c2f7048fccd8e70f001b0f9e0 Mon Sep 17 00:00:00 2001 From: elijahbenizzy Date: Sun, 5 Mar 2023 11:14:28 -0800 Subject: [PATCH] Utilizes proper python types rather than numpy types NumPy deprecated the `np.str` alias in 1.20 and removed it in 1.24 in favor of the builtin `str`. While pyspark still seems to require an older NumPy version, we can upgrade what we have now. --- examples/data_quality/pandera/data_loaders.py | 13 ++++++------- examples/data_quality/simple/data_loaders.py | 13 ++++++------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/examples/data_quality/pandera/data_loaders.py b/examples/data_quality/pandera/data_loaders.py index 32ae71206..5ede6482e 100644 --- a/examples/data_quality/pandera/data_loaders.py +++ b/examples/data_quality/pandera/data_loaders.py @@ -11,7 +11,6 @@ """ from typing import List -import numpy as np import pandas as pd from hamilton.function_modifiers import config, extract_columns @@ -62,11 +61,11 @@ def raw_data__base(location: str) -> pd.DataFrame: df.columns = _sanitize_columns(df.columns) # create proper index -- ID-Month-Day; index = ( - df["id"].astype(np.str) + df["id"].astype(str) + "-" - + df["month_of_absence"].astype(np.str) + + df["month_of_absence"].astype(str) + "-" - + df["day_of_the_week"].astype(np.str) + + df["day_of_the_week"].astype(str) ) df.index = index return df @@ -109,11 +108,11 @@ def raw_data__spark(location: str) -> pd.DataFrame: df.columns = _sanitize_columns(df.columns) # create proper index -- ID-Month-Day; index = ( - df["id"].astype(np.str) + df["id"].astype(str) + "-" - + df["month_of_absence"].astype(np.str) + + df["month_of_absence"].astype(str) + "-" - + df["day_of_the_week"].astype(np.str) + + df["day_of_the_week"].astype(str) ) df.index = index df["index_col"] = df.index diff --git a/examples/data_quality/simple/data_loaders.py b/examples/data_quality/simple/data_loaders.py index b1500d0ed..7fce3b703 100644 --- a/examples/data_quality/simple/data_loaders.py +++ b/examples/data_quality/simple/data_loaders.py @@ -9,7 +9,6 @@ """ from 
typing import List -import numpy as np import pandas as pd from hamilton.function_modifiers import config, extract_columns @@ -60,11 +59,11 @@ def raw_data__base(location: str) -> pd.DataFrame: df.columns = _sanitize_columns(df.columns) # create proper index -- ID-Month-Day; index = ( - df["id"].astype(np.str) + df["id"].astype(str) + "-" - + df["month_of_absence"].astype(np.str) + + df["month_of_absence"].astype(str) + "-" - + df["day_of_the_week"].astype(np.str) + + df["day_of_the_week"].astype(str) ) df.index = index return df @@ -107,11 +106,11 @@ def raw_data__spark(location: str) -> pd.DataFrame: df.columns = _sanitize_columns(df.columns) # create proper index -- ID-Month-Day; index = ( - df["id"].astype(np.str) + df["id"].astype(str) + "-" - + df["month_of_absence"].astype(np.str) + + df["month_of_absence"].astype(str) + "-" - + df["day_of_the_week"].astype(np.str) + + df["day_of_the_week"].astype(str) ) df.index = index df["index_col"] = df.index