Utilizes proper Python types rather than NumPy types

NumPy deprecated the `np.str` alias (it was simply the builtin `str`) and has since removed it. Although PySpark still seems to require an older NumPy version, we can update our own code now.
DAGWorks-Inc · Mar 5, 2023 · 4e8bca2 · 4e8bca2
1 parent 0254a4e
commit 4e8bca2
Show file tree

Hide file tree

Showing 2 changed files with 12 additions and 14 deletions.
diff --git a/examples/data_quality/pandera/data_loaders.py b/examples/data_quality/pandera/data_loaders.py
@@ -11,7 +11,6 @@
 """
 from typing import List
 
-import numpy as np
 import pandas as pd
 
 from hamilton.function_modifiers import config, extract_columns
@@ -62,11 +61,11 @@ def raw_data__base(location: str) -> pd.DataFrame:
     df.columns = _sanitize_columns(df.columns)
     # create proper index -- ID-Month-Day;
     index = (
-        df["id"].astype(np.str)
+        df["id"].astype(str)
         + "-"
-        + df["month_of_absence"].astype(np.str)
+        + df["month_of_absence"].astype(str)
         + "-"
-        + df["day_of_the_week"].astype(np.str)
+        + df["day_of_the_week"].astype(str)
     )
     df.index = index
     return df
@@ -109,11 +108,11 @@ def raw_data__spark(location: str) -> pd.DataFrame:
     df.columns = _sanitize_columns(df.columns)
     # create proper index -- ID-Month-Day;
     index = (
-        df["id"].astype(np.str)
+        df["id"].astype(str)
         + "-"
-        + df["month_of_absence"].astype(np.str)
+        + df["month_of_absence"].astype(str)
         + "-"
-        + df["day_of_the_week"].astype(np.str)
+        + df["day_of_the_week"].astype(str)
     )
     df.index = index
     df["index_col"] = df.index

diff --git a/examples/data_quality/simple/data_loaders.py b/examples/data_quality/simple/data_loaders.py
@@ -9,7 +9,6 @@
 """
 from typing import List
 
-import numpy as np
 import pandas as pd
 
 from hamilton.function_modifiers import config, extract_columns
@@ -60,11 +59,11 @@ def raw_data__base(location: str) -> pd.DataFrame:
     df.columns = _sanitize_columns(df.columns)
     # create proper index -- ID-Month-Day;
     index = (
-        df["id"].astype(np.str)
+        df["id"].astype(str)
         + "-"
-        + df["month_of_absence"].astype(np.str)
+        + df["month_of_absence"].astype(str)
         + "-"
-        + df["day_of_the_week"].astype(np.str)
+        + df["day_of_the_week"].astype(str)
     )
     df.index = index
     return df
@@ -107,11 +106,11 @@ def raw_data__spark(location: str) -> pd.DataFrame:
     df.columns = _sanitize_columns(df.columns)
     # create proper index -- ID-Month-Day;
     index = (
-        df["id"].astype(np.str)
+        df["id"].astype(str)
         + "-"
-        + df["month_of_absence"].astype(np.str)
+        + df["month_of_absence"].astype(str)
         + "-"
-        + df["day_of_the_week"].astype(np.str)
+        + df["day_of_the_week"].astype(str)
     )
     df.index = index
     df["index_col"] = df.index