Skip to content

Commit

Permalink
Utilizes proper Python types rather than NumPy types
Browse files Browse the repository at this point in the history
NumPy dropped its `np.str` alias in favor of the builtin `str`. Although
PySpark still seems to require an older NumPy version, we can upgrade
what we have now.
  • Loading branch information
elijahbenizzy committed Mar 5, 2023
1 parent 0254a4e commit 4e8bca2
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 14 deletions.
13 changes: 6 additions & 7 deletions examples/data_quality/pandera/data_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
"""
from typing import List

import numpy as np
import pandas as pd

from hamilton.function_modifiers import config, extract_columns
Expand Down Expand Up @@ -62,11 +61,11 @@ def raw_data__base(location: str) -> pd.DataFrame:
df.columns = _sanitize_columns(df.columns)
# create proper index -- ID-Month-Day;
index = (
df["id"].astype(np.str)
df["id"].astype(str)
+ "-"
+ df["month_of_absence"].astype(np.str)
+ df["month_of_absence"].astype(str)
+ "-"
+ df["day_of_the_week"].astype(np.str)
+ df["day_of_the_week"].astype(str)
)
df.index = index
return df
Expand Down Expand Up @@ -109,11 +108,11 @@ def raw_data__spark(location: str) -> pd.DataFrame:
df.columns = _sanitize_columns(df.columns)
# create proper index -- ID-Month-Day;
index = (
df["id"].astype(np.str)
df["id"].astype(str)
+ "-"
+ df["month_of_absence"].astype(np.str)
+ df["month_of_absence"].astype(str)
+ "-"
+ df["day_of_the_week"].astype(np.str)
+ df["day_of_the_week"].astype(str)
)
df.index = index
df["index_col"] = df.index
Expand Down
13 changes: 6 additions & 7 deletions examples/data_quality/simple/data_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
"""
from typing import List

import numpy as np
import pandas as pd

from hamilton.function_modifiers import config, extract_columns
Expand Down Expand Up @@ -60,11 +59,11 @@ def raw_data__base(location: str) -> pd.DataFrame:
df.columns = _sanitize_columns(df.columns)
# create proper index -- ID-Month-Day;
index = (
df["id"].astype(np.str)
df["id"].astype(str)
+ "-"
+ df["month_of_absence"].astype(np.str)
+ df["month_of_absence"].astype(str)
+ "-"
+ df["day_of_the_week"].astype(np.str)
+ df["day_of_the_week"].astype(str)
)
df.index = index
return df
Expand Down Expand Up @@ -107,11 +106,11 @@ def raw_data__spark(location: str) -> pd.DataFrame:
df.columns = _sanitize_columns(df.columns)
# create proper index -- ID-Month-Day;
index = (
df["id"].astype(np.str)
df["id"].astype(str)
+ "-"
+ df["month_of_absence"].astype(np.str)
+ df["month_of_absence"].astype(str)
+ "-"
+ df["day_of_the_week"].astype(np.str)
+ df["day_of_the_week"].astype(str)
)
df.index = index
df["index_col"] = df.index
Expand Down

0 comments on commit 4e8bca2

Please sign in to comment.