Skip to content

Commit

Permalink
fixed: formatted with black
Browse files Browse the repository at this point in the history
  • Loading branch information
Casyfill committed Aug 13, 2024
1 parent 03fe530 commit 3976656
Show file tree
Hide file tree
Showing 9 changed files with 78 additions and 27 deletions.
15 changes: 6 additions & 9 deletions dfschema/core/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,16 +52,16 @@ def _validate_column_presence(
raise DataFrameValidationError(text)


def _is_string(series: pd.Series, strict:bool=False) -> bool:
'''Check if series is string-like
def _is_string(series: pd.Series, strict: bool = False) -> bool:
"""Check if series is string-like
NOTE: Pandas 2 does not accept object dtype as string;
This is a workaround.
This is a workaround.
TODO: explicitly check for object dtype and raise warning
'''
"""
result = pd.api.types.is_string_dtype(series) or pd.isnull(series).all()
if strict:
return result

object_like = pd.api.types.is_object_dtype(series)

return result or object_like
Expand Down Expand Up @@ -227,7 +227,7 @@ class ColSchema(BaseModel):
lt=1.0,
description="limit of missing values. If set to true, will raise if all values are empty. If set to a number, will raise if more than given perecnt of values are empty (Nan)",
alias="na_limit",
alias_priority=2
alias_priority=2,
)

value_limits: Optional[ValueLimits] = Field(
Expand Down Expand Up @@ -264,9 +264,6 @@ def _map_dtype(
warn(f"Unsupported dtype: {self.dtype}")
return None




# abstract dtypes with a corresponding checker
_dtype_test_func = {
"numeric": pd.api.types.is_numeric_dtype,
Expand Down
7 changes: 5 additions & 2 deletions dfschema/core/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,10 @@ def from_df(
"""

schema = generate_schema_dict_from_df(df)
schema["metadata"] = {"protocol_version": 2.0, "version": datetime.now().isoformat()}
schema["metadata"] = {
"protocol_version": 2.0,
"version": datetime.now().isoformat(),
}
subset_schemas = []
if subset_predicates:
for predicate in subset_predicates:
Expand All @@ -296,7 +299,7 @@ def from_df(

if return_dict:
return schema

return cls(**schema)


Expand Down
1 change: 1 addition & 0 deletions dfschema/core/dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
mostly based on pandas.core.dtypes.dtypes.
https://pandas.pydata.org/docs/user_guide/basics.html#basics-dtypes
"""

import sys

if sys.version_info >= (3, 8):
Expand Down
1 change: 1 addition & 0 deletions dfschema/core/generate.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pandas as pd


def generate_schema_dict_from_df(df: pd.DataFrame) -> dict:
"""
generate Schema object from given dataframe.
Expand Down
54 changes: 50 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ MarkupSafe = "2.0.1"
jupyter = "^1.0.0"
watermark = "^2.3.1"
gitchangelog = "^3.0.4"
black = "^24.8.0"

[build-system]
requires = ["poetry-core>=1.0.0"]
Expand Down
1 change: 0 additions & 1 deletion tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from pydantic.v1.error_wrappers import ValidationError



def test_schema_objects(good_schema: dict):
from dfschema.core.core import DfSchema

Expand Down
2 changes: 1 addition & 1 deletion tests/test_str_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def str_df() -> pd.DataFrame:
"z": ["A-1", "A-10", "A-12", None],
}
)

for col in df.columns:
df[col] = df[col].astype("string")
return df
Expand Down
23 changes: 13 additions & 10 deletions tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,23 +146,25 @@ def test_validate_df3_raises(df3, schema):
validate(df3, schema)



prediction_good_schemas = [
Path(__file__).parent / "test_schemas/v2/good/v2_predictions.json",
Path(__file__).parent / "test_schemas/v2/good/v2_predictions2.json"
Path(__file__).parent / "test_schemas/v2/good/v2_predictions2.json",
]


@pytest.fixture
def df4():
import pandas as pd
df = pd.DataFrame({
"value": [100000, 200000, 300000, 500000],
"inferred_at": ["2020-01-01", "2020-01-02", "2020-01-03", "2020-01-03"],
"trained_at": ["2020-01-01", "2020-01-02", "2020-01-03", "2020-01-03"],
"model": ["main", "certainty_low", "certainty_hight", "certainty_median"],
"version": ["1.0.0", "1.0.1", "1.0.2", "1.0.3"],
})

df = pd.DataFrame(
{
"value": [100000, 200000, 300000, 500000],
"inferred_at": ["2020-01-01", "2020-01-02", "2020-01-03", "2020-01-03"],
"trained_at": ["2020-01-01", "2020-01-02", "2020-01-03", "2020-01-03"],
"model": ["main", "certainty_low", "certainty_hight", "certainty_median"],
"version": ["1.0.0", "1.0.1", "1.0.2", "1.0.3"],
}
)
for col in ["inferred_at", "trained_at"]:
df[col] = pd.to_datetime(df[col])
return df
Expand All @@ -171,5 +173,6 @@ def df4():
@pytest.mark.parametrize("schema", prediction_good_schemas)
def test_validate_df4(df4, schema):
from dfschema import DfSchema

schema = DfSchema.from_file(schema)
schema.validate_df(df4)
schema.validate_df(df4)

0 comments on commit 3976656

Please sign in to comment.