Skip to content

Commit

Permalink
fix(eda): remove unnecessary compute
Browse files Browse the repository at this point in the history
  • Loading branch information
dovahcrow committed Sep 13, 2020
1 parent bad6a87 commit 98c4ab0
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 10 deletions.
10 changes: 7 additions & 3 deletions dataprep/eda/missing/compute/bivariate.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
from .common import LABELS, histogram


@staged
def compute_missing_bivariate( # pylint: disable=too-many-locals
def _compute_missing_bivariate( # pylint: disable=too-many-locals
df: DataArray,
x: str,
y: str,
Expand All @@ -32,7 +31,6 @@ def compute_missing_bivariate( # pylint: disable=too-many-locals
# pylint: disable=too-many-arguments
"""Calculate the distribution change on another column y when
the missing values in x is dropped."""
df.compute("nulls")

xloc = df.columns.get_loc(x)
yloc = df.columns.get_loc(y)
Expand Down Expand Up @@ -144,3 +142,9 @@ def compute_missing_bivariate( # pylint: disable=too-many-locals
hist=df_ret, x=x, y=y, meta=meta["y"], visual_type="missing_impact_1v1",
)
return itmdt


# Public entry point: wrap the private implementation with `staged` through an
# explicit assignment instead of the `@staged` decorator syntax, because
# jupyter autoreload does not support the decorator form.
compute_missing_bivariate = staged( # pylint: disable=invalid-name
_compute_missing_bivariate
)
9 changes: 7 additions & 2 deletions dataprep/eda/missing/compute/nullivariate.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@
from ...staged import staged


@staged
def compute_missing_nullivariate(
def _compute_missing_nullivariate(
df: DataArray, bins: int
) -> Generator[Any, Any, Intermediate]:
"""Calculate the data for visualizing the plot_missing(df).
Expand Down Expand Up @@ -56,6 +55,12 @@ def compute_missing_nullivariate(
)


# Public entry point: wrap the private implementation with `staged` through an
# explicit assignment instead of the `@staged` decorator syntax, because
# jupyter autoreload does not support the decorator form.
compute_missing_nullivariate = staged( # pylint: disable=invalid-name
_compute_missing_nullivariate
)


def missing_perc_blockwise(bin_size: int) -> Callable[[np.ndarray], np.ndarray]:
"""Compute the missing percentage in a block."""

Expand Down
13 changes: 8 additions & 5 deletions dataprep/eda/missing/compute/univariate.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,11 @@
from .common import LABELS, histogram


@staged
def compute_missing_univariate( # pylint: disable=too-many-locals
def _compute_missing_univariate( # pylint: disable=too-many-locals
df: DataArray, x: str, bins: int, dtype: Optional[DTypeDef] = None,
) -> Generator[Any, Any, Intermediate]:
"""Calculate the distribution change on other columns when
the missing values in x is dropped."""
df.compute("nulls")

j = df.columns.get_loc(x)

hists = {}
Expand All @@ -46,7 +43,7 @@ def compute_missing_univariate( # pylint: disable=too-many-locals
hist_range = (col0.min(axis=0), col0.max(axis=0))

hists[col_name] = [
histogram(col, dtype=dtype, bins=bins, return_edges=True, range=hist_range,)
histogram(col, dtype=dtype, bins=bins, return_edges=True, range=hist_range)
for col in [col0, col1]
]

Expand Down Expand Up @@ -97,3 +94,9 @@ def compute_missing_univariate( # pylint: disable=too-many-locals
dfs[col_name] = ret_df

return Intermediate(data=dfs, x=x, meta=meta, visual_type="missing_impact_1vn")


# Public entry point: wrap the private implementation with `staged` through an
# explicit assignment instead of the `@staged` decorator syntax, because
# jupyter autoreload does not support the decorator form.
compute_missing_univariate = staged( # pylint: disable=invalid-name
_compute_missing_univariate
)

0 comments on commit 98c4ab0

Please sign in to comment.