Skip to content

Commit

Permalink
fix n_outliers property
Browse files (browse the repository at this point in the history)
  • Loading branch information
tvdboom committed Aug 28, 2023
1 parent d15e5e6 commit c0ae0d5
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 6 deletions.
3 changes: 1 addition & 2 deletions atom/atom.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,8 +274,7 @@ def n_outliers(self) -> int | None:
"""Number of samples in the training set containing outliers."""
if not is_sparse(self.X):
data = self.train.select_dtypes(include=["number"])
- z_scores = (np.abs(stats.zscore(data.values.astype(float))) > 3)
- return (z_scores.abs() > 3).any(axis=1).sum()
+ return (np.abs(stats.zscore(data.values.astype(float))) > 3).any(axis=1).sum()

@property
def classes(self) -> pd.DataFrame | None:
Expand Down
6 changes: 2 additions & 4 deletions atom/basetransformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -756,8 +756,6 @@ def _no_data_sets(
else:
test_size = self.test_size

- splitter = self._get_est_class("train_test_split", "model_selection")

try:
# Define holdout set size
if self.holdout_size:
Expand All @@ -773,7 +771,7 @@ def _no_data_sets(
f"got {self.holdout_size}."
)

- data, holdout = splitter(
+ data, holdout = train_test_split(
data,
test_size=holdout_size,
random_state=self.random_state,
Expand All @@ -784,7 +782,7 @@ def _no_data_sets(
else:
holdout = None

- train, test = splitter(
+ train, test = train_test_split(
data,
test_size=test_size,
random_state=self.random_state,
Expand Down

0 comments on commit c0ae0d5

Please sign in to comment.