Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use correct metric name #145

Merged
merged 8 commits into from
Dec 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 19 additions & 21 deletions src/insight/metrics/metrics_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,6 @@ class OneColumnMap(DataFrameMetric):
Mapping of a metric to each column of a dataframe.
"""

def summarize_result(self, result: pd.DataFrame):
"""
Give a single value that summarizes the result of the metric. For OneColumnMap it is the mean of the results.

Args:
result: the result of the metric computation.
"""
return result["metric_val"].mean(axis=0)

def __init__(self, metric: OneColumnMetric):
self._metric = metric
self.name = f"{metric.name}_map"
Expand All @@ -30,11 +21,20 @@ def _compute_result(self, df: pd.DataFrame) -> pd.DataFrame:
col: self._metric(df[col], dataset_name=df.attrs.get("name", "") + f"_{col}")
for col in df.columns
}
result = pd.DataFrame(data=columns_map.values(), index=df.columns, columns=["metric_val"])
result = pd.DataFrame(data=columns_map.values(), index=df.columns, columns=[self.name])

result.name = self._metric.name
return result

def summarize_result(self, result: pd.DataFrame):
"""
Give a single value that summarizes the result of the metric. For OneColumnMap it is the mean of the results.

Args:
result: the result of the metric computation.
"""
return result[self.name].mean(axis=0)


class CorrMatrix(DataFrameMetric):
"""Computes the correlation between each pair of columns in the given dataframe
Expand Down Expand Up @@ -100,14 +100,6 @@ class TwoColumnMap(TwoDataFrameMetric):
"""Compares columns with the same name from two given dataframes and return a DataFrame
with the column names as its index and a single column named after this map (``<metric name>_map``)"""

def summarize_result(self, result: pd.DataFrame):
"""
Give a single value that summarizes the result of the metric. For TwoColumnMap it is the mean of the results.
Args:
result: the result of the metric computation.
"""
return result["metric_val"].mean(axis=0)

def __init__(self, metric: TwoColumnMetric):
self._metric = metric
self.name = f"{metric.name}_map"
Expand All @@ -121,9 +113,15 @@ def _compute_result(self, df_old: pd.DataFrame, df_new: pd.DataFrame) -> pd.Data
)
for col in df_old.columns
}
result = pd.DataFrame(
data=columns_map.values(), index=df_old.columns, columns=["metric_val"]
)
result = pd.DataFrame(data=columns_map.values(), index=df_old.columns, columns=[self.name])

result.name = self._metric.name
return result

def summarize_result(self, result: pd.DataFrame):
"""
Give a single value that summarizes the result of the metric. For TwoColumnMap it is the mean of the results.
Args:
result: the result of the metric computation.
"""
return result[self.name].mean(axis=0)
18 changes: 10 additions & 8 deletions tests/test_metrics/test_metrics_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,12 @@ def test_two_column_map(data):

col_map = TwoColumnMap(emd)
emd_map_df = col_map(df1, df2)
assert col_map.name == f"{str(emd)}_map"
expected_column_name = f"{str(emd)}_map"

assert set(emd_map_df.columns.to_list()) == set(["metric_val"])
assert all(not np.isnan(emd_map_df["metric_val"][cat]) for cat in categorical_cols)
assert all(np.isnan(emd_map_df["metric_val"][cont]) for cont in continuous_cols)
assert col_map.name == expected_column_name
assert set(emd_map_df.columns.to_list()) == set([expected_column_name])
assert all(not np.isnan(emd_map_df[expected_column_name][cat]) for cat in categorical_cols)
assert all(np.isnan(emd_map_df[expected_column_name][cont]) for cont in continuous_cols)


def test_two_column_map_with_ksd(data):
Expand All @@ -60,11 +61,12 @@ def test_two_column_map_with_ksd(data):

col_map = TwoColumnMap(ksd)
ksd_map_df = col_map(df1, df2)
assert col_map.name == f"{str(ksd)}_map"
expected_column_name = f"{str(ksd)}_map"

assert set(ksd_map_df.columns.to_list()) == set(["metric_val"])
assert all(not np.isnan(ksd_map_df["metric_val"][cat]) for cat in categorical_cols)
assert all(not np.isnan(ksd_map_df["metric_val"][cont]) for cont in continuous_cols)
assert col_map.name == expected_column_name
assert set(ksd_map_df.columns.to_list()) == set([expected_column_name])
assert all(not np.isnan(ksd_map_df[expected_column_name][cat]) for cat in categorical_cols)
assert all(not np.isnan(ksd_map_df[expected_column_name][cont]) for cont in continuous_cols)


def test_metric_matrix(data):
Expand Down
Loading