diff --git a/src/insight/metrics/metrics_usage.py b/src/insight/metrics/metrics_usage.py index f0740da3..66f6a758 100644 --- a/src/insight/metrics/metrics_usage.py +++ b/src/insight/metrics/metrics_usage.py @@ -12,15 +12,6 @@ class OneColumnMap(DataFrameMetric): Mapping of a metric to each column of a dataframe. """ - def summarize_result(self, result: pd.DataFrame): - """ - Give a single value that summarizes the result of the metric. For OneColumnMap it is the mean of the results. - - Args: - result: the result of the metric computation. - """ - return result["metric_val"].mean(axis=0) - def __init__(self, metric: OneColumnMetric): self._metric = metric self.name = f"{metric.name}_map" @@ -30,11 +21,20 @@ def _compute_result(self, df: pd.DataFrame) -> pd.DataFrame: col: self._metric(df[col], dataset_name=df.attrs.get("name", "") + f"_{col}") for col in df.columns } - result = pd.DataFrame(data=columns_map.values(), index=df.columns, columns=["metric_val"]) + result = pd.DataFrame(data=columns_map.values(), index=df.columns, columns=[self.name]) result.name = self._metric.name return result + def summarize_result(self, result: pd.DataFrame): + """ + Give a single value that summarizes the result of the metric. For OneColumnMap it is the mean of the results. + + Args: + result: the result of the metric computation. + """ + return result[self.name].mean(axis=0) + class CorrMatrix(DataFrameMetric): """Computes the correlation between each pair of columns in the given dataframe @@ -100,14 +100,6 @@ class TwoColumnMap(TwoDataFrameMetric): """Compares columns with the same name from two given dataframes and return a DataFrame with index as the column name and the columns as metric_val""" - def summarize_result(self, result: pd.DataFrame): - """ - Give a single value that summarizes the result of the metric. For TwoColumnMap it is the mean of the results. - Args: - result: the result of the metric computation. - """ - return result["metric_val"].mean(axis=0) - def __init__(self, metric: TwoColumnMetric): self._metric = metric self.name = f"{metric.name}_map" @@ -121,9 +113,15 @@ def _compute_result(self, df_old: pd.DataFrame, df_new: pd.DataFrame) -> pd.Data ) for col in df_old.columns } - result = pd.DataFrame( - data=columns_map.values(), index=df_old.columns, columns=["metric_val"] - ) + result = pd.DataFrame(data=columns_map.values(), index=df_old.columns, columns=[self.name]) result.name = self._metric.name return result + + def summarize_result(self, result: pd.DataFrame): + """ + Give a single value that summarizes the result of the metric. For TwoColumnMap it is the mean of the results. + Args: + result: the result of the metric computation. + """ + return result[self.name].mean(axis=0) diff --git a/tests/test_metrics/test_metrics_usage.py b/tests/test_metrics/test_metrics_usage.py index fd02313a..a5fe6a52 100644 --- a/tests/test_metrics/test_metrics_usage.py +++ b/tests/test_metrics/test_metrics_usage.py @@ -44,11 +44,12 @@ def test_two_column_map(data): col_map = TwoColumnMap(emd) emd_map_df = col_map(df1, df2) - assert col_map.name == f"{str(emd)}_map" + expected_column_name = f"{str(emd)}_map" - assert set(emd_map_df.columns.to_list()) == set(["metric_val"]) - assert all(not np.isnan(emd_map_df["metric_val"][cat]) for cat in categorical_cols) - assert all(np.isnan(emd_map_df["metric_val"][cont]) for cont in continuous_cols) + assert col_map.name == expected_column_name + assert set(emd_map_df.columns.to_list()) == set([expected_column_name]) + assert all(not np.isnan(emd_map_df[expected_column_name][cat]) for cat in categorical_cols) + assert all(np.isnan(emd_map_df[expected_column_name][cont]) for cont in continuous_cols) def test_two_column_map_with_ksd(data): @@ -60,11 +61,12 @@ def test_two_column_map_with_ksd(data): col_map = TwoColumnMap(ksd) ksd_map_df = col_map(df1, df2) - assert col_map.name == f"{str(ksd)}_map" + expected_column_name = f"{str(ksd)}_map" - assert set(ksd_map_df.columns.to_list()) == set(["metric_val"]) - assert all(not np.isnan(ksd_map_df["metric_val"][cat]) for cat in categorical_cols) - assert all(not np.isnan(ksd_map_df["metric_val"][cont]) for cont in continuous_cols) + assert col_map.name == expected_column_name + assert set(ksd_map_df.columns.to_list()) == set([expected_column_name]) + assert all(not np.isnan(ksd_map_df[expected_column_name][cat]) for cat in categorical_cols) + assert all(not np.isnan(ksd_map_df[expected_column_name][cont]) for cont in continuous_cols) def test_metric_matrix(data):