From e084c041bd4bf0af30aa54ff6fea964d881ee750 Mon Sep 17 00:00:00 2001 From: Niall <89581219+nialldevlin1@users.noreply.github.com> Date: Wed, 20 Sep 2023 11:05:19 +0100 Subject: [PATCH 1/6] Use correct metric name --- src/insight/metrics/metrics_usage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/insight/metrics/metrics_usage.py b/src/insight/metrics/metrics_usage.py index 8bf4bac6..2f028d45 100644 --- a/src/insight/metrics/metrics_usage.py +++ b/src/insight/metrics/metrics_usage.py @@ -128,7 +128,7 @@ def _compute_result( for col in df_old.columns } result = pd.DataFrame( - data=columns_map.values(), index=df_old.columns, columns=["metric_val"] + data=columns_map.values(), index=df_old.columns, columns=[self.name] ) result.name = self._metric.name From 909533cfd1ffd3bed18ffd80d4c89d202ee221e3 Mon Sep 17 00:00:00 2001 From: Niall <89581219+nialldevlin1@users.noreply.github.com> Date: Wed, 20 Sep 2023 11:09:35 +0100 Subject: [PATCH 2/6] self.name in TwoColumnMap summarize_result --- src/insight/metrics/metrics_usage.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/insight/metrics/metrics_usage.py b/src/insight/metrics/metrics_usage.py index 2f028d45..654e6e8a 100644 --- a/src/insight/metrics/metrics_usage.py +++ b/src/insight/metrics/metrics_usage.py @@ -104,14 +104,6 @@ class TwoColumnMap(TwoDataFrameMetric): """Compares columns with the same name from two given dataframes and return a DataFrame with index as the column name and the columns as metric_val""" - def summarize_result(self, result: pd.DataFrame): - """ - Give a single value that summarizes the result of the metric. For TwoColumnMap it is the mean of the results. - Args: - result: the result of the metric computation. - """ - return result["metric_val"].mean(axis=0) - def __init__(self, metric: TwoColumnMetric): self._metric = metric self.name = f"{metric.name}_map" @@ -133,3 +125,11 @@ def _compute_result( result.name = self._metric.name return result + + def summarize_result(self, result: pd.DataFrame): + """ + Give a single value that summarizes the result of the metric. For TwoColumnMap it is the mean of the results. + Args: + result: the result of the metric computation. + """ + return result[self.name].mean(axis=0) From 6a9e0975c7d7692e1a0baf5b9f83aa195d05f8bb Mon Sep 17 00:00:00 2001 From: Niall <89581219+nialldevlin1@users.noreply.github.com> Date: Wed, 20 Sep 2023 11:10:45 +0100 Subject: [PATCH 3/6] Update OneColumnMap --- src/insight/metrics/metrics_usage.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/insight/metrics/metrics_usage.py b/src/insight/metrics/metrics_usage.py index 654e6e8a..33bedff3 100644 --- a/src/insight/metrics/metrics_usage.py +++ b/src/insight/metrics/metrics_usage.py @@ -12,15 +12,6 @@ class OneColumnMap(DataFrameMetric): Mapping of a metric to each column of a dataframe. """ - def summarize_result(self, result: pd.DataFrame): - """ - Give a single value that summarizes the result of the metric. For OneColumnMap it is the mean of the results. - - Args: - result: the result of the metric computation. - """ - return result["metric_val"].mean(axis=0) - def __init__(self, metric: OneColumnMetric): self._metric = metric self.name = f"{metric.name}_map" @@ -33,12 +24,21 @@ def _compute_result(self, df: pd.DataFrame) -> pd.DataFrame: for col in df.columns } result = pd.DataFrame( - data=columns_map.values(), index=df.columns, columns=["metric_val"] + data=columns_map.values(), index=df.columns, columns=[self.name] ) result.name = self._metric.name return result + def summarize_result(self, result: pd.DataFrame): + """ + Give a single value that summarizes the result of the metric. For OneColumnMap it is the mean of the results. + + Args: + result: the result of the metric computation. + """ + return result[self.name].mean(axis=0) + class CorrMatrix(DataFrameMetric): """Computes the correlation between each pair of columns in the given dataframe From 38cc212ea64903c90b157f564eaa4d16332958ec Mon Sep 17 00:00:00 2001 From: Niall <89581219+nialldevlin1@users.noreply.github.com> Date: Wed, 20 Sep 2023 11:38:14 +0100 Subject: [PATCH 4/6] Update two column map test --- tests/test_metrics/test_metrics_usage.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/test_metrics/test_metrics_usage.py b/tests/test_metrics/test_metrics_usage.py index b3269fdd..641a8921 100644 --- a/tests/test_metrics/test_metrics_usage.py +++ b/tests/test_metrics/test_metrics_usage.py @@ -31,11 +31,13 @@ def test_two_column_map(data): col_map = TwoColumnMap(emd) emd_map_df = col_map(df1, df2) - assert col_map.name == f'{str(emd)}_map' + expected_column_name = f'{str(emd)}_map' + + assert col_map.name == expected_column_name - assert set(emd_map_df.columns.to_list()) == set(['metric_val']) - assert all(not np.isnan(emd_map_df['metric_val'][cat]) for cat in categorical_cols) - assert all(np.isnan(emd_map_df['metric_val'][cont]) for cont in continuous_cols) + assert set(emd_map_df.columns.to_list()) == set([expected_column_name]) + assert all(not np.isnan(emd_map_df[expected_column_name][cat]) for cat in categorical_cols) + assert all(np.isnan(emd_map_df[expected_column_name][cont]) for cont in continuous_cols) def test_metric_matrix(data): From 3cec9fa32b789ebff54cc9a34d164b1a4d5aebaf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Sep 2023 10:39:05 +0000 Subject: [PATCH 5/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_metrics/test_metrics_usage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_metrics/test_metrics_usage.py b/tests/test_metrics/test_metrics_usage.py index 641a8921..2bb580f8 100644 --- a/tests/test_metrics/test_metrics_usage.py +++ b/tests/test_metrics/test_metrics_usage.py @@ -32,7 +32,7 @@ def test_two_column_map(data): col_map = TwoColumnMap(emd) emd_map_df = col_map(df1, df2) expected_column_name = f'{str(emd)}_map' - + assert col_map.name == expected_column_name assert set(emd_map_df.columns.to_list()) == set([expected_column_name]) From 11c800b46a36d5d75f65451fc96a0d0063ec2033 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Dec 2023 14:35:41 +0000 Subject: [PATCH 6/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/insight/metrics/metrics_usage.py | 8 ++------ tests/test_metrics/test_metrics_usage.py | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/insight/metrics/metrics_usage.py b/src/insight/metrics/metrics_usage.py index c4a14569..66f6a758 100644 --- a/src/insight/metrics/metrics_usage.py +++ b/src/insight/metrics/metrics_usage.py @@ -21,9 +21,7 @@ def _compute_result(self, df: pd.DataFrame) -> pd.DataFrame: col: self._metric(df[col], dataset_name=df.attrs.get("name", "") + f"_{col}") for col in df.columns } - result = pd.DataFrame( - data=columns_map.values(), index=df.columns, columns=[self.name] - ) + result = pd.DataFrame(data=columns_map.values(), index=df.columns, columns=[self.name]) result.name = self._metric.name return result @@ -115,9 +113,7 @@ def _compute_result(self, df_old: pd.DataFrame, df_new: pd.DataFrame) -> pd.Data ) for col in df_old.columns } - result = pd.DataFrame( - data=columns_map.values(), index=df_old.columns, columns=[self.name] - ) + result = pd.DataFrame(data=columns_map.values(), index=df_old.columns, columns=[self.name]) result.name = self._metric.name return result diff --git a/tests/test_metrics/test_metrics_usage.py b/tests/test_metrics/test_metrics_usage.py index 3afe4f8f..a5fe6a52 100644 --- a/tests/test_metrics/test_metrics_usage.py +++ b/tests/test_metrics/test_metrics_usage.py @@ -44,7 +44,7 @@ def test_two_column_map(data): col_map = TwoColumnMap(emd) emd_map_df = col_map(df1, df2) - expected_column_name = f'{str(emd)}_map' + expected_column_name = f"{str(emd)}_map" assert col_map.name == expected_column_name assert set(emd_map_df.columns.to_list()) == set([expected_column_name])