From 0e14fb821d3eaf16f1a211278b885cc7aef89386 Mon Sep 17 00:00:00 2001
From: NavaSergio
Date: Thu, 14 Mar 2024 13:18:37 -0600
Subject: [PATCH] Prueba6 multimetrica

---
 CompStats/performance.py            | 136 +++++++++++++++++++++++++---
 CompStats/tests/test_performance.py |  18 +++-
 2 files changed, 140 insertions(+), 14 deletions(-)

diff --git a/CompStats/performance.py b/CompStats/performance.py
index d0bfdb3..0208ee1 100644
--- a/CompStats/performance.py
+++ b/CompStats/performance.py
@@ -195,8 +195,7 @@ def plot_performance2(results: dict, CI: float=0.05,
         diccionario_ordenado = {nombre: muestras for nombre, muestras in lista_ordenada}
         df2 = pd.DataFrame(diccionario_ordenado).melt(var_name=var_name,
                                                       value_name=value_name)
-    else:
-        df2 = statistic_samples
+
     if isinstance(CI, float):
         ci = lambda x: measurements.CI(x, alpha=CI)
     f_grid = sns.catplot(df2, x=value_name, y=var_name,
@@ -217,20 +216,22 @@ def plot_performance_multiple(results_dict, CI=0.05, capsize=0.2, linestyle='non
     """
     for metric_name, metric_results in results_dict.items():
         # Use catplot to create and display the plot
-        g = plot_performance2(metric_results, CI=CI)
+        g = plot_performance2(metric_results['diff'], CI=CI)
         g.figure.suptitle(metric_name)
-        plt.show()
+        # plt.show()
 
-def difference_multiple(results_dict):
+def difference_multiple(results_dict, CI: float=0.05):
     """
-    Calculate performance differences for multiple metrics, returning the vector of differences
-    to the best performing algorithm for each metric.
+    Calculate performance differences for multiple metrics, excluding the comparison of the best
+    algorithm with itself, and identify the best performing algorithm for each metric.
 
     :param results_dict: A dictionary where keys are metric names and values are dictionaries.
                          Each sub-dictionary has algorithm names as keys and lists of performance scores as values.
+    :param CI: Significance level (alpha) used to compute the confidence intervals of the differences.
     :return: A dictionary with the same structure, but where the scores for each algorithm are replaced
-             by their differences to the scores of the best performing algorithm for that metric.
+             by their differences to the scores of the best performing algorithm for that metric,
+             excluding the comparison of the best performing algorithm with itself.
+             The name of the best algorithm for each metric is also included.
     """
     differences_dict = {}
     for metric, results in results_dict.items():
@@ -239,14 +240,123 @@ def difference_multiple(results_dict):
 
         # Identify the best performing algorithm (highest mean score)
         best_alg = max(scores_arrays, key=lambda alg: np.mean(scores_arrays[alg]))
-
-        # Calculate differences to the best performing algorithm
         best_scores = scores_arrays[best_alg]
-        differences = {alg: best_scores - scores for alg, scores in scores_arrays.items()}
 
-        # Store the differences under the current metric
-        differences_dict[metric] = differences
+        # Calculate differences to the best performing algorithm, excluding the best compared with itself
+        differences = {alg: best_scores - scores for alg, scores in scores_arrays.items() if alg != best_alg}
+
+        # Calculate the confidence interval for the differences to the best performing algorithm.
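+        # measurements.CI is expected to return a (lower, upper) pair for each difference
+        # distribution; plot_difference2 below reads the lower bound and marks a comparison
+        # as not significant when that bound falls below zero.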
+        CI_differences = {alg: measurements.CI(np.array(scores), alpha=CI) for alg, scores in differences.items()}
+
+        # Store the differences, their confidence intervals, and the best algorithm under the current metric
+        differences_dict[metric] = {'best': best_alg, 'diff': differences, 'CI': CI_differences}
     return differences_dict
 
+
+def plot_difference2(diff_dictionary: dict,
+                     var_name='Comparison', value_name='Difference',
+                     set_refline=True, set_title=True,
+                     hue='Significant', palette=None,
+                     **kwargs):
+    """Plot the differences in performance to the best algorithm of a single metric,
+    together with their confidence intervals.
+
+    >>> from CompStats.performance import performance_multiple_metrics, difference_multiple, plot_difference2
+    >>> from CompStats.tests.test_performance import DATA
+    >>> from sklearn.metrics import accuracy_score
+    >>> import pandas as pd
+    >>> df = pd.read_csv(DATA)
+    >>> metrics = [{"func": accuracy_score}]
+    >>> perf = performance_multiple_metrics(df, "y", metrics)
+    >>> diff = difference_multiple(perf)
+    >>> ins = plot_difference2(diff['accuracy_score_'])
+    """
+
+    df2 = pd.DataFrame(diff_dictionary['diff']).melt(var_name=var_name,
+                                                     value_name=value_name)
+    if hue is not None:
+        df2[hue] = True
+        at_least_one = False
+        # Mark comparisons whose confidence interval reaches below zero as not significant
+        for key, (left, _) in diff_dictionary['CI'].items():
+            if left < 0:
+                rows = df2[var_name] == key
+                df2.loc[rows, hue] = False
+                at_least_one = True
+        if at_least_one and palette is None:
+            palette = ['r', 'b']
+    f_grid = plot_performance(df2, var_name=var_name,
+                              value_name=value_name, hue=hue,
+                              palette=palette,
+                              **kwargs)
+    if set_refline:
+        f_grid.refline(x=0)
+    if set_title:
+        best = diff_dictionary['best']
+        f_grid.facet_axis(0, 0).set_title(f'Best: {best}')
+    return f_grid
+
+
+def plot_difference_multiple(results_dict, CI=0.05, capsize=0.2, linestyle='none', kind='point', **kwargs):
+    """
+    Create one difference plot per performance metric in the results dictionary.
+
+    :param results_dict: Output of difference_multiple: a dictionary where keys are metric names and values
+                         are dictionaries with 'best', 'diff', and 'CI' entries.
+    :param CI: Confidence interval level for error bars.
+    :param capsize: Cap size for error bars.
+    :param linestyle: Line style for the plot.
+    :param kind: Type of the plot, e.g., 'point', 'bar'.
+    :param kwargs: Additional keyword arguments for seaborn.catplot.
+    """
+    for metric_name, metric_results in results_dict.items():
+        # Use catplot to create and display the plot
+        g = plot_difference2(metric_results)
+        g.figure.suptitle(metric_name)
+        # plt.show()
+
+
+def plot_difference_scatter_multiple(results_dict, algorithm: str):
+    # NOTE: 'algorithm' is not used yet; the function currently mirrors plot_difference_multiple.
+    for metric_name, metric_results in results_dict.items():
+        # Use catplot to create and display the plot
+        g = plot_difference2(metric_results)
+        g.figure.suptitle(metric_name)
+        # plt.show()
+
+
+def plot_scatter_matrix(perf, alg=None):
+    """
+    Generate a scatter plot matrix comparing the performance of the algorithms
+    across the different metrics contained in the 'perf' dictionary.
+
+    :param perf: A dictionary where keys are metric names and values are dictionaries with algorithm names as keys
+                 and lists of performance scores as values.
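+    :param alg: Optional algorithm name; when given, only that algorithm's scores are plotted.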
+    """
+    # Convert 'perf' into a long-format pandas DataFrame to ease the manipulation
+    df_long = pd.DataFrame([
+        {"Metric": metric, "Algorithm": alg_name, "Score": score, "Indice": i}
+        for metric, alg_scores in perf.items()
+        for alg_name, scores in alg_scores.items()
+        for i, score in enumerate(scores)
+    ])
+
+    if alg is not None:
+        df_long = df_long[df_long['Algorithm'] == alg]
+        # Build a wide DataFrame (one column per metric) to ease the scatter plots
+        df_wide = df_long.pivot(index='Indice', columns='Metric', values='Score')
+        # Generate the scatter plot matrix for the selected algorithm
+        sns.pairplot(df_wide, diag_kind='kde', corner=True)
+        plt.suptitle(alg, y=1.02)
+    else:
+        df_wide = df_long.pivot(index=['Algorithm', 'Indice'], columns='Metric', values='Score')
+        df_wide = df_wide.reset_index(level=[0])
+        sns.pairplot(df_wide, diag_kind='kde', hue="Algorithm", corner=True)
+        plt.suptitle('Scatter Plot Matrix of Algorithms Performance Across Different Metrics', y=1.02)
+    plt.show()
+
+
diff --git a/CompStats/tests/test_performance.py b/CompStats/tests/test_performance.py
index a56cbbb..36daa44 100644
--- a/CompStats/tests/test_performance.py
+++ b/CompStats/tests/test_performance.py
@@ -16,7 +16,7 @@ import os
 from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
 import seaborn as sns
 
-from CompStats.performance import performance, plot_performance, difference, plot_difference, all_differences, performance_multiple_metrics, plot_performance2, plot_performance_multiple, difference_multiple
+from CompStats.performance import performance, plot_performance, difference, plot_difference, all_differences, performance_multiple_metrics, plot_performance2, plot_performance_multiple, difference_multiple, plot_scatter_matrix
 
 DATA = os.path.join(os.path.dirname(__file__), 'data.csv')
 
@@ -73,3 +73,19 @@ def test_performance_multiple_metrics():
     assert 'y' not in perf['accuracy_score_']
     assert 'INGEOTEC' in perf['accuracy_score_']
 
+
+def test_difference_multiple():
+    """Test difference_multiple"""
+    df = pd.read_csv(DATA)
+    metrics = [
+        {"func": accuracy_score},
+        {"func": f1_score, "args": {"average": "macro"}},
+        {"func": precision_score, "args": {"average": "macro"}},
+        {"func": recall_score, "args": {"average": "macro"}}
+    ]
+    perf = performance_multiple_metrics(df, "y", metrics)
+    diff = difference_multiple(perf)
+    assert diff['accuracy_score_']['best'] == 'BoW'
+    assert 'BoW' not in diff['accuracy_score_']['diff'].keys()
+    # ins = plot_performance_multiple(diff)
+    # assert isinstance(ins, sns.FacetGrid)
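
Usage sketch for the API added above, following test_difference_multiple; the data file DATA, the "y" column, and the 'accuracy_score_' key are the ones used in that test and may differ for other data sets:

    import pandas as pd
    from sklearn.metrics import accuracy_score, f1_score
    from CompStats.performance import (performance_multiple_metrics, difference_multiple,
                                        plot_difference_multiple, plot_scatter_matrix)
    from CompStats.tests.test_performance import DATA

    df = pd.read_csv(DATA)
    metrics = [{"func": accuracy_score},
               {"func": f1_score, "args": {"average": "macro"}}]
    # Bootstrap performance per metric, then differences to the best system with their CIs
    perf = performance_multiple_metrics(df, "y", metrics)
    diff = difference_multiple(perf, CI=0.05)
    print(diff['accuracy_score_']['best'])
    # One difference plot per metric, and a scatter matrix of scores across metrics
    plot_difference_multiple(diff)
    plot_scatter_matrix(perf)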