From cff893ae0dbd9c0019a166f56de5e8af5a145d66 Mon Sep 17 00:00:00 2001
From: gAldeia
Date: Tue, 15 Oct 2024 17:49:37 -0300
Subject: [PATCH 1/3] Updated FEAT to work with `get_population`

---
NOTE(review): line breaks, indentation and blank context lines in this series
were reconstructed from a whitespace-collapsed copy, using the hunk line counts
and Python syntax. Verify against the original commits before applying.

 experiment/methods/feat/regressor.py | 89 +++++++++++++++++++++++++++-
 1 file changed, 87 insertions(+), 2 deletions(-)

diff --git a/experiment/methods/feat/regressor.py b/experiment/methods/feat/regressor.py
index 56fe27f5..5f46e3dc 100644
--- a/experiment/methods/feat/regressor.py
+++ b/experiment/methods/feat/regressor.py
@@ -1,6 +1,21 @@
 # This example submission shows the submission of FEAT (cavalab.org/feat).
 from feat import FeatRegressor
 from sklearn.base import BaseEstimator, RegressorMixin
+from feat import Feat, FeatRegressor, FeatClassifier
+
+from sklearn.datasets import load_diabetes, make_blobs
+from sklearn.base import clone
+from sklearn.pipeline import make_pipeline
+from sklearn.metrics.pairwise import rbf_kernel
+from sklearn.metrics import r2_score
+from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin
+import unittest
+import argparse
+import sys
+import pandas as pd
+import numpy as np
+import pickle
+
 
 """
 est: a sklearn-compatible regressor.
@@ -19,6 +34,59 @@
 )
 # want to tune your estimator? wrap it in a sklearn CV class.
 
+
+class FeatPopEstimator(RegressorMixin):
+    """
+    FeatPopEstimator is a custom regressor that wraps a fitted FEAT estimator
+    to call `model` and `predict` from its archive.
+
+    Attributes:
+        est (object): The fitted FEAT estimator.
+        id (int): The identifier for the specific model in the estimator's archive.
+    Methods:
+        __init__(est, id):
+            Initializes the FeatPopEstimator with a fitted FEAT estimator
+            and a model ID.
+        fit(X, y):
+            Dummy fit method to set the estimator as fitted.
+        predict(X):
+            Prepares the input data and predicts the output using the
+            model from the estimator's archive.
+        score(X, y):
+            Computes the R^2 score of the prediction.
+        model():
+            Retrieves the model equation from the estimator's archive.
+    """
+    def __init__(self, est, id):
+        self.est = est
+        self.id = id
+
+    def fit(self, X, y):
+        self.is_fitted_ = True
+
+    def predict(self, X):
+
+        X = self.est._prep_X(X)
+
+        return self.est.cfeat_.predict_archive(self.id, X)
+
+    def score(self, X, y):
+        yhat = self.predict(X).flatten()
+        return r2_score(y,yhat)
+
+    def model(self):
+        archive = self.est.cfeat_.get_archive(False)
+        ind = [i for i in archive if i['id']==self.id][0]
+
+        eqn = f"{np.round(ind['ml']['bias'], 5)}"
+        for eq, w in zip(ind['eqn'].replace('[', '').split(']'), ind['w']):
+            if str(w)[0]=='-':
+                eqn = eqn + f'{np.round(float(w), 2)}*{eq}'
+            else:
+                eqn = eqn + f'+{np.round(float(w), 2)}*{eq}'
+
+        return eqn
+
 def model(est, X=None) -> str:
     """
     Return a sympy-compatible string of the final model.
@@ -57,6 +125,12 @@ def model(est, X):
     https://github.com/cavalab/srbench/issues/new/choose
     """
 
+    model_str = None
+    if isinstance(est, FeatPopEstimator):
+        model_str = est.model()
+    else:
+        model_str = est.cfeat_.get_eqn()
+
     # Here we replace "|" with "" to handle
     # protecte sqrt (expressed as sqrt(|.|)) in FEAT)
     model_str = est.cfeat_.get_eqn()
@@ -67,6 +141,7 @@ def model(est, X):
 
     return model_str
 
+
 def get_population(est) -> list[RegressorMixin]:
     """
     Return the final population of the model. This final population should
@@ -78,11 +153,21 @@ def get_population(est) -> list[RegressorMixin]:
 
     Returns
     -------
-    A list of scikit-learn compatible estimators
+    A list of scikit-learn compatible estimators that can be used for prediction.
     """
 
-    return [est]
+    # passing True will return just the front, and False will return final population
+    archive = est.cfeat_.get_archive(False)
+
+    pop = []
+
+    # archive contains individuals serialized in json objects. let's get their ids
+    for ind in archive:
+        pop.append(
+            FeatPopEstimator(est, ind['id'])
+        )
 
+    return pop
 def get_best_solution(est) -> RegressorMixin:
     """
 

From 69e294100ce4f5b5856560e7d515b4918be2da9f Mon Sep 17 00:00:00 2001
From: gAldeia
Date: Tue, 15 Oct 2024 18:26:31 -0300
Subject: [PATCH 2/3] Limits population size to 100

---
 experiment/methods/feat/regressor.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/experiment/methods/feat/regressor.py b/experiment/methods/feat/regressor.py
index 5f46e3dc..52bfec33 100644
--- a/experiment/methods/feat/regressor.py
+++ b/experiment/methods/feat/regressor.py
@@ -163,10 +163,16 @@ def get_population(est) -> list[RegressorMixin]:
 
     # archive contains individuals serialized in json objects. let's get their ids
     for ind in archive:
+        # Archive is sorted by complexity
         pop.append(
             FeatPopEstimator(est, ind['id'])
         )
 
+        # Stopping here to avoid too many models
+        if len(pop) >= 100:
+            break
+
+
     return pop
 def get_best_solution(est) -> RegressorMixin:
     """

From 289cb7cb31edc128c0f5b9b5153a0712871a405c Mon Sep 17 00:00:00 2001
From: gAldeia
Date: Tue, 15 Oct 2024 18:37:30 -0300
Subject: [PATCH 3/3] Fixed wrong call of get_eqn from FeatPopEstimators

---
 experiment/methods/feat/regressor.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/experiment/methods/feat/regressor.py b/experiment/methods/feat/regressor.py
index 52bfec33..871e9e87 100644
--- a/experiment/methods/feat/regressor.py
+++ b/experiment/methods/feat/regressor.py
@@ -132,8 +132,7 @@ def model(est, X):
         model_str = est.cfeat_.get_eqn()
 
     # Here we replace "|" with "" to handle
-    # protecte sqrt (expressed as sqrt(|.|)) in FEAT)
-    model_str = est.cfeat_.get_eqn()
+    # protected sqrt (expressed as sqrt(|.|)) in FEAT)
     model_str = model_str.replace('|','')
 
     # use python syntax for exponents