Merge pull request #189 from alcides/docker-compose

Updated GeneticEngine to latest version (2024 competition)
cavalab · Oct 14, 2024 · 105888b · 105888b
2 parents 45f770c + dfd6065
commit 105888b
Show file tree

Hide file tree

Showing 10 changed files with 185 additions and 153 deletions.
diff --git a/algorithms/geneticengine/environment.yml b/algorithms/geneticengine/environment.yml
@@ -3,9 +3,9 @@ channels:
   - conda-forge
   - defaults
 dependencies:
-  - python=3.9.12
-  - python_abi=3.9
-  - setuptools=62.1.0
-  - pip=22.0.4
+  - python=3.12.6
+  - setuptools=75.1.0
+  - pip=24.2
+  - dill=0.3.9
   - pip:
-    - geneticengine==0.0.18
+      - geneticengine @ git+https://github.com/alcides/GeneticEngine.git@5e7c4ec8807b8299f92e0347443714e16f22e40f
diff --git a/algorithms/geneticengine/metadata.yml b/algorithms/geneticengine/metadata.yml
@@ -1,16 +1,10 @@
 authors: # the participants
-  - Guilherme Espada, Leon Ingelse, Paulo Santos, Pedro Barbosa, Alcides Fonseca
-email: gjespada@fc.ul.pt, leoningelse@gmail.com, pacsantos@fc.ul.pt, psbarbosa@ciencias.ulisboa.pt, me@alcidesfonseca.com
-name: Genetic Engine 
+  - Alcides Fonseca, Guilherme Espada, Leon Ingelse, Eduardo Madeira
+email: me@alcidesfonseca.com, gjespada@fc.ul.pt, leoningelse@gmail.com, jmadeira@lasige.di.fc.ul.pt
+name: Genetic Engine
 description:
-  Genetic Engine is a framework for using Genetic Programming in different contexts. Genetic Engine allows the user to define trees in terms of Classes and Inheritance, as they would in a regular programming environment. Our framework takes care of generating individuals, mutating them and crossing them over. The user also defines a fitness function that takes a tree and returns a fitness score. This often requires to write (or reuse) a tree interpreter, as it is custom in these types of approaches.
+  GeneticEngine is an extensible framework for implementing evolutionary algorithms. Users can write their own class structure and select the algorithm they want to run. Each algorithm will generate random trees following the class specification.
+  Currently, GeneticEngine supports several representations (tree-based, grammatical evolution, structured grammatical evolution, stack-based), several algorithms (hill-climbing, Genetic Programming, 1+1) and several operators (Tournament, Lexicase, different mutations and crossovers).
+  A distinctive feature of this framework is that it supports dependent types that semantically constrain the tree generation.
 
-  The main different between STGP and GGGP is that the restrictions on what trees are valid are done via types, while in GGGP they are expressed using a grammar. Genetic Engine extracts the grammar from the types and their relationship, allowing to use any technique from GGGP (such as Grammatical Evolution) in STGP.
-
-  The advantages of using STGP are:
-
-  * the user does not need to know grammars, EBNF or any other grammar syntax
-  * there is no need for a textual representation of programs, as trees can be the only representation (à lá lisp).
-  * there is no need for parsing a textual program to a tree, to then interpret the tree
-  * Mutations and Recombination are automatically type-safe, where in a grammar that type-safety is implicit in the structure of the grammar (and hard to reason)
 url: "https://github.com/alcides/GeneticEngine/"
diff --git a/experiment/methods/geneticengine/metadata.yml b/experiment/methods/geneticengine/metadata.yml
@@ -0,0 +1,10 @@
+authors: # the participants
+  - Alcides Fonseca, Guilherme Espada, Leon Ingelse, Eduardo Madeira
+email: me@alcidesfonseca.com, gjespada@fc.ul.pt, leoningelse@gmail.com, jmadeira@lasige.di.fc.ul.pt
+name: Genetic Engine
+description:
+  GeneticEngine is an extensible framework for implementing evolutionary algorithms. Users can write their own class structure and select the algorithm they want to run. Each algorithm will generate random trees following the class specification.
+  Currently, GeneticEngine supports several representations (tree-based, grammatical evolution, structured grammatical evolution, stack-based), several algorithms (hill-climbing, Genetic Programming, 1+1) and several operators (Tournament, Lexicase, different mutations and crossovers).
+  A distinctive feature of this framework is that it supports dependent types that semantically constrain the tree generation.
+
+url: "https://github.com/alcides/GeneticEngine/"
diff --git a/experiment/methods/geneticengine/regressor.py b/experiment/methods/geneticengine/regressor.py
@@ -1,156 +1,46 @@
-from audioop import cross
-from typing import Union
-import geneticengine.off_the_shelf.regressors as gengy_regressors
-from sklearn.base import BaseEstimator, TransformerMixin
-from sklearn.model_selection import GridSearchCV
+# This submission uses GeneticProgrammingRegressor from geml.regressors (GeneticEngine, https://github.com/alcides/GeneticEngine/).
+from geml.regressors import GeneticProgrammingRegressor
+from geml.regressors import model
+from sklearn.base import RegressorMixin
 
 """
 est: a sklearn-compatible regressor. 
     if you don't have one they are fairly easy to create. 
     see https://scikit-learn.org/stable/developers/develop.html
 """
+est: RegressorMixin = GeneticProgrammingRegressor(
+    max_time=3,  # time budget. NOTE(review): template comment said "8 hrs" but the value is 3 - confirm units and intended limit
+)
 
-class OptimisedGPRegressor(BaseEstimator, TransformerMixin):
-    def __init__(
-        self,
-        population_size = 50,
-        n_novelties = 10,
-        favor_less_deep_trees = True,
-        random_state = 123,
-        max_time = 100,
-        optimisation_dedicated_proportion = 0.2,
-        slack_time = 0.05,
-    ):
-        self.population_size = population_size
-        self.n_novelties = n_novelties
-        self.favor_less_deep_trees = favor_less_deep_trees
-        self.random_state = random_state
-        self.max_time = max_time
-        self.optimisation_dedicated_proportion = optimisation_dedicated_proportion
-        self.slack_time = slack_time
-        self.model = None
-
-
-    def fit(self, X, y):
-        n_elites = [ 5, 10 ]
-        max_depths = [ 10, 15 ]
-        hill_climbings = [ True, False ]
-        mutation_probs = [ 0.01, 0.05, 0.1, 0.2 ]
-        crossover_probs = [ 0.8, 0.9, 0.95 ]
-
-        CVS = 2
-
-        param_grid_size = len(n_elites) * len(max_depths) * len(hill_climbings) * len(mutation_probs) * len(crossover_probs)
-        param_alloted_time = int((self.max_time * self.optimisation_dedicated_proportion) / (param_grid_size * CVS))
-        if param_alloted_time < 1: # For testing
-            n_elites = [ 5 ]
-            max_depths = [ 10 ]
-            mutation_probs = [ 0.01 ]
-            crossover_probs = [ 0.8 ]
-
-            param_alloted_time = 1
-
-        param_grid: Union[dict, list] = { 
-                                "population_size": [ self.population_size ],
-                                "n_elites": n_elites,
-                                "n_novelties": [ self.n_novelties ],
-                                "max_depth": max_depths,
-                                "favor_less_deep_trees": [ self.favor_less_deep_trees ],
-                                "seed": [ self.random_state ],
-                                "hill_climbing": hill_climbings,
-                                "probability_mutation": mutation_probs,
-                                "probability_crossover": crossover_probs,
-                                "timer_stop_criteria": [ True ],
-                                "timer_limit": [ param_alloted_time ],
-                                "metric": [ 'r2' ],
-                                }
 
-        grid_search = GridSearchCV(gengy_regressors.GeneticProgrammingRegressor(),param_grid,cv=CVS)
-
-        grid_search.fit(X,y)
-        model = grid_search.best_estimator_
-
-        model_alloted_time = int(self.max_time * (1 - self.optimisation_dedicated_proportion - self.slack_time))
-        if "timer_limit" in model.get_params():
-            model.set_params(timer_limit=model_alloted_time)
-
-        model.fit(X,y)
-        self.model = model
-
-        self.sympy_compatible_phenotype = model.sympy_compatible_phenotype
+def get_population(est) -> list[RegressorMixin]:
+    """
+    Return the final population of the model. This final population should
+    be a list with at most 100 individuals. Each of the individuals must
+    be compatible with scikit-learn, so they should have a predict method.
 
-        return model
-
-    def predict(self, X):
-        assert self.model != None
-        y_pred = self.model.predict(X)
-
-        return y_pred 
-
-    def score(self, X, y):
-        self.model.score(X, y)
+    Also, it is expected that the `model()` function can operate with them,
+    so they should have a way of getting a sympy string representation.
 
-
-
+    Returns
+    -------
+    A list of scikit-learn compatible estimators
+    """
 
+    return est.get_population()
 
-est = OptimisedGPRegressor(
-        population_size = 250,
-        n_novelties = 10,
-        favor_less_deep_trees = True,
-                   )
 
-def model(est, X=None):
+def get_best_solution(est) -> RegressorMixin:
     """
-    Return a sympy-compatible string of the final model. 
-    Parameters
-    ----------
-    est: sklearn regressor
-        The fitted model. 
-    X: pd.DataFrame, default=None
-        The training data. This argument can be dropped if desired.
+    Return the best solution from the final model.
+
     Returns
     -------
-    A sympy-compatible string of the final model. 
-    Notes
-    -----
-    Ensure that the variable names appearing in the model are identical to 
-    those in the training data, `X`, which is a `pd.Dataframe`. 
-    If your method names variables some other way, e.g. `[x_0 ... x_m]`, 
-    you can specify a mapping in the `model` function such as:
-        ```
-        def model(est, X):
-            mapping = {'x_'+str(i):k for i,k in enumerate(X.columns)}
-            new_model = est.model_
-            for k,v in mapping.items():
-                new_model = new_model.replace(k,v)
-        ```
-    If you have special operators such as protected division or protected log,
-    you will need to handle these to assure they conform to sympy format. 
-    One option is to replace them with the unprotected versions. Post an issue
-    if you have further questions: 
-    https://github.com/cavalab/srbench/issues/new/choose
+    A scikit-learn compatible estimator
     """
 
-    # Here we replace "|" with "" to handle
-    # protecte sqrt (expressed as sqrt(|.|)) in FEAT) 
-    model_str = est.sympy_compatible_phenotype
-
-    return model_str
+    return est.get_best_solution()
 
-def pre_train_fn(est, X, y): 
-    """set max_time in seconds based on length of X."""
-    slack = 20
-    if len(X)<=1000:
-        max_time = 3600 - slack
-    else:
-        max_time = 36000 - slack
-    est.set_params(max_time=max_time)
-
-# pass the function to eval_kwargs
-eval_kwargs = dict(
-    pre_train=pre_train_fn,
-    test_params={'max_time': 100,
-                 }
-)
 
+# define eval_kwargs.
+eval_kwargs = {}
diff --git a/experiment/methods/geneticengine_1p1/__init__.py b/experiment/methods/geneticengine_1p1/__init__.py
diff --git a/experiment/methods/geneticengine_1p1/regressor.py b/experiment/methods/geneticengine_1p1/regressor.py
@@ -0,0 +1,46 @@
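+# This submission uses RandomSearchRegressor from geml.regressors (GeneticEngine). NOTE(review): this file is geneticengine_1p1, yet geneticengine_rs uses OnePlusOneRegressor - the two look swapped; confirm.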
+from geml.regressors import RandomSearchRegressor
+from geml.regressors import model
+from sklearn.base import RegressorMixin
+
+"""
+est: a sklearn-compatible regressor. 
+    if you don't have one they are fairly easy to create. 
+    see https://scikit-learn.org/stable/developers/develop.html
+"""
+est: RegressorMixin = RandomSearchRegressor(
+    max_time=3,  # time budget. NOTE(review): template comment said "8 hrs" but the value is 3 - confirm units and intended limit
+)
+
+
+def get_population(est) -> list[RegressorMixin]:
+    """
+    Return the final population of the model. This final population should
+    be a list with at most 100 individuals. Each of the individuals must
+    be compatible with scikit-learn, so they should have a predict method.
+
+    Also, it is expected that the `model()` function can operate with them,
+    so they should have a way of getting a sympy string representation.
+
+    Returns
+    -------
+    A list of scikit-learn compatible estimators
+    """
+
+    return est.get_population()
+
+
+def get_best_solution(est) -> RegressorMixin:
+    """
+    Return the best solution from the final model.
+
+    Returns
+    -------
+    A scikit-learn compatible estimator
+    """
+
+    return est.get_best_solution()
+
+
+# define eval_kwargs.
+eval_kwargs = {}
diff --git a/experiment/methods/geneticengine_hc/__init__.py b/experiment/methods/geneticengine_hc/__init__.py
diff --git a/experiment/methods/geneticengine_hc/regressor.py b/experiment/methods/geneticengine_hc/regressor.py
@@ -0,0 +1,46 @@
+# This submission uses HillClimbingRegressor from geml.regressors (GeneticEngine, https://github.com/alcides/GeneticEngine/).
+from geml.regressors import HillClimbingRegressor
+from geml.regressors import model
+from sklearn.base import RegressorMixin
+
+"""
+est: a sklearn-compatible regressor. 
+    if you don't have one they are fairly easy to create. 
+    see https://scikit-learn.org/stable/developers/develop.html
+"""
+est: RegressorMixin = HillClimbingRegressor(
+    max_time=3,  # time budget. NOTE(review): template comment said "8 hrs" but the value is 3 - confirm units and intended limit
+)
+
+
+def get_population(est) -> list[RegressorMixin]:
+    """
+    Return the final population of the model. This final population should
+    be a list with at most 100 individuals. Each of the individuals must
+    be compatible with scikit-learn, so they should have a predict method.
+
+    Also, it is expected that the `model()` function can operate with them,
+    so they should have a way of getting a sympy string representation.
+
+    Returns
+    -------
+    A list of scikit-learn compatible estimators
+    """
+
+    return est.get_population()
+
+
+def get_best_solution(est) -> RegressorMixin:
+    """
+    Return the best solution from the final model.
+
+    Returns
+    -------
+    A scikit-learn compatible estimator
+    """
+
+    return est.get_best_solution()
+
+
+# define eval_kwargs.
+eval_kwargs = {}
diff --git a/experiment/methods/geneticengine_rs/__init__.py b/experiment/methods/geneticengine_rs/__init__.py
diff --git a/experiment/methods/geneticengine_rs/regressor.py b/experiment/methods/geneticengine_rs/regressor.py
@@ -0,0 +1,46 @@
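+# This submission uses OnePlusOneRegressor from geml.regressors (GeneticEngine). NOTE(review): this file is geneticengine_rs, yet geneticengine_1p1 uses RandomSearchRegressor - the two look swapped; confirm.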
+from geml.regressors import OnePlusOneRegressor
+from geml.regressors import model
+from sklearn.base import RegressorMixin
+
+"""
+est: a sklearn-compatible regressor. 
+    if you don't have one they are fairly easy to create. 
+    see https://scikit-learn.org/stable/developers/develop.html
+"""
+est: RegressorMixin = OnePlusOneRegressor(
+    max_time=3,  # time budget. NOTE(review): template comment said "8 hrs" but the value is 3 - confirm units and intended limit
+)
+
+
+def get_population(est) -> list[RegressorMixin]:
+    """
+    Return the final population of the model. This final population should
+    be a list with at most 100 individuals. Each of the individuals must
+    be compatible with scikit-learn, so they should have a predict method.
+
+    Also, it is expected that the `model()` function can operate with them,
+    so they should have a way of getting a sympy string representation.
+
+    Returns
+    -------
+    A list of scikit-learn compatible estimators
+    """
+
+    return est.get_population()
+
+
+def get_best_solution(est) -> RegressorMixin:
+    """
+    Return the best solution from the final model.
+
+    Returns
+    -------
+    A scikit-learn compatible estimator
+    """
+
+    return est.get_best_solution()
+
+
+# define eval_kwargs.
+eval_kwargs = {}