Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

84 compare basic fedot vs fedot with contextual mab warm start #86

Open
wants to merge 39 commits into
base: main
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
763581d
remove datasets list from datasets loaders
MorrisNein Nov 14, 2023
50b3ea4
introduce generic typing & index retrieval for dataset data
MorrisNein Nov 16, 2023
2b2b28b
remove meta-features cache, refactor mf extractor interface
MorrisNein Nov 16, 2023
e94f973
minor fixes
MorrisNein Nov 16, 2023
d496bcb
remove redundant constant
MorrisNein Nov 24, 2023
4148123
add persistent_cache.py
MorrisNein Nov 14, 2023
4b9c12a
add logging messages
MorrisNein Nov 15, 2023
869e5e4
finalize persistent_cache.py
MorrisNein Nov 16, 2023
b353401
finalize persistent_cache.py
MorrisNein Nov 16, 2023
ea24a17
create Dockerfile and .dockerignore
MorrisNein Apr 20, 2023
34591a5
create the experiment script & config
MorrisNein Jul 20, 2023
c4c2680
adapt to #39
MorrisNein Jul 27, 2023
3e5e7bb
add config for debugging
MorrisNein Jul 28, 2023
bbfd898
remove data leak
MorrisNein Oct 12, 2023
e4fd2ff
persist train/test datasets split
MorrisNein Oct 12, 2023
bd9697a
add final choices to the best models
MorrisNein Oct 12, 2023
b765671
add FedotHistoryLoader
MorrisNein Oct 22, 2023
dbfdfb4
add MetaLearningApproach and its children
MorrisNein Oct 22, 2023
24f33aa
set TMPDIR from script
MorrisNein Nov 3, 2023
274da9a
simplify MetaLearningApproach
MorrisNein Nov 4, 2023
5a4a9e4
set logging level of FEDOT
MorrisNein Nov 7, 2023
7c00db9
create config_light.yaml
MorrisNein Nov 10, 2023
acdf7a8
add dataset_id to description
MorrisNein Nov 10, 2023
70549e5
fix train/test split
MorrisNein Nov 13, 2023
f3d79c7
fix progress bar
MorrisNein Nov 13, 2023
5a98422
make fit unnecessary for MetaLearningApproach
MorrisNein Nov 14, 2023
02c9af4
fix n_datasets
MorrisNein Nov 14, 2023
4962931
add evaluation caching
MorrisNein Nov 15, 2023
e943c02
split config file
MorrisNein Nov 21, 2023
d4eeb69
add data split
MorrisNein Nov 21, 2023
41148fb
fix types into inner components
MorrisNein Nov 21, 2023
5195176
increase debug fedot timeout
MorrisNein Nov 21, 2023
40feb19
fix knn experiment
MorrisNein Nov 21, 2023
0913354
fix pipeline evaluation, compute fitness on test data
MorrisNein Nov 24, 2023
aa6a03d
refactor
maypink Nov 27, 2023
3ccb77d
add new infrastructure
maypink Nov 30, 2023
b47c9cc
refactor
maypink Dec 4, 2023
bc77eef
add consideration of datasets
maypink Dec 8, 2023
7a9e882
move datasets from train to test
maypink Dec 22, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
introduce generic typing & index retrieval for dataset data
MorrisNein committed Nov 24, 2023
commit 50b3ea4adb975c192552b24b92c48454349017e9
2 changes: 1 addition & 1 deletion meta_automl/data_preparation/dataset/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .dataset_base import DatasetBase, DatasetData, DatasetIDType, TimeSeriesData
from .dataset_base import DatasetBase, DatasetData, DatasetIDType, TabularData, TimeSeriesData
from .custom_dataset import CustomDataset, DataNotFoundError
from .openml_dataset import OpenMLDataset, OpenMLDatasetIDType
from .time_series_dataset import TimeSeriesDataset
3 changes: 1 addition & 2 deletions meta_automl/data_preparation/dataset/custom_dataset.py
Original file line number Diff line number Diff line change
@@ -4,8 +4,7 @@
from pathlib import Path
from typing import Optional

from meta_automl.data_preparation.dataset import DatasetBase
from meta_automl.data_preparation.dataset.dataset_base import DatasetData
from meta_automl.data_preparation.dataset import DatasetBase, DatasetData


class DataNotFoundError(FileNotFoundError):
38 changes: 28 additions & 10 deletions meta_automl/data_preparation/dataset/dataset_base.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,62 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from copy import copy
from dataclasses import dataclass
from pathlib import Path
from typing import Any, List, Optional
from typing import Generic, Hashable, List, Optional, TypeVar

import numpy as np

from meta_automl.data_preparation.file_system import CacheOperator, get_dataset_cache_path

DatasetIDType = Any


@dataclass
class DatasetData:
dataset: DatasetBase
x: np.array
y: Optional[np.array] = None

@property
def id(self):
return self.dataset.id

def __getitem__(self, item):
other = copy(self)
if self.y is not None:
other.y = self.y[item]
other.x = self.x[item]
return other


@dataclass
class TabularData(DatasetData):
categorical_indicator: Optional[List[bool]] = None
attribute_names: Optional[List[str]] = None


@dataclass
class TimeSeriesData:
x: np.array
class TimeSeriesData(DatasetData):
# time series has already split
y: np.array
forecast_length: int = 1


class DatasetBase(ABC, CacheOperator):
DatasetDataType_co = TypeVar('DatasetDataType_co', bound=DatasetData, covariant=True)

DatasetIDType = TypeVar('DatasetIDType', bound=Hashable)


class DatasetBase(Generic[DatasetDataType_co], CacheOperator, ABC):

def __init__(self, id_: DatasetIDType, name: Optional[str] = None):
self.id_ = id_
self.id = id_
self.name = name

def __repr__(self):
return f'{self.__class__.__name__}(id_={self.id_}, name={self.name})'
return f'{self.__class__.__name__}(id_={self.id}, name={self.name})'

@abstractmethod
def get_data(self) -> DatasetData:
def get_data(self) -> DatasetDataType_co:
raise NotImplementedError()

@property
11 changes: 5 additions & 6 deletions meta_automl/data_preparation/dataset/openml_dataset.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
from __future__ import annotations

from typing import Union
from typing import TypeVar, Union

import openml

from meta_automl.data_preparation.dataset import DatasetBase
from meta_automl.data_preparation.dataset.dataset_base import DatasetData
from meta_automl.data_preparation.dataset import DatasetBase, TabularData
from meta_automl.data_preparation.file_system import update_openml_cache_dir

OpenMLDatasetIDType = int
OpenMLDatasetIDType = TypeVar('OpenMLDatasetIDType', bound=int)

update_openml_cache_dir()

@@ -33,8 +32,8 @@ def from_search(cls, id_: Union[OpenMLDatasetIDType, str], **get_dataset_kwargs)
**get_dataset_kwargs)
return cls(openml_dataset.id)

def get_data(self) -> DatasetData:
def get_data(self) -> TabularData:
X, y, categorical_indicator, attribute_names = self._openml_dataset.get_data(
target=self._openml_dataset.default_target_attribute
)
return DatasetData(X, y, categorical_indicator, attribute_names)
return TabularData(self, X, y, categorical_indicator, attribute_names)
8 changes: 4 additions & 4 deletions meta_automl/data_preparation/dataset/time_series_dataset.py
Original file line number Diff line number Diff line change
@@ -2,8 +2,8 @@

import pandas as pd

from meta_automl.data_preparation.dataset import DatasetIDType
from meta_automl.data_preparation.dataset.dataset_base import DatasetBase, TimeSeriesData
from meta_automl.data_preparation.dataset import DatasetIDType, TimeSeriesData
from meta_automl.data_preparation.dataset.dataset_base import DatasetBase
from meta_automl.data_preparation.file_system import get_project_root


@@ -18,8 +18,8 @@ def __init__(self, id_: DatasetIDType, forecast_length: int = 1, custom_path=Non
self.path_to_knowledge_base = Path(get_project_root(), 'data', 'knowledge_base_time_series_0', 'datasets')

def get_data(self) -> TimeSeriesData:
path_to_series = Path(self.path_to_knowledge_base, self.id_, 'data.csv')
path_to_series = Path(self.path_to_knowledge_base, self.id, 'data.csv')
series = pd.read_csv(path_to_series)['value'].values
x = series[:-self.forecast_length]
y = series[-self.forecast_length:]
return TimeSeriesData(x, y, self.forecast_length)
return TimeSeriesData(self, x, y, self.forecast_length)
10 changes: 6 additions & 4 deletions meta_automl/data_preparation/datasets_loaders/datasets_loader.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,24 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import List, Sequence
from typing import Generic, List, Sequence, TypeVar

from meta_automl.data_preparation.dataset import DatasetBase, DatasetIDType

DatasetType_co = TypeVar('DatasetType_co', bound=DatasetBase, covariant=True)

class DatasetsLoader(ABC):

def load(self, dataset_ids: Sequence[DatasetIDType]) -> List[DatasetBase]:
class DatasetsLoader(Generic[DatasetType_co], ABC):

def load(self, dataset_ids: Sequence[DatasetIDType]) -> List[DatasetType_co]:
datasets = []
for dataset_id in dataset_ids:
dataset = self.load_single(dataset_id)
datasets.append(dataset)
return datasets

@abstractmethod
def load_single(self, *args, **kwargs) -> DatasetBase:
def load_single(self, *args, **kwargs) -> DatasetType_co:
raise NotImplementedError()

@property