Skip to content

Commit

Permalink
Fix first stages of experiments
Browse files (browse the repository at this point in the history)
  • Loading branch information
azoz01 committed Aug 3, 2024
1 parent f6fe864 commit d07ac98
Show file tree
Hide file tree
Showing 14 changed files with 130 additions and 245 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,6 @@ errors
results
d2v*
!d2v*.py
.venv/
.venv/
dist/
wsmf.egg-info/
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,9 @@ export PYTHONPATH=`pwd`
### Loading data
```
python bin/load_data.py
```

### Release
```
python -m build
```
5 changes: 4 additions & 1 deletion bin/load_data/download_data_from_openml.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import warnings
from operator import itemgetter

import pytorch_lightning as pl
Expand All @@ -12,8 +13,10 @@
)
from experiments_engine.paths import paths_provider

warnings.simplefilter("ignore")

def main():

def main() -> None:
pl.seed_everything(123)
logger.info("Loading tasks ids")
with open(paths_provider.tasks_ids_path, "r") as f:
Expand Down
69 changes: 0 additions & 69 deletions bin/load_data/generate_synthetic_data.py

This file was deleted.

13 changes: 11 additions & 2 deletions bin/metatrain_preparation/calculate_metafeatures.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def calculate_meta_features_record_from_path(
return ft # type: ignore


def main():
def main() -> None:
logger.info("Calculating meta-features")
paths = list(sorted(paths_provider.datasets_splitted_path.iterdir()))
with Pool(14) as p:
Expand All @@ -57,7 +57,16 @@ def main():
logger.info("Processing meta-features")
meta_features_df = pd.concat(meta_features)
meta_features_df = meta_features_df.drop(
columns=["num_to_cat", "sd_ratio", "lh_trace", "roy_root"]
columns=[
"num_to_cat",
"sd_ratio",
"lh_trace",
"roy_root",
"can_cor.mean",
"nr_disc",
"p_trace",
"w_lambda",
]
)
meta_features_df.iloc[:, :-1] = meta_features_df.iloc[:, :-1].fillna(
meta_features_df.iloc[:, :-1].mean(axis=0)
Expand Down
2 changes: 1 addition & 1 deletion bin/metatrain_preparation/perform_hpo_on_all_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from experiments_engine.utils import extract_dataset_name_from_path


def main():
def main() -> None:
objectives_to_evaluate = [XGBoostObjective]
erroneous_datasets = []
for dataset_path in (
Expand Down
135 changes: 0 additions & 135 deletions bin/metatrain_preparation/select_hp_for_portfolio_half_random.py

This file was deleted.

1 change: 1 addition & 0 deletions data/openml/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
**.parquet
18 changes: 18 additions & 0 deletions data/openml/prohibited_datasets.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[
"Amazon_employee_access",
"DRSongsLyrics",
"StackOverflow-polarity",
"StackOverflow-polarity-test",
"SyskillWebert-Bands",
"SyskillWebert-BioMedical",
"SyskillWebert-Sheep",
"Traffic_violations",
"breastTumor",
"credit-g",
"kick",
"lymph",
"okcupid-stem",
"online_shoppers",
"primary-tumor",
"usp05-ft"
]
1 change: 1 addition & 0 deletions data/openml/tasks.json

Large diffs are not rendered by default.

38 changes: 38 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
[build-system]
requires = ["setuptools", "setuptools-scm"]
build-backend = "setuptools.build_meta"

[tool.setuptools.packages.find]
include = ["wsmf"]

[project]
name = "wsmf"
version = "0.0.1"
authors = [
{ name="Antoni Zajko", email="antoni.zajko.1@gmail.com" }
]
description = "Package with encoder-based approaches to warm-starting Bayesian Hyperparameter Optimization."
readme = "README.md"
requires-python = ">=3.10"
classifiers = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
]

dependencies = [
"dataset2vec>=1.0.0",
"openml>=0.14.2",
"loguru>=0.7.2",
"optuna>=3.6.1",
"xgboost>=2.0.3",
"scikit-learn>=1.4.2",
"pytest>=8.2.1",
"pymfe>=0.4.3",
"seaborn>=0.13.2",
"tensorboard>=2.17.0",
"numpy>=1.26.4",
]

[project.urls]
"Homepage" = "https://github.com/azoz01/wsmf"
4 changes: 3 additions & 1 deletion requirements_dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,6 @@ numpy==1.26.4
isort==5.13.2
black==24.8.0
flake8==7.1.0
mypy==1.11.1
mypy==1.11.1
twine==5.0.0
build==1.2.1
Loading

0 comments on commit d07ac98

Please sign in to comment.