Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve mlflow integration and add more models #1331

Merged
merged 43 commits into main from ms-add-spark-models
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
0b66a07
Add more spark models and improved mlflow integration
thinkall Aug 8, 2024
4a5d58d
Update test_extra_models, setup and gitignore
thinkall Aug 8, 2024
55daa8d
Remove autofe
thinkall Aug 8, 2024
dd9a06b
Remove autofe
thinkall Aug 8, 2024
ba3e8ff
Remove autofe
thinkall Aug 8, 2024
6d393e6
Sync changes in internal
thinkall Aug 8, 2024
a19ce88
Fix test for env without pyspark
thinkall Aug 8, 2024
0be22d7
Fix import errors
thinkall Aug 8, 2024
fa6844a
Fix tests
thinkall Aug 9, 2024
1ebcf29
Fix typos
thinkall Aug 9, 2024
a920007
Fix pytorch-forecasting version
thinkall Aug 9, 2024
adb8a41
Remove internal funcs, rename _mlflow.py
thinkall Aug 9, 2024
ed86f6b
Fix import error
thinkall Aug 9, 2024
81ab7be
Fix dependency
thinkall Aug 9, 2024
25c1416
Fix experiment name setting
thinkall Aug 9, 2024
571fa8a
Fix dependency
thinkall Aug 9, 2024
bc932d0
Update pandas version
thinkall Aug 9, 2024
41494a3
Update pytorch-forecasting version
thinkall Aug 9, 2024
5e32c56
Add warning message for not has_automl
thinkall Aug 11, 2024
87549b2
Fix test errors with nltk 3.8.2
thinkall Aug 11, 2024
6481efa
Don't enable mlflow logging w/o an active run
thinkall Aug 11, 2024
e02b60e
Fix pytorch-forecasting can't be pickled issue
thinkall Aug 11, 2024
19ce4d0
Update pyspark tests condition
thinkall Aug 11, 2024
f795738
Update synapseml
thinkall Aug 11, 2024
6697c7c
Update synapseml
thinkall Aug 11, 2024
45a52f0
Merge branch 'main' into ms-add-spark-models
thinkall Aug 12, 2024
640e80a
No parent run, no logging for OSS
thinkall Aug 12, 2024
87a57f3
Log when autolog is enabled
thinkall Aug 12, 2024
bd8d72f
merge main
thinkall Aug 12, 2024
2c1059c
upgrade code
thinkall Aug 12, 2024
d4a4de2
Merge branch 'main' into ms-add-spark-models
thinkall Aug 12, 2024
836e033
Enable autolog for tune
thinkall Aug 12, 2024
66d602b
Increase time budget for test
thinkall Aug 12, 2024
09416cd
End run before start a new run
thinkall Aug 12, 2024
ff86d03
Update parent run
thinkall Aug 12, 2024
39d6224
Fix import error
thinkall Aug 12, 2024
75bf137
clean up
thinkall Aug 12, 2024
29a3326
Merge branch 'main' into ms-add-spark-models
thinkall Aug 12, 2024
8bb28f8
Merge branch 'ms-add-spark-models' of https://github.com/microsoft/FLAML into ms-add-spark-models
thinkall Aug 12, 2024
e72ce20
skip macos and win
thinkall Aug 12, 2024
347cca9
Update notes
thinkall Aug 12, 2024
8e9e7af
Update default value of model_history
thinkall Aug 12, 2024
5869c69
Merge branch 'main' into ms-add-spark-models
thinkall Aug 13, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,15 @@ jobs:
pip install -e .
python -c "import flaml"
pip install -e .[test]
- name: On Ubuntu python 3.8, install pyspark 3.2.3
if: matrix.python-version == '3.8' && matrix.os == 'ubuntu-latest'
- name: On Ubuntu python 3.10, install pyspark 3.4.1
if: matrix.python-version == '3.10' && matrix.os == 'ubuntu-latest'
run: |
pip install pyspark==3.2.3
pip install pyspark==3.4.1
pip list | grep "pyspark"
- name: On Ubuntu python 3.11, install pyspark 3.5.1
if: matrix.python-version == '3.11' && matrix.os == 'ubuntu-latest'
run: |
pip install pyspark==3.5.1
pip list | grep "pyspark"
- name: If linux and python<3.11, install ray 2
if: matrix.os == 'ubuntu-latest' && matrix.python-version != '3.11'
Expand All @@ -77,11 +82,6 @@ jobs:
if: matrix.python-version == '3.8' || matrix.python-version == '3.9'
run: |
pip install -e .[vw]
- name: Uninstall pyspark on (python 3.9) or windows
if: matrix.python-version == '3.9' || matrix.os == 'windows-2019'
run: |
# Uninstall pyspark to test env without pyspark
pip uninstall -y pyspark
- name: Test with pytest
if: matrix.python-version != '3.10'
run: |
Expand Down
18 changes: 18 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,24 @@ output/
flaml/tune/spark/mylearner.py
*.pkl

data/
benchmark/pmlb/csv_datasets
benchmark/*.csv

checkpoints/
test/default
test/housing.json
test/nlp/default/transformer_ms/seq-classification.json

flaml/fabric/fanova/_fanova.c
# local config files
*.config.local

local_debug/
patch.diff

# Test things
notebook/lightning_logs/
lightning_logs/
flaml/autogen/extensions/tmp/
test/autogen/my_tmp/
10 changes: 9 additions & 1 deletion flaml/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
import logging

from flaml.automl import AutoML, logger_formatter
try:
from flaml.automl import AutoML, logger_formatter

has_automl = True
except ImportError:
has_automl = False
from flaml.onlineml.autovw import AutoVW
from flaml.tune.searcher import CFO, FLOW2, BlendSearch, BlendSearchTuner, RandomSearch
from flaml.version import __version__

# Set the root logger.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

if not has_automl:
logger.warning("flaml.automl is not available. Please install flaml[automl] to enable AutoML functionalities.")
187 changes: 138 additions & 49 deletions flaml/automl/automl.py

Large diffs are not rendered by default.

26 changes: 24 additions & 2 deletions flaml/automl/ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from flaml.automl.spark import ERROR as SPARK_ERROR
from flaml.automl.spark import DataFrame, Series, psDataFrame, psSeries
from flaml.automl.task.task import Task
from flaml.automl.time_series import TimeSeriesDataset

try:
from sklearn.metrics import (
Expand All @@ -33,7 +34,6 @@
if SPARK_ERROR is None:
from flaml.automl.spark.metrics import spark_metric_loss_score

from flaml.automl.time_series import TimeSeriesDataset

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -89,6 +89,11 @@
"wer": "min",
}
huggingface_submetric_to_metric = {"rouge1": "rouge", "rouge2": "rouge"}
spark_metric_name_dict = {
"Regression": ["r2", "rmse", "mse", "mae", "var"],
"Binary Classification": ["pr_auc", "roc_auc"],
"Multi-class Classification": ["accuracy", "log_loss", "f1", "micro_f1", "macro_f1"],
}


def metric_loss_score(
Expand Down Expand Up @@ -122,7 +127,7 @@ def metric_loss_score(
import datasets

datasets_metric_name = huggingface_submetric_to_metric.get(metric_name, metric_name.split(":")[0])
metric = datasets.load_metric(datasets_metric_name)
metric = datasets.load_metric(datasets_metric_name, trust_remote_code=True)
metric_mode = huggingface_metric_to_mode[datasets_metric_name]

if metric_name.startswith("seqeval"):
Expand Down Expand Up @@ -334,6 +339,14 @@ def compute_estimator(
if fit_kwargs is None:
fit_kwargs = {}

fe_params = {}
for param, value in config_dic.items():
if param.startswith("fe."):
fe_params[param] = value

for param, value in fe_params.items():
config_dic.pop(param)

estimator_class = estimator_class or task.estimator_class_from_str(estimator_name)
estimator = estimator_class(
**config_dic,
Expand Down Expand Up @@ -401,12 +414,21 @@ def train_estimator(
free_mem_ratio=0,
) -> Tuple[EstimatorSubclass, float]:
start_time = time.time()
fe_params = {}
for param, value in config_dic.items():
if param.startswith("fe."):
fe_params[param] = value

for param, value in fe_params.items():
config_dic.pop(param)

estimator_class = estimator_class or task.estimator_class_from_str(estimator_name)
estimator = estimator_class(
**config_dic,
task=task,
n_jobs=n_jobs,
)

if fit_kwargs is None:
fit_kwargs = {}

Expand Down
Loading
Loading