diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index a2be03edb6..3a4e2b6c69 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -30,7 +30,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-2019] - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} @@ -59,22 +59,22 @@ jobs: run: | pip install pyspark==3.2.3 pip list | grep "pyspark" - - name: If linux, install ray 2 - if: matrix.os == 'ubuntu-latest' + - name: If linux and python<3.11, install ray 2 + if: matrix.os == 'ubuntu-latest' && matrix.python-version != '3.11' run: | pip install "ray[tune]<2.5.0" - - name: If mac, install ray and xgboost 1 - if: matrix.os == 'macOS-latest' + - name: If mac and python 3.10, install ray and xgboost 1 + if: matrix.os == 'macOS-latest' && matrix.python-version == '3.10' run: | pip install -e .[ray] # use macOS to test xgboost 1, but macOS also supports xgboost 2 pip install "xgboost<2" - name: If linux, install prophet on python < 3.9 - if: matrix.os == 'ubuntu-latest' && matrix.python-version < '3.9' + if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.8' run: | pip install -e .[forecast] - name: Install vw on python < 3.10 - if: matrix.python-version != '3.10' + if: matrix.python-version == '3.8' || matrix.python-version == '3.9' run: | pip install -e .[vw] - name: Uninstall pyspark on (python 3.9) or windows diff --git a/flaml/automl/time_series/ts_data.py b/flaml/automl/time_series/ts_data.py index 3bd9f20fea..95cb91f833 100644 --- a/flaml/automl/time_series/ts_data.py +++ b/flaml/automl/time_series/ts_data.py @@ -26,6 +26,8 @@ class PD: DataFrame = Series = None +# dataclass will remove empty default value even with field(default_factory=lambda: []) +# Change into default=None to place the attr @dataclass class TimeSeriesDataset: train_data: pd.DataFrame @@ -34,10 +36,10 @@ class TimeSeriesDataset: target_names: List[str] frequency: str test_data: pd.DataFrame - time_varying_known_categoricals: List[str] = field(default_factory=lambda: []) - time_varying_known_reals: List[str] = field(default_factory=lambda: []) - time_varying_unknown_categoricals: List[str] = field(default_factory=lambda: []) - time_varying_unknown_reals: List[str] = field(default_factory=lambda: []) + time_varying_known_categoricals: List[str] = field(default=None) + time_varying_known_reals: List[str] = field(default=None) + time_varying_unknown_categoricals: List[str] = field(default=None) + time_varying_unknown_reals: List[str] = field(default=None) def __init__( self, @@ -403,7 +405,7 @@ def fit(self, X: Union[DataFrame, np.array], y): self.cat_columns.append(column) elif X[column].nunique(dropna=True) < 2: self.drop_columns.append(column) - elif X[column].dtype.name == "datetime64[ns]": + elif X[column].dtype.name in ["datetime64[ns]", "datetime64[s]"]: pass # these will be processed at model level, # so they can also be done in the predict method else: diff --git a/setup.py b/setup.py index 7bb35716bb..4193f13327 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ "xgboost>=0.90,<2.0.0", "scipy>=1.4.1", "pandas>=1.1.4", - "scikit-learn>=0.24", + "scikit-learn>=1.0.0", ], "notebook": [ "jupyter", @@ -51,11 +51,12 @@ "joblib<=1.3.2", ], "test": [ + "jupyter", "lightgbm>=2.3.1", "xgboost>=0.90,<2.0.0", "scipy>=1.4.1", "pandas>=1.1.4", - "scikit-learn>=0.24", + "scikit-learn>=1.0.0", "thop", "pytest>=6.1.1", "coverage>=5.3", @@ -92,7 +93,10 @@ "sympy", "wolframalpha", ], - "catboost": ["catboost>=0.26"], + "catboost": [ + "catboost>=0.26,<1.2; python_version<'3.11'", + "catboost>=0.26,<=1.2.5; python_version>='3.11'", + ], "blendsearch": [ "optuna>=2.8.0,<=3.6.1", "packaging", diff --git a/test/automl/test_forecast.py b/test/automl/test_forecast.py index d1064bd0c5..c305e78ae2 100644 --- a/test/automl/test_forecast.py +++ b/test/automl/test_forecast.py @@ -1,7 +1,9 @@ import datetime +import sys import numpy as np import pandas as pd +import pytest from flaml import AutoML from flaml.automl.task.time_series_task import TimeSeriesTask @@ -495,6 +497,10 @@ def get_stalliion_data(): return data, special_days +@pytest.mark.skipif( + "3.11" in sys.version, + reason="do not run on py 3.11", +) def test_forecast_panel(budget=5): data, special_days = get_stalliion_data() time_horizon = 6 # predict six months @@ -666,7 +672,7 @@ def split_by_date(df: pd.DataFrame, dt: datetime.date): # test_forecast_automl(60) # test_multivariate_forecast_num(5) # test_multivariate_forecast_cat(5) - # test_numpy() + test_numpy() # test_forecast_classification(5) - test_forecast_panel(5) + # test_forecast_panel(5) # test_cv_step() diff --git a/test/tune/test_lexiflow.py b/test/tune/test_lexiflow.py index 0a6d4f02d9..dda6b924cb 100644 --- a/test/tune/test_lexiflow.py +++ b/test/tune/test_lexiflow.py @@ -6,7 +6,11 @@ import torch import torch.nn as nn import torch.nn.functional as F -import torchvision + +try: + import torchvision +except ImportError: + torchvision = None from flaml import tune @@ -35,6 +39,9 @@ def _BraninCurrin(config): def test_lexiflow(): + if torchvision is None: + return False + train_dataset = torchvision.datasets.FashionMNIST( "test/data", train=True,