diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 3a4e2b6c69..082eb3face 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -54,10 +54,15 @@ jobs: pip install -e . python -c "import flaml" pip install -e .[test] - - name: On Ubuntu python 3.8, install pyspark 3.2.3 - if: matrix.python-version == '3.8' && matrix.os == 'ubuntu-latest' + - name: On Ubuntu python 3.10, install pyspark 3.4.1 + if: matrix.python-version == '3.10' && matrix.os == 'ubuntu-latest' run: | - pip install pyspark==3.2.3 + pip install pyspark==3.4.1 + pip list | grep "pyspark" + - name: On Ubuntu python 3.11, install pyspark 3.5.1 + if: matrix.python-version == '3.11' && matrix.os == 'ubuntu-latest' + run: | + pip install pyspark==3.5.1 pip list | grep "pyspark" - name: If linux and python<3.11, install ray 2 if: matrix.os == 'ubuntu-latest' && matrix.python-version != '3.11' @@ -77,11 +82,6 @@ jobs: if: matrix.python-version == '3.8' || matrix.python-version == '3.9' run: | pip install -e .[vw] - - name: Uninstall pyspark on (python 3.9) or windows - if: matrix.python-version == '3.9' || matrix.os == 'windows-2019' - run: | - # Uninstall pyspark to test env without pyspark - pip uninstall -y pyspark - name: Test with pytest if: matrix.python-version != '3.10' run: | diff --git a/.gitignore b/.gitignore index 9dc1eea63c..8a3365b203 100644 --- a/.gitignore +++ b/.gitignore @@ -163,6 +163,24 @@ output/ flaml/tune/spark/mylearner.py *.pkl +data/ +benchmark/pmlb/csv_datasets +benchmark/*.csv + +checkpoints/ +test/default +test/housing.json +test/nlp/default/transformer_ms/seq-classification.json + +flaml/fabric/fanova/_fanova.c # local config files *.config.local + +local_debug/ patch.diff + +# Test things +notebook/lightning_logs/ +lightning_logs/ +flaml/autogen/extensions/tmp/ +test/autogen/my_tmp/ diff --git a/flaml/__init__.py b/flaml/__init__.py index ab323377fb..8664127e3a 100644 --- a/flaml/__init__.py +++ b/flaml/__init__.py @@ -1,6 +1,11 @@ import logging -from flaml.automl import AutoML, logger_formatter +try: + from flaml.automl import AutoML, logger_formatter + + has_automl = True +except ImportError: + has_automl = False from flaml.onlineml.autovw import AutoVW from flaml.tune.searcher import CFO, FLOW2, BlendSearch, BlendSearchTuner, RandomSearch from flaml.version import __version__ @@ -8,3 +13,6 @@ # Set the root logger. logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) + +if not has_automl: + logger.warning("flaml.automl is not available. 
Please install flaml[automl] to enable AutoML functionalities.") diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py index 77023987ee..115f9748d0 100644 --- a/flaml/automl/automl.py +++ b/flaml/automl/automl.py @@ -7,6 +7,7 @@ import json import logging import os +import random import sys import time from functools import partial @@ -16,7 +17,7 @@ from flaml import tune from flaml.automl.logger import logger, logger_formatter -from flaml.automl.ml import train_estimator +from flaml.automl.ml import huggingface_metric_to_mode, sklearn_metric_name_set, spark_metric_name_dict, train_estimator from flaml.automl.spark import DataFrame, Series, psDataFrame, psSeries from flaml.automl.state import AutoMLState, SearchState from flaml.automl.task.factory import task_factory @@ -45,6 +46,7 @@ try: from sklearn.base import BaseEstimator + from sklearn.pipeline import Pipeline except ImportError: BaseEstimator = object ERROR = ERROR or ImportError("please install flaml[automl] option to use the flaml.automl package.") @@ -54,6 +56,14 @@ except ImportError: mlflow = None +try: + from flaml.fabric.mlflow import MLflowIntegration, get_mlflow_log_latency, infer_signature, is_autolog_enabled + + internal_mlflow = True +except ImportError: + internal_mlflow = False + + try: from ray import __version__ as ray_version @@ -171,7 +181,7 @@ def custom_metric( 'better' only logs configs with better loss than previos iters 'all' logs all the tried configs. model_history: A boolean of whether to keep the best - model per estimator. Make sure memory is large enough if setting to True. + model per estimator. Make sure memory is large enough if setting to True. Default False. log_training_metric: A boolean of whether to log the training metric for each model. mem_thres: A float of the memory size constraint in bytes. @@ -247,7 +257,10 @@ def custom_metric( search is considered to converge. force_cancel: boolean, default=False | Whether to forcely cancel Spark jobs if the search time exceeded the time budget. - append_log: boolean, default=False | Whether to directly append the log + mlflow_exp_name: str, default=None | The name of the mlflow experiment. This should be specified when + mlflow autologging is enabled on Spark. Otherwise it will log all the results into the experiment with the + same name as the basename of the main entry file. + append_log: boolean, default=False | Whether to directly append the log records to the input log file if it exists. auto_augment: boolean, default=True | Whether to automatically augment rare classes. @@ -320,9 +333,7 @@ def custom_metric( } } ``` - mlflow_logging: boolean, default=True | Whether to log the training results to mlflow. - This requires mlflow to be installed and to have an active mlflow run. - FLAML will create nested runs. + mlflow_logging: boolean, default=True | Whether to log the training results to mlflow. Has no effect if mlflow is not installed. 
""" if ERROR: @@ -331,6 +342,8 @@ def custom_metric( self._state = AutoMLState() self._state.learner_classes = {} self._settings = settings + self._automl_user_configurations = settings.copy() + self._settings.pop("automl_user_configurations", None) # no budget by default settings["time_budget"] = settings.get("time_budget", -1) settings["task"] = settings.get("task", "classification") @@ -362,6 +375,7 @@ def custom_metric( settings["preserve_checkpoint"] = settings.get("preserve_checkpoint", True) settings["early_stop"] = settings.get("early_stop", False) settings["force_cancel"] = settings.get("force_cancel", False) + settings["mlflow_exp_name"] = settings.get("mlflow_exp_name", None) settings["append_log"] = settings.get("append_log", False) settings["min_sample_size"] = settings.get("min_sample_size", MIN_SAMPLE_TRAIN) settings["use_ray"] = settings.get("use_ray", False) @@ -377,6 +391,7 @@ def custom_metric( settings["mlflow_logging"] = settings.get("mlflow_logging", True) self._estimator_type = "classifier" if settings["task"] in CLASSIFICATION else "regressor" + self.best_run_id = None def get_params(self, deep: bool = False) -> dict: return self._settings.copy() @@ -475,14 +490,29 @@ def save_best_config(self, filename): with open(filename, "w") as f: json.dump(best, f) + @property + def supported_metrics(self): + """ + Returns a tuple of supported metrics for the task. + + Returns: + metrics (Tuple): sklearn metrics from the sklearn package; + huggingface metrics from the datasets package; + spark metrics from the pyspark package. + + """ + + return sklearn_metric_name_set, huggingface_metric_to_mode.keys(), spark_metric_name_dict + @property def feature_transformer(self): - """Returns feature transformer which is used to preprocess data before applying training or inference.""" - return getattr(self, "_transformer", None) + """Returns the AutoML feature transformer.""" + data_processor = getattr(self, "_transformer", None) + return data_processor @property def label_transformer(self): - """Returns label transformer which is used to preprocess labels before scoring, and inverse transform labels after inference.""" + """Returns the AutoML label transformer.""" return getattr(self, "_label_transformer", None) @property @@ -779,7 +809,7 @@ def retrain_from_log( max_epochs: int, default = 20 | Maximum number of epochs to run training, only used by TemporalFusionTransformerEstimator. batch_size: int, default = 64 | Batch size for training model, only - used by TemporalFusionTransformerEstimator. + used by TemporalFusionTransformerEstimator and TCNEstimator. """ task = task or self._settings.get("task") if isinstance(task, str): @@ -1201,6 +1231,7 @@ def fit( skip_transform=None, mlflow_logging=None, fit_kwargs_by_estimator=None, + mlflow_exp_name=None, **fit_kwargs, ): """Find a model for a given task. @@ -1294,7 +1325,7 @@ def custom_metric( 'all' logs all the tried configs. model_history: A boolean of whether to keep the trained best model per estimator. Make sure memory is large enough if setting to True. - Default value is False: best_model_for_estimator would return a + Default value is False. If False, best_model_for_estimator would return an untrained model for non-best learner. log_training_metric: A boolean of whether to log the training metric for each model. @@ -1380,7 +1411,10 @@ def custom_metric( early_stop: boolean, default=False | Whether to stop early if the search is considered to converge. force_cancel: boolean, default=False | Whether to forcely cancel the PySpark job if overtime. 
- append_log: boolean, default=False | Whether to directly append the log + mlflow_exp_name: str, default=None | The name of the mlflow experiment. This should be specified when + mlflow autologging is enabled on Spark. Otherwise it will log all the results into the experiment with the + same name as the basename of the main entry file. + append_log: boolean, default=False | Whether to directly append the log records to the input log file if it exists. auto_augment: boolean, default=True | Whether to automatically augment rare classes. @@ -1465,9 +1499,7 @@ def cv_score_agg_func(val_loss_folds, log_metrics_folds): skip_transform: boolean, default=False | Whether to pre-process data prior to modeling. mlflow_logging: boolean, default=None | Whether to log the training results to mlflow. Default value is None, which means the logging decision is made based on - AutoML.__init__'s mlflow_logging argument. - This requires mlflow to be installed and to have an active mlflow run. - FLAML will create nested runs. + AutoML.__init__'s mlflow_logging argument. Has no effect if mlflow is not installed. fit_kwargs_by_estimator: dict, default=None | The user specified keywords arguments, grouped by estimator name. For TransformersEstimator, available fit_kwargs can be found from [TrainingArgumentsForAuto](nlp/huggingface/training_args). @@ -1517,7 +1549,7 @@ def cv_score_agg_func(val_loss_folds, log_metrics_folds): max_epochs: int, default = 20 | Maximum number of epochs to run training, only used by TemporalFusionTransformerEstimator. batch_size: int, default = 64 | Batch size for training model, only - used by TemporalFusionTransformerEstimator. + used by TemporalFusionTransformerEstimator and TCNEstimator. """ self._state._start_time_flag = self._start_time_flag = time.time() @@ -1568,6 +1600,7 @@ def cv_score_agg_func(val_loss_folds, log_metrics_folds): ) early_stop = self._settings.get("early_stop") if early_stop is None else early_stop force_cancel = self._settings.get("force_cancel") if force_cancel is None else force_cancel + mlflow_exp_name = self._settings.get("mlflow_exp_name") if mlflow_exp_name is None else mlflow_exp_name # no search budget is provided? 
no_budget = time_budget < 0 and max_iter is None and not early_stop append_log = self._settings.get("append_log") if append_log is None else append_log @@ -1620,7 +1653,6 @@ def cv_score_agg_func(val_loss_folds, log_metrics_folds): self._use_ray = use_ray # use the following condition if we have an estimation of average_trial_time and average_trial_overhead # self._use_ray = use_ray or n_concurrent_trials > ( average_trial_time + average_trial_overhead) / (average_trial_time) - if self._use_ray is not False: import ray @@ -1654,11 +1686,29 @@ def cv_score_agg_func(val_loss_folds, log_metrics_folds): self._state.fit_kwargs = fit_kwargs custom_hp = custom_hp or self._settings.get("custom_hp") self._skip_transform = self._settings.get("skip_transform") if skip_transform is None else skip_transform - self._mlflow_logging = self._settings.get("mlflow_logging") if mlflow_logging is None else mlflow_logging + self._mlflow_logging = ( + False + if mlflow is None + else self._settings.get("mlflow_logging") + if mlflow_logging is None + else mlflow_logging + ) fit_kwargs_by_estimator = fit_kwargs_by_estimator or self._settings.get("fit_kwargs_by_estimator") self._state.fit_kwargs_by_estimator = fit_kwargs_by_estimator.copy() # shallow copy of fit_kwargs_by_estimator self._state.weight_val = sample_weight_val - + self._mlflow_exp_name = mlflow_exp_name + self.mlflow_integration = None + self.autolog_extra_tag = { + "extra_tag.sid": f"flaml_{flaml_version}_{int(time.time())}_{random.randint(1001, 9999)}" + } + if internal_mlflow and self._mlflow_logging and (mlflow.active_run() or is_autolog_enabled()): + try: + self.mlflow_integration = MLflowIntegration("automl", mlflow_exp_name, extra_tag=self.autolog_extra_tag) + self._mlflow_exp_name = self.mlflow_integration.experiment_name + if not (mlflow.active_run() is not None or is_autolog_enabled()): + self.mlflow_integration.only_history = True + except KeyError: + print("Not in Fabric, Skipped") task.validate_data( self, self._state, @@ -1723,6 +1773,11 @@ def cv_score_agg_func(val_loss_folds, log_metrics_folds): self._min_sample_size_input = min_sample_size self._prepare_data(eval_method, split_ratio, n_splits) + # infer the signature of the input/output data + if self.mlflow_integration is not None: + self.estimator_signature = infer_signature(self._state.X_train, self._state.y_train) + self.pipeline_signature = infer_signature(X_train, y_train, dataframe, label) + # TODO pull this to task as decide_sample_size if isinstance(self._min_sample_size, dict): self._sample = { @@ -1821,6 +1876,11 @@ def is_to_reverse_metric(metric, task): and (max_iter > 0 or retrain_full is True) or max_iter == 1 ) + if self.mlflow_integration is not None and all( + [self.mlflow_integration.parent_run_id is None, not self.mlflow_integration.only_history] + ): + # force not retrain if no active run + self._state.retrain_final = False # add custom learner for estimator_name in estimator_list: if estimator_name not in self._state.learner_classes: @@ -1953,6 +2013,8 @@ def is_to_reverse_metric(metric, task): ) # NOTE: this is after kwargs is updated to fit_kwargs_by_estimator del self._state.groups, self._state.groups_all, self._state.groups_val logger.setLevel(old_level) + if self.mlflow_integration is not None: + self.mlflow_integration.resume_mlflow() def _search_parallel(self): if self._use_ray is not False: @@ -2049,6 +2111,14 @@ def _search_parallel(self): if self._use_spark: # use spark as parallel backend + mlflow_log_latency = ( + 
get_mlflow_log_latency(model_history=self._state.model_history) if self.mlflow_integration else 0 + ) + ( + logger.info(f"Estimated mlflow_log_latency: {mlflow_log_latency} seconds.") + if mlflow_log_latency > 0 + else None + ) analysis = tune.run( self.trainable, search_alg=search_alg, @@ -2061,6 +2131,9 @@ def _search_parallel(self): use_ray=False, use_spark=True, force_cancel=self._force_cancel, + mlflow_exp_name=self._mlflow_exp_name, + automl_info=(mlflow_log_latency,), # pass automl info to tune.run + extra_tag=self.autolog_extra_tag, # raise_on_failed_trial=False, # keep_checkpoints_num=1, # checkpoint_score_attr="min-val_loss", @@ -2121,6 +2194,8 @@ def _search_parallel(self): self._search_states[estimator].best_config = config if better or self._log_type == "all": self._log_trial(search_state, estimator) + if self.mlflow_integration: + self.mlflow_integration.record_state(self, search_state, estimator) def _log_trial(self, search_state, estimator): if self._training_log: @@ -2134,36 +2209,6 @@ def _log_trial(self, search_state, estimator): estimator, search_state.sample_size, ) - if self._mlflow_logging and mlflow is not None and mlflow.active_run(): - with mlflow.start_run(nested=True): - mlflow.log_metric("iter_counter", self._track_iter) - if (search_state.metric_for_logging is not None) and ( - "intermediate_results" in search_state.metric_for_logging - ): - for each_entry in search_state.metric_for_logging["intermediate_results"]: - with mlflow.start_run(nested=True): - mlflow.log_metrics(each_entry) - mlflow.log_metric("iter_counter", self._iter_per_learner[estimator]) - del search_state.metric_for_logging["intermediate_results"] - if search_state.metric_for_logging: - mlflow.log_metrics(search_state.metric_for_logging) - mlflow.log_metric("trial_time", search_state.trial_time) - mlflow.log_metric("wall_clock_time", self._state.time_from_start) - mlflow.log_metric("validation_loss", search_state.val_loss) - mlflow.log_params(search_state.config) - mlflow.log_param("learner", estimator) - mlflow.log_param("sample_size", search_state.sample_size) - mlflow.log_metric("best_validation_loss", search_state.best_loss) - mlflow.log_param("best_config", search_state.best_config) - mlflow.log_param("best_learner", self._best_estimator) - mlflow.log_metric( - self._state.metric if isinstance(self._state.metric, str) else self._state.error_metric, - 1 - search_state.val_loss - if self._state.error_metric.startswith("1-") - else -search_state.val_loss - if self._state.error_metric.startswith("-") - else search_state.val_loss, - ) def _search_sequential(self): try: @@ -2317,9 +2362,18 @@ def _search_sequential(self): verbose=max(self.verbose - 3, 0), use_ray=False, use_spark=False, + force_cancel=self._force_cancel, + mlflow_exp_name=self._mlflow_exp_name, + automl_info=(0,), # pass automl info to tune.run + extra_tag=self.autolog_extra_tag, ) time_used = time.time() - start_run_time better = False + ( + logger.debug(f"result in automl: {analysis.trials}, {analysis.trials[-1].last_result}") + if analysis.trials + else logger.debug("result in automl: [], None") + ) if analysis.trials and analysis.trials[-1].last_result: result = analysis.trials[-1].last_result search_state.update(result, time_used=time_used) @@ -2382,6 +2436,8 @@ def _search_sequential(self): search_state.trained_estimator.cleanup() if better or self._log_type == "all": self._log_trial(search_state, estimator) + if self.mlflow_integration: + self.mlflow_integration.record_state(self, search_state, estimator) logger.info( " 
at {:.1f}s,\testimator {}'s best error={:.4f},\tbest estimator {}'s best error={:.4f}".format( @@ -2482,6 +2538,12 @@ def _search(self): self._training_log.checkpoint() self._state.time_from_start = time.time() - self._start_time_flag if self._best_estimator: + if self.mlflow_integration: + self.mlflow_integration.log_automl(self) + if mlflow.active_run() is None: + if self.mlflow_integration.parent_run_id is not None and self.mlflow_integration.autolog: + # ensure result of retrain autolog to parent run + mlflow.start_run(run_id=self.mlflow_integration.parent_run_id) self._selected = self._search_states[self._best_estimator] self.modelcount = sum(search_state.total_iter for search_state in self._search_states.values()) if self._trained_estimator: @@ -2618,11 +2680,34 @@ def _search(self): self._best_estimator, state.best_config, self.data_size_full, + is_retrain=True, ) logger.info(f"retrain {self._best_estimator} for {retrain_time:.1f}s") state.best_config_train_time = retrain_time if self._trained_estimator: logger.info(f"retrained model: {self._trained_estimator.model}") + if self.best_run_id is not None: + logger.info(f"Best MLflow run name: {self.best_run_name}") + logger.info(f"Best MLflow run id: {self.best_run_id}") + if self.mlflow_integration is not None: + # try log retrained model + if all( + [ + self.mlflow_integration.manual_log, + not self.mlflow_integration.has_model, + self.mlflow_integration.parent_run_id is not None, + ] + ): + if mlflow.active_run() is None: + mlflow.start_run(run_id=self.mlflow_integration.parent_run_id) + self.mlflow_integration.log_model( + self._trained_estimator.model, + self.best_estimator, + signature=self.estimator_signature, + ) + self.mlflow_integration.pickle_and_log_automl_artifacts( + self, self.model, self.best_estimator, signature=self.pipeline_signature + ) else: logger.info("not retraining because the time budget is too small.") @@ -2696,3 +2781,7 @@ def _select_estimator(self, estimator_list): q += inv[i] / s if p < q: return estimator_list[i] + + @property + def automl_pipeline(self): + return None diff --git a/flaml/automl/ml.py b/flaml/automl/ml.py index 4f39a09889..bd13d8259e 100644 --- a/flaml/automl/ml.py +++ b/flaml/automl/ml.py @@ -13,6 +13,7 @@ from flaml.automl.spark import ERROR as SPARK_ERROR from flaml.automl.spark import DataFrame, Series, psDataFrame, psSeries from flaml.automl.task.task import Task +from flaml.automl.time_series import TimeSeriesDataset try: from sklearn.metrics import ( @@ -33,7 +34,6 @@ if SPARK_ERROR is None: from flaml.automl.spark.metrics import spark_metric_loss_score -from flaml.automl.time_series import TimeSeriesDataset logger = logging.getLogger(__name__) @@ -89,6 +89,11 @@ "wer": "min", } huggingface_submetric_to_metric = {"rouge1": "rouge", "rouge2": "rouge"} +spark_metric_name_dict = { + "Regression": ["r2", "rmse", "mse", "mae", "var"], + "Binary Classification": ["pr_auc", "roc_auc"], + "Multi-class Classification": ["accuracy", "log_loss", "f1", "micro_f1", "macro_f1"], +} def metric_loss_score( @@ -122,7 +127,7 @@ def metric_loss_score( import datasets datasets_metric_name = huggingface_submetric_to_metric.get(metric_name, metric_name.split(":")[0]) - metric = datasets.load_metric(datasets_metric_name) + metric = datasets.load_metric(datasets_metric_name, trust_remote_code=True) metric_mode = huggingface_metric_to_mode[datasets_metric_name] if metric_name.startswith("seqeval"): @@ -334,6 +339,14 @@ def compute_estimator( if fit_kwargs is None: fit_kwargs = {} + fe_params = {} + for 
param, value in config_dic.items(): + if param.startswith("fe."): + fe_params[param] = value + + for param, value in fe_params.items(): + config_dic.pop(param) + estimator_class = estimator_class or task.estimator_class_from_str(estimator_name) estimator = estimator_class( **config_dic, @@ -401,12 +414,21 @@ def train_estimator( free_mem_ratio=0, ) -> Tuple[EstimatorSubclass, float]: start_time = time.time() + fe_params = {} + for param, value in config_dic.items(): + if param.startswith("fe."): + fe_params[param] = value + + for param, value in fe_params.items(): + config_dic.pop(param) + estimator_class = estimator_class or task.estimator_class_from_str(estimator_name) estimator = estimator_class( **config_dic, task=task, n_jobs=n_jobs, ) + if fit_kwargs is None: fit_kwargs = {} diff --git a/flaml/automl/model.py b/flaml/automl/model.py index b451f60c59..8d7c15fcce 100644 --- a/flaml/automl/model.py +++ b/flaml/automl/model.py @@ -2,6 +2,7 @@ # * Copyright (c) FLAML authors. All rights reserved. # * Licensed under the MIT License. See LICENSE file in the # * project root for license information. +import inspect import logging import math import os @@ -9,52 +10,41 @@ import signal import sys import time +import warnings from contextlib import contextmanager from functools import partial from typing import Callable, List, Union import numpy as np +import sklearn +from sklearn.dummy import DummyClassifier, DummyRegressor +from sklearn.ensemble import ExtraTreesClassifier, ExtraTreesRegressor, RandomForestClassifier, RandomForestRegressor +from sklearn.exceptions import ConvergenceWarning +from sklearn.linear_model import ElasticNet, LassoLars, LogisticRegression, SGDClassifier, SGDRegressor +from sklearn.preprocessing import Normalizer +from sklearn.svm import LinearSVC +from xgboost import __version__ as xgboost_version from flaml import tune -from flaml.automl.data import ( - group_counts, -) +from flaml.automl.data import group_counts +from flaml.automl.spark import ERROR as SPARK_ERROR +from flaml.automl.spark import DataFrame, Series, psDataFrame, psSeries, sparkDataFrame +from flaml.automl.spark.utils import len_labels, to_pandas_on_spark from flaml.automl.task.factory import task_factory -from flaml.automl.task.task import ( - NLG_TASKS, - SEQCLASSIFICATION, - SEQREGRESSION, - SUMMARIZATION, - TOKENCLASSIFICATION, - Task, -) +from flaml.automl.task.task import NLG_TASKS, SEQCLASSIFICATION, SEQREGRESSION, SUMMARIZATION, TOKENCLASSIFICATION, Task + +SKLEARN_VERSION = sklearn.__version__ + +warnings.filterwarnings("ignore", category=ConvergenceWarning) -try: - from sklearn.dummy import DummyClassifier, DummyRegressor - from sklearn.ensemble import ( - ExtraTreesClassifier, - ExtraTreesRegressor, - RandomForestClassifier, - RandomForestRegressor, - ) - from sklearn.linear_model import LogisticRegression - from xgboost import __version__ as xgboost_version -except ImportError: - pass try: from scipy.sparse import issparse except ImportError: - pass -from flaml.automl.spark import ERROR as SPARK_ERROR -from flaml.automl.spark import DataFrame, Series, psDataFrame, psSeries, sparkDataFrame -from flaml.automl.spark.configs import ( - ParamList_LightGBM_Classifier, - ParamList_LightGBM_Ranker, - ParamList_LightGBM_Regressor, -) -from flaml.automl.spark.utils import len_labels, to_pandas_on_spark + def issparse(x): + return False + if DataFrame is not None: from pandas import to_datetime @@ -248,6 +238,8 @@ def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs): Returns: 
train_time: A float of the training time in seconds. """ + if "is_retrain" in kwargs: + kwargs.pop("is_retrain") if ( getattr(self, "limit_resource", None) and resource is not None @@ -461,6 +453,8 @@ def fit( Returns: train_time: A float of the training time in seconds. """ + if "is_retrain" in kwargs: + kwargs.pop("is_retrain") df_train, label_col = self._preprocess(X_train, y_train, index_col=index_col, return_label=True) kwargs["labelCol"] = label_col train_time = self._fit(df_train, **kwargs) @@ -471,11 +465,10 @@ def _fit(self, df_train: sparkDataFrame, **kwargs): pipeline_model = self.estimator_class(**self.params, **kwargs) if logger.level == logging.DEBUG: logger.debug(f"flaml.automl.model - {pipeline_model} fit started with params {self.params}") - pipeline_model.fit(df_train) + self._model = pipeline_model.fit(df_train) if logger.level == logging.DEBUG: logger.debug(f"flaml.automl.model - {pipeline_model} fit finished") train_time = time.time() - current_time - self._model = pipeline_model return train_time def predict(self, X, index_col="tmp_index_col", return_all=False, **kwargs): @@ -527,6 +520,13 @@ class j. logger.warning("Estimator is not fit yet. Please run fit() before predict().") return np.ones(X.shape[0]) + @property + def estimator_params(self): + if hasattr(self, "estimator_class") and self.estimator_class is not None: + return list(inspect.signature(self.estimator_class).parameters.keys()) + else: + return [] + class SparkLGBMEstimator(SparkEstimator): """The class for fine-tuning spark version lightgbm models, using SynapseML API.""" @@ -602,7 +602,6 @@ def __init__(self, task="binary", **config): raise ImportError(err_msg) self.estimator_class = LightGBMRegressor - self.estimator_params = ParamList_LightGBM_Regressor elif "rank" == task: try: from synapse.ml.lightgbm import LightGBMRanker @@ -610,7 +609,6 @@ def __init__(self, task="binary", **config): raise ImportError(err_msg) self.estimator_class = LightGBMRanker - self.estimator_params = ParamList_LightGBM_Ranker else: try: from synapse.ml.lightgbm import LightGBMClassifier @@ -618,7 +616,6 @@ def __init__(self, task="binary", **config): raise ImportError(err_msg) self.estimator_class = LightGBMClassifier - self.estimator_params = ParamList_LightGBM_Classifier self._time_per_iter = None self._train_size = 0 self._mem_per_iter = -1 @@ -634,6 +631,8 @@ def fit( index_col="tmp_index_col", **kwargs, ): + if "is_retrain" in kwargs: + kwargs.pop("is_retrain") start_time = time.time() if self.model_n_classes_ is None and self._task not in ["regression", "rank"]: self.model_n_classes_, self.model_classes_ = len_labels(y_train, return_labels=True) @@ -703,6 +702,8 @@ def fit( def _fit(self, df_train: sparkDataFrame, **kwargs): current_time = time.time() + if "dataTransferMode" not in kwargs: + kwargs["dataTransferMode"] = "bulk" model = self.estimator_class(**self.params, **kwargs) if logger.level == logging.DEBUG: logger.debug(f"flaml.automl.model - {model} fit started with params {self.params}") @@ -715,6 +716,138 @@ def _fit(self, df_train: sparkDataFrame, **kwargs): return train_time +class SparkRandomForestEstimator(SparkEstimator): + """The SparkEstimator class for Random Forest.""" + + nrows = 101 + ITER_HP = "maxIter" + + @classmethod + def search_space(cls, data_size, task, **params): + SparkRandomForestEstimator.nrows = int(data_size[0]) + upper = min(2048, SparkRandomForestEstimator.nrows) + init = 1 / np.sqrt(data_size[1]) if task.is_classification() else 1 + lower = min(0.1, init) + # upper = max(5, 
min(32768, int(data_size[0]))) # upper must be larger than lower + + space = { + "numTrees": { + "domain": tune.lograndint(lower=4, upper=max(5, upper)), + "init_value": 4, + "low_cost_init_value": 4, + }, + "featureSubsetStrategy": { + "domain": tune.loguniform(lower=lower, upper=1.0), + "init_value": init, + }, + "maxDepth": { + "domain": tune.lograndint( + lower=4, + upper=max(5, min(32768, SparkRandomForestEstimator.nrows >> 1)), # + ), + "init_value": 4, + "low_cost_init_value": 4, + }, + } + + if task.is_classification(): + space["impurity"] = { + "domain": tune.choice(["gini", "entropy"]), + # "init_value": "gini", + } + + return space + + def __init__(self, task="classification", **config): + super().__init__(task, **config) + if "verbose" in self.params: + self.params.pop("verbose") + if "n_jobs" in self.params: + self.params.pop("n_jobs") + if self._task.is_classification(): + from pyspark.ml.classification import RandomForestClassifier + + self.estimator_class = RandomForestClassifier + else: + from pyspark.ml.regression import RandomForestRegressor + + self.estimator_class = RandomForestRegressor + + self._task = task + self._model = None + self._time_per_iter = None + self._train_size = 0 + self._mem_per_iter = -1 + self.model_classes_ = None + self.model_n_classes_ = None + + def fit( + self, + X_train, + y_train=None, + budget=None, + free_mem_ratio=0, + index_col="tmp_index_col", + **kwargs, + ): + if "is_retrain" in kwargs: + kwargs.pop("is_retrain") + start_time = time.time() + if self.model_n_classes_ is None and self._task not in ["regression", "rank"]: + self.model_n_classes_, self.model_classes_ = len_labels(y_train, return_labels=True) + df_train, label_col = self._preprocess(X_train, y_train, index_col=index_col, return_label=True) + _kwargs = kwargs.copy() + # TODO: update regression model and rank model, update ParamList_LightGBM_ + if self._task not in ["regression", "rank"]: + if "objective" not in _kwargs: + _kwargs["objective"] = "binary" if self.model_n_classes_ == 2 else "multiclass" + for k in list(_kwargs.keys()): + if k not in self.estimator_params: + _kwargs.pop(k) + self.params["featureSubsetStrategy"] = str(self.params["featureSubsetStrategy"]) + _kwargs["labelCol"] = label_col + self._fit(df_train, **_kwargs) + train_time = time.time() - start_time + return train_time + + def _fit(self, df_train: sparkDataFrame, **kwargs): + current_time = time.time() + model = self.estimator_class(**self.params, **kwargs) + if logger.level == logging.DEBUG: + logger.debug(f"flaml.automl.model - {model} fit started with params {self.params}") + self._model = model.fit(df_train) + self._model.classes_ = self.model_classes_ + self._model.n_classes_ = self.model_n_classes_ + if logger.level == logging.DEBUG: + logger.debug(f"flaml.automl.model - {model} fit finished") + train_time = time.time() - current_time + return train_time + + def predict(self, X, index_col="tmp_index_col", return_all=False, **kwargs): + """Predict label from features. + Args: + X: A pyspark or pyspark.pandas dataframe of featurized instances, shape n*m. + index_col: A str of the index column name. Default to "tmp_index_col". + return_all: A bool of whether to return all the prediction results. Default to False. + + Returns: + A pyspark.pandas series of shape n*1 if return_all is False. Otherwise, a pyspark.pandas dataframe. 
+ """ + if self._model is not None: + X = self._preprocess(X, index_col=index_col) + pred = self._model.transform(X) + predictions = to_pandas_on_spark(pred, index_col=index_col) + predictions.index.name = None + pred_y = predictions["prediction"] + if return_all: + return predictions + else: + return pred_y + else: + logger.warning("Estimator is not fit yet. Please run fit() before predict().") + return np.ones(X.shape[0]) + + class TransformersEstimator(BaseEstimator): """The class for fine-tuning language models, using huggingface transformers API.""" @@ -726,13 +859,9 @@ def __init__(self, task="seq-classification", **config): self.trial_id = str(uuid.uuid1().hex)[:8] if task not in NLG_TASKS: # TODO: not in NLG_TASKS - from .nlp.huggingface.training_args import ( - TrainingArgumentsForAuto as TrainingArguments, - ) + from .nlp.huggingface.training_args import TrainingArgumentsForAuto as TrainingArguments else: - from .nlp.huggingface.training_args import ( - Seq2SeqTrainingArgumentsForAuto as TrainingArguments, - ) + from .nlp.huggingface.training_args import Seq2SeqTrainingArgumentsForAuto as TrainingArguments self._TrainingArguments = TrainingArguments @classmethod @@ -887,9 +1016,7 @@ def tokenizer(self): @property def data_collator(self): - from flaml.automl.nlp.huggingface.data_collator import ( - task_to_datacollator_class, - ) + from flaml.automl.nlp.huggingface.data_collator import task_to_datacollator_class from flaml.automl.task.task import Task data_collator_class = task_to_datacollator_class.get( @@ -941,6 +1068,8 @@ def fit( except ImportError: self._use_ray = False + if "is_retrain" in kwargs: + kwargs.pop("is_retrain") this_params = self.params self._kwargs = kwargs @@ -1029,6 +1158,10 @@ def on_epoch_end(self, args, state, control, **callback_kwargs): self.intermediate_results = [ x[1] for x in sorted(self._trainer.intermediate_results.items(), key=lambda x: x[0]) ] + self._model = { + "model": self._trainer.model, + "tokenizer": self.tokenizer, + } self._trainer = None return time.time() - start_time @@ -1346,6 +1479,10 @@ def _preprocess(self, X): return X def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs): + if "is_retrain" in kwargs: + is_retrain = kwargs.pop("is_retrain") + else: + is_retrain = False start_time = time.time() deadline = start_time + budget if budget else np.inf n_iter = self.params.get(self.ITER_HP, self.DEFAULT_ITER) @@ -1353,11 +1490,15 @@ def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs): if not self.HAS_CALLBACK: mem0 = psutil.virtual_memory().available if psutil is not None else 1 if ( - (not self._time_per_iter or abs(self._train_size - X_train.shape[0]) > 4) - and budget is not None - or self._mem_per_iter < 0 - and psutil is not None - ) and n_iter > 1: + ( + (not self._time_per_iter or abs(self._train_size - X_train.shape[0]) > 4) + and budget is not None + or self._mem_per_iter < 0 + and psutil is not None + ) + and n_iter > 1 + and not is_retrain + ): self.params[self.ITER_HP] = 1 self._t1 = self._fit(X_train, y_train, **kwargs) if budget is not None and self._t1 >= budget or n_iter == 1: @@ -1542,6 +1683,8 @@ def __init__( def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs): import xgboost as xgb + if "is_retrain" in kwargs: + kwargs.pop("is_retrain") start_time = time.time() deadline = start_time + budget if budget else np.inf if issparse(X_train): @@ -1591,26 +1734,11 @@ def predict(self, X, **kwargs): @classmethod def _callbacks(cls, start_time, deadline, 
free_mem_ratio): - try: - from xgboost.callback import TrainingCallback - except ImportError: # for xgboost<1.3 + if xgb_callback: + return [XGBoostResourceLimit(start_time, deadline, free_mem_ratio)] + else: return None - class ResourceLimit(TrainingCallback): - def after_iteration(self, model, epoch, evals_log) -> bool: - now = time.time() - if epoch == 0: - self._time_per_iter = now - start_time - if now + self._time_per_iter > deadline: - return True - if psutil is not None: - mem = psutil.virtual_memory() - if mem.available / mem.total < free_mem_ratio: - return True - return False - - return [ResourceLimit()] - class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator): """The class for tuning XGBoost with unlimited depth, using sklearn API.""" @@ -1658,6 +1786,8 @@ def __init__( self._xgb_version = xgb.__version__ def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs): + if "is_retrain" in kwargs: + kwargs.pop("is_retrain") if issparse(X_train) and self._xgb_version < "1.6.0": # "auto" fails for sparse input since xgboost 1.6.0 self.params["tree_method"] = "auto" @@ -1913,6 +2043,8 @@ def __init__( self.estimator_class = CatBoostRegressor def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs): + if "is_retrain" in kwargs: + kwargs.pop("is_retrain") start_time = time.time() deadline = start_time + budget if budget else np.inf train_dir = f"catboost_{str(start_time)}" @@ -1964,20 +2096,7 @@ def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs): @classmethod def _callbacks(cls, start_time, deadline, free_mem_ratio): - class ResourceLimit: - def after_iteration(self, info) -> bool: - now = time.time() - if info.iteration == 1: - self._time_per_iter = now - start_time - if now + self._time_per_iter > deadline: - return False - if psutil is not None and free_mem_ratio is not None: - mem = psutil.virtual_memory() - if mem.available / mem.total < free_mem_ratio: - return False - return True # can continue - - return [ResourceLimit()] + return [CatBoostResourceLimit(start_time, deadline, free_mem_ratio)] class KNeighborsEstimator(BaseEstimator): @@ -2030,6 +2149,633 @@ def _preprocess(self, X): return X +class SVCEstimator(SKLearnEstimator): + """The class for tuning Linear Support Vector Machine Classifier.""" + + """Reference: https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html""" + ITER_HP = "max_iter" + + @classmethod + def search_space(cls, **params): + return { + "C": { + "domain": tune.loguniform(lower=0.03125, upper=32768.0), + "init_value": 1.0, + }, + "penalty": { + "domain": tune.choice(["l1", "l2"]), + "init_value": "l2", + }, + } + + def config2params(self, config: dict) -> dict: + params = super().config2params(config) + params["tol"] = params.get("tol", 0.0001) + if params.get("penalty", "l2") == "l1": + params["dual"] = False + params["loss"] = "squared_hinge" + else: + params["dual"] = False + params["loss"] = params.get("loss", "squared_hinge") + + if "n_jobs" in params: + params.pop("n_jobs") + return params + + def __init__(self, task="binary", **config): + super().__init__(task, **config) + assert self._task.is_classification(), "LinearSVC for classification task only" + self.estimator_class = LinearSVC + + def predict_proba(self, X, **kwargs): + """Predict the probability of each class from features. + + Only works for classification problems + + Args: + X: A numpy array of featurized instances, shape n*m. + + Returns: + A numpy array of shape n*c. c is the # classes. 
+ Each element at (i,j) is the probability for instance i to be in + class j. + """ + assert self._task.is_classification(), "predict_proba() only for classification." + + X = self._preprocess(X) + return self._model._predict_proba_lr(X, **kwargs) + + +class SparkNaiveBayesEstimator(SparkEstimator): + """The class for tuning Naive Bayes Classifier.""" + + """Reference: https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.classification.NaiveBayes.html""" + + ITER_HP = "maxIter" + + @classmethod + def search_space(cls, data_size, task, **params): + space = { + "smoothing": { + "domain": tune.loguniform(0.01, 2.0), + "init_value": 1.0, + }, + "modelType": { + # Not using bernoulli since it only supports binary (0/1) features + "domain": tune.choice(["multinomial", "gaussian"]), + }, + } + + return space + + def __init__(self, task="binary", **config): + super().__init__(task, **config) + assert self._task.is_classification(), "Naive Bayes for classification task only" + if "verbose" in self.params: + self.params.pop("verbose") + if "n_jobs" in self.params: + self.params.pop("n_jobs") + + from pyspark.ml.classification import NaiveBayes + + self.estimator_class = NaiveBayes + + self._task = task + self._model = None + self._time_per_iter = None + self._train_size = 0 + self._mem_per_iter = -1 + self.model_classes_ = None + self.model_n_classes_ = None + + +class SGDEstimator(SKLearnEstimator): + """The class for tuning Stochastic Gradient Descent model.""" + + """Reference: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html""" + """Reference: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDRegressor.html""" + + ITER_HP = "max_iter" + + @classmethod + def search_space(cls, task, **params): + if task.is_classification(): + loss_func_space = [ + "log_loss" if SKLEARN_VERSION >= "1.1" else "log", + "modified_huber", + ] + eps_init = 0.1 + power_t_init = 0.5 + else: + loss_func_space = ["squared_error", "huber", "epsilon_insensitive", "squared_epsilon_insensitive"] + eps_init = 0.1 + power_t_init = 0.25 + space = { + "loss": { + "domain": tune.choice(loss_func_space), + }, + "penalty": { + "domain": tune.choice(["l1", "l2", "elasticnet", "None"]), + "init_value": "l2", + }, + "alpha": { + "domain": tune.loguniform(lower=1e-7, upper=1e-1), + "init_value": 0.0001, + }, + "l1_ratio": { + "domain": tune.loguniform(lower=1e-9, upper=1), + "init_value": 0.15, + }, + "epsilon": { + "domain": tune.loguniform(lower=1e-5, upper=1e-1), + "init_value": eps_init, + }, + "learning_rate": { + "domain": tune.choice(["optimal", "invscaling", "constant"]), + "init_value": "invscaling", + }, + "eta0": { + "domain": tune.loguniform(lower=1e-7, upper=1e-1), + "init_value": 0.01, + }, + "power_t": { + "domain": tune.uniform(lower=1e-5, upper=1), + "init_value": power_t_init, + }, + "average": { + "domain": tune.choice([False, True]), + "init_value": False, + }, + } + return space + + def config2params(self, config: dict) -> dict: + params = super().config2params(config) + params["tol"] = params.get("tol", 0.0001) + params["loss"] = params.get("loss", None) + if params["loss"] is None and self._task.is_classification(): + params["loss"] = "log_loss" if SKLEARN_VERSION >= "1.1" else "log" + if not self._task.is_classification(): + params.pop("n_jobs") + + if params.get("penalty") != "elasticnet": + if "l1_ratio" in params: + params.pop("l1_ratio") + + # epsilon is only used by the modified_huber, huber and epsilon-insensitive losses + if params.get("loss") not in ("modified_huber", "huber", "epsilon_insensitive", "squared_epsilon_insensitive"): 
+ if "epsilon" in params: + params.pop("epsilon") + + # learning_rate = "invscaling" -> requires power_t + if params.get("learning_rate") != "invscaling": + if "power_t" in params: + params.pop("power_t") + + # learning_rate in ["invscaling", "constant"] -> requires eta0 + if params.get("learning_rate") not in ["invscaling", "constant"]: + if "eta0" in params: + params.pop("eta0") + + return params + + def __init__(self, task="binary", **config): + super().__init__(task, **config) + if self._task.is_classification(): + self.estimator_class = SGDClassifier + elif self._task.is_regression(): + self.estimator_class = SGDRegressor + else: + raise ValueError("SGD only supports classification and regression tasks") + self.normalizer = Normalizer() + + def _fit(self, X_train, y_train, **kwargs): + current_time = time.time() + if "groups" in kwargs: + kwargs = kwargs.copy() + groups = kwargs.pop("groups") + if self._task == "rank": + kwargs["group"] = group_counts(groups) + X_train = self._preprocess(X_train) + params = self.params.copy() + if params.get("penalty") == "None": + params["penalty"] = None + model = self.estimator_class(**params) + if logger.level == logging.DEBUG: + logger.debug(f"flaml.automl.model - {model} fit started with params {self.params}") + model.fit(X_train, y_train, **kwargs) + if logger.level == logging.DEBUG: + logger.debug(f"flaml.automl.model - {model} fit finished") + train_time = time.time() - current_time + self._model = model + return train_time + + def predict_proba(self, X, **kwargs): + """Predict the probability of each class from features. + + Only works for classification problems + + Args: + X: A numpy array of featurized instances, shape n*m. + + Returns: + A numpy array of shape n*c. c is the # classes. + Each element at (i,j) is the probability for instance i to be in + class j. + """ + assert self._task.is_classification(), "predict_proba() only for classification." + + X = self._preprocess(X) + return self._model.predict_proba(X) + + def _preprocess(self, X): + X = super()._preprocess(X) + X = self.normalizer.fit_transform(X) + return X + + +class ElasticNetEstimator(SKLearnEstimator): + """The class for tuning Elastic Net regression model.""" + + """Reference: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html""" + + ITER_HP = "max_iter" + + @classmethod + def search_space(cls, **params): + return { + "alpha": { + "domain": tune.loguniform(lower=0.0001, upper=1.0), + "init_value": 0.1, + }, + "l1_ratio": { + "domain": tune.uniform(lower=0.0, upper=1.0), + "init_value": 0.5, + }, + "selection": { + "domain": tune.choice(["cyclic", "random"]), + "init_value": "cyclic", + }, + } + + def config2params(self, config: dict) -> dict: + params = super().config2params(config) + params["tol"] = params.get("tol", 0.0001) + if "n_jobs" in params: + params.pop("n_jobs") + return params + + def __init__(self, task="regression", **config): + super().__init__(task, **config) + assert self._task.is_regression(), "ElasticNet for regression task only" + self.estimator_class = ElasticNet + + +class LassoLarsEstimator(SKLearnEstimator): + """The class for tuning Lasso model fit with Least Angle Regression a.k.a. 
Lars.""" + + """Reference: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoLars.html""" + + ITER_HP = "max_iter" + + @classmethod + def search_space(cls, task=None, **params): + return { + "alpha": { + "domain": tune.loguniform(lower=1e-4, upper=1.0), + "init_value": 0.1, + }, + "fit_intercept": { + "domain": tune.choice([True, False]), + "init_value": True, + }, + "eps": { + "domain": tune.loguniform(lower=1e-16, upper=1e-4), + "init_value": 2.220446049250313e-16, + }, + } + + def config2params(self, config: dict) -> dict: + params = super().config2params(config) + if "n_jobs" in params: + params.pop("n_jobs") + return params + + def __init__(self, task="regression", **config): + super().__init__(task, **config) + assert self._task.is_regression(), "LassoLars for regression task only" + self.estimator_class = LassoLars + + def predict(self, X, **kwargs): + X = self._preprocess(X) + return self._model.predict(X, **kwargs) + + +class SparkGLREstimator(SparkEstimator): + """The class for tuning Generalized Linear Regression PySpark model.""" + + """Reference: https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.regression.GeneralizedLinearRegression.html""" + + ITER_HP = "maxIter" + + @classmethod + def search_space(cls, data_size, task, **params): + rules = { + "gaussian": ["identity", "log", "inverse"], + "binomial": ["logit", "probit", "cloglog"], + "poisson": ["log", "identity", "sqrt"], + "gamma": ["inverse", "identity", "log"], + } + + space = { + "regParam": { + "domain": tune.loguniform(0.01, 1.0), + "init_value": 0.1, + }, + } + + familyLinks = [] + + for family, members in rules.items(): + for member in members: + familyLinks.append({"family": family, "link": member}) + familyLinks.append({"family": "tweedie", "link": None}) + space["familyLinks"] = {"domain": tune.choice(familyLinks), "init_value": familyLinks[0]} + return space + + def config2params(self, config): + config = super().config2params(config) + for k, v in config["familyLinks"].items(): + config[k] = v + del config["familyLinks"] + return config + + def __init__(self, task="binary", **config): + super().__init__(task, **config) + assert self._task.is_regression(), "Generalized Linear Regression for regression task only" + if "verbose" in self.params: + self.params.pop("verbose") + if "n_jobs" in self.params: + self.params.pop("n_jobs") + + from pyspark.ml.regression import GeneralizedLinearRegression + + self.estimator_class = GeneralizedLinearRegression + + self._task = task + self._model = None + self._time_per_iter = None + self._train_size = 0 + self._mem_per_iter = -1 + self.model_classes_ = None + self.model_n_classes_ = None + + +class SparkLinearRegressionEstimator(SparkEstimator): + """The class for tuning Linear Regression PySpark model.""" + + """Reference: https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.regression.LinearRegression.html""" + + ITER_HP = "maxIter" + + @classmethod + def search_space(cls, data_size, task, **params): + space = { + "regParam": { + "domain": tune.loguniform(0.01, 1.0), + "init_value": 0.1, + }, + "elasticNetParam": { + "domain": tune.uniform(0.0, 1.0), + "init_value": 0.0, + }, + "fitIntercept": { + "domain": tune.choice([True, False]), + "init_value": True, + }, + "standardization": { + "domain": tune.choice([True, False]), + "init_value": True, + }, + "aggregationDepth": { + "domain": tune.randint(2, 10), + "init_value": 2, + }, + "loss": { + "domain": tune.choice(["squaredError", "huber"]), + 
"init_value": "squaredError", + }, + "epsilon": { + "domain": tune.uniform(1.0001, 2), + "init_value": 1.35, + }, + } + + return space + + def __init__(self, task="binary", **config): + super().__init__(task, **config) + assert self._task.is_regression(), "Linear Regression for regression task only" + if "verbose" in self.params: + self.params.pop("verbose") + if "n_jobs" in self.params: + self.params.pop("n_jobs") + + from pyspark.ml.regression import LinearRegression + + self.estimator_class = LinearRegression + + self._task = task + self._model = None + self._time_per_iter = None + self._train_size = 0 + self._mem_per_iter = -1 + self.model_classes_ = None + self.model_n_classes_ = None + + def config2params(self, config): + config = super().config2params(config) + if config["loss"] == "huber": + config.pop("elasticNetParam") + return config + + +class SparkLinearSVCEstimator(SparkEstimator): + """The class for tuning Linear SVC PySpark model.""" + + """Reference: https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.classification.LinearSVC.html""" + + ITER_HP = "maxIter" + + @classmethod + def search_space(cls, data_size, task, **params): + space = { + "aggregationDepth": { + "domain": tune.randint(2, 10), + "init_value": 2, + }, + "regParam": { + "domain": tune.uniform(0, 1.0), + "init_value": 0, + }, + "fitIntercept": { + "domain": tune.choice([True, False]), + "init_value": True, + }, + "standardization": { + "domain": tune.choice([True, False]), + "init_value": True, + }, + "threshold": { + "domain": tune.uniform(0, 1.0), + "init_value": 0, + }, + } + return space + + def __init__(self, task="binary", **config): + super().__init__(task, **config) + assert self._task.is_binary(), "Linear SVC for binary classification task only" + if "verbose" in self.params: + self.params.pop("verbose") + if "n_jobs" in self.params: + self.params.pop("n_jobs") + from pyspark.ml.classification import LinearSVC + + self.estimator_class = LinearSVC + + +class SparkGBTEstimator(SparkEstimator): + """The class for tuning GBT PySpark model.""" + + """Reference: https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.classification.GBTClassifier.html""" + """Reference: https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.regression.GBTRegressor.html""" + + ITER_HP = "maxIter" + + @classmethod + def search_space(cls, data_size, task, **params): + space = { + "maxDepth": { + "domain": tune.randint(3, 10), + "init_value": 5, + }, + "maxBins": { + "domain": tune.randint(10, 100), + "init_value": 32, + }, + "stepSize": { + "domain": tune.loguniform(0.01, 1.0), + "init_value": 0.1, + }, + "subsamplingRate": { + "domain": tune.uniform(0.0001, 1.0), + "init_value": 1.0, + }, + "minInstancesPerNode": { + "domain": tune.randint(1, 10), + "init_value": 1, + }, + "minWeightFractionPerNode": { + "domain": tune.uniform(0.0, 0.4999), + "init_value": 0.0, + }, + "minInfoGain": { + "domain": tune.uniform(0.0, 0.1), + "init_value": 0.0, + }, + } + return space + + def __init__(self, task="binary", **config): + super().__init__(task, **config) + assert ( + self._task.is_binary() or self._task.is_regression() + ), "GBT for binary classification task or regression only" + if "verbose" in self.params: + self.params.pop("verbose") + if "n_jobs" in self.params: + self.params.pop("n_jobs") + if self._task.is_binary(): + from pyspark.ml.classification import GBTClassifier + + self.estimator_class = GBTClassifier + else: + from pyspark.ml.regression import GBTRegressor + + 
self.estimator_class = GBTRegressor + + +class SparkAFTSurvivalRegressionEstimator(SparkEstimator): + """The class for tuning AFTSurvivalRegression PySpark model.""" + + """Reference: https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.regression.AFTSurvivalRegression.html""" + + ITER_HP = "maxIter" + + @classmethod + def search_space(cls, data_size, task, **params): + space = { + "fitIntercept": { + "domain": tune.choice([True, False]), + "init_value": True, + }, + "aggregationDepth": { + "domain": tune.randint(2, 10), + "init_value": 2, + }, + } + + return space + + def __init__(self, task="binary", **config): + super().__init__(task, **config) + assert self._task.is_regression(), "AFTSurvivalRegression for regression task only" + if "verbose" in self.params: + self.params.pop("verbose") + if "n_jobs" in self.params: + self.params.pop("n_jobs") + + from pyspark.ml.regression import AFTSurvivalRegression + + self.estimator_class = AFTSurvivalRegression + + +class BaseResourceLimit: + def __init__(self, start_time, deadline, free_mem_ratio): + self.start_time = start_time + self.deadline = deadline + self.free_mem_ratio = free_mem_ratio + self._time_per_iter = None + + def check_resource_limits(self, current_time, current_iteration, mllib): + if (mllib == "xgb" and current_iteration == 0) or (mllib == "cat" and current_iteration == 1): + self._time_per_iter = current_time - self.start_time + if current_time + self._time_per_iter > self.deadline: + return False + if psutil is not None and self.free_mem_ratio is not None: + mem = psutil.virtual_memory() + if mem.available / mem.total < self.free_mem_ratio: + return False + return True + + def after_iteration(self, *args, **kwargs) -> bool: + raise NotImplementedError + + +class XGBoostResourceLimit(BaseResourceLimit, TrainingCallback): + def after_iteration(self, model, epoch, evals_log) -> bool: + now = time.time() + return not self.check_resource_limits(now, epoch, "xgb") + + +class CatBoostResourceLimit(BaseResourceLimit): + def after_iteration(self, info) -> bool: + now = time.time() + return self.check_resource_limits(now, info.iteration, "cat") + + class suppress_stdout_stderr: def __init__(self): # Open a pair of null files diff --git a/flaml/automl/spark/configs.py b/flaml/automl/spark/configs.py deleted file mode 100644 index 26584dc479..0000000000 --- a/flaml/automl/spark/configs.py +++ /dev/null @@ -1,97 +0,0 @@ -ParamList_LightGBM_Base = [ - "baggingFraction", - "baggingFreq", - "baggingSeed", - "binSampleCount", - "boostFromAverage", - "boostingType", - "catSmooth", - "categoricalSlotIndexes", - "categoricalSlotNames", - "catl2", - "chunkSize", - "dataRandomSeed", - "defaultListenPort", - "deterministic", - "driverListenPort", - "dropRate", - "dropSeed", - "earlyStoppingRound", - "executionMode", - "extraSeed" "featureFraction", - "featureFractionByNode", - "featureFractionSeed", - "featuresCol", - "featuresShapCol", - "fobj" "improvementTolerance", - "initScoreCol", - "isEnableSparse", - "isProvideTrainingMetric", - "labelCol", - "lambdaL1", - "lambdaL2", - "leafPredictionCol", - "learningRate", - "matrixType", - "maxBin", - "maxBinByFeature", - "maxCatThreshold", - "maxCatToOnehot", - "maxDeltaStep", - "maxDepth", - "maxDrop", - "metric", - "microBatchSize", - "minDataInLeaf", - "minDataPerBin", - "minDataPerGroup", - "minGainToSplit", - "minSumHessianInLeaf", - "modelString", - "monotoneConstraints", - "monotoneConstraintsMethod", - "monotonePenalty", - "negBaggingFraction", - "numBatches", - 
"numIterations", - "numLeaves", - "numTasks", - "numThreads", - "objectiveSeed", - "otherRate", - "parallelism", - "passThroughArgs", - "posBaggingFraction", - "predictDisableShapeCheck", - "predictionCol", - "repartitionByGroupingColumn", - "seed", - "skipDrop", - "slotNames", - "timeout", - "topK", - "topRate", - "uniformDrop", - "useBarrierExecutionMode", - "useMissing", - "useSingleDatasetMode", - "validationIndicatorCol", - "verbosity", - "weightCol", - "xGBoostDartMode", - "zeroAsMissing", - "objective", -] -ParamList_LightGBM_Classifier = ParamList_LightGBM_Base + [ - "isUnbalance", - "probabilityCol", - "rawPredictionCol", - "thresholds", -] -ParamList_LightGBM_Regressor = ParamList_LightGBM_Base + ["tweedieVariancePower"] -ParamList_LightGBM_Ranker = ParamList_LightGBM_Base + [ - "groupCol", - "evalAt", - "labelGain", - "maxPosition", -] diff --git a/flaml/automl/state.py b/flaml/automl/state.py index f966111696..a5897f7234 100644 --- a/flaml/automl/state.py +++ b/flaml/automl/state.py @@ -65,6 +65,7 @@ def __init__( custom_hp=None, max_iter=None, budget=None, + featurization="auto", ): self.init_eci = learner_class.cost_relative2lgbm() if budget >= 0 else 1 self._search_space_domain = {} @@ -82,6 +83,7 @@ def __init__( else: data_size = data.shape search_space = learner_class.search_space(data_size=data_size, task=task) + self.data_size = data_size if custom_hp is not None: @@ -288,9 +290,11 @@ def _compute_with_config_base( budget = ( None if state.time_budget < 0 - else state.time_budget - state.time_from_start - if sample_size == state.data_size[0] - else (state.time_budget - state.time_from_start) / 2 * sample_size / state.data_size[0] + else ( + state.time_budget - state.time_from_start + if sample_size == state.data_size[0] + else (state.time_budget - state.time_from_start) / 2 * sample_size / state.data_size[0] + ) ) ( @@ -351,6 +355,7 @@ def _train_with_config( estimator: str, config_w_resource: dict, sample_size: Optional[int] = None, + is_retrain: bool = False, ): if not sample_size: sample_size = config_w_resource.get("FLAML_sample_size", len(self.y_train_all)) @@ -376,9 +381,8 @@ def _train_with_config( this_estimator_kwargs[ "groups" ] = groups # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator - + this_estimator_kwargs.update({"is_retrain": is_retrain}) budget = None if self.time_budget < 0 else self.time_budget - self.time_from_start - estimator, train_time = train_estimator( X_train=sampled_X_train, y_train=sampled_y_train, diff --git a/flaml/automl/task/generic_task.py b/flaml/automl/task/generic_task.py index 8d7b4defdd..df61d7e664 100644 --- a/flaml/automl/task/generic_task.py +++ b/flaml/automl/task/generic_task.py @@ -16,12 +16,7 @@ unique_pandas_on_spark, unique_value_first_index, ) -from flaml.automl.task.task import ( - TS_FORECAST, - TS_FORECASTPANEL, - Task, - get_classification_objective, -) +from flaml.automl.task.task import TS_FORECAST, TS_FORECASTPANEL, Task, get_classification_objective from flaml.config import RANDOM_SEED try: @@ -53,13 +48,24 @@ def estimators(self): from flaml.automl.contrib.histgb import HistGradientBoostingEstimator from flaml.automl.model import ( CatBoostEstimator, + ElasticNetEstimator, ExtraTreesEstimator, KNeighborsEstimator, + LassoLarsEstimator, LGBMEstimator, LRL1Classifier, LRL2Classifier, RandomForestEstimator, + SGDEstimator, + SparkAFTSurvivalRegressionEstimator, + SparkGBTEstimator, + SparkGLREstimator, SparkLGBMEstimator, + SparkLinearRegressionEstimator, + SparkLinearSVCEstimator, + 
SparkNaiveBayesEstimator, + SparkRandomForestEstimator, + SVCEstimator, TransformersEstimator, TransformersEstimatorModelSelection, XGBoostLimitDepthEstimator, @@ -72,6 +78,7 @@ def estimators(self): "rf": RandomForestEstimator, "lgbm": LGBMEstimator, "lgbm_spark": SparkLGBMEstimator, + "rf_spark": SparkRandomForestEstimator, "lrl1": LRL1Classifier, "lrl2": LRL2Classifier, "catboost": CatBoostEstimator, @@ -80,6 +87,17 @@ def estimators(self): "transformer": TransformersEstimator, "transformer_ms": TransformersEstimatorModelSelection, "histgb": HistGradientBoostingEstimator, + # Above are open-source, below are internal + "svc": SVCEstimator, + "sgd": SGDEstimator, + "nb_spark": SparkNaiveBayesEstimator, + "enet": ElasticNetEstimator, + "lassolars": LassoLarsEstimator, + "glr_spark": SparkGLREstimator, + "lr_spark": SparkLinearRegressionEstimator, + "svc_spark": SparkLinearSVCEstimator, + "gbt_spark": SparkGBTEstimator, + "aft_spark": SparkAFTSurvivalRegressionEstimator, } return self._estimators @@ -271,8 +289,8 @@ def _split_pyspark(state, X_train_all, y_train_all, split_ratio, stratify=None): seed=RANDOM_SEED, ) columns_to_drop = [c for c in df_all_train.columns if c in [stratify_column, "sample_weight"]] - X_train = df_all_train.drop(columns_to_drop) - X_val = df_all_val.drop(columns_to_drop) + X_train = df_all_train.drop(columns=columns_to_drop) + X_val = df_all_val.drop(columns=columns_to_drop) y_train = df_all_train[stratify_column] y_val = df_all_val[stratify_column] @@ -497,14 +515,37 @@ def prepare_data( last = first[i] + 1 rest.extend(range(last, len(y_train_all))) X_first = X_train_all.iloc[first] if data_is_df else X_train_all[first] - X_rest = X_train_all.iloc[rest] if data_is_df else X_train_all[rest] - y_rest = ( - y_train_all[rest] - if isinstance(y_train_all, np.ndarray) - else iloc_pandas_on_spark(y_train_all, rest) - if is_spark_dataframe - else y_train_all.iloc[rest] - ) + if len(first) < len(y_train_all) / 2: + # Get X_rest and y_rest with drop, sparse matrix can't apply np.delete + X_rest = ( + np.delete(X_train_all, first, axis=0) + if isinstance(X_train_all, np.ndarray) + else X_train_all.drop(first.tolist()) + if data_is_df + else X_train_all[rest] + ) + y_rest = ( + np.delete(y_train_all, first, axis=0) + if isinstance(y_train_all, np.ndarray) + else y_train_all.drop(first.tolist()) + if data_is_df + else y_train_all[rest] + ) + else: + X_rest = ( + iloc_pandas_on_spark(X_train_all, rest) + if is_spark_dataframe + else X_train_all.iloc[rest] + if data_is_df + else X_train_all[rest] + ) + y_rest = ( + iloc_pandas_on_spark(y_train_all, rest) + if is_spark_dataframe + else y_train_all.iloc[rest] + if data_is_df + else y_train_all[rest] + ) stratify = y_rest if split_type == "stratified" else None X_train, X_val, y_train, y_val = self._train_test_split( state, X_rest, y_rest, first, rest, split_ratio, stratify @@ -513,6 +554,12 @@ def prepare_data( y_train = concat(label_set, y_train) if data_is_df else np.concatenate([label_set, y_train]) X_val = concat(X_first, X_val) y_val = concat(label_set, y_val) if data_is_df else np.concatenate([label_set, y_val]) + + if isinstance(y_train, (psDataFrame, pd.DataFrame)) and y_train.shape[1] == 1: + y_train = y_train[y_train.columns[0]] + y_val = y_val[y_val.columns[0]] + y_train.name = y_val.name = y_rest.name + elif self.is_regression(): X_train, X_val, y_train, y_val = self._train_test_split( state, X_train_all, y_train_all, split_ratio=split_ratio @@ -810,27 +857,23 @@ def default_estimator_list(self, estimator_list: 
List[str], is_spark_dataframe: elif self.is_ts_forecastpanel(): estimator_list = ["tft"] else: + estimator_list = [ + "lgbm", + "rf", + "xgboost", + "extra_tree", + "xgb_limitdepth", + "lgbm_spark", + "rf_spark", + "sgd", + ] try: import catboost - estimator_list = [ - "lgbm", - "rf", - "catboost", - "xgboost", - "extra_tree", - "xgb_limitdepth", - "lgbm_spark", - ] + estimator_list += ["catboost"] except ImportError: - estimator_list = [ - "lgbm", - "rf", - "xgboost", - "extra_tree", - "xgb_limitdepth", - "lgbm_spark", - ] + pass + # if self.is_ts_forecast(): # # catboost is removed because it has a `name` parameter, making it incompatible with hcrystalball # if "catboost" in estimator_list: @@ -862,9 +905,7 @@ def default_metric(self, metric: str) -> str: return metric if self.is_nlp(): - from flaml.automl.nlp.utils import ( - load_default_huggingface_metric_for_task, - ) + from flaml.automl.nlp.utils import load_default_huggingface_metric_for_task return load_default_huggingface_metric_for_task(self.name) elif self.is_binary(): diff --git a/flaml/automl/task/time_series_task.py b/flaml/automl/task/time_series_task.py index 7dc9f84a22..15eac2a8e8 100644 --- a/flaml/automl/task/time_series_task.py +++ b/flaml/automl/task/time_series_task.py @@ -36,11 +36,17 @@ def estimators(self): LGBM_TS, RF_TS, SARIMAX, + Average, CatBoost_TS, ExtraTrees_TS, HoltWinters, + LassoLars_TS, + Naive, Orbit, Prophet, + SeasonalAverage, + SeasonalNaive, + TCNEstimator, TemporalFusionTransformerEstimator, XGBoost_TS, XGBoostLimitDepth_TS, @@ -57,8 +63,19 @@ def estimators(self): "holt-winters": HoltWinters, "catboost": CatBoost_TS, "tft": TemporalFusionTransformerEstimator, + "lassolars": LassoLars_TS, + "tcn": TCNEstimator, + "snaive": SeasonalNaive, + "naive": Naive, + "savg": SeasonalAverage, + "avg": Average, } + if self._estimators["tcn"] is None: + # remove TCN if import failed + del self._estimators["tcn"] + logger.info("Couldn't import pytorch_lightning, skipping TCN estimator") + try: from prophet import Prophet as foo @@ -71,7 +88,7 @@ def estimators(self): self._estimators["orbit"] = Orbit except ImportError: - logger.info("Couldn't import Prophet, skipping") + logger.info("Couldn't import orbit, skipping") return self._estimators diff --git a/flaml/automl/time_series/__init__.py b/flaml/automl/time_series/__init__.py index b48f266161..76a3087588 100644 --- a/flaml/automl/time_series/__init__.py +++ b/flaml/automl/time_series/__init__.py @@ -1,16 +1,27 @@ from .tft import TemporalFusionTransformerEstimator -from .ts_data import TimeSeriesDataset from .ts_model import ( ARIMA, LGBM_TS, RF_TS, SARIMAX, + Average, CatBoost_TS, ExtraTrees_TS, HoltWinters, + LassoLars_TS, + Naive, Orbit, Prophet, + SeasonalAverage, + SeasonalNaive, TimeSeriesEstimator, XGBoost_TS, XGBoostLimitDepth_TS, ) + +try: + from .tcn import TCNEstimator +except ImportError: + TCNEstimator = None + +from .ts_data import TimeSeriesDataset diff --git a/flaml/automl/time_series/tcn.py b/flaml/automl/time_series/tcn.py new file mode 100644 index 0000000000..cfd04d78f6 --- /dev/null +++ b/flaml/automl/time_series/tcn.py @@ -0,0 +1,285 @@ +# This file is adapted from +# https://github.com/locuslab/TCN/blob/master/TCN/tcn.py +# https://github.com/locuslab/TCN/blob/master/TCN/adding_problem/add_test.py + +import datetime +import logging +import time + +import pandas as pd +import pytorch_lightning as pl +import torch +import torch.nn as nn +import torch.optim as optim +from pytorch_lightning.callbacks import EarlyStopping, 
LearningRateMonitor +from pytorch_lightning.loggers import TensorBoardLogger +from torch.nn.utils import weight_norm +from torch.utils.data import DataLoader, TensorDataset + +from flaml import tune +from flaml.automl.data import add_time_idx_col +from flaml.automl.logger import logger, logger_formatter +from flaml.automl.time_series.ts_data import TimeSeriesDataset +from flaml.automl.time_series.ts_model import TimeSeriesEstimator + + +class Chomp1d(nn.Module): + def __init__(self, chomp_size): + super().__init__() + self.chomp_size = chomp_size + + def forward(self, x): + return x[:, :, : -self.chomp_size].contiguous() + + +class TemporalBlock(nn.Module): + def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2): + super().__init__() + self.conv1 = weight_norm( + nn.Conv1d(n_inputs, n_outputs, kernel_size, stride=stride, padding=padding, dilation=dilation) + ) + self.chomp1 = Chomp1d(padding) + self.relu1 = nn.ReLU() + self.dropout1 = nn.Dropout(dropout) + + self.conv2 = weight_norm( + nn.Conv1d(n_outputs, n_outputs, kernel_size, stride=stride, padding=padding, dilation=dilation) + ) + self.chomp2 = Chomp1d(padding) + self.relu2 = nn.ReLU() + self.dropout2 = nn.Dropout(dropout) + + self.net = nn.Sequential( + self.conv1, self.chomp1, self.relu1, self.dropout1, self.conv2, self.chomp2, self.relu2, self.dropout2 + ) + self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None + self.relu = nn.ReLU() + self.init_weights() + + def init_weights(self): + self.conv1.weight.data.normal_(0, 0.01) + self.conv2.weight.data.normal_(0, 0.01) + if self.downsample is not None: + self.downsample.weight.data.normal_(0, 0.01) + + def forward(self, x): + out = self.net(x) + res = x if self.downsample is None else self.downsample(x) + return self.relu(out + res) + + +class TCNForecaster(nn.Module): + def __init__( + self, + input_feature_num, + num_outputs, + num_channels, + kernel_size=2, + dropout=0.2, + ): + super().__init__() + layers = [] + num_levels = len(num_channels) + for i in range(num_levels): + dilation_size = 2**i + in_channels = input_feature_num if i == 0 else num_channels[i - 1] + out_channels = num_channels[i] + layers += [ + TemporalBlock( + in_channels, + out_channels, + kernel_size, + stride=1, + dilation=dilation_size, + padding=(kernel_size - 1) * dilation_size, + dropout=dropout, + ) + ] + + self.network = nn.Sequential(*layers) + self.linear = nn.Linear(num_channels[-1], num_outputs) + + def forward(self, x): + y1 = self.network(x) + return self.linear(y1[:, :, -1]) + + +class TCNForecasterLightningModule(pl.LightningModule): + def __init__(self, model: TCNForecaster, learning_rate: float = 1e-3): + super().__init__() + self.model = model + self.learning_rate = learning_rate + self.loss_fn = nn.MSELoss() + + def forward(self, x): + return self.model(x) + + def step(self, batch, batch_idx): + x, y = batch + y_hat = self.model(x) + loss = self.loss_fn(y_hat, y) + return loss + + def training_step(self, batch, batch_idx): + loss = self.step(batch, batch_idx) + self.log("train_loss", loss) + return loss + + def validation_step(self, batch, batch_idx): + loss = self.step(batch, batch_idx) + self.log("val_loss", loss) + return loss + + def configure_optimizers(self): + return torch.optim.Adam(self.parameters(), lr=self.learning_rate) + + +class DataframeDataset(torch.utils.data.Dataset): + def __init__(self, dataframe, target_column, features_columns, sequence_length, train=True): + self.data = 
torch.tensor(dataframe[features_columns].to_numpy(), dtype=torch.float)
+        self.sequence_length = sequence_length
+        if train:
+            self.labels = torch.tensor(dataframe[target_column].to_numpy(), dtype=torch.float)
+        self.is_train = train
+
+    def __len__(self):
+        return len(self.data) - self.sequence_length + 1
+
+    def __getitem__(self, idx):
+        data = self.data[idx : idx + self.sequence_length]
+        data = data.permute(1, 0)
+        if self.is_train:
+            label = self.labels[idx : idx + self.sequence_length]
+            return data, label
+        else:
+            return data
+
+
+class TCNEstimator(TimeSeriesEstimator):
+    """The class for tuning TCN Forecaster"""
+
+    @classmethod
+    def search_space(cls, data, task, pred_horizon, **params):
+        space = {
+            "num_levels": {
+                "domain": tune.randint(lower=4, upper=20),  # number of TemporalBlock layers
+                "init_value": 4,
+            },
+            "num_hidden": {
+                "domain": tune.randint(lower=4, upper=8),  # hidden = 2^num_hidden
+                "init_value": 5,
+            },
+            "kernel_size": {
+                "domain": tune.choice([2, 3, 5, 7]),  # common choices for kernel size
+                "init_value": 3,
+            },
+            "dropout": {
+                "domain": tune.uniform(lower=0.0, upper=0.5),  # standard range for dropout
+                "init_value": 0.1,
+            },
+            "learning_rate": {
+                "domain": tune.loguniform(lower=1e-4, upper=1e-1),  # typical range for learning rate
+                "init_value": 1e-3,
+            },
+        }
+        return space
+
+    def __init__(self, task="ts_forecast", n_jobs=1, **params):
+        super().__init__(task, **params)
+        logging.getLogger("pytorch_lightning").setLevel(logging.WARNING)
+
+    def fit(self, X_train: TimeSeriesDataset, y_train=None, budget=None, **kwargs):
+        start_time = time.time()
+        if budget is not None:
+            deltabudget = datetime.timedelta(seconds=budget)
+        else:
+            deltabudget = None
+        X_train = self.enrich(X_train)
+        super().fit(X_train, y_train, budget, **kwargs)
+
+        self.batch_size = kwargs.get("batch_size", 64)
+        self.horizon = kwargs.get("period", 1)
+        self.feature_cols = X_train.time_varying_known_reals
+        self.target_col = X_train.target_names[0]
+
+        train_dataset = DataframeDataset(
+            X_train.train_data,
+            self.target_col,
+            self.feature_cols,
+            self.horizon,
+        )
+        train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=False)
+        if not X_train.test_data.empty:
+            val_dataset = DataframeDataset(
+                X_train.test_data,
+                self.target_col,
+                self.feature_cols,
+                self.horizon,
+            )
+        else:
+            val_dataset = DataframeDataset(
+                X_train.train_data.sample(frac=0.2, random_state=kwargs.get("random_state", 0)),
+                self.target_col,
+                self.feature_cols,
+                self.horizon,
+            )
+
+        val_loader = DataLoader(val_dataset, batch_size=self.batch_size, shuffle=False)
+
+        model = TCNForecaster(
+            len(self.feature_cols),
+            self.horizon,
+            [2 ** self.params["num_hidden"]] * self.params["num_levels"],
+            self.params["kernel_size"],
+            self.params["dropout"],
+        )
+
+        pl_module = TCNForecasterLightningModule(model, self.params["learning_rate"])
+
+        # Training loop
+        # gpus is deprecated in v1.7 and removed in v2.0
+        # accelerator="auto" handles all cases.
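+        # The Trainer below enforces the tuning budget and early stopping:
+        # - max_time=deltabudget caps wall-clock training time at the remaining budget;
+        # - EarlyStopping halts training once val_loss improves by less than 1e-4 for 10 consecutive checks;
+        # - TensorBoardLogger writes training curves under kwargs["log_dir"] (default "logs/lightning_logs").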
+        trainer = pl.Trainer(
+            max_epochs=kwargs.get("max_epochs", 10),
+            accelerator="auto",
+            callbacks=[
+                EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min"),
+                LearningRateMonitor(),
+            ],
+            logger=TensorBoardLogger(kwargs.get("log_dir", "logs/lightning_logs")),  # log results to TensorBoard
+            max_time=deltabudget,
+            enable_model_summary=False,
+            enable_progress_bar=False,
+        )
+        trainer.fit(
+            pl_module,
+            train_dataloaders=train_loader,
+            val_dataloaders=val_loader,
+        )
+        best_model = trainer.model
+        self._model = best_model
+        train_time = time.time() - start_time
+        return train_time
+
+    def predict(self, X):
+        X = self.enrich(X)
+        if isinstance(X, TimeSeriesDataset):
+            df = X.X_val
+        else:
+            df = X
+        dataset = DataframeDataset(
+            df,
+            self.target_col,
+            self.feature_cols,
+            self.horizon,
+            train=False,
+        )
+        data_loader = DataLoader(dataset, batch_size=self.batch_size, shuffle=False)
+        self._model.eval()
+        raw_preds = []
+        for batch_x in data_loader:
+            raw_pred = self._model(batch_x)
+            raw_preds.append(raw_pred)
+        raw_preds = torch.cat(raw_preds, dim=0)
+        preds = pd.Series(raw_preds.detach().numpy().ravel())
+        return preds
diff --git a/flaml/automl/time_series/ts_model.py b/flaml/automl/time_series/ts_model.py
index 1b581c6a7c..c0a8fe33fc 100644
--- a/flaml/automl/time_series/ts_model.py
+++ b/flaml/automl/time_series/ts_model.py
@@ -26,6 +26,7 @@ class PD:
 from flaml.automl.model import (
     CatBoostEstimator,
     ExtraTreesEstimator,
+    LassoLarsEstimator,
     LGBMEstimator,
     RandomForestEstimator,
     SKLearnEstimator,
@@ -631,6 +632,125 @@ def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs):
         return train_time
 
 
+class SimpleForecaster(StatsModelsEstimator):
+    """Base class for naive forecasters such as SeasonalNaive, Naive, SeasonalAverage, and Average."""
+
+    @classmethod
+    def _search_space(cls, data: TimeSeriesDataset, task: Task, pred_horizon: int, **params):
+        return {
+            "season": {
+                "domain": tune.randint(1, pred_horizon),
+                "init_value": pred_horizon,
+            }
+        }
+
+    def joint_preprocess(self, X_train, y_train=None):
+        X_train = self.enrich(X_train)
+
+        self.regressors = []
+
+        if isinstance(X_train, TimeSeriesDataset):
+            data = X_train
+            target_col = data.target_names[0]
+            # this class only supports univariate regression
+            train_df = data.train_data[self.regressors + [target_col]]
+            train_df.index = to_datetime(data.train_data[data.time_col])
+        else:
+            target_col = TS_VALUE_COL
+            train_df = self._join(X_train, y_train)
+
+        self.time_col = data.time_col
+        self.target_names = data.target_names
+
+        train_df = self._preprocess(train_df)
+        return train_df, target_col
+
+    def fit(self, X_train, y_train=None, budget=None, **kwargs):
+        import warnings
+
+        warnings.filterwarnings("ignore")
+        from statsmodels.tsa.holtwinters import SimpleExpSmoothing
+
+        self.season = self.params.get("season", 1)
+        current_time = time.time()
+        super().fit(X_train, y_train, budget=budget, **kwargs)
+
+        train_df, target_col = self.joint_preprocess(X_train, y_train)
+
+        model = SimpleExpSmoothing(
+            train_df[[target_col]],
+        )
+        with suppress_stdout_stderr():
+            model = model.fit(smoothing_level=self.smoothing_level)
+        train_time = time.time() - current_time
+        self._model = model
+        return train_time
+
+
+class SeasonalNaive(SimpleForecaster):
+    smoothing_level = 1.0
+
+    def predict(self, X, **kwargs):
+        if isinstance(X, int):
+            forecasts = []
+            for i in range(X):
+                forecast = self._model.forecast(steps=self.season)[0]
+                forecasts.append(forecast)
+            return
pd.Series(forecasts) + else: + return super().predict(X, **kwargs) + + +class Naive(SimpleForecaster): + smoothing_level = 0.0 + + @classmethod + def _search_space(cls, data: TimeSeriesDataset, task: Task, pred_horizon: int, **params): + return {} + + def predict(self, X, **kwargs): + if isinstance(X, int): + last_observation = self._model.params["initial_level"] + return pd.Series([last_observation] * X) + else: + return super().predict(X, **kwargs) + + +class SeasonalAverage(SimpleForecaster): + def fit(self, X_train, y_train=None, budget=None, **kwargs): + from statsmodels.tsa.ar_model import AutoReg, ar_select_order + + start_time = time.time() + + self.season = kwargs.get("season", 1) # seasonality period + train_df, target_col = self.joint_preprocess(X_train, y_train) + selection_res = ar_select_order(train_df[target_col], maxlag=self.season) + + # Fit autoregressive model with optimal order + model = AutoReg(train_df[target_col], lags=selection_res.ar_lags) + self._model = model.fit() + end_time = time.time() + + return end_time - start_time + + +class Average(SimpleForecaster): + @classmethod + def _search_space(cls, data: TimeSeriesDataset, task: Task, pred_horizon: int, **params): + return {} + + def fit(self, X_train, y_train=None, budget=None, **kwargs): + from statsmodels.tsa.ar_model import AutoReg + + start_time = time.time() + train_df, target_col = self.joint_preprocess(X_train, y_train) + model = AutoReg(train_df[target_col], lags=0) + self._model = model.fit() + end_time = time.time() + + return end_time - start_time + + class TS_SKLearn(TimeSeriesEstimator): """The class for tuning SKLearn Regressors for time-series forecasting""" @@ -757,3 +877,7 @@ class XGBoostLimitDepth_TS(TS_SKLearn): # catboost regressor is invalid because it has a `name` parameter, making it incompatible with hcrystalball class CatBoost_TS(TS_SKLearn): base_class = CatBoostEstimator + + +class LassoLars_TS(TS_SKLearn): + base_class = LassoLarsEstimator diff --git a/flaml/fabric/__init__.py b/flaml/fabric/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/flaml/fabric/mlflow.py b/flaml/fabric/mlflow.py new file mode 100644 index 0000000000..5eebefa961 --- /dev/null +++ b/flaml/fabric/mlflow.py @@ -0,0 +1,689 @@ +import json +import os +import pickle +import random +import sys +import time +from typing import MutableMapping + +import mlflow +import pandas as pd +from mlflow.entities import Metric, Param, RunTag +from mlflow.exceptions import MlflowException +from mlflow.utils.autologging_utils import AUTOLOGGING_INTEGRATIONS, autologging_is_disabled +from scipy.sparse import issparse +from sklearn import tree + +try: + from pyspark.ml import Pipeline as SparkPipeline +except ImportError: + + class SparkPipeline: + pass + + +# from mlflow.store.tracking import SEARCH_MAX_RESULTS_THRESHOLD +from sklearn.pipeline import Pipeline + +from flaml.automl.logger import logger +from flaml.automl.spark import DataFrame, Series, psDataFrame, psSeries +from flaml.version import __version__ + +SEARCH_MAX_RESULTS = 5000 # Each train should not have more than 5000 trials +IS_RENAME_CHILD_RUN = os.environ.get("FLAML_IS_RENAME_CHILD_RUN", "false").lower() == "true" + + +def flatten_dict(d: MutableMapping, sep: str = ".") -> MutableMapping: + if len(d) == 0: + return d + [flat_dict] = pd.json_normalize(d, sep=sep).to_dict(orient="records") + keys = list(flat_dict.keys()) + for key in keys: + if not isinstance(flat_dict[key], (int, float)): + flat_dict.pop(key) + return flat_dict + + +def 
is_autolog_enabled(): + return not all(autologging_is_disabled(k) for k in AUTOLOGGING_INTEGRATIONS.keys()) + + +def get_mlflow_log_latency(model_history=False): + st = time.time() + with mlflow.start_run(nested=True, run_name="get_mlflow_log_latency") as run: + if model_history: + sk_model = tree.DecisionTreeClassifier() + mlflow.sklearn.log_model(sk_model, "sk_models") + mlflow.sklearn.log_model(Pipeline([("estimator", sk_model)]), "sk_pipeline") + pickle_fpath = f"tmp_{int(time.time()*1000)}" + with open(pickle_fpath, "wb") as f: + pickle.dump(sk_model, f) + mlflow.log_artifact(pickle_fpath, "sk_model1") + mlflow.log_artifact(pickle_fpath, "sk_model2") + os.remove(pickle_fpath) + mlflow.set_tag("synapseml.ui.visible", "false") # not shown inline in fabric + mlflow.delete_run(run.info.run_id) + et = time.time() + return et - st + + +def infer_signature(X_train=None, y_train=None, dataframe=None, label=None): + if X_train is not None: + if issparse(X_train): + X_train = X_train.tocsr() + elif isinstance(X_train, psDataFrame): + X_train = X_train.to_spark(index_col="tmp_index_col") + y_train = None + try: + signature = mlflow.models.infer_signature(X_train, y_train) + return signature + except (TypeError, MlflowException, Exception) as e: + logger.debug( + f"Failed to infer signature from X_train {type(X_train)} and y_train {type(y_train)}, error: {e}" + ) + else: + if dataframe is not None and label is not None: + X = dataframe.drop(columns=label) + y = dataframe[label] + if isinstance(dataframe, psDataFrame): + X = X.to_spark(index_col="tmp_index_col") + y = None + try: + signature = mlflow.models.infer_signature(X, y) + return signature + except (TypeError, MlflowException, Exception) as e: + logger.debug( + f"Failed to infer signature from dataframe {type(dataframe)} and label {label}, error: {e}" + ) + + +def _mlflow_wrapper(evaluation_func, mlflow_exp_id, mlflow_config=None, extra_tags=None, autolog=False): + def wrapped(*args, **kwargs): + if mlflow_config is not None: + from synapse.ml.mlflow import set_mlflow_env_config + + set_mlflow_env_config(mlflow_config) + import mlflow + + if mlflow_exp_id is not None: + mlflow.set_experiment(experiment_id=mlflow_exp_id) + if autolog: + if mlflow.__version__ > "2.5.0" and extra_tags is not None: + mlflow.autolog(silent=True, extra_tags=extra_tags) + else: + mlflow.autolog(silent=True) + logger.debug("activated mlflow autologging on executor") + else: + mlflow.autolog(disable=True, silent=True) + # with mlflow.start_run(nested=True): + result = evaluation_func(*args, **kwargs) + return result + + return wrapped + + +def _get_notebook_name(): + return None + + +class MLflowIntegration: + def __init__(self, experiment_type="automl", mlflow_exp_name=None, extra_tag=None): + try: + from synapse.ml.mlflow import get_mlflow_env_config + + self.driver_mlflow_env_config = get_mlflow_env_config() + self._on_internal = True + self._notebook_name = _get_notebook_name() + except ModuleNotFoundError: + self.driver_mlflow_env_config = None + self._on_internal = False + self._notebook_name = None + + self.autolog = False + self.manual_log = False + self.parent_run_id = None + self.parent_run_name = None + self.log_type = "null" + self.resume_params = {} + self.train_func = None + self.best_iteration = None + self.best_run_id = None + self.child_counter = 0 + self.infos = [] + self.manual_run_ids = [] + self.has_summary = False + self.has_model = False + self.only_history = False + self._do_log_model = True + + self.extra_tag = ( + extra_tag + if extra_tag 
is not None + else {"extra_tag.sid": f"flaml_{__version__}_{int(time.time())}_{random.randint(1001, 9999)}"} + ) + self.start_time = time.time() + self.mlflow_client = mlflow.tracking.MlflowClient() + parent_run_info = mlflow.active_run().info if mlflow.active_run() is not None else None + if parent_run_info: + self.experiment_id = parent_run_info.experiment_id + self.parent_run_id = parent_run_info.run_id + # attribute run_name is not available before mlflow 2.0.1 + self.parent_run_name = parent_run_info.run_name if hasattr(parent_run_info, "run_name") else "flaml_run" + if self.parent_run_name == "": + self.parent_run_name = mlflow.active_run().data.tags["mlflow.runName"] + else: + if mlflow_exp_name is None: + if mlflow.tracking.fluent._active_experiment_id is None: + mlflow_exp_name = self._notebook_name if self._notebook_name else "flaml_default_experiment" + mlflow.set_experiment(experiment_name=mlflow_exp_name) + else: + mlflow.set_experiment(experiment_name=mlflow_exp_name) + self.experiment_id = mlflow.tracking.fluent._active_experiment_id + self.experiment_name = mlflow.get_experiment(self.experiment_id).name + self.experiment_type = experiment_type + self.update_autolog_state() + + if self.autolog: + # only end user created parent run in autolog scenario + mlflow.end_run() + + def set_mlflow_config(self): + if self.driver_mlflow_env_config is not None: + from synapse.ml.mlflow import set_mlflow_env_config + + set_mlflow_env_config(self.driver_mlflow_env_config) + + def wrap_evaluation_function(self, evaluation_function): + wrapped_evaluation_function = _mlflow_wrapper( + evaluation_function, self.experiment_id, self.driver_mlflow_env_config, self.extra_tag, self.autolog + ) + return wrapped_evaluation_function + + def set_best_iter(self, result): + # result: AutoML or ExperimentAnalysis + try: + self.best_iteration = result.best_iteration + except AttributeError: + self.best_iteration = None + + def update_autolog_state( + self, + ): + # Currently we disable autologging for better control in AutoML + _autolog = is_autolog_enabled() + self._do_log_model = AUTOLOGGING_INTEGRATIONS["mlflow"].get("log_models", True) + if self.experiment_type == "automl": + self.autolog = False + self.manual_log = mlflow.active_run() is not None or _autolog + self.log_type = "manual" + if _autolog: + logger.debug("Disabling autologging") + self.resume_params = AUTOLOGGING_INTEGRATIONS["mlflow"].copy() + mlflow.autolog(disable=True, silent=True, log_models=self._do_log_model) + self.log_type = "r_autolog" # 'r' for replace autolog with manual log + + elif self.experiment_type == "tune": + self.autolog = _autolog + self.manual_log = not self.autolog and mlflow.active_run() is not None + + if self.autolog: + self.log_type = "autolog" + + if self.manual_log: + self.log_type = "manual" + else: + raise ValueError(f"Unknown experiment type: {self.experiment_type}") + + def copy_mlflow_run(self, src_id, target_id, components=["param", "metric", "tag"]): + src_run = self.mlflow_client.get_run(src_id) + if "param" in components: + for param_name, param_value in src_run.data.params.items(): + try: + self.mlflow_client.log_param(target_id, param_name, param_value) + except mlflow.exceptions.MlflowException: + pass + + timestamp = int(time.time() * 1000) + + if "metric" in components: + _metrics = [Metric(key, value, timestamp, 0) for key, value in src_run.data.metrics.items()] + else: + _metrics = [] + + if "tag" in components: + _tags = [ + RunTag(key, str(value)) + for key, value in src_run.data.tags.items() + 
if key.startswith("flaml") or key.startswith("synapseml") + ] + else: + _tags = [] + self.mlflow_client.log_batch(run_id=target_id, metrics=_metrics, params=[], tags=_tags) + + def record_trial(self, result, trial, metric): + if isinstance(result, dict): + metrics = flatten_dict(result) + metric_name = str(list(metrics.keys())) + else: + metrics = {metric: result} + metric_name = metric + + if "ml" in trial.config.keys(): + params = trial.config["ml"] + else: + params = trial.config + + info = { + "metrics": metrics, + "params": params, + "tags": { + "flaml.best_run": False, + "flaml.iteration_number": self.child_counter, + "flaml.version": __version__, + "flaml.meric": metric_name, + "flaml.run_source": "flaml-tune", + "flaml.log_type": self.log_type, + }, + "submetrics": { + "values": [], + }, + } + + self.infos.append(info) + + if not self.autolog and not self.manual_log: + return + + if self.manual_log: + with mlflow.start_run( + nested=True, run_name=f"{self.parent_run_name}_child_{self.child_counter}" + ) as child_run: + self._log_info_to_run(info, child_run.info.run_id, log_params=True) + self.manual_run_ids.append(child_run.info.run_id) + self.child_counter += 1 + + def log_tune(self, analysis, metric): + self.set_best_iter(analysis) + if self.autolog: + if self.parent_run_id is not None: + mlflow.start_run(run_id=self.parent_run_id, experiment_id=self.experiment_id) + mlflow.log_metric("num_child_runs", len(self.infos)) + self.adopt_children(analysis) + + if self.manual_log: + if "ml" in analysis.best_config.keys(): + mlflow.log_params(analysis.best_config["ml"]) + else: + mlflow.log_params(analysis.best_config) + mlflow.log_metric("best_" + metric, analysis.best_result[metric]) + best_mlflow_run_id = self.manual_run_ids[analysis.best_iteration] + best_mlflow_run_name = self.mlflow_client.get_run(best_mlflow_run_id).info.run_name + analysis.best_run_id = best_mlflow_run_id + analysis.best_run_name = best_mlflow_run_name + self.mlflow_client.set_tag(best_mlflow_run_id, "flaml.best_run", True) + self.best_run_id = best_mlflow_run_id + if not self.has_summary: + self.copy_mlflow_run(best_mlflow_run_id, self.parent_run_id) + self.has_summary = True + + def log_model(self, model, estimator, signature=None): + if not self._do_log_model: + return + logger.debug(f"logging model {estimator}") + if estimator.endswith("_spark"): + mlflow.spark.log_model(model, estimator, signature=signature) + mlflow.spark.log_model(model, "model", signature=signature) + elif estimator in ["lgbm"]: + mlflow.lightgbm.log_model(model, estimator, signature=signature) + elif estimator in ["transformer", "transformer_ms"]: + mlflow.transformers.log_model(model, estimator, signature=signature) + elif estimator in ["arima", "sarimax", "holt-winters", "snaive", "naive", "savg", "avg", "ets"]: + mlflow.statsmodels.log_model(model, estimator, signature=signature) + elif estimator in ["tcn", "tft"]: + mlflow.pytorch.log_model(model, estimator, signature=signature) + elif estimator in ["prophet"]: + mlflow.prophet.log_model(model, estimator, signature=signature) + elif estimator in ["orbit"]: + pass + else: + mlflow.sklearn.log_model(model, estimator, signature=signature) + + def _pickle_and_log_artifact(self, obj, artifact_name, pickle_fpath="temp_.pkl"): + if not self._do_log_model: + return + with open(pickle_fpath, "wb") as f: + pickle.dump(obj, f) + mlflow.log_artifact(pickle_fpath, artifact_name) + + def pickle_and_log_automl_artifacts(self, automl, model, estimator, signature=None): + """log automl artifacts to 
mlflow + load back with `automl = mlflow.pyfunc.load_model(model_run_id_or_uri)`, then do prediction with `automl.predict(X)` + """ + logger.debug(f"logging automl artifacts {estimator}") + self._pickle_and_log_artifact(automl.feature_transformer, "feature_transformer", "feature_transformer.pkl") + self._pickle_and_log_artifact(automl.label_transformer, "label_transformer", "label_transformer.pkl") + # Test test_mlflow 1 and 4 will get error: TypeError: cannot pickle '_io.TextIOWrapper' object + # try: + # self._pickle_and_log_artifact(automl, "automl", "automl.pkl") + # except TypeError: + # pass + if estimator.endswith("_spark"): + # spark pipeline is not supported yet + return + feature_transformer = automl.feature_transformer + if isinstance(feature_transformer, Pipeline): + pipeline = feature_transformer + pipeline.steps.append(("estimator", model)) + elif isinstance(feature_transformer, SparkPipeline): + pipeline = feature_transformer + pipeline.stages.append(model) + elif not estimator.endswith("_spark"): + steps = [("feature_transformer", feature_transformer)] + steps.append(("estimator", model)) + pipeline = Pipeline(steps) + else: + stages = [feature_transformer] + stages.append(model) + pipeline = SparkPipeline(stages=stages) + if isinstance(pipeline, SparkPipeline): + logger.debug(f"logging spark pipeline {estimator}") + mlflow.spark.log_model(pipeline, "automl_pipeline", signature=signature) + else: + # Add a log named "model" to fit default settings + logger.debug(f"logging sklearn pipeline {estimator}") + mlflow.sklearn.log_model(pipeline, "automl_pipeline", signature=signature) + mlflow.sklearn.log_model(pipeline, "model", signature=signature) + + def record_state(self, automl, search_state, estimator): + _st = time.time() + automl_metric_name = ( + automl._state.metric if isinstance(automl._state.metric, str) else automl._state.error_metric + ) + + if automl._state.error_metric.startswith("1-"): + automl_metric_value = 1 - search_state.val_loss + elif automl._state.error_metric.startswith("-"): + automl_metric_value = -search_state.val_loss + else: + automl_metric_value = search_state.val_loss + + if "ml" in search_state.config: + config = search_state.config["ml"] + else: + config = search_state.config + + info = { + "metrics": { + "iter_counter": automl._track_iter, + "trial_time": search_state.trial_time, + "wall_clock_time": automl._state.time_from_start, + "validation_loss": search_state.val_loss, + "best_validation_loss": search_state.best_loss, + automl_metric_name: automl_metric_value, + }, + "tags": { + "flaml.best_run": False, + "flaml.estimator_name": estimator, + "flaml.estimator_class": search_state.learner_class.__name__, + "flaml.iteration_number": automl._track_iter, + "flaml.version": __version__, + "flaml.learner": estimator, + "flaml.sample_size": search_state.sample_size, + "flaml.meric": automl_metric_name, + "flaml.run_source": "flaml-automl", + "flaml.log_type": self.log_type, + "flaml.automl_user_configurations": json.dumps(automl._automl_user_configurations), + }, + "params": { + "sample_size": search_state.sample_size, + "learner": estimator, + **config, + }, + "submetrics": { + "iter_counter": automl._iter_per_learner[estimator], + "values": [], + }, + } + + if (search_state.metric_for_logging is not None) and ( + "intermediate_results" in search_state.metric_for_logging + ): + info["submetrics"]["values"] = search_state.metric_for_logging["intermediate_results"] + + self.infos.append(info) + + if not self.autolog and not self.manual_log: + 
return + if self.manual_log: + if self.parent_run_name is not None: + run_name = f"{self.parent_run_name}_child_{self.child_counter}" + else: + run_name = None + with mlflow.start_run(nested=True, run_name=run_name) as child_run: + self._log_info_to_run(info, child_run.info.run_id, log_params=True) + if automl._state.model_history: + self.log_model( + search_state.trained_estimator._model, estimator, signature=automl.estimator_signature + ) + self.pickle_and_log_automl_artifacts( + automl, search_state.trained_estimator, estimator, signature=automl.pipeline_signature + ) + self.manual_run_ids.append(child_run.info.run_id) + self.child_counter += 1 + + def log_automl(self, automl): + self.set_best_iter(automl) + if self.autolog: + if self.parent_run_id is not None: + mlflow.start_run(run_id=self.parent_run_id, experiment_id=self.experiment_id) + mlflow.log_metric("best_validation_loss", automl._state.best_loss) + mlflow.log_metric("best_iteration", automl._best_iteration) + mlflow.log_metric("num_child_runs", len(self.infos)) + if automl._trained_estimator is not None and not self.has_model: + self.log_model( + automl._trained_estimator._model, automl.best_estimator, signature=automl.estimator_signature + ) + self.pickle_and_log_automl_artifacts( + automl, automl.model, automl.best_estimator, signature=automl.pipeline_signature + ) + self.has_model = True + + self.adopt_children(automl) + + if self.manual_log: + best_mlflow_run_id = self.manual_run_ids[automl._best_iteration] + best_run_name = self.mlflow_client.get_run(best_mlflow_run_id).info.run_name + automl.best_run_id = best_mlflow_run_id + automl.best_run_name = best_run_name + self.mlflow_client.set_tag(best_mlflow_run_id, "flaml.best_run", True) + self.best_run_id = best_mlflow_run_id + if self.parent_run_id is not None: + conf = automl._config_history[automl._best_iteration][1].copy() + if "ml" in conf.keys(): + conf = conf["ml"] + + mlflow.log_params(conf) + mlflow.log_param("best_learner", automl._best_estimator) + if not self.has_summary: + logger.info(f"logging best model {automl.best_estimator}") + self.copy_mlflow_run(best_mlflow_run_id, self.parent_run_id) + self.has_summary = True + if automl._trained_estimator is not None and not self.has_model: + self.log_model( + automl._trained_estimator._model, + automl.best_estimator, + signature=automl.estimator_signature, + ) + self.pickle_and_log_automl_artifacts( + automl, automl.model, automl.best_estimator, signature=automl.pipeline_signature + ) + self.has_model = True + + def resume_mlflow(self): + if len(self.resume_params) > 0: + mlflow.autolog(**self.resume_params) + + def _log_info_to_run(self, info, run_id, log_params=False): + _metrics = [Metric(key, value, int(time.time() * 1000), 0) for key, value in info["metrics"].items()] + _tags = [RunTag(key, str(value)) for key, value in info["tags"].items()] + _params = [ + Param(key, str(value)) + for key, value in info["params"].items() + if log_params or key in ["sample_size", "learner"] + ] + self.mlflow_client.log_batch(run_id=run_id, metrics=_metrics, params=_params, tags=_tags) + + if len(info["submetrics"]["values"]) > 0: + for each_entry in info["submetrics"]["values"]: + with mlflow.start_run(nested=True) as run: + each_entry.update({"iter_counter": info["submetrics"]["iter_counter"]}) + _metrics = [Metric(key, value, int(time.time() * 1000), 0) for key, value in each_entry.items()] + _tags = [RunTag("mlflow.parentRunId", run_id)] + self.mlflow_client.log_batch(run_id=run.info.run_id, metrics=_metrics, params=[], 
tags=_tags) + del info["submetrics"]["values"] + + def adopt_children(self, result=None): + """ + Set autologging child runs to nested by fetching them after all child runs are completed. + Note that this may cause disorder when concurrently starting multiple AutoML processes + with the same experiment name if the MLflow version is less than or equal to "2.5.0". + """ + if self.autolog: + best_iteration = self.best_iteration + if best_iteration is None: + logger.warning("best_iteration is None, cannot identify best run") + raw_autolog_child_runs = mlflow.search_runs( + experiment_ids=[self.experiment_id], + order_by=["attributes.start_time DESC"], + max_results=SEARCH_MAX_RESULTS, + output_format="list", + filter_string=( + f"tags.extra_tag.sid = '{self.extra_tag['extra_tag.sid']}'" if mlflow.__version__ > "2.5.0" else "" + ), + ) + self.child_counter = 0 + + # From latest to earliest, remove duplicate cross-validation runs + _exist_child_run_params = [] # for deduplication of cross-validation child runs + _to_keep_autolog_child_runs = [] + for autolog_child_run in raw_autolog_child_runs: + child_start_time = autolog_child_run.info.start_time / 1000 + + if child_start_time < self.start_time: + continue + + _current_child_run_params = autolog_child_run.data.params + # remove n_estimators as some models will train with small n_estimators to estimate time budget + if self.experiment_type == "automl": + _current_child_run_params.pop("n_estimators", None) + if _current_child_run_params in _exist_child_run_params: + # remove duplicate cross-validation run + self.mlflow_client.delete_run(autolog_child_run.info.run_id) + continue + else: + _exist_child_run_params.append(_current_child_run_params) + _to_keep_autolog_child_runs.append(autolog_child_run) + + # From earliest to latest, set tags and child_counter + autolog_child_runs = _to_keep_autolog_child_runs[::-1] + for autolog_child_run in autolog_child_runs: + child_run_id = autolog_child_run.info.run_id + child_run_parent_id = autolog_child_run.data.tags.get("mlflow.parentRunId", None) + child_start_time = autolog_child_run.info.start_time / 1000 + + if child_start_time < self.start_time: + continue + + if all( + [ + len(autolog_child_run.data.params) == 0, + len(autolog_child_run.data.metrics) == 0, + child_run_id != self.parent_run_id, + ] + ): + # remove empty run + # empty run could be created by mlflow autologging + self.mlflow_client.delete_run(autolog_child_run.info.run_id) + continue + + if all( + [ + child_run_id != self.parent_run_id, + child_run_parent_id is None or child_run_parent_id == self.parent_run_id, + ] + ): + if self.parent_run_id is not None: + self.mlflow_client.set_tag( + child_run_id, + "mlflow.parentRunId", + self.parent_run_id, + ) + if IS_RENAME_CHILD_RUN: + self.mlflow_client.set_tag( + child_run_id, + "mlflow.runName", + f"{self.parent_run_name}_child_{self.child_counter}", + ) + self.mlflow_client.set_tag(child_run_id, "flaml.child_counter", self.child_counter) + + # merge autolog child run and corresponding manual run + flaml_info = self.infos[self.child_counter] + child_run = self.mlflow_client.get_run(child_run_id) + self._log_info_to_run(flaml_info, child_run_id, log_params=False) + + if self.experiment_type == "automl": + if "learner" not in child_run.data.params: + self.mlflow_client.log_param(child_run_id, "learner", flaml_info["params"]["learner"]) + if "sample_size" not in child_run.data.params: + self.mlflow_client.log_param( + child_run_id, "sample_size", flaml_info["params"]["sample_size"] + ) + + if 
self.child_counter == best_iteration:
+                    self.mlflow_client.set_tag(child_run_id, "flaml.best_run", True)
+                    if result is not None:
+                        result.best_run_id = child_run_id
+                        result.best_run_name = child_run.info.run_name
+                        self.best_run_id = child_run_id
+                    if self.parent_run_id is not None and not self.has_summary:
+                        self.copy_mlflow_run(child_run_id, self.parent_run_id)
+                        self.has_summary = True
+                self.child_counter += 1
+
+    def retrain(self, train_func, config):
+        """Retrain with the given config; added for logging the best config and model to the parent run.
+        No longer needed after v2.0.2post2, as we no longer log the best config and model to the parent run.
+        """
+        if self.autolog:
+            self.set_mlflow_config()
+            self.has_summary = True
+            with mlflow.start_run(run_id=self.parent_run_id):
+                train_func(config)
+
+    def __del__(self):
+        # mlflow.end_run()  # this will end the parent run when re-fitting an AutoML instance. Bug 2922020: Inconsistent Run Creation Output
+        self.resume_mlflow()
+
+
+def register_automl_pipeline(automl, model_name=None, signature=None):
+    pipeline = automl.automl_pipeline
+    if pipeline is None:
+        logger.warning("pipeline not found, cannot register it")
+        return
+    if model_name is None:
+        model_name = automl._mlflow_exp_name + "_pipeline"
+    if automl.best_run_id is None:
+        mlflow.sklearn.log_model(
+            pipeline,
+            "automl_pipeline",
+            registered_model_name=model_name,
+            signature=automl.pipeline_signature if signature is None else signature,
+        )
+        mvs = mlflow.search_model_versions(
+            filter_string=f"name='{model_name}'", order_by=["attribute.version_number ASC"], max_results=1
+        )
+        return mvs[0]
+    else:
+        best_run = mlflow.get_run(automl.best_run_id)
+        model_uri = f"runs:/{best_run.info.run_id}/automl_pipeline"
+        return mlflow.register_model(model_uri, model_name)
diff --git a/flaml/tune/tune.py b/flaml/tune/tune.py
index ce249621d5..0bae6b9109 100644
--- a/flaml/tune/tune.py
+++ b/flaml/tune/tune.py
@@ -29,6 +29,18 @@
 from .result import DEFAULT_METRIC
 from .trial import Trial
 
+try:
+    import mlflow
+except ImportError:
+    mlflow = None
+try:
+    from flaml.fabric.mlflow import MLflowIntegration, is_autolog_enabled
+
+    internal_mlflow = True
+except ImportError:
+    internal_mlflow = False
+
+
 logger = logging.getLogger(__name__)
 logger.propagate = False
 _use_ray = True
@@ -44,6 +56,7 @@ class ExperimentAnalysis(EA):
     """Class for storing the experiment results."""
 
     def __init__(self, trials, metric, mode, lexico_objectives=None):
+        self.best_run_id = None
         try:
             super().__init__(self, None, trials, metric, mode)
             self.lexico_objectives = lexico_objectives
@@ -128,6 +141,16 @@ def best_result(self) -> Dict:
         else:
             return self.best_trial.last_result
 
+    @property
+    def best_iteration(self) -> Optional[int]:
+        """The zero-based index of the best trial among all trials."""
+        best_trial = self.best_trial
+        best_trial_id = best_trial.trial_id
+        for i, trial in enumerate(self.trials):
+            if trial.trial_id == best_trial_id:
+                return i
+        return None
+
 def report(_metric=None, **kwargs):
     """A function called by the HPO application to report final or intermediate
@@ -234,6 +257,9 @@ def run(
     lexico_objectives: Optional[dict] = None,
     force_cancel: Optional[bool] = False,
     n_concurrent_trials: Optional[int] = 0,
+    mlflow_exp_name: Optional[str] = None,
+    automl_info: Optional[Tuple[float]] = None,
+    extra_tag: Optional[dict] = None,
     **ray_args,
 ):
     """The function-based way of performing HPO.
@@ -424,6 +450,10 @@ def easy_objective(config):
         }
         ```
         force_cancel: boolean, default=False | Whether to forcely cancel the PySpark job if overtime.
+ mlflow_exp_name: str, default=None | The name of the mlflow experiment. This should be specified if + enable mlflow autologging on Spark. Otherwise it will log all the results into the experiment of the + same name as the basename of main entry file. + automl_info: tuple, default=None | The information of the automl run. It should be a tuple of (mlflow_log_latency,). n_concurrent_trials: int, default=0 | The number of concurrent trials when perform hyperparameter tuning with Spark. Only valid when use_spark=True and spark is required: `pip install flaml[spark]`. Please check @@ -431,6 +461,7 @@ def easy_objective(config): for more details about installing Spark. When tune.run() is called from AutoML, it will be overwritten by the value of `n_concurrent_trials` in AutoML. When <= 0, the concurrent trials will be set to the number of executors. + extra_tag: dict, default=None | Extra tags to be added to the mlflow runs created by autologging. **ray_args: keyword arguments to pass to ray.tune.run(). Only valid when use_ray=True. """ @@ -438,10 +469,12 @@ def easy_objective(config): global _verbose global _running_trial global _training_iteration + global internal_mlflow old_use_ray = _use_ray old_verbose = _verbose old_running_trial = _running_trial old_training_iteration = _training_iteration + if log_file_name: dir_name = os.path.dirname(log_file_name) if dir_name: @@ -486,6 +519,13 @@ def easy_objective(config): else: logger.setLevel(logging.CRITICAL) + if internal_mlflow and not automl_info and (mlflow.active_run() or is_autolog_enabled()): + mlflow_integration = MLflowIntegration("tune", mlflow_exp_name, extra_tag) + evaluation_function = mlflow_integration.wrap_evaluation_function(evaluation_function) + _internal_mlflow = not automl_info # True if mlflow_integration will be used for logging + else: + _internal_mlflow = False + from .searcher.blendsearch import CFO, BlendSearch, RandomSearch if lexico_objectives is not None: @@ -713,11 +753,15 @@ def easy_objective(config): time_budget_s = np.inf num_failures = 0 upperbound_num_failures = (len(evaluated_rewards) if evaluated_rewards else 0) + max_failure + logger.debug(f"automl_info: {automl_info}") while ( time.time() - time_start < time_budget_s and (num_samples < 0 or num_trials < num_samples) and num_failures < upperbound_num_failures ): + if automl_info and automl_info[0] > 0 and time_budget_s < np.inf: + time_budget_s -= automl_info[0] + logger.debug(f"Remaining time budget with mlflow log latency: {time_budget_s} seconds.") while len(_runner.running_trials) < n_concurrent_trials: # suggest trials for spark trial_next = _runner.step() @@ -750,6 +794,9 @@ def easy_objective(config): trial_to_run = trials_to_run[0] _runner.running_trial = trial_to_run if result is not None: + if _internal_mlflow: + mlflow_integration.record_trial(result, trial_to_run, metric) + if isinstance(result, dict): if result: logger.info(f"Brief result: {result}") @@ -768,6 +815,20 @@ def easy_objective(config): mode=mode, lexico_objectives=lexico_objectives, ) + analysis.search_space = config + + if _internal_mlflow: + mlflow_integration.log_tune(analysis, metric) + # try: + # _best_config = analysis.best_config + # except Exception: + # _best_config = None + # if _best_config: + # parallel( + # delayed(mlflow_integration.retrain)(evaluation_function, analysis.best_config) + # for dummy in [0] + # ) + return analysis finally: # recover the global variables in case of nested run @@ -779,6 +840,8 @@ def easy_objective(config): _runner = old_runner 
logger.handlers = old_handlers logger.setLevel(old_level) + if _internal_mlflow: + mlflow_integration.adopt_children() # simple sequential run without using tune.run() from ray time_start = time.time() @@ -812,7 +875,11 @@ def easy_objective(config): result = None with PySparkOvertimeMonitor(time_start, time_budget_s, force_cancel): result = evaluation_function(trial_to_run.config) + logger.debug(f"result in tune: {trial_to_run}, {result}") if result is not None: + if _internal_mlflow: + mlflow_integration.record_trial(result, trial_to_run, metric) + if isinstance(result, dict): if result: report(**result) @@ -838,6 +905,19 @@ def easy_objective(config): mode=mode, lexico_objectives=lexico_objectives, ) + analysis.search_space = config + if _internal_mlflow: + mlflow_integration.log_tune(analysis, metric) + if analysis.best_run_id is not None: + logger.info(f"Best MLflow run name: {analysis.best_run_name}") + logger.info(f"Best MLflow run id: {analysis.best_run_id}") + # try: + # _best_config = analysis.best_config + # except Exception: + # _best_config = None + # if _best_config: + # mlflow_integration.retrain(evaluation_function, analysis.best_config) + return analysis finally: # recover the global variables in case of nested run @@ -849,6 +929,8 @@ def easy_objective(config): _runner = old_runner logger.handlers = old_handlers logger.setLevel(old_level) + if _internal_mlflow: + mlflow_integration.adopt_children() class Tuner: diff --git a/setup.py b/setup.py index 8592d7fee5..5783e99de0 100644 --- a/setup.py +++ b/setup.py @@ -55,7 +55,8 @@ "lightgbm>=2.3.1", "xgboost>=0.90,<2.0.0", "scipy>=1.4.1", - "pandas>=1.1.4", + "pandas>=1.1.4,<2.0.0; python_version<'3.10'", + "pandas>=1.1.4; python_version>='3.10'", "scikit-learn>=1.0.0", "thop", "pytest>=6.1.1", @@ -78,8 +79,8 @@ "hcrystalball==0.1.10", "seqeval", "pytorch-forecasting>=0.9.0,<=0.10.1; python_version<'3.11'", - "mlflow", - "pyspark>=3.2.0", + # "pytorch-forecasting==0.10.1; python_version=='3.11'", + "mlflow==2.15.1", "joblibspark>=0.5.0", "joblib<=1.3.2", "nbconvert", @@ -92,6 +93,7 @@ "pydantic==1.10.9", "sympy", "wolframalpha", + "dill", # a drop in replacement of pickle ], "catboost": [ "catboost>=0.26,<1.2; python_version<'3.11'", @@ -139,7 +141,8 @@ "prophet>=1.0.1", "statsmodels>=0.12.2", "hcrystalball==0.1.10", - "pytorch-forecasting>=0.9.0", + "pytorch-forecasting>=0.9.0; python_version<'3.11'", + # "pytorch-forecasting==0.10.1; python_version=='3.11'", "pytorch-lightning==1.9.0", "tensorboardX==2.6", ], diff --git a/test/automl/test_extra_models.py b/test/automl/test_extra_models.py new file mode 100644 index 0000000000..6c5cac0992 --- /dev/null +++ b/test/automl/test_extra_models.py @@ -0,0 +1,310 @@ +import os +import sys +import unittest +import warnings +from collections import defaultdict + +import mlflow +import numpy as np +import pandas as pd +import pytest +import scipy +from packaging.version import Version +from sklearn.datasets import load_breast_cancer, load_diabetes, load_iris +from sklearn.model_selection import train_test_split + +from flaml import AutoML +from flaml.automl.ml import sklearn_metric_loss_score +from flaml.tune.spark.utils import check_spark + +leaderboard = defaultdict(dict) + +warnings.simplefilter(action="ignore") +if sys.platform == "darwin" or "nt" in os.name: + # skip this test if the platform is not linux + skip_spark = True +else: + try: + import pyspark + from pyspark.ml.evaluation import MulticlassClassificationEvaluator, RegressionEvaluator + from pyspark.ml.feature import 
VectorAssembler + + from flaml.automl.spark.utils import to_pandas_on_spark + + spark = ( + pyspark.sql.SparkSession.builder.appName("MyApp") + .master("local[2]") + .config( + "spark.jars.packages", + ( + "com.microsoft.azure:synapseml_2.12:1.0.2," + "org.apache.hadoop:hadoop-azure:3.3.5," + "com.microsoft.azure:azure-storage:8.6.6," + f"org.mlflow:mlflow-spark_2.12:{mlflow.__version__}" + if Version(mlflow.__version__) >= Version("2.9.0") + else f"org.mlflow:mlflow-spark:{mlflow.__version__}" + ), + ) + .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") + .config("spark.sql.debug.maxToStringFields", "100") + .config("spark.driver.extraJavaOptions", "-Xss1m") + .config("spark.executor.extraJavaOptions", "-Xss1m") + .getOrCreate() + ) + spark.sparkContext._conf.set( + "spark.mlflow.pysparkml.autolog.logModelAllowlistFile", + "https://mmlspark.blob.core.windows.net/publicwasb/log_model_allowlist.txt", + ) + # spark.sparkContext.setLogLevel("ERROR") + spark_available, _ = check_spark() + skip_spark = not spark_available + except ImportError: + skip_spark = True + + +def _test_regular_models(estimator_list, task): + if isinstance(estimator_list, str): + estimator_list = [estimator_list] + if task == "classification": + load_dataset_func = load_iris + metric = "accuracy" + else: + load_dataset_func = load_diabetes + metric = "r2" + + x, y = load_dataset_func(return_X_y=True, as_frame=True) + x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=7654321) + + automl_experiment = AutoML() + automl_settings = { + "max_iter": 5, + "task": task, + "estimator_list": estimator_list, + "metric": metric, + } + automl_experiment.fit(X_train=x_train, y_train=y_train, **automl_settings) + predictions = automl_experiment.predict(x_test) + score = sklearn_metric_loss_score(metric, predictions, y_test) + for estimator_name in estimator_list: + leaderboard[task][estimator_name] = score + + +def _test_spark_models(estimator_list, task): + if isinstance(estimator_list, str): + estimator_list = [estimator_list] + if task == "classification": + load_dataset_func = load_iris + evaluator = MulticlassClassificationEvaluator( + labelCol="target", predictionCol="prediction", metricName="accuracy" + ) + metric = "accuracy" + + elif task == "regression": + load_dataset_func = load_diabetes + evaluator = RegressionEvaluator(labelCol="target", predictionCol="prediction", metricName="r2") + metric = "r2" + + elif task == "binary": + load_dataset_func = load_breast_cancer + evaluator = MulticlassClassificationEvaluator( + labelCol="target", predictionCol="prediction", metricName="accuracy" + ) + metric = "accuracy" + + final_cols = ["target", "features"] + extra_args = {} + + if estimator_list is not None and "aft_spark" in estimator_list: + # survival analysis task + pd_df = pd.read_csv( + "https://raw.githubusercontent.com/CamDavidsonPilon/lifelines/master/lifelines/datasets/rossi.csv" + ) + pd_df.rename(columns={"week": "target"}, inplace=True) + final_cols += ["arrest"] + extra_args["censorCol"] = "arrest" + else: + pd_df = load_dataset_func(as_frame=True).frame + + rename = {} + for attr in pd_df.columns: + rename[attr] = attr.replace(" ", "_") + pd_df = pd_df.rename(columns=rename) + df = spark.createDataFrame(pd_df) + df = df.repartition(4) + train, test = df.randomSplit([0.8, 0.2], seed=7654321) + feature_cols = [col for col in df.columns if col not in ["target", "arrest"]] + featurizer = VectorAssembler(inputCols=feature_cols, outputCol="features") + train_data 
= featurizer.transform(train)[final_cols] + test_data = featurizer.transform(test)[final_cols] + automl = AutoML() + settings = { + "max_iter": 1, + "estimator_list": estimator_list, # ML learner we intend to test + "task": task, # task type + "metric": metric, # metric to optimize + } + settings.update(extra_args) + df = to_pandas_on_spark(to_pandas_on_spark(train_data).to_spark(index_col="index")) + + automl.fit( + dataframe=df, + label="target", + **settings, + ) + + model = automl.model.estimator + predictions = model.transform(test_data) + predictions.show(5) + + score = evaluator.evaluate(predictions) + if estimator_list is not None: + for estimator_name in estimator_list: + leaderboard[task][estimator_name] = score + + +def _test_sparse_matrix_classification(estimator): + automl_experiment = AutoML() + automl_settings = { + "estimator_list": [estimator], + "time_budget": 2, + "metric": "auto", + "task": "classification", + "log_file_name": "test/sparse_classification.log", + "split_type": "uniform", + "n_jobs": 1, + "model_history": True, + } + X_train = scipy.sparse.random(1554, 21, dtype=int) + y_train = np.random.randint(3, size=1554) + automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings) + + +def load_multi_dataset(): + """multivariate time series forecasting dataset""" + import pandas as pd + + # pd.set_option("display.max_rows", None, "display.max_columns", None) + df = pd.read_csv( + "https://raw.githubusercontent.com/srivatsan88/YouTubeLI/master/dataset/nyc_energy_consumption.csv" + ) + # preprocessing data + df["timeStamp"] = pd.to_datetime(df["timeStamp"]) + df = df.set_index("timeStamp") + df = df.resample("D").mean() + df["temp"] = df["temp"].fillna(method="ffill") + df["precip"] = df["precip"].fillna(method="ffill") + df = df[:-2] # last two rows are NaN for 'demand' column so remove them + df = df.reset_index() + + return df + + +def _test_forecast(estimator_list, budget=10): + if isinstance(estimator_list, str): + estimator_list = [estimator_list] + df = load_multi_dataset() + # split data into train and test + time_horizon = 180 + num_samples = df.shape[0] + split_idx = num_samples - time_horizon + train_df = df[:split_idx] + test_df = df[split_idx:] + # test dataframe must contain values for the regressors / multivariate variables + X_test = test_df[["timeStamp", "precip", "temp"]] + y_test = test_df["demand"] + # return + automl = AutoML() + settings = { + "time_budget": budget, # total running time in seconds + "metric": "mape", # primary metric + "task": "ts_forecast", # task type + "log_file_name": "test/energy_forecast_numerical.log", # flaml log file + "log_dir": "logs/forecast_logs", # tcn/tft log folder + "eval_method": "holdout", + "log_type": "all", + "label": "demand", + "estimator_list": estimator_list, + } + """The main flaml automl API""" + automl.fit(dataframe=train_df, **settings, period=time_horizon) + print(automl.best_config) + pred_y = automl.predict(X_test) + mape = sklearn_metric_loss_score("mape", pred_y, y_test) + for estimator_name in estimator_list: + leaderboard["forecast"][estimator_name] = mape + + +class TestExtraModel(unittest.TestCase): + @unittest.skipIf(skip_spark, reason="Spark is not installed. Skip all spark tests.") + def test_rf_spark(self): + tasks = ["classification", "regression"] + for task in tasks: + _test_spark_models("rf_spark", task) + + @unittest.skipIf(skip_spark, reason="Spark is not installed. 
Skip all spark tests.") + def test_nb_spark(self): + _test_spark_models("nb_spark", "classification") + + @unittest.skipIf(skip_spark, reason="Spark is not installed. Skip all spark tests.") + def test_glr(self): + _test_spark_models("glr_spark", "regression") + + @unittest.skipIf(skip_spark, reason="Spark is not installed. Skip all spark tests.") + def test_lr(self): + _test_spark_models("lr_spark", "regression") + + @unittest.skipIf(skip_spark, reason="Spark is not installed. Skip all spark tests.") + def test_svc_spark(self): + _test_spark_models("svc_spark", "binary") + + @unittest.skipIf(skip_spark, reason="Spark is not installed. Skip all spark tests.") + def test_gbt_spark(self): + tasks = ["binary", "regression"] + for task in tasks: + _test_spark_models("gbt_spark", task) + + @unittest.skipIf(skip_spark, reason="Spark is not installed. Skip all spark tests.") + def test_aft(self): + _test_spark_models("aft_spark", "regression") + + @unittest.skipIf(skip_spark, reason="Spark is not installed. Skip all spark tests.") + def test_default_spark(self): + _test_spark_models(None, "classification") + + def test_svc(self): + _test_regular_models("svc", "classification") + _test_sparse_matrix_classification("svc") + + def test_sgd(self): + tasks = ["classification", "regression"] + for task in tasks: + _test_regular_models("sgd", task) + _test_sparse_matrix_classification("sgd") + + def test_enet(self): + _test_regular_models("enet", "regression") + + def test_lassolars(self): + _test_regular_models("lassolars", "regression") + _test_forecast("lassolars") + + def test_seasonal_naive(self): + _test_forecast("snaive") + + def test_naive(self): + _test_forecast("naive") + + def test_seasonal_avg(self): + _test_forecast("savg") + + def test_avg(self): + _test_forecast("avg") + + @unittest.skipIf(skip_spark, reason="Skip on Mac or Windows") + def test_tcn(self): + _test_forecast("tcn") + + +if __name__ == "__main__": + unittest.main() + print(leaderboard) diff --git a/test/automl/test_forecast.py b/test/automl/test_forecast.py index 8f0a24a1cf..6e5d97d4f8 100644 --- a/test/automl/test_forecast.py +++ b/test/automl/test_forecast.py @@ -1,4 +1,5 @@ import datetime +import os import sys import numpy as np @@ -95,6 +96,7 @@ def test_forecast_automl(budget=10, estimators_when_no_prophet=["arima", "sarima ) +@pytest.mark.skipif(sys.platform == "darwin" or "nt" in os.name, reason="skip on mac or windows") def test_models(budget=3): n = 200 X = pd.DataFrame( @@ -571,7 +573,7 @@ def test_forecast_panel(budget=5): print(f"Training duration of best run: {automl.best_config_train_time}s") print(automl.model.estimator) """ pickle and save the automl object """ - import pickle + import dill as pickle with open("automl.pkl", "wb") as f: pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL) diff --git a/test/automl/test_mlflow.py b/test/automl/test_mlflow.py index 36fc53dba8..3ce893d223 100644 --- a/test/automl/test_mlflow.py +++ b/test/automl/test_mlflow.py @@ -1,3 +1,5 @@ +import pickle + import mlflow import mlflow.entities import pytest @@ -9,43 +11,55 @@ class TestMLFlowLoggingParam: def test_should_start_new_run_by_default(self, automl_settings): - with mlflow.start_run(): - parent = mlflow.last_active_run() + with mlflow.start_run() as parent_run: automl = AutoML() X_train, y_train = load_iris(return_X_y=True) automl.fit(X_train=X_train, y_train=y_train, **automl_settings) + try: + self._check_mlflow_parameters(automl, parent_run.info) + except FileNotFoundError: + print("[WARNING]: No file found") - children 
= self._get_child_runs(parent) + children = self._get_child_runs(parent_run) assert len(children) >= 1, f"Expected at least 1 child run, got {len(children)}" def test_should_not_start_new_run_when_mlflow_logging_set_to_false_in_init(self, automl_settings): - with mlflow.start_run(): - parent = mlflow.last_active_run() + with mlflow.start_run() as parent_run: automl = AutoML(mlflow_logging=False) X_train, y_train = load_iris(return_X_y=True) automl.fit(X_train=X_train, y_train=y_train, **automl_settings) + try: + self._check_mlflow_parameters(automl, parent_run.info) + except FileNotFoundError: + print("[WARNING]: logged model file not found; skipping parameter check") - children = self._get_child_runs(parent) + children = self._get_child_runs(parent_run) assert len(children) == 0, f"Expected 0 child runs, got {len(children)}" def test_should_not_start_new_run_when_mlflow_logging_set_to_false_in_fit(self, automl_settings): - with mlflow.start_run(): - parent = mlflow.last_active_run() + with mlflow.start_run() as parent_run: automl = AutoML() X_train, y_train = load_iris(return_X_y=True) automl.fit(X_train=X_train, y_train=y_train, mlflow_logging=False, **automl_settings) + try: + self._check_mlflow_parameters(automl, parent_run.info) + except FileNotFoundError: + print("[WARNING]: logged model file not found; skipping parameter check") - children = self._get_child_runs(parent) + children = self._get_child_runs(parent_run) assert len(children) == 0, f"Expected 0 child runs, got {len(children)}" def test_should_start_new_run_when_mlflow_logging_set_to_true_in_fit(self, automl_settings): - with mlflow.start_run(): - parent = mlflow.last_active_run() + with mlflow.start_run() as parent_run: automl = AutoML(mlflow_logging=False) X_train, y_train = load_iris(return_X_y=True) automl.fit(X_train=X_train, y_train=y_train, mlflow_logging=True, **automl_settings) + try: + self._check_mlflow_parameters(automl, parent_run.info) + except FileNotFoundError: + print("[WARNING]: logged model file not found; skipping parameter check") - children = self._get_child_runs(parent) + children = self._get_child_runs(parent_run) assert len(children) >= 1, f"Expected at least 1 child run, got {len(children)}" @staticmethod @@ -55,11 +69,40 @@ def _get_child_runs(parent_run: mlflow.entities.Run) -> DataFrame: [experiment_id], filter_string=f"tags.mlflow.parentRunId = '{parent_run.info.run_id}'" ) + @staticmethod + def _check_mlflow_parameters(automl: AutoML, run_info: mlflow.entities.RunInfo): + with open( + f"./mlruns/{run_info.experiment_id}/{run_info.run_id}/artifacts/automl_pipeline/model.pkl", "rb" + ) as f: + t = pickle.load(f) + if __name__ == "__main__": + print(t) + for param in automl.model._model._get_param_names(): + assert getattr(t._final_estimator._model, param) == getattr( + automl.model._model, param + ), "The mlflow logged model is not consistent with the automl model" + if __name__ == "__main__": + print(param, "\t", getattr(automl.model._model, param)) + print("[INFO]: successfully logged") + @pytest.fixture(scope="class") def automl_settings(self): + mlflow.end_run() return { - "time_budget": 2, # in seconds + "time_budget": 5, # in seconds "metric": "accuracy", "task": "classification", "log_file_name": "iris.log", } + + +if __name__ == "__main__": + s = TestMLFlowLoggingParam() + automl_settings = { + "time_budget": 5, # in seconds + "metric": "accuracy", + "task": "classification", + "log_file_name": "iris.log", + } + s.test_should_start_new_run_by_default(automl_settings) + s.test_should_start_new_run_when_mlflow_logging_set_to_true_in_fit(automl_settings) diff --git a/test/spark/test_0sparkml.py 
b/test/spark/test_0sparkml.py index 8ff3a1f2af..3f2198241c 100644 --- a/test/spark/test_0sparkml.py +++ b/test/spark/test_0sparkml.py @@ -5,6 +5,7 @@ import mlflow import pytest import sklearn.datasets as skds +from packaging.version import Version from flaml import AutoML from flaml.tune.spark.utils import check_spark @@ -20,23 +21,26 @@ from flaml.automl.spark.utils import to_pandas_on_spark - postfix_version = "-spark3.3," if pyspark.__version__ > "3.2" else "," spark = ( pyspark.sql.SparkSession.builder.appName("MyApp") .master("local[2]") .config( "spark.jars.packages", ( - f"com.microsoft.azure:synapseml_2.12:0.11.3{postfix_version}" + "com.microsoft.azure:synapseml_2.12:1.0.4," "org.apache.hadoop:hadoop-azure:3.3.5," "com.microsoft.azure:azure-storage:8.6.6," - f"org.mlflow:mlflow-spark:2.6.0" + f"org.mlflow:mlflow-spark_2.12:{mlflow.__version__}" + if Version(mlflow.__version__) >= Version("2.9.0") + else f"org.mlflow:mlflow-spark:{mlflow.__version__}" ), ) .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") .config("spark.sql.debug.maxToStringFields", "100") .config("spark.driver.extraJavaOptions", "-Xss1m") .config("spark.executor.extraJavaOptions", "-Xss1m") + # .config("spark.executor.memory", "48G") + # .config("spark.driver.memory", "48G") .getOrCreate() ) spark.sparkContext._conf.set( @@ -49,6 +53,10 @@ except ImportError: skip_spark = True +if sys.version_info >= (3, 11): + skip_py311 = True +else: + skip_py311 = False pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.") @@ -159,10 +167,11 @@ def test_spark_input_df(): settings = { "time_budget": 30, # total running time in seconds "metric": "roc_auc", - "estimator_list": ["lgbm_spark"], # list of ML learners; we tune lightgbm in this example + # "estimator_list": ["lgbm_spark"], # list of ML learners; we tune lightgbm in this example "task": "classification", # task type "log_file_name": "flaml_experiment.log", # flaml log file "seed": 7654321, # random seed + "eval_method": "holdout", } df = to_pandas_on_spark(to_pandas_on_spark(train_data).to_spark(index_col="index")) @@ -176,17 +185,17 @@ def test_spark_input_df(): try: model = automl.model.estimator predictions = model.transform(test_data) - predictions.show() - - # from synapse.ml.train import ComputeModelStatistics - # metrics = ComputeModelStatistics( - # evaluationMetric="classification", - # labelCol="Bankrupt?", - # scoredLabelsCol="prediction", - # ).transform(predictions) - # metrics.show() + from synapse.ml.train import ComputeModelStatistics + if not skip_py311: + # ComputeModelStatistics doesn't support python 3.11 + metrics = ComputeModelStatistics( + evaluationMetric="classification", + labelCol="Bankrupt?", + scoredLabelsCol="prediction", + ).transform(predictions) + metrics.show() except AttributeError: print("No fitted model because of too short training time.") @@ -207,6 +216,86 @@ def test_spark_input_df(): assert "No estimator is left." 
in str(excinfo.value) +def _test_spark_large_df(): + """Test with large dataframe, should not run in pipeline.""" + import os + import time + + import pandas as pd + from pyspark.sql import functions as F + + import flaml + + os.environ["FLAML_MAX_CONCURRENT"] = "8" + start_time = time.time() + + def load_higgs(): + # 11M rows, 29 columns, 1.1GB + df = ( + spark.read.format("csv") + .option("header", False) + .option("inferSchema", True) + .load("/datadrive/datasets/HIGGS.csv") + .withColumnRenamed("_c0", "target") + .withColumn("target", F.col("target").cast("integer")) + .limit(1000000) + .fillna(0) + .na.drop(how="any") + .repartition(64) + .cache() + ) + print("Number of rows in data: ", df.count()) + return df + + def load_bosch(): + # 1.184M rows, 969 cols, 1.5GB + df = ( + spark.read.format("csv") + .option("header", True) + .option("inferSchema", True) + .load("/datadrive/datasets/train_numeric.csv") + .withColumnRenamed("Response", "target") + .withColumn("target", F.col("target").cast("integer")) + .limit(1000000) + .fillna(0) + .drop("Id") + .repartition(64) + .cache() + ) + print("Number of rows in data: ", df.count()) + return df + + def prepare_data(dataset_name="higgs"): + df = load_higgs() if dataset_name == "higgs" else load_bosch() + train, test = df.randomSplit([0.75, 0.25], seed=7654321) + feature_cols = [col for col in df.columns if col not in ["target", "arrest"]] + final_cols = ["target", "features"] + featurizer = VectorAssembler(inputCols=feature_cols, outputCol="features") + train_data = featurizer.transform(train)[final_cols] + test_data = featurizer.transform(test)[final_cols] + train_data = to_pandas_on_spark(to_pandas_on_spark(train_data).to_spark(index_col="index")) + return train_data, test_data + + train_data, test_data = prepare_data("higgs") + end_time = time.time() + print("time cost in minutes for prepare data: ", (end_time - start_time) / 60) + automl = flaml.AutoML() + automl_settings = { + "max_iter": 3, + "time_budget": 7200, + "metric": "accuracy", + "task": "classification", + "seed": 1234, + "eval_method": "holdout", + } + automl.fit(dataframe=train_data, label="target", ensemble=False, **automl_settings) + model = automl.model.estimator + predictions = model.transform(test_data) + predictions.show(5) + end_time = time.time() + print("time cost in minutes: ", (end_time - start_time) / 60) + + if __name__ == "__main__": test_spark_synapseml_classification() test_spark_synapseml_regression() @@ -217,6 +306,6 @@ def test_spark_input_df(): # import pstats # from pstats import SortKey - # cProfile.run("test_spark_input_df()", "test_spark_input_df.profile") - # p = pstats.Stats("test_spark_input_df.profile") - # p.strip_dirs().sort_stats(SortKey.CUMULATIVE).print_stats("utils.py") + # cProfile.run("_test_spark_large_df()", "_test_spark_large_df.profile") + # p = pstats.Stats("_test_spark_large_df.profile") + # p.strip_dirs().sort_stats(SortKey.CUMULATIVE).print_stats(50) diff --git a/test/spark/test_mlflow.py b/test/spark/test_mlflow.py new file mode 100644 index 0000000000..5a809d5acd --- /dev/null +++ b/test/spark/test_mlflow.py @@ -0,0 +1,342 @@ +import importlib +import os +import sys +import time +import warnings + +import mlflow +import pytest +from packaging.version import Version +from sklearn.datasets import fetch_california_housing, load_diabetes +from sklearn.ensemble import RandomForestRegressor +from sklearn.metrics import r2_score +from sklearn.model_selection import train_test_split + +import flaml +from flaml.automl.spark.utils import 
to_pandas_on_spark + + try: + import pyspark + from pyspark.ml.evaluation import RegressionEvaluator + from pyspark.ml.feature import VectorAssembler + except ImportError: + pass + warnings.filterwarnings("ignore") + + skip_spark = importlib.util.find_spec("pyspark") is None + client = mlflow.tracking.MlflowClient() + + if (sys.platform.startswith("darwin") or sys.platform.startswith("win")) and sys.version_info >= (3, 10): + # TODO: remove this block when tests are stable + # The tests below fail when run together, but each function runs without error individually. + # test_tune_autolog_parentrun_nonparallel() + # test_tune_autolog_noparentrun_nonparallel() + # test_tune_noautolog_parentrun_nonparallel() + # test_tune_noautolog_noparentrun_nonparallel() + pytest.skip("skipping MacOS and Windows for python 3.10 and 3.11", allow_module_level=True) + + """ + The Spark session used by the tests below should be initiated in test_0sparkml.py when run with pytest. + """ + + +def _sklearn_tune(config): + is_autolog = config.pop("is_autolog") + is_parent_run = config.pop("is_parent_run") + is_parallel = config.pop("is_parallel") + X, y = load_diabetes(return_X_y=True, as_frame=True) + train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.25) + rf = RandomForestRegressor(**config) + rf.fit(train_x, train_y) + pred = rf.predict(test_x) + r2 = r2_score(test_y, pred) + if not is_autolog and not is_parent_run and not is_parallel: + with mlflow.start_run(nested=True): + mlflow.log_metric("r2", r2) + return {"r2": r2} + + +def _test_tune(is_autolog, is_parent_run, is_parallel): + mlflow.end_run() + mlflow_exp_name = f"test_mlflow_integration_{int(time.time())}" + mlflow_experiment = mlflow.set_experiment(mlflow_exp_name) + params = { + "n_estimators": flaml.tune.randint(100, 1000), + "min_samples_leaf": flaml.tune.randint(1, 10), + "is_autolog": is_autolog, + "is_parent_run": is_parent_run, + "is_parallel": is_parallel, + } + if is_autolog: + mlflow.autolog() + else: + mlflow.autolog(disable=True) + if is_parent_run: + mlflow.start_run(run_name=f"tune_autolog_{is_autolog}_sparktrial_{is_parallel}") + flaml.tune.run( + _sklearn_tune, + params, + metric="r2", + mode="max", + num_samples=3, + use_spark=is_parallel, + n_concurrent_trials=2 if is_parallel else 1, + mlflow_exp_name=mlflow_exp_name, + ) + mlflow.end_run() # end current run + mlflow.autolog(disable=True) + return mlflow_experiment.experiment_id + + +def _check_mlflow_logging(possible_num_runs, metric, is_parent_run, experiment_id, is_automl=False, skip_tags=False): + if isinstance(possible_num_runs, int): + possible_num_runs = [possible_num_runs] + if is_parent_run: + parent_run = mlflow.last_active_run() + child_runs = client.search_runs( + experiment_ids=[experiment_id], + filter_string=f"tags.mlflow.parentRunId = '{parent_run.info.run_id}'", + ) + else: + child_runs = client.search_runs(experiment_ids=[experiment_id]) + experiment_name = client.get_experiment(experiment_id).name + metrics = [metric in run.data.metrics for run in child_runs] + tags = ["flaml.version" in run.data.tags for run in child_runs] + params = ["learner" in run.data.params for run in child_runs] + assert ( + len(child_runs) in possible_num_runs + ), f"The number of child runs is not correct on experiment {experiment_name}." + if possible_num_runs[0] > 0: + assert all(metrics), f"The metrics are not logged correctly on experiment {experiment_name}."
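 + # the "flaml.version" tag is skipped for runs logged outside FLAML (skip_tags=True), + # and the "learner" param is only asserted for AutoML experiments (is_automl=True)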
+ assert ( + all(tags) if not skip_tags else True + ), f"The tags are not logged correctly on experiment {experiment_name}." + assert ( + all(params) if is_automl else True + ), f"The params are not logged correctly on experiment {experiment_name}." + # mlflow.delete_experiment(experiment_id) + + +@pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.") +def test_tune_autolog_parentrun_parallel(): + experiment_id = _test_tune(is_autolog=True, is_parent_run=True, is_parallel=True) + _check_mlflow_logging([4, 3], "r2", True, experiment_id) + + +def test_tune_autolog_parentrun_nonparallel(): + experiment_id = _test_tune(is_autolog=True, is_parent_run=True, is_parallel=False) + _check_mlflow_logging(3, "r2", True, experiment_id) + + +@pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.") +def test_tune_autolog_noparentrun_parallel(): + experiment_id = _test_tune(is_autolog=True, is_parent_run=False, is_parallel=True) + _check_mlflow_logging([4, 3], "r2", False, experiment_id) + + +@pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.") +def test_tune_noautolog_parentrun_parallel(): + experiment_id = _test_tune(is_autolog=False, is_parent_run=True, is_parallel=True) + _check_mlflow_logging([4, 3], "r2", True, experiment_id) + + +def test_tune_autolog_noparentrun_nonparallel(): + experiment_id = _test_tune(is_autolog=True, is_parent_run=False, is_parallel=False) + _check_mlflow_logging(3, "r2", False, experiment_id) + + +def test_tune_noautolog_parentrun_nonparallel(): + experiment_id = _test_tune(is_autolog=False, is_parent_run=True, is_parallel=False) + _check_mlflow_logging(3, "r2", True, experiment_id) + + +@pytest.mark.skipif(skip_spark, reason="Spark is not installed. 
Skip all spark tests.") +def test_tune_noautolog_noparentrun_parallel(): + experiment_id = _test_tune(is_autolog=False, is_parent_run=False, is_parallel=True) + _check_mlflow_logging(0, "r2", False, experiment_id) + + +def test_tune_noautolog_noparentrun_nonparallel(): + experiment_id = _test_tune(is_autolog=False, is_parent_run=False, is_parallel=False) + _check_mlflow_logging(3, "r2", False, experiment_id, skip_tags=True) + + +def _test_automl_sparkdata(is_autolog, is_parent_run): + mlflow.end_run() + mlflow_exp_name = f"test_mlflow_integration_{int(time.time())}" + mlflow_experiment = mlflow.set_experiment(mlflow_exp_name) + if is_autolog: + mlflow.autolog() + else: + mlflow.autolog(disable=True) + if is_parent_run: + mlflow.start_run(run_name=f"automl_sparkdata_autolog_{is_autolog}") + spark = pyspark.sql.SparkSession.builder.getOrCreate() + pd_df = load_diabetes(as_frame=True).frame + df = spark.createDataFrame(pd_df) + df = df.repartition(4).cache() + train, test = df.randomSplit([0.8, 0.2], seed=1) + feature_cols = df.columns[:-1] + featurizer = VectorAssembler(inputCols=feature_cols, outputCol="features") + train_data = featurizer.transform(train)["target", "features"] + featurizer.transform(test)["target", "features"] + automl = flaml.AutoML() + settings = { + "max_iter": 3, + "metric": "mse", + "task": "regression", # task type + "log_file_name": "flaml_experiment.log", # flaml log file + "mlflow_exp_name": mlflow_exp_name, + "log_type": "all", + "n_splits": 2, + "model_history": True, + } + df = to_pandas_on_spark(to_pandas_on_spark(train_data).to_spark(index_col="index")) + automl.fit( + dataframe=df, + label="target", + **settings, + ) + mlflow.end_run() # end current run + mlflow.autolog(disable=True) + return mlflow_experiment.experiment_id + + +def _test_automl_nonsparkdata(is_autolog, is_parent_run): + mlflow_exp_name = f"test_mlflow_integration_{int(time.time())}" + mlflow_experiment = mlflow.set_experiment(mlflow_exp_name) + if is_autolog: + mlflow.autolog() + else: + mlflow.autolog(disable=True) + if is_parent_run: + mlflow.start_run(run_name=f"automl_nonsparkdata_autolog_{is_autolog}") + automl_experiment = flaml.AutoML() + automl_settings = { + "max_iter": 3, + "metric": "r2", + "task": "regression", + "n_concurrent_trials": 2, + "use_spark": True, + "mlflow_exp_name": None if is_parent_run else mlflow_exp_name, + "log_type": "all", + "n_splits": 2, + "model_history": True, + } + X, y = load_diabetes(return_X_y=True, as_frame=True) + train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.25) + automl_experiment.fit(X_train=train_x, y_train=train_y, **automl_settings) + mlflow.end_run() # end current run + mlflow.autolog(disable=True) + return mlflow_experiment.experiment_id + + +@pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.") +def test_automl_sparkdata_autolog_parentrun(): + experiment_id = _test_automl_sparkdata(is_autolog=True, is_parent_run=True) + _check_mlflow_logging(3, "mse", True, experiment_id, is_automl=True) + + +@pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.") +def test_automl_sparkdata_autolog_noparentrun(): + experiment_id = _test_automl_sparkdata(is_autolog=True, is_parent_run=False) + _check_mlflow_logging(3, "mse", False, experiment_id, is_automl=True) + + +@pytest.mark.skipif(skip_spark, reason="Spark is not installed. 
Skip all spark tests.") +def test_automl_sparkdata_noautolog_parentrun(): + experiment_id = _test_automl_sparkdata(is_autolog=False, is_parent_run=True) + _check_mlflow_logging(3, "mse", True, experiment_id, is_automl=True) + + +@pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.") +def test_automl_sparkdata_noautolog_noparentrun(): + experiment_id = _test_automl_sparkdata(is_autolog=False, is_parent_run=False) + _check_mlflow_logging(0, "mse", False, experiment_id, is_automl=True) # no logging + + +@pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.") +def test_automl_nonsparkdata_autolog_parentrun(): + experiment_id = _test_automl_nonsparkdata(is_autolog=True, is_parent_run=True) + _check_mlflow_logging([4, 3], "r2", True, experiment_id, is_automl=True) + + +@pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.") +def test_automl_nonsparkdata_autolog_noparentrun(): + experiment_id = _test_automl_nonsparkdata(is_autolog=True, is_parent_run=False) + _check_mlflow_logging([4, 3], "r2", False, experiment_id, is_automl=True) + + +@pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.") +def test_automl_nonsparkdata_noautolog_parentrun(): + experiment_id = _test_automl_nonsparkdata(is_autolog=False, is_parent_run=True) + _check_mlflow_logging([4, 3], "r2", True, experiment_id, is_automl=True) + + +@pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.") +def test_automl_nonsparkdata_noautolog_noparentrun(): + experiment_id = _test_automl_nonsparkdata(is_autolog=False, is_parent_run=False) + _check_mlflow_logging(0, "r2", False, experiment_id, is_automl=True) # no logging + + +@pytest.mark.skipif(skip_spark, reason="Spark is not installed. 
Skip all spark tests.") +def test_exit_pyspark_autolog(): + import pyspark + + spark = pyspark.sql.SparkSession.builder.getOrCreate() + spark.sparkContext._gateway.shutdown_callback_server() # this is to avoid stucking + mlflow.autolog(disable=True) + + +def _init_spark_for_main(): + import pyspark + + spark = ( + pyspark.sql.SparkSession.builder.appName("MyApp") + .master("local[2]") + .config( + "spark.jars.packages", + ( + "com.microsoft.azure:synapseml_2.12:1.0.4," + "org.apache.hadoop:hadoop-azure:3.3.5," + "com.microsoft.azure:azure-storage:8.6.6," + f"org.mlflow:mlflow-spark_2.12:{mlflow.__version__}" + if Version(mlflow.__version__) >= Version("2.9.0") + else f"org.mlflow:mlflow-spark:{mlflow.__version__}" + ), + ) + .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") + .config("spark.sql.debug.maxToStringFields", "100") + .config("spark.driver.extraJavaOptions", "-Xss1m") + .config("spark.executor.extraJavaOptions", "-Xss1m") + .getOrCreate() + ) + spark.sparkContext._conf.set( + "spark.mlflow.pysparkml.autolog.logModelAllowlistFile", + "https://mmlspark.blob.core.windows.net/publicwasb/log_model_allowlist.txt", + ) + + +if __name__ == "__main__": + _init_spark_for_main() + + # test_tune_autolog_parentrun_parallel() + # test_tune_autolog_parentrun_nonparallel() + test_tune_autolog_noparentrun_parallel() # TODO: runs not removed + # test_tune_noautolog_parentrun_parallel() + # test_tune_autolog_noparentrun_nonparallel() + # test_tune_noautolog_parentrun_nonparallel() + # test_tune_noautolog_noparentrun_parallel() + # test_tune_noautolog_noparentrun_nonparallel() + # test_automl_sparkdata_autolog_parentrun() + # test_automl_sparkdata_autolog_noparentrun() + # test_automl_sparkdata_noautolog_parentrun() + # test_automl_sparkdata_noautolog_noparentrun() + # test_automl_nonsparkdata_autolog_parentrun() + # test_automl_nonsparkdata_autolog_noparentrun() # TODO: runs not removed + # test_automl_nonsparkdata_noautolog_parentrun() + # test_automl_nonsparkdata_noautolog_noparentrun() + + test_exit_pyspark_autolog()