# recipe.yaml

# `recipe.yaml` is the main configuration file for an MLflow Recipe.
# Required recipe parameters should be defined in this file with either concrete values or
# variables such as {{ INGEST_DATA_LOCATION }}.
#
# Variables must be dereferenced in a profile YAML file, located under `profiles/`.
# See `profiles/local.yaml` for example usage. One may switch among profiles quickly by
# providing a profile name such as `local` in the Recipe object constructor:
# `r = Recipe(profile="local")`
#
# NOTE: All "FIXME::REQUIRED" fields in recipe.yaml and profiles/*.yaml must be set correctly
#       to adapt this template to a specific regression problem. To find all required fields,
#       under the root directory of this recipe, type on a unix-like command line:
#       $> grep "# FIXME::REQUIRED:" recipe.yaml profiles/*.yaml
#
# NOTE: YAML does not support tabs for indentation. Please use spaces and ensure that all YAML
#       files are properly formatted.

recipe: "regression/v1"
# FIXME::REQUIRED: Specifies the target column name for model training and evaluation.
target_col: ""
# FIXME::REQUIRED: Sets the primary metric to use to evaluate model performance. This primary
#                  metric is used to select the best-performing models in the MLflow UI as well
#                  as in the train and evaluate steps.
#                  Built-in metrics are: example_count, mean_absolute_error, mean_squared_error,
#                  root_mean_squared_error, sum_on_label, mean_on_label, max_error,
#                  mean_absolute_percentage_error
primary_metric: ""
steps:
  # Specifies the dataset to use for model development.
  # NOTE(review): the placeholder below is unquoted in this template; presumably the selected
  #               profile supplies the whole step configuration for it. Confirm how the value
  #               is substituted before quoting or reformatting this line.
  ingest: {{INGEST_CONFIG}}
  split:
    #
    # FIXME::OPTIONAL: Adjust the train/validation/test split ratios below.
    #
    split_ratios: [0.75, 0.125, 0.125]
    #
    # FIXME::OPTIONAL: Specifies the method to use to "post-process" the split datasets. Note that
    #                  arbitrary transformations should go into the transform step.
    post_split_filter_method: create_dataset_filter
  transform:
    using: "custom"
    #
    # FIXME::OPTIONAL: Specifies the method that defines an sklearn-compatible transformer, which
    #                  applies input feature transformation during model training and inference.
    transformer_method: transformer_fn
  train:
    #
    # FIXME::REQUIRED: Specifies the method to use for training. Options are "automl/flaml" for
    #                  AutoML training or "custom" for user-defined estimators.
    using: ""
  evaluate:
    #
    # FIXME::OPTIONAL: Sets performance thresholds that a trained model must meet in order to be
    #                  eligible for registration to the MLflow Model Registry.
    #                  While the example below stays commented out, `evaluate` has no value
    #                  (it parses as null); uncomment the example to define a threshold.
    #
    # validation_criteria:
    #   - metric: root_mean_squared_error
    #     threshold: 10
  register:
    # Indicates whether or not a model that fails to meet performance thresholds should still
    # be registered to the MLflow Model Registry.
    allow_non_validated_model: false
  # FIXME::OPTIONAL: Specifies the dataset to use for batch scoring. All params serve the same
  #                  function as in `ingest`.
  # ingest_scoring: {{INGEST_SCORING_CONFIG}}
  # predict:
  #   output: {{PREDICT_OUTPUT_CONFIG}}
  #   model_uri: "models/model.pkl"
  #   result_type: "double"
  #   save_mode: "default"
# FIXME::OPTIONAL: Defines custom performance metrics to compute during model development.
# custom_metrics:
#   - name: ""
#     function: get_custom_metrics
#     greater_is_better: false