-
Notifications
You must be signed in to change notification settings - Fork 34
/
recipe.yaml
76 lines (74 loc) · 3.45 KB
/
recipe.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# `recipe.yaml` is the main configuration file for an MLflow Recipe.
# Required recipe parameters should be defined in this file with either concrete values or
# template variables such as {{ INGEST_CONFIG }}.
#
# Each variable must be given a value in a profile YAML file located under `profiles/`.
# See `profiles/local.yaml` for example usage. To switch between profiles, pass the
# profile name (such as `local`) to the Recipe object constructor:
# `r = Recipe(profile="local")`
#
# NOTE: All "FIXME::REQUIRED" fields in recipe.yaml and profiles/*.yaml must be set correctly
# to adapt this template to a specific regression problem. To find all required fields,
# under the root directory of this recipe, type on a unix-like command line:
# $> grep "# FIXME::REQUIRED:" recipe.yaml profiles/*.yaml
#
# NOTE: YAML does not support tabs for indentation. Please use spaces and ensure that all YAML
# files are properly formatted.
recipe: 'regression/v1'
# FIXME::REQUIRED: Name of the target column used for model training and evaluation.
target_col: ''
# FIXME::REQUIRED: Primary metric used to evaluate model performance. It is used to select
#                  the best performing models in the MLflow UI as well as in the train and
#                  evaluate steps.
#                  Built-in metrics are: example_count, mean_absolute_error, mean_squared_error,
#                  root_mean_squared_error, sum_on_label, mean_on_label, max_error,
#                  mean_absolute_percentage_error
primary_metric: ''
# Per-step configuration. Every step below must be nested under `steps`; without the
# indentation the step names (and the two `using` keys) would collide at the top level.
steps:
  # Specifies the dataset to use for model development.
  # {{INGEST_CONFIG}} is a template variable whose value comes from the selected profile
  # under `profiles/` (see the header of this file).
  ingest: {{INGEST_CONFIG}}
  split:
    #
    # FIXME::OPTIONAL: Adjust the train/validation/test split ratios below.
    #
    split_ratios: [0.75, 0.125, 0.125]
    #
    # FIXME::OPTIONAL: Specifies the method to use to "post-process" the split datasets. Note that
    #                  arbitrary transformations should go into the transform step.
    post_split_filter_method: create_dataset_filter
  transform:
    using: "custom"
    #
    # FIXME::OPTIONAL: Specifies the method that defines an sklearn-compatible transformer, which
    #                  applies input feature transformation during model training and inference.
    transformer_method: transformer_fn
  train:
    #
    # FIXME::REQUIRED: Specifies the method to use for training. Options are "automl/flaml" for
    #                  AutoML training or "custom" for user-defined estimators.
    using: ""
  # NOTE: while `validation_criteria` below stays commented out, `evaluate` has an empty value.
  evaluate:
    #
    # FIXME::OPTIONAL: Sets performance thresholds that a trained model must meet in order to be
    #                  eligible for registration to the MLflow Model Registry.
    #
    # validation_criteria:
    #   - metric: root_mean_squared_error
    #     threshold: 10
  register:
    # Indicates whether or not a model that fails to meet performance thresholds should still
    # be registered to the MLflow Model Registry
    allow_non_validated_model: false
  # FIXME::OPTIONAL: Specify the dataset to use for batch scoring. All params serve the same
  #                  function as in the `ingest` step.
  # ingest_scoring: {{INGEST_SCORING_CONFIG}}
  # predict:
  #   output: {{PREDICT_OUTPUT_CONFIG}}
  #   model_uri: "models/model.pkl"
  #   result_type: "double"
  #   save_mode: "default"
# custom_metrics:
# FIXME::OPTIONAL: Defines custom performance metrics to compute during model development.
#   - name: ""
#     function: get_custom_metrics
#     greater_is_better: False