optimize mlflow

apache · Sep 20, 2022 · 82b9001 · 82b9001
1 parent 8cddb10
commit 82b9001
Show file tree

Hide file tree

Showing 20 changed files with 259 additions and 425 deletions.
diff --git a/docs/docs/en/guide/task/mlflow.md b/docs/docs/en/guide/task/mlflow.md
@@ -20,7 +20,6 @@ The MLflow plugin currently supports and will support the following:
 - MLflow Models
   - MLFLOW: Use `MLflow models serve` to deploy a model service
   - Docker: Run the container after packaging the docker image
-  - Docker Compose: Use docker compose to run the container, it will replace the docker run above
 
 ## Create Task
 
@@ -98,22 +97,26 @@ You can now use this feature to run all MLFlow projects on Github (For example [
 
 ![mlflow-models-docker](../../../../img/tasks/demo/mlflow-models-docker.png)
 
-#### DOCKER COMPOSE
+## Environment to Prepare
 
-![mlflow-models-docker-compose](../../../../img/tasks/demo/mlflow-models-docker-compose.png)
+### Conda Environment
+Please install [anaconda](https://docs.continuum.io/anaconda/install/) or [miniconda](https://docs.conda.io/en/latest/miniconda.html#installing) in advance.
 
-|  **Parameter**   |                     **Description**                      |
-|------------------|----------------------------------------------------------|
-| Max Cpu Limit    | For example, `1.0` or `0.5`, the same as docker compose. |
-| Max Memory Limit | For example `1G` or `500M`, the same as docker compose.  |
+**Method A:**
 
-## Environment to Prepare
+Configure the anaconda environment in `/dolphinscheduler/conf/env/dolphinscheduler_env.sh`.
 
-### Conda Environment
+Add the following content to the file:
+
+```bash
+# config anaconda environment
+export PATH=/opt/anaconda3/bin:$PATH
+```
 
-You need to enter the admin account to configure a conda environment variable（Please
-install [anaconda](https://docs.continuum.io/anaconda/install/)
-or [miniconda](https://docs.conda.io/en/latest/miniconda.html#installing) in advance).
+
+**Method B:**
+
+You need to enter the admin account to configure a conda environment variable.
 
 ![mlflow-conda-env](../../../../img/tasks/demo/mlflow-conda-env.png)
 
@@ -139,3 +142,14 @@ After running, an MLflow service is started.
 After this, you can visit the MLflow service (`http://localhost:5000`) page to view the experiments and models.
 
 ![mlflow-server](../../../../img/tasks/demo/mlflow-server.png)
+
+### Preset Algorithm Repository Configuration
+
+If you can't access GitHub, you can modify the following fields in the `common.properties` configuration file to replace the GitHub address with an accessible address.
+
+```properties
+# mlflow task plugin preset repository
+ml.mlflow.preset_repository=https://github.com/apache/dolphinscheduler-mlflow
+# mlflow task plugin preset repository version
+ml.mlflow.preset_repository_version="main"
+```
diff --git a/docs/docs/zh/guide/task/mlflow.md b/docs/docs/zh/guide/task/mlflow.md
@@ -19,7 +19,6 @@ MLflow 组件用于执行 MLflow 任务，目前包含Mlflow Projects，和MLflo
 - MLflow Models
   - MLFLOW: 直接使用 `mlflow models serve` 部署模型。
   - Docker: 打包 DOCKER 镜像后部署模型。
-  - Docker Compose: 使用Docker Compose 部署模型，将会取代上面的Docker部署。
 
 ## 创建任务
 
@@ -90,28 +89,33 @@ MLflow 组件用于执行 MLflow 任务，目前包含Mlflow Projects，和MLflo
 
 ![mlflow-models-docker](../../../../img/tasks/demo/mlflow-models-docker.png)
 
-#### DOCKER COMPOSE
+## 环境准备
 
-![mlflow-models-docker-compose](../../../../img/tasks/demo/mlflow-models-docker-compose.png)
+### conda 环境配置
 
-| **任务参数** |                **描述**                |
-|----------|--------------------------------------|
-| 最大CPU限制  | 如 `1.0` 或者 `0.5`，与 docker compose 一致 |
-| 最大内存限制   | 如 `1G` 或者 `500M`，与 docker compose 一致 |
+请提前[安装anaconda](https://docs.continuum.io/anaconda/install/) 或者[安装miniconda](https://docs.conda.io/en/latest/miniconda.html#installing)
 
-## 环境准备
+**方法A：**
 
-### conda 环境配置
+配置文件：/dolphinscheduler/conf/env/dolphinscheduler_env.sh。
+
+在文件最后添加内容
+```
+# 配置你的conda环境路径
+export PATH=/opt/anaconda3/bin:$PATH
+```
 
-你需要进入admin账户配置一个conda环境变量（请提前[安装anaconda](https://docs.continuum.io/anaconda/install/)
-或者[安装miniconda](https://docs.conda.io/en/latest/miniconda.html#installing) )。
+**方法B：**
+
+你需要进入admin账户配置一个conda环境变量。
 
 ![mlflow-conda-env](../../../../img/tasks/demo/mlflow-conda-env.png)
 
 后续注意配置任务时，环境选择上面创建的conda环境，否则程序会找不到conda环境。
 
 ![mlflow-set-conda-env](../../../../img/tasks/demo/mlflow-set-conda-env.png)
 
+
 ### MLflow service 启动
 
 确保你已经安装MLflow，可以使用`pip install mlflow`进行安装。
@@ -130,3 +134,15 @@ mlflow server -h 0.0.0.0 -p 5000 --serve-artifacts --backend-store-uri sqlite://
 
 ![mlflow-server](../../../../img/tasks/demo/mlflow-server.png)
 
+
+### 内置算法仓库配置
+
+如果遇到 GitHub 无法访问的情况，可以修改 `common.properties` 配置文件的以下字段，将 GitHub 地址替换为能访问的地址。
+
+```properties
+# mlflow task plugin preset repository
+ml.mlflow.preset_repository=https://github.com/apache/dolphinscheduler-mlflow
+# mlflow task plugin preset repository version
+ml.mlflow.preset_repository_version="main"
+```
+
diff --git a/docs/img/tasks/demo/mlflow-models-docker-compose.png b/docs/img/tasks/demo/mlflow-models-docker-compose.png
diff --git a/dolphinscheduler-common/src/main/resources/common.properties b/dolphinscheduler-common/src/main/resources/common.properties
@@ -105,4 +105,9 @@ alert.rpc.port=50052
 conda.path=/opt/anaconda3/etc/profile.d/conda.sh
 
 # Task resource limit state
-task.resource.limit.state=false
+task.resource.limit.state=false
+
+# mlflow task plugin preset repository
+ml.mlflow.preset_repository=https://github.com/apache/dolphinscheduler-mlflow
+# mlflow task plugin preset repository version
+ml.mlflow.preset_repository_version="main"
diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/mlflow.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/mlflow.yaml
@@ -32,15 +32,6 @@ tasks:
     parameters: -P learning_rate=0.2 -P colsample_bytree=0.8 -P subsample=0.9
     experiment_name: xgboost
 
-
-  - name: deploy_mlflow
-    deps: [train_xgboost_native]
-    task_type: MLflowModels 
-    model_uri: models:/xgboost_native/Production
-    mlflow_tracking_uri: *mlflow_tracking_uri
-    deploy_mode: MLFLOW
-    port: 7001
-
   - name: train_automl
     task_type: MLFlowProjectsAutoML 
     mlflow_tracking_uri: *mlflow_tracking_uri
@@ -68,11 +59,11 @@ tasks:
     data_path: /data/examples/iris
     search_params: max_depth=[5, 10];n_estimators=[100, 200]
 
-
-  - name: deploy_docker_compose
-    task_type: MLflowModels 
+  - name: deploy_mlflow
     deps: [train_basic_algorithm]
+    task_type: MLflowModels
     model_uri: models:/iris_B/Production
     mlflow_tracking_uri: *mlflow_tracking_uri
-    deploy_mode: DOCKER COMPOSE
-    port: 7003
+    deploy_mode: MLFLOW
+    port: 7001
+
diff --git a/...cheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_mlflow_example.py b/...cheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_mlflow_example.py
@@ -43,17 +43,6 @@
         experiment_name="xgboost",
     )
 
-    # Using MLFLOW to deploy model from custom mlflow project
-    deploy_mlflow = MLflowModels(
-        name="deploy_mlflow",
-        model_uri="models:/xgboost_native/Production",
-        mlflow_tracking_uri=mlflow_tracking_uri,
-        deploy_mode=MLflowDeployType.MLFLOW,
-        port=7001,
-    )
-
-    train_custom >> deploy_mlflow
-
     # run automl to train model
     train_automl = MLFlowProjectsAutoML(
         name="train_automl",
@@ -88,16 +77,16 @@
         search_params="max_depth=[5, 10];n_estimators=[100, 200]",
     )
 
-    # Using DOCKER COMPOSE to deploy model from train_basic_algorithm
-    deploy_docker_compose = MLflowModels(
-        name="deploy_docker_compose",
+    # Using MLFLOW to deploy model from training lightgbm project
+    deploy_mlflow = MLflowModels(
+        name="deploy_mlflow",
         model_uri="models:/iris_B/Production",
         mlflow_tracking_uri=mlflow_tracking_uri,
-        deploy_mode=MLflowDeployType.DOCKER_COMPOSE,
-        port=7003,
+        deploy_mode=MLflowDeployType.MLFLOW,
+        port=7001,
     )
 
-    train_basic_algorithm >> deploy_docker_compose
+    train_basic_algorithm >> deploy_mlflow
 
     pd.submit()
 

diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/mlflow.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/mlflow.py
@@ -43,7 +43,6 @@ class MLflowDeployType(str):
 
     MLFLOW = "MLFLOW"
     DOCKER = "DOCKER"
-    DOCKER_COMPOSE = "DOCKER COMPOSE"
 
 
 DEFAULT_MLFLOW_TRACKING_URI = "http://127.0.0.1:5000"
@@ -83,10 +82,8 @@ class MLflowModels(BaseMLflow):
     :param model_uri: Model-URI of MLflow , support models:/<model_name>/suffix format and runs:/ format.
         See https://mlflow.org/docs/latest/tracking.html#artifact-stores
     :param mlflow_tracking_uri: MLflow tracking server uri, default is http://127.0.0.1:5000
-    :param deploy_mode: MLflow deploy mode, support MLFLOW, DOCKER, DOCKER COMPOSE, default is DOCKER
+    :param deploy_mode: MLflow deploy mode, support MLFLOW, DOCKER, default is DOCKER
     :param port: deploy port, default is 7000
-    :param cpu_limit: cpu limit, default is 1.0
-    :param memory_limit: memory limit, default is 500M
     """
 
     mlflow_task_type = MLflowTaskType.MLFLOW_MODELS
@@ -95,8 +92,6 @@ class MLflowModels(BaseMLflow):
         "deploy_type",
         "deploy_model_key",
         "deploy_port",
-        "cpu_limit",
-        "memory_limit",
     }
 
     def __init__(
@@ -106,8 +101,6 @@ def __init__(
         mlflow_tracking_uri: Optional[str] = DEFAULT_MLFLOW_TRACKING_URI,
         deploy_mode: Optional[str] = MLflowDeployType.DOCKER,
         port: Optional[int] = 7000,
-        cpu_limit: Optional[float] = 1.0,
-        memory_limit: Optional[str] = "500M",
         *args,
         **kwargs
     ):
@@ -116,8 +109,6 @@ def __init__(
         self.deploy_type = deploy_mode.upper()
         self.deploy_model_key = model_uri
         self.deploy_port = port
-        self.cpu_limit = cpu_limit
-        self.memory_limit = memory_limit
 
 
 class MLFlowProjectsCustom(BaseMLflow):

diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_mlflow.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_mlflow.py
@@ -63,19 +63,15 @@ def test_mlflow_models_get_define():
     name = "mlflow_models"
     model_uri = "models:/xgboost_native/Production"
     port = 7001
-    cpu_limit = 2.0
-    memory_limit = "600M"
 
     expect = deepcopy(EXPECT)
     expect["name"] = name
     task_params = expect["taskParams"]
     task_params["mlflowTrackingUri"] = MLFLOW_TRACKING_URI
     task_params["mlflowTaskType"] = MLflowTaskType.MLFLOW_MODELS
-    task_params["deployType"] = MLflowDeployType.DOCKER_COMPOSE
+    task_params["deployType"] = MLflowDeployType.DOCKER
     task_params["deployModelKey"] = model_uri
     task_params["deployPort"] = port
-    task_params["cpuLimit"] = cpu_limit
-    task_params["memoryLimit"] = memory_limit
 
     with patch(
         "pydolphinscheduler.core.task.Task.gen_code_and_version",
@@ -85,10 +81,8 @@ def test_mlflow_models_get_define():
             name=name,
             model_uri=model_uri,
             mlflow_tracking_uri=MLFLOW_TRACKING_URI,
-            deploy_mode=MLflowDeployType.DOCKER_COMPOSE,
+            deploy_mode=MLflowDeployType.DOCKER,
             port=port,
-            cpu_limit=cpu_limit,
-            memory_limit=memory_limit,
         )
         assert task.get_define() == expect
 

diff --git a/...-mlflow/src/main/java/org/apache/dolphinscheduler/plugin/task/mlflow/MlflowConstants.java b/...-mlflow/src/main/java/org/apache/dolphinscheduler/plugin/task/mlflow/MlflowConstants.java
@@ -28,15 +28,17 @@ private MlflowConstants() {
 
     public static final String JOB_TYPE_CUSTOM_PROJECT = "CustomProject";
 
-    public static final String PRESET_REPOSITORY = "https://github.com/apache/dolphinscheduler-mlflow";
+    public static final String PRESET_REPOSITORY_KEY = "ml.mlflow.preset_repository";
+
+    public static final String PRESET_REPOSITORY_VERSION_KEY = "ml.mlflow.preset_repository_version";
 
-    public static final String PRESET_PATH = "dolphinscheduler-mlflow";
+    public static final String PRESET_REPOSITORY = "https://github.com/apache/dolphinscheduler-mlflow";
 
     public static final String PRESET_REPOSITORY_VERSION = "main";
 
-    public static final String PRESET_AUTOML_PROJECT = PRESET_PATH + "#Project-AutoML";
+    public static final String PRESET_AUTOML_PROJECT = "#Project-AutoML";
 
-    public static final String PRESET_BASIC_ALGORITHM_PROJECT = PRESET_PATH + "#Project-BasicAlgorithm";
+    public static final String PRESET_BASIC_ALGORITHM_PROJECT = "#Project-BasicAlgorithm";
 
     public static final String MLFLOW_TASK_TYPE_PROJECTS = "MLflow Projects";
 
@@ -46,14 +48,6 @@ private MlflowConstants() {
 
     public static final String MLFLOW_MODELS_DEPLOY_TYPE_DOCKER = "DOCKER";
 
-    public static final String MLFLOW_MODELS_DEPLOY_TYPE_DOCKER_COMPOSE = "DOCKER COMPOSE";
-
-    /**
-     * template file
-     */
-    public static final String TEMPLATE_DOCKER_COMPOSE = "docker-compose.yml";
-
-
     /**
      * mlflow command
      */
@@ -81,8 +75,7 @@ private MlflowConstants() {
 
     public static final String MLFLOW_RUN_CUSTOM_PROJECT = "mlflow run $repo "
         + "%s "
-        + "--experiment-name=\"%s\" "
-        + "--version=\"%s\" ";
+        + "--experiment-name=\"%s\"";
 
     public static final String MLFLOW_MODELS_SERVE = "mlflow models serve -m %s --port %s -h 0.0.0.0";
 
@@ -94,20 +87,10 @@ private MlflowConstants() {
         + "--health-cmd \"curl --fail http://127.0.0.1:8080/ping || exit 1\" --health-interval 5s --health-retries 20"
         + " %s";
 
-    public static final String DOCKER_COMPOSE_RUN = "docker-compose up -d";
-
-    public static final String SET_DOCKER_COMPOSE_ENV = "export DS_TASK_MLFLOW_IMAGE_NAME=%s\n"
-        + "export DS_TASK_MLFLOW_CONTAINER_NAME=%s\n"
-        + "export DS_TASK_MLFLOW_DEPLOY_PORT=%s\n"
-        + "export DS_TASK_MLFLOW_CPU_LIMIT=%s\n"
-        + "export DS_TASK_MLFLOW_MEMORY_LIMIT=%s";
-
-
     public static final String DOCKER_HEALTH_CHECK = "docker inspect --format \"{{json .State.Health.Status }}\" %s";
 
     public static final int DOCKER_HEALTH_CHECK_TIMEOUT = 20;
 
     public static final int DOCKER_HEALTH_CHECK_INTERVAL = 5000;
 
-    public static final String GIT_CLONE_REPO  = "git clone %s %s";
 }