DSL refactor #619

Merged
14 commits merged on Jan 9, 2019
Changes from 5 commits
225 changes: 110 additions & 115 deletions sdk/python/kfp/compiler/compiler.py
@@ -38,125 +38,16 @@ def my_pipeline(a: dsl.PipelineParam, b: dsl.PipelineParam):
```
"""

def _sanitize_name(self, name):
"""From _make_kubernetes_name
_sanitize_name cleans and converts the names in the workflow.
"""
return re.sub('-+', '-', re.sub('[^-0-9a-z]+', '-', name.lower())).lstrip('-').rstrip('-')

def _pipelineparam_full_name(self, param):
"""_pipelineparam_full_name
"""_pipelineparam_full_name converts the names of pipeline parameters
to unique names in the argo yaml

Args:
param(PipelineParam): pipeline parameter
"""
if param.op_name:
return param.op_name + '-' + param.name
return self._sanitize_name(param.name)

def _build_conventional_artifact(self, name):
return {
'name': name,
'path': '/' + name + '.json',
's3': {
# TODO: parameterize namespace for minio service
'endpoint': 'minio-service.kubeflow:9000',
'bucket': 'mlpipeline',
'key': 'runs/{{workflow.uid}}/{{pod.name}}/' + name + '.tgz',
'insecure': True,
'accessKeySecret': {
'name': 'mlpipeline-minio-artifact',
'key': 'accesskey',
},
'secretKeySecret': {
'name': 'mlpipeline-minio-artifact',
'key': 'secretkey'
}
},
}

def _op_to_template(self, op):
"""Generate template given an operator inherited from dsl.ContainerOp."""

processed_args = None
if op.arguments:
processed_args = list(map(str, op.arguments))
for i, _ in enumerate(processed_args):
if op.argument_inputs:
for param in op.argument_inputs:
full_name = self._pipelineparam_full_name(param)
processed_args[i] = re.sub(str(param), '{{inputs.parameters.%s}}' % full_name,
processed_args[i])
input_parameters = []
for param in op.inputs:
one_parameter = {'name': self._pipelineparam_full_name(param)}
if param.value:
one_parameter['value'] = str(param.value)
input_parameters.append(one_parameter)
# Sort to make the results deterministic.
input_parameters.sort(key=lambda x: x['name'])

output_parameters = []
for param in op.outputs.values():
output_parameters.append({
'name': self._pipelineparam_full_name(param),
'valueFrom': {'path': op.file_outputs[param.name]}
})
output_parameters.sort(key=lambda x: x['name'])

template = {
'name': op.name,
'container': {
'image': op.image,
}
}
if processed_args:
template['container']['args'] = processed_args
if input_parameters:
template['inputs'] = {'parameters': input_parameters}

template['outputs'] = {}
if output_parameters:
template['outputs'] = {'parameters': output_parameters}

# Generate artifact for metadata output
# The motivation of appending the minio info in the yaml
# is to specify a unique path for the metadata.
# TODO: after argo addresses the issue that configures a unique path
# for the artifact output when default artifact repository is configured,
# this part needs to be updated to use the default artifact repository.
output_artifacts = []
output_artifacts.append(self._build_conventional_artifact('mlpipeline-ui-metadata'))
output_artifacts.append(self._build_conventional_artifact('mlpipeline-metrics'))
template['outputs']['artifacts'] = output_artifacts
if op.command:
template['container']['command'] = op.command

# Set resources.
if op.resource_limits or op.resource_requests:
template['container']['resources'] = {}
if op.resource_limits:
template['container']['resources']['limits'] = op.resource_limits
if op.resource_requests:
template['container']['resources']['requests'] = op.resource_requests

# Set nodeSelector.
if op.node_selector:
template['nodeSelector'] = op.node_selector

if op.env_variables:
template['container']['env'] = list(map(K8sHelper.convert_k8s_obj_to_json, op.env_variables))
if op.volume_mounts:
template['container']['volumeMounts'] = list(map(K8sHelper.convert_k8s_obj_to_json, op.volume_mounts))

if op.pod_annotations or op.pod_labels:
template['metadata'] = {}
if op.pod_annotations:
template['metadata']['annotations'] = op.pod_annotations
if op.pod_labels:
template['metadata']['labels'] = op.pod_labels

return template
return dsl._utils._sanitize_k8s_name(param.name)

def _get_groups_for_ops(self, root_group):
"""Helper function to get belonging groups for each op.
@@ -320,6 +211,110 @@ def _resolve_value_or_reference(self, value_or_reference, potential_references):
else:
return str(value_or_reference)

def _op_to_template(self, op):
"""Generate template given an operator inherited from dsl.ContainerOp."""

def _build_conventional_artifact(name):
return {
'name': name,
'path': '/' + name + '.json',
's3': {
# TODO: parameterize namespace for minio service
'endpoint': 'minio-service.kubeflow:9000',
'bucket': 'mlpipeline',
'key': 'runs/{{workflow.uid}}/{{pod.name}}/' + name + '.tgz',
'insecure': True,
'accessKeySecret': {
'name': 'mlpipeline-minio-artifact',
'key': 'accesskey',
},
'secretKeySecret': {
'name': 'mlpipeline-minio-artifact',
'key': 'secretkey'
}
},
}

processed_args = None
if op.arguments:
processed_args = list(map(str, op.arguments))
for i, _ in enumerate(processed_args):
if op.argument_inputs:
for param in op.argument_inputs:
full_name = self._pipelineparam_full_name(param)
processed_args[i] = re.sub(str(param), '{{inputs.parameters.%s}}' % full_name,
processed_args[i])
input_parameters = []
for param in op.inputs:
one_parameter = {'name': self._pipelineparam_full_name(param)}
if param.value:
one_parameter['value'] = str(param.value)
input_parameters.append(one_parameter)
# Sort to make the results deterministic.
input_parameters.sort(key=lambda x: x['name'])

output_parameters = []
for param in op.outputs.values():
output_parameters.append({
'name': self._pipelineparam_full_name(param),
'valueFrom': {'path': op.file_outputs[param.name]}
})
output_parameters.sort(key=lambda x: x['name'])

template = {
'name': op.name,
'container': {
'image': op.image,
}
}
if processed_args:
template['container']['args'] = processed_args
if input_parameters:
template['inputs'] = {'parameters': input_parameters}

template['outputs'] = {}
if output_parameters:
template['outputs'] = {'parameters': output_parameters}

# Generate artifact for metadata output
# The motivation of appending the minio info in the yaml
# is to specify a unique path for the metadata.
# TODO: after argo addresses the issue that configures a unique path
# for the artifact output when default artifact repository is configured,
# this part needs to be updated to use the default artifact repository.
output_artifacts = []
output_artifacts.append(_build_conventional_artifact('mlpipeline-ui-metadata'))
output_artifacts.append(_build_conventional_artifact('mlpipeline-metrics'))
template['outputs']['artifacts'] = output_artifacts
if op.command:
template['container']['command'] = op.command

# Set resources.
if op.resource_limits or op.resource_requests:
template['container']['resources'] = {}
if op.resource_limits:
template['container']['resources']['limits'] = op.resource_limits
if op.resource_requests:
template['container']['resources']['requests'] = op.resource_requests

# Set nodeSelector.
if op.node_selector:
template['nodeSelector'] = op.node_selector

if op.env_variables:
template['container']['env'] = list(map(K8sHelper.convert_k8s_obj_to_json, op.env_variables))
if op.volume_mounts:
template['container']['volumeMounts'] = list(map(K8sHelper.convert_k8s_obj_to_json, op.volume_mounts))

if op.pod_annotations or op.pod_labels:
template['metadata'] = {}
if op.pod_annotations:
template['metadata']['annotations'] = op.pod_annotations
if op.pod_labels:
template['metadata']['labels'] = op.pod_labels

return template

def _group_to_template(self, group, inputs, outputs, dependencies):
"""Generate template given an OpsGroup.

@@ -498,10 +493,10 @@ def _compile(self, pipeline_func):
raise ValueError('Please use a function with @dsl.pipeline decorator.')

pipeline_name, _ = dsl.Pipeline.get_pipeline_functions()[pipeline_func]
pipeline_name = self._sanitize_name(pipeline_name)
pipeline_name = dsl._utils._sanitize_k8s_name(pipeline_name)

# Create the arg list with no default values and call pipeline function.
args_list = [dsl.PipelineParam(self._sanitize_name(arg_name))
args_list = [dsl.PipelineParam(dsl._utils._sanitize_k8s_name(arg_name))
for arg_name in argspec.args]
with dsl.Pipeline(pipeline_name) as p:
pipeline_func(*args_list)
@@ -510,7 +505,7 @@ def _compile(self, pipeline_func):
self._validate_exit_handler(p)

# Fill in the default values.
args_list_with_defaults = [dsl.PipelineParam(self._sanitize_name(arg_name))
args_list_with_defaults = [dsl.PipelineParam(dsl._utils._sanitize_k8s_name(arg_name))
for arg_name in argspec.args]
if argspec.defaults:
for arg, default in zip(reversed(args_list_with_defaults), reversed(argspec.defaults)):
3 changes: 2 additions & 1 deletion sdk/python/kfp/dsl/_container_op.py
@@ -28,7 +28,8 @@ def __init__(self, name: str, image: str, command: str=None, arguments: str=None
"""Create a new instance of ContainerOp.

Args:
name: the name of the op. Has to be unique within a pipeline.
name: the name of the op. It does not have to be unique within a pipeline
because the pipeline will generate a unique new name in case of conflicts.
image: the container image name, such as 'python:3.5-jessie'
command: the command to run in the container.
If None, uses default CMD in defined in container.
7 changes: 2 additions & 5 deletions sdk/python/kfp/dsl/_pipeline.py
@@ -15,7 +15,7 @@

from . import _container_op
from . import _ops_group
import re
from ._utils import _sanitize_k8s_name
import sys


@@ -38,9 +38,6 @@ def _pipeline(func):

return _pipeline

def _make_kubernetes_name(name):
return re.sub('-+', '-', re.sub('[^-0-9a-z]+', '-', name.lower())).lstrip('-').rstrip('-')

class Pipeline():
"""A pipeline contains a list of operators.

@@ -108,7 +105,7 @@ def add_op(self, op: _container_op.ContainerOp, define_only: bool):
op: An operator of ContainerOp or its inherited type.
"""

kubernetes_name = _make_kubernetes_name(op.human_name)
kubernetes_name = _sanitize_k8s_name(op.human_name)
Contributor:
I missed the previous change, so I am adding my thoughts here: one design goal is to hide k8s as much as possible in the "dsl" layer and push the k8s stuff to the compiler (I used to call it the "argo compiler"). This way the DSL layer is more generic, and that's why there are separate "dsl" and "compiler" directories.

I feel like we don't have to sanitize the pipeline name here; we can store it as-is (respecting the user's choice) and sanitize it in the compiler at https://github.com/kubeflow/pipelines/blob/master/sdk/python/kfp/compiler/compiler.py#L457. That way, we can move the util to the compiler, since it is very k8s-specific.

WDYT?

Contributor Author:
I'm all for the DSL hiding k8s. However, the pipeline sanitizes the names of operators, which will be part of the final argo yaml (in the Pipeline.add_op() function).
I can sanitize all the op names in the compiler code instead, though.

Contributor:
Ah yes. Sorry, I mixed up the pipeline name with the step name. I think we can sanitize the name in the compiler too, as you mentioned.

That way, a different "compiler" may choose to sanitize it in a different way, or even not sanitize it at all.

Contributor Author:
Done

Contributor @Ark-kun, Jan 5, 2019:
The Pipeline class (not to be confused with the @pipeline decorator) is a compiler helper class. It can probably be moved to the DSL compiler. It's only used during compilation, and it makes sense to sanitize the names/ids at that point. The original name remains untouched in op.human_name.

Contributor Author:
The Pipeline class is used in the @pipeline decorator, and it might not be a good idea to move the Pipeline class to the Compiler, because the dsl would then depend on the compiler. That would create a dependency loop, since the compiler depends on the dsl library. Right?

Contributor:
> The Pipeline class is used in the @pipeline decorator

This is an implementation detail. We've talked with @qimingj and AFAIK he agreed that it was a good idea to unlink them, like we did with @python_component. I've prepared the code for the most common case, but there was a slight problem with multi-pipeline file compilation. If we deprecate that feature (it's not currently used anywhere), we can unlink them more easily.

Contributor:
That way, a different "compiler" may choose to sanitize it in a different way, or even not sanitize it at all.

I agree with that. That's why I moved the sanitization code from the ContainerOp to the compiler-related Pipeline class. As you remember, I tried to detangle the ContainerOp from the Pipeline even more, by making it not required.

Contributor:
Here is a proposal: Let's break the dependency between the DSL classes and the compiler by adding generic events/hooks for events like ContainerOp creation or @pipeline application. The compiler can set the handlers for those hooks to execute some compiler-specific code. This way, the DSL does not depend on the compiler.
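
For concreteness, a minimal sketch of what such a hook mechanism could look like. All names here (e.g. `register_op_created_handler`) are hypothetical and not part of this PR:

```python
# Hypothetical sketch: the DSL fires a hook on ContainerOp creation;
# a compiler registers a handler instead of the DSL importing compiler code.
_op_created_handlers = []

def register_op_created_handler(handler):
    """A compiler registers a callback here (hypothetical API)."""
    _op_created_handlers.append(handler)

class ContainerOp:
    def __init__(self, name, image):
        self.human_name = name   # the DSL keeps the user's original name untouched
        self.image = image
        for handler in _op_created_handlers:
            handler(self)        # e.g. an Argo compiler sanitizes/records the op here

# Example: the compiler side hooks in without the DSL knowing about it.
created_ops = []
register_op_created_handler(created_ops.append)
ContainerOp('My Step', 'python:3.5-jessie')
print(created_ops[0].human_name)  # My Step
```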

Contributor:
@Ark-kun can you send a separate change and we can discuss more details? I feel it should be a separate change from this one. Small changes are more manageable.

The reason we had a "Pipeline" class are: 1) we need a "scope" that can record all ContainerOp objects. Hence the global Pipeline._default_pipeline variable. 2) Someday we can expose the Pipeline class to support dynamic pipeline construction in DSL (we don't expose that now because we want to limit the surfacing area and promote pipeline function).

Events/hooks are one option. We can compare the approaches by whether they 1) keep the DSL as simple as it is now, 2) favor simplicity for compiler providers, and 3) reduce or remove global variables.
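
A compressed sketch of the "scope" idea from point 1 above. This is simplified: the real class also generates unique step ids, tracks op groups, and hands its state to the compiler.

```python
# Simplified sketch: the active Pipeline records every op registered while it
# is the current (default) pipeline. Conflict handling and groups are omitted.
class Pipeline:
    _default_pipeline = None  # the global "current scope"

    def __init__(self, name):
        self.name = name
        self.ops = {}

    def __enter__(self):
        Pipeline._default_pipeline = self
        return self

    def __exit__(self, *exc):
        Pipeline._default_pipeline = None

    def add_op(self, op, step_id):
        self.ops[step_id] = op

# Usage: ops added while the pipeline is active are recorded in its scope.
with Pipeline('my-pipeline') as p:
    p.add_op(object(), 'step-1')
print(list(p.ops))  # ['step-1']
```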

step_id = kubernetes_name
#If there is an existing op with this name then generate a new name.
if step_id in self.ops:
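The conflict-handling branch of add_op is truncated in this view; below is a hedged sketch of how such renaming might work. The helper name and suffix scheme are illustrative only and may differ from the actual code.

```python
# Illustrative only: if a step id already exists, append an increasing counter
# until the id is unique. The real scheme lives in Pipeline.add_op.
def _unique_step_id(step_id, existing_ids):
    if step_id not in existing_ids:
        return step_id
    suffix = 2
    while '%s-%s' % (step_id, suffix) in existing_ids:
        suffix += 1
    return '%s-%s' % (step_id, suffix)

print(_unique_step_id('train', {'train'}))  # train-2
```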
20 changes: 20 additions & 0 deletions sdk/python/kfp/dsl/_utils.py
@@ -0,0 +1,20 @@
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re

def _sanitize_k8s_name(name):
    """Moved here from _pipeline._make_kubernetes_name.

    Cleans and converts a name in the workflow to a Kubernetes-friendly form:
    lowercase letters, digits, and single dashes, with no leading or trailing dash.
    """
    return re.sub('-+', '-', re.sub('[^-0-9a-z]+', '-', name.lower())).lstrip('-').rstrip('-')
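
For reference, a quick illustration of what this sanitizer produces; the inputs here are hypothetical:

```python
from kfp.dsl._utils import _sanitize_k8s_name  # module path added in this PR

# Hypothetical inputs illustrating the lowercase/dash conversion.
print(_sanitize_k8s_name('My Pipeline Step #1'))  # my-pipeline-step-1
print(_sanitize_k8s_name('GCS_Download'))         # gcs-download
```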