
Add Node Affinity for TaskRuns that share PVC workspace #2630

Merged
2 changes: 1 addition & 1 deletion config/200-clusterrole.yaml
@@ -60,7 +60,7 @@ rules:
# Unclear if this access is actually required. Simply a hold-over from the previous
# incarnation of the controller's ClusterRole.
- apiGroups: ["apps"]
resources: ["deployments"]
resources: ["deployments", "statefulsets"]
verbs: ["get", "list", "create", "update", "delete", "patch", "watch"]
- apiGroups: ["apps"]
resources: ["deployments/finalizers"]
13 changes: 13 additions & 0 deletions docs/install.md
@@ -268,6 +268,19 @@ file lists the keys you can customize along with their default values.
To customize the behavior of the Pipelines Controller, modify the ConfigMap `feature-flags` as follows:
- `disable-affinity-assistant` - set this flag to disable the [Affinity Assistant](./workspaces.md#affinity-assistant-and-specifying-workspace-order-in-a-pipeline)
that is used to provide Node Affinity for `TaskRun` pods that share workspace volume.
The Affinity Assistant pods may be incompatible with NodeSelector and other affinity rules
configured for `TaskRun` pods.
**Note:** The Affinity Assistant uses [Inter-pod affinity and anti-affinity](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#inter-pod-affinity-and-anti-affinity),
which requires a substantial amount of processing and can slow down scheduling in large clusters
significantly. We do not recommend using it in clusters larger than several hundred nodes.
**Note:** Pod anti-affinity requires nodes to be consistently labelled; in other words, every
node in the cluster must have an appropriate label matching `topologyKey`. If some or all nodes
are missing the specified `topologyKey` label, it can lead to unintended behavior.
A minimal sketch of disabling the Affinity Assistant via the `feature-flags` ConfigMap is shown after this list.
- `disable-home-env-overwrite` - set this flag to `true` to prevent Tekton
from overriding the `$HOME` environment variable for the containers executing your `Steps`.
The default is `false`. For more information, see the [associated issue](https://github.com/tektoncd/pipeline/issues/2013).
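
For reference, below is a minimal sketch of the `feature-flags` ConfigMap with the Affinity Assistant disabled. The `tekton-pipelines` namespace is an assumption — use the namespace of your Tekton installation — and the ConfigMap's other keys are omitted here; in practice you would edit the existing ConfigMap (for example with `kubectl edit configmap feature-flags -n tekton-pipelines`) rather than replace it.

```yaml
# Sketch only: feature-flags ConfigMap with the Affinity Assistant disabled.
# The namespace and the omission of other keys are assumptions for brevity.
apiVersion: v1
kind: ConfigMap
metadata:
  name: feature-flags
  namespace: tekton-pipelines
data:
  disable-affinity-assistant: "true"
```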
2 changes: 2 additions & 0 deletions docs/labels.md
@@ -58,6 +58,8 @@ The following labels are added to resources automatically:
reference a `ClusterTask` will also receive `tekton.dev/task`.
- `tekton.dev/taskRun` is added to `Pods`, and contains the name of the
`TaskRun` that created the `Pod`.
- `app.kubernetes.io/instance` and `app.kubernetes.io/component` are added to
  Affinity Assistant `StatefulSets` and `Pods`. These labels are used for Pod Affinity for `TaskRuns`.
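
As an illustration, these labels would appear on an Affinity Assistant `StatefulSet` and its `Pod` roughly as sketched below; the instance and component values are placeholders, not taken from this PR.

```yaml
# Hypothetical sketch of the labels on an Affinity Assistant StatefulSet/Pod.
# The instance name is generated per PipelineRun workspace; both values shown
# here are placeholders.
metadata:
  labels:
    app.kubernetes.io/instance: affinity-assistant-6c87f9
    app.kubernetes.io/component: affinity-assistant
```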

## Examples

3 changes: 2 additions & 1 deletion docs/tasks.md
@@ -363,7 +363,8 @@ steps:
### Specifying `Workspaces`

[`Workspaces`](workspaces.md#using-workspaces-in-tasks) allow you to specify
one or more volumes that your `Task` requires during execution. For example:
one or more volumes that your `Task` requires during execution. It is recommended that `Tasks` use **at most**
Member
NIT: perhaps we could have a link here that points to an explanation about why we do recommend this

one writeable `Workspace`. For example:

```yaml
spec:
42 changes: 22 additions & 20 deletions docs/workspaces.md
@@ -15,7 +15,7 @@ weight: 5
- [Mapping `Workspaces` in `Tasks` to `TaskRuns`](#mapping-workspaces-in-tasks-to-taskruns)
- [Examples of `TaskRun` definition using `Workspaces`](#examples-of-taskrun-definition-using-workspaces)
- [Using `Workspaces` in `Pipelines`](#using-workspaces-in-pipelines)
- [Specifying `Workspace` order in a `Pipeline`](#specifying-workspace-order-in-a-pipeline)
- [Affinity Assistant and specifying `Workspace` order in a `Pipeline`](#affinity-assistant-and-specifying-workspace-order-in-a-pipeline)
- [Specifying `Workspaces` in `PipelineRuns`](#specifying-workspaces-in-pipelineruns)
- [Example `PipelineRun` definition using `Workspaces`](#example-pipelinerun-definition-using-workspaces)
- [Specifying `VolumeSources` in `Workspaces`](#specifying-volumesources-in-workspaces)
@@ -89,7 +89,8 @@ To configure one or more `Workspaces` in a `Task`, add a `workspaces` list with

Note the following:

- A `Task` definition can include as many `Workspaces` as it needs.
- A `Task` definition can include as many `Workspaces` as it needs. It is recommended that `Tasks` use
**at most** one _writable_ `Workspace`.
Member

NIT/Ditto: shall we point to an explanation here?

Is this true for any kind of workspace, regardless of the type of volume backing them?
A writeable workspace will be used in most cases to share data between tasks; however, I could imagine use cases where it might make sense to have more than one.

Member Author
@jlpettersson jlpettersson May 21, 2020

Thanks for reviewing!

> Is this true for any kind of workspace, regardless of the type of volume backing them?

I wrote "recommend" since it is not strictly needed, but it helps make a Task usable in most clusters.

We have workspace volume sources: Secret, ConfigMap, PersistentVolumeClaim and emptyDir. I consider Secret and ConfigMap to be read-only. You can have multiple writeable emptyDirs in your Task - but that is not useful in a PipelineRun.

You can have multiple writeable PersistentVolumeClaims if they support access mode ReadWriteMany - but there are few storage solutions available with that access mode, typically e.g. NFS servers. If you use those, this feature can be disabled.

PVCs with access mode ReadOnlyMany can be mounted on multiple Nodes at the same time, so that works - but it is read-only. I should probably update my code to not add the Affinity Assistant in those cases.

A PVC with access mode ReadWriteOnce (the most common access mode) can only be mounted on one Node at a time. In addition, such volumes most commonly live in a single datacenter/Availability Zone. So if a pipeline starts with e.g. two parallel tasks, they may be scheduled to two different Availability Zones - this goes fine until a Task tries to mount both volumes, located in different AZs, and the pipeline is deadlocked - but this only happens when the tasks are scheduled to different zones. This happened to me in the parallel example that I added to the examples; it caused flaky tests for the Tekton Pipelines project and had to be removed. I also documented all these technicalities in "Specifying workspace order in a pipeline", but that section is full of technicalities and even a warning. This was my main motivation to create this feature - in this PR that section of the documentation is less technical and the corner cases are solved.

But as you say, you can use multiple writable volumes - you just need to be careful and know what you are doing.

The improved performance with this PR was a side effect; my main motivation was to make it easy to use commonly available PVCs in parallel without deadlocks or Tasks that time out (as in the current warning).

I think it is good that we recommend designing Tasks with only one writeable workspace - this makes them fully functional in all cases when using this feature. There are probably other good solutions for cases that were first designed with multiple writable workspaces, e.g. using buckets or other storage that is not limited to one AZ and is not mounted on the Node?

This is a complex field where I ran into many problems over the last weeks; we should improve the documentation as you say. For me it is also important to make Tekton easy to use, without corner cases, in a non-technical way :)

Member

> I should probably update my code to not add the Affinity Assistant in those cases.

+1 - could also be a follow-up PR if you want; you'll need to rebase in any case.

> I think it is good that we recommend designing Tasks with only one writeable workspace [...]

I agree on the recommendation, I was just wondering if we should document some of the reasoning behind it, or what issues one might run into when using more than one.

Member Author

Yes, I'll take more extensive documentation about PVCs and what to think about in a separate PR.
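
As a side note to the discussion above, a `Workspace` backed by a `ReadWriteMany` volume could be declared in a `PipelineRun` roughly as sketched below, assuming the cluster's storage class actually supports that access mode (e.g. an NFS-backed provisioner); the size is a placeholder.

```yaml
# Sketch only: a PipelineRun workspace backed by a ReadWriteMany volume.
# Assumes the default storage class supports ReadWriteMany; size is a placeholder.
workspaces:
  - name: ws
    volumeClaimTemplate:
      spec:
        accessModes:
          - ReadWriteMany
        resources:
          requests:
            storage: 1Gi
```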

- A `readOnly` `Workspace` will have its volume mounted as read-only. Attempting to write
to a `readOnly` `Workspace` will result in errors and failed `TaskRuns`.
- `mountPath` can be either absolute or relative. Absolute paths start with `/` and relative paths
@@ -204,26 +205,27 @@ Include a `subPath` in the workspace binding to mount different parts of the same

The `subPath` specified in a `Pipeline` will be appended to any `subPath` specified as part of the `PipelineRun` workspace declaration. So a `PipelineRun` declaring a Workspace with `subPath` of `/foo` for a `Pipeline` who binds it to a `Task` with `subPath` of `/bar` will end up mounting the `Volume`'s `/foo/bar` directory.

#### Specifying `Workspace` order in a `Pipeline`
#### Affinity Assistant and specifying `Workspace` order in a `Pipeline`

Sharing a `Workspace` between `Tasks` requires you to define the order in which those `Tasks`
will be accessing that `Workspace` since different classes of storage have different limits
for concurrent reads and writes. For example, a `PersistentVolumeClaim` with
[access mode](https://kubernetes.io/docs/concepts/storage/persistent-volumes/#access-modes)
`ReadWriteOnce` only allow `Tasks` on the same node writing to it at once.

Using parallel `Tasks` in a `Pipeline` will work with `PersistentVolumeClaims` configured with
[access mode](https://kubernetes.io/docs/concepts/storage/persistent-volumes/#access-modes)
`ReadWriteMany` or `ReadOnlyMany` but you must ensure that those are available for your storage class.
When using `PersistentVolumeClaims` with access mode `ReadWriteOnce` for parallel `Tasks`, you can configure a
workspace with it's own `PersistentVolumeClaim` for each parallel `Task`.

Use the `runAfter` field in your `Pipeline` definition to define when a `Task` should be executed. For more
information, see the [`runAfter` documentation](pipelines.md#runAfter).

**Warning:** You *must* ensure that this order is compatible with the configured access modes for your `PersistentVolumeClaim`.
Parallel `Tasks` using the same `PersistentVolumeClaim` with access mode `ReadWriteOnce`, may execute on
different nodes and be forced to execute sequentially which may cause `Tasks` to time out.
write to or read from that `Workspace`. Use the `runAfter` field in your `Pipeline` definition
to define when a `Task` should be executed. For more information, see the [`runAfter` documentation](pipelines.md#runAfter).

When a `PersistentVolumeClaim` is used as volume source for a `Workspace` in a `PipelineRun`,
an Affinity Assistant will be created. The Affinity Assistant acts as a placeholder for `TaskRun` pods
sharing the same `Workspace`. All `TaskRun` pods within the `PipelineRun` that share the `Workspace`
will be scheduled to the same Node as the Affinity Assistant pod. This means that the Affinity Assistant is incompatible
with, for example, NodeSelectors or other affinity rules configured for the `TaskRun` pods. The Affinity Assistant
is deleted when the `PipelineRun` is completed. The Affinity Assistant can be disabled by setting the
[disable-affinity-assistant](install.md#customizing-basic-execution-parameters) feature gate.

**Note:** The Affinity Assistant uses [Inter-pod affinity and anti-affinity](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#inter-pod-affinity-and-anti-affinity),
which requires a substantial amount of processing and can slow down scheduling in large clusters
significantly. We do not recommend using it in clusters larger than several hundred nodes.

**Note:** Pod anti-affinity requires nodes to be consistently labelled; in other words, every
node in the cluster must have an appropriate label matching `topologyKey`. If some or all nodes
are missing the specified `topologyKey` label, it can lead to unintended behavior.
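
For illustration, the affinity that `TaskRun` pods sharing the `Workspace` receive is an inter-pod affinity on the Affinity Assistant's labels, roughly as sketched below; the label values shown are placeholders (the instance name is generated by the controller), while the `topologyKey` is the node hostname label used by this mechanism.

```yaml
# Rough sketch of the affinity applied to a TaskRun pod that shares a PVC
# workspace. The instance and component label values are placeholders; the
# controller sets the real values.
affinity:
  podAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
      - labelSelector:
          matchLabels:
            app.kubernetes.io/instance: affinity-assistant-6c87f9
            app.kubernetes.io/component: affinity-assistant
        topologyKey: kubernetes.io/hostname
```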

#### Specifying `Workspaces` in `PipelineRuns`

@@ -0,0 +1,205 @@
# This example shows how both sequential and parallel Tasks can share data
Member
❤️ Documentation on tests, thank you!!

# using a PersistentVolumeClaim as a workspace. The TaskRun pods that share
# workspace will be scheduled to the same Node in your cluster with an
# Affinity Assistant (unless it is disabled). The REPORTER task does not
# use a workspace so it does not get affinity to the Affinity Assistant
# and can be scheduled to any Node. If multiple concurrent PipelineRuns are
# executed, their Affinity Assistant pods will repel each other to different
# Nodes in a best-effort fashion.
#
# A PipelineRun will pass a message parameter to the Pipeline in this example.
# The STARTER task will write the message to a file in the workspace. The UPPER
# and LOWER tasks will execute in parallel and process the message written by
# the STARTER, and transform it to upper case and lower case. The REPORTER task
# will use the Task Result from the UPPER task and print it - it is intended
# to mimic a Task that sends data to an external service and shows a Task that
# doesn't use a workspace. The VALIDATOR task will validate the result from
# UPPER and LOWER.
#
# Use the runAfter property in a Pipeline to declare that a task depends on
# another task. Output can be shared either via a Task Result (e.g. like the REPORTER task)
# or via files in a workspace.
#
# -- (upper) -- (reporter)
# / \
# (starter) (validator)
# \ /
# -- (lower) ------------

apiVersion: tekton.dev/v1beta1
kind: Pipeline
metadata:
name: parallel-pipeline
spec:
params:
- name: message
type: string

workspaces:
- name: ws

tasks:
- name: starter # Tasks that do not declare a runAfter property
taskRef: # will start execution immediately
name: persist-param
params:
- name: message
value: $(params.message)
workspaces:
- name: task-ws
workspace: ws
subPath: init

- name: upper
runAfter: # Note the use of runAfter here to declare that this task
- starter # depends on a previous task
taskRef:
name: to-upper
params:
- name: input-path
value: init/message
workspaces:
- name: w
workspace: ws

- name: lower
runAfter:
- starter
taskRef:
name: to-lower
params:
- name: input-path
value: init/message
workspaces:
- name: w
workspace: ws

- name: reporter # This task does not use a workspace and may be scheduled to
runAfter: # any Node in the cluster.
- upper
taskRef:
name: result-reporter
params:
- name: result-to-report
value: $(tasks.upper.results.message) # A result from a previous task is used as param

- name: validator # This task validates the output from the upper and lower Tasks
runAfter: # It does not strictly depend on the reporter Task
- reporter # But you may want to skip this task if the reporter Task fails
- lower
taskRef:
name: validator
workspaces:
- name: files
workspace: ws
---
apiVersion: tekton.dev/v1beta1
kind: Task
metadata:
name: persist-param
spec:
params:
- name: message
type: string
results:
- name: message
description: A result message
steps:
- name: write
image: ubuntu
script: echo $(params.message) | tee $(workspaces.task-ws.path)/message $(results.message.path)
workspaces:
- name: task-ws
---
apiVersion: tekton.dev/v1beta1
kind: Task
metadata:
name: to-upper
spec:
description: |
This task reads and processes a file from the workspace and writes the result
both to a file in the workspace and as a Task Result.
params:
- name: input-path
type: string
results:
- name: message
description: Input message in upper case
steps:
- name: to-upper
image: ubuntu
script: cat $(workspaces.w.path)/$(params.input-path) | tr '[:lower:]' '[:upper:]' | tee $(workspaces.w.path)/upper $(results.message.path)
workspaces:
- name: w
---
apiVersion: tekton.dev/v1beta1
kind: Task
metadata:
name: to-lower
spec:
description: |
This task reads and processes a file from the workspace and writes the result
both to a file in the workspace and as a Task Result.
params:
- name: input-path
type: string
results:
- name: message
description: Input message in lower case
steps:
- name: to-lower
image: ubuntu
script: cat $(workspaces.w.path)/$(params.input-path) | tr '[:upper:]' '[:lower:]' | tee $(workspaces.w.path)/lower $(results.message.path)
workspaces:
- name: w
---
apiVersion: tekton.dev/v1beta1
kind: Task
metadata:
name: result-reporter
spec:
description: |
This task is supposed to mimic a service that posts data from the Pipeline,
e.g. to a remote HTTP service or a Slack notification.
params:
- name: result-to-report
type: string
steps:
- name: report-result
image: ubuntu
script: echo $(params.result-to-report)
---
apiVersion: tekton.dev/v1beta1
kind: Task
metadata:
name: validator
spec:
steps:
- name: validate-upper
image: ubuntu
script: cat $(workspaces.files.path)/upper | grep HELLO\ TEKTON
- name: validate-lower
image: ubuntu
script: cat $(workspaces.files.path)/lower | grep hello\ tekton
workspaces:
- name: files
---
apiVersion: tekton.dev/v1beta1
kind: PipelineRun
metadata:
generateName: parallel-pipelinerun-
spec:
params:
- name: message
value: Hello Tekton
pipelineRef:
name: parallel-pipeline
workspaces:
- name: ws
volumeClaimTemplate:
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
33 changes: 32 additions & 1 deletion pkg/pod/pod.go
@@ -26,6 +26,7 @@ import (
"github.com/tektoncd/pipeline/pkg/names"
"github.com/tektoncd/pipeline/pkg/system"
"github.com/tektoncd/pipeline/pkg/version"
"github.com/tektoncd/pipeline/pkg/workspace"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
@@ -217,6 +218,17 @@ func MakePod(images pipeline.Images, taskRun *v1beta1.TaskRun, taskSpec v1beta1.
return nil, err
}

// Using node affinity on taskRuns that share a PVC workspace, via an Affinity Assistant,
// is mutually exclusive with other affinity on taskRun pods. If other
// affinity is wanted, it should be added to the Affinity Assistant pod unless the
// assistant is disabled. When the Affinity Assistant is disabled, an affinityAssistantName is not set.
var affinity *corev1.Affinity
if affinityAssistantName := taskRun.Annotations[workspace.AnnotationAffinityAssistantName]; affinityAssistantName != "" {
affinity = nodeAffinityUsingAffinityAssistant(affinityAssistantName)
} else {
affinity = podTemplate.Affinity
}

mergedPodContainers := stepContainers

// Merge sidecar containers with step containers.
@@ -263,7 +275,7 @@ func MakePod(images pipeline.Images, taskRun *v1beta1.TaskRun, taskSpec v1beta1.
Volumes: volumes,
NodeSelector: podTemplate.NodeSelector,
Tolerations: podTemplate.Tolerations,
Affinity: podTemplate.Affinity,
Affinity: affinity,
SecurityContext: podTemplate.SecurityContext,
RuntimeClassName: podTemplate.RuntimeClassName,
AutomountServiceAccountToken: podTemplate.AutomountServiceAccountToken,
@@ -294,6 +306,25 @@ func MakeLabels(s *v1beta1.TaskRun) map[string]string {
return labels
}

// nodeAffinityUsingAffinityAssistant achieves Node Affinity for taskRun pods
// sharing a PVC workspace by setting PodAffinity so that the taskRun pods are
// scheduled to the Node where the Affinity Assistant pod is scheduled.
func nodeAffinityUsingAffinityAssistant(affinityAssistantName string) *corev1.Affinity {
return &corev1.Affinity{
PodAffinity: &corev1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{{
LabelSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{
workspace.LabelInstance: affinityAssistantName,
workspace.LabelComponent: workspace.ComponentNameAffinityAssistant,
},
},
TopologyKey: "kubernetes.io/hostname",
}},
},
}
}

// getLimitRangeMinimum gets all LimitRanges in a namespace and
// searches for if a container minimum is specified. Due to
// https://github.com/kubernetes/kubernetes/issues/79496, the