Skip to content

Commit

Permalink
Adds error location in error results
Browse files Browse the repository at this point in the history
This commit extracts the login in `conditions.go` that searches for the failed pod and its container. It merges the information of the error location (pod and container) into the error results.

Fix tests
  • Loading branch information
Baran Dalgic authored and dalbar committed Nov 3, 2021
1 parent 2d696d5 commit 77b7cf2
Show file tree
Hide file tree
Showing 9 changed files with 158 additions and 108 deletions.
5 changes: 3 additions & 2 deletions cmd/git/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,14 @@ func (e ExitError) Error() string {

type settings struct {
help bool
skipValidation bool
depth uint
url string
revision string
depth uint
target string
resultFileCommitSha string
resultFileCommitAuthor string
secretPath string
skipValidation bool
}

var flagValues settings
Expand Down Expand Up @@ -100,6 +100,7 @@ func main() {
// Execute performs flag parsing, input validation and the Git clone
func Execute(ctx context.Context) error {
flagValues = settings{depth: 1}

pflag.Parse()

if flagValues.help {
Expand Down
12 changes: 10 additions & 2 deletions deploy/crds/shipwright.io_buildruns.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -466,9 +466,17 @@ spec:
pod:
type: string
type: object
failure:
description: Failure contains error details that are collected and surfaced from TaskRun
failureDetails:
description: FailureDetails contains error details that are collected and surfaced from TaskRun
properties:
location:
description: FailedAt describes the location where the failure happened
properties:
container:
type: string
pod:
type: string
type: object
message:
type: string
reason:
Expand Down
13 changes: 7 additions & 6 deletions pkg/apis/build/v1alpha1/buildrun_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,9 @@ type BuildRunStatus struct {
// +optional
FailedAt *FailedAt `json:"failedAt,omitempty"`

// Failure contains error details that are collected and surfaced from TaskRun
// FailureDetails contains error details that are collected and surfaced from TaskRun
// +optional
Failure *Failure `json:"failure,omitempty"`
FailureDetails *FailureDetails `json:"failureDetails,omitempty"`
}

// FailedAt describes the location where the failure happened
Expand All @@ -160,10 +160,11 @@ type FailedAt struct {
Container string `json:"container,omitempty"`
}

// Failure describes an error while building images
type Failure struct {
Reason string `json:"reason,omitempty"`
Message string `json:"message,omitempty"`
// FailureDetails describes an error while building images
type FailureDetails struct {
Reason string `json:"reason,omitempty"`
Message string `json:"message,omitempty"`
Location *FailedAt `json:"location,omitempty"`
}

// BuildRef can be used to refer to a specific instance of a Build.
Expand Down
2 changes: 1 addition & 1 deletion pkg/reconciler/buildrun/buildrun.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ func (r *ReconcileBuildRun) Reconcile(request reconcile.Request) (reconcile.Resu
return reconcile.Result{}, err
}

resources.UpdateBuildRunUsingTaskFailures(buildRun, lastTaskRun)
resources.UpdateBuildRunUsingTaskFailures(ctx, r.client, buildRun, lastTaskRun)
taskRunStatus := trCondition.Status

// check if we should delete the generated service account by checking the build run spec and that the task run is complete
Expand Down
27 changes: 4 additions & 23 deletions pkg/reconciler/buildrun/resources/conditions.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,11 @@ import (
"knative.dev/pkg/apis"
"sigs.k8s.io/controller-runtime/pkg/client"

buildv1alpha1 "github.com/shipwright-io/build/pkg/apis/build/v1alpha1"
"github.com/shipwright-io/build/pkg/ctxlog"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"

buildv1alpha1 "github.com/shipwright-io/build/pkg/apis/build/v1alpha1"
"github.com/shipwright-io/build/pkg/ctxlog"
)

// Common condition strings for reason, kind, etc.
Expand Down Expand Up @@ -70,8 +68,8 @@ func UpdateBuildRunUsingTaskRunCondition(ctx context.Context, client client.Clie

case v1beta1.TaskRunReasonFailed:
if taskRun.Status.CompletionTime != nil {
var pod corev1.Pod
if err := client.Get(ctx, types.NamespacedName{Namespace: taskRun.Namespace, Name: taskRun.Status.PodName}, &pod); err != nil {
pod, failedContainer, err := extractFailedPodAndContainer(ctx, client, taskRun)
if err != nil {
// when trying to customize the Condition Message field, ensure the Message cover the case
// when a Pod is deleted.
// Note: this is an edge case, but not doing this prevent a BuildRun from being marked as Failed
Expand All @@ -85,23 +83,6 @@ func UpdateBuildRunUsingTaskRunCondition(ctx context.Context, client client.Clie

buildRun.Status.FailedAt = &buildv1alpha1.FailedAt{Pod: pod.Name}

// Since the container status list is not sorted, as a quick workaround mark all failed containers
var failures = make(map[string]struct{})
for _, containerStatus := range pod.Status.ContainerStatuses {
if containerStatus.State.Terminated != nil && containerStatus.State.Terminated.ExitCode != 0 {
failures[containerStatus.Name] = struct{}{}
}
}

// Find the first container that failed
var failedContainer *corev1.Container
for i, container := range pod.Spec.Containers {
if _, has := failures[container.Name]; has {
failedContainer = &pod.Spec.Containers[i]
break
}
}

if failedContainer != nil {
buildRun.Status.FailedAt.Container = failedContainer.Name
message = fmt.Sprintf("buildrun step %s failed in pod %s, for detailed information: kubectl --namespace %s logs %s --container=%s",
Expand Down
112 changes: 112 additions & 0 deletions pkg/reconciler/buildrun/resources/failureDetails.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Copyright The Shipwright Contributors
//
// SPDX-License-Identifier: Apache-2.0

package resources

import (
"context"
"encoding/json"
"fmt"
buildv1alpha1 "github.com/shipwright-io/build/pkg/apis/build/v1alpha1"
"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1beta1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"knative.dev/pkg/apis"
"sigs.k8s.io/controller-runtime/pkg/client"
)

const (
prefixedResultErrorReason = prefixParamsResultsVolumes + "-" + resultErrorReason
prefixedResultErrorMessage = prefixParamsResultsVolumes + "-" + resultErrorMessage
)

// UpdateBuildRunUsingTaskFailures is extracting failures from taskRun steps and adding them to buildRun (mutates)
func UpdateBuildRunUsingTaskFailures(ctx context.Context, client client.Client, buildRun *buildv1alpha1.BuildRun, taskRun *v1beta1.TaskRun) {
trCondition := taskRun.Status.GetCondition(apis.ConditionSucceeded)

// only extract failures when failing condition is present
if trCondition != nil && v1beta1.TaskRunReason(trCondition.Reason) == v1beta1.TaskRunReasonFailed {
buildRun.Status.FailureDetails = extractFailureDetails(ctx, client, taskRun)
}
}

func extractFailureReasonAndMessage(taskRun *v1beta1.TaskRun) (errorReason string, errorMessage string, hasReasonAndMessage bool) {
for _, step := range taskRun.Status.Steps {
message := step.Terminated.Message
var taskRunResults []v1beta1.PipelineResourceResult

if err := json.Unmarshal([]byte(message), &taskRunResults); err != nil {
continue
}

for _, result := range taskRunResults {
if result.Key == prefixedResultErrorMessage {
errorMessage = result.Value
}

if result.Key == prefixedResultErrorReason {
errorReason = result.Value
}
}
}

return errorReason, errorMessage, true
}

func extractFailedPodAndContainer(ctx context.Context, client client.Client, taskRun *v1beta1.TaskRun) (*v1.Pod, *v1.Container, error) {
var pod v1.Pod
if err := client.Get(ctx, types.NamespacedName{Namespace: taskRun.Namespace, Name: taskRun.Status.PodName}, &pod); err != nil {
return nil, nil, err
}

failures := make(map[string]struct{})
for _, containerStatus := range pod.Status.ContainerStatuses {
if containerStatus.State.Terminated != nil && containerStatus.State.Terminated.ExitCode != 0 {
failures[containerStatus.Name] = struct{}{}
}
}

// Find the first container that failed
var failedContainer *v1.Container
for i, container := range pod.Spec.Containers {
if _, has := failures[container.Name]; has {
failedContainer = &pod.Spec.Containers[i]
break
}
}

return &pod, failedContainer, nil
}

func extractFailureDetails(ctx context.Context, client client.Client, taskRun *v1beta1.TaskRun) (failure *buildv1alpha1.FailureDetails) {
failure = &buildv1alpha1.FailureDetails{ }
failure.Location = &buildv1alpha1.FailedAt{ Pod: taskRun.Status.PodName }

if reason, message, hasReasonAndMessage := extractFailureReasonAndMessage(taskRun); hasReasonAndMessage {
failure.Reason = reason
failure.Message = message
}

pod, container, _ := extractFailedPodAndContainer(ctx, client, taskRun)

if pod != nil && container != nil {
failure.Location.Pod = pod.Name
failure.Location.Container = container.Name
}

return failure
}

func getFailureDetailsTaskSpecResults() []v1beta1.TaskResult {
return []v1beta1.TaskResult{
{
Name: fmt.Sprintf("%s-%s", prefixParamsResultsVolumes, resultErrorMessage),
Description: "The error description of the task run",
},
{
Name: fmt.Sprintf("%s-%s", prefixParamsResultsVolumes, resultErrorReason),
Description: "The error reason of the task run",
},
}
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
// Copyright The Shipwright Contributors
//
// SPDX-License-Identifier: Apache-2.0

package resources

import (
"context"
"encoding/json"
"fmt"
"github.com/shipwright-io/build/pkg/controller/fakes"

. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
Expand All @@ -14,6 +20,8 @@ import (

var _ = Describe("Surfacing errors", func() {
Context("resources.UpdateBuildRunUsingTaskFailures", func() {
ctx := context.Background()
client := &fakes.FakeClient{}

It("surfaces errors to BuildRun in failed TaskRun", func() {
redTaskRun := v1beta1.TaskRun{}
Expand All @@ -37,10 +45,10 @@ var _ = Describe("Surfacing errors", func() {
redTaskRun.Status.Steps = append(redTaskRun.Status.Steps, failedStep)
redBuild := v1alpha1.BuildRun{}

UpdateBuildRunUsingTaskFailures(&redBuild, &redTaskRun)
UpdateBuildRunUsingTaskFailures(ctx, client, &redBuild, &redTaskRun)

Expect(redBuild.Status.Failure.Message).To(Equal(errorMessageValue))
Expect(redBuild.Status.Failure.Reason).To(Equal(errorReasonValue))
Expect(redBuild.Status.FailureDetails.Message).To(Equal(errorMessageValue))
Expect(redBuild.Status.FailureDetails.Reason).To(Equal(errorReasonValue))
})

It("failed TaskRun without error reason and message", func() {
Expand All @@ -58,37 +66,38 @@ var _ = Describe("Surfacing errors", func() {
redTaskRun.Status.Steps = append(redTaskRun.Status.Steps, failedStep)
redBuild := v1alpha1.BuildRun{}

UpdateBuildRunUsingTaskFailures(&redBuild, &redTaskRun)
UpdateBuildRunUsingTaskFailures(ctx, client, &redBuild, &redTaskRun)

Expect(redBuild.Status.Failure).To(BeNil())
Expect(redBuild.Status.FailureDetails.Reason).To(BeEmpty())
Expect(redBuild.Status.FailureDetails.Message).To(BeEmpty())
})

It("no errors present in BuildRun for successful TaskRun", func() {
greenTaskRun := v1beta1.TaskRun{}
greenTaskRun.Status.Conditions = append(greenTaskRun.Status.Conditions, apis.Condition{Type: apis.ConditionSucceeded})
greenBuildRun := v1alpha1.BuildRun{}

UpdateBuildRunUsingTaskFailures(&greenBuildRun, &greenTaskRun)
UpdateBuildRunUsingTaskFailures(ctx, client, &greenBuildRun, &greenTaskRun)

Expect(greenBuildRun.Status.Failure).To(BeNil())
Expect(greenBuildRun.Status.FailureDetails).To(BeNil())
})

It("TaskRun has not condition succeeded so nothing to do", func() {
unfinishedTaskRun := v1beta1.TaskRun{}
unfinishedTaskRun.Status.Conditions = append(unfinishedTaskRun.Status.Conditions, apis.Condition{Type: apis.ConditionReady})
unfinishedBuildRun := v1alpha1.BuildRun{}

UpdateBuildRunUsingTaskFailures(&unfinishedBuildRun, &unfinishedTaskRun)
Expect(unfinishedBuildRun.Status.Failure).To(BeNil())
UpdateBuildRunUsingTaskFailures(ctx, client, &unfinishedBuildRun, &unfinishedTaskRun)
Expect(unfinishedBuildRun.Status.FailureDetails).To(BeNil())
})

It("TaskRun has a unknown reason", func() {
unknownTaskRun := v1beta1.TaskRun{}
unknownTaskRun.Status.Conditions = append(unknownTaskRun.Status.Conditions, apis.Condition{Type: apis.ConditionSucceeded, Reason: "random"})
unknownBuildRun := v1alpha1.BuildRun{}

UpdateBuildRunUsingTaskFailures(&unknownBuildRun, &unknownTaskRun)
Expect(unknownBuildRun.Status.Failure).To(BeNil())
UpdateBuildRunUsingTaskFailures(ctx, client, &unknownBuildRun, &unknownTaskRun)
Expect(unknownBuildRun.Status.FailureDetails).To(BeNil())
})
})
})
52 changes: 0 additions & 52 deletions pkg/reconciler/buildrun/resources/failures.go

This file was deleted.

Loading

0 comments on commit 77b7cf2

Please sign in to comment.