From 0c3fb79cd5ab11502ef54f41d2c2fbc89abb886b Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Wed, 19 Feb 2020 15:19:19 -0800 Subject: [PATCH 01/41] Working version 1 --- flink-development-config.yaml | 9 ++ integ/checkpoint_failure_test.go | 4 +- integ/simple_test.go | 18 ++-- integ/utils/utils.go | 2 +- pkg/apis/app/v1beta1/types.go | 36 +++++-- pkg/apis/app/v1beta1/zz_generated.deepcopy.go | 32 +++++- pkg/controller/flink/flink.go | 100 ++++++++++-------- pkg/controller/flink/flink_test.go | 96 +++++++++-------- .../flinkapplication/flink_state_machine.go | 47 ++++---- .../flink_state_machine_test.go | 62 +++++++---- 10 files changed, 251 insertions(+), 155 deletions(-) create mode 100644 flink-development-config.yaml diff --git a/flink-development-config.yaml b/flink-development-config.yaml new file mode 100644 index 00000000..d315e7c3 --- /dev/null +++ b/flink-development-config.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: flink-development-config +data: + APPLICATION_ENV: development + JOB_MANAGER_HEAP_MB: "200" + TASK_MANAGER_SLOTS: "2" + TASK_MANAGER_HEAP_MB: "200" diff --git a/integ/checkpoint_failure_test.go b/integ/checkpoint_failure_test.go index 7cc85537..0a41fc9e 100644 --- a/integ/checkpoint_failure_test.go +++ b/integ/checkpoint_failure_test.go @@ -45,9 +45,9 @@ func failingJobTest(s *IntegSuite, c *C, testName string, causeFailure func()) { // And the job should not have been updated newApp, err := s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) - c.Assert(newApp.Status.JobStatus.JobID, Equals, app.Status.JobStatus.JobID) + c.Assert(newApp.Status.AppStatus[0].JobStatus.JobID, Equals, app.Status.AppStatus[0].JobStatus.JobID) - endpoint := fmt.Sprintf("jobs/%s", app.Status.JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s", app.Status.AppStatus[0].JobStatus.JobID) _, err = s.Util.FlinkAPIGet(app, endpoint) c.Assert(err, IsNil) diff --git a/integ/simple_test.go b/integ/simple_test.go index ca6ccda7..1d4008c8 
100644 --- a/integ/simple_test.go +++ b/integ/simple_test.go @@ -28,12 +28,12 @@ func updateAndValidate(c *C, s *IntegSuite, name string, updateFn func(app *v1be // check that it really updated newApp, err := s.Util.GetFlinkApplication(name) c.Assert(err, IsNil) - c.Assert(newApp.Status.JobStatus.JobID, Not(Equals), app.Status.JobStatus.JobID) + c.Assert(newApp.Status.AppStatus[0].JobStatus.JobID, Not(Equals), app.Status.AppStatus[0].JobStatus.JobID) log.Info("New job started successfully") // check that we savepointed and restored correctly - endpoint := fmt.Sprintf("jobs/%s/checkpoints", newApp.Status.JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s/checkpoints", newApp.Status.AppStatus[0].JobStatus.JobID) res, err := s.Util.FlinkAPIGet(newApp, endpoint) c.Assert(err, IsNil) @@ -140,13 +140,13 @@ func (s *IntegSuite) TestSimple(c *C) { c.Assert(s.Util.WaitForAllTasksRunning(newApp.Name), IsNil) // the job id should have changed - jobID := newApp.Status.JobStatus.JobID + jobID := newApp.Status.AppStatus[0].JobStatus.JobID newApp, err = s.Util.GetFlinkApplication(newApp.Name) c.Assert(err, IsNil) - c.Assert(newApp.Status.JobStatus.JobID, Not(Equals), jobID) + c.Assert(newApp.Status.AppStatus[0].JobStatus.JobID, Not(Equals), jobID) // we should have restored from our savepoint - endpoint := fmt.Sprintf("jobs/%s/checkpoints", newApp.Status.JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s/checkpoints", newApp.Status.AppStatus[0].JobStatus.JobID) res, err := s.Util.FlinkAPIGet(newApp, endpoint) c.Assert(err, IsNil) @@ -191,7 +191,7 @@ func (s *IntegSuite) TestSimple(c *C) { log.Info("User cancelled deploy. 
Job is in deploy failed, waiting for tasks to start") // but the job should still be running - c.Assert(newApp.Status.JobStatus.State, Equals, v1beta1.Running) + c.Assert(newApp.Status.AppStatus[0].JobStatus.State, Equals, v1beta1.Running) log.Info("Attempting to roll forward with fix") // Fixing update @@ -225,7 +225,7 @@ func (s *IntegSuite) TestSimple(c *C) { jobList := jobMap["jobs"].([]interface{}) for _, j := range jobList { job := j.(map[string]interface{}) - if job["id"] == app.Status.JobStatus.JobID { + if job["id"] == app.Status.AppStatus[0].JobStatus.JobID { return job } } @@ -284,7 +284,7 @@ func (s *IntegSuite) TestRecovery(c *C) { app, err := s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) - endpoint := fmt.Sprintf("jobs/%s/checkpoints", app.Status.JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s/checkpoints", app.Status.AppStatus[0].JobStatus.JobID) for { res, err := s.Util.FlinkAPIGet(app, endpoint) c.Assert(err, IsNil) @@ -324,7 +324,7 @@ func (s *IntegSuite) TestRecovery(c *C) { // wait until the new job is launched newApp, err := s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) - if newApp.Status.JobStatus.JobID != app.Status.JobStatus.JobID { + if newApp.Status.AppStatus[0].JobStatus.JobID != app.Status.AppStatus[0].JobStatus.JobID { break } time.Sleep(100 * time.Millisecond) diff --git a/integ/utils/utils.go b/integ/utils/utils.go index ed9b9a88..e47dea43 100644 --- a/integ/utils/utils.go +++ b/integ/utils/utils.go @@ -429,7 +429,7 @@ func (f *TestUtil) WaitForAllTasksRunning(name string) error { return err } - endpoint := fmt.Sprintf("jobs/%s", flinkApp.Status.JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s", flinkApp.Status.AppStatus[0].JobStatus.JobID) for { res, err := f.FlinkAPIGet(flinkApp, endpoint) if err != nil { diff --git a/pkg/apis/app/v1beta1/types.go b/pkg/apis/app/v1beta1/types.go index b357c523..a1443da6 100644 --- a/pkg/apis/app/v1beta1/types.go +++ b/pkg/apis/app/v1beta1/types.go @@ -165,12 
+165,11 @@ type FlinkJobStatus struct { } type FlinkApplicationStatus struct { - Phase FlinkApplicationPhase `json:"phase"` - StartedAt *metav1.Time `json:"startedAt,omitempty"` - LastUpdatedAt *metav1.Time `json:"lastUpdatedAt,omitempty"` - Reason string `json:"reason,omitempty"` - ClusterStatus FlinkClusterStatus `json:"clusterStatus,omitempty"` - JobStatus FlinkJobStatus `json:"jobStatus"` + Phase FlinkApplicationPhase `json:"phase"` + StartedAt *metav1.Time `json:"startedAt,omitempty"` + LastUpdatedAt *metav1.Time `json:"lastUpdatedAt,omitempty"` + Reason string `json:"reason,omitempty"` + AppStatus []FlinkSubApplicationStatus `json:"appStatus,omitempty"` FailedDeployHash string `json:"failedDeployHash,omitempty"` RollbackHash string `json:"rollbackHash,omitempty"` DeployHash string `json:"deployHash"` @@ -180,6 +179,19 @@ type FlinkApplicationStatus struct { LastSeenError *FlinkApplicationError `json:"lastSeenError,omitempty"` } +type FlinkApplicationVersion string + +const ( + BlueFlinkApplication FlinkApplicationVersion = "Blue" + GreenFlinkApplication FlinkApplicationVersion = "Green" +) + +type FlinkSubApplicationStatus struct { + Version FlinkApplicationVersion + ClusterStatus FlinkClusterStatus + JobStatus FlinkJobStatus +} + func (in *FlinkApplicationStatus) GetPhase() FlinkApplicationPhase { return in.Phase } @@ -244,8 +256,9 @@ func IsRunningPhase(phase FlinkApplicationPhase) bool { type DeploymentMode string const ( - DeploymentModeSingle DeploymentMode = "Single" - DeploymentModeDual DeploymentMode = "Dual" + DeploymentModeSingle DeploymentMode = "Single" + DeploymentModeDual DeploymentMode = "Dual" + DeploymentModeBlueGreen DeploymentMode = "BlueGreen" ) type DeleteMode string @@ -256,6 +269,13 @@ const ( DeleteModeNone DeleteMode = "None" ) +type SavepointMode string + +const ( + SavepointModeSavepointOnly SavepointMode = "Savepoint" + SavepointModeSavepointAndCancel SavepointMode = "SavepointAndCancel" +) + type HealthStatus string const ( diff 
--git a/pkg/apis/app/v1beta1/zz_generated.deepcopy.go b/pkg/apis/app/v1beta1/zz_generated.deepcopy.go index d31b1a91..5a90b634 100644 --- a/pkg/apis/app/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/app/v1beta1/zz_generated.deepcopy.go @@ -128,6 +128,11 @@ func (in *FlinkApplicationSpec) DeepCopyInto(out *FlinkApplicationSpec) { *out = make([]v1.LocalObjectReference, len(*in)) copy(*out, *in) } + if in.SecurityContext != nil { + in, out := &in.SecurityContext, &out.SecurityContext + *out = new(v1.PodSecurityContext) + (*in).DeepCopyInto(*out) + } in.FlinkConfig.DeepCopyInto(&out.FlinkConfig) in.TaskManagerConfig.DeepCopyInto(&out.TaskManagerConfig) in.JobManagerConfig.DeepCopyInto(&out.JobManagerConfig) @@ -200,8 +205,13 @@ func (in *FlinkApplicationStatus) DeepCopyInto(out *FlinkApplicationStatus) { in, out := &in.LastUpdatedAt, &out.LastUpdatedAt *out = (*in).DeepCopy() } - out.ClusterStatus = in.ClusterStatus - in.JobStatus.DeepCopyInto(&out.JobStatus) + if in.AppStatus != nil { + in, out := &in.AppStatus, &out.AppStatus + *out = make([]FlinkSubApplicationStatus, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } if in.LastSeenError != nil { in, out := &in.LastSeenError, &out.LastSeenError *out = new(FlinkApplicationError) @@ -268,6 +278,24 @@ func (in *FlinkJobStatus) DeepCopy() *FlinkJobStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FlinkSubApplicationStatus) DeepCopyInto(out *FlinkSubApplicationStatus) { + *out = *in + out.ClusterStatus = in.ClusterStatus + in.JobStatus.DeepCopyInto(&out.JobStatus) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlinkSubApplicationStatus. 
+func (in *FlinkSubApplicationStatus) DeepCopy() *FlinkSubApplicationStatus { + if in == nil { + return nil + } + out := new(FlinkSubApplicationStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *JobManagerConfig) DeepCopyInto(out *JobManagerConfig) { *out = *in diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index 8470b94c..d9e2f0d8 100644 --- a/pkg/controller/flink/flink.go +++ b/pkg/controller/flink/flink.go @@ -152,7 +152,7 @@ func getClusterOverviewURL(app *v1beta1.FlinkApplication) string { func getJobOverviewURL(app *v1beta1.FlinkApplication) string { externalURL := getExternalURLFromApp(app) if externalURL != "" { - return fmt.Sprintf(externalURL+client.WebUIAnchor+client.GetJobsOverviewURL, app.Status.JobStatus.JobID) + return fmt.Sprintf(externalURL+client.WebUIAnchor+client.GetJobsOverviewURL, app.Status.AppStatus[0].JobStatus.JobID) } return "" } @@ -204,11 +204,11 @@ func (f *Controller) GetJobsForApplication(ctx context.Context, application *v1b } func (f *Controller) GetJobForApplication(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { - if application.Status.JobStatus.JobID == "" { + if application.Status.AppStatus[0].JobStatus.JobID == "" { return nil, nil } - jobResponse, err := f.flinkClient.GetJobOverview(ctx, getURLFromApp(application, hash), application.Status.JobStatus.JobID) + jobResponse, err := f.flinkClient.GetJobOverview(ctx, getURLFromApp(application, hash), application.Status.AppStatus[0].JobStatus.JobID) if err != nil { return nil, err } @@ -219,8 +219,8 @@ func (f *Controller) GetJobForApplication(ctx context.Context, application *v1be // The operator for now assumes and is intended to run single application per Flink Cluster. 
// Once we move to run multiple applications, this has to be removed/updated func (f *Controller) getJobIDForApplication(application *v1beta1.FlinkApplication) (string, error) { - if application.Status.JobStatus.JobID != "" { - return application.Status.JobStatus.JobID, nil + if application.Status.AppStatus[0].JobStatus.JobID != "" { + return application.Status.AppStatus[0].JobStatus.JobID, nil } return "", errors.New("active job id not available") @@ -446,14 +446,14 @@ func (f *Controller) DeleteOldResourcesForApp(ctx context.Context, app *v1beta1. } func (f *Controller) FindExternalizedCheckpoint(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) { - checkpoint, err := f.flinkClient.GetLatestCheckpoint(ctx, getURLFromApp(application, hash), application.Status.JobStatus.JobID) + checkpoint, err := f.flinkClient.GetLatestCheckpoint(ctx, getURLFromApp(application, hash), application.Status.AppStatus[0].JobStatus.JobID) var checkpointPath string var checkpointTime int64 if err != nil { // we failed to query the JM, try to pull it out of the resource - if application.Status.JobStatus.LastCheckpointPath != "" && application.Status.JobStatus.LastCheckpointTime != nil { - checkpointPath = application.Status.JobStatus.LastCheckpointPath - checkpointTime = application.Status.JobStatus.LastCheckpointTime.Unix() + if application.Status.AppStatus[0].JobStatus.LastCheckpointPath != "" && application.Status.AppStatus[0].JobStatus.LastCheckpointTime != nil { + checkpointPath = application.Status.AppStatus[0].JobStatus.LastCheckpointPath + checkpointTime = application.Status.AppStatus[0].JobStatus.LastCheckpointTime.Unix() logger.Warnf(ctx, "Could not query JobManager for latest externalized checkpoint, using"+ " last seen checkpoint") } else { @@ -487,42 +487,45 @@ func (f *Controller) LogEvent(ctx context.Context, app *v1beta1.FlinkApplication func (f *Controller) CompareAndUpdateClusterStatus(ctx context.Context, application 
*v1beta1.FlinkApplication, hash string) (bool, error) { // Error retrieving cluster / taskmanagers overview (after startup/readiness) --> Red // If there is an error this loop will return with Health set to Red - oldClusterStatus := application.Status.ClusterStatus - application.Status.ClusterStatus.Health = v1beta1.Red + if len(application.Status.AppStatus) == 0 { + application.Status.AppStatus = make([]v1beta1.FlinkSubApplicationStatus, 2) + } + oldClusterStatus := application.Status.AppStatus[0].ClusterStatus + application.Status.AppStatus[0].ClusterStatus.Health = v1beta1.Red deployment, err := f.GetCurrentDeploymentsForApp(ctx, application) if deployment == nil || err != nil { return false, err } - application.Status.ClusterStatus.ClusterOverviewURL = getClusterOverviewURL(application) - application.Status.ClusterStatus.NumberOfTaskManagers = deployment.Taskmanager.Status.AvailableReplicas + application.Status.AppStatus[0].ClusterStatus.ClusterOverviewURL = getClusterOverviewURL(application) + application.Status.AppStatus[0].ClusterStatus.NumberOfTaskManagers = deployment.Taskmanager.Status.AvailableReplicas // Get Cluster overview response, err := f.flinkClient.GetClusterOverview(ctx, getURLFromApp(application, hash)) if err != nil { return false, err } // Update cluster overview - application.Status.ClusterStatus.AvailableTaskSlots = response.SlotsAvailable - application.Status.ClusterStatus.NumberOfTaskSlots = response.NumberOfTaskSlots + application.Status.AppStatus[0].ClusterStatus.AvailableTaskSlots = response.SlotsAvailable + application.Status.AppStatus[0].ClusterStatus.NumberOfTaskSlots = response.NumberOfTaskSlots // Get Healthy Taskmanagers tmResponse, tmErr := f.flinkClient.GetTaskManagers(ctx, getURLFromApp(application, hash)) if tmErr != nil { return false, tmErr } - application.Status.ClusterStatus.HealthyTaskManagers = getHealthyTaskManagerCount(tmResponse) + application.Status.AppStatus[0].ClusterStatus.HealthyTaskManagers = 
getHealthyTaskManagerCount(tmResponse) // Determine Health of the cluster. // Healthy TaskManagers == Number of taskmanagers --> Green // Else --> Yellow - if application.Status.ClusterStatus.HealthyTaskManagers == deployment.Taskmanager.Status.Replicas { - application.Status.ClusterStatus.Health = v1beta1.Green + if application.Status.AppStatus[0].ClusterStatus.HealthyTaskManagers == deployment.Taskmanager.Status.Replicas { + application.Status.AppStatus[0].ClusterStatus.Health = v1beta1.Green } else { - application.Status.ClusterStatus.Health = v1beta1.Yellow + application.Status.AppStatus[0].ClusterStatus.Health = v1beta1.Yellow } - return !apiequality.Semantic.DeepEqual(oldClusterStatus, application.Status.ClusterStatus), nil + return !apiequality.Semantic.DeepEqual(oldClusterStatus, application.Status.AppStatus[0].ClusterStatus), nil } func getHealthyTaskManagerCount(response *client.TaskManagersResponse) int32 { @@ -540,46 +543,49 @@ func getHealthyTaskManagerCount(response *client.TaskManagersResponse) int32 { func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, hash string) (bool, error) { // Initialize the last failing time to beginning of time if it's never been set - if app.Status.JobStatus.LastFailingTime == nil { + if len(app.Status.AppStatus) == 0 { + app.Status.AppStatus = make([]v1beta1.FlinkSubApplicationStatus, 2) + } + if app.Status.AppStatus[0].JobStatus.LastFailingTime == nil { initTime := metav1.NewTime(time.Time{}) - app.Status.JobStatus.LastFailingTime = &initTime + app.Status.AppStatus[0].JobStatus.LastFailingTime = &initTime } - oldJobStatus := app.Status.JobStatus - app.Status.JobStatus.JobID = oldJobStatus.JobID - jobResponse, err := f.flinkClient.GetJobOverview(ctx, getURLFromApp(app, hash), app.Status.JobStatus.JobID) + oldJobStatus := app.Status.AppStatus[0].JobStatus + app.Status.AppStatus[0].JobStatus.JobID = oldJobStatus.JobID + jobResponse, err := f.flinkClient.GetJobOverview(ctx, 
getURLFromApp(app, hash), app.Status.AppStatus[0].JobStatus.JobID) if err != nil { return false, err } - checkpoints, err := f.flinkClient.GetCheckpointCounts(ctx, getURLFromApp(app, hash), app.Status.JobStatus.JobID) + checkpoints, err := f.flinkClient.GetCheckpointCounts(ctx, getURLFromApp(app, hash), app.Status.AppStatus[0].JobStatus.JobID) if err != nil { return false, err } // Job status - app.Status.JobStatus.JobOverviewURL = getJobOverviewURL(app) - app.Status.JobStatus.State = v1beta1.JobState(jobResponse.State) + app.Status.AppStatus[0].JobStatus.JobOverviewURL = getJobOverviewURL(app) + app.Status.AppStatus[0].JobStatus.State = v1beta1.JobState(jobResponse.State) jobStartTime := metav1.NewTime(time.Unix(jobResponse.StartTime/1000, 0)) - app.Status.JobStatus.StartTime = &jobStartTime + app.Status.AppStatus[0].JobStatus.StartTime = &jobStartTime // Checkpoints status - app.Status.JobStatus.FailedCheckpointCount = checkpoints.Counts["failed"] - app.Status.JobStatus.CompletedCheckpointCount = checkpoints.Counts["completed"] - app.Status.JobStatus.JobRestartCount = checkpoints.Counts["restored"] + app.Status.AppStatus[0].JobStatus.FailedCheckpointCount = checkpoints.Counts["failed"] + app.Status.AppStatus[0].JobStatus.CompletedCheckpointCount = checkpoints.Counts["completed"] + app.Status.AppStatus[0].JobStatus.JobRestartCount = checkpoints.Counts["restored"] latestCheckpoint := checkpoints.Latest.Completed var lastCheckpointAgeSeconds int if latestCheckpoint != nil { lastCheckpointTimeMillis := metav1.NewTime(time.Unix(latestCheckpoint.LatestAckTimestamp/1000, 0)) - app.Status.JobStatus.LastCheckpointTime = &lastCheckpointTimeMillis - app.Status.JobStatus.LastCheckpointPath = latestCheckpoint.ExternalPath - lastCheckpointAgeSeconds = app.Status.JobStatus.LastCheckpointTime.Second() + app.Status.AppStatus[0].JobStatus.LastCheckpointTime = &lastCheckpointTimeMillis + app.Status.AppStatus[0].JobStatus.LastCheckpointPath = latestCheckpoint.ExternalPath + 
lastCheckpointAgeSeconds = app.Status.AppStatus[0].JobStatus.LastCheckpointTime.Second() } if checkpoints.Latest.Restored != nil { - app.Status.JobStatus.RestorePath = checkpoints.Latest.Restored.ExternalPath + app.Status.AppStatus[0].JobStatus.RestorePath = checkpoints.Latest.Restored.ExternalPath restoreTime := metav1.NewTime(time.Unix(checkpoints.Latest.Restored.RestoredTimeStamp/1000, 0)) - app.Status.JobStatus.RestoreTime = &restoreTime + app.Status.AppStatus[0].JobStatus.RestoreTime = &restoreTime } runningTasks := int32(0) @@ -599,29 +605,29 @@ func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1 } } - app.Status.JobStatus.RunningTasks = runningTasks - app.Status.JobStatus.TotalTasks = totalTasks + app.Status.AppStatus[0].JobStatus.RunningTasks = runningTasks + app.Status.AppStatus[0].JobStatus.TotalTasks = totalTasks // Health Status for job // Job is in FAILING state --> RED // Time since last successful checkpoint > maxCheckpointTime --> YELLOW // Else --> Green - if app.Status.JobStatus.State == v1beta1.Failing || - time.Since(app.Status.JobStatus.LastFailingTime.Time) < failingIntervalThreshold || + if app.Status.AppStatus[0].JobStatus.State == v1beta1.Failing || + time.Since(app.Status.AppStatus[0].JobStatus.LastFailingTime.Time) < failingIntervalThreshold || verticesInCreated > 0 { - app.Status.JobStatus.Health = v1beta1.Red + app.Status.AppStatus[0].JobStatus.Health = v1beta1.Red } else if time.Since(time.Unix(int64(lastCheckpointAgeSeconds), 0)) < maxCheckpointTime || runningTasks < totalTasks { - app.Status.JobStatus.Health = v1beta1.Yellow + app.Status.AppStatus[0].JobStatus.Health = v1beta1.Yellow } else { - app.Status.JobStatus.Health = v1beta1.Green + app.Status.AppStatus[0].JobStatus.Health = v1beta1.Green } // Update LastFailingTime - if app.Status.JobStatus.State == v1beta1.Failing { + if app.Status.AppStatus[0].JobStatus.State == v1beta1.Failing { currTime := metav1.Now() - app.Status.JobStatus.LastFailingTime = 
&currTime + app.Status.AppStatus[0].JobStatus.LastFailingTime = &currTime } - return !apiequality.Semantic.DeepEqual(oldJobStatus, app.Status.JobStatus), err + return !apiequality.Semantic.DeepEqual(oldJobStatus, app.Status.AppStatus[0].JobStatus), err } diff --git a/pkg/controller/flink/flink_test.go b/pkg/controller/flink/flink_test.go index 52d15127..0c939210 100644 --- a/pkg/controller/flink/flink_test.go +++ b/pkg/controller/flink/flink_test.go @@ -66,7 +66,13 @@ func getFlinkTestApp() v1beta1.FlinkApplication { app.Spec.Parallelism = 8 app.Name = testAppName app.Namespace = testNamespace - app.Status.JobStatus.JobID = testJobID + statuses := append(app.Status.AppStatus, v1beta1.FlinkSubApplicationStatus{ + JobStatus: v1beta1.FlinkJobStatus{ + JobID: testJobID, + }, + }) + app.Status.AppStatus = statuses + //app.Status.AppStatus[0].JobStatus.JobID = testJobID app.Spec.Image = testImage app.Spec.FlinkVersion = testFlinkVersion @@ -573,7 +579,7 @@ func TestGetJobsForApplicationErr(t *testing.T) { func TestFindExternalizedCheckpoint(t *testing.T) { flinkControllerForTest := getTestFlinkController() flinkApp := getFlinkTestApp() - flinkApp.Status.JobStatus.JobID = "jobid" + flinkApp.Status.AppStatus[0].JobStatus.JobID = "jobid" mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) mockJmClient.GetLatestCheckpointFunc = func(ctx context.Context, url string, jobId string) (*client.CheckpointStatistics, error) { @@ -593,10 +599,10 @@ func TestFindExternalizedCheckpoint(t *testing.T) { func TestFindExternalizedCheckpointFromStatus(t *testing.T) { flinkControllerForTest := getTestFlinkController() flinkApp := getFlinkTestApp() - flinkApp.Status.JobStatus.JobID = "jobid" - flinkApp.Status.JobStatus.LastCheckpointPath = "/tmp/checkpoint" + flinkApp.Status.AppStatus[0].JobStatus.JobID = "jobid" + flinkApp.Status.AppStatus[0].JobStatus.LastCheckpointPath = "/tmp/checkpoint" checkpointTime := metaV1.Now() - 
flinkApp.Status.JobStatus.LastCheckpointTime = &checkpointTime + flinkApp.Status.AppStatus[0].JobStatus.LastCheckpointTime = &checkpointTime mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) mockJmClient.GetLatestCheckpointFunc = func(ctx context.Context, url string, jobId string) (*client.CheckpointStatistics, error) { @@ -662,22 +668,22 @@ func TestClusterStatusUpdated(t *testing.T) { _, err = flinkControllerForTest.CompareAndUpdateClusterStatus(context.Background(), &flinkApp, "hash") assert.Nil(t, err) - assert.Equal(t, int32(1), flinkApp.Status.ClusterStatus.NumberOfTaskSlots) - assert.Equal(t, int32(0), flinkApp.Status.ClusterStatus.AvailableTaskSlots) - assert.Equal(t, int32(1), flinkApp.Status.ClusterStatus.HealthyTaskManagers) - assert.Equal(t, v1beta1.Green, flinkApp.Status.ClusterStatus.Health) - assert.Equal(t, "app-name.lyft.xyz/#/overview", flinkApp.Status.ClusterStatus.ClusterOverviewURL) + assert.Equal(t, int32(1), flinkApp.Status.AppStatus[0].ClusterStatus.NumberOfTaskSlots) + assert.Equal(t, int32(0), flinkApp.Status.AppStatus[0].ClusterStatus.AvailableTaskSlots) + assert.Equal(t, int32(1), flinkApp.Status.AppStatus[0].ClusterStatus.HealthyTaskManagers) + assert.Equal(t, v1beta1.Green, flinkApp.Status.AppStatus[0].ClusterStatus.Health) + assert.Equal(t, "app-name.lyft.xyz/#/overview", flinkApp.Status.AppStatus[0].ClusterStatus.ClusterOverviewURL) } func TestNoClusterStatusChange(t *testing.T) { flinkControllerForTest := getTestFlinkController() flinkApp := getFlinkTestApp() - flinkApp.Status.ClusterStatus.NumberOfTaskSlots = int32(1) - flinkApp.Status.ClusterStatus.AvailableTaskSlots = int32(0) - flinkApp.Status.ClusterStatus.HealthyTaskManagers = int32(1) - flinkApp.Status.ClusterStatus.Health = v1beta1.Green - flinkApp.Status.ClusterStatus.NumberOfTaskManagers = int32(1) + flinkApp.Status.AppStatus[0].ClusterStatus.NumberOfTaskSlots = int32(1) + flinkApp.Status.AppStatus[0].ClusterStatus.AvailableTaskSlots = 
int32(0) + flinkApp.Status.AppStatus[0].ClusterStatus.HealthyTaskManagers = int32(1) + flinkApp.Status.AppStatus[0].ClusterStatus.Health = v1beta1.Green + flinkApp.Status.AppStatus[0].ClusterStatus.NumberOfTaskManagers = int32(1) mockK8Cluster := flinkControllerForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.GetDeploymentsWithLabelFunc = func(ctx context.Context, namespace string, labelMap map[string]string) (*v1.DeploymentList, error) { tmDeployment := FetchTaskMangerDeploymentCreateObj(&flinkApp, testAppHash) @@ -766,10 +772,10 @@ func TestHealthyTaskmanagers(t *testing.T) { _, err := flinkControllerForTest.CompareAndUpdateClusterStatus(context.Background(), &flinkApp, hash) assert.Nil(t, err) - assert.Equal(t, int32(1), flinkApp.Status.ClusterStatus.NumberOfTaskSlots) - assert.Equal(t, int32(0), flinkApp.Status.ClusterStatus.AvailableTaskSlots) - assert.Equal(t, int32(0), flinkApp.Status.ClusterStatus.HealthyTaskManagers) - assert.Equal(t, v1beta1.Yellow, flinkApp.Status.ClusterStatus.Health) + assert.Equal(t, int32(1), flinkApp.Status.AppStatus[0].ClusterStatus.NumberOfTaskSlots) + assert.Equal(t, int32(0), flinkApp.Status.AppStatus[0].ClusterStatus.AvailableTaskSlots) + assert.Equal(t, int32(0), flinkApp.Status.AppStatus[0].ClusterStatus.HealthyTaskManagers) + assert.Equal(t, v1beta1.Yellow, flinkApp.Status.AppStatus[0].ClusterStatus.Health) } @@ -826,26 +832,26 @@ func TestJobStatusUpdated(t *testing.T) { }, nil } - flinkApp.Status.JobStatus.JobID = "abc" + flinkApp.Status.AppStatus[0].JobStatus.JobID = "abc" expectedTime := metaV1.NewTime(time.Unix(startTime/1000, 0)) _, err = flinkControllerForTest.CompareAndUpdateJobStatus(context.Background(), &flinkApp, "hash") assert.Nil(t, err) - assert.Equal(t, v1beta1.Running, flinkApp.Status.JobStatus.State) - assert.Equal(t, &expectedTime, flinkApp.Status.JobStatus.StartTime) - assert.Equal(t, v1beta1.Yellow, flinkApp.Status.JobStatus.Health) + assert.Equal(t, v1beta1.Running, 
flinkApp.Status.AppStatus[0].JobStatus.State) + assert.Equal(t, &expectedTime, flinkApp.Status.AppStatus[0].JobStatus.StartTime) + assert.Equal(t, v1beta1.Yellow, flinkApp.Status.AppStatus[0].JobStatus.Health) - assert.Equal(t, int32(0), flinkApp.Status.JobStatus.FailedCheckpointCount) - assert.Equal(t, int32(4), flinkApp.Status.JobStatus.CompletedCheckpointCount) - assert.Equal(t, int32(1), flinkApp.Status.JobStatus.JobRestartCount) - assert.Equal(t, &expectedTime, flinkApp.Status.JobStatus.RestoreTime) + assert.Equal(t, int32(0), flinkApp.Status.AppStatus[0].JobStatus.FailedCheckpointCount) + assert.Equal(t, int32(4), flinkApp.Status.AppStatus[0].JobStatus.CompletedCheckpointCount) + assert.Equal(t, int32(1), flinkApp.Status.AppStatus[0].JobStatus.JobRestartCount) + assert.Equal(t, &expectedTime, flinkApp.Status.AppStatus[0].JobStatus.RestoreTime) - assert.Equal(t, "/test/externalpath", flinkApp.Status.JobStatus.RestorePath) - assert.Equal(t, &expectedTime, flinkApp.Status.JobStatus.LastCheckpointTime) - assert.Equal(t, "app-name.lyft.xyz/#/jobs/abc", flinkApp.Status.JobStatus.JobOverviewURL) + assert.Equal(t, "/test/externalpath", flinkApp.Status.AppStatus[0].JobStatus.RestorePath) + assert.Equal(t, &expectedTime, flinkApp.Status.AppStatus[0].JobStatus.LastCheckpointTime) + assert.Equal(t, "app-name.lyft.xyz/#/jobs/abc", flinkApp.Status.AppStatus[0].JobStatus.JobOverviewURL) - assert.Equal(t, int32(2), flinkApp.Status.JobStatus.RunningTasks) - assert.Equal(t, int32(7), flinkApp.Status.JobStatus.TotalTasks) + assert.Equal(t, int32(2), flinkApp.Status.AppStatus[0].JobStatus.RunningTasks) + assert.Equal(t, int32(7), flinkApp.Status.AppStatus[0].JobStatus.TotalTasks) } @@ -860,16 +866,16 @@ func TestNoJobStatusChange(t *testing.T) { app1 := getFlinkTestApp() mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) - app1.Status.JobStatus.State = v1beta1.Running - app1.Status.JobStatus.StartTime = &metaTime - 
app1.Status.JobStatus.LastCheckpointTime = &metaTime - app1.Status.JobStatus.CompletedCheckpointCount = int32(4) - app1.Status.JobStatus.JobRestartCount = int32(1) - app1.Status.JobStatus.FailedCheckpointCount = int32(0) - app1.Status.JobStatus.Health = v1beta1.Green - app1.Status.JobStatus.RestoreTime = &metaTime - app1.Status.JobStatus.RestorePath = "/test/externalpath" - app1.Status.JobStatus.JobOverviewURL = "" + app1.Status.AppStatus[0].JobStatus.State = v1beta1.Running + app1.Status.AppStatus[0].JobStatus.StartTime = &metaTime + app1.Status.AppStatus[0].JobStatus.LastCheckpointTime = &metaTime + app1.Status.AppStatus[0].JobStatus.CompletedCheckpointCount = int32(4) + app1.Status.AppStatus[0].JobStatus.JobRestartCount = int32(1) + app1.Status.AppStatus[0].JobStatus.FailedCheckpointCount = int32(0) + app1.Status.AppStatus[0].JobStatus.Health = v1beta1.Green + app1.Status.AppStatus[0].JobStatus.RestoreTime = &metaTime + app1.Status.AppStatus[0].JobStatus.RestorePath = "/test/externalpath" + app1.Status.AppStatus[0].JobStatus.JobOverviewURL = "" mockJmClient.GetJobOverviewFunc = func(ctx context.Context, url string, jobID string) (*client.FlinkJobOverview, error) { assert.Equal(t, url, "http://app-name-hash.ns:8081") @@ -912,8 +918,8 @@ func TestGetAndUpdateJobStatusHealth(t *testing.T) { app1 := getFlinkTestApp() mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) - app1.Status.JobStatus.State = v1beta1.Failing - app1.Status.JobStatus.LastFailingTime = &lastFailedTime + app1.Status.AppStatus[0].JobStatus.State = v1beta1.Failing + app1.Status.AppStatus[0].JobStatus.LastFailingTime = &lastFailedTime mockJmClient.GetJobOverviewFunc = func(ctx context.Context, url string, jobID string) (*client.FlinkJobOverview, error) { assert.Equal(t, url, "http://app-name-hash.ns:8081") @@ -938,7 +944,7 @@ func TestGetAndUpdateJobStatusHealth(t *testing.T) { assert.Nil(t, err) // Job is in a RUNNING state but was in a FAILING state in the last 1 
minute, so we expect // JobStatus.Health to be Red - assert.Equal(t, app1.Status.JobStatus.Health, v1beta1.Red) + assert.Equal(t, app1.Status.AppStatus[0].JobStatus.Health, v1beta1.Red) } diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index 06670a63..63b64fd2 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -290,9 +290,10 @@ func (s *FlinkStateMachine) handleClusterStarting(ctx context.Context, applicati } func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { + application.Status.AppStatus = make([]v1beta1.FlinkSubApplicationStatus, 2) // we've already savepointed (or this is our first deploy), continue on if application.Status.SavepointPath != "" || application.Status.DeployHash == "" { - application.Status.JobStatus.JobID = "" + application.Status.AppStatus[0].JobStatus.JobID = "" s.updateApplicationPhase(application, v1beta1.FlinkApplicationSubmittingJob) return statusChanged, nil } @@ -314,7 +315,7 @@ func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, a } s.flinkController.LogEvent(ctx, application, corev1.EventTypeNormal, "CancellingJob", - fmt.Sprintf("Cancelling job %s with a final savepoint", application.Status.JobStatus.JobID)) + fmt.Sprintf("Cancelling job %s with a final savepoint", application.Status.AppStatus[0].JobStatus.JobID)) application.Status.SavepointTriggerID = triggerID return statusChanged, nil @@ -332,7 +333,7 @@ func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, a // TODO: we should probably retry this a few times before failing s.flinkController.LogEvent(ctx, application, corev1.EventTypeWarning, "SavepointFailed", fmt.Sprintf("Failed to take savepoint for job %s: %v", - application.Status.JobStatus.JobID, 
savepointStatusResponse.Operation.FailureCause)) + application.Status.AppStatus[0].JobStatus.JobID, savepointStatusResponse.Operation.FailureCause)) application.Status.RetryCount = 0 s.updateApplicationPhase(application, v1beta1.FlinkApplicationRecovering) return statusChanged, nil @@ -341,7 +342,7 @@ func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, a fmt.Sprintf("Canceled job with savepoint %s", savepointStatusResponse.Operation.Location)) application.Status.SavepointPath = savepointStatusResponse.Operation.Location - application.Status.JobStatus.JobID = "" + application.Status.AppStatus[0].JobStatus.JobID = "" s.updateApplicationPhase(application, v1beta1.FlinkApplicationSubmittingJob) return statusChanged, nil } @@ -352,6 +353,7 @@ func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, a func (s *FlinkStateMachine) handleApplicationRecovering(ctx context.Context, app *v1beta1.FlinkApplication) (bool, error) { // we're in the middle of a deploy, and savepointing has failed in some way... we're going to try to recover // and push through if possible + app.Status.AppStatus = make([]v1beta1.FlinkSubApplicationStatus, 2) if rollback, reason := s.shouldRollback(ctx, app); rollback { // we failed to recover, attempt to rollback s.flinkController.LogEvent(ctx, app, corev1.EventTypeWarning, "RecoveryFailed", @@ -378,7 +380,7 @@ func (s *FlinkStateMachine) handleApplicationRecovering(ctx context.Context, app path, flink.HashForApplication(app))) app.Status.SavepointPath = path - app.Status.JobStatus.JobID = "" + app.Status.AppStatus[0].JobStatus.JobID = "" s.updateApplicationPhase(app, v1beta1.FlinkApplicationSubmittingJob) return statusChanged, nil } @@ -393,8 +395,8 @@ func (s *FlinkStateMachine) submitJobIfNeeded(ctx context.Context, app *v1beta1. 
} // Check if the job id has already been set on our application - if app.Status.JobStatus.JobID != "" { - return app.Status.JobStatus.JobID, nil + if app.Status.AppStatus[0].JobStatus.JobID != "" { + return app.Status.AppStatus[0].JobStatus.JobID, nil } // Check that there are no jobs running before starting the job @@ -456,6 +458,10 @@ func (s *FlinkStateMachine) updateGenericService(ctx context.Context, app *v1bet } func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta1.FlinkApplication) (bool, error) { + if len(app.Status.AppStatus) == 0 { + app.Status.AppStatus = make([]v1beta1.FlinkSubApplicationStatus, 2) + } + if rollback, reason := s.shouldRollback(ctx, app); rollback { // Something's gone wrong; roll back s.flinkController.LogEvent(ctx, app, corev1.EventTypeWarning, "JobSubmissionFailed", @@ -477,7 +483,7 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta logger.Errorf(ctx, "Updating cluster status failed with error: %v", clusterErr) } - if app.Status.JobStatus.JobID == "" { + if app.Status.AppStatus[0].JobStatus.JobID == "" { savepointPath := "" if app.Status.DeployHash == "" { // this is the first deploy, use the user-provided savepoint @@ -499,7 +505,7 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta } if appJobID != "" { - app.Status.JobStatus.JobID = appJobID + app.Status.AppStatus[0].JobStatus.JobID = appJobID return statusChanged, nil } @@ -524,11 +530,11 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta app.Status.DeployHash = hash app.Status.SavepointPath = "" app.Status.SavepointTriggerID = "" - app.Status.JobStatus.JarName = app.Spec.JarName - app.Status.JobStatus.Parallelism = app.Spec.Parallelism - app.Status.JobStatus.EntryClass = app.Spec.EntryClass - app.Status.JobStatus.ProgramArgs = app.Spec.ProgramArgs - app.Status.JobStatus.AllowNonRestoredState = app.Spec.AllowNonRestoredState + 
app.Status.AppStatus[0].JobStatus.JarName = app.Spec.JarName + app.Status.AppStatus[0].JobStatus.Parallelism = app.Spec.Parallelism + app.Status.AppStatus[0].JobStatus.EntryClass = app.Spec.EntryClass + app.Status.AppStatus[0].JobStatus.ProgramArgs = app.Spec.ProgramArgs + app.Status.AppStatus[0].JobStatus.AllowNonRestoredState = app.Spec.AllowNonRestoredState s.updateApplicationPhase(app, v1beta1.FlinkApplicationRunning) return statusChanged, nil @@ -571,9 +577,9 @@ func (s *FlinkStateMachine) handleRollingBack(ctx context.Context, app *v1beta1. // submit the old job jobID, err := s.submitJobIfNeeded(ctx, app, app.Status.DeployHash, - app.Status.JobStatus.JarName, app.Status.JobStatus.Parallelism, - app.Status.JobStatus.EntryClass, app.Status.JobStatus.ProgramArgs, - app.Status.JobStatus.AllowNonRestoredState, + app.Status.AppStatus[0].JobStatus.JarName, app.Status.AppStatus[0].JobStatus.Parallelism, + app.Status.AppStatus[0].JobStatus.EntryClass, app.Status.AppStatus[0].JobStatus.ProgramArgs, + app.Status.AppStatus[0].JobStatus.AllowNonRestoredState, app.Status.SavepointPath) // set rollbackHash @@ -583,7 +589,7 @@ func (s *FlinkStateMachine) handleRollingBack(ctx context.Context, app *v1beta1. } if jobID != "" { - app.Status.JobStatus.JobID = jobID + app.Status.AppStatus[0].JobStatus.JobID = jobID app.Status.SavepointPath = "" app.Status.SavepointTriggerID = "" // move to the deploy failed state @@ -596,6 +602,9 @@ func (s *FlinkStateMachine) handleRollingBack(ctx context.Context, app *v1beta1. // Check if the application is Running. // This is a stable state. 
Keep monitoring if the underlying CRD reflects the Flink cluster func (s *FlinkStateMachine) handleApplicationRunning(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { + if len(application.Status.AppStatus) == 0 { + application.Status.AppStatus = make([]v1beta1.FlinkSubApplicationStatus, 2) + } cur, err := s.flinkController.GetCurrentDeploymentsForApp(ctx, application) if err != nil { return statusUnchanged, err @@ -617,7 +626,7 @@ func (s *FlinkStateMachine) handleApplicationRunning(ctx context.Context, applic } if job == nil { - logger.Warnf(ctx, "Could not find active job {}", application.Status.JobStatus.JobID) + logger.Warnf(ctx, "Could not find active job {}", application.Status.AppStatus[0].JobStatus.JobID) } else { logger.Debugf(ctx, "Application running with job %v", job.JobID) } diff --git a/pkg/controller/flinkapplication/flink_state_machine_test.go b/pkg/controller/flinkapplication/flink_state_machine_test.go index 70b3fd0c..09ce56ab 100644 --- a/pkg/controller/flinkapplication/flink_state_machine_test.go +++ b/pkg/controller/flinkapplication/flink_state_machine_test.go @@ -268,7 +268,7 @@ func TestRestoreFromExternalizedCheckpoint(t *testing.T) { assert.Nil(t, err) } -func TestSubmittingToRunning(t *testing.T) { +func TestSubmittingToRunning(t *testing.T) { jobID := "j1" app := v1beta1.FlinkApplication{ @@ -364,14 +364,14 @@ func TestSubmittingToRunning(t *testing.T) { mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { if statusUpdateCount == 0 { application := object.(*v1beta1.FlinkApplication) - assert.Equal(t, jobID, application.Status.JobStatus.JobID) + assert.Equal(t, jobID, application.Status.AppStatus[0].JobStatus.JobID) } else if statusUpdateCount == 1 { application := object.(*v1beta1.FlinkApplication) assert.Equal(t, appHash, application.Status.DeployHash) - assert.Equal(t, app.Spec.JarName, app.Status.JobStatus.JarName) - assert.Equal(t, app.Spec.Parallelism, 
app.Status.JobStatus.Parallelism) - assert.Equal(t, app.Spec.EntryClass, app.Status.JobStatus.EntryClass) - assert.Equal(t, app.Spec.ProgramArgs, app.Status.JobStatus.ProgramArgs) + assert.Equal(t, app.Spec.JarName, app.Status.AppStatus[0].JobStatus.JarName) + assert.Equal(t, app.Spec.Parallelism, app.Status.AppStatus[0].JobStatus.Parallelism) + assert.Equal(t, app.Spec.EntryClass, app.Status.AppStatus[0].JobStatus.EntryClass) + assert.Equal(t, app.Spec.ProgramArgs, app.Status.AppStatus[0].JobStatus.ProgramArgs) assert.Equal(t, v1beta1.FlinkApplicationRunning, application.Status.Phase) } statusUpdateCount++ @@ -451,11 +451,15 @@ func TestRollingBack(t *testing.T) { Phase: v1beta1.FlinkApplicationRollingBackJob, DeployHash: "old-hash", SavepointPath: "file:///savepoint", - JobStatus: v1beta1.FlinkJobStatus{ - JarName: "old-job.jar", - Parallelism: 10, - EntryClass: "com.my.OldClass", - ProgramArgs: "--no-test", + AppStatus: []v1beta1.FlinkSubApplicationStatus{ + v1beta1.FlinkSubApplicationStatus { + JobStatus:v1beta1.FlinkJobStatus{ + JarName: "old-job.jar", + Parallelism: 10, + EntryClass: "com.my.OldClass", + ProgramArgs: "--no-test", + }, + }, }, }, } @@ -474,11 +478,11 @@ func TestRollingBack(t *testing.T) { startCalled = true assert.Equal(t, "old-hash", hash) - assert.Equal(t, app.Status.JobStatus.JarName, jarName) - assert.Equal(t, app.Status.JobStatus.Parallelism, parallelism) - assert.Equal(t, app.Status.JobStatus.EntryClass, entryClass) - assert.Equal(t, app.Status.JobStatus.ProgramArgs, programArgs) - assert.Equal(t, app.Status.JobStatus.AllowNonRestoredState, allowNonRestoredState) + assert.Equal(t, app.Status.AppStatus[0].JobStatus.JarName, jarName) + assert.Equal(t, app.Status.AppStatus[0].JobStatus.Parallelism, parallelism) + assert.Equal(t, app.Status.AppStatus[0].JobStatus.EntryClass, entryClass) + assert.Equal(t, app.Status.AppStatus[0].JobStatus.ProgramArgs, programArgs) + assert.Equal(t, app.Status.AppStatus[0].JobStatus.AllowNonRestoredState, 
allowNonRestoredState) assert.Equal(t, app.Status.SavepointPath, savepointPath) return jobID, nil } @@ -629,8 +633,12 @@ func TestDeleteWithSavepoint(t *testing.T) { Status: v1beta1.FlinkApplicationStatus{ Phase: v1beta1.FlinkApplicationDeleting, DeployHash: "deployhash", - JobStatus: v1beta1.FlinkJobStatus{ - JobID: jobID, + AppStatus: []v1beta1.FlinkSubApplicationStatus{ + v1beta1.FlinkSubApplicationStatus{ + JobStatus: v1beta1.FlinkJobStatus{ + JobID: jobID, + }, + }, }, }, } @@ -741,9 +749,14 @@ func TestDeleteWithSavepointAndFinishedJob(t *testing.T) { Phase: v1beta1.FlinkApplicationDeleting, DeployHash: "deployhash", SavepointPath: "file:///savepoint", - JobStatus: v1beta1.FlinkJobStatus{ - JobID: jobID, + AppStatus: []v1beta1.FlinkSubApplicationStatus{ + v1beta1.FlinkSubApplicationStatus{ + JobStatus: v1beta1.FlinkJobStatus{ + JobID: jobID, + }, + }, }, + }, } @@ -788,9 +801,14 @@ func TestDeleteWithForceCancel(t *testing.T) { }, Status: v1beta1.FlinkApplicationStatus{ Phase: v1beta1.FlinkApplicationDeleting, - JobStatus: v1beta1.FlinkJobStatus{ - JobID: jobID, + AppStatus: []v1beta1.FlinkSubApplicationStatus{ + v1beta1.FlinkSubApplicationStatus{ + JobStatus: v1beta1.FlinkJobStatus{ + JobID: jobID, + }, + }, }, + DeployHash: "deployhash", }, } From 3fa79118b9679bd0220bafd4d8d5a6c32f6e0a55 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Fri, 21 Feb 2020 10:20:40 -0800 Subject: [PATCH 02/41] Create setup for blue green deploys --- pkg/apis/app/v1beta1/types.go | 30 ++++++++------- pkg/apis/app/v1beta1/zz_generated.deepcopy.go | 38 +++++++++---------- pkg/controller/flink/flink.go | 4 +- pkg/controller/flink/flink_test.go | 2 +- .../flinkapplication/flink_state_machine.go | 20 +++++++--- .../flink_state_machine_test.go | 25 ++++++------ 6 files changed, 66 insertions(+), 53 deletions(-) diff --git a/pkg/apis/app/v1beta1/types.go b/pkg/apis/app/v1beta1/types.go index a1443da6..a31a7fab 100644 --- a/pkg/apis/app/v1beta1/types.go +++ b/pkg/apis/app/v1beta1/types.go 
@@ -165,18 +165,18 @@ type FlinkJobStatus struct { } type FlinkApplicationStatus struct { - Phase FlinkApplicationPhase `json:"phase"` - StartedAt *metav1.Time `json:"startedAt,omitempty"` - LastUpdatedAt *metav1.Time `json:"lastUpdatedAt,omitempty"` - Reason string `json:"reason,omitempty"` - AppStatus []FlinkSubApplicationStatus `json:"appStatus,omitempty"` - FailedDeployHash string `json:"failedDeployHash,omitempty"` - RollbackHash string `json:"rollbackHash,omitempty"` - DeployHash string `json:"deployHash"` - SavepointTriggerID string `json:"savepointTriggerId,omitempty"` - SavepointPath string `json:"savepointPath,omitempty"` - RetryCount int32 `json:"retryCount,omitempty"` - LastSeenError *FlinkApplicationError `json:"lastSeenError,omitempty"` + Phase FlinkApplicationPhase `json:"phase"` + StartedAt *metav1.Time `json:"startedAt,omitempty"` + LastUpdatedAt *metav1.Time `json:"lastUpdatedAt,omitempty"` + Reason string `json:"reason,omitempty"` + AppStatus []FlinkApplicationVersionStatus `json:"appStatus,omitempty"` + FailedDeployHash string `json:"failedDeployHash,omitempty"` + RollbackHash string `json:"rollbackHash,omitempty"` + DeployHash string `json:"deployHash"` + SavepointTriggerID string `json:"savepointTriggerId,omitempty"` + SavepointPath string `json:"savepointPath,omitempty"` + RetryCount int32 `json:"retryCount,omitempty"` + LastSeenError *FlinkApplicationError `json:"lastSeenError,omitempty"` } type FlinkApplicationVersion string @@ -186,7 +186,7 @@ const ( GreenFlinkApplication FlinkApplicationVersion = "Green" ) -type FlinkSubApplicationStatus struct { +type FlinkApplicationVersionStatus struct { Version FlinkApplicationVersion ClusterStatus FlinkClusterStatus JobStatus FlinkJobStatus @@ -234,6 +234,8 @@ const ( FlinkApplicationRecovering FlinkApplicationPhase = "Recovering" FlinkApplicationRollingBackJob FlinkApplicationPhase = "RollingBackJob" FlinkApplicationDeployFailed FlinkApplicationPhase = "DeployFailed" + FlinkApplicationDualRunning 
FlinkApplicationPhase = "DualRunning" + FlinkApplicationTeardown FlinkApplicationPhase = "Teardown" ) var FlinkApplicationPhases = []FlinkApplicationPhase{ @@ -247,6 +249,8 @@ var FlinkApplicationPhases = []FlinkApplicationPhase{ FlinkApplicationRecovering, FlinkApplicationDeployFailed, FlinkApplicationRollingBackJob, + FlinkApplicationDualRunning, + FlinkApplicationTeardown, } func IsRunningPhase(phase FlinkApplicationPhase) bool { diff --git a/pkg/apis/app/v1beta1/zz_generated.deepcopy.go b/pkg/apis/app/v1beta1/zz_generated.deepcopy.go index 5a90b634..5f7df456 100644 --- a/pkg/apis/app/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/app/v1beta1/zz_generated.deepcopy.go @@ -207,7 +207,7 @@ func (in *FlinkApplicationStatus) DeepCopyInto(out *FlinkApplicationStatus) { } if in.AppStatus != nil { in, out := &in.AppStatus, &out.AppStatus - *out = make([]FlinkSubApplicationStatus, len(*in)) + *out = make([]FlinkApplicationVersionStatus, len(*in)) for i := range *in { (*in)[i].DeepCopyInto(&(*out)[i]) } @@ -230,6 +230,24 @@ func (in *FlinkApplicationStatus) DeepCopy() *FlinkApplicationStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FlinkApplicationVersionStatus) DeepCopyInto(out *FlinkApplicationVersionStatus) { + *out = *in + out.ClusterStatus = in.ClusterStatus + in.JobStatus.DeepCopyInto(&out.JobStatus) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlinkApplicationVersionStatus. +func (in *FlinkApplicationVersionStatus) DeepCopy() *FlinkApplicationVersionStatus { + if in == nil { + return nil + } + out := new(FlinkApplicationVersionStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *FlinkClusterStatus) DeepCopyInto(out *FlinkClusterStatus) { *out = *in @@ -278,24 +296,6 @@ func (in *FlinkJobStatus) DeepCopy() *FlinkJobStatus { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *FlinkSubApplicationStatus) DeepCopyInto(out *FlinkSubApplicationStatus) { - *out = *in - out.ClusterStatus = in.ClusterStatus - in.JobStatus.DeepCopyInto(&out.JobStatus) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlinkSubApplicationStatus. -func (in *FlinkSubApplicationStatus) DeepCopy() *FlinkSubApplicationStatus { - if in == nil { - return nil - } - out := new(FlinkSubApplicationStatus) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *JobManagerConfig) DeepCopyInto(out *JobManagerConfig) { *out = *in diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index d9e2f0d8..d3bd7d62 100644 --- a/pkg/controller/flink/flink.go +++ b/pkg/controller/flink/flink.go @@ -488,7 +488,7 @@ func (f *Controller) CompareAndUpdateClusterStatus(ctx context.Context, applicat // Error retrieving cluster / taskmanagers overview (after startup/readiness) --> Red // If there is an error this loop will return with Health set to Red if len(application.Status.AppStatus) == 0 { - application.Status.AppStatus = make([]v1beta1.FlinkSubApplicationStatus, 2) + application.Status.AppStatus = make([]v1beta1.FlinkApplicationVersionStatus, 2) } oldClusterStatus := application.Status.AppStatus[0].ClusterStatus application.Status.AppStatus[0].ClusterStatus.Health = v1beta1.Red @@ -544,7 +544,7 @@ func getHealthyTaskManagerCount(response *client.TaskManagersResponse) int32 { func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, hash string) (bool, error) { // Initialize 
the last failing time to beginning of time if it's never been set if len(app.Status.AppStatus) == 0 { - app.Status.AppStatus = make([]v1beta1.FlinkSubApplicationStatus, 2) + app.Status.AppStatus = make([]v1beta1.FlinkApplicationVersionStatus, 2) } if app.Status.AppStatus[0].JobStatus.LastFailingTime == nil { initTime := metav1.NewTime(time.Time{}) diff --git a/pkg/controller/flink/flink_test.go b/pkg/controller/flink/flink_test.go index 0c939210..8ab92439 100644 --- a/pkg/controller/flink/flink_test.go +++ b/pkg/controller/flink/flink_test.go @@ -66,7 +66,7 @@ func getFlinkTestApp() v1beta1.FlinkApplication { app.Spec.Parallelism = 8 app.Name = testAppName app.Namespace = testNamespace - statuses := append(app.Status.AppStatus, v1beta1.FlinkSubApplicationStatus{ + statuses := append(app.Status.AppStatus, v1beta1.FlinkApplicationVersionStatus{ JobStatus: v1beta1.FlinkJobStatus{ JobID: testJobID, }, diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index 63b64fd2..d753c52f 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -153,6 +153,8 @@ func (s *FlinkStateMachine) handle(ctx context.Context, application *v1beta1.Fli updateApplication := false updateLastSeenError := false appPhase := application.Status.Phase + // initialize application status array if it's not yet been initialized + initializeAppStatusIfEmpty(application) if !application.ObjectMeta.DeletionTimestamp.IsZero() && appPhase != v1beta1.FlinkApplicationDeleting { s.updateApplicationPhase(application, v1beta1.FlinkApplicationDeleting) @@ -270,7 +272,6 @@ func (s *FlinkStateMachine) handleClusterStarting(ctx context.Context, applicati "Flink cluster failed to become available: %s", reason)) return s.deployFailed(ctx, application) } - // Wait for all to be running clusterReady, err := s.flinkController.IsClusterReady(ctx, application) if err != nil || 
!clusterReady { @@ -289,8 +290,18 @@ func (s *FlinkStateMachine) handleClusterStarting(ctx context.Context, applicati return statusChanged, nil } +func initializeAppStatusIfEmpty(application *v1beta1.FlinkApplication) { + // initialize the app status array to include 2 statuses in case of blue green deploys + if len(application.Status.AppStatus) == 0 { + if application.Spec.DeploymentMode == v1beta1.DeploymentModeBlueGreen { + application.Status.AppStatus = make([]v1beta1.FlinkApplicationVersionStatus, 2) + } else { + application.Status.AppStatus = make([]v1beta1.FlinkApplicationVersionStatus, 1) + } + } +} + func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { - application.Status.AppStatus = make([]v1beta1.FlinkSubApplicationStatus, 2) // we've already savepointed (or this is our first deploy), continue on if application.Status.SavepointPath != "" || application.Status.DeployHash == "" { application.Status.AppStatus[0].JobStatus.JobID = "" @@ -353,7 +364,6 @@ func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, a func (s *FlinkStateMachine) handleApplicationRecovering(ctx context.Context, app *v1beta1.FlinkApplication) (bool, error) { // we're in the middle of a deploy, and savepointing has failed in some way... 
we're going to try to recover // and push through if possible - app.Status.AppStatus = make([]v1beta1.FlinkSubApplicationStatus, 2) if rollback, reason := s.shouldRollback(ctx, app); rollback { // we failed to recover, attempt to rollback s.flinkController.LogEvent(ctx, app, corev1.EventTypeWarning, "RecoveryFailed", @@ -459,7 +469,7 @@ func (s *FlinkStateMachine) updateGenericService(ctx context.Context, app *v1bet func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta1.FlinkApplication) (bool, error) { if len(app.Status.AppStatus) == 0 { - app.Status.AppStatus = make([]v1beta1.FlinkSubApplicationStatus, 2) + app.Status.AppStatus = make([]v1beta1.FlinkApplicationVersionStatus, 2) } if rollback, reason := s.shouldRollback(ctx, app); rollback { @@ -603,7 +613,7 @@ func (s *FlinkStateMachine) handleRollingBack(ctx context.Context, app *v1beta1. // This is a stable state. Keep monitoring if the underlying CRD reflects the Flink cluster func (s *FlinkStateMachine) handleApplicationRunning(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { if len(application.Status.AppStatus) == 0 { - application.Status.AppStatus = make([]v1beta1.FlinkSubApplicationStatus, 2) + application.Status.AppStatus = make([]v1beta1.FlinkApplicationVersionStatus, 2) } cur, err := s.flinkController.GetCurrentDeploymentsForApp(ctx, application) if err != nil { diff --git a/pkg/controller/flinkapplication/flink_state_machine_test.go b/pkg/controller/flinkapplication/flink_state_machine_test.go index 09ce56ab..3a54e024 100644 --- a/pkg/controller/flinkapplication/flink_state_machine_test.go +++ b/pkg/controller/flinkapplication/flink_state_machine_test.go @@ -268,7 +268,7 @@ func TestRestoreFromExternalizedCheckpoint(t *testing.T) { assert.Nil(t, err) } -func TestSubmittingToRunning(t *testing.T) { +func TestSubmittingToRunning(t *testing.T) { jobID := "j1" app := v1beta1.FlinkApplication{ @@ -451,15 +451,15 @@ func TestRollingBack(t *testing.T) { 
Phase: v1beta1.FlinkApplicationRollingBackJob, DeployHash: "old-hash", SavepointPath: "file:///savepoint", - AppStatus: []v1beta1.FlinkSubApplicationStatus{ - v1beta1.FlinkSubApplicationStatus { - JobStatus:v1beta1.FlinkJobStatus{ + AppStatus: []v1beta1.FlinkApplicationVersionStatus{ + v1beta1.FlinkApplicationVersionStatus{ + JobStatus: v1beta1.FlinkJobStatus{ JarName: "old-job.jar", Parallelism: 10, EntryClass: "com.my.OldClass", ProgramArgs: "--no-test", }, - }, + }, }, }, } @@ -633,9 +633,9 @@ func TestDeleteWithSavepoint(t *testing.T) { Status: v1beta1.FlinkApplicationStatus{ Phase: v1beta1.FlinkApplicationDeleting, DeployHash: "deployhash", - AppStatus: []v1beta1.FlinkSubApplicationStatus{ - v1beta1.FlinkSubApplicationStatus{ - JobStatus: v1beta1.FlinkJobStatus{ + AppStatus: []v1beta1.FlinkApplicationVersionStatus{ + v1beta1.FlinkApplicationVersionStatus{ + JobStatus: v1beta1.FlinkJobStatus{ JobID: jobID, }, }, @@ -749,14 +749,13 @@ func TestDeleteWithSavepointAndFinishedJob(t *testing.T) { Phase: v1beta1.FlinkApplicationDeleting, DeployHash: "deployhash", SavepointPath: "file:///savepoint", - AppStatus: []v1beta1.FlinkSubApplicationStatus{ - v1beta1.FlinkSubApplicationStatus{ + AppStatus: []v1beta1.FlinkApplicationVersionStatus{ + v1beta1.FlinkApplicationVersionStatus{ JobStatus: v1beta1.FlinkJobStatus{ JobID: jobID, }, }, }, - }, } @@ -801,8 +800,8 @@ func TestDeleteWithForceCancel(t *testing.T) { }, Status: v1beta1.FlinkApplicationStatus{ Phase: v1beta1.FlinkApplicationDeleting, - AppStatus: []v1beta1.FlinkSubApplicationStatus{ - v1beta1.FlinkSubApplicationStatus{ + AppStatus: []v1beta1.FlinkApplicationVersionStatus{ + v1beta1.FlinkApplicationVersionStatus{ JobStatus: v1beta1.FlinkJobStatus{ JobID: jobID, }, From a8fbe00351d0314c8b9f6dbb1f15eb9f14556b03 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Wed, 26 Feb 2020 17:37:44 -0800 Subject: [PATCH 03/41] [WIP] Setup status sub-resource for blue green deploys --- deploy/crd.yaml | 14 +- 
integ/checkpoint_failure_test.go | 4 +- integ/simple_test.go | 18 +-- integ/utils/utils.go | 2 +- pkg/apis/app/v1beta1/types.go | 34 ++-- pkg/apis/app/v1beta1/zz_generated.deepcopy.go | 4 +- pkg/controller/flink/flink.go | 150 +++++++++++------- pkg/controller/flink/flink_test.go | 96 +++++------ pkg/controller/flink/mock/mock_flink.go | 60 +++++++ .../flinkapplication/flink_state_machine.go | 67 ++++---- .../flink_state_machine_test.go | 28 ++-- 11 files changed, 292 insertions(+), 185 deletions(-) diff --git a/deploy/crd.yaml b/deploy/crd.yaml index 209e9782..e5200555 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -412,26 +412,30 @@ spec: type: string description: The current state machine phase for this FlinkApplication JSONPath: .status.phase + - name: Application Version + type: string + description: The version of the Flink cluster + JSONPath: .status.appStatus[*].clusterStatus.health - name: Cluster Health type: string description: The health of the Flink cluster - JSONPath: .status.clusterStatus.health + JSONPath: .status.appStatus[*].clusterStatus.health - name: Job Health type: string description: The health of the Flink job - JSONPath: .status.jobStatus.health + JSONPath: .status.appStatus[*].jobStatus.health - name: Healthy TMs type: string - JSONPath: ".status.clusterStatus.healthyTaskManagers" + JSONPath: .status.appStatus[*].clusterStatus.healthyTaskManagers priority: 1 - name: Total TMs type: string - JSONPath: ".status.clusterStatus.numberOfTaskManagers" + JSONPath: .status.appStatus[*].clusterStatus.numberOfTaskManagers priority: 1 - name: Job Restarts type: integer description: Number of times the job has restarted - JSONPath: .status.jobStatus.jobRestartCount + JSONPath: .status.appStatus[*].jobStatus.jobRestartCount - name: Age type: date JSONPath: .metadata.creationTimestamp diff --git a/integ/checkpoint_failure_test.go b/integ/checkpoint_failure_test.go index 0a41fc9e..c0c11c68 100644 --- a/integ/checkpoint_failure_test.go +++ 
b/integ/checkpoint_failure_test.go @@ -45,9 +45,9 @@ func failingJobTest(s *IntegSuite, c *C, testName string, causeFailure func()) { // And the job should not have been updated newApp, err := s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) - c.Assert(newApp.Status.AppStatus[0].JobStatus.JobID, Equals, app.Status.AppStatus[0].JobStatus.JobID) + c.Assert(newApp.Status.ApplicationStatus[0].JobStatus.JobID, Equals, app.Status.ApplicationStatus[0].JobStatus.JobID) - endpoint := fmt.Sprintf("jobs/%s", app.Status.AppStatus[0].JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s", app.Status.ApplicationStatus[0].JobStatus.JobID) _, err = s.Util.FlinkAPIGet(app, endpoint) c.Assert(err, IsNil) diff --git a/integ/simple_test.go b/integ/simple_test.go index 1d4008c8..bcf6c5da 100644 --- a/integ/simple_test.go +++ b/integ/simple_test.go @@ -28,12 +28,12 @@ func updateAndValidate(c *C, s *IntegSuite, name string, updateFn func(app *v1be // check that it really updated newApp, err := s.Util.GetFlinkApplication(name) c.Assert(err, IsNil) - c.Assert(newApp.Status.AppStatus[0].JobStatus.JobID, Not(Equals), app.Status.AppStatus[0].JobStatus.JobID) + c.Assert(newApp.Status.ApplicationStatus[0].JobStatus.JobID, Not(Equals), app.Status.ApplicationStatus[0].JobStatus.JobID) log.Info("New job started successfully") // check that we savepointed and restored correctly - endpoint := fmt.Sprintf("jobs/%s/checkpoints", newApp.Status.AppStatus[0].JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s/checkpoints", newApp.Status.ApplicationStatus[0].JobStatus.JobID) res, err := s.Util.FlinkAPIGet(newApp, endpoint) c.Assert(err, IsNil) @@ -140,13 +140,13 @@ func (s *IntegSuite) TestSimple(c *C) { c.Assert(s.Util.WaitForAllTasksRunning(newApp.Name), IsNil) // the job id should have changed - jobID := newApp.Status.AppStatus[0].JobStatus.JobID + jobID := newApp.Status.ApplicationStatus[0].JobStatus.JobID newApp, err = s.Util.GetFlinkApplication(newApp.Name) c.Assert(err, IsNil) - 
c.Assert(newApp.Status.AppStatus[0].JobStatus.JobID, Not(Equals), jobID) + c.Assert(newApp.Status.ApplicationStatus[0].JobStatus.JobID, Not(Equals), jobID) // we should have restored from our savepoint - endpoint := fmt.Sprintf("jobs/%s/checkpoints", newApp.Status.AppStatus[0].JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s/checkpoints", newApp.Status.ApplicationStatus[0].JobStatus.JobID) res, err := s.Util.FlinkAPIGet(newApp, endpoint) c.Assert(err, IsNil) @@ -191,7 +191,7 @@ func (s *IntegSuite) TestSimple(c *C) { log.Info("User cancelled deploy. Job is in deploy failed, waiting for tasks to start") // but the job should still be running - c.Assert(newApp.Status.AppStatus[0].JobStatus.State, Equals, v1beta1.Running) + c.Assert(newApp.Status.ApplicationStatus[0].JobStatus.State, Equals, v1beta1.Running) log.Info("Attempting to roll forward with fix") // Fixing update @@ -225,7 +225,7 @@ func (s *IntegSuite) TestSimple(c *C) { jobList := jobMap["jobs"].([]interface{}) for _, j := range jobList { job := j.(map[string]interface{}) - if job["id"] == app.Status.AppStatus[0].JobStatus.JobID { + if job["id"] == app.Status.ApplicationStatus[0].JobStatus.JobID { return job } } @@ -284,7 +284,7 @@ func (s *IntegSuite) TestRecovery(c *C) { app, err := s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) - endpoint := fmt.Sprintf("jobs/%s/checkpoints", app.Status.AppStatus[0].JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s/checkpoints", app.Status.ApplicationStatus[0].JobStatus.JobID) for { res, err := s.Util.FlinkAPIGet(app, endpoint) c.Assert(err, IsNil) @@ -324,7 +324,7 @@ func (s *IntegSuite) TestRecovery(c *C) { // wait until the new job is launched newApp, err := s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) - if newApp.Status.AppStatus[0].JobStatus.JobID != app.Status.AppStatus[0].JobStatus.JobID { + if newApp.Status.ApplicationStatus[0].JobStatus.JobID != app.Status.ApplicationStatus[0].JobStatus.JobID { break } time.Sleep(100 * 
time.Millisecond) diff --git a/integ/utils/utils.go b/integ/utils/utils.go index e47dea43..80c52f79 100644 --- a/integ/utils/utils.go +++ b/integ/utils/utils.go @@ -429,7 +429,7 @@ func (f *TestUtil) WaitForAllTasksRunning(name string) error { return err } - endpoint := fmt.Sprintf("jobs/%s", flinkApp.Status.AppStatus[0].JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s", flinkApp.Status.ApplicationStatus[0].JobStatus.JobID) for { res, err := f.FlinkAPIGet(flinkApp, endpoint) if err != nil { diff --git a/pkg/apis/app/v1beta1/types.go b/pkg/apis/app/v1beta1/types.go index a31a7fab..aacbb224 100644 --- a/pkg/apis/app/v1beta1/types.go +++ b/pkg/apis/app/v1beta1/types.go @@ -165,18 +165,22 @@ type FlinkJobStatus struct { } type FlinkApplicationStatus struct { - Phase FlinkApplicationPhase `json:"phase"` - StartedAt *metav1.Time `json:"startedAt,omitempty"` - LastUpdatedAt *metav1.Time `json:"lastUpdatedAt,omitempty"` - Reason string `json:"reason,omitempty"` - AppStatus []FlinkApplicationVersionStatus `json:"appStatus,omitempty"` - FailedDeployHash string `json:"failedDeployHash,omitempty"` - RollbackHash string `json:"rollbackHash,omitempty"` - DeployHash string `json:"deployHash"` - SavepointTriggerID string `json:"savepointTriggerId,omitempty"` - SavepointPath string `json:"savepointPath,omitempty"` - RetryCount int32 `json:"retryCount,omitempty"` - LastSeenError *FlinkApplicationError `json:"lastSeenError,omitempty"` + Phase FlinkApplicationPhase `json:"phase"` + StartedAt *metav1.Time `json:"startedAt,omitempty"` + LastUpdatedAt *metav1.Time `json:"lastUpdatedAt,omitempty"` + Reason string `json:"reason,omitempty"` + DesiredApplicationCount int32 `json:"desiredApplicationCount,omitempty"` + RunningJobs int32 `json:"runningJobs,omitempty"` + DeployVersion string `json:"deployVersion,omitempty"` + UpdatingVersion string `json:"updatingVersion,omitempty"` + ApplicationStatus []FlinkApplicationVersionStatus `json:"appStatus,omitempty"` + FailedDeployHash string 
`json:"failedDeployHash,omitempty"` + RollbackHash string `json:"rollbackHash,omitempty"` + DeployHash string `json:"deployHash"` + SavepointTriggerID string `json:"savepointTriggerId,omitempty"` + SavepointPath string `json:"savepointPath,omitempty"` + RetryCount int32 `json:"retryCount,omitempty"` + LastSeenError *FlinkApplicationError `json:"lastSeenError,omitempty"` } type FlinkApplicationVersion string @@ -187,9 +191,9 @@ const ( ) type FlinkApplicationVersionStatus struct { - Version FlinkApplicationVersion - ClusterStatus FlinkClusterStatus - JobStatus FlinkJobStatus + Version FlinkApplicationVersion `json:"appVersion,omitempty"` + ClusterStatus FlinkClusterStatus `json:"clusterStatus,omitempty"` + JobStatus FlinkJobStatus `json:"jobStatus,omitempty"` } func (in *FlinkApplicationStatus) GetPhase() FlinkApplicationPhase { diff --git a/pkg/apis/app/v1beta1/zz_generated.deepcopy.go b/pkg/apis/app/v1beta1/zz_generated.deepcopy.go index 5f7df456..9419f91a 100644 --- a/pkg/apis/app/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/app/v1beta1/zz_generated.deepcopy.go @@ -205,8 +205,8 @@ func (in *FlinkApplicationStatus) DeepCopyInto(out *FlinkApplicationStatus) { in, out := &in.LastUpdatedAt, &out.LastUpdatedAt *out = (*in).DeepCopy() } - if in.AppStatus != nil { - in, out := &in.AppStatus, &out.AppStatus + if in.ApplicationStatus != nil { + in, out := &in.ApplicationStatus, &out.ApplicationStatus *out = make([]FlinkApplicationVersionStatus, len(*in)) for i := range *in { (*in)[i].DeepCopyInto(&(*out)[i]) diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index d3bd7d62..2e1f3803 100644 --- a/pkg/controller/flink/flink.go +++ b/pkg/controller/flink/flink.go @@ -27,6 +27,7 @@ import ( const proxyURL = "http://localhost:%d/api/v1/namespaces/%s/services/%s:8081/proxy" const port = 8081 +const indexOffset = 1 // If the last hearbeat from a taskmanager was more than taskManagerHeartbeatThreshold, the task // manager is considered unhealthy. 
@@ -92,6 +93,15 @@ type ControllerInterface interface { // Compares and updates new job status with current job status // Returns true if there is a change in JobStatus CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, hash string) (bool, error) + + GetLatestClusterStatus(ctx context.Context, app *v1beta1.FlinkApplication) v1beta1.FlinkClusterStatus + + GetLatestJobStatus(ctx context.Context, app *v1beta1.FlinkApplication) v1beta1.FlinkJobStatus + + GetLatestJobID(ctx context.Context, app *v1beta1.FlinkApplication) string + + UpdateLatestJobID(ctx context.Context, app *v1beta1.FlinkApplication, jobID string) + GetDeployedJobID(ctx context.Context, application *v1beta1.FlinkApplication) string } func NewController(k8sCluster k8.ClusterInterface, eventRecorder record.EventRecorder, config controllerConfig.RuntimeConfig) ControllerInterface { @@ -132,6 +142,18 @@ type Controller struct { eventRecorder record.EventRecorder } +func (f *Controller) GetDeployedJobID(ctx context.Context, application *v1beta1.FlinkApplication) string { + return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID +} + +func (f *Controller) GetLatestJobID(ctx context.Context, app *v1beta1.FlinkApplication) string { + return f.GetDeployedJobID(ctx, app) +} + +func (f *Controller) UpdateLatestJobID(ctx context.Context, app *v1beta1.FlinkApplication, jobID string) { + app.Status.ApplicationStatus[getCurrentStatusIndex(app)].JobStatus.JobID = jobID +} + func getURLFromApp(application *v1beta1.FlinkApplication, hash string) string { service := VersionedJobManagerServiceName(application, hash) cfg := controllerConfig.GetConfig() @@ -152,7 +174,7 @@ func getClusterOverviewURL(app *v1beta1.FlinkApplication) string { func getJobOverviewURL(app *v1beta1.FlinkApplication) string { externalURL := getExternalURLFromApp(app) if externalURL != "" { - return fmt.Sprintf(externalURL+client.WebUIAnchor+client.GetJobsOverviewURL, 
app.Status.AppStatus[0].JobStatus.JobID) + return fmt.Sprintf(externalURL+client.WebUIAnchor+client.GetJobsOverviewURL, app.Status.ApplicationStatus[0].JobStatus.JobID) } return "" } @@ -204,11 +226,11 @@ func (f *Controller) GetJobsForApplication(ctx context.Context, application *v1b } func (f *Controller) GetJobForApplication(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { - if application.Status.AppStatus[0].JobStatus.JobID == "" { + if application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID == "" { return nil, nil } - jobResponse, err := f.flinkClient.GetJobOverview(ctx, getURLFromApp(application, hash), application.Status.AppStatus[0].JobStatus.JobID) + jobResponse, err := f.flinkClient.GetJobOverview(ctx, getURLFromApp(application, hash), application.Status.ApplicationStatus[0].JobStatus.JobID) if err != nil { return nil, err } @@ -219,8 +241,8 @@ func (f *Controller) GetJobForApplication(ctx context.Context, application *v1be // The operator for now assumes and is intended to run single application per Flink Cluster. // Once we move to run multiple applications, this has to be removed/updated func (f *Controller) getJobIDForApplication(application *v1beta1.FlinkApplication) (string, error) { - if application.Status.AppStatus[0].JobStatus.JobID != "" { - return application.Status.AppStatus[0].JobStatus.JobID, nil + if application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID != "" { + return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID, nil } return "", errors.New("active job id not available") @@ -446,14 +468,14 @@ func (f *Controller) DeleteOldResourcesForApp(ctx context.Context, app *v1beta1. 
} func (f *Controller) FindExternalizedCheckpoint(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) { - checkpoint, err := f.flinkClient.GetLatestCheckpoint(ctx, getURLFromApp(application, hash), application.Status.AppStatus[0].JobStatus.JobID) + checkpoint, err := f.flinkClient.GetLatestCheckpoint(ctx, getURLFromApp(application, hash), application.Status.ApplicationStatus[0].JobStatus.JobID) var checkpointPath string var checkpointTime int64 if err != nil { // we failed to query the JM, try to pull it out of the resource - if application.Status.AppStatus[0].JobStatus.LastCheckpointPath != "" && application.Status.AppStatus[0].JobStatus.LastCheckpointTime != nil { - checkpointPath = application.Status.AppStatus[0].JobStatus.LastCheckpointPath - checkpointTime = application.Status.AppStatus[0].JobStatus.LastCheckpointTime.Unix() + if application.Status.ApplicationStatus[0].JobStatus.LastCheckpointPath != "" && application.Status.ApplicationStatus[0].JobStatus.LastCheckpointTime != nil { + checkpointPath = application.Status.ApplicationStatus[0].JobStatus.LastCheckpointPath + checkpointTime = application.Status.ApplicationStatus[0].JobStatus.LastCheckpointTime.Unix() logger.Warnf(ctx, "Could not query JobManager for latest externalized checkpoint, using"+ " last seen checkpoint") } else { @@ -487,45 +509,53 @@ func (f *Controller) LogEvent(ctx context.Context, app *v1beta1.FlinkApplication func (f *Controller) CompareAndUpdateClusterStatus(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) { // Error retrieving cluster / taskmanagers overview (after startup/readiness) --> Red // If there is an error this loop will return with Health set to Red - if len(application.Status.AppStatus) == 0 { - application.Status.AppStatus = make([]v1beta1.FlinkApplicationVersionStatus, 2) - } - oldClusterStatus := application.Status.AppStatus[0].ClusterStatus - application.Status.AppStatus[0].ClusterStatus.Health 
= v1beta1.Red + currIndex := getCurrentStatusIndex(application) + oldClusterStatus := application.Status.ApplicationStatus[currIndex].ClusterStatus + application.Status.ApplicationStatus[currIndex].ClusterStatus.Health = v1beta1.Red deployment, err := f.GetCurrentDeploymentsForApp(ctx, application) if deployment == nil || err != nil { return false, err } - application.Status.AppStatus[0].ClusterStatus.ClusterOverviewURL = getClusterOverviewURL(application) - application.Status.AppStatus[0].ClusterStatus.NumberOfTaskManagers = deployment.Taskmanager.Status.AvailableReplicas + application.Status.ApplicationStatus[currIndex].ClusterStatus.ClusterOverviewURL = getClusterOverviewURL(application) + application.Status.ApplicationStatus[currIndex].ClusterStatus.NumberOfTaskManagers = deployment.Taskmanager.Status.AvailableReplicas // Get Cluster overview response, err := f.flinkClient.GetClusterOverview(ctx, getURLFromApp(application, hash)) if err != nil { return false, err } // Update cluster overview - application.Status.AppStatus[0].ClusterStatus.AvailableTaskSlots = response.SlotsAvailable - application.Status.AppStatus[0].ClusterStatus.NumberOfTaskSlots = response.NumberOfTaskSlots + application.Status.ApplicationStatus[currIndex].ClusterStatus.AvailableTaskSlots = response.SlotsAvailable + application.Status.ApplicationStatus[currIndex].ClusterStatus.NumberOfTaskSlots = response.NumberOfTaskSlots // Get Healthy Taskmanagers tmResponse, tmErr := f.flinkClient.GetTaskManagers(ctx, getURLFromApp(application, hash)) if tmErr != nil { return false, tmErr } - application.Status.AppStatus[0].ClusterStatus.HealthyTaskManagers = getHealthyTaskManagerCount(tmResponse) + application.Status.ApplicationStatus[currIndex].ClusterStatus.HealthyTaskManagers = getHealthyTaskManagerCount(tmResponse) // Determine Health of the cluster. 
// Healthy TaskManagers == Number of taskmanagers --> Green // Else --> Yellow - if application.Status.AppStatus[0].ClusterStatus.HealthyTaskManagers == deployment.Taskmanager.Status.Replicas { - application.Status.AppStatus[0].ClusterStatus.Health = v1beta1.Green + if application.Status.ApplicationStatus[currIndex].ClusterStatus.HealthyTaskManagers == deployment.Taskmanager.Status.Replicas { + application.Status.ApplicationStatus[currIndex].ClusterStatus.Health = v1beta1.Green } else { - application.Status.AppStatus[0].ClusterStatus.Health = v1beta1.Yellow + application.Status.ApplicationStatus[currIndex].ClusterStatus.Health = v1beta1.Yellow } - return !apiequality.Semantic.DeepEqual(oldClusterStatus, application.Status.AppStatus[0].ClusterStatus), nil + return !apiequality.Semantic.DeepEqual(oldClusterStatus, application.Status.ApplicationStatus[0].ClusterStatus), nil +} + +func (f *Controller) GetLatestClusterStatus(ctx context.Context, application *v1beta1.FlinkApplication) v1beta1.FlinkClusterStatus { + return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].ClusterStatus + +} + +func (f *Controller) GetLatestJobStatus(ctx context.Context, application *v1beta1.FlinkApplication) v1beta1.FlinkJobStatus { + return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus + } func getHealthyTaskManagerCount(response *client.TaskManagersResponse) int32 { @@ -541,51 +571,63 @@ func getHealthyTaskManagerCount(response *client.TaskManagersResponse) int32 { } -func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, hash string) (bool, error) { - // Initialize the last failing time to beginning of time if it's never been set - if len(app.Status.AppStatus) == 0 { - app.Status.AppStatus = make([]v1beta1.FlinkApplicationVersionStatus, 2) +func getCurrentStatusIndex(app *v1beta1.FlinkApplication) int32 { + desiredCount := app.Status.DesiredApplicationCount + runningJobs := 
app.Status.RunningJobs + // We're still trying to bring up jobs to match desired count + // so the current status will append + // to the existing array + if runningJobs != desiredCount && !v1beta1.IsRunningPhase(app.Status.Phase){ + return runningJobs } - if app.Status.AppStatus[0].JobStatus.LastFailingTime == nil { + + // We've spun up required number of jobs, so the status points to the last + // appended value. + return runningJobs - indexOffset +} + +func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, hash string) (bool, error) { + currIndex := getCurrentStatusIndex(app) + if app.Status.ApplicationStatus[currIndex].JobStatus.LastFailingTime == nil { initTime := metav1.NewTime(time.Time{}) - app.Status.AppStatus[0].JobStatus.LastFailingTime = &initTime + app.Status.ApplicationStatus[currIndex].JobStatus.LastFailingTime = &initTime } - oldJobStatus := app.Status.AppStatus[0].JobStatus - app.Status.AppStatus[0].JobStatus.JobID = oldJobStatus.JobID - jobResponse, err := f.flinkClient.GetJobOverview(ctx, getURLFromApp(app, hash), app.Status.AppStatus[0].JobStatus.JobID) + oldJobStatus := app.Status.ApplicationStatus[currIndex].JobStatus + app.Status.ApplicationStatus[currIndex].JobStatus.JobID = oldJobStatus.JobID + jobResponse, err := f.flinkClient.GetJobOverview(ctx, getURLFromApp(app, hash), app.Status.ApplicationStatus[0].JobStatus.JobID) if err != nil { return false, err } - checkpoints, err := f.flinkClient.GetCheckpointCounts(ctx, getURLFromApp(app, hash), app.Status.AppStatus[0].JobStatus.JobID) + checkpoints, err := f.flinkClient.GetCheckpointCounts(ctx, getURLFromApp(app, hash), app.Status.ApplicationStatus[0].JobStatus.JobID) if err != nil { return false, err } // Job status - app.Status.AppStatus[0].JobStatus.JobOverviewURL = getJobOverviewURL(app) - app.Status.AppStatus[0].JobStatus.State = v1beta1.JobState(jobResponse.State) + app.Status.ApplicationStatus[currIndex].JobStatus.JobOverviewURL = 
getJobOverviewURL(app) + app.Status.ApplicationStatus[currIndex].JobStatus.State = v1beta1.JobState(jobResponse.State) jobStartTime := metav1.NewTime(time.Unix(jobResponse.StartTime/1000, 0)) - app.Status.AppStatus[0].JobStatus.StartTime = &jobStartTime + app.Status.ApplicationStatus[currIndex].JobStatus.StartTime = &jobStartTime // Checkpoints status - app.Status.AppStatus[0].JobStatus.FailedCheckpointCount = checkpoints.Counts["failed"] - app.Status.AppStatus[0].JobStatus.CompletedCheckpointCount = checkpoints.Counts["completed"] - app.Status.AppStatus[0].JobStatus.JobRestartCount = checkpoints.Counts["restored"] + app.Status.ApplicationStatus[currIndex].JobStatus.FailedCheckpointCount = checkpoints.Counts["failed"] + app.Status.ApplicationStatus[currIndex].JobStatus.CompletedCheckpointCount = checkpoints.Counts["completed"] + app.Status.ApplicationStatus[currIndex].JobStatus.JobRestartCount = checkpoints.Counts["restored"] latestCheckpoint := checkpoints.Latest.Completed var lastCheckpointAgeSeconds int if latestCheckpoint != nil { lastCheckpointTimeMillis := metav1.NewTime(time.Unix(latestCheckpoint.LatestAckTimestamp/1000, 0)) - app.Status.AppStatus[0].JobStatus.LastCheckpointTime = &lastCheckpointTimeMillis - app.Status.AppStatus[0].JobStatus.LastCheckpointPath = latestCheckpoint.ExternalPath - lastCheckpointAgeSeconds = app.Status.AppStatus[0].JobStatus.LastCheckpointTime.Second() + app.Status.ApplicationStatus[currIndex].JobStatus.LastCheckpointTime = &lastCheckpointTimeMillis + app.Status.ApplicationStatus[currIndex].JobStatus.LastCheckpointPath = latestCheckpoint.ExternalPath + lastCheckpointAgeSeconds = app.Status.ApplicationStatus[currIndex].JobStatus.LastCheckpointTime.Second() } if checkpoints.Latest.Restored != nil { - app.Status.AppStatus[0].JobStatus.RestorePath = checkpoints.Latest.Restored.ExternalPath + app.Status.ApplicationStatus[currIndex].JobStatus.RestorePath = checkpoints.Latest.Restored.ExternalPath restoreTime := 
metav1.NewTime(time.Unix(checkpoints.Latest.Restored.RestoredTimeStamp/1000, 0)) - app.Status.AppStatus[0].JobStatus.RestoreTime = &restoreTime + app.Status.ApplicationStatus[currIndex].JobStatus.RestoreTime = &restoreTime } runningTasks := int32(0) @@ -605,29 +647,29 @@ func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1 } } - app.Status.AppStatus[0].JobStatus.RunningTasks = runningTasks - app.Status.AppStatus[0].JobStatus.TotalTasks = totalTasks + app.Status.ApplicationStatus[currIndex].JobStatus.RunningTasks = runningTasks + app.Status.ApplicationStatus[currIndex].JobStatus.TotalTasks = totalTasks // Health Status for job // Job is in FAILING state --> RED // Time since last successful checkpoint > maxCheckpointTime --> YELLOW // Else --> Green - if app.Status.AppStatus[0].JobStatus.State == v1beta1.Failing || - time.Since(app.Status.AppStatus[0].JobStatus.LastFailingTime.Time) < failingIntervalThreshold || + if app.Status.ApplicationStatus[currIndex].JobStatus.State == v1beta1.Failing || + time.Since(app.Status.ApplicationStatus[currIndex].JobStatus.LastFailingTime.Time) < failingIntervalThreshold || verticesInCreated > 0 { - app.Status.AppStatus[0].JobStatus.Health = v1beta1.Red + app.Status.ApplicationStatus[currIndex].JobStatus.Health = v1beta1.Red } else if time.Since(time.Unix(int64(lastCheckpointAgeSeconds), 0)) < maxCheckpointTime || runningTasks < totalTasks { - app.Status.AppStatus[0].JobStatus.Health = v1beta1.Yellow + app.Status.ApplicationStatus[currIndex].JobStatus.Health = v1beta1.Yellow } else { - app.Status.AppStatus[0].JobStatus.Health = v1beta1.Green + app.Status.ApplicationStatus[currIndex].JobStatus.Health = v1beta1.Green } // Update LastFailingTime - if app.Status.AppStatus[0].JobStatus.State == v1beta1.Failing { + if app.Status.ApplicationStatus[currIndex].JobStatus.State == v1beta1.Failing { currTime := metav1.Now() - app.Status.AppStatus[0].JobStatus.LastFailingTime = &currTime + 
app.Status.ApplicationStatus[currIndex].JobStatus.LastFailingTime = &currTime } - return !apiequality.Semantic.DeepEqual(oldJobStatus, app.Status.AppStatus[0].JobStatus), err + return !apiequality.Semantic.DeepEqual(oldJobStatus, app.Status.ApplicationStatus[currIndex].JobStatus), err } diff --git a/pkg/controller/flink/flink_test.go b/pkg/controller/flink/flink_test.go index 8ab92439..d7f8224d 100644 --- a/pkg/controller/flink/flink_test.go +++ b/pkg/controller/flink/flink_test.go @@ -66,16 +66,16 @@ func getFlinkTestApp() v1beta1.FlinkApplication { app.Spec.Parallelism = 8 app.Name = testAppName app.Namespace = testNamespace - statuses := append(app.Status.AppStatus, v1beta1.FlinkApplicationVersionStatus{ + statuses := append(app.Status.ApplicationStatus, v1beta1.FlinkApplicationVersionStatus{ JobStatus: v1beta1.FlinkJobStatus{ JobID: testJobID, }, }) - app.Status.AppStatus = statuses - //app.Status.AppStatus[0].JobStatus.JobID = testJobID + app.Status.ApplicationStatus = statuses + //app.Status.ApplicationStatus[0].JobStatus.JobID = testJobID app.Spec.Image = testImage app.Spec.FlinkVersion = testFlinkVersion - + app.Status.DesiredApplicationCount = 1 return app } @@ -579,7 +579,7 @@ func TestGetJobsForApplicationErr(t *testing.T) { func TestFindExternalizedCheckpoint(t *testing.T) { flinkControllerForTest := getTestFlinkController() flinkApp := getFlinkTestApp() - flinkApp.Status.AppStatus[0].JobStatus.JobID = "jobid" + flinkApp.Status.ApplicationStatus[0].JobStatus.JobID = "jobid" mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) mockJmClient.GetLatestCheckpointFunc = func(ctx context.Context, url string, jobId string) (*client.CheckpointStatistics, error) { @@ -599,10 +599,10 @@ func TestFindExternalizedCheckpoint(t *testing.T) { func TestFindExternalizedCheckpointFromStatus(t *testing.T) { flinkControllerForTest := getTestFlinkController() flinkApp := getFlinkTestApp() - flinkApp.Status.AppStatus[0].JobStatus.JobID = "jobid" - 
flinkApp.Status.AppStatus[0].JobStatus.LastCheckpointPath = "/tmp/checkpoint" + flinkApp.Status.ApplicationStatus[0].JobStatus.JobID = "jobid" + flinkApp.Status.ApplicationStatus[0].JobStatus.LastCheckpointPath = "/tmp/checkpoint" checkpointTime := metaV1.Now() - flinkApp.Status.AppStatus[0].JobStatus.LastCheckpointTime = &checkpointTime + flinkApp.Status.ApplicationStatus[0].JobStatus.LastCheckpointTime = &checkpointTime mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) mockJmClient.GetLatestCheckpointFunc = func(ctx context.Context, url string, jobId string) (*client.CheckpointStatistics, error) { @@ -668,22 +668,22 @@ func TestClusterStatusUpdated(t *testing.T) { _, err = flinkControllerForTest.CompareAndUpdateClusterStatus(context.Background(), &flinkApp, "hash") assert.Nil(t, err) - assert.Equal(t, int32(1), flinkApp.Status.AppStatus[0].ClusterStatus.NumberOfTaskSlots) - assert.Equal(t, int32(0), flinkApp.Status.AppStatus[0].ClusterStatus.AvailableTaskSlots) - assert.Equal(t, int32(1), flinkApp.Status.AppStatus[0].ClusterStatus.HealthyTaskManagers) - assert.Equal(t, v1beta1.Green, flinkApp.Status.AppStatus[0].ClusterStatus.Health) - assert.Equal(t, "app-name.lyft.xyz/#/overview", flinkApp.Status.AppStatus[0].ClusterStatus.ClusterOverviewURL) + assert.Equal(t, int32(1), flinkApp.Status.ApplicationStatus[0].ClusterStatus.NumberOfTaskSlots) + assert.Equal(t, int32(0), flinkApp.Status.ApplicationStatus[0].ClusterStatus.AvailableTaskSlots) + assert.Equal(t, int32(1), flinkApp.Status.ApplicationStatus[0].ClusterStatus.HealthyTaskManagers) + assert.Equal(t, v1beta1.Green, flinkApp.Status.ApplicationStatus[0].ClusterStatus.Health) + assert.Equal(t, "app-name.lyft.xyz/#/overview", flinkApp.Status.ApplicationStatus[0].ClusterStatus.ClusterOverviewURL) } func TestNoClusterStatusChange(t *testing.T) { flinkControllerForTest := getTestFlinkController() flinkApp := getFlinkTestApp() - 
flinkApp.Status.AppStatus[0].ClusterStatus.NumberOfTaskSlots = int32(1) - flinkApp.Status.AppStatus[0].ClusterStatus.AvailableTaskSlots = int32(0) - flinkApp.Status.AppStatus[0].ClusterStatus.HealthyTaskManagers = int32(1) - flinkApp.Status.AppStatus[0].ClusterStatus.Health = v1beta1.Green - flinkApp.Status.AppStatus[0].ClusterStatus.NumberOfTaskManagers = int32(1) + flinkApp.Status.ApplicationStatus[0].ClusterStatus.NumberOfTaskSlots = int32(1) + flinkApp.Status.ApplicationStatus[0].ClusterStatus.AvailableTaskSlots = int32(0) + flinkApp.Status.ApplicationStatus[0].ClusterStatus.HealthyTaskManagers = int32(1) + flinkApp.Status.ApplicationStatus[0].ClusterStatus.Health = v1beta1.Green + flinkApp.Status.ApplicationStatus[0].ClusterStatus.NumberOfTaskManagers = int32(1) mockK8Cluster := flinkControllerForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.GetDeploymentsWithLabelFunc = func(ctx context.Context, namespace string, labelMap map[string]string) (*v1.DeploymentList, error) { tmDeployment := FetchTaskMangerDeploymentCreateObj(&flinkApp, testAppHash) @@ -772,10 +772,10 @@ func TestHealthyTaskmanagers(t *testing.T) { _, err := flinkControllerForTest.CompareAndUpdateClusterStatus(context.Background(), &flinkApp, hash) assert.Nil(t, err) - assert.Equal(t, int32(1), flinkApp.Status.AppStatus[0].ClusterStatus.NumberOfTaskSlots) - assert.Equal(t, int32(0), flinkApp.Status.AppStatus[0].ClusterStatus.AvailableTaskSlots) - assert.Equal(t, int32(0), flinkApp.Status.AppStatus[0].ClusterStatus.HealthyTaskManagers) - assert.Equal(t, v1beta1.Yellow, flinkApp.Status.AppStatus[0].ClusterStatus.Health) + assert.Equal(t, int32(1), flinkApp.Status.ApplicationStatus[0].ClusterStatus.NumberOfTaskSlots) + assert.Equal(t, int32(0), flinkApp.Status.ApplicationStatus[0].ClusterStatus.AvailableTaskSlots) + assert.Equal(t, int32(0), flinkApp.Status.ApplicationStatus[0].ClusterStatus.HealthyTaskManagers) + assert.Equal(t, v1beta1.Yellow, 
flinkApp.Status.ApplicationStatus[0].ClusterStatus.Health) } @@ -832,26 +832,26 @@ func TestJobStatusUpdated(t *testing.T) { }, nil } - flinkApp.Status.AppStatus[0].JobStatus.JobID = "abc" + flinkApp.Status.ApplicationStatus[0].JobStatus.JobID = "abc" expectedTime := metaV1.NewTime(time.Unix(startTime/1000, 0)) _, err = flinkControllerForTest.CompareAndUpdateJobStatus(context.Background(), &flinkApp, "hash") assert.Nil(t, err) - assert.Equal(t, v1beta1.Running, flinkApp.Status.AppStatus[0].JobStatus.State) - assert.Equal(t, &expectedTime, flinkApp.Status.AppStatus[0].JobStatus.StartTime) - assert.Equal(t, v1beta1.Yellow, flinkApp.Status.AppStatus[0].JobStatus.Health) + assert.Equal(t, v1beta1.Running, flinkApp.Status.ApplicationStatus[0].JobStatus.State) + assert.Equal(t, &expectedTime, flinkApp.Status.ApplicationStatus[0].JobStatus.StartTime) + assert.Equal(t, v1beta1.Yellow, flinkApp.Status.ApplicationStatus[0].JobStatus.Health) - assert.Equal(t, int32(0), flinkApp.Status.AppStatus[0].JobStatus.FailedCheckpointCount) - assert.Equal(t, int32(4), flinkApp.Status.AppStatus[0].JobStatus.CompletedCheckpointCount) - assert.Equal(t, int32(1), flinkApp.Status.AppStatus[0].JobStatus.JobRestartCount) - assert.Equal(t, &expectedTime, flinkApp.Status.AppStatus[0].JobStatus.RestoreTime) + assert.Equal(t, int32(0), flinkApp.Status.ApplicationStatus[0].JobStatus.FailedCheckpointCount) + assert.Equal(t, int32(4), flinkApp.Status.ApplicationStatus[0].JobStatus.CompletedCheckpointCount) + assert.Equal(t, int32(1), flinkApp.Status.ApplicationStatus[0].JobStatus.JobRestartCount) + assert.Equal(t, &expectedTime, flinkApp.Status.ApplicationStatus[0].JobStatus.RestoreTime) - assert.Equal(t, "/test/externalpath", flinkApp.Status.AppStatus[0].JobStatus.RestorePath) - assert.Equal(t, &expectedTime, flinkApp.Status.AppStatus[0].JobStatus.LastCheckpointTime) - assert.Equal(t, "app-name.lyft.xyz/#/jobs/abc", flinkApp.Status.AppStatus[0].JobStatus.JobOverviewURL) + assert.Equal(t, 
"/test/externalpath", flinkApp.Status.ApplicationStatus[0].JobStatus.RestorePath) + assert.Equal(t, &expectedTime, flinkApp.Status.ApplicationStatus[0].JobStatus.LastCheckpointTime) + assert.Equal(t, "app-name.lyft.xyz/#/jobs/abc", flinkApp.Status.ApplicationStatus[0].JobStatus.JobOverviewURL) - assert.Equal(t, int32(2), flinkApp.Status.AppStatus[0].JobStatus.RunningTasks) - assert.Equal(t, int32(7), flinkApp.Status.AppStatus[0].JobStatus.TotalTasks) + assert.Equal(t, int32(2), flinkApp.Status.ApplicationStatus[0].JobStatus.RunningTasks) + assert.Equal(t, int32(7), flinkApp.Status.ApplicationStatus[0].JobStatus.TotalTasks) } @@ -866,16 +866,16 @@ func TestNoJobStatusChange(t *testing.T) { app1 := getFlinkTestApp() mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) - app1.Status.AppStatus[0].JobStatus.State = v1beta1.Running - app1.Status.AppStatus[0].JobStatus.StartTime = &metaTime - app1.Status.AppStatus[0].JobStatus.LastCheckpointTime = &metaTime - app1.Status.AppStatus[0].JobStatus.CompletedCheckpointCount = int32(4) - app1.Status.AppStatus[0].JobStatus.JobRestartCount = int32(1) - app1.Status.AppStatus[0].JobStatus.FailedCheckpointCount = int32(0) - app1.Status.AppStatus[0].JobStatus.Health = v1beta1.Green - app1.Status.AppStatus[0].JobStatus.RestoreTime = &metaTime - app1.Status.AppStatus[0].JobStatus.RestorePath = "/test/externalpath" - app1.Status.AppStatus[0].JobStatus.JobOverviewURL = "" + app1.Status.ApplicationStatus[0].JobStatus.State = v1beta1.Running + app1.Status.ApplicationStatus[0].JobStatus.StartTime = &metaTime + app1.Status.ApplicationStatus[0].JobStatus.LastCheckpointTime = &metaTime + app1.Status.ApplicationStatus[0].JobStatus.CompletedCheckpointCount = int32(4) + app1.Status.ApplicationStatus[0].JobStatus.JobRestartCount = int32(1) + app1.Status.ApplicationStatus[0].JobStatus.FailedCheckpointCount = int32(0) + app1.Status.ApplicationStatus[0].JobStatus.Health = v1beta1.Green + 
app1.Status.ApplicationStatus[0].JobStatus.RestoreTime = &metaTime + app1.Status.ApplicationStatus[0].JobStatus.RestorePath = "/test/externalpath" + app1.Status.ApplicationStatus[0].JobStatus.JobOverviewURL = "" mockJmClient.GetJobOverviewFunc = func(ctx context.Context, url string, jobID string) (*client.FlinkJobOverview, error) { assert.Equal(t, url, "http://app-name-hash.ns:8081") @@ -918,8 +918,8 @@ func TestGetAndUpdateJobStatusHealth(t *testing.T) { app1 := getFlinkTestApp() mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) - app1.Status.AppStatus[0].JobStatus.State = v1beta1.Failing - app1.Status.AppStatus[0].JobStatus.LastFailingTime = &lastFailedTime + app1.Status.ApplicationStatus[0].JobStatus.State = v1beta1.Failing + app1.Status.ApplicationStatus[0].JobStatus.LastFailingTime = &lastFailedTime mockJmClient.GetJobOverviewFunc = func(ctx context.Context, url string, jobID string) (*client.FlinkJobOverview, error) { assert.Equal(t, url, "http://app-name-hash.ns:8081") @@ -944,7 +944,7 @@ func TestGetAndUpdateJobStatusHealth(t *testing.T) { assert.Nil(t, err) // Job is in a RUNNING state but was in a FAILING state in the last 1 minute, so we expect // JobStatus.Health to be Red - assert.Equal(t, app1.Status.AppStatus[0].JobStatus.Health, v1beta1.Red) + assert.Equal(t, app1.Status.ApplicationStatus[0].JobStatus.Health, v1beta1.Red) } diff --git a/pkg/controller/flink/mock/mock_flink.go b/pkg/controller/flink/mock/mock_flink.go index fe05c07f..153b6564 100644 --- a/pkg/controller/flink/mock/mock_flink.go +++ b/pkg/controller/flink/mock/mock_flink.go @@ -24,6 +24,11 @@ type GetCurrentDeploymentsForAppFunc func(ctx context.Context, application *v1be type FindExternalizedCheckpointFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) type CompareAndUpdateClusterStatusFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) type 
CompareAndUpdateJobStatusFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) +type GetLatestClusterStatusFunc func(ctx context.Context, app *v1beta1.FlinkApplication) v1beta1.FlinkClusterStatus +type GetLatestJobStatusFunc func(ctx context.Context, app *v1beta1.FlinkApplication) v1beta1.FlinkJobStatus +type GetLatestJobIDFunc func(ctx context.Context, app *v1beta1.FlinkApplication) string +type UpdateLatestJobIDFunc func(ctx context.Context, app *v1beta1.FlinkApplication, jobID string) +type GetDeployedJobIDFunc func(ctx context.Context, application *v1beta1.FlinkApplication) string type FlinkController struct { CreateClusterFunc CreateClusterFunc @@ -41,6 +46,11 @@ type FlinkController struct { Events []corev1.Event CompareAndUpdateClusterStatusFunc CompareAndUpdateClusterStatusFunc CompareAndUpdateJobStatusFunc CompareAndUpdateJobStatusFunc + GetLatestClusterStatusFunc GetLatestClusterStatusFunc + GetLatestJobStatusFunc GetLatestJobStatusFunc + GetLatestJobIDFunc GetLatestJobIDFunc + UpdateLatestJobIDFunc UpdateLatestJobIDFunc + GetDeployedJobIDFunc GetDeployedJobIDFunc } func (m *FlinkController) GetCurrentDeploymentsForApp(ctx context.Context, application *v1beta1.FlinkApplication) (*common.FlinkDeployment, error) { @@ -156,3 +166,53 @@ func (m *FlinkController) CompareAndUpdateJobStatus(ctx context.Context, app *v1 return false, nil } + +func (m *FlinkController) GetLatestClusterStatus(ctx context.Context, application *v1beta1.FlinkApplication) v1beta1.FlinkClusterStatus { + if m.GetLatestClusterStatusFunc != nil { + return m.GetLatestClusterStatusFunc(ctx, application) + } + + return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].ClusterStatus +} + +func (m *FlinkController) GetLatestJobStatus(ctx context.Context, application *v1beta1.FlinkApplication) v1beta1.FlinkJobStatus { + if m.GetLatestClusterStatusFunc != nil { + return m.GetLatestJobStatusFunc(ctx, application) + } + + return 
application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus +} + +func (m *FlinkController) GetLatestJobID(ctx context.Context, application *v1beta1.FlinkApplication) string { + if m.GetLatestClusterStatusFunc != nil { + return m.GetLatestJobIDFunc(ctx, application) + } + + return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID +} + +func (m *FlinkController) GetDeployedJobID(ctx context.Context, application *v1beta1.FlinkApplication) string { + if m.GetLatestClusterStatusFunc != nil { + return m.GetDeployedJobIDFunc(ctx, application) + } + + return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID +} + +func (m *FlinkController) UpdateLatestJobID(ctx context.Context, application *v1beta1.FlinkApplication, jobID string) { + if m.UpdateLatestJobIDFunc != nil { + m.UpdateLatestJobIDFunc(ctx, application, jobID) + } + + application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID = jobID +} + +func getCurrentStatusIndex(app *v1beta1.FlinkApplication) int32 { + desiredCount := app.Status.DesiredApplicationCount + runningJobs := app.Status.RunningJobs + if runningJobs != desiredCount { + return runningJobs + } + + return runningJobs - 1 +} diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index d753c52f..145f0e6a 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -242,7 +242,7 @@ func (s *FlinkStateMachine) handleNewOrUpdating(ctx context.Context, application logger.Errorf(ctx, "Cluster creation failed with error: %v", err) return statusUnchanged, err } - + application.Status.DesiredApplicationCount = application.Status.DesiredApplicationCount + 1 s.updateApplicationPhase(application, v1beta1.FlinkApplicationClusterStarting) return statusChanged, nil } @@ -291,20 +291,23 @@ func (s 
*FlinkStateMachine) handleClusterStarting(ctx context.Context, applicati } func initializeAppStatusIfEmpty(application *v1beta1.FlinkApplication) { - // initialize the app status array to include 2 statuses in case of blue green deploys - if len(application.Status.AppStatus) == 0 { - if application.Spec.DeploymentMode == v1beta1.DeploymentModeBlueGreen { - application.Status.AppStatus = make([]v1beta1.FlinkApplicationVersionStatus, 2) - } else { - application.Status.AppStatus = make([]v1beta1.FlinkApplicationVersionStatus, 1) - } + // initialize the app status array to include 2 status elements in case of blue green deploys + // else use a one element array + if application.Spec.DeploymentMode == v1beta1.DeploymentModeBlueGreen { + application.Status.DesiredApplicationCount = 2 + } else { + application.Status.DesiredApplicationCount = 1 + } + + if len(application.Status.ApplicationStatus) == 0 { + application.Status.ApplicationStatus = make([]v1beta1.FlinkApplicationVersionStatus, application.Status.DesiredApplicationCount) } } func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { // we've already savepointed (or this is our first deploy), continue on if application.Status.SavepointPath != "" || application.Status.DeployHash == "" { - application.Status.AppStatus[0].JobStatus.JobID = "" + s.flinkController.UpdateLatestJobID(ctx, application, "") s.updateApplicationPhase(application, v1beta1.FlinkApplicationSubmittingJob) return statusChanged, nil } @@ -326,7 +329,7 @@ func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, a } s.flinkController.LogEvent(ctx, application, corev1.EventTypeNormal, "CancellingJob", - fmt.Sprintf("Cancelling job %s with a final savepoint", application.Status.AppStatus[0].JobStatus.JobID)) + fmt.Sprintf("Cancelling job %s with a final savepoint", s.flinkController.GetLatestJobID(ctx, application))) application.Status.SavepointTriggerID 
= triggerID return statusChanged, nil @@ -344,7 +347,7 @@ func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, a // TODO: we should probably retry this a few times before failing s.flinkController.LogEvent(ctx, application, corev1.EventTypeWarning, "SavepointFailed", fmt.Sprintf("Failed to take savepoint for job %s: %v", - application.Status.AppStatus[0].JobStatus.JobID, savepointStatusResponse.Operation.FailureCause)) + s.flinkController.GetDeployedJobID(ctx, application), savepointStatusResponse.Operation.FailureCause)) application.Status.RetryCount = 0 s.updateApplicationPhase(application, v1beta1.FlinkApplicationRecovering) return statusChanged, nil @@ -353,7 +356,7 @@ func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, a fmt.Sprintf("Canceled job with savepoint %s", savepointStatusResponse.Operation.Location)) application.Status.SavepointPath = savepointStatusResponse.Operation.Location - application.Status.AppStatus[0].JobStatus.JobID = "" + s.flinkController.UpdateLatestJobID(ctx, application, "") s.updateApplicationPhase(application, v1beta1.FlinkApplicationSubmittingJob) return statusChanged, nil } @@ -390,7 +393,7 @@ func (s *FlinkStateMachine) handleApplicationRecovering(ctx context.Context, app path, flink.HashForApplication(app))) app.Status.SavepointPath = path - app.Status.AppStatus[0].JobStatus.JobID = "" + s.flinkController.UpdateLatestJobID(ctx, app, "") s.updateApplicationPhase(app, v1beta1.FlinkApplicationSubmittingJob) return statusChanged, nil } @@ -405,8 +408,8 @@ func (s *FlinkStateMachine) submitJobIfNeeded(ctx context.Context, app *v1beta1. 
} // Check if the job id has already been set on our application - if app.Status.AppStatus[0].JobStatus.JobID != "" { - return app.Status.AppStatus[0].JobStatus.JobID, nil + if s.flinkController.GetLatestJobID(ctx, app) != "" { + return s.flinkController.GetLatestJobID(ctx, app), nil } // Check that there are no jobs running before starting the job @@ -468,10 +471,6 @@ func (s *FlinkStateMachine) updateGenericService(ctx context.Context, app *v1bet } func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta1.FlinkApplication) (bool, error) { - if len(app.Status.AppStatus) == 0 { - app.Status.AppStatus = make([]v1beta1.FlinkApplicationVersionStatus, 2) - } - if rollback, reason := s.shouldRollback(ctx, app); rollback { // Something's gone wrong; roll back s.flinkController.LogEvent(ctx, app, corev1.EventTypeWarning, "JobSubmissionFailed", @@ -493,7 +492,7 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta logger.Errorf(ctx, "Updating cluster status failed with error: %v", clusterErr) } - if app.Status.AppStatus[0].JobStatus.JobID == "" { + if s.flinkController.GetLatestJobID(ctx, app) == "" { savepointPath := "" if app.Status.DeployHash == "" { // this is the first deploy, use the user-provided savepoint @@ -515,7 +514,7 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta } if appJobID != "" { - app.Status.AppStatus[0].JobStatus.JobID = appJobID + s.flinkController.UpdateLatestJobID(ctx, app, appJobID) return statusChanged, nil } @@ -540,12 +539,12 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta app.Status.DeployHash = hash app.Status.SavepointPath = "" app.Status.SavepointTriggerID = "" - app.Status.AppStatus[0].JobStatus.JarName = app.Spec.JarName - app.Status.AppStatus[0].JobStatus.Parallelism = app.Spec.Parallelism - app.Status.AppStatus[0].JobStatus.EntryClass = app.Spec.EntryClass - app.Status.AppStatus[0].JobStatus.ProgramArgs = 
app.Spec.ProgramArgs - app.Status.AppStatus[0].JobStatus.AllowNonRestoredState = app.Spec.AllowNonRestoredState - + app.Status.ApplicationStatus[app.Status.RunningJobs].JobStatus.JarName = app.Spec.JarName + app.Status.ApplicationStatus[app.Status.RunningJobs].JobStatus.Parallelism = app.Spec.Parallelism + app.Status.ApplicationStatus[app.Status.RunningJobs].JobStatus.EntryClass = app.Spec.EntryClass + app.Status.ApplicationStatus[app.Status.RunningJobs].JobStatus.ProgramArgs = app.Spec.ProgramArgs + app.Status.ApplicationStatus[app.Status.RunningJobs].JobStatus.AllowNonRestoredState = app.Spec.AllowNonRestoredState + app.Status.RunningJobs = app.Status.RunningJobs + 1 s.updateApplicationPhase(app, v1beta1.FlinkApplicationRunning) return statusChanged, nil } @@ -586,10 +585,11 @@ func (s *FlinkStateMachine) handleRollingBack(ctx context.Context, app *v1beta1. } // submit the old job + jobStatus := s.flinkController.GetLatestJobStatus(ctx, app) jobID, err := s.submitJobIfNeeded(ctx, app, app.Status.DeployHash, - app.Status.AppStatus[0].JobStatus.JarName, app.Status.AppStatus[0].JobStatus.Parallelism, - app.Status.AppStatus[0].JobStatus.EntryClass, app.Status.AppStatus[0].JobStatus.ProgramArgs, - app.Status.AppStatus[0].JobStatus.AllowNonRestoredState, + jobStatus.JarName, jobStatus.Parallelism, + jobStatus.EntryClass, jobStatus.ProgramArgs, + jobStatus.AllowNonRestoredState, app.Status.SavepointPath) // set rollbackHash @@ -599,7 +599,7 @@ func (s *FlinkStateMachine) handleRollingBack(ctx context.Context, app *v1beta1. } if jobID != "" { - app.Status.AppStatus[0].JobStatus.JobID = jobID + s.flinkController.UpdateLatestJobID(ctx, app, jobID) app.Status.SavepointPath = "" app.Status.SavepointTriggerID = "" // move to the deploy failed state @@ -612,9 +612,6 @@ func (s *FlinkStateMachine) handleRollingBack(ctx context.Context, app *v1beta1. // Check if the application is Running. // This is a stable state. 
Keep monitoring if the underlying CRD reflects the Flink cluster func (s *FlinkStateMachine) handleApplicationRunning(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { - if len(application.Status.AppStatus) == 0 { - application.Status.AppStatus = make([]v1beta1.FlinkApplicationVersionStatus, 2) - } cur, err := s.flinkController.GetCurrentDeploymentsForApp(ctx, application) if err != nil { return statusUnchanged, err @@ -636,7 +633,7 @@ func (s *FlinkStateMachine) handleApplicationRunning(ctx context.Context, applic } if job == nil { - logger.Warnf(ctx, "Could not find active job {}", application.Status.AppStatus[0].JobStatus.JobID) + logger.Warnf(ctx, "Could not find active job %s", s.flinkController.GetLatestJobID(ctx, application)) } else { logger.Debugf(ctx, "Application running with job %v", job.JobID) } diff --git a/pkg/controller/flinkapplication/flink_state_machine_test.go b/pkg/controller/flinkapplication/flink_state_machine_test.go index 3a54e024..4e69b38f 100644 --- a/pkg/controller/flinkapplication/flink_state_machine_test.go +++ b/pkg/controller/flinkapplication/flink_state_machine_test.go @@ -364,14 +364,14 @@ func TestSubmittingToRunning(t *testing.T) { mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { if statusUpdateCount == 0 { application := object.(*v1beta1.FlinkApplication) - assert.Equal(t, jobID, application.Status.AppStatus[0].JobStatus.JobID) + assert.Equal(t, jobID, application.Status.ApplicationStatus[0].JobStatus.JobID) } else if statusUpdateCount == 1 { application := object.(*v1beta1.FlinkApplication) assert.Equal(t, appHash, application.Status.DeployHash) - assert.Equal(t, app.Spec.JarName, app.Status.AppStatus[0].JobStatus.JarName) - assert.Equal(t, app.Spec.Parallelism, app.Status.AppStatus[0].JobStatus.Parallelism) - assert.Equal(t, app.Spec.EntryClass, app.Status.AppStatus[0].JobStatus.EntryClass) - assert.Equal(t, app.Spec.ProgramArgs,
app.Status.AppStatus[0].JobStatus.ProgramArgs) + assert.Equal(t, app.Spec.JarName, app.Status.ApplicationStatus[0].JobStatus.JarName) + assert.Equal(t, app.Spec.Parallelism, app.Status.ApplicationStatus[0].JobStatus.Parallelism) + assert.Equal(t, app.Spec.EntryClass, app.Status.ApplicationStatus[0].JobStatus.EntryClass) + assert.Equal(t, app.Spec.ProgramArgs, app.Status.ApplicationStatus[0].JobStatus.ProgramArgs) assert.Equal(t, v1beta1.FlinkApplicationRunning, application.Status.Phase) } statusUpdateCount++ @@ -451,7 +451,7 @@ func TestRollingBack(t *testing.T) { Phase: v1beta1.FlinkApplicationRollingBackJob, DeployHash: "old-hash", SavepointPath: "file:///savepoint", - AppStatus: []v1beta1.FlinkApplicationVersionStatus{ + ApplicationStatus: []v1beta1.FlinkApplicationVersionStatus{ v1beta1.FlinkApplicationVersionStatus{ JobStatus: v1beta1.FlinkJobStatus{ JarName: "old-job.jar", @@ -478,11 +478,11 @@ func TestRollingBack(t *testing.T) { startCalled = true assert.Equal(t, "old-hash", hash) - assert.Equal(t, app.Status.AppStatus[0].JobStatus.JarName, jarName) - assert.Equal(t, app.Status.AppStatus[0].JobStatus.Parallelism, parallelism) - assert.Equal(t, app.Status.AppStatus[0].JobStatus.EntryClass, entryClass) - assert.Equal(t, app.Status.AppStatus[0].JobStatus.ProgramArgs, programArgs) - assert.Equal(t, app.Status.AppStatus[0].JobStatus.AllowNonRestoredState, allowNonRestoredState) + assert.Equal(t, app.Status.ApplicationStatus[0].JobStatus.JarName, jarName) + assert.Equal(t, app.Status.ApplicationStatus[0].JobStatus.Parallelism, parallelism) + assert.Equal(t, app.Status.ApplicationStatus[0].JobStatus.EntryClass, entryClass) + assert.Equal(t, app.Status.ApplicationStatus[0].JobStatus.ProgramArgs, programArgs) + assert.Equal(t, app.Status.ApplicationStatus[0].JobStatus.AllowNonRestoredState, allowNonRestoredState) assert.Equal(t, app.Status.SavepointPath, savepointPath) return jobID, nil } @@ -633,7 +633,7 @@ func TestDeleteWithSavepoint(t *testing.T) { Status: 
v1beta1.FlinkApplicationStatus{ Phase: v1beta1.FlinkApplicationDeleting, DeployHash: "deployhash", - AppStatus: []v1beta1.FlinkApplicationVersionStatus{ + ApplicationStatus: []v1beta1.FlinkApplicationVersionStatus{ v1beta1.FlinkApplicationVersionStatus{ JobStatus: v1beta1.FlinkJobStatus{ JobID: jobID, @@ -749,7 +749,7 @@ func TestDeleteWithSavepointAndFinishedJob(t *testing.T) { Phase: v1beta1.FlinkApplicationDeleting, DeployHash: "deployhash", SavepointPath: "file:///savepoint", - AppStatus: []v1beta1.FlinkApplicationVersionStatus{ + ApplicationStatus: []v1beta1.FlinkApplicationVersionStatus{ v1beta1.FlinkApplicationVersionStatus{ JobStatus: v1beta1.FlinkJobStatus{ JobID: jobID, @@ -800,7 +800,7 @@ func TestDeleteWithForceCancel(t *testing.T) { }, Status: v1beta1.FlinkApplicationStatus{ Phase: v1beta1.FlinkApplicationDeleting, - AppStatus: []v1beta1.FlinkApplicationVersionStatus{ + ApplicationStatus: []v1beta1.FlinkApplicationVersionStatus{ v1beta1.FlinkApplicationVersionStatus{ JobStatus: v1beta1.FlinkJobStatus{ JobID: jobID, From df893764051005c8ef6a50c91efb7f19bd169dbe Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Wed, 26 Feb 2020 18:21:14 -0800 Subject: [PATCH 04/41] Updates --- flink-development-config.yaml | 9 --------- integ/checkpoint_failure_test.go | 4 ++-- integ/simple_test.go | 20 +++++++++---------- integ/utils/utils.go | 15 ++++++++++++++ pkg/apis/app/v1beta1/types.go | 7 ------- pkg/controller/flink/flink.go | 13 ++++++------ pkg/controller/flink/mock/mock_flink.go | 10 ---------- .../flinkapplication/flink_state_machine.go | 2 +- 8 files changed, 34 insertions(+), 46 deletions(-) delete mode 100644 flink-development-config.yaml diff --git a/flink-development-config.yaml b/flink-development-config.yaml deleted file mode 100644 index d315e7c3..00000000 --- a/flink-development-config.yaml +++ /dev/null @@ -1,9 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: flink-development-config -data: - APPLICATION_ENV: development - 
JOB_MANAGER_HEAP_MB: "200" - TASK_MANAGER_SLOTS: "2" - TASK_MANAGER_HEAP_MB: "200" diff --git a/integ/checkpoint_failure_test.go b/integ/checkpoint_failure_test.go index c0c11c68..3b4cf4a7 100644 --- a/integ/checkpoint_failure_test.go +++ b/integ/checkpoint_failure_test.go @@ -45,9 +45,9 @@ func failingJobTest(s *IntegSuite, c *C, testName string, causeFailure func()) { // And the job should not have been updated newApp, err := s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) - c.Assert(newApp.Status.ApplicationStatus[0].JobStatus.JobID, Equals, app.Status.ApplicationStatus[0].JobStatus.JobID) + c.Assert(newApp.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID, Equals, app.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID) - endpoint := fmt.Sprintf("jobs/%s", app.Status.ApplicationStatus[0].JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s", app.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID) _, err = s.Util.FlinkAPIGet(app, endpoint) c.Assert(err, IsNil) diff --git a/integ/simple_test.go b/integ/simple_test.go index bcf6c5da..4e86ff60 100644 --- a/integ/simple_test.go +++ b/integ/simple_test.go @@ -28,12 +28,12 @@ func updateAndValidate(c *C, s *IntegSuite, name string, updateFn func(app *v1be // check that it really updated newApp, err := s.Util.GetFlinkApplication(name) c.Assert(err, IsNil) - c.Assert(newApp.Status.ApplicationStatus[0].JobStatus.JobID, Not(Equals), app.Status.ApplicationStatus[0].JobStatus.JobID) + c.Assert(newApp.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID, Not(Equals), app.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID) log.Info("New job started successfully") // check that we savepointed and restored correctly - endpoint := fmt.Sprintf("jobs/%s/checkpoints", newApp.Status.ApplicationStatus[0].JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s/checkpoints", 
newApp.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID) res, err := s.Util.FlinkAPIGet(newApp, endpoint) c.Assert(err, IsNil) @@ -140,13 +140,13 @@ func (s *IntegSuite) TestSimple(c *C) { c.Assert(s.Util.WaitForAllTasksRunning(newApp.Name), IsNil) // the job id should have changed - jobID := newApp.Status.ApplicationStatus[0].JobStatus.JobID + jobID := newApp.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID newApp, err = s.Util.GetFlinkApplication(newApp.Name) c.Assert(err, IsNil) - c.Assert(newApp.Status.ApplicationStatus[0].JobStatus.JobID, Not(Equals), jobID) + c.Assert(newApp.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID, Not(Equals), jobID) // we should have restored from our savepoint - endpoint := fmt.Sprintf("jobs/%s/checkpoints", newApp.Status.ApplicationStatus[0].JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s/checkpoints", newApp.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID) res, err := s.Util.FlinkAPIGet(newApp, endpoint) c.Assert(err, IsNil) @@ -191,7 +191,7 @@ func (s *IntegSuite) TestSimple(c *C) { log.Info("User cancelled deploy. 
Job is in deploy failed, waiting for tasks to start") // but the job should still be running - c.Assert(newApp.Status.ApplicationStatus[0].JobStatus.State, Equals, v1beta1.Running) + c.Assert(newApp.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.State, Equals, v1beta1.Running) log.Info("Attempting to roll forward with fix") // Fixing update @@ -212,7 +212,7 @@ func (s *IntegSuite) TestSimple(c *C) { app, err = s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) - if len(app.Finalizers) == 1 && app.Finalizers[0] == finalizer { + if len(app.Finalizers) == 1 && app.Finalizers[0] == finalizer { break } time.Sleep(100 * time.Millisecond) @@ -225,7 +225,7 @@ func (s *IntegSuite) TestSimple(c *C) { jobList := jobMap["jobs"].([]interface{}) for _, j := range jobList { job := j.(map[string]interface{}) - if job["id"] == app.Status.ApplicationStatus[0].JobStatus.JobID { + if job["id"] == app.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID { return job } } @@ -284,7 +284,7 @@ func (s *IntegSuite) TestRecovery(c *C) { app, err := s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) - endpoint := fmt.Sprintf("jobs/%s/checkpoints", app.Status.ApplicationStatus[0].JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s/checkpoints", app.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID) for { res, err := s.Util.FlinkAPIGet(app, endpoint) c.Assert(err, IsNil) @@ -324,7 +324,7 @@ func (s *IntegSuite) TestRecovery(c *C) { // wait until the new job is launched newApp, err := s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) - if newApp.Status.ApplicationStatus[0].JobStatus.JobID != app.Status.ApplicationStatus[0].JobStatus.JobID { + if newApp.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID != app.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID { break } time.Sleep(100 * time.Millisecond) diff
--git a/integ/utils/utils.go b/integ/utils/utils.go index 80c52f79..5d7c834d 100644 --- a/integ/utils/utils.go +++ b/integ/utils/utils.go @@ -482,3 +482,18 @@ func (f *TestUtil) Update(name string, updateFn func(app *flinkapp.FlinkApplicat time.Sleep(500 * time.Millisecond) } } + +func (f *TestUtil) GetCurrentStatusIndex(app *flinkapp.FlinkApplication) int32 { + desiredCount := app.Status.DesiredApplicationCount + runningJobs := app.Status.RunningJobs + // We're still trying to bring up jobs to match desired count + // so the current status will append + // to the existing array + if runningJobs != desiredCount && app.Status.Phase != "Running" { + return runningJobs + } + + // We've spun up required number of jobs, so the status points to the last + // appended value. + return runningJobs - 1 +} diff --git a/pkg/apis/app/v1beta1/types.go b/pkg/apis/app/v1beta1/types.go index aacbb224..25a378b8 100644 --- a/pkg/apis/app/v1beta1/types.go +++ b/pkg/apis/app/v1beta1/types.go @@ -277,13 +277,6 @@ const ( DeleteModeNone DeleteMode = "None" ) -type SavepointMode string - -const ( - SavepointModeSavepointOnly SavepointMode = "Savepoint" - SavepointModeSavepointAndCancel SavepointMode = "SavepointAndCancel" -) - type HealthStatus string const ( diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index 2e1f3803..7c6618c8 100644 --- a/pkg/controller/flink/flink.go +++ b/pkg/controller/flink/flink.go @@ -94,14 +94,17 @@ type ControllerInterface interface { // Returns true if there is a change in JobStatus CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, hash string) (bool, error) + // Gets the last updated cluster status GetLatestClusterStatus(ctx context.Context, app *v1beta1.FlinkApplication) v1beta1.FlinkClusterStatus + // Gets the last updated job status GetLatestJobStatus(ctx context.Context, app *v1beta1.FlinkApplication) v1beta1.FlinkJobStatus + // Gets the last updated job ID GetLatestJobID(ctx context.Context, app 
*v1beta1.FlinkApplication) string + // Updates the jobID on the latest jobStatus UpdateLatestJobID(ctx context.Context, app *v1beta1.FlinkApplication, jobID string) - GetDeployedJobID(ctx context.Context, application *v1beta1.FlinkApplication) string } func NewController(k8sCluster k8.ClusterInterface, eventRecorder record.EventRecorder, config controllerConfig.RuntimeConfig) ControllerInterface { @@ -142,14 +145,10 @@ type Controller struct { eventRecorder record.EventRecorder } -func (f *Controller) GetDeployedJobID(ctx context.Context, application *v1beta1.FlinkApplication) string { +func (f *Controller) GetLatestJobID(ctx context.Context, application *v1beta1.FlinkApplication) string { return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID } -func (f *Controller) GetLatestJobID(ctx context.Context, app *v1beta1.FlinkApplication) string { - return f.GetDeployedJobID(ctx, app) -} - func (f *Controller) UpdateLatestJobID(ctx context.Context, app *v1beta1.FlinkApplication, jobID string) { app.Status.ApplicationStatus[getCurrentStatusIndex(app)].JobStatus.JobID = jobID } @@ -577,7 +576,7 @@ func getCurrentStatusIndex(app *v1beta1.FlinkApplication) int32 { // We're still trying to bring up jobs to match desired count // so the current status will append // to the existing array - if runningJobs != desiredCount && !v1beta1.IsRunningPhase(app.Status.Phase){ + if runningJobs != desiredCount && !v1beta1.IsRunningPhase(app.Status.Phase) { return runningJobs } diff --git a/pkg/controller/flink/mock/mock_flink.go b/pkg/controller/flink/mock/mock_flink.go index 153b6564..1c483487 100644 --- a/pkg/controller/flink/mock/mock_flink.go +++ b/pkg/controller/flink/mock/mock_flink.go @@ -28,7 +28,6 @@ type GetLatestClusterStatusFunc func(ctx context.Context, app *v1beta1.FlinkAppl type GetLatestJobStatusFunc func(ctx context.Context, app *v1beta1.FlinkApplication) v1beta1.FlinkJobStatus type GetLatestJobIDFunc func(ctx context.Context, app 
*v1beta1.FlinkApplication) string type UpdateLatestJobIDFunc func(ctx context.Context, app *v1beta1.FlinkApplication, jobID string) -type GetDeployedJobIDFunc func(ctx context.Context, application *v1beta1.FlinkApplication) string type FlinkController struct { CreateClusterFunc CreateClusterFunc @@ -50,7 +49,6 @@ type FlinkController struct { GetLatestJobStatusFunc GetLatestJobStatusFunc GetLatestJobIDFunc GetLatestJobIDFunc UpdateLatestJobIDFunc UpdateLatestJobIDFunc - GetDeployedJobIDFunc GetDeployedJobIDFunc } func (m *FlinkController) GetCurrentDeploymentsForApp(ctx context.Context, application *v1beta1.FlinkApplication) (*common.FlinkDeployment, error) { @@ -191,14 +189,6 @@ func (m *FlinkController) GetLatestJobID(ctx context.Context, application *v1bet return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID } -func (m *FlinkController) GetDeployedJobID(ctx context.Context, application *v1beta1.FlinkApplication) string { - if m.GetLatestClusterStatusFunc != nil { - return m.GetDeployedJobIDFunc(ctx, application) - } - - return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID -} - func (m *FlinkController) UpdateLatestJobID(ctx context.Context, application *v1beta1.FlinkApplication, jobID string) { if m.UpdateLatestJobIDFunc != nil { m.UpdateLatestJobIDFunc(ctx, application, jobID) diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index 145f0e6a..d91c4328 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -347,7 +347,7 @@ func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, a // TODO: we should probably retry this a few times before failing s.flinkController.LogEvent(ctx, application, corev1.EventTypeWarning, "SavepointFailed", fmt.Sprintf("Failed to take savepoint for job %s: %v", - 
s.flinkController.GetDeployedJobID(ctx, application), savepointStatusResponse.Operation.FailureCause)) + s.flinkController.GetLatestJobID(ctx, application), savepointStatusResponse.Operation.FailureCause)) application.Status.RetryCount = 0 s.updateApplicationPhase(application, v1beta1.FlinkApplicationRecovering) return statusChanged, nil From d5543d7a58a15ff1feee5d7359fd88eef532fda0 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Wed, 26 Feb 2020 18:29:13 -0800 Subject: [PATCH 05/41] fix bug --- pkg/controller/flinkapplication/flink_state_machine.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index d91c4328..9ceec478 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -242,7 +242,6 @@ func (s *FlinkStateMachine) handleNewOrUpdating(ctx context.Context, application logger.Errorf(ctx, "Cluster creation failed with error: %v", err) return statusUnchanged, err } - application.Status.DesiredApplicationCount = application.Status.DesiredApplicationCount + 1 s.updateApplicationPhase(application, v1beta1.FlinkApplicationClusterStarting) return statusChanged, nil } From 2d7919e3791f6f4f479b7d8b3259ce0185ff271e Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Wed, 26 Feb 2020 19:26:44 -0800 Subject: [PATCH 06/41] Fixes --- integ/utils/utils.go | 2 +- pkg/controller/flink/flink.go | 19 ++-- pkg/controller/flink/flink_test.go | 89 +++++++++---------- .../flinkapplication/flink_state_machine.go | 1 + .../flink_state_machine_test.go | 22 ++--- 5 files changed, 68 insertions(+), 65 deletions(-) diff --git a/integ/utils/utils.go b/integ/utils/utils.go index 5d7c834d..a3d1f149 100644 --- a/integ/utils/utils.go +++ b/integ/utils/utils.go @@ -429,7 +429,7 @@ func (f *TestUtil) WaitForAllTasksRunning(name string) error { return err } - endpoint := fmt.Sprintf("jobs/%s", 
flinkApp.Status.ApplicationStatus[0].JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s", flinkApp.Status.ApplicationStatus[f.GetCurrentStatusIndex(flinkApp)].JobStatus.JobID) for { res, err := f.FlinkAPIGet(flinkApp, endpoint) if err != nil { diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index 7c6618c8..96c25b3e 100644 --- a/pkg/controller/flink/flink.go +++ b/pkg/controller/flink/flink.go @@ -173,7 +173,7 @@ func getClusterOverviewURL(app *v1beta1.FlinkApplication) string { func getJobOverviewURL(app *v1beta1.FlinkApplication) string { externalURL := getExternalURLFromApp(app) if externalURL != "" { - return fmt.Sprintf(externalURL+client.WebUIAnchor+client.GetJobsOverviewURL, app.Status.ApplicationStatus[0].JobStatus.JobID) + return fmt.Sprintf(externalURL+client.WebUIAnchor+client.GetJobsOverviewURL, app.Status.ApplicationStatus[getCurrentStatusIndex(app)].JobStatus.JobID) } return "" } @@ -229,7 +229,7 @@ func (f *Controller) GetJobForApplication(ctx context.Context, application *v1be return nil, nil } - jobResponse, err := f.flinkClient.GetJobOverview(ctx, getURLFromApp(application, hash), application.Status.ApplicationStatus[0].JobStatus.JobID) + jobResponse, err := f.flinkClient.GetJobOverview(ctx, getURLFromApp(application, hash), f.GetLatestJobID(ctx, application)) if err != nil { return nil, err } @@ -467,14 +467,15 @@ func (f *Controller) DeleteOldResourcesForApp(ctx context.Context, app *v1beta1. 
} func (f *Controller) FindExternalizedCheckpoint(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) { - checkpoint, err := f.flinkClient.GetLatestCheckpoint(ctx, getURLFromApp(application, hash), application.Status.ApplicationStatus[0].JobStatus.JobID) + checkpoint, err := f.flinkClient.GetLatestCheckpoint(ctx, getURLFromApp(application, hash), f.GetLatestJobID(ctx, application)) var checkpointPath string var checkpointTime int64 if err != nil { + jobStatus := f.GetLatestJobStatus(ctx, application) // we failed to query the JM, try to pull it out of the resource - if application.Status.ApplicationStatus[0].JobStatus.LastCheckpointPath != "" && application.Status.ApplicationStatus[0].JobStatus.LastCheckpointTime != nil { - checkpointPath = application.Status.ApplicationStatus[0].JobStatus.LastCheckpointPath - checkpointTime = application.Status.ApplicationStatus[0].JobStatus.LastCheckpointTime.Unix() + if jobStatus.LastCheckpointPath != "" && jobStatus.LastCheckpointTime != nil { + checkpointPath = jobStatus.LastCheckpointPath + checkpointTime = jobStatus.LastCheckpointTime.Unix() logger.Warnf(ctx, "Could not query JobManager for latest externalized checkpoint, using"+ " last seen checkpoint") } else { @@ -544,7 +545,7 @@ func (f *Controller) CompareAndUpdateClusterStatus(ctx context.Context, applicat application.Status.ApplicationStatus[currIndex].ClusterStatus.Health = v1beta1.Yellow } - return !apiequality.Semantic.DeepEqual(oldClusterStatus, application.Status.ApplicationStatus[0].ClusterStatus), nil + return !apiequality.Semantic.DeepEqual(oldClusterStatus, application.Status.ApplicationStatus[currIndex].ClusterStatus), nil } func (f *Controller) GetLatestClusterStatus(ctx context.Context, application *v1beta1.FlinkApplication) v1beta1.FlinkClusterStatus { @@ -594,11 +595,11 @@ func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1 oldJobStatus := app.Status.ApplicationStatus[currIndex].JobStatus 
app.Status.ApplicationStatus[currIndex].JobStatus.JobID = oldJobStatus.JobID - jobResponse, err := f.flinkClient.GetJobOverview(ctx, getURLFromApp(app, hash), app.Status.ApplicationStatus[0].JobStatus.JobID) + jobResponse, err := f.flinkClient.GetJobOverview(ctx, getURLFromApp(app, hash), f.GetLatestJobID(ctx, app)) if err != nil { return false, err } - checkpoints, err := f.flinkClient.GetCheckpointCounts(ctx, getURLFromApp(app, hash), app.Status.ApplicationStatus[0].JobStatus.JobID) + checkpoints, err := f.flinkClient.GetCheckpointCounts(ctx, getURLFromApp(app, hash), f.GetLatestJobID(ctx, app)) if err != nil { return false, err } diff --git a/pkg/controller/flink/flink_test.go b/pkg/controller/flink/flink_test.go index d7f8224d..1c25c5b9 100644 --- a/pkg/controller/flink/flink_test.go +++ b/pkg/controller/flink/flink_test.go @@ -72,7 +72,6 @@ func getFlinkTestApp() v1beta1.FlinkApplication { }, }) app.Status.ApplicationStatus = statuses - //app.Status.ApplicationStatus[0].JobStatus.JobID = testJobID app.Spec.Image = testImage app.Spec.FlinkVersion = testFlinkVersion app.Status.DesiredApplicationCount = 1 @@ -579,7 +578,7 @@ func TestGetJobsForApplicationErr(t *testing.T) { func TestFindExternalizedCheckpoint(t *testing.T) { flinkControllerForTest := getTestFlinkController() flinkApp := getFlinkTestApp() - flinkApp.Status.ApplicationStatus[0].JobStatus.JobID = "jobid" + flinkControllerForTest.UpdateLatestJobID(context.Background(), &flinkApp, "jobid") mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) mockJmClient.GetLatestCheckpointFunc = func(ctx context.Context, url string, jobId string) (*client.CheckpointStatistics, error) { @@ -599,10 +598,10 @@ func TestFindExternalizedCheckpoint(t *testing.T) { func TestFindExternalizedCheckpointFromStatus(t *testing.T) { flinkControllerForTest := getTestFlinkController() flinkApp := getFlinkTestApp() - flinkApp.Status.ApplicationStatus[0].JobStatus.JobID = "jobid" - 
flinkApp.Status.ApplicationStatus[0].JobStatus.LastCheckpointPath = "/tmp/checkpoint" + flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.JobID = "jobid" + flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.LastCheckpointPath = "/tmp/checkpoint" checkpointTime := metaV1.Now() - flinkApp.Status.ApplicationStatus[0].JobStatus.LastCheckpointTime = &checkpointTime + flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.LastCheckpointTime = &checkpointTime mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) mockJmClient.GetLatestCheckpointFunc = func(ctx context.Context, url string, jobId string) (*client.CheckpointStatistics, error) { @@ -668,22 +667,22 @@ func TestClusterStatusUpdated(t *testing.T) { _, err = flinkControllerForTest.CompareAndUpdateClusterStatus(context.Background(), &flinkApp, "hash") assert.Nil(t, err) - assert.Equal(t, int32(1), flinkApp.Status.ApplicationStatus[0].ClusterStatus.NumberOfTaskSlots) - assert.Equal(t, int32(0), flinkApp.Status.ApplicationStatus[0].ClusterStatus.AvailableTaskSlots) - assert.Equal(t, int32(1), flinkApp.Status.ApplicationStatus[0].ClusterStatus.HealthyTaskManagers) - assert.Equal(t, v1beta1.Green, flinkApp.Status.ApplicationStatus[0].ClusterStatus.Health) - assert.Equal(t, "app-name.lyft.xyz/#/overview", flinkApp.Status.ApplicationStatus[0].ClusterStatus.ClusterOverviewURL) + assert.Equal(t, int32(1), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskSlots) + assert.Equal(t, int32(0), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.AvailableTaskSlots) + assert.Equal(t, int32(1), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.HealthyTaskManagers) + assert.Equal(t, v1beta1.Green, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health) + assert.Equal(t, 
"app-name.lyft.xyz/#/overview", flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.ClusterOverviewURL) } func TestNoClusterStatusChange(t *testing.T) { flinkControllerForTest := getTestFlinkController() flinkApp := getFlinkTestApp() - flinkApp.Status.ApplicationStatus[0].ClusterStatus.NumberOfTaskSlots = int32(1) - flinkApp.Status.ApplicationStatus[0].ClusterStatus.AvailableTaskSlots = int32(0) - flinkApp.Status.ApplicationStatus[0].ClusterStatus.HealthyTaskManagers = int32(1) - flinkApp.Status.ApplicationStatus[0].ClusterStatus.Health = v1beta1.Green - flinkApp.Status.ApplicationStatus[0].ClusterStatus.NumberOfTaskManagers = int32(1) + flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskSlots = int32(1) + flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.AvailableTaskSlots = int32(0) + flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.HealthyTaskManagers = int32(1) + flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health = v1beta1.Green + flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskManagers = int32(1) mockK8Cluster := flinkControllerForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.GetDeploymentsWithLabelFunc = func(ctx context.Context, namespace string, labelMap map[string]string) (*v1.DeploymentList, error) { tmDeployment := FetchTaskMangerDeploymentCreateObj(&flinkApp, testAppHash) @@ -772,10 +771,10 @@ func TestHealthyTaskmanagers(t *testing.T) { _, err := flinkControllerForTest.CompareAndUpdateClusterStatus(context.Background(), &flinkApp, hash) assert.Nil(t, err) - assert.Equal(t, int32(1), flinkApp.Status.ApplicationStatus[0].ClusterStatus.NumberOfTaskSlots) - assert.Equal(t, int32(0), flinkApp.Status.ApplicationStatus[0].ClusterStatus.AvailableTaskSlots) - assert.Equal(t, int32(0), 
flinkApp.Status.ApplicationStatus[0].ClusterStatus.HealthyTaskManagers) - assert.Equal(t, v1beta1.Yellow, flinkApp.Status.ApplicationStatus[0].ClusterStatus.Health) + assert.Equal(t, int32(1), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskSlots) + assert.Equal(t, int32(0), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.AvailableTaskSlots) + assert.Equal(t, int32(0), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.HealthyTaskManagers) + assert.Equal(t, v1beta1.Yellow, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health) } @@ -832,26 +831,26 @@ func TestJobStatusUpdated(t *testing.T) { }, nil } - flinkApp.Status.ApplicationStatus[0].JobStatus.JobID = "abc" + flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.JobID = "abc" expectedTime := metaV1.NewTime(time.Unix(startTime/1000, 0)) _, err = flinkControllerForTest.CompareAndUpdateJobStatus(context.Background(), &flinkApp, "hash") assert.Nil(t, err) - assert.Equal(t, v1beta1.Running, flinkApp.Status.ApplicationStatus[0].JobStatus.State) - assert.Equal(t, &expectedTime, flinkApp.Status.ApplicationStatus[0].JobStatus.StartTime) - assert.Equal(t, v1beta1.Yellow, flinkApp.Status.ApplicationStatus[0].JobStatus.Health) + assert.Equal(t, v1beta1.Running, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.State) + assert.Equal(t, &expectedTime, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.StartTime) + assert.Equal(t, v1beta1.Yellow, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.Health) - assert.Equal(t, int32(0), flinkApp.Status.ApplicationStatus[0].JobStatus.FailedCheckpointCount) - assert.Equal(t, int32(4), flinkApp.Status.ApplicationStatus[0].JobStatus.CompletedCheckpointCount) - assert.Equal(t, int32(1), 
flinkApp.Status.ApplicationStatus[0].JobStatus.JobRestartCount) - assert.Equal(t, &expectedTime, flinkApp.Status.ApplicationStatus[0].JobStatus.RestoreTime) + assert.Equal(t, int32(0), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.FailedCheckpointCount) + assert.Equal(t, int32(4), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.CompletedCheckpointCount) + assert.Equal(t, int32(1), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.JobRestartCount) + assert.Equal(t, &expectedTime, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.RestoreTime) - assert.Equal(t, "/test/externalpath", flinkApp.Status.ApplicationStatus[0].JobStatus.RestorePath) - assert.Equal(t, &expectedTime, flinkApp.Status.ApplicationStatus[0].JobStatus.LastCheckpointTime) - assert.Equal(t, "app-name.lyft.xyz/#/jobs/abc", flinkApp.Status.ApplicationStatus[0].JobStatus.JobOverviewURL) + assert.Equal(t, "/test/externalpath", flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.RestorePath) + assert.Equal(t, &expectedTime, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.LastCheckpointTime) + assert.Equal(t, "app-name.lyft.xyz/#/jobs/abc", flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.JobOverviewURL) - assert.Equal(t, int32(2), flinkApp.Status.ApplicationStatus[0].JobStatus.RunningTasks) - assert.Equal(t, int32(7), flinkApp.Status.ApplicationStatus[0].JobStatus.TotalTasks) + assert.Equal(t, int32(2), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.RunningTasks) + assert.Equal(t, int32(7), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.TotalTasks) } @@ -866,16 +865,16 @@ func TestNoJobStatusChange(t *testing.T) { app1 := getFlinkTestApp() mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) - 
app1.Status.ApplicationStatus[0].JobStatus.State = v1beta1.Running - app1.Status.ApplicationStatus[0].JobStatus.StartTime = &metaTime - app1.Status.ApplicationStatus[0].JobStatus.LastCheckpointTime = &metaTime - app1.Status.ApplicationStatus[0].JobStatus.CompletedCheckpointCount = int32(4) - app1.Status.ApplicationStatus[0].JobStatus.JobRestartCount = int32(1) - app1.Status.ApplicationStatus[0].JobStatus.FailedCheckpointCount = int32(0) - app1.Status.ApplicationStatus[0].JobStatus.Health = v1beta1.Green - app1.Status.ApplicationStatus[0].JobStatus.RestoreTime = &metaTime - app1.Status.ApplicationStatus[0].JobStatus.RestorePath = "/test/externalpath" - app1.Status.ApplicationStatus[0].JobStatus.JobOverviewURL = "" + app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.State = v1beta1.Running + app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.StartTime = &metaTime + app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.LastCheckpointTime = &metaTime + app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.CompletedCheckpointCount = int32(4) + app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.JobRestartCount = int32(1) + app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.FailedCheckpointCount = int32(0) + app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.Health = v1beta1.Green + app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.RestoreTime = &metaTime + app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.RestorePath = "/test/externalpath" + app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.JobOverviewURL = "" mockJmClient.GetJobOverviewFunc = func(ctx context.Context, url string, jobID string) (*client.FlinkJobOverview, error) { assert.Equal(t, url, "http://app-name-hash.ns:8081") @@ -918,8 +917,8 @@ func TestGetAndUpdateJobStatusHealth(t *testing.T) { app1 := getFlinkTestApp() 
mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) - app1.Status.ApplicationStatus[0].JobStatus.State = v1beta1.Failing - app1.Status.ApplicationStatus[0].JobStatus.LastFailingTime = &lastFailedTime + app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.State = v1beta1.Failing + app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.LastFailingTime = &lastFailedTime mockJmClient.GetJobOverviewFunc = func(ctx context.Context, url string, jobID string) (*client.FlinkJobOverview, error) { assert.Equal(t, url, "http://app-name-hash.ns:8081") @@ -944,7 +943,7 @@ func TestGetAndUpdateJobStatusHealth(t *testing.T) { assert.Nil(t, err) // Job is in a RUNNING state but was in a FAILING state in the last 1 minute, so we expect // JobStatus.Health to be Red - assert.Equal(t, app1.Status.ApplicationStatus[0].JobStatus.Health, v1beta1.Red) + assert.Equal(t, app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.Health, v1beta1.Red) } diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index 9ceec478..4765a51c 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -356,6 +356,7 @@ func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, a savepointStatusResponse.Operation.Location)) application.Status.SavepointPath = savepointStatusResponse.Operation.Location s.flinkController.UpdateLatestJobID(ctx, application, "") + application.Status.RunningJobs = application.Status.RunningJobs - 1 s.updateApplicationPhase(application, v1beta1.FlinkApplicationSubmittingJob) return statusChanged, nil } diff --git a/pkg/controller/flinkapplication/flink_state_machine_test.go b/pkg/controller/flinkapplication/flink_state_machine_test.go index 4e69b38f..d241b05c 100644 --- a/pkg/controller/flinkapplication/flink_state_machine_test.go +++ 
b/pkg/controller/flinkapplication/flink_state_machine_test.go @@ -364,14 +364,15 @@ func TestSubmittingToRunning(t *testing.T) { mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { if statusUpdateCount == 0 { application := object.(*v1beta1.FlinkApplication) - assert.Equal(t, jobID, application.Status.ApplicationStatus[0].JobStatus.JobID) + assert.Equal(t, jobID, mockFlinkController.GetLatestJobID(ctx, application)) } else if statusUpdateCount == 1 { application := object.(*v1beta1.FlinkApplication) assert.Equal(t, appHash, application.Status.DeployHash) - assert.Equal(t, app.Spec.JarName, app.Status.ApplicationStatus[0].JobStatus.JarName) - assert.Equal(t, app.Spec.Parallelism, app.Status.ApplicationStatus[0].JobStatus.Parallelism) - assert.Equal(t, app.Spec.EntryClass, app.Status.ApplicationStatus[0].JobStatus.EntryClass) - assert.Equal(t, app.Spec.ProgramArgs, app.Status.ApplicationStatus[0].JobStatus.ProgramArgs) + jobStatus := mockFlinkController.GetLatestJobStatus(ctx, application) + assert.Equal(t, app.Spec.JarName, jobStatus.JarName) + assert.Equal(t, app.Spec.Parallelism, jobStatus.Parallelism) + assert.Equal(t, app.Spec.EntryClass, jobStatus.EntryClass) + assert.Equal(t, app.Spec.ProgramArgs, jobStatus.ProgramArgs) assert.Equal(t, v1beta1.FlinkApplicationRunning, application.Status.Phase) } statusUpdateCount++ @@ -478,11 +479,12 @@ func TestRollingBack(t *testing.T) { startCalled = true assert.Equal(t, "old-hash", hash) - assert.Equal(t, app.Status.ApplicationStatus[0].JobStatus.JarName, jarName) - assert.Equal(t, app.Status.ApplicationStatus[0].JobStatus.Parallelism, parallelism) - assert.Equal(t, app.Status.ApplicationStatus[0].JobStatus.EntryClass, entryClass) - assert.Equal(t, app.Status.ApplicationStatus[0].JobStatus.ProgramArgs, programArgs) - assert.Equal(t, app.Status.ApplicationStatus[0].JobStatus.AllowNonRestoredState, allowNonRestoredState) + jobStatus := mockFlinkController.GetLatestJobStatus(ctx, 
application) + assert.Equal(t, jobStatus.JarName, jarName) + assert.Equal(t, jobStatus.Parallelism, parallelism) + assert.Equal(t, jobStatus.EntryClass, entryClass) + assert.Equal(t, jobStatus.ProgramArgs, programArgs) + assert.Equal(t, jobStatus.AllowNonRestoredState, allowNonRestoredState) assert.Equal(t, app.Status.SavepointPath, savepointPath) return jobID, nil } From f89a2e105e55804e6204915132754c35bea5c9df Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Wed, 26 Feb 2020 22:06:57 -0800 Subject: [PATCH 07/41] Make running jobs calculation idempotent --- .../flinkapplication/flink_state_machine.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index 4765a51c..e1e24ee3 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -539,12 +539,12 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta app.Status.DeployHash = hash app.Status.SavepointPath = "" app.Status.SavepointTriggerID = "" + app.Status.RunningJobs = getRunningJobs(app) app.Status.ApplicationStatus[app.Status.RunningJobs].JobStatus.JarName = app.Spec.JarName app.Status.ApplicationStatus[app.Status.RunningJobs].JobStatus.Parallelism = app.Spec.Parallelism app.Status.ApplicationStatus[app.Status.RunningJobs].JobStatus.EntryClass = app.Spec.EntryClass app.Status.ApplicationStatus[app.Status.RunningJobs].JobStatus.ProgramArgs = app.Spec.ProgramArgs app.Status.ApplicationStatus[app.Status.RunningJobs].JobStatus.AllowNonRestoredState = app.Spec.AllowNonRestoredState - app.Status.RunningJobs = app.Status.RunningJobs + 1 s.updateApplicationPhase(app, v1beta1.FlinkApplicationRunning) return statusChanged, nil } @@ -552,6 +552,16 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta return statusUnchanged, nil } +func getRunningJobs(app 
*v1beta1.FlinkApplication) int32 { + runningJobs := 0 + for _, status := range app.Status.ApplicationStatus { + if status.JobStatus.State == v1beta1.Running { + runningJobs++ + } + } + return int32(runningJobs) +} + // Something has gone wrong during the update, post job-cancellation (and cluster tear-down in single mode). We need // to try to get things back into a working state func (s *FlinkStateMachine) handleRollingBack(ctx context.Context, app *v1beta1.FlinkApplication) (bool, error) { From 469cc96b50a8dae50303a77151552784a3566b32 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Wed, 26 Feb 2020 22:30:01 -0800 Subject: [PATCH 08/41] Fix bugs --- pkg/controller/flinkapplication/flink_state_machine.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index e1e24ee3..ae836249 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -539,12 +539,12 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta app.Status.DeployHash = hash app.Status.SavepointPath = "" app.Status.SavepointTriggerID = "" - app.Status.RunningJobs = getRunningJobs(app) app.Status.ApplicationStatus[app.Status.RunningJobs].JobStatus.JarName = app.Spec.JarName app.Status.ApplicationStatus[app.Status.RunningJobs].JobStatus.Parallelism = app.Spec.Parallelism app.Status.ApplicationStatus[app.Status.RunningJobs].JobStatus.EntryClass = app.Spec.EntryClass app.Status.ApplicationStatus[app.Status.RunningJobs].JobStatus.ProgramArgs = app.Spec.ProgramArgs app.Status.ApplicationStatus[app.Status.RunningJobs].JobStatus.AllowNonRestoredState = app.Spec.AllowNonRestoredState + app.Status.RunningJobs = getRunningJobs(app) s.updateApplicationPhase(app, v1beta1.FlinkApplicationRunning) return statusChanged, nil } @@ -555,7 +555,7 @@ func (s *FlinkStateMachine) 
handleSubmittingJob(ctx context.Context, app *v1beta func getRunningJobs(app *v1beta1.FlinkApplication) int32 { runningJobs := 0 for _, status := range app.Status.ApplicationStatus { - if status.JobStatus.State == v1beta1.Running { + if status.JobStatus.JobID != "" { runningJobs++ } } From 0409316e775c120855edd51c503a925d878d206a Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Wed, 26 Feb 2020 22:54:46 -0800 Subject: [PATCH 09/41] Reset running jobs in recovering phase --- pkg/controller/flinkapplication/flink_state_machine.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index ae836249..456bc041 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -394,6 +394,7 @@ func (s *FlinkStateMachine) handleApplicationRecovering(ctx context.Context, app app.Status.SavepointPath = path s.flinkController.UpdateLatestJobID(ctx, app, "") + app.Status.RunningJobs = getRunningJobs(app) s.updateApplicationPhase(app, v1beta1.FlinkApplicationSubmittingJob) return statusChanged, nil } From 57882da15aed171b7fbf2f390cd993eb0740b797 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Thu, 27 Feb 2020 10:30:01 -0800 Subject: [PATCH 10/41] Make status index calculation simpler --- integ/utils/utils.go | 12 ++------ pkg/apis/app/v1beta1/types.go | 1 - pkg/controller/flink/flink.go | 28 +++++++++++-------- pkg/controller/flink/mock/mock_flink.go | 17 ++++++++--- .../flinkapplication/flink_state_machine.go | 25 +++++------------ 5 files changed, 39 insertions(+), 44 deletions(-) diff --git a/integ/utils/utils.go b/integ/utils/utils.go index a3d1f149..fdf2150f 100644 --- a/integ/utils/utils.go +++ b/integ/utils/utils.go @@ -485,15 +485,9 @@ func (f *TestUtil) Update(name string, updateFn func(app *flinkapp.FlinkApplicat func (f *TestUtil) GetCurrentStatusIndex(app *flinkapp.FlinkApplication) int32 { 
desiredCount := app.Status.DesiredApplicationCount - runningJobs := app.Status.RunningJobs - // We're still trying to bring up jobs to match desired count - // so the current status will append - // to the existing array - if runningJobs != desiredCount && app.Status.Phase != "Running" { - return runningJobs + if app.Status.Phase != "Running" { + return 0 } - // We've spun up required number of jobs, so the status points to the last - // appended value. - return runningJobs - 1 + return desiredCount - 1 } diff --git a/pkg/apis/app/v1beta1/types.go b/pkg/apis/app/v1beta1/types.go index 25a378b8..e15044b0 100644 --- a/pkg/apis/app/v1beta1/types.go +++ b/pkg/apis/app/v1beta1/types.go @@ -170,7 +170,6 @@ type FlinkApplicationStatus struct { LastUpdatedAt *metav1.Time `json:"lastUpdatedAt,omitempty"` Reason string `json:"reason,omitempty"` DesiredApplicationCount int32 `json:"desiredApplicationCount,omitempty"` - RunningJobs int32 `json:"runningJobs,omitempty"` DeployVersion string `json:"deployVersion,omitempty"` UpdatingVersion string `json:"updatingVersion,omitempty"` ApplicationStatus []FlinkApplicationVersionStatus `json:"appStatus,omitempty"` diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index 96c25b3e..56ad226b 100644 --- a/pkg/controller/flink/flink.go +++ b/pkg/controller/flink/flink.go @@ -105,6 +105,9 @@ type ControllerInterface interface { // Updates the jobID on the latest jobStatus UpdateLatestJobID(ctx context.Context, app *v1beta1.FlinkApplication, jobID string) + + // Update jobStatus on the latest ApplicationStatus + UpdateLatestJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, jobStatus v1beta1.FlinkJobStatus) } func NewController(k8sCluster k8.ClusterInterface, eventRecorder record.EventRecorder, config controllerConfig.RuntimeConfig) ControllerInterface { @@ -145,6 +148,10 @@ type Controller struct { eventRecorder record.EventRecorder } +func (f *Controller) UpdateLatestJobStatus(ctx context.Context, app 
*v1beta1.FlinkApplication, jobStatus v1beta1.FlinkJobStatus) { + app.Status.ApplicationStatus[getCurrentStatusIndex(app)].JobStatus = jobStatus +} + func (f *Controller) GetLatestJobID(ctx context.Context, application *v1beta1.FlinkApplication) string { return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID } @@ -572,18 +579,15 @@ func getHealthyTaskManagerCount(response *client.TaskManagersResponse) int32 { } func getCurrentStatusIndex(app *v1beta1.FlinkApplication) int32 { - desiredCount := app.Status.DesiredApplicationCount - runningJobs := app.Status.RunningJobs - // We're still trying to bring up jobs to match desired count - // so the current status will append - // to the existing array - if runningJobs != desiredCount && !v1beta1.IsRunningPhase(app.Status.Phase) { - return runningJobs - } - - // We've spun up required number of jobs, so the status points to the last - // appended value. - return runningJobs - indexOffset + // In the Running phase, we always have only 1 job + if v1beta1.IsRunningPhase(app.Status.Phase) { + return 0 + } + + // In every other state, we either have + // Dual mode --> One Application status object + // BlueGreen mode --> Two Application status objects + return app.Status.DesiredApplicationCount - indexOffset } func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, hash string) (bool, error) { diff --git a/pkg/controller/flink/mock/mock_flink.go b/pkg/controller/flink/mock/mock_flink.go index 1c483487..1e5ade66 100644 --- a/pkg/controller/flink/mock/mock_flink.go +++ b/pkg/controller/flink/mock/mock_flink.go @@ -28,6 +28,7 @@ type GetLatestClusterStatusFunc func(ctx context.Context, app *v1beta1.FlinkAppl type GetLatestJobStatusFunc func(ctx context.Context, app *v1beta1.FlinkApplication) v1beta1.FlinkJobStatus type GetLatestJobIDFunc func(ctx context.Context, app *v1beta1.FlinkApplication) string type UpdateLatestJobIDFunc func(ctx 
context.Context, app *v1beta1.FlinkApplication, jobID string) +type UpdateLatestJobStatusFunc func(ctx context.Context, app *v1beta1.FlinkApplication, jobStatus v1beta1.FlinkJobStatus) type FlinkController struct { CreateClusterFunc CreateClusterFunc @@ -49,6 +50,7 @@ type FlinkController struct { GetLatestJobStatusFunc GetLatestJobStatusFunc GetLatestJobIDFunc GetLatestJobIDFunc UpdateLatestJobIDFunc UpdateLatestJobIDFunc + UpdateLatestJobStatusFunc UpdateLatestJobStatusFunc } func (m *FlinkController) GetCurrentDeploymentsForApp(ctx context.Context, application *v1beta1.FlinkApplication) (*common.FlinkDeployment, error) { @@ -197,12 +199,19 @@ func (m *FlinkController) UpdateLatestJobID(ctx context.Context, application *v1 application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID = jobID } +func (m *FlinkController) UpdateLatestJobStatus(ctx context.Context, application *v1beta1.FlinkApplication, jobStatus v1beta1.FlinkJobStatus) { + if m.UpdateLatestJobStatusFunc != nil { + m.UpdateLatestJobStatusFunc(ctx, application, jobStatus) + } + + application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus = jobStatus +} + func getCurrentStatusIndex(app *v1beta1.FlinkApplication) int32 { desiredCount := app.Status.DesiredApplicationCount - runningJobs := app.Status.RunningJobs - if runningJobs != desiredCount { - return runningJobs + if v1beta1.IsRunningPhase(app.Status.Phase) { + return 0 } - return runningJobs - 1 + return desiredCount - 1 } diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index 456bc041..5ee94847 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -356,7 +356,6 @@ func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, a savepointStatusResponse.Operation.Location)) application.Status.SavepointPath = 
savepointStatusResponse.Operation.Location s.flinkController.UpdateLatestJobID(ctx, application, "") - application.Status.RunningJobs = application.Status.RunningJobs - 1 s.updateApplicationPhase(application, v1beta1.FlinkApplicationSubmittingJob) return statusChanged, nil } @@ -394,7 +393,6 @@ func (s *FlinkStateMachine) handleApplicationRecovering(ctx context.Context, app app.Status.SavepointPath = path s.flinkController.UpdateLatestJobID(ctx, app, "") - app.Status.RunningJobs = getRunningJobs(app) s.updateApplicationPhase(app, v1beta1.FlinkApplicationSubmittingJob) return statusChanged, nil } @@ -540,12 +538,13 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta app.Status.DeployHash = hash app.Status.SavepointPath = "" app.Status.SavepointTriggerID = "" - app.Status.ApplicationStatus[app.Status.RunningJobs].JobStatus.JarName = app.Spec.JarName - app.Status.ApplicationStatus[app.Status.RunningJobs].JobStatus.Parallelism = app.Spec.Parallelism - app.Status.ApplicationStatus[app.Status.RunningJobs].JobStatus.EntryClass = app.Spec.EntryClass - app.Status.ApplicationStatus[app.Status.RunningJobs].JobStatus.ProgramArgs = app.Spec.ProgramArgs - app.Status.ApplicationStatus[app.Status.RunningJobs].JobStatus.AllowNonRestoredState = app.Spec.AllowNonRestoredState - app.Status.RunningJobs = getRunningJobs(app) + jobStatus := s.flinkController.GetLatestJobStatus(ctx, app) + jobStatus.JarName = app.Spec.JarName + jobStatus.Parallelism = app.Spec.Parallelism + jobStatus.EntryClass = app.Spec.EntryClass + jobStatus.ProgramArgs = app.Spec.ProgramArgs + jobStatus.AllowNonRestoredState = app.Spec.AllowNonRestoredState + s.flinkController.UpdateLatestJobStatus(ctx, app, jobStatus) s.updateApplicationPhase(app, v1beta1.FlinkApplicationRunning) return statusChanged, nil } @@ -553,16 +552,6 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta return statusUnchanged, nil } -func getRunningJobs(app 
*v1beta1.FlinkApplication) int32 { - runningJobs := 0 - for _, status := range app.Status.ApplicationStatus { - if status.JobStatus.JobID != "" { - runningJobs++ - } - } - return int32(runningJobs) -} - // Something has gone wrong during the update, post job-cancellation (and cluster tear-down in single mode). We need // to try to get things back into a working state func (s *FlinkStateMachine) handleRollingBack(ctx context.Context, app *v1beta1.FlinkApplication) (bool, error) { From 08e5ce48ef6355ff174c98a04193ef9dc85770bd Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Thu, 27 Feb 2020 17:11:44 -0800 Subject: [PATCH 11/41] Add container env and annotations --- deploy/crd.yaml | 2 +- pkg/apis/app/v1beta1/types.go | 4 + pkg/controller/flink/container_utils.go | 21 +++++ pkg/controller/flink/flink.go | 69 +++++++-------- pkg/controller/flink/flink_test.go | 1 + .../flink/job_manager_controller.go | 5 ++ .../flink/job_manager_controller_test.go | 88 +++++++++++++++++++ .../flink/task_manager_controller.go | 15 ++-- .../flink/task_manager_controller_test.go | 59 +++++++++++++ 9 files changed, 223 insertions(+), 41 deletions(-) diff --git a/deploy/crd.yaml b/deploy/crd.yaml index e5200555..7d9d4c39 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -415,7 +415,7 @@ spec: - name: Application Version type: string description: The version of the Flink cluster - JSONPath: .status.appStatus[*].clusterStatus.health + JSONPath: .status.appStatus[*].version - name: Cluster Health type: string description: The health of the Flink cluster diff --git a/pkg/apis/app/v1beta1/types.go b/pkg/apis/app/v1beta1/types.go index e15044b0..b6607bcb 100644 --- a/pkg/apis/app/v1beta1/types.go +++ b/pkg/apis/app/v1beta1/types.go @@ -260,6 +260,10 @@ func IsRunningPhase(phase FlinkApplicationPhase) bool { return phase == FlinkApplicationRunning || phase == FlinkApplicationDeployFailed } +func IsBlueGreenDeploymentMode(mode DeploymentMode) bool { + return mode == DeploymentModeBlueGreen +} + 
type DeploymentMode string const ( diff --git a/pkg/controller/flink/container_utils.go b/pkg/controller/flink/container_utils.go index 38b53bcd..e760b6f9 100644 --- a/pkg/controller/flink/container_utils.go +++ b/pkg/controller/flink/container_utils.go @@ -33,6 +33,8 @@ const ( FlinkAppHash = "flink-app-hash" FlinkJobProperties = "flink-job-properties" RestartNonce = "restart-nonce" + FlinkApplicationVersionEnv = "FLINK_APPLICATION_VERSION" + FlinkApplicationVersion = "flink-application-version" ) func getFlinkContainerName(containerName string) string { @@ -56,6 +58,9 @@ func getCommonAnnotations(app *v1beta1.FlinkApplication) map[string]string { if app.Spec.RestartNonce != "" { annotations[RestartNonce] = app.Spec.RestartNonce } + if v1beta1.IsBlueGreenDeploymentMode(app.Spec.DeploymentMode) { + annotations[FlinkApplicationVersion] = app.Status.UpdatingVersion + } return annotations } @@ -117,6 +122,7 @@ func GetFlinkContainerEnv(app *v1beta1.FlinkApplication) []v1.EnvVar { if err == nil { env = append(env, flinkEnv...) } + env = append(env, GetDeploySpecificEnv(app)...) return env } @@ -218,3 +224,18 @@ func InjectOperatorCustomizedConfig(deployment *appsv1.Deployment, app *v1beta1. 
} deployment.Spec.Template.Spec.Containers = newContainers } + +// Injects labels and environment variables required for blue green deploys +func GetDeploySpecificEnv(app *v1beta1.FlinkApplication) []v1.EnvVar { + if !v1beta1.IsBlueGreenDeploymentMode(app.Spec.DeploymentMode) { + return []v1.EnvVar{} + } + + return []v1.EnvVar{ + { + Name: FlinkApplicationVersionEnv, + Value: app.Status.UpdatingVersion, + }, + } + +} diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index 56ad226b..11c4708f 100644 --- a/pkg/controller/flink/flink.go +++ b/pkg/controller/flink/flink.go @@ -148,18 +148,6 @@ type Controller struct { eventRecorder record.EventRecorder } -func (f *Controller) UpdateLatestJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, jobStatus v1beta1.FlinkJobStatus) { - app.Status.ApplicationStatus[getCurrentStatusIndex(app)].JobStatus = jobStatus -} - -func (f *Controller) GetLatestJobID(ctx context.Context, application *v1beta1.FlinkApplication) string { - return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID -} - -func (f *Controller) UpdateLatestJobID(ctx context.Context, app *v1beta1.FlinkApplication, jobID string) { - app.Status.ApplicationStatus[getCurrentStatusIndex(app)].JobStatus.JobID = jobID -} - func getURLFromApp(application *v1beta1.FlinkApplication, hash string) string { service := VersionedJobManagerServiceName(application, hash) cfg := controllerConfig.GetConfig() @@ -555,16 +543,6 @@ func (f *Controller) CompareAndUpdateClusterStatus(ctx context.Context, applicat return !apiequality.Semantic.DeepEqual(oldClusterStatus, application.Status.ApplicationStatus[currIndex].ClusterStatus), nil } -func (f *Controller) GetLatestClusterStatus(ctx context.Context, application *v1beta1.FlinkApplication) v1beta1.FlinkClusterStatus { - return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].ClusterStatus - -} - -func (f *Controller) GetLatestJobStatus(ctx 
context.Context, application *v1beta1.FlinkApplication) v1beta1.FlinkJobStatus { - return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus - -} - func getHealthyTaskManagerCount(response *client.TaskManagersResponse) int32 { healthyTMCount := 0 for index := range response.TaskManagers { @@ -578,18 +556,6 @@ func getHealthyTaskManagerCount(response *client.TaskManagersResponse) int32 { } -func getCurrentStatusIndex(app *v1beta1.FlinkApplication) int32 { - // In the Running phase, we always have only 1 job - if v1beta1.IsRunningPhase(app.Status.Phase) { - return 0 - } - - // In every other state, we either have - // Dual mode --> One Application status object - // BlueGreen mode --> Two Application status objects - return app.Status.DesiredApplicationCount - indexOffset -} - func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, hash string) (bool, error) { currIndex := getCurrentStatusIndex(app) if app.Status.ApplicationStatus[currIndex].JobStatus.LastFailingTime == nil { @@ -674,6 +640,39 @@ func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1 currTime := metav1.Now() app.Status.ApplicationStatus[currIndex].JobStatus.LastFailingTime = &currTime } - return !apiequality.Semantic.DeepEqual(oldJobStatus, app.Status.ApplicationStatus[currIndex].JobStatus), err } + +func getCurrentStatusIndex(app *v1beta1.FlinkApplication) int32 { + // In the Running phase, we always have only 1 job + if v1beta1.IsRunningPhase(app.Status.Phase) { + return 0 + } + + // In every other state, we either have + // Dual mode --> One Application status object + // BlueGreen mode --> Two Application status objects + return app.Status.DesiredApplicationCount - indexOffset +} + +func (f *Controller) GetLatestClusterStatus(ctx context.Context, application *v1beta1.FlinkApplication) v1beta1.FlinkClusterStatus { + return 
application.Status.ApplicationStatus[getCurrentStatusIndex(application)].ClusterStatus + +} + +func (f *Controller) GetLatestJobStatus(ctx context.Context, application *v1beta1.FlinkApplication) v1beta1.FlinkJobStatus { + return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus + +} + +func (f *Controller) UpdateLatestJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, jobStatus v1beta1.FlinkJobStatus) { + app.Status.ApplicationStatus[getCurrentStatusIndex(app)].JobStatus = jobStatus +} + +func (f *Controller) GetLatestJobID(ctx context.Context, application *v1beta1.FlinkApplication) string { + return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID +} + +func (f *Controller) UpdateLatestJobID(ctx context.Context, app *v1beta1.FlinkApplication, jobID string) { + app.Status.ApplicationStatus[getCurrentStatusIndex(app)].JobStatus.JobID = jobID +} diff --git a/pkg/controller/flink/flink_test.go b/pkg/controller/flink/flink_test.go index 1c25c5b9..3217a6e1 100644 --- a/pkg/controller/flink/flink_test.go +++ b/pkg/controller/flink/flink_test.go @@ -39,6 +39,7 @@ const testFlinkVersion = "1.7" const testJarName = "test.jar" const testEntryClass = "com.test.MainClass" const testProgramArgs = "--test" +const testVersion = "version" func getTestFlinkController() Controller { testScope := mockScope.NewTestScope() diff --git a/pkg/controller/flink/job_manager_controller.go b/pkg/controller/flink/job_manager_controller.go index 3fbbe79c..b0116b8d 100644 --- a/pkg/controller/flink/job_manager_controller.go +++ b/pkg/controller/flink/job_manager_controller.go @@ -22,6 +22,7 @@ import ( const ( JobManagerNameFormat = "%s-%s-jm" JobManagerPodNameFormat = "%s-%s-jm-pod" + JobManagerVersionPodNameFormat = "%s-%s-jm-%s-pod" JobManagerContainerName = "jobmanager" JobManagerArg = "jobmanager" JobManagerReadinessPath = "/overview" @@ -169,6 +170,10 @@ var JobManagerDefaultResources = 
coreV1.ResourceRequirements{ func getJobManagerPodName(application *v1beta1.FlinkApplication, hash string) string { applicationName := application.Name + if v1beta1.IsBlueGreenDeploymentMode(application.Spec.DeploymentMode) { + applicationVersion := application.Status.UpdatingVersion + return fmt.Sprintf(JobManagerVersionPodNameFormat, applicationName, hash, applicationVersion) + } return fmt.Sprintf(JobManagerPodNameFormat, applicationName, hash) } diff --git a/pkg/controller/flink/job_manager_controller_test.go b/pkg/controller/flink/job_manager_controller_test.go index dd967907..fc99fa81 100644 --- a/pkg/controller/flink/job_manager_controller_test.go +++ b/pkg/controller/flink/job_manager_controller_test.go @@ -3,6 +3,8 @@ package flink import ( "testing" + v1beta12 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + "github.com/lyft/flinkk8soperator/pkg/controller/config" k8mock "github.com/lyft/flinkk8soperator/pkg/controller/k8/mock" @@ -42,6 +44,13 @@ func TestGetJobManagerPodName(t *testing.T) { assert.Equal(t, "app-name-"+testAppHash+"-jm-pod", getJobManagerPodName(&app, testAppHash)) } +func TestGetJobManagerPodNameWithVersion(t *testing.T) { + app := getFlinkTestApp() + app.Spec.DeploymentMode = v1beta12.DeploymentModeBlueGreen + app.Status.UpdatingVersion = testVersion + assert.Equal(t, "app-name-"+testAppHash+"-jm-"+testVersion+"-pod", getJobManagerPodName(&app, testAppHash)) +} + func TestJobManagerCreateSuccess(t *testing.T) { err := initTestConfigForIngress() assert.Nil(t, err) @@ -287,3 +296,82 @@ func TestJobManagerCreateNoIngress(t *testing.T) { assert.Nil(t, err) assert.False(t, newlyCreated) } + +func TestJobManagerCreateSuccessWithVersion(t *testing.T) { + err := initTestConfigForIngress() + assert.Nil(t, err) + testController := getJMControllerForTest() + app := getFlinkTestApp() + app.Spec.JarName = testJarName + app.Spec.EntryClass = testEntryClass + app.Spec.ProgramArgs = testProgramArgs + app.Spec.DeploymentMode = 
v1beta12.DeploymentModeBlueGreen + app.Status.UpdatingVersion = testVersion + annotations := map[string]string{ + "key": "annotation", + "flink-application-version": testVersion, + "flink-job-properties": "jarName: " + testJarName + "\nparallelism: 8\nentryClass:" + testEntryClass + "\nprogramArgs:\"" + testProgramArgs + "\"", + } + app.Annotations = annotations + hash := "f0bd1679" + expectedLabels := map[string]string{ + "flink-app": "app-name", + "flink-app-hash": hash, + "flink-deployment-type": "jobmanager", + } + ctr := 0 + mockK8Cluster := testController.k8Cluster.(*k8mock.K8Cluster) + mockK8Cluster.CreateK8ObjectFunc = func(ctx context.Context, object runtime.Object) error { + ctr++ + switch ctr { + case 1: + deployment := object.(*v1.Deployment) + assert.Equal(t, getJobManagerName(&app, hash), deployment.Name) + assert.Equal(t, app.Namespace, deployment.Namespace) + assert.Equal(t, getJobManagerPodName(&app, hash), deployment.Spec.Template.Name) + assert.Equal(t, annotations, deployment.Annotations) + assert.Equal(t, annotations, deployment.Spec.Template.Annotations) + assert.Equal(t, app.Namespace, deployment.Spec.Template.Namespace) + assert.Equal(t, expectedLabels, deployment.Labels) + assert.Equal(t, int32(1), *deployment.Spec.Replicas) + assert.Equal(t, "app-name", deployment.OwnerReferences[0].Name) + assert.Equal(t, "flink.k8s.io/v1beta1", deployment.OwnerReferences[0].APIVersion) + assert.Equal(t, "FlinkApplication", deployment.OwnerReferences[0].Kind) + + assert.Equal(t, "blob.server.port: 6125\njobmanager.heap.size: 1572864k\n"+ + "jobmanager.rpc.port: 6123\n"+ + "jobmanager.web.port: 8081\nmetrics.internal.query-service.port: 50101\n"+ + "query.server.port: 6124\ntaskmanager.heap.size: 524288k\n"+ + "taskmanager.numberOfTaskSlots: 16\n\n"+ + "jobmanager.rpc.address: app-name-"+hash+"\n", + common.GetEnvVar(deployment.Spec.Template.Spec.Containers[0].Env, + "FLINK_PROPERTIES").Value) + assert.Equal(t, testVersion, 
common.GetEnvVar(deployment.Spec.Template.Spec.Containers[0].Env, + "FLINK_APPLICATION_VERSION").Value) + case 2: + service := object.(*coreV1.Service) + assert.Equal(t, app.Name, service.Name) + assert.Equal(t, app.Namespace, service.Namespace) + assert.Equal(t, map[string]string{"flink-app": "app-name", "flink-app-hash": hash, "flink-deployment-type": "jobmanager"}, service.Spec.Selector) + case 3: + service := object.(*coreV1.Service) + assert.Equal(t, app.Name+"-"+hash, service.Name) + assert.Equal(t, "app-name", service.OwnerReferences[0].Name) + assert.Equal(t, app.Namespace, service.Namespace) + assert.Equal(t, map[string]string{"flink-app": "app-name", "flink-app-hash": hash, "flink-deployment-type": "jobmanager"}, service.Spec.Selector) + case 4: + labels := map[string]string{ + "flink-app": "app-name", + } + ingress := object.(*v1beta1.Ingress) + assert.Equal(t, app.Name, ingress.Name) + assert.Equal(t, app.Namespace, ingress.Namespace) + assert.Equal(t, labels, ingress.Labels) + } + return nil + } + newlyCreated, err := testController.CreateIfNotExist(context.Background(), &app) + assert.Nil(t, err) + assert.True(t, newlyCreated) + assert.Equal(t, 4, ctr) +} diff --git a/pkg/controller/flink/task_manager_controller.go b/pkg/controller/flink/task_manager_controller.go index 6faf5700..c7b9de84 100644 --- a/pkg/controller/flink/task_manager_controller.go +++ b/pkg/controller/flink/task_manager_controller.go @@ -20,11 +20,12 @@ import ( ) const ( - TaskManagerNameFormat = "%s-%s-tm" - TaskManagerPodNameFormat = "%s-%s-tm-pod" - TaskManagerContainerName = "taskmanager" - TaskManagerArg = "taskmanager" - TaskManagerHostnameEnvVar = "TASKMANAGER_HOSTNAME" + TaskManagerNameFormat = "%s-%s-tm" + TaskManagerPodNameFormat = "%s-%s-tm-pod" + TaskManagerVersionPodNameFormat = "%s-%s-tm-%s-pod" + TaskManagerContainerName = "taskmanager" + TaskManagerArg = "taskmanager" + TaskManagerHostnameEnvVar = "TASKMANAGER_HOSTNAME" ) type TaskManagerControllerInterface interface 
{ @@ -142,6 +143,10 @@ func FetchTaskManagerContainerObj(application *v1beta1.FlinkApplication) *coreV1 func getTaskManagerPodName(application *v1beta1.FlinkApplication, hash string) string { applicationName := application.Name + if v1beta1.IsBlueGreenDeploymentMode(application.Spec.DeploymentMode) { + applicationVersion := application.Status.UpdatingVersion + return fmt.Sprintf(TaskManagerVersionPodNameFormat, applicationName, hash, applicationVersion) + } return fmt.Sprintf(TaskManagerPodNameFormat, applicationName, hash) } diff --git a/pkg/controller/flink/task_manager_controller_test.go b/pkg/controller/flink/task_manager_controller_test.go index 62d1a9a0..19d8b3e1 100644 --- a/pkg/controller/flink/task_manager_controller_test.go +++ b/pkg/controller/flink/task_manager_controller_test.go @@ -50,6 +50,13 @@ func TestGetTaskManagerPodName(t *testing.T) { assert.Equal(t, "app-name-"+testAppHash+"-tm-pod", getTaskManagerPodName(&app, testAppHash)) } +func TestGetTaskManagerPodNameWithVersion(t *testing.T) { + app := getFlinkTestApp() + app.Spec.DeploymentMode = v1beta1.DeploymentModeBlueGreen + app.Status.UpdatingVersion = testVersion + assert.Equal(t, "app-name-"+testAppHash+"-tm-"+testVersion+"-pod", getTaskManagerPodName(&app, testAppHash)) +} + func TestTaskManagerCreateSuccess(t *testing.T) { testController := getTMControllerForTest() app := getFlinkTestApp() @@ -215,3 +222,55 @@ func TestTaskManagerCreateAlreadyExists(t *testing.T) { assert.Nil(t, err) assert.False(t, newlyCreated) } + +func TestTaskManagerCreateSuccessWithVersion(t *testing.T) { + testController := getTMControllerForTest() + app := getFlinkTestApp() + app.Spec.JarName = testJarName + app.Spec.EntryClass = testEntryClass + app.Spec.ProgramArgs = testProgramArgs + app.Spec.DeploymentMode = v1beta1.DeploymentModeBlueGreen + app.Status.UpdatingVersion = testVersion + annotations := map[string]string{ + "key": "annotation", + "flink-application-version": testVersion, + "flink-job-properties": 
"jarName: test.jar\nparallelism: 8\nentryClass:com.test.MainClass\nprogramArgs:\"--test\"", + } + + hash := "f0bd1679" + + app.Annotations = annotations + expectedLabels := map[string]string{ + "flink-app": "app-name", + "flink-app-hash": hash, + "flink-deployment-type": "taskmanager", + } + mockK8Cluster := testController.k8Cluster.(*k8mock.K8Cluster) + mockK8Cluster.CreateK8ObjectFunc = func(ctx context.Context, object runtime.Object) error { + deployment := object.(*v1.Deployment) + assert.Equal(t, getTaskManagerName(&app, hash), deployment.Name) + assert.Equal(t, app.Namespace, deployment.Namespace) + assert.Equal(t, getTaskManagerPodName(&app, hash), deployment.Spec.Template.Name) + assert.Equal(t, annotations, deployment.Annotations) + assert.Equal(t, annotations, deployment.Spec.Template.Annotations) + assert.Equal(t, app.Namespace, deployment.Spec.Template.Namespace) + assert.Equal(t, expectedLabels, deployment.Labels) + + assert.Equal(t, "blob.server.port: 6125\njobmanager.heap.size: 1572864k\n"+ + "jobmanager.rpc.port: 6123\n"+ + "jobmanager.web.port: 8081\nmetrics.internal.query-service.port: 50101\n"+ + "query.server.port: 6124\ntaskmanager.heap.size: 524288k\n"+ + "taskmanager.numberOfTaskSlots: 16\n\n"+ + "jobmanager.rpc.address: app-name-"+hash+"\n"+ + "taskmanager.host: $HOST_IP\n", + common.GetEnvVar(deployment.Spec.Template.Spec.Containers[0].Env, + "FLINK_PROPERTIES").Value) + assert.Equal(t, testVersion, common.GetEnvVar(deployment.Spec.Template.Spec.Containers[0].Env, + "FLINK_APPLICATION_VERSION").Value) + + return nil + } + newlyCreated, err := testController.CreateIfNotExist(context.Background(), &app) + assert.Nil(t, err) + assert.True(t, newlyCreated) +} From 5fdbc22cd85ae2497834ee5574807d00af040f1f Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Sat, 7 Mar 2020 16:33:35 -0800 Subject: [PATCH 12/41] Update CRD to v1beta2 --- config/config.yaml | 2 +- deploy/crd.yaml | 9 +- deploy/role-binding.yaml | 2 +- ...heme_v1beta1.go => 
addtoscheme_v1beta2.go} | 6 +- pkg/apis/app/v1beta2/doc.go | 3 + pkg/apis/app/v1beta2/register.go | 42 ++ pkg/apis/app/v1beta2/types.go | 333 +++++++++++++++ pkg/apis/app/v1beta2/zz_generated.deepcopy.go | 391 ++++++++++++++++++ pkg/client/clientset/versioned/clientset.go | 18 +- .../versioned/fake/clientset_generated.go | 10 +- .../clientset/versioned/fake/register.go | 4 +- .../clientset/versioned/scheme/register.go | 4 +- .../versioned/typed/app/v1beta2/app_client.go | 74 ++++ .../versioned/typed/app/v1beta2/doc.go | 4 + .../versioned/typed/app/v1beta2/fake/doc.go | 4 + .../typed/app/v1beta2/fake/fake_app_client.go | 24 ++ .../app/v1beta2/fake/fake_flinkapplication.go | 112 +++++ .../typed/app/v1beta2/flinkapplication.go | 158 +++++++ .../typed/app/v1beta2/generated_expansion.go | 5 + pkg/controller/flink/client/api.go | 62 +-- pkg/controller/flink/client/api_test.go | 6 +- pkg/controller/flink/client/error_handler.go | 20 +- pkg/controller/flink/config.go | 34 +- pkg/controller/flink/config_test.go | 32 +- pkg/controller/flink/container_utils.go | 22 +- pkg/controller/flink/container_utils_test.go | 8 +- pkg/controller/flink/flink.go | 118 +++--- pkg/controller/flink/flink_test.go | 54 +-- pkg/controller/flink/ingress.go | 22 +- .../flink/job_manager_controller.go | 28 +- .../flink/job_manager_controller_test.go | 20 +- pkg/controller/flink/mock/mock_flink.go | 84 ++-- .../flink/mock/mock_job_manager_controller.go | 4 +- .../flink/task_manager_controller.go | 24 +- .../flink/task_manager_controller_test.go | 8 +- pkg/controller/flinkapplication/controller.go | 14 +- .../flinkapplication/flink_state_machine.go | 126 +++--- .../flink_state_machine_test.go | 390 ++++++++--------- tmp/codegen/update-generated.sh | 2 +- 39 files changed, 1717 insertions(+), 566 deletions(-) rename pkg/apis/app/{addtoscheme_v1beta1.go => addtoscheme_v1beta2.go} (60%) create mode 100644 pkg/apis/app/v1beta2/doc.go create mode 100644 pkg/apis/app/v1beta2/register.go create mode 100644 
pkg/apis/app/v1beta2/types.go create mode 100644 pkg/apis/app/v1beta2/zz_generated.deepcopy.go create mode 100644 pkg/client/clientset/versioned/typed/app/v1beta2/app_client.go create mode 100644 pkg/client/clientset/versioned/typed/app/v1beta2/doc.go create mode 100644 pkg/client/clientset/versioned/typed/app/v1beta2/fake/doc.go create mode 100644 pkg/client/clientset/versioned/typed/app/v1beta2/fake/fake_app_client.go create mode 100644 pkg/client/clientset/versioned/typed/app/v1beta2/fake/fake_flinkapplication.go create mode 100644 pkg/client/clientset/versioned/typed/app/v1beta2/flinkapplication.go create mode 100644 pkg/client/clientset/versioned/typed/app/v1beta2/generated_expansion.go diff --git a/config/config.yaml b/config/config.yaml index 371f0756..12040969 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,3 +1,3 @@ -apiVersion: flink.k8s.io/v1beta1 +apiVersion: flink.k8s.io/v1beta2 kind: FlinkApplication projectName: flinkk8soperator diff --git a/deploy/crd.yaml b/deploy/crd.yaml index 7d9d4c39..088c9f2b 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -1,4 +1,4 @@ -apiVersion: apiextensions.k8s.io/v1beta1 +apiVersion: apiextensions.k8s.io/v1beta2 kind: CustomResourceDefinition metadata: name: flinkapplications.flink.k8s.io @@ -12,11 +12,14 @@ spec: shortNames: - flinkapp scope: Namespaced - version: v1beta1 + version: v1beta2 versions: - - name: v1beta1 + - name: v1beta2 served: true storage: true + - name: v1beta1 + served: true + storage: false - name: v1alpha1 served: true storage: false diff --git a/deploy/role-binding.yaml b/deploy/role-binding.yaml index e46a1f91..163455f5 100644 --- a/deploy/role-binding.yaml +++ b/deploy/role-binding.yaml @@ -1,6 +1,6 @@ # Create a binding from Role -> ServiceAccount kind: ClusterRoleBinding -apiVersion: rbac.authorization.k8s.io/v1beta1 +apiVersion: rbac.authorization.k8s.io/v1beta2 metadata: name: flinkoperator roleRef: diff --git a/pkg/apis/app/addtoscheme_v1beta1.go 
b/pkg/apis/app/addtoscheme_v1beta2.go similarity index 60% rename from pkg/apis/app/addtoscheme_v1beta1.go rename to pkg/apis/app/addtoscheme_v1beta2.go index 28922b80..d2fdcb90 100644 --- a/pkg/apis/app/addtoscheme_v1beta1.go +++ b/pkg/apis/app/addtoscheme_v1beta2.go @@ -4,11 +4,9 @@ package apis -import ( - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" -) +import "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" func init() { // Register the types with the Scheme so the components can map objects to GroupVersionKinds and back - AddToSchemes = append(AddToSchemes, v1beta1.SchemeBuilder.AddToScheme) + AddToSchemes = append(AddToSchemes, v1beta2.SchemeBuilder.AddToScheme) } diff --git a/pkg/apis/app/v1beta2/doc.go b/pkg/apis/app/v1beta2/doc.go new file mode 100644 index 00000000..eb56232b --- /dev/null +++ b/pkg/apis/app/v1beta2/doc.go @@ -0,0 +1,3 @@ +// +k8s:deepcopy-gen=package +// +groupName=flink.k8s.io +package v1beta2 diff --git a/pkg/apis/app/v1beta2/register.go b/pkg/apis/app/v1beta2/register.go new file mode 100644 index 00000000..1cfce161 --- /dev/null +++ b/pkg/apis/app/v1beta2/register.go @@ -0,0 +1,42 @@ +package v1beta2 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +const ( + version = "v1beta2" + groupName = "flink.k8s.io" + + FlinkApplicationKind = "FlinkApplication" +) + +var ( + SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes) + AddToScheme = SchemeBuilder.AddToScheme + // SchemeGroupVersion is the group version used to register these objects. 
+ SchemeGroupVersion = schema.GroupVersion{Group: groupName, Version: version} +) + +// GetKind takes an unqualified kind and returns back a Group qualified GroupKind +func Kind(kind string) schema.GroupKind { + return SchemeGroupVersion.WithKind(kind).GroupKind() +} + +// Resource takes an unqualified resource and returns a Group qualified GroupResource +func Resource(resource string) schema.GroupResource { + return SchemeGroupVersion.WithResource(resource).GroupResource() +} + +// addKnownTypes adds the set of types defined in this package to the supplied scheme. +func addKnownTypes(scheme *runtime.Scheme) error { + scheme.AddKnownTypes(SchemeGroupVersion, + &FlinkApplication{}, + &FlinkApplicationList{}, + ) + + metav1.AddToGroupVersion(scheme, SchemeGroupVersion) + return nil +} diff --git a/pkg/apis/app/v1beta2/types.go b/pkg/apis/app/v1beta2/types.go new file mode 100644 index 00000000..a6e7f2c4 --- /dev/null +++ b/pkg/apis/app/v1beta2/types.go @@ -0,0 +1,333 @@ +package v1beta2 + +import ( + "fmt" + apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +type FlinkApplicationList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata"` + Items []FlinkApplication `json:"items"` +} + +// +genclient +// +genclient:noStatus +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +// +k8s:defaulter-gen=true +type FlinkApplication struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata"` + Spec FlinkApplicationSpec `json:"spec"` + Status FlinkApplicationStatus `json:"status,omitempty"` +} + +type FlinkApplicationSpec struct { + Image string `json:"image,omitempty" protobuf:"bytes,2,opt,name=image"` + ImagePullPolicy apiv1.PullPolicy `json:"imagePullPolicy,omitempty" protobuf:"bytes,14,opt,name=imagePullPolicy,casttype=PullPolicy"` + ImagePullSecrets []apiv1.LocalObjectReference 
`json:"imagePullSecrets,omitempty" patchStrategy:"merge" patchMergeKey:"name" protobuf:"bytes,15,rep,name=imagePullSecrets"` + ServiceAccountName string `json:"serviceAccountName,omitempty"` + SecurityContext *apiv1.PodSecurityContext `json:"securityContext,omitempty"` + FlinkConfig FlinkConfig `json:"flinkConfig"` + FlinkVersion string `json:"flinkVersion"` + TaskManagerConfig TaskManagerConfig `json:"taskManagerConfig,omitempty"` + JobManagerConfig JobManagerConfig `json:"jobManagerConfig,omitempty"` + JarName string `json:"jarName"` + Parallelism int32 `json:"parallelism"` + EntryClass string `json:"entryClass,omitempty"` + ProgramArgs string `json:"programArgs,omitempty"` + // Deprecated: use SavepointPath instead + SavepointInfo SavepointInfo `json:"savepointInfo,omitempty"` + SavepointPath string `json:"savepointPath,omitempty"` + DeploymentMode DeploymentMode `json:"deploymentMode,omitempty"` + RPCPort *int32 `json:"rpcPort,omitempty"` + BlobPort *int32 `json:"blobPort,omitempty"` + QueryPort *int32 `json:"queryPort,omitempty"` + UIPort *int32 `json:"uiPort,omitempty"` + MetricsQueryPort *int32 `json:"metricsQueryPort,omitempty"` + Volumes []apiv1.Volume `json:"volumes,omitempty"` + VolumeMounts []apiv1.VolumeMount `json:"volumeMounts,omitempty"` + RestartNonce string `json:"restartNonce"` + DeleteMode DeleteMode `json:"deleteMode,omitempty"` + AllowNonRestoredState bool `json:"allowNonRestoredState,omitempty"` + ForceRollback bool `json:"forceRollback"` + MaxCheckpointRestoreAgeSeconds *int32 `json:"maxCheckpointRestoreAgeSeconds,omitempty"` +} + +type FlinkConfig map[string]interface{} + +// Workaround for https://github.com/kubernetes-sigs/kubebuilder/issues/528 +func (in *FlinkConfig) DeepCopyInto(out *FlinkConfig) { + if in == nil { + *out = nil + } else { + *out = make(map[string]interface{}, len(*in)) + for k, v := range *in { + (*out)[k] = deepCopyJSONValue(v) + } + } +} + +func deepCopyJSONValue(x interface{}) interface{} { + switch x := x.(type) { 
+ case map[string]interface{}: + clone := make(map[string]interface{}, len(x)) + for k, v := range x { + clone[k] = deepCopyJSONValue(v) + } + return clone + case []interface{}: + clone := make([]interface{}, len(x)) + for i, v := range x { + clone[i] = deepCopyJSONValue(v) + } + return clone + case string, int, uint, int32, uint32, int64, uint64, bool, float32, float64, nil: + return x + default: + panic(fmt.Errorf("cannot deep copy %T", x)) + } +} + +func (in *FlinkConfig) DeepCopy() *FlinkConfig { + if in == nil { + return nil + } + out := new(FlinkConfig) + in.DeepCopyInto(out) + return out +} + +type JobManagerConfig struct { + Resources *apiv1.ResourceRequirements `json:"resources,omitempty"` + EnvConfig EnvironmentConfig `json:"envConfig"` + Replicas *int32 `json:"replicas,omitempty"` + OffHeapMemoryFraction *float64 `json:"offHeapMemoryFraction,omitempty"` + NodeSelector map[string]string `json:"nodeSelector,omitempty"` +} + +type TaskManagerConfig struct { + Resources *apiv1.ResourceRequirements `json:"resources,omitempty"` + EnvConfig EnvironmentConfig `json:"envConfig"` + TaskSlots *int32 `json:"taskSlots,omitempty"` + OffHeapMemoryFraction *float64 `json:"offHeapMemoryFraction,omitempty"` + NodeSelector map[string]string `json:"nodeSelector,omitempty"` +} + +type EnvironmentConfig struct { + EnvFrom []apiv1.EnvFromSource `json:"envFrom,omitempty"` + Env []apiv1.EnvVar `json:"env,omitempty"` +} + +type SavepointInfo struct { + SavepointLocation string `json:"savepointLocation,omitempty"` +} + +type FlinkClusterStatus struct { + ClusterOverviewURL string `json:"clusterOverviewURL,omitempty"` + Health HealthStatus `json:"health,omitempty"` + NumberOfTaskManagers int32 `json:"numberOfTaskManagers,omitempty"` + HealthyTaskManagers int32 `json:"healthyTaskManagers,omitempty"` + NumberOfTaskSlots int32 `json:"numberOfTaskSlots,omitempty"` + AvailableTaskSlots int32 `json:"availableTaskSlots"` +} + +type FlinkJobStatus struct { + JobOverviewURL string 
`json:"jobOverviewURL,omitempty"` + JobID string `json:"jobID,omitempty"` + Health HealthStatus `json:"health,omitempty"` + State JobState `json:"state,omitempty"` + + JarName string `json:"jarName"` + Parallelism int32 `json:"parallelism"` + EntryClass string `json:"entryClass,omitempty"` + ProgramArgs string `json:"programArgs,omitempty"` + AllowNonRestoredState bool `json:"allowNonRestoredState,omitempty"` + + StartTime *metav1.Time `json:"startTime,omitempty"` + JobRestartCount int32 `json:"jobRestartCount,omitempty"` + CompletedCheckpointCount int32 `json:"completedCheckpointCount,omitempty"` + FailedCheckpointCount int32 `json:"failedCheckpointCount,omitempty"` + RestorePath string `json:"restorePath,omitempty"` + RestoreTime *metav1.Time `json:"restoreTime,omitempty"` + LastFailingTime *metav1.Time `json:"lastFailingTime,omitempty"` + + LastCheckpointPath string `json:"lastCheckpoint,omitempty"` + LastCheckpointTime *metav1.Time `json:"lastCheckpointTime,omitempty"` + + RunningTasks int32 `json:"runningTasks,omitempty"` + TotalTasks int32 `json:"totalTasks,omitempty"` +} + +type FlinkApplicationStatus struct { + Phase FlinkApplicationPhase `json:"phase"` + StartedAt *metav1.Time `json:"startedAt,omitempty"` + LastUpdatedAt *metav1.Time `json:"lastUpdatedAt,omitempty"` + Reason string `json:"reason,omitempty"` + DesiredApplicationCount int32 `json:"desiredApplicationCount,omitempty"` + DeployVersion string `json:"deployVersion,omitempty"` + UpdatingVersion string `json:"updatingVersion,omitempty"` + ApplicationStatus []FlinkApplicationVersionStatus `json:"appStatus,omitempty"` + FailedDeployHash string `json:"failedDeployHash,omitempty"` + RollbackHash string `json:"rollbackHash,omitempty"` + DeployHash string `json:"deployHash"` + SavepointTriggerID string `json:"savepointTriggerId,omitempty"` + SavepointPath string `json:"savepointPath,omitempty"` + RetryCount int32 `json:"retryCount,omitempty"` + LastSeenError *FlinkApplicationError 
`json:"lastSeenError,omitempty"` +} + +type FlinkApplicationVersion string + +const ( + BlueFlinkApplication FlinkApplicationVersion = "Blue" + GreenFlinkApplication FlinkApplicationVersion = "Green" +) + +type FlinkApplicationVersionStatus struct { + Version FlinkApplicationVersion `json:"appVersion,omitempty"` + ClusterStatus FlinkClusterStatus `json:"clusterStatus,omitempty"` + JobStatus FlinkJobStatus `json:"jobStatus,omitempty"` +} + +func (in *FlinkApplicationStatus) GetPhase() FlinkApplicationPhase { + return in.Phase +} + +func (in *FlinkApplicationStatus) UpdatePhase(phase FlinkApplicationPhase, reason string) { + now := metav1.Now() + if in.StartedAt == nil { + in.StartedAt = &now + in.LastUpdatedAt = &now + } + in.Reason = reason + in.Phase = phase +} + +func (in *FlinkApplicationStatus) TouchResource(reason string) { + now := metav1.Now() + in.LastUpdatedAt = &now + in.Reason = reason +} + +type FlinkApplicationPhase string + +func (p FlinkApplicationPhase) VerboseString() string { + phaseName := string(p) + if p == FlinkApplicationNew { + phaseName = "New" + } + return phaseName +} + +// As you add more ApplicationPhase please add it to FlinkApplicationPhases list +const ( + FlinkApplicationNew FlinkApplicationPhase = "" + FlinkApplicationUpdating FlinkApplicationPhase = "Updating" + FlinkApplicationClusterStarting FlinkApplicationPhase = "ClusterStarting" + FlinkApplicationSubmittingJob FlinkApplicationPhase = "SubmittingJob" + FlinkApplicationRunning FlinkApplicationPhase = "Running" + FlinkApplicationSavepointing FlinkApplicationPhase = "Savepointing" + FlinkApplicationDeleting FlinkApplicationPhase = "Deleting" + FlinkApplicationRecovering FlinkApplicationPhase = "Recovering" + FlinkApplicationRollingBackJob FlinkApplicationPhase = "RollingBackJob" + FlinkApplicationDeployFailed FlinkApplicationPhase = "DeployFailed" + FlinkApplicationDualRunning FlinkApplicationPhase = "DualRunning" + FlinkApplicationTeardown FlinkApplicationPhase = "Teardown" +) 
+
+// FlinkApplicationPhases lists the FlinkApplicationPhase values as a slice.
+// The entry order differs slightly from the constant declaration order
+// (DeployFailed is listed before RollingBackJob).
+var FlinkApplicationPhases = []FlinkApplicationPhase{
+	FlinkApplicationNew,
+	FlinkApplicationUpdating,
+	FlinkApplicationClusterStarting,
+	FlinkApplicationSubmittingJob,
+	FlinkApplicationRunning,
+	FlinkApplicationSavepointing,
+	FlinkApplicationDeleting,
+	FlinkApplicationRecovering,
+	FlinkApplicationDeployFailed,
+	FlinkApplicationRollingBackJob,
+	FlinkApplicationDualRunning,
+	FlinkApplicationTeardown,
+}
+
+// IsRunningPhase reports whether phase is FlinkApplicationRunning or
+// FlinkApplicationDeployFailed.
+func IsRunningPhase(phase FlinkApplicationPhase) bool {
+	return phase == FlinkApplicationRunning || phase == FlinkApplicationDeployFailed
+}
+
+// IsBlueGreenDeploymentMode reports whether mode is DeploymentModeBlueGreen.
+func IsBlueGreenDeploymentMode(mode DeploymentMode) bool {
+	return mode == DeploymentModeBlueGreen
+}
+
+// DeploymentMode is a string-typed deployment mode; see the constants below.
+type DeploymentMode string
+
+const (
+	DeploymentModeSingle    DeploymentMode = "Single"
+	DeploymentModeDual      DeploymentMode = "Dual"
+	DeploymentModeBlueGreen DeploymentMode = "BlueGreen"
+)
+
+// DeleteMode is a string-typed delete mode; see the constants below.
+type DeleteMode string
+
+const (
+	DeleteModeSavepoint   DeleteMode = "Savepoint"
+	DeleteModeForceCancel DeleteMode = "ForceCancel"
+	DeleteModeNone        DeleteMode = "None"
+)
+
+// HealthStatus is a string-typed health indicator: Green, Yellow or Red.
+type HealthStatus string
+
+const (
+	Green  HealthStatus = "Green"
+	Yellow HealthStatus = "Yellow"
+	Red    HealthStatus = "Red"
+)
+
+// JobState is a string-typed job state.
+// NOTE(review): the upper-case values presumably mirror the job states
+// reported by Flink itself; confirm against the Flink API.
+type JobState string
+
+const (
+	Created     JobState = "CREATED"
+	Running     JobState = "RUNNING"
+	Failing     JobState = "FAILING"
+	Failed      JobState = "FAILED"
+	Cancelling  JobState = "CANCELLING"
+	Canceled    JobState = "CANCELED"
+	Finished    JobState = "FINISHED"
+	Restarting  JobState = "RESTARTING"
+	Suspended   JobState = "SUSPENDED"
+	Reconciling JobState = "RECONCILING"
+)
+
+// FlinkApplicationError implements the error interface to make error handling more structured.
+type FlinkApplicationError struct {
+	AppError            string       `json:"appError,omitempty"`
+	Method              FlinkMethod  `json:"method,omitempty"`
+	ErrorCode           string       `json:"errorCode,omitempty"`
+	IsRetryable         bool         `json:"isRetryable,omitempty"`
+	IsFailFast          bool         `json:"isFailFast,omitempty"`
+	MaxRetries          int32        `json:"maxRetries,omitempty"`
+	LastErrorUpdateTime *metav1.Time `json:"lastErrorUpdateTime,omitempty"`
+}
+
+// Error returns the AppError text, satisfying the error interface.
+func (f *FlinkApplicationError) Error() string {
+	return f.AppError
+}
+
+// FlinkMethod is the string type of FlinkApplicationError.Method.
+// NOTE(review): the values below look like the names of Flink client
+// operations; confirm against the code that populates Method.
+type FlinkMethod string
+
+const (
+	CancelJobWithSavepoint FlinkMethod = "CancelJobWithSavepoint"
+	ForceCancelJob         FlinkMethod = "ForceCancelJob"
+	SubmitJob              FlinkMethod = "SubmitJob"
+	CheckSavepointStatus   FlinkMethod = "CheckSavepointStatus"
+	GetJobs                FlinkMethod = "GetJobs"
+	GetClusterOverview     FlinkMethod = "GetClusterOverview"
+	GetLatestCheckpoint    FlinkMethod = "GetLatestCheckpoint"
+	GetJobConfig           FlinkMethod = "GetJobConfig"
+	GetTaskManagers        FlinkMethod = "GetTaskManagers"
+	GetCheckpointCounts    FlinkMethod = "GetCheckpointCounts"
+	GetJobOverview         FlinkMethod = "GetJobOverview"
+)
diff --git a/pkg/apis/app/v1beta2/zz_generated.deepcopy.go b/pkg/apis/app/v1beta2/zz_generated.deepcopy.go
new file mode 100644
index 00000000..e6cca0d5
--- /dev/null
+++ b/pkg/apis/app/v1beta2/zz_generated.deepcopy.go
@@ -0,0 +1,391 @@
+// +build !ignore_autogenerated
+
+// Code generated by deepcopy-gen. DO NOT EDIT.
+
+package v1beta2
+
+import (
+	v1 "k8s.io/api/core/v1"
+	runtime "k8s.io/apimachinery/pkg/runtime"
+)
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *EnvironmentConfig) DeepCopyInto(out *EnvironmentConfig) {
+	*out = *in
+	if in.EnvFrom != nil {
+		in, out := &in.EnvFrom, &out.EnvFrom
+		*out = make([]v1.EnvFromSource, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+	if in.Env != nil {
+		in, out := &in.Env, &out.Env
+		*out = make([]v1.EnvVar, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+	return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EnvironmentConfig.
+func (in *EnvironmentConfig) DeepCopy() *EnvironmentConfig { + if in == nil { + return nil + } + out := new(EnvironmentConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FlinkApplication) DeepCopyInto(out *FlinkApplication) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlinkApplication. +func (in *FlinkApplication) DeepCopy() *FlinkApplication { + if in == nil { + return nil + } + out := new(FlinkApplication) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *FlinkApplication) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FlinkApplicationError) DeepCopyInto(out *FlinkApplicationError) { + *out = *in + if in.LastErrorUpdateTime != nil { + in, out := &in.LastErrorUpdateTime, &out.LastErrorUpdateTime + *out = (*in).DeepCopy() + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlinkApplicationError. +func (in *FlinkApplicationError) DeepCopy() *FlinkApplicationError { + if in == nil { + return nil + } + out := new(FlinkApplicationError) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *FlinkApplicationList) DeepCopyInto(out *FlinkApplicationList) { + *out = *in + out.TypeMeta = in.TypeMeta + out.ListMeta = in.ListMeta + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]FlinkApplication, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlinkApplicationList. +func (in *FlinkApplicationList) DeepCopy() *FlinkApplicationList { + if in == nil { + return nil + } + out := new(FlinkApplicationList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *FlinkApplicationList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FlinkApplicationSpec) DeepCopyInto(out *FlinkApplicationSpec) { + *out = *in + if in.ImagePullSecrets != nil { + in, out := &in.ImagePullSecrets, &out.ImagePullSecrets + *out = make([]v1.LocalObjectReference, len(*in)) + copy(*out, *in) + } + if in.SecurityContext != nil { + in, out := &in.SecurityContext, &out.SecurityContext + *out = new(v1.PodSecurityContext) + (*in).DeepCopyInto(*out) + } + in.FlinkConfig.DeepCopyInto(&out.FlinkConfig) + in.TaskManagerConfig.DeepCopyInto(&out.TaskManagerConfig) + in.JobManagerConfig.DeepCopyInto(&out.JobManagerConfig) + out.SavepointInfo = in.SavepointInfo + if in.RPCPort != nil { + in, out := &in.RPCPort, &out.RPCPort + *out = new(int32) + **out = **in + } + if in.BlobPort != nil { + in, out := &in.BlobPort, &out.BlobPort + *out = new(int32) + **out = **in + } + if in.QueryPort != nil { + in, out := &in.QueryPort, &out.QueryPort + *out = new(int32) + **out = **in + } + if in.UIPort != nil { + in, out := &in.UIPort, &out.UIPort + *out = new(int32) + **out 
= **in + } + if in.MetricsQueryPort != nil { + in, out := &in.MetricsQueryPort, &out.MetricsQueryPort + *out = new(int32) + **out = **in + } + if in.Volumes != nil { + in, out := &in.Volumes, &out.Volumes + *out = make([]v1.Volume, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.VolumeMounts != nil { + in, out := &in.VolumeMounts, &out.VolumeMounts + *out = make([]v1.VolumeMount, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.MaxCheckpointRestoreAgeSeconds != nil { + in, out := &in.MaxCheckpointRestoreAgeSeconds, &out.MaxCheckpointRestoreAgeSeconds + *out = new(int32) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlinkApplicationSpec. +func (in *FlinkApplicationSpec) DeepCopy() *FlinkApplicationSpec { + if in == nil { + return nil + } + out := new(FlinkApplicationSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FlinkApplicationStatus) DeepCopyInto(out *FlinkApplicationStatus) { + *out = *in + if in.StartedAt != nil { + in, out := &in.StartedAt, &out.StartedAt + *out = (*in).DeepCopy() + } + if in.LastUpdatedAt != nil { + in, out := &in.LastUpdatedAt, &out.LastUpdatedAt + *out = (*in).DeepCopy() + } + if in.ApplicationStatus != nil { + in, out := &in.ApplicationStatus, &out.ApplicationStatus + *out = make([]FlinkApplicationVersionStatus, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.LastSeenError != nil { + in, out := &in.LastSeenError, &out.LastSeenError + *out = new(FlinkApplicationError) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlinkApplicationStatus. 
+func (in *FlinkApplicationStatus) DeepCopy() *FlinkApplicationStatus { + if in == nil { + return nil + } + out := new(FlinkApplicationStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FlinkApplicationVersionStatus) DeepCopyInto(out *FlinkApplicationVersionStatus) { + *out = *in + out.ClusterStatus = in.ClusterStatus + in.JobStatus.DeepCopyInto(&out.JobStatus) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlinkApplicationVersionStatus. +func (in *FlinkApplicationVersionStatus) DeepCopy() *FlinkApplicationVersionStatus { + if in == nil { + return nil + } + out := new(FlinkApplicationVersionStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FlinkClusterStatus) DeepCopyInto(out *FlinkClusterStatus) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlinkClusterStatus. +func (in *FlinkClusterStatus) DeepCopy() *FlinkClusterStatus { + if in == nil { + return nil + } + out := new(FlinkClusterStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *FlinkJobStatus) DeepCopyInto(out *FlinkJobStatus) { + *out = *in + if in.StartTime != nil { + in, out := &in.StartTime, &out.StartTime + *out = (*in).DeepCopy() + } + if in.RestoreTime != nil { + in, out := &in.RestoreTime, &out.RestoreTime + *out = (*in).DeepCopy() + } + if in.LastFailingTime != nil { + in, out := &in.LastFailingTime, &out.LastFailingTime + *out = (*in).DeepCopy() + } + if in.LastCheckpointTime != nil { + in, out := &in.LastCheckpointTime, &out.LastCheckpointTime + *out = (*in).DeepCopy() + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlinkJobStatus. +func (in *FlinkJobStatus) DeepCopy() *FlinkJobStatus { + if in == nil { + return nil + } + out := new(FlinkJobStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *JobManagerConfig) DeepCopyInto(out *JobManagerConfig) { + *out = *in + if in.Resources != nil { + in, out := &in.Resources, &out.Resources + *out = new(v1.ResourceRequirements) + (*in).DeepCopyInto(*out) + } + in.EnvConfig.DeepCopyInto(&out.EnvConfig) + if in.Replicas != nil { + in, out := &in.Replicas, &out.Replicas + *out = new(int32) + **out = **in + } + if in.OffHeapMemoryFraction != nil { + in, out := &in.OffHeapMemoryFraction, &out.OffHeapMemoryFraction + *out = new(float64) + **out = **in + } + if in.NodeSelector != nil { + in, out := &in.NodeSelector, &out.NodeSelector + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new JobManagerConfig. 
+func (in *JobManagerConfig) DeepCopy() *JobManagerConfig { + if in == nil { + return nil + } + out := new(JobManagerConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SavepointInfo) DeepCopyInto(out *SavepointInfo) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SavepointInfo. +func (in *SavepointInfo) DeepCopy() *SavepointInfo { + if in == nil { + return nil + } + out := new(SavepointInfo) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *TaskManagerConfig) DeepCopyInto(out *TaskManagerConfig) { + *out = *in + if in.Resources != nil { + in, out := &in.Resources, &out.Resources + *out = new(v1.ResourceRequirements) + (*in).DeepCopyInto(*out) + } + in.EnvConfig.DeepCopyInto(&out.EnvConfig) + if in.TaskSlots != nil { + in, out := &in.TaskSlots, &out.TaskSlots + *out = new(int32) + **out = **in + } + if in.OffHeapMemoryFraction != nil { + in, out := &in.OffHeapMemoryFraction, &out.OffHeapMemoryFraction + *out = new(float64) + **out = **in + } + if in.NodeSelector != nil { + in, out := &in.NodeSelector, &out.NodeSelector + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TaskManagerConfig. 
+func (in *TaskManagerConfig) DeepCopy() *TaskManagerConfig { + if in == nil { + return nil + } + out := new(TaskManagerConfig) + in.DeepCopyInto(out) + return out +} diff --git a/pkg/client/clientset/versioned/clientset.go b/pkg/client/clientset/versioned/clientset.go index 9aa48cc5..ba4f7de7 100644 --- a/pkg/client/clientset/versioned/clientset.go +++ b/pkg/client/clientset/versioned/clientset.go @@ -3,7 +3,7 @@ package versioned import ( - flinkv1beta1 "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta1" + flinkv1beta2 "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta2" discovery "k8s.io/client-go/discovery" rest "k8s.io/client-go/rest" flowcontrol "k8s.io/client-go/util/flowcontrol" @@ -11,19 +11,19 @@ import ( type Interface interface { Discovery() discovery.DiscoveryInterface - FlinkV1beta1() flinkv1beta1.FlinkV1beta1Interface + FlinkV1beta2() flinkv1beta2.FlinkV1beta2Interface } // Clientset contains the clients for groups. Each group has exactly one // version included in a Clientset. type Clientset struct { *discovery.DiscoveryClient - flinkV1beta1 *flinkv1beta1.FlinkV1beta1Client + flinkV1beta2 *flinkv1beta2.FlinkV1beta2Client } -// FlinkV1beta1 retrieves the FlinkV1beta1Client -func (c *Clientset) FlinkV1beta1() flinkv1beta1.FlinkV1beta1Interface { - return c.flinkV1beta1 +// FlinkV1beta2 retrieves the FlinkV1beta2Client +func (c *Clientset) FlinkV1beta2() flinkv1beta2.FlinkV1beta2Interface { + return c.flinkV1beta2 } // Discovery retrieves the DiscoveryClient @@ -42,7 +42,7 @@ func NewForConfig(c *rest.Config) (*Clientset, error) { } var cs Clientset var err error - cs.flinkV1beta1, err = flinkv1beta1.NewForConfig(&configShallowCopy) + cs.flinkV1beta2, err = flinkv1beta2.NewForConfig(&configShallowCopy) if err != nil { return nil, err } @@ -58,7 +58,7 @@ func NewForConfig(c *rest.Config) (*Clientset, error) { // panics if there is an error in the config. 
func NewForConfigOrDie(c *rest.Config) *Clientset { var cs Clientset - cs.flinkV1beta1 = flinkv1beta1.NewForConfigOrDie(c) + cs.flinkV1beta2 = flinkv1beta2.NewForConfigOrDie(c) cs.DiscoveryClient = discovery.NewDiscoveryClientForConfigOrDie(c) return &cs @@ -67,7 +67,7 @@ func NewForConfigOrDie(c *rest.Config) *Clientset { // New creates a new Clientset for the given RESTClient. func New(c rest.Interface) *Clientset { var cs Clientset - cs.flinkV1beta1 = flinkv1beta1.New(c) + cs.flinkV1beta2 = flinkv1beta2.New(c) cs.DiscoveryClient = discovery.NewDiscoveryClient(c) return &cs diff --git a/pkg/client/clientset/versioned/fake/clientset_generated.go b/pkg/client/clientset/versioned/fake/clientset_generated.go index 664662d9..200e43a5 100644 --- a/pkg/client/clientset/versioned/fake/clientset_generated.go +++ b/pkg/client/clientset/versioned/fake/clientset_generated.go @@ -4,8 +4,8 @@ package fake import ( clientset "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned" - flinkv1beta1 "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta1" - fakeflinkv1beta1 "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta1/fake" + flinkv1beta2 "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta2" + fakeflinkv1beta2 "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta2/fake" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/watch" "k8s.io/client-go/discovery" @@ -55,7 +55,7 @@ func (c *Clientset) Discovery() discovery.DiscoveryInterface { var _ clientset.Interface = &Clientset{} -// FlinkV1beta1 retrieves the FlinkV1beta1Client -func (c *Clientset) FlinkV1beta1() flinkv1beta1.FlinkV1beta1Interface { - return &fakeflinkv1beta1.FakeFlinkV1beta1{Fake: &c.Fake} +// FlinkV1beta2 retrieves the FlinkV1beta2Client +func (c *Clientset) FlinkV1beta2() flinkv1beta2.FlinkV1beta2Interface { + return &fakeflinkv1beta2.FakeFlinkV1beta2{Fake: &c.Fake} } diff --git 
a/pkg/client/clientset/versioned/fake/register.go b/pkg/client/clientset/versioned/fake/register.go index 61bc01de..e84b7913 100644 --- a/pkg/client/clientset/versioned/fake/register.go +++ b/pkg/client/clientset/versioned/fake/register.go @@ -3,7 +3,7 @@ package fake import ( - flinkv1beta1 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + flinkv1beta2 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" schema "k8s.io/apimachinery/pkg/runtime/schema" @@ -15,7 +15,7 @@ var scheme = runtime.NewScheme() var codecs = serializer.NewCodecFactory(scheme) var parameterCodec = runtime.NewParameterCodec(scheme) var localSchemeBuilder = runtime.SchemeBuilder{ - flinkv1beta1.AddToScheme, + flinkv1beta2.AddToScheme, } // AddToScheme adds all types of this clientset into the given scheme. This allows composition diff --git a/pkg/client/clientset/versioned/scheme/register.go b/pkg/client/clientset/versioned/scheme/register.go index 8b472789..ceb19eec 100644 --- a/pkg/client/clientset/versioned/scheme/register.go +++ b/pkg/client/clientset/versioned/scheme/register.go @@ -3,7 +3,7 @@ package scheme import ( - flinkv1beta1 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + flinkv1beta2 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" schema "k8s.io/apimachinery/pkg/runtime/schema" @@ -15,7 +15,7 @@ var Scheme = runtime.NewScheme() var Codecs = serializer.NewCodecFactory(Scheme) var ParameterCodec = runtime.NewParameterCodec(Scheme) var localSchemeBuilder = runtime.SchemeBuilder{ - flinkv1beta1.AddToScheme, + flinkv1beta2.AddToScheme, } // AddToScheme adds all types of this clientset into the given scheme. 
This allows composition diff --git a/pkg/client/clientset/versioned/typed/app/v1beta2/app_client.go b/pkg/client/clientset/versioned/typed/app/v1beta2/app_client.go new file mode 100644 index 00000000..a9c7d360 --- /dev/null +++ b/pkg/client/clientset/versioned/typed/app/v1beta2/app_client.go @@ -0,0 +1,74 @@ +// Code generated by client-gen. DO NOT EDIT. + +package v1beta2 + +import ( + v1beta2 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/scheme" + serializer "k8s.io/apimachinery/pkg/runtime/serializer" + rest "k8s.io/client-go/rest" +) + +type FlinkV1beta2Interface interface { + RESTClient() rest.Interface + FlinkApplicationsGetter +} + +// FlinkV1beta2Client is used to interact with features provided by the flink.k8s.io group. +type FlinkV1beta2Client struct { + restClient rest.Interface +} + +func (c *FlinkV1beta2Client) FlinkApplications(namespace string) FlinkApplicationInterface { + return newFlinkApplications(c, namespace) +} + +// NewForConfig creates a new FlinkV1beta2Client for the given config. +func NewForConfig(c *rest.Config) (*FlinkV1beta2Client, error) { + config := *c + if err := setConfigDefaults(&config); err != nil { + return nil, err + } + client, err := rest.RESTClientFor(&config) + if err != nil { + return nil, err + } + return &FlinkV1beta2Client{client}, nil +} + +// NewForConfigOrDie creates a new FlinkV1beta2Client for the given config and +// panics if there is an error in the config. +func NewForConfigOrDie(c *rest.Config) *FlinkV1beta2Client { + client, err := NewForConfig(c) + if err != nil { + panic(err) + } + return client +} + +// New creates a new FlinkV1beta2Client for the given RESTClient. 
+func New(c rest.Interface) *FlinkV1beta2Client { + return &FlinkV1beta2Client{c} +} + +func setConfigDefaults(config *rest.Config) error { + gv := v1beta2.SchemeGroupVersion + config.GroupVersion = &gv + config.APIPath = "/apis" + config.NegotiatedSerializer = serializer.DirectCodecFactory{CodecFactory: scheme.Codecs} + + if config.UserAgent == "" { + config.UserAgent = rest.DefaultKubernetesUserAgent() + } + + return nil +} + +// RESTClient returns a RESTClient that is used to communicate +// with API server by this client implementation. +func (c *FlinkV1beta2Client) RESTClient() rest.Interface { + if c == nil { + return nil + } + return c.restClient +} diff --git a/pkg/client/clientset/versioned/typed/app/v1beta2/doc.go b/pkg/client/clientset/versioned/typed/app/v1beta2/doc.go new file mode 100644 index 00000000..ebe38377 --- /dev/null +++ b/pkg/client/clientset/versioned/typed/app/v1beta2/doc.go @@ -0,0 +1,4 @@ +// Code generated by client-gen. DO NOT EDIT. + +// This package has the automatically generated typed clients. +package v1beta2 diff --git a/pkg/client/clientset/versioned/typed/app/v1beta2/fake/doc.go b/pkg/client/clientset/versioned/typed/app/v1beta2/fake/doc.go new file mode 100644 index 00000000..2b5ba4c8 --- /dev/null +++ b/pkg/client/clientset/versioned/typed/app/v1beta2/fake/doc.go @@ -0,0 +1,4 @@ +// Code generated by client-gen. DO NOT EDIT. + +// Package fake has the automatically generated clients. +package fake diff --git a/pkg/client/clientset/versioned/typed/app/v1beta2/fake/fake_app_client.go b/pkg/client/clientset/versioned/typed/app/v1beta2/fake/fake_app_client.go new file mode 100644 index 00000000..4011f240 --- /dev/null +++ b/pkg/client/clientset/versioned/typed/app/v1beta2/fake/fake_app_client.go @@ -0,0 +1,24 @@ +// Code generated by client-gen. DO NOT EDIT. 
+ +package fake + +import ( + v1beta2 "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta2" + rest "k8s.io/client-go/rest" + testing "k8s.io/client-go/testing" +) + +type FakeFlinkV1beta2 struct { + *testing.Fake +} + +func (c *FakeFlinkV1beta2) FlinkApplications(namespace string) v1beta2.FlinkApplicationInterface { + return &FakeFlinkApplications{c, namespace} +} + +// RESTClient returns a RESTClient that is used to communicate +// with API server by this client implementation. +func (c *FakeFlinkV1beta2) RESTClient() rest.Interface { + var ret *rest.RESTClient + return ret +} diff --git a/pkg/client/clientset/versioned/typed/app/v1beta2/fake/fake_flinkapplication.go b/pkg/client/clientset/versioned/typed/app/v1beta2/fake/fake_flinkapplication.go new file mode 100644 index 00000000..c9c40a10 --- /dev/null +++ b/pkg/client/clientset/versioned/typed/app/v1beta2/fake/fake_flinkapplication.go @@ -0,0 +1,112 @@ +// Code generated by client-gen. DO NOT EDIT. + +package fake + +import ( + v1beta2 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + labels "k8s.io/apimachinery/pkg/labels" + schema "k8s.io/apimachinery/pkg/runtime/schema" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + testing "k8s.io/client-go/testing" +) + +// FakeFlinkApplications implements FlinkApplicationInterface +type FakeFlinkApplications struct { + Fake *FakeFlinkV1beta2 + ns string +} + +var flinkapplicationsResource = schema.GroupVersionResource{Group: "flink.k8s.io", Version: "v1beta2", Resource: "flinkapplications"} + +var flinkapplicationsKind = schema.GroupVersionKind{Group: "flink.k8s.io", Version: "v1beta2", Kind: "FlinkApplication"} + +// Get takes name of the flinkApplication, and returns the corresponding flinkApplication object, and an error if there is any. 
+func (c *FakeFlinkApplications) Get(name string, options v1.GetOptions) (result *v1beta2.FlinkApplication, err error) { + obj, err := c.Fake. + Invokes(testing.NewGetAction(flinkapplicationsResource, c.ns, name), &v1beta2.FlinkApplication{}) + + if obj == nil { + return nil, err + } + return obj.(*v1beta2.FlinkApplication), err +} + +// List takes label and field selectors, and returns the list of FlinkApplications that match those selectors. +func (c *FakeFlinkApplications) List(opts v1.ListOptions) (result *v1beta2.FlinkApplicationList, err error) { + obj, err := c.Fake. + Invokes(testing.NewListAction(flinkapplicationsResource, flinkapplicationsKind, c.ns, opts), &v1beta2.FlinkApplicationList{}) + + if obj == nil { + return nil, err + } + + label, _, _ := testing.ExtractFromListOptions(opts) + if label == nil { + label = labels.Everything() + } + list := &v1beta2.FlinkApplicationList{ListMeta: obj.(*v1beta2.FlinkApplicationList).ListMeta} + for _, item := range obj.(*v1beta2.FlinkApplicationList).Items { + if label.Matches(labels.Set(item.Labels)) { + list.Items = append(list.Items, item) + } + } + return list, err +} + +// Watch returns a watch.Interface that watches the requested flinkApplications. +func (c *FakeFlinkApplications) Watch(opts v1.ListOptions) (watch.Interface, error) { + return c.Fake. + InvokesWatch(testing.NewWatchAction(flinkapplicationsResource, c.ns, opts)) + +} + +// Create takes the representation of a flinkApplication and creates it. Returns the server's representation of the flinkApplication, and an error, if there is any. +func (c *FakeFlinkApplications) Create(flinkApplication *v1beta2.FlinkApplication) (result *v1beta2.FlinkApplication, err error) { + obj, err := c.Fake. 
+ Invokes(testing.NewCreateAction(flinkapplicationsResource, c.ns, flinkApplication), &v1beta2.FlinkApplication{}) + + if obj == nil { + return nil, err + } + return obj.(*v1beta2.FlinkApplication), err +} + +// Update takes the representation of a flinkApplication and updates it. Returns the server's representation of the flinkApplication, and an error, if there is any. +func (c *FakeFlinkApplications) Update(flinkApplication *v1beta2.FlinkApplication) (result *v1beta2.FlinkApplication, err error) { + obj, err := c.Fake. + Invokes(testing.NewUpdateAction(flinkapplicationsResource, c.ns, flinkApplication), &v1beta2.FlinkApplication{}) + + if obj == nil { + return nil, err + } + return obj.(*v1beta2.FlinkApplication), err +} + +// Delete takes name of the flinkApplication and deletes it. Returns an error if one occurs. +func (c *FakeFlinkApplications) Delete(name string, options *v1.DeleteOptions) error { + _, err := c.Fake. + Invokes(testing.NewDeleteAction(flinkapplicationsResource, c.ns, name), &v1beta2.FlinkApplication{}) + + return err +} + +// DeleteCollection deletes a collection of objects. +func (c *FakeFlinkApplications) DeleteCollection(options *v1.DeleteOptions, listOptions v1.ListOptions) error { + action := testing.NewDeleteCollectionAction(flinkapplicationsResource, c.ns, listOptions) + + _, err := c.Fake.Invokes(action, &v1beta2.FlinkApplicationList{}) + return err +} + +// Patch applies the patch and returns the patched flinkApplication. +func (c *FakeFlinkApplications) Patch(name string, pt types.PatchType, data []byte, subresources ...string) (result *v1beta2.FlinkApplication, err error) { + obj, err := c.Fake. 
+ Invokes(testing.NewPatchSubresourceAction(flinkapplicationsResource, c.ns, name, pt, data, subresources...), &v1beta2.FlinkApplication{}) + + if obj == nil { + return nil, err + } + return obj.(*v1beta2.FlinkApplication), err +} diff --git a/pkg/client/clientset/versioned/typed/app/v1beta2/flinkapplication.go b/pkg/client/clientset/versioned/typed/app/v1beta2/flinkapplication.go new file mode 100644 index 00000000..02201294 --- /dev/null +++ b/pkg/client/clientset/versioned/typed/app/v1beta2/flinkapplication.go @@ -0,0 +1,158 @@ +// Code generated by client-gen. DO NOT EDIT. + +package v1beta2 + +import ( + "time" + + v1beta2 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + scheme "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/scheme" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + rest "k8s.io/client-go/rest" +) + +// FlinkApplicationsGetter has a method to return a FlinkApplicationInterface. +// A group's client should implement this interface. +type FlinkApplicationsGetter interface { + FlinkApplications(namespace string) FlinkApplicationInterface +} + +// FlinkApplicationInterface has methods to work with FlinkApplication resources. 
+type FlinkApplicationInterface interface { + Create(*v1beta2.FlinkApplication) (*v1beta2.FlinkApplication, error) + Update(*v1beta2.FlinkApplication) (*v1beta2.FlinkApplication, error) + Delete(name string, options *v1.DeleteOptions) error + DeleteCollection(options *v1.DeleteOptions, listOptions v1.ListOptions) error + Get(name string, options v1.GetOptions) (*v1beta2.FlinkApplication, error) + List(opts v1.ListOptions) (*v1beta2.FlinkApplicationList, error) + Watch(opts v1.ListOptions) (watch.Interface, error) + Patch(name string, pt types.PatchType, data []byte, subresources ...string) (result *v1beta2.FlinkApplication, err error) + FlinkApplicationExpansion +} + +// flinkApplications implements FlinkApplicationInterface +type flinkApplications struct { + client rest.Interface + ns string +} + +// newFlinkApplications returns a FlinkApplications +func newFlinkApplications(c *FlinkV1beta2Client, namespace string) *flinkApplications { + return &flinkApplications{ + client: c.RESTClient(), + ns: namespace, + } +} + +// Get takes name of the flinkApplication, and returns the corresponding flinkApplication object, and an error if there is any. +func (c *flinkApplications) Get(name string, options v1.GetOptions) (result *v1beta2.FlinkApplication, err error) { + result = &v1beta2.FlinkApplication{} + err = c.client.Get(). + Namespace(c.ns). + Resource("flinkapplications"). + Name(name). + VersionedParams(&options, scheme.ParameterCodec). + Do(). + Into(result) + return +} + +// List takes label and field selectors, and returns the list of FlinkApplications that match those selectors. +func (c *flinkApplications) List(opts v1.ListOptions) (result *v1beta2.FlinkApplicationList, err error) { + var timeout time.Duration + if opts.TimeoutSeconds != nil { + timeout = time.Duration(*opts.TimeoutSeconds) * time.Second + } + result = &v1beta2.FlinkApplicationList{} + err = c.client.Get(). + Namespace(c.ns). + Resource("flinkapplications"). 
+ VersionedParams(&opts, scheme.ParameterCodec). + Timeout(timeout). + Do(). + Into(result) + return +} + +// Watch returns a watch.Interface that watches the requested flinkApplications. +func (c *flinkApplications) Watch(opts v1.ListOptions) (watch.Interface, error) { + var timeout time.Duration + if opts.TimeoutSeconds != nil { + timeout = time.Duration(*opts.TimeoutSeconds) * time.Second + } + opts.Watch = true + return c.client.Get(). + Namespace(c.ns). + Resource("flinkapplications"). + VersionedParams(&opts, scheme.ParameterCodec). + Timeout(timeout). + Watch() +} + +// Create takes the representation of a flinkApplication and creates it. Returns the server's representation of the flinkApplication, and an error, if there is any. +func (c *flinkApplications) Create(flinkApplication *v1beta2.FlinkApplication) (result *v1beta2.FlinkApplication, err error) { + result = &v1beta2.FlinkApplication{} + err = c.client.Post(). + Namespace(c.ns). + Resource("flinkapplications"). + Body(flinkApplication). + Do(). + Into(result) + return +} + +// Update takes the representation of a flinkApplication and updates it. Returns the server's representation of the flinkApplication, and an error, if there is any. +func (c *flinkApplications) Update(flinkApplication *v1beta2.FlinkApplication) (result *v1beta2.FlinkApplication, err error) { + result = &v1beta2.FlinkApplication{} + err = c.client.Put(). + Namespace(c.ns). + Resource("flinkapplications"). + Name(flinkApplication.Name). + Body(flinkApplication). + Do(). + Into(result) + return +} + +// Delete takes name of the flinkApplication and deletes it. Returns an error if one occurs. +func (c *flinkApplications) Delete(name string, options *v1.DeleteOptions) error { + return c.client.Delete(). + Namespace(c.ns). + Resource("flinkapplications"). + Name(name). + Body(options). + Do(). + Error() +} + +// DeleteCollection deletes a collection of objects. 
+func (c *flinkApplications) DeleteCollection(options *v1.DeleteOptions, listOptions v1.ListOptions) error { + var timeout time.Duration + if listOptions.TimeoutSeconds != nil { + timeout = time.Duration(*listOptions.TimeoutSeconds) * time.Second + } + return c.client.Delete(). + Namespace(c.ns). + Resource("flinkapplications"). + VersionedParams(&listOptions, scheme.ParameterCodec). + Timeout(timeout). + Body(options). + Do(). + Error() +} + +// Patch applies the patch and returns the patched flinkApplication. +func (c *flinkApplications) Patch(name string, pt types.PatchType, data []byte, subresources ...string) (result *v1beta2.FlinkApplication, err error) { + result = &v1beta2.FlinkApplication{} + err = c.client.Patch(pt). + Namespace(c.ns). + Resource("flinkapplications"). + SubResource(subresources...). + Name(name). + Body(data). + Do(). + Into(result) + return +} diff --git a/pkg/client/clientset/versioned/typed/app/v1beta2/generated_expansion.go b/pkg/client/clientset/versioned/typed/app/v1beta2/generated_expansion.go new file mode 100644 index 00000000..28228bbf --- /dev/null +++ b/pkg/client/clientset/versioned/typed/app/v1beta2/generated_expansion.go @@ -0,0 +1,5 @@ +// Code generated by client-gen. DO NOT EDIT. 
+ +package v1beta2 + +type FlinkApplicationExpansion interface{} diff --git a/pkg/controller/flink/client/api.go b/pkg/controller/flink/client/api.go index 68c802fe..38941a38 100644 --- a/pkg/controller/flink/client/api.go +++ b/pkg/controller/flink/client/api.go @@ -7,7 +7,7 @@ import ( "strings" "time" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" "net/http" @@ -114,18 +114,18 @@ func (c *FlinkJobManagerClient) GetJobConfig(ctx context.Context, url, jobID str response, err := c.executeRequest(ctx, httpGet, url, nil) if err != nil { c.metrics.getJobConfigFailureCounter.Inc(ctx) - return nil, GetRetryableError(err, v1beta1.GetJobConfig, GlobalFailure, DefaultRetries) + return nil, GetRetryableError(err, v1beta2.GetJobConfig, GlobalFailure, DefaultRetries) } if response != nil && !response.IsSuccess() { c.metrics.getJobConfigFailureCounter.Inc(ctx) logger.Errorf(ctx, fmt.Sprintf("Get Jobconfig failed with response %v", response)) - return nil, GetRetryableError(err, v1beta1.GetJobConfig, response.Status(), DefaultRetries) + return nil, GetRetryableError(err, v1beta2.GetJobConfig, response.Status(), DefaultRetries) } var jobConfigResponse JobConfigResponse if err := json.Unmarshal(response.Body(), &jobConfigResponse); err != nil { logger.Errorf(ctx, "Unable to Unmarshal jobPlanResponse %v, err: %v", response, err) - return nil, GetRetryableError(err, v1beta1.GetJobConfig, JSONUnmarshalError, DefaultRetries) + return nil, GetRetryableError(err, v1beta2.GetJobConfig, JSONUnmarshalError, DefaultRetries) } c.metrics.getJobConfigSuccessCounter.Inc(ctx) return &jobConfigResponse, nil @@ -136,19 +136,19 @@ func (c *FlinkJobManagerClient) GetClusterOverview(ctx context.Context, url stri response, err := c.executeRequest(ctx, httpGet, url, nil) if err != nil { c.metrics.getClusterFailureCounter.Inc(ctx) - return nil, GetRetryableError(err, v1beta1.GetClusterOverview, GlobalFailure, DefaultRetries) + return 
nil, GetRetryableError(err, v1beta2.GetClusterOverview, GlobalFailure, DefaultRetries) } if response != nil && !response.IsSuccess() { c.metrics.getClusterFailureCounter.Inc(ctx) if response.StatusCode() != int(http.StatusNotFound) && response.StatusCode() != int(http.StatusServiceUnavailable) { logger.Errorf(ctx, fmt.Sprintf("Get cluster overview failed with response %v", response)) } - return nil, GetRetryableError(err, v1beta1.GetClusterOverview, response.Status(), DefaultRetries) + return nil, GetRetryableError(err, v1beta2.GetClusterOverview, response.Status(), DefaultRetries) } var clusterOverviewResponse ClusterOverviewResponse if err = json.Unmarshal(response.Body(), &clusterOverviewResponse); err != nil { logger.Errorf(ctx, "Unable to Unmarshal clusterOverviewResponse %v, err: %v", response, err) - return nil, GetRetryableError(err, v1beta1.GetClusterOverview, JSONUnmarshalError, DefaultRetries) + return nil, GetRetryableError(err, v1beta2.GetClusterOverview, JSONUnmarshalError, DefaultRetries) } c.metrics.getClusterSuccessCounter.Inc(ctx) return &clusterOverviewResponse, nil @@ -187,17 +187,17 @@ func (c *FlinkJobManagerClient) CancelJobWithSavepoint(ctx context.Context, url response, err := c.executeRequest(ctx, httpPost, url, cancelJobRequest) if err != nil { c.metrics.cancelJobFailureCounter.Inc(ctx) - return "", GetRetryableError(err, v1beta1.CancelJobWithSavepoint, GlobalFailure, 5) + return "", GetRetryableError(err, v1beta2.CancelJobWithSavepoint, GlobalFailure, 5) } if response != nil && !response.IsSuccess() { c.metrics.cancelJobFailureCounter.Inc(ctx) logger.Errorf(ctx, fmt.Sprintf("Cancel job failed with response %v", response)) - return "", GetRetryableError(err, v1beta1.CancelJobWithSavepoint, response.Status(), 5) + return "", GetRetryableError(err, v1beta2.CancelJobWithSavepoint, response.Status(), 5) } var cancelJobResponse CancelJobResponse if err = json.Unmarshal(response.Body(), &cancelJobResponse); err != nil { logger.Errorf(ctx, 
"Unable to Unmarshal cancelJobResponse %v, err: %v", response, err) - return "", GetRetryableError(err, v1beta1.CancelJobWithSavepoint, JSONUnmarshalError, 5) + return "", GetRetryableError(err, v1beta2.CancelJobWithSavepoint, JSONUnmarshalError, 5) } c.metrics.cancelJobSuccessCounter.Inc(ctx) return cancelJobResponse.TriggerID, nil @@ -212,12 +212,12 @@ func (c *FlinkJobManagerClient) ForceCancelJob(ctx context.Context, url string, if err != nil { c.metrics.forceCancelJobFailureCounter.Inc(ctx) logger.Errorf(ctx, fmt.Sprintf("Force cancel job failed with error %v", err)) - return GetRetryableError(err, v1beta1.ForceCancelJob, GlobalFailure, DefaultRetries) + return GetRetryableError(err, v1beta2.ForceCancelJob, GlobalFailure, DefaultRetries) } if response != nil && !response.IsSuccess() { c.metrics.forceCancelJobFailureCounter.Inc(ctx) logger.Errorf(ctx, fmt.Sprintf("Force cancel job failed with response %v", response)) - return GetRetryableError(err, v1beta1.ForceCancelJob, response.Status(), DefaultRetries) + return GetRetryableError(err, v1beta2.ForceCancelJob, response.Status(), DefaultRetries) } c.metrics.forceCancelJobFailureCounter.Inc(ctx) @@ -231,7 +231,7 @@ func (c *FlinkJobManagerClient) SubmitJob(ctx context.Context, url string, jarID response, err := c.executeRequest(ctx, httpPost, url, submitJobRequest) if err != nil { c.metrics.submitJobFailureCounter.Inc(ctx) - return nil, GetRetryableError(err, v1beta1.SubmitJob, GlobalFailure, DefaultRetries) + return nil, GetRetryableError(err, v1beta2.SubmitJob, GlobalFailure, DefaultRetries) } if response != nil && !response.IsSuccess() { c.metrics.submitJobFailureCounter.Inc(ctx) @@ -241,18 +241,18 @@ func (c *FlinkJobManagerClient) SubmitJob(ctx context.Context, url string, jarID // in those cases body := response.String() if strings.Contains(body, programInvocationException) || strings.Contains(body, jobSubmissionException) { - return nil, GetNonRetryableErrorWithMessage(err, v1beta1.SubmitJob, 
response.Status(), body) + return nil, GetNonRetryableErrorWithMessage(err, v1beta2.SubmitJob, response.Status(), body) } - return nil, GetRetryableErrorWithMessage(err, v1beta1.SubmitJob, response.Status(), DefaultRetries, string(response.Body())) + return nil, GetRetryableErrorWithMessage(err, v1beta2.SubmitJob, response.Status(), DefaultRetries, string(response.Body())) } - return nil, GetNonRetryableErrorWithMessage(err, v1beta1.SubmitJob, response.Status(), string(response.Body())) + return nil, GetNonRetryableErrorWithMessage(err, v1beta2.SubmitJob, response.Status(), string(response.Body())) } var submitJobResponse SubmitJobResponse if err = json.Unmarshal(response.Body(), &submitJobResponse); err != nil { logger.Errorf(ctx, "Unable to Unmarshal submitJobResponse %v, err: %v", response, err) - return nil, GetRetryableErrorWithMessage(err, v1beta1.SubmitJob, response.Status(), DefaultRetries, JSONUnmarshalError) + return nil, GetRetryableErrorWithMessage(err, v1beta2.SubmitJob, response.Status(), DefaultRetries, JSONUnmarshalError) } c.metrics.submitJobSuccessCounter.Inc(ctx) @@ -266,17 +266,17 @@ func (c *FlinkJobManagerClient) CheckSavepointStatus(ctx context.Context, url st response, err := c.executeRequest(ctx, httpGet, url, nil) if err != nil { c.metrics.checkSavepointFailureCounter.Inc(ctx) - return nil, GetRetryableError(err, v1beta1.CheckSavepointStatus, GlobalFailure, checkSavepointStatusRetries) + return nil, GetRetryableError(err, v1beta2.CheckSavepointStatus, GlobalFailure, checkSavepointStatusRetries) } if response != nil && !response.IsSuccess() { c.metrics.checkSavepointFailureCounter.Inc(ctx) logger.Errorf(ctx, fmt.Sprintf("Check savepoint status failed with response %v", response)) - return nil, GetRetryableError(err, v1beta1.CheckSavepointStatus, response.Status(), checkSavepointStatusRetries) + return nil, GetRetryableError(err, v1beta2.CheckSavepointStatus, response.Status(), checkSavepointStatusRetries) } var savepointResponse 
SavepointResponse if err = json.Unmarshal(response.Body(), &savepointResponse); err != nil { logger.Errorf(ctx, "Unable to Unmarshal savepointResponse %v, err: %v", response, err) - return nil, GetRetryableError(err, v1beta1.CheckSavepointStatus, JSONUnmarshalError, checkSavepointStatusRetries) + return nil, GetRetryableError(err, v1beta2.CheckSavepointStatus, JSONUnmarshalError, checkSavepointStatusRetries) } c.metrics.cancelJobSuccessCounter.Inc(ctx) return &savepointResponse, nil @@ -287,18 +287,18 @@ func (c *FlinkJobManagerClient) GetJobs(ctx context.Context, url string) (*GetJo response, err := c.executeRequest(ctx, httpGet, url, nil) if err != nil { c.metrics.getJobsFailureCounter.Inc(ctx) - return nil, GetRetryableError(err, v1beta1.GetJobs, GlobalFailure, DefaultRetries) + return nil, GetRetryableError(err, v1beta2.GetJobs, GlobalFailure, DefaultRetries) } if response != nil && !response.IsSuccess() { c.metrics.getJobsFailureCounter.Inc(ctx) logger.Errorf(ctx, fmt.Sprintf("GetJobs failed with response %v", response)) - return nil, GetRetryableError(err, v1beta1.GetJobs, response.Status(), DefaultRetries) + return nil, GetRetryableError(err, v1beta2.GetJobs, response.Status(), DefaultRetries) } var getJobsResponse GetJobsResponse if err = json.Unmarshal(response.Body(), &getJobsResponse); err != nil { logger.Errorf(ctx, "%v", getJobsResponse) logger.Errorf(ctx, "Unable to Unmarshal getJobsResponse %v, err: %v", response, err) - return nil, GetRetryableError(err, v1beta1.GetJobs, response.Status(), DefaultRetries) + return nil, GetRetryableError(err, v1beta2.GetJobs, response.Status(), DefaultRetries) } c.metrics.getJobsSuccessCounter.Inc(ctx) return &getJobsResponse, nil @@ -309,11 +309,11 @@ func (c *FlinkJobManagerClient) GetLatestCheckpoint(ctx context.Context, url str response, err := c.executeRequest(ctx, httpGet, endpoint, nil) if err != nil { c.metrics.getCheckpointsFailureCounter.Inc(ctx) - return nil, GetRetryableError(err, 
v1beta1.GetLatestCheckpoint, GlobalFailure, DefaultRetries) + return nil, GetRetryableError(err, v1beta2.GetLatestCheckpoint, GlobalFailure, DefaultRetries) } if response != nil && !response.IsSuccess() { c.metrics.getCheckpointsFailureCounter.Inc(ctx) - return nil, GetRetryableError(err, v1beta1.GetLatestCheckpoint, response.Status(), DefaultRetries) + return nil, GetRetryableError(err, v1beta2.GetLatestCheckpoint, response.Status(), DefaultRetries) } var checkpointResponse CheckpointResponse @@ -329,11 +329,11 @@ func (c *FlinkJobManagerClient) GetTaskManagers(ctx context.Context, url string) endpoint := url + taskmanagersURL response, err := c.executeRequest(ctx, httpGet, endpoint, nil) if err != nil { - return nil, GetRetryableError(err, v1beta1.GetTaskManagers, GlobalFailure, DefaultRetries) + return nil, GetRetryableError(err, v1beta2.GetTaskManagers, GlobalFailure, DefaultRetries) } if response != nil && !response.IsSuccess() { - return nil, GetRetryableError(err, v1beta1.GetTaskManagers, response.Status(), DefaultRetries) + return nil, GetRetryableError(err, v1beta2.GetTaskManagers, response.Status(), DefaultRetries) } var taskmanagerResponse TaskManagersResponse @@ -350,11 +350,11 @@ func (c *FlinkJobManagerClient) GetCheckpointCounts(ctx context.Context, url str response, err := c.executeRequest(ctx, httpGet, endpoint, nil) if err != nil { c.metrics.getCheckpointsFailureCounter.Inc(ctx) - return nil, GetRetryableError(err, v1beta1.GetCheckpointCounts, GlobalFailure, DefaultRetries) + return nil, GetRetryableError(err, v1beta2.GetCheckpointCounts, GlobalFailure, DefaultRetries) } if response != nil && !response.IsSuccess() { c.metrics.getCheckpointsFailureCounter.Inc(ctx) - return nil, GetRetryableError(err, v1beta1.GetCheckpointCounts, response.Status(), DefaultRetries) + return nil, GetRetryableError(err, v1beta2.GetCheckpointCounts, response.Status(), DefaultRetries) } var checkpointResponse CheckpointResponse @@ -370,11 +370,11 @@ func (c 
*FlinkJobManagerClient) GetJobOverview(ctx context.Context, url string, endpoint := fmt.Sprintf(url+GetJobsOverviewURL, jobID) response, err := c.executeRequest(ctx, httpGet, endpoint, nil) if err != nil { - return nil, GetRetryableError(err, v1beta1.GetJobOverview, GlobalFailure, DefaultRetries) + return nil, GetRetryableError(err, v1beta2.GetJobOverview, GlobalFailure, DefaultRetries) } if response != nil && !response.IsSuccess() { c.metrics.getCheckpointsFailureCounter.Inc(ctx) - return nil, GetRetryableError(err, v1beta1.GetJobOverview, response.Status(), DefaultRetries) + return nil, GetRetryableError(err, v1beta2.GetJobOverview, response.Status(), DefaultRetries) } var jobOverviewResponse FlinkJobOverview diff --git a/pkg/controller/flink/client/api_test.go b/pkg/controller/flink/client/api_test.go index f2fd582e..f6ce2fa1 100644 --- a/pkg/controller/flink/client/api_test.go +++ b/pkg/controller/flink/client/api_test.go @@ -4,7 +4,7 @@ import ( "context" "testing" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" "github.com/jarcoal/httpmock" mockScope "github.com/lyft/flytestdlib/promutils" @@ -359,7 +359,7 @@ func TestSubmitStartupFail(t *testing.T) { Parallelism: 10, }) assert.Nil(t, resp) - flinkAppError, _ := err.(*v1beta1.FlinkApplicationError) + flinkAppError, _ := err.(*v1beta2.FlinkApplicationError) assert.True(t, flinkAppError.IsFailFast) assert.EqualError(t, err, "SubmitJob call failed with status 500 and message '"+ @@ -379,7 +379,7 @@ func TestIncompatibleSavepointFail(t *testing.T) { Parallelism: 10, }) assert.Nil(t, resp) - flinkAppError, _ := err.(*v1beta1.FlinkApplicationError) + flinkAppError, _ := err.(*v1beta2.FlinkApplicationError) assert.True(t, flinkAppError.IsFailFast) assert.EqualError(t, err, "SubmitJob call failed with status 500 and message '"+ diff --git a/pkg/controller/flink/client/error_handler.go b/pkg/controller/flink/client/error_handler.go index 
01bbf33e..0e059e45 100644 --- a/pkg/controller/flink/client/error_handler.go +++ b/pkg/controller/flink/client/error_handler.go @@ -7,7 +7,7 @@ import ( v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" "github.com/pkg/errors" "k8s.io/apimachinery/pkg/util/clock" @@ -21,25 +21,25 @@ const ( NoRetries = 0 ) -func GetRetryableError(err error, method v1beta1.FlinkMethod, errorCode string, maxRetries int32) error { +func GetRetryableError(err error, method v1beta2.FlinkMethod, errorCode string, maxRetries int32) error { return GetRetryableErrorWithMessage(err, method, errorCode, maxRetries, "") } -func GetRetryableErrorWithMessage(err error, method v1beta1.FlinkMethod, errorCode string, maxRetries int32, message string) error { +func GetRetryableErrorWithMessage(err error, method v1beta2.FlinkMethod, errorCode string, maxRetries int32, message string) error { appError := getErrorValue(err, method, errorCode, message) return NewFlinkApplicationError(appError.Error(), method, errorCode, true, false, maxRetries) } -func GetNonRetryableError(err error, method v1beta1.FlinkMethod, errorCode string) error { +func GetNonRetryableError(err error, method v1beta2.FlinkMethod, errorCode string) error { return GetNonRetryableErrorWithMessage(err, method, errorCode, "") } -func GetNonRetryableErrorWithMessage(err error, method v1beta1.FlinkMethod, errorCode string, message string) error { +func GetNonRetryableErrorWithMessage(err error, method v1beta2.FlinkMethod, errorCode string, message string) error { appError := getErrorValue(err, method, errorCode, message) return NewFlinkApplicationError(appError.Error(), method, errorCode, false, true, NoRetries) } -func getErrorValue(err error, method v1beta1.FlinkMethod, errorCode string, message string) error { +func getErrorValue(err error, method v1beta2.FlinkMethod, errorCode string, message string) error { if err == nil { 
return errors.New(fmt.Sprintf("%v call failed with status %v and message '%s'", method, errorCode, message)) } @@ -76,7 +76,7 @@ func (r RetryHandler) IsErrorRetryable(err error) bool { if err == nil { return false } - flinkAppError, ok := err.(*v1beta1.FlinkApplicationError) + flinkAppError, ok := err.(*v1beta2.FlinkApplicationError) if ok && flinkAppError != nil { return flinkAppError.IsRetryable } @@ -85,7 +85,7 @@ func (r RetryHandler) IsErrorRetryable(err error) bool { } func (r RetryHandler) IsRetryRemaining(err error, retryCount int32) bool { - flinkAppError, ok := err.(*v1beta1.FlinkApplicationError) + flinkAppError, ok := err.(*v1beta2.FlinkApplicationError) if ok && flinkAppError != nil { return retryCount <= flinkAppError.MaxRetries } @@ -112,7 +112,7 @@ func (r RetryHandler) IsTimeToRetry(clock clock.Clock, lastUpdatedTime time.Time return elapsedTime >= r.GetRetryDelay(retryCount) } -func NewFlinkApplicationError(appError string, method v1beta1.FlinkMethod, errorCode string, isRetryable bool, isFailFast bool, maxRetries int32) *v1beta1.FlinkApplicationError { +func NewFlinkApplicationError(appError string, method v1beta2.FlinkMethod, errorCode string, isRetryable bool, isFailFast bool, maxRetries int32) *v1beta2.FlinkApplicationError { now := v1.Now() - return &v1beta1.FlinkApplicationError{AppError: appError, Method: method, ErrorCode: errorCode, IsRetryable: isRetryable, IsFailFast: isFailFast, MaxRetries: maxRetries, LastErrorUpdateTime: &now} + return &v1beta2.FlinkApplicationError{AppError: appError, Method: method, ErrorCode: errorCode, IsRetryable: isRetryable, IsFailFast: isFailFast, MaxRetries: maxRetries, LastErrorUpdateTime: &now} } diff --git a/pkg/controller/flink/config.go b/pkg/controller/flink/config.go index 1e375b32..78ffca14 100644 --- a/pkg/controller/flink/config.go +++ b/pkg/controller/flink/config.go @@ -6,7 +6,7 @@ import ( "sort" "strings" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + 
"github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" ) const ( @@ -36,43 +36,43 @@ func getValidFraction(x *float64, y float64) float64 { return y } -func getTaskmanagerSlots(app *v1beta1.FlinkApplication) int32 { +func getTaskmanagerSlots(app *v1beta2.FlinkApplication) int32 { return firstNonNil(app.Spec.TaskManagerConfig.TaskSlots, TaskManagerDefaultSlots) } -func getJobmanagerReplicas(app *v1beta1.FlinkApplication) int32 { +func getJobmanagerReplicas(app *v1beta2.FlinkApplication) int32 { return firstNonNil(app.Spec.JobManagerConfig.Replicas, JobManagerDefaultReplicaCount) } -func getServiceAccountName(app *v1beta1.FlinkApplication) string { +func getServiceAccountName(app *v1beta2.FlinkApplication) string { return app.Spec.ServiceAccountName } -func getRPCPort(app *v1beta1.FlinkApplication) int32 { +func getRPCPort(app *v1beta2.FlinkApplication) int32 { return firstNonNil(app.Spec.RPCPort, RPCDefaultPort) } -func getUIPort(app *v1beta1.FlinkApplication) int32 { +func getUIPort(app *v1beta2.FlinkApplication) int32 { return firstNonNil(app.Spec.UIPort, UIDefaultPort) } -func getQueryPort(app *v1beta1.FlinkApplication) int32 { +func getQueryPort(app *v1beta2.FlinkApplication) int32 { return firstNonNil(app.Spec.QueryPort, QueryDefaultPort) } -func getBlobPort(app *v1beta1.FlinkApplication) int32 { +func getBlobPort(app *v1beta2.FlinkApplication) int32 { return firstNonNil(app.Spec.BlobPort, BlobDefaultPort) } -func getInternalMetricsQueryPort(app *v1beta1.FlinkApplication) int32 { +func getInternalMetricsQueryPort(app *v1beta2.FlinkApplication) int32 { return firstNonNil(app.Spec.MetricsQueryPort, MetricsQueryDefaultPort) } -func getMaxCheckpointRestoreAgeSeconds(app *v1beta1.FlinkApplication) int32 { +func getMaxCheckpointRestoreAgeSeconds(app *v1beta2.FlinkApplication) int32 { return firstNonNil(app.Spec.MaxCheckpointRestoreAgeSeconds, MaxCheckpointRestoreAgeSeconds) } -func getTaskManagerMemory(application *v1beta1.FlinkApplication) int64 { +func 
getTaskManagerMemory(application *v1beta2.FlinkApplication) int64 { tmResources := application.Spec.TaskManagerConfig.Resources if tmResources == nil { tmResources = &TaskManagerDefaultResources @@ -81,7 +81,7 @@ func getTaskManagerMemory(application *v1beta1.FlinkApplication) int64 { return tmMemory } -func getJobManagerMemory(application *v1beta1.FlinkApplication) int64 { +func getJobManagerMemory(application *v1beta2.FlinkApplication) int64 { jmResources := application.Spec.JobManagerConfig.Resources if jmResources == nil { jmResources = &JobManagerDefaultResources @@ -95,13 +95,13 @@ func computeHeap(memoryInBytes float64, fraction float64) string { return fmt.Sprintf("%dk", kbs) } -func getTaskManagerHeapMemory(app *v1beta1.FlinkApplication) string { +func getTaskManagerHeapMemory(app *v1beta2.FlinkApplication) string { offHeapMemoryFrac := getValidFraction(app.Spec.TaskManagerConfig.OffHeapMemoryFraction, OffHeapMemoryDefaultFraction) tmMemory := float64(getTaskManagerMemory(app)) return computeHeap(tmMemory, offHeapMemoryFrac) } -func getJobManagerHeapMemory(app *v1beta1.FlinkApplication) string { +func getJobManagerHeapMemory(app *v1beta2.FlinkApplication) string { offHeapMemoryFrac := getValidFraction(app.Spec.JobManagerConfig.OffHeapMemoryFraction, OffHeapMemoryDefaultFraction) jmMemory := float64(getJobManagerMemory(app)) return computeHeap(jmMemory, offHeapMemoryFrac) @@ -109,10 +109,10 @@ func getJobManagerHeapMemory(app *v1beta1.FlinkApplication) string { // Renders the flink configuration overrides stored in FlinkApplication.FlinkConfig into a // YAML string suitable for interpolating into flink-conf.yaml. 
-func renderFlinkConfig(app *v1beta1.FlinkApplication) (string, error) { +func renderFlinkConfig(app *v1beta2.FlinkApplication) (string, error) { config := app.Spec.FlinkConfig.DeepCopy() if config == nil { - config = &v1beta1.FlinkConfig{} + config = &v1beta2.FlinkConfig{} } // we will fill this in later using the versioned service @@ -158,7 +158,7 @@ func renderFlinkConfig(app *v1beta1.FlinkApplication) (string, error) { return s.String(), nil } -func isHAEnabled(flinkConfig v1beta1.FlinkConfig) bool { +func isHAEnabled(flinkConfig v1beta2.FlinkConfig) bool { if val, ok := flinkConfig[HighAvailabilityKey]; ok { value := val.(string) if strings.ToLower(strings.TrimSpace(value)) != "none" { diff --git a/pkg/controller/flink/config_test.go b/pkg/controller/flink/config_test.go index 6f15baca..6bda8130 100644 --- a/pkg/controller/flink/config_test.go +++ b/pkg/controller/flink/config_test.go @@ -6,7 +6,7 @@ import ( "strings" "testing" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" "github.com/stretchr/testify/assert" coreV1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -18,11 +18,11 @@ func TestRenderFlinkConfigOverrides(t *testing.T) { blobPort := int32(1000) offHeapMemoryFrac := 0.5 - yaml, err := renderFlinkConfig(&v1beta1.FlinkApplication{ + yaml, err := renderFlinkConfig(&v1beta2.FlinkApplication{ ObjectMeta: v1.ObjectMeta{ Name: "test-app", }, - Spec: v1beta1.FlinkApplicationSpec{ + Spec: v1beta2.FlinkApplicationSpec{ FlinkConfig: map[string]interface{}{ "akka.timeout": "5s", "taskmanager.network.memory.fraction": 0.1, @@ -30,17 +30,17 @@ func TestRenderFlinkConfigOverrides(t *testing.T) { "jobmanager.rpc.address": "wrong-address", "env.java.opts.jobmanager": "-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=39000 -XX:+UseG1GC", }, - TaskManagerConfig: v1beta1.TaskManagerConfig{ + TaskManagerConfig: v1beta2.TaskManagerConfig{ TaskSlots: &taskSlots, 
OffHeapMemoryFraction: &offHeapMemoryFrac, }, - JobManagerConfig: v1beta1.JobManagerConfig{ + JobManagerConfig: v1beta2.JobManagerConfig{ OffHeapMemoryFraction: &offHeapMemoryFrac, }, BlobPort: &blobPort, }, - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationNew, + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationNew, }, }) @@ -70,22 +70,22 @@ func TestRenderFlinkConfigOverrides(t *testing.T) { } func TestGetTaskSlots(t *testing.T) { - app1 := v1beta1.FlinkApplication{} + app1 := v1beta2.FlinkApplication{} assert.Equal(t, int32(TaskManagerDefaultSlots), getTaskmanagerSlots(&app1)) - app2 := v1beta1.FlinkApplication{} + app2 := v1beta2.FlinkApplication{} taskSlots := int32(4) app2.Spec.TaskManagerConfig.TaskSlots = &taskSlots assert.Equal(t, int32(4), getTaskmanagerSlots(&app2)) } func TestGetJobManagerReplicas(t *testing.T) { - app1 := v1beta1.FlinkApplication{} + app1 := v1beta2.FlinkApplication{} assert.Equal(t, int32(JobManagerDefaultReplicaCount), getJobmanagerReplicas(&app1)) } func TestGetJobManagerReplicasNonZero(t *testing.T) { - app1 := v1beta1.FlinkApplication{} + app1 := v1beta2.FlinkApplication{} replicas := int32(4) app1.Spec.JobManagerConfig.Replicas = &replicas @@ -93,7 +93,7 @@ func TestGetJobManagerReplicasNonZero(t *testing.T) { } func TestGetTaskManagerMemory(t *testing.T) { - app := v1beta1.FlinkApplication{} + app := v1beta2.FlinkApplication{} tmResources := coreV1.ResourceRequirements{ Requests: coreV1.ResourceList{ coreV1.ResourceCPU: resource.MustParse("2"), @@ -111,7 +111,7 @@ func TestGetTaskManagerMemory(t *testing.T) { } func TestGetJobManagerMemory(t *testing.T) { - app := v1beta1.FlinkApplication{} + app := v1beta2.FlinkApplication{} tmResources := coreV1.ResourceRequirements{ Requests: coreV1.ResourceList{ coreV1.ResourceCPU: resource.MustParse("2"), @@ -129,7 +129,7 @@ func TestGetJobManagerMemory(t *testing.T) { } func TestEnsureNoFractionalHeapMemory(t *testing.T) { - app := 
v1beta1.FlinkApplication{} + app := v1beta2.FlinkApplication{} tmResources := coreV1.ResourceRequirements{ Requests: coreV1.ResourceList{ coreV1.ResourceCPU: resource.MustParse("2"), @@ -148,7 +148,7 @@ func TestEnsureNoFractionalHeapMemory(t *testing.T) { } func TestGetTaskManagerHeapMemory(t *testing.T) { - app := v1beta1.FlinkApplication{} + app := v1beta2.FlinkApplication{} tmResources := coreV1.ResourceRequirements{ Requests: coreV1.ResourceList{ coreV1.ResourceCPU: resource.MustParse("2"), @@ -167,7 +167,7 @@ func TestGetTaskManagerHeapMemory(t *testing.T) { } func TestGetJobManagerHeapMemory(t *testing.T) { - app := v1beta1.FlinkApplication{} + app := v1beta2.FlinkApplication{} jmResources := coreV1.ResourceRequirements{ Requests: coreV1.ResourceList{ coreV1.ResourceCPU: resource.MustParse("2"), diff --git a/pkg/controller/flink/container_utils.go b/pkg/controller/flink/container_utils.go index e760b6f9..c576eb10 100644 --- a/pkg/controller/flink/container_utils.go +++ b/pkg/controller/flink/container_utils.go @@ -6,7 +6,7 @@ import ( "github.com/benlaurie/objecthash/go/objecthash" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" "github.com/lyft/flinkk8soperator/pkg/controller/common" "github.com/lyft/flinkk8soperator/pkg/controller/config" "github.com/lyft/flinkk8soperator/pkg/controller/k8" @@ -46,11 +46,11 @@ func getFlinkContainerName(containerName string) string { return containerName } -func getCommonAppLabels(app *v1beta1.FlinkApplication) map[string]string { +func getCommonAppLabels(app *v1beta2.FlinkApplication) map[string]string { return k8.GetAppLabel(app.Name) } -func getCommonAnnotations(app *v1beta1.FlinkApplication) map[string]string { +func getCommonAnnotations(app *v1beta2.FlinkApplication) map[string]string { annotations := common.DuplicateMap(app.Annotations) annotations[FlinkJobProperties] = fmt.Sprintf( "jarName: %s\nparallelism: %d\nentryClass:%s\nprogramArgs:\"%s\"", 
@@ -58,7 +58,7 @@ func getCommonAnnotations(app *v1beta1.FlinkApplication) map[string]string { if app.Spec.RestartNonce != "" { annotations[RestartNonce] = app.Spec.RestartNonce } - if v1beta1.IsBlueGreenDeploymentMode(app.Spec.DeploymentMode) { + if v1beta2.IsBlueGreenDeploymentMode(app.Spec.DeploymentMode) { annotations[FlinkApplicationVersion] = app.Status.UpdatingVersion } return annotations @@ -77,7 +77,7 @@ func GetAWSServiceEnv() []v1.EnvVar { } } -func getFlinkEnv(app *v1beta1.FlinkApplication) ([]v1.EnvVar, error) { +func getFlinkEnv(app *v1beta2.FlinkApplication) ([]v1.EnvVar, error) { env := []v1.EnvVar{} appName := app.Name @@ -115,7 +115,7 @@ func getFlinkEnv(app *v1beta1.FlinkApplication) ([]v1.EnvVar, error) { return env, nil } -func GetFlinkContainerEnv(app *v1beta1.FlinkApplication) []v1.EnvVar { +func GetFlinkContainerEnv(app *v1beta2.FlinkApplication) []v1.EnvVar { env := []v1.EnvVar{} env = append(env, GetAWSServiceEnv()...) flinkEnv, err := getFlinkEnv(app) @@ -126,7 +126,7 @@ func GetFlinkContainerEnv(app *v1beta1.FlinkApplication) []v1.EnvVar { return env } -func ImagePullPolicy(app *v1beta1.FlinkApplication) v1.PullPolicy { +func ImagePullPolicy(app *v1beta2.FlinkApplication) v1.PullPolicy { if app.Spec.ImagePullPolicy == "" { return v1.PullIfNotPresent } @@ -162,7 +162,7 @@ func ComputeDeploymentHash(deployment appsv1.Deployment) ([]byte, error) { // Returns an 8 character hash sensitive to the application name, labels, annotations, and spec. 
// TODO: we may need to add collision-avoidance to this -func HashForApplication(app *v1beta1.FlinkApplication) string { +func HashForApplication(app *v1beta2.FlinkApplication) string { // we round-trip through json to normalize the deployment objects jmDeployment := jobmanagerTemplate(app) jmDeployment.OwnerReferences = make([]metav1.OwnerReference, 0) @@ -197,7 +197,7 @@ func HashForApplication(app *v1beta1.FlinkApplication) string { return fmt.Sprintf("%08x", hasher.Sum32()) } -func InjectOperatorCustomizedConfig(deployment *appsv1.Deployment, app *v1beta1.FlinkApplication, hash string, deploymentType string) { +func InjectOperatorCustomizedConfig(deployment *appsv1.Deployment, app *v1beta2.FlinkApplication, hash string, deploymentType string) { var newContainers []v1.Container for _, container := range deployment.Spec.Template.Spec.Containers { var newEnv []v1.EnvVar @@ -226,8 +226,8 @@ func InjectOperatorCustomizedConfig(deployment *appsv1.Deployment, app *v1beta1. } // Injects labels and environment variables required for blue green deploys -func GetDeploySpecificEnv(app *v1beta1.FlinkApplication) []v1.EnvVar { - if !v1beta1.IsBlueGreenDeploymentMode(app.Spec.DeploymentMode) { +func GetDeploySpecificEnv(app *v1beta2.FlinkApplication) []v1.EnvVar { + if !v1beta2.IsBlueGreenDeploymentMode(app.Spec.DeploymentMode) { return []v1.EnvVar{} } diff --git a/pkg/controller/flink/container_utils_test.go b/pkg/controller/flink/container_utils_test.go index a6f4abb7..85bb7a5f 100644 --- a/pkg/controller/flink/container_utils_test.go +++ b/pkg/controller/flink/container_utils_test.go @@ -3,14 +3,14 @@ package flink import ( "testing" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" "github.com/stretchr/testify/assert" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" ) func TestHashForApplication(t *testing.T) { - app := v1beta1.FlinkApplication{} + app := v1beta2.FlinkApplication{} taskSlots 
:= int32(8) app.Spec.TaskManagerConfig.TaskSlots = &taskSlots app.Spec.Parallelism = 4 @@ -49,7 +49,7 @@ func TestHashForApplication(t *testing.T) { } func TestHashForDifferentResourceScales(t *testing.T) { - app1 := v1beta1.FlinkApplication{} + app1 := v1beta2.FlinkApplication{} app1.Spec.TaskManagerConfig.Resources = &v1.ResourceRequirements{ Requests: v1.ResourceList{ v1.ResourceCPU: resource.MustParse("0.5"), @@ -61,7 +61,7 @@ func TestHashForDifferentResourceScales(t *testing.T) { }, } - app2 := v1beta1.FlinkApplication{} + app2 := v1beta2.FlinkApplication{} app2.Spec.TaskManagerConfig.Resources = &v1.ResourceRequirements{ Requests: v1.ResourceList{ v1.ResourceCPU: resource.MustParse("500m"), diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index 11c4708f..8aaae8af 100644 --- a/pkg/controller/flink/flink.go +++ b/pkg/controller/flink/flink.go @@ -14,7 +14,7 @@ import ( controllerConfig "github.com/lyft/flinkk8soperator/pkg/controller/config" "github.com/lyft/flytestdlib/logger" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" "github.com/lyft/flinkk8soperator/pkg/controller/flink/client" "github.com/lyft/flinkk8soperator/pkg/controller/k8" "github.com/lyft/flytestdlib/promutils" @@ -43,71 +43,71 @@ const failingIntervalThreshold = 1 * time.Minute // Interface to manage Flink Application in Kubernetes type ControllerInterface interface { // Creates a Flink cluster with necessary Job Manager, Task Managers and services for UI - CreateCluster(ctx context.Context, application *v1beta1.FlinkApplication) error + CreateCluster(ctx context.Context, application *v1beta2.FlinkApplication) error // Cancels the running/active jobs in the Cluster for the Application after savepoint is created - CancelWithSavepoint(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) + CancelWithSavepoint(ctx context.Context, application *v1beta2.FlinkApplication, 
hash string) (string, error) // Force cancels the running/active job without taking a savepoint - ForceCancel(ctx context.Context, application *v1beta1.FlinkApplication, hash string) error + ForceCancel(ctx context.Context, application *v1beta2.FlinkApplication, hash string) error // Starts the Job in the Flink Cluster - StartFlinkJob(ctx context.Context, application *v1beta1.FlinkApplication, hash string, + StartFlinkJob(ctx context.Context, application *v1beta2.FlinkApplication, hash string, jarName string, parallelism int32, entryClass string, programArgs string, allowNonRestoredState bool, savepointPath string) (string, error) // Savepoint creation is asynchronous. // Polls the status of the Savepoint, using the triggerID - GetSavepointStatus(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.SavepointResponse, error) + GetSavepointStatus(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.SavepointResponse, error) // Check if the Flink Kubernetes Cluster is Ready. // Checks if all the pods of task and job managers are ready. 
- IsClusterReady(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) + IsClusterReady(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) // Checks to see if the Flink Cluster is ready to handle API requests - IsServiceReady(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) + IsServiceReady(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) // Returns the list of Jobs running on the Flink Cluster for the Application - GetJobsForApplication(ctx context.Context, application *v1beta1.FlinkApplication, hash string) ([]client.FlinkJob, error) + GetJobsForApplication(ctx context.Context, application *v1beta2.FlinkApplication, hash string) ([]client.FlinkJob, error) // Returns the current job for the application, if one exists in the cluster - GetJobForApplication(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) + GetJobForApplication(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) // Returns the pair of deployments (tm/jm) for the current version of the application - GetCurrentDeploymentsForApp(ctx context.Context, application *v1beta1.FlinkApplication) (*common.FlinkDeployment, error) + GetCurrentDeploymentsForApp(ctx context.Context, application *v1beta2.FlinkApplication) (*common.FlinkDeployment, error) // Deletes all old resources (deployments and services) for the app - DeleteOldResourcesForApp(ctx context.Context, app *v1beta1.FlinkApplication) error + DeleteOldResourcesForApp(ctx context.Context, app *v1beta2.FlinkApplication) error // Attempts to find an externalized checkpoint for the job. This can be used to recover an application that is not // able to savepoint for some reason. 
- FindExternalizedCheckpoint(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) + FindExternalizedCheckpoint(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (string, error) // Logs an event to the FlinkApplication resource and to the operator log - LogEvent(ctx context.Context, app *v1beta1.FlinkApplication, eventType string, reason string, message string) + LogEvent(ctx context.Context, app *v1beta2.FlinkApplication, eventType string, reason string, message string) // Compares and updates new cluster status with current cluster status // Returns true if there is a change in ClusterStatus - CompareAndUpdateClusterStatus(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) + CompareAndUpdateClusterStatus(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) // Compares and updates new job status with current job status // Returns true if there is a change in JobStatus - CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, hash string) (bool, error) + CompareAndUpdateJobStatus(ctx context.Context, app *v1beta2.FlinkApplication, hash string) (bool, error) // Gets the last updated cluster status - GetLatestClusterStatus(ctx context.Context, app *v1beta1.FlinkApplication) v1beta1.FlinkClusterStatus + GetLatestClusterStatus(ctx context.Context, app *v1beta2.FlinkApplication) v1beta2.FlinkClusterStatus // Gets the last updated job status - GetLatestJobStatus(ctx context.Context, app *v1beta1.FlinkApplication) v1beta1.FlinkJobStatus + GetLatestJobStatus(ctx context.Context, app *v1beta2.FlinkApplication) v1beta2.FlinkJobStatus // Gets the last updated job ID - GetLatestJobID(ctx context.Context, app *v1beta1.FlinkApplication) string + GetLatestJobID(ctx context.Context, app *v1beta2.FlinkApplication) string // Updates the jobID on the latest jobStatus - UpdateLatestJobID(ctx context.Context, app 
*v1beta1.FlinkApplication, jobID string) + UpdateLatestJobID(ctx context.Context, app *v1beta2.FlinkApplication, jobID string) // Update jobStatus on the latest ApplicationStatus - UpdateLatestJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, jobStatus v1beta1.FlinkJobStatus) + UpdateLatestJobStatus(ctx context.Context, app *v1beta2.FlinkApplication, jobStatus v1beta2.FlinkJobStatus) } func NewController(k8sCluster k8.ClusterInterface, eventRecorder record.EventRecorder, config controllerConfig.RuntimeConfig) ControllerInterface { @@ -148,7 +148,7 @@ type Controller struct { eventRecorder record.EventRecorder } -func getURLFromApp(application *v1beta1.FlinkApplication, hash string) string { +func getURLFromApp(application *v1beta2.FlinkApplication, hash string) string { service := VersionedJobManagerServiceName(application, hash) cfg := controllerConfig.GetConfig() if cfg.UseProxy { @@ -157,7 +157,7 @@ func getURLFromApp(application *v1beta1.FlinkApplication, hash string) string { return fmt.Sprintf("http://%s.%s:%d", service, application.Namespace, port) } -func getClusterOverviewURL(app *v1beta1.FlinkApplication) string { +func getClusterOverviewURL(app *v1beta2.FlinkApplication) string { externalURL := getExternalURLFromApp(app) if externalURL != "" { return fmt.Sprintf(externalURL + client.WebUIAnchor + client.GetClusterOverviewURL) @@ -165,7 +165,7 @@ func getClusterOverviewURL(app *v1beta1.FlinkApplication) string { return "" } -func getJobOverviewURL(app *v1beta1.FlinkApplication) string { +func getJobOverviewURL(app *v1beta2.FlinkApplication) string { externalURL := getExternalURLFromApp(app) if externalURL != "" { return fmt.Sprintf(externalURL+client.WebUIAnchor+client.GetJobsOverviewURL, app.Status.ApplicationStatus[getCurrentStatusIndex(app)].JobStatus.JobID) @@ -173,7 +173,7 @@ func getJobOverviewURL(app *v1beta1.FlinkApplication) string { return "" } -func getExternalURLFromApp(application *v1beta1.FlinkApplication) string { +func 
getExternalURLFromApp(application *v1beta2.FlinkApplication) string { cfg := controllerConfig.GetConfig() // Local environment if cfg.UseProxy { @@ -198,7 +198,7 @@ func GetActiveFlinkJobs(jobs []client.FlinkJob) []client.FlinkJob { // Returns true iff the deployment exactly matches the flink application // This check only validates that the name of the deployment is as expected. // This is to add extra protection, as labels to any deployments -func (f *Controller) deploymentMatches(ctx context.Context, deployment *v1.Deployment, application *v1beta1.FlinkApplication, hash string) bool { +func (f *Controller) deploymentMatches(ctx context.Context, deployment *v1.Deployment, application *v1beta2.FlinkApplication, hash string) bool { if DeploymentIsTaskmanager(deployment) { return TaskManagerDeploymentMatches(deployment, application, hash) } @@ -210,7 +210,7 @@ func (f *Controller) deploymentMatches(ctx context.Context, deployment *v1.Deplo return false } -func (f *Controller) GetJobsForApplication(ctx context.Context, application *v1beta1.FlinkApplication, hash string) ([]client.FlinkJob, error) { +func (f *Controller) GetJobsForApplication(ctx context.Context, application *v1beta2.FlinkApplication, hash string) ([]client.FlinkJob, error) { jobResponse, err := f.flinkClient.GetJobs(ctx, getURLFromApp(application, hash)) if err != nil { return nil, err @@ -219,7 +219,7 @@ func (f *Controller) GetJobsForApplication(ctx context.Context, application *v1b return jobResponse.Jobs, nil } -func (f *Controller) GetJobForApplication(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { +func (f *Controller) GetJobForApplication(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { if application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID == "" { return nil, nil } @@ -234,7 +234,7 @@ func (f *Controller) GetJobForApplication(ctx 
context.Context, application *v1be // The operator for now assumes and is intended to run single application per Flink Cluster. // Once we move to run multiple applications, this has to be removed/updated -func (f *Controller) getJobIDForApplication(application *v1beta1.FlinkApplication) (string, error) { +func (f *Controller) getJobIDForApplication(application *v1beta2.FlinkApplication) (string, error) { if application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID != "" { return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID, nil } @@ -242,7 +242,7 @@ func (f *Controller) getJobIDForApplication(application *v1beta1.FlinkApplicatio return "", errors.New("active job id not available") } -func (f *Controller) CancelWithSavepoint(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) { +func (f *Controller) CancelWithSavepoint(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (string, error) { jobID, err := f.getJobIDForApplication(application) if err != nil { return "", err @@ -250,7 +250,7 @@ func (f *Controller) CancelWithSavepoint(ctx context.Context, application *v1bet return f.flinkClient.CancelJobWithSavepoint(ctx, getURLFromApp(application, hash), jobID) } -func (f *Controller) ForceCancel(ctx context.Context, application *v1beta1.FlinkApplication, hash string) error { +func (f *Controller) ForceCancel(ctx context.Context, application *v1beta2.FlinkApplication, hash string) error { jobID, err := f.getJobIDForApplication(application) if err != nil { return err @@ -258,7 +258,7 @@ func (f *Controller) ForceCancel(ctx context.Context, application *v1beta1.Flink return f.flinkClient.ForceCancelJob(ctx, getURLFromApp(application, hash), jobID) } -func (f *Controller) CreateCluster(ctx context.Context, application *v1beta1.FlinkApplication) error { +func (f *Controller) CreateCluster(ctx context.Context, application 
*v1beta2.FlinkApplication) error { newlyCreatedJm, err := f.jobManager.CreateIfNotExist(ctx, application) if err != nil { logger.Errorf(ctx, "Job manager cluster creation did not succeed %v", err) @@ -284,7 +284,7 @@ func (f *Controller) CreateCluster(ctx context.Context, application *v1beta1.Fli return nil } -func (f *Controller) StartFlinkJob(ctx context.Context, application *v1beta1.FlinkApplication, hash string, +func (f *Controller) StartFlinkJob(ctx context.Context, application *v1beta2.FlinkApplication, hash string, jarName string, parallelism int32, entryClass string, programArgs string, allowNonRestoredState bool, savepointPath string) (string, error) { response, err := f.flinkClient.SubmitJob( @@ -308,7 +308,7 @@ func (f *Controller) StartFlinkJob(ctx context.Context, application *v1beta1.Fli return response.JobID, nil } -func (f *Controller) GetSavepointStatus(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.SavepointResponse, error) { +func (f *Controller) GetSavepointStatus(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.SavepointResponse, error) { jobID, err := f.getJobIDForApplication(application) if err != nil { return nil, err @@ -316,7 +316,7 @@ func (f *Controller) GetSavepointStatus(ctx context.Context, application *v1beta return f.flinkClient.CheckSavepointStatus(ctx, getURLFromApp(application, hash), jobID, application.Status.SavepointTriggerID) } -func (f *Controller) IsClusterReady(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { +func (f *Controller) IsClusterReady(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { deployments, err := f.GetCurrentDeploymentsForApp(ctx, application) if deployments == nil || err != nil { return false, err @@ -334,7 +334,7 @@ func (f *Controller) IsClusterReady(ctx context.Context, application *v1beta1.Fl return true, nil } -func (f *Controller) IsServiceReady(ctx context.Context, application 
*v1beta1.FlinkApplication, hash string) (bool, error) { +func (f *Controller) IsServiceReady(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) { resp, err := f.flinkClient.GetClusterOverview(ctx, getURLFromApp(application, hash)) if err != nil { logger.Infof(ctx, "Error response indicating flink API is not ready to handle request %v", err) @@ -374,7 +374,7 @@ func listToFlinkDeployment(ds []v1.Deployment, hash string) *common.FlinkDeploym return &fd } -func getCurrentHash(app *v1beta1.FlinkApplication) string { +func getCurrentHash(app *v1beta2.FlinkApplication) string { appHash := HashForApplication(app) if appHash == app.Status.FailedDeployHash { @@ -386,7 +386,7 @@ func getCurrentHash(app *v1beta1.FlinkApplication) string { // Gets the current deployment and any other deployments for the application. The current deployment will be the one // that matches the FlinkApplication, unless the FailedDeployHash is set, in which case it will be the one with that // hash. -func (f *Controller) GetCurrentDeploymentsForApp(ctx context.Context, application *v1beta1.FlinkApplication) (*common.FlinkDeployment, error) { +func (f *Controller) GetCurrentDeploymentsForApp(ctx context.Context, application *v1beta2.FlinkApplication) (*common.FlinkDeployment, error) { labels := k8.GetAppLabel(application.Name) curHash := getCurrentHash(application) labels[FlinkAppHash] = curHash @@ -407,7 +407,7 @@ func (f *Controller) GetCurrentDeploymentsForApp(ctx context.Context, applicatio return cur, nil } -func (f *Controller) DeleteOldResourcesForApp(ctx context.Context, app *v1beta1.FlinkApplication) error { +func (f *Controller) DeleteOldResourcesForApp(ctx context.Context, app *v1beta2.FlinkApplication) error { curHash := getCurrentHash(app) appLabel := k8.GetAppLabel(app.Name) @@ -461,7 +461,7 @@ func (f *Controller) DeleteOldResourcesForApp(ctx context.Context, app *v1beta1. 
return nil } -func (f *Controller) FindExternalizedCheckpoint(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) { +func (f *Controller) FindExternalizedCheckpoint(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (string, error) { checkpoint, err := f.flinkClient.GetLatestCheckpoint(ctx, getURLFromApp(application, hash), f.GetLatestJobID(ctx, application)) var checkpointPath string var checkpointTime int64 @@ -495,18 +495,18 @@ func isCheckpointOldToRecover(checkpointTime int64, maxCheckpointRecoveryAgeSec return time.Since(time.Unix(checkpointTime, 0)) > (time.Duration(maxCheckpointRecoveryAgeSec) * time.Second) } -func (f *Controller) LogEvent(ctx context.Context, app *v1beta1.FlinkApplication, eventType string, reason string, message string) { +func (f *Controller) LogEvent(ctx context.Context, app *v1beta2.FlinkApplication, eventType string, reason string, message string) { f.eventRecorder.Event(app, eventType, reason, message) logger.Infof(ctx, "Logged %s event: %s: %s", eventType, reason, message) } // Gets and updates the cluster status -func (f *Controller) CompareAndUpdateClusterStatus(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) { +func (f *Controller) CompareAndUpdateClusterStatus(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) { // Error retrieving cluster / taskmanagers overview (after startup/readiness) --> Red // If there is an error this loop will return with Health set to Red currIndex := getCurrentStatusIndex(application) oldClusterStatus := application.Status.ApplicationStatus[currIndex].ClusterStatus - application.Status.ApplicationStatus[currIndex].ClusterStatus.Health = v1beta1.Red + application.Status.ApplicationStatus[currIndex].ClusterStatus.Health = v1beta2.Red deployment, err := f.GetCurrentDeploymentsForApp(ctx, application) if deployment == nil || err != nil { @@ -535,9 +535,9 @@ func (f 
*Controller) CompareAndUpdateClusterStatus(ctx context.Context, applicat // Healthy TaskManagers == Number of taskmanagers --> Green // Else --> Yellow if application.Status.ApplicationStatus[currIndex].ClusterStatus.HealthyTaskManagers == deployment.Taskmanager.Status.Replicas { - application.Status.ApplicationStatus[currIndex].ClusterStatus.Health = v1beta1.Green + application.Status.ApplicationStatus[currIndex].ClusterStatus.Health = v1beta2.Green } else { - application.Status.ApplicationStatus[currIndex].ClusterStatus.Health = v1beta1.Yellow + application.Status.ApplicationStatus[currIndex].ClusterStatus.Health = v1beta2.Yellow } return !apiequality.Semantic.DeepEqual(oldClusterStatus, application.Status.ApplicationStatus[currIndex].ClusterStatus), nil @@ -556,7 +556,7 @@ func getHealthyTaskManagerCount(response *client.TaskManagersResponse) int32 { } -func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, hash string) (bool, error) { +func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta2.FlinkApplication, hash string) (bool, error) { currIndex := getCurrentStatusIndex(app) if app.Status.ApplicationStatus[currIndex].JobStatus.LastFailingTime == nil { initTime := metav1.NewTime(time.Time{}) @@ -576,7 +576,7 @@ func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1 // Job status app.Status.ApplicationStatus[currIndex].JobStatus.JobOverviewURL = getJobOverviewURL(app) - app.Status.ApplicationStatus[currIndex].JobStatus.State = v1beta1.JobState(jobResponse.State) + app.Status.ApplicationStatus[currIndex].JobStatus.State = v1beta2.JobState(jobResponse.State) jobStartTime := metav1.NewTime(time.Unix(jobResponse.StartTime/1000, 0)) app.Status.ApplicationStatus[currIndex].JobStatus.StartTime = &jobStartTime @@ -625,27 +625,27 @@ func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1 // Time since last successful checkpoint > maxCheckpointTime --> 
YELLOW // Else --> Green - if app.Status.ApplicationStatus[currIndex].JobStatus.State == v1beta1.Failing || + if app.Status.ApplicationStatus[currIndex].JobStatus.State == v1beta2.Failing || time.Since(app.Status.ApplicationStatus[currIndex].JobStatus.LastFailingTime.Time) < failingIntervalThreshold || verticesInCreated > 0 { - app.Status.ApplicationStatus[currIndex].JobStatus.Health = v1beta1.Red + app.Status.ApplicationStatus[currIndex].JobStatus.Health = v1beta2.Red } else if time.Since(time.Unix(int64(lastCheckpointAgeSeconds), 0)) < maxCheckpointTime || runningTasks < totalTasks { - app.Status.ApplicationStatus[currIndex].JobStatus.Health = v1beta1.Yellow + app.Status.ApplicationStatus[currIndex].JobStatus.Health = v1beta2.Yellow } else { - app.Status.ApplicationStatus[currIndex].JobStatus.Health = v1beta1.Green + app.Status.ApplicationStatus[currIndex].JobStatus.Health = v1beta2.Green } // Update LastFailingTime - if app.Status.ApplicationStatus[currIndex].JobStatus.State == v1beta1.Failing { + if app.Status.ApplicationStatus[currIndex].JobStatus.State == v1beta2.Failing { currTime := metav1.Now() app.Status.ApplicationStatus[currIndex].JobStatus.LastFailingTime = &currTime } return !apiequality.Semantic.DeepEqual(oldJobStatus, app.Status.ApplicationStatus[currIndex].JobStatus), err } -func getCurrentStatusIndex(app *v1beta1.FlinkApplication) int32 { +func getCurrentStatusIndex(app *v1beta2.FlinkApplication) int32 { // In the Running phase, we always have only 1 job - if v1beta1.IsRunningPhase(app.Status.Phase) { + if v1beta2.IsRunningPhase(app.Status.Phase) { return 0 } @@ -655,24 +655,24 @@ func getCurrentStatusIndex(app *v1beta1.FlinkApplication) int32 { return app.Status.DesiredApplicationCount - indexOffset } -func (f *Controller) GetLatestClusterStatus(ctx context.Context, application *v1beta1.FlinkApplication) v1beta1.FlinkClusterStatus { +func (f *Controller) GetLatestClusterStatus(ctx context.Context, application *v1beta2.FlinkApplication) 
v1beta2.FlinkClusterStatus { return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].ClusterStatus } -func (f *Controller) GetLatestJobStatus(ctx context.Context, application *v1beta1.FlinkApplication) v1beta1.FlinkJobStatus { +func (f *Controller) GetLatestJobStatus(ctx context.Context, application *v1beta2.FlinkApplication) v1beta2.FlinkJobStatus { return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus } -func (f *Controller) UpdateLatestJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, jobStatus v1beta1.FlinkJobStatus) { +func (f *Controller) UpdateLatestJobStatus(ctx context.Context, app *v1beta2.FlinkApplication, jobStatus v1beta2.FlinkJobStatus) { app.Status.ApplicationStatus[getCurrentStatusIndex(app)].JobStatus = jobStatus } -func (f *Controller) GetLatestJobID(ctx context.Context, application *v1beta1.FlinkApplication) string { +func (f *Controller) GetLatestJobID(ctx context.Context, application *v1beta2.FlinkApplication) string { return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID } -func (f *Controller) UpdateLatestJobID(ctx context.Context, app *v1beta1.FlinkApplication, jobID string) { +func (f *Controller) UpdateLatestJobID(ctx context.Context, app *v1beta2.FlinkApplication, jobID string) { app.Status.ApplicationStatus[getCurrentStatusIndex(app)].JobStatus.JobID = jobID } diff --git a/pkg/controller/flink/flink_test.go b/pkg/controller/flink/flink_test.go index 3217a6e1..33555d78 100644 --- a/pkg/controller/flink/flink_test.go +++ b/pkg/controller/flink/flink_test.go @@ -11,7 +11,7 @@ import ( "time" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" "github.com/lyft/flinkk8soperator/pkg/controller/common" "github.com/lyft/flinkk8soperator/pkg/controller/flink/client" clientMock "github.com/lyft/flinkk8soperator/pkg/controller/flink/client/mock" @@ -57,18 +57,18 @@ func 
getTestFlinkController() Controller { } } -func getFlinkTestApp() v1beta1.FlinkApplication { - app := v1beta1.FlinkApplication{ +func getFlinkTestApp() v1beta2.FlinkApplication { + app := v1beta2.FlinkApplication{ TypeMeta: metaV1.TypeMeta{ - Kind: v1beta1.FlinkApplicationKind, - APIVersion: v1beta1.SchemeGroupVersion.String(), + Kind: v1beta2.FlinkApplicationKind, + APIVersion: v1beta2.SchemeGroupVersion.String(), }, } app.Spec.Parallelism = 8 app.Name = testAppName app.Namespace = testNamespace - statuses := append(app.Status.ApplicationStatus, v1beta1.FlinkApplicationVersionStatus{ - JobStatus: v1beta1.FlinkJobStatus{ + statuses := append(app.Status.ApplicationStatus, v1beta2.FlinkApplicationVersionStatus{ + JobStatus: v1beta2.FlinkJobStatus{ JobID: testJobID, }, }) @@ -156,7 +156,7 @@ func TestFlinkApplicationChanged(t *testing.T) { assert.Nil(t, err) } -func testJobPropTriggersChange(t *testing.T, changeFun func(application *v1beta1.FlinkApplication)) { +func testJobPropTriggersChange(t *testing.T, changeFun func(application *v1beta2.FlinkApplication)) { flinkControllerForTest := getTestFlinkController() flinkApp := getFlinkTestApp() @@ -191,19 +191,19 @@ func testJobPropTriggersChange(t *testing.T, changeFun func(application *v1beta1 } func TestFlinkApplicationChangedJobProps(t *testing.T) { - testJobPropTriggersChange(t, func(app *v1beta1.FlinkApplication) { + testJobPropTriggersChange(t, func(app *v1beta2.FlinkApplication) { app.Spec.Parallelism = 3 }) - testJobPropTriggersChange(t, func(app *v1beta1.FlinkApplication) { + testJobPropTriggersChange(t, func(app *v1beta2.FlinkApplication) { app.Spec.JarName = "another.jar" }) - testJobPropTriggersChange(t, func(app *v1beta1.FlinkApplication) { + testJobPropTriggersChange(t, func(app *v1beta2.FlinkApplication) { app.Spec.ProgramArgs = "--test-change" }) - testJobPropTriggersChange(t, func(app *v1beta1.FlinkApplication) { + testJobPropTriggersChange(t, func(app *v1beta2.FlinkApplication) { app.Spec.EntryClass = 
"com.another.Class" }) } @@ -389,10 +389,10 @@ func TestCreateCluster(t *testing.T) { mockJobManager := flinkControllerForTest.jobManager.(*mock.JobManagerController) mockTaskManager := flinkControllerForTest.taskManager.(*mock.TaskManagerController) - mockJobManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { + mockJobManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { return true, nil } - mockTaskManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { + mockTaskManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { return true, nil } err := flinkControllerForTest.CreateCluster(context.Background(), &flinkApp) @@ -405,10 +405,10 @@ func TestCreateClusterJmErr(t *testing.T) { mockJobManager := flinkControllerForTest.jobManager.(*mock.JobManagerController) mockTaskManager := flinkControllerForTest.taskManager.(*mock.TaskManagerController) - mockJobManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { + mockJobManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { return false, errors.New("jm failed") } - mockTaskManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { + mockTaskManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { assert.False(t, true) return false, nil } @@ -422,10 +422,10 @@ func TestCreateClusterTmErr(t *testing.T) { mockJobManager := flinkControllerForTest.jobManager.(*mock.JobManagerController) mockTaskManager := flinkControllerForTest.taskManager.(*mock.TaskManagerController) - mockJobManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, 
error) { + mockJobManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { return true, nil } - mockTaskManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { + mockTaskManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { return false, errors.New("tm failed") } err := flinkControllerForTest.CreateCluster(context.Background(), &flinkApp) @@ -671,7 +671,7 @@ func TestClusterStatusUpdated(t *testing.T) { assert.Equal(t, int32(1), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskSlots) assert.Equal(t, int32(0), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.AvailableTaskSlots) assert.Equal(t, int32(1), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.HealthyTaskManagers) - assert.Equal(t, v1beta1.Green, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health) + assert.Equal(t, v1beta2.Green, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health) assert.Equal(t, "app-name.lyft.xyz/#/overview", flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.ClusterOverviewURL) } @@ -682,7 +682,7 @@ func TestNoClusterStatusChange(t *testing.T) { flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskSlots = int32(1) flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.AvailableTaskSlots = int32(0) flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.HealthyTaskManagers = int32(1) - flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health = v1beta1.Green + flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health = v1beta2.Green 
flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskManagers = int32(1) mockK8Cluster := flinkControllerForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.GetDeploymentsWithLabelFunc = func(ctx context.Context, namespace string, labelMap map[string]string) (*v1.DeploymentList, error) { @@ -775,7 +775,7 @@ func TestHealthyTaskmanagers(t *testing.T) { assert.Equal(t, int32(1), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskSlots) assert.Equal(t, int32(0), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.AvailableTaskSlots) assert.Equal(t, int32(0), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.HealthyTaskManagers) - assert.Equal(t, v1beta1.Yellow, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health) + assert.Equal(t, v1beta2.Yellow, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health) } @@ -837,9 +837,9 @@ func TestJobStatusUpdated(t *testing.T) { _, err = flinkControllerForTest.CompareAndUpdateJobStatus(context.Background(), &flinkApp, "hash") assert.Nil(t, err) - assert.Equal(t, v1beta1.Running, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.State) + assert.Equal(t, v1beta2.Running, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.State) assert.Equal(t, &expectedTime, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.StartTime) - assert.Equal(t, v1beta1.Yellow, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.Health) + assert.Equal(t, v1beta2.Yellow, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.Health) assert.Equal(t, int32(0), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.FailedCheckpointCount) assert.Equal(t, int32(4), 
flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.CompletedCheckpointCount) @@ -866,13 +866,13 @@ func TestNoJobStatusChange(t *testing.T) { app1 := getFlinkTestApp() mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) - app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.State = v1beta1.Running + app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.State = v1beta2.Running app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.StartTime = &metaTime app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.LastCheckpointTime = &metaTime app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.CompletedCheckpointCount = int32(4) app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.JobRestartCount = int32(1) app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.FailedCheckpointCount = int32(0) - app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.Health = v1beta1.Green + app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.Health = v1beta2.Green app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.RestoreTime = &metaTime app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.RestorePath = "/test/externalpath" app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.JobOverviewURL = "" @@ -918,7 +918,7 @@ func TestGetAndUpdateJobStatusHealth(t *testing.T) { app1 := getFlinkTestApp() mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) - app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.State = v1beta1.Failing + app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.State = v1beta2.Failing app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.LastFailingTime = &lastFailedTime mockJmClient.GetJobOverviewFunc = func(ctx context.Context, url string, 
jobID string) (*client.FlinkJobOverview, error) { @@ -944,7 +944,7 @@ func TestGetAndUpdateJobStatusHealth(t *testing.T) { assert.Nil(t, err) // Job is in a RUNNING state but was in a FAILING state in the last 1 minute, so we expect // JobStatus.Health to be Red - assert.Equal(t, app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.Health, v1beta1.Red) + assert.Equal(t, app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.Health, v1beta2.Red) } diff --git a/pkg/controller/flink/ingress.go b/pkg/controller/flink/ingress.go index e45e4614..2ab4e187 100644 --- a/pkg/controller/flink/ingress.go +++ b/pkg/controller/flink/ingress.go @@ -3,11 +3,11 @@ package flink import ( "regexp" - flinkapp "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + flinkapp "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" "github.com/lyft/flinkk8soperator/pkg/controller/common" "github.com/lyft/flinkk8soperator/pkg/controller/config" "github.com/lyft/flinkk8soperator/pkg/controller/k8" - "k8s.io/api/extensions/v1beta1" + "k8s.io/api/extensions/v1beta2" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" ) @@ -22,7 +22,7 @@ func GetFlinkUIIngressURL(jobName string) string { return ReplaceJobURL(config.GetConfig().FlinkIngressURLFormat, jobName) } -func FetchJobManagerIngressCreateObj(app *flinkapp.FlinkApplication) *v1beta1.Ingress { +func FetchJobManagerIngressCreateObj(app *flinkapp.FlinkApplication) *v1beta2.Ingress { podLabels := common.DuplicateMap(app.Labels) podLabels = common.CopyMap(podLabels, k8.GetAppLabel(app.Name)) @@ -35,7 +35,7 @@ func FetchJobManagerIngressCreateObj(app *flinkapp.FlinkApplication) *v1beta1.In }, } - backend := v1beta1.IngressBackend{ + backend := v1beta2.IngressBackend{ ServiceName: app.Name, ServicePort: intstr.IntOrString{ Type: intstr.Int, @@ -43,22 +43,22 @@ func FetchJobManagerIngressCreateObj(app *flinkapp.FlinkApplication) *v1beta1.In }, } - ingressSpec := v1beta1.IngressSpec{ - 
Rules: []v1beta1.IngressRule{{ + ingressSpec := v1beta2.IngressSpec{ + Rules: []v1beta2.IngressRule{{ Host: GetFlinkUIIngressURL(app.Name), - IngressRuleValue: v1beta1.IngressRuleValue{ - HTTP: &v1beta1.HTTPIngressRuleValue{ - Paths: []v1beta1.HTTPIngressPath{{ + IngressRuleValue: v1beta2.IngressRuleValue{ + HTTP: &v1beta2.HTTPIngressRuleValue{ + Paths: []v1beta2.HTTPIngressPath{{ Backend: backend, }}, }, }, }}, } - return &v1beta1.Ingress{ + return &v1beta2.Ingress{ ObjectMeta: ingressMeta, TypeMeta: v1.TypeMeta{ - APIVersion: v1beta1.SchemeGroupVersion.String(), + APIVersion: v1beta2.SchemeGroupVersion.String(), Kind: k8.Ingress, }, Spec: ingressSpec, diff --git a/pkg/controller/flink/job_manager_controller.go b/pkg/controller/flink/job_manager_controller.go index b0116b8d..6cb6b453 100644 --- a/pkg/controller/flink/job_manager_controller.go +++ b/pkg/controller/flink/job_manager_controller.go @@ -4,7 +4,7 @@ import ( "context" "fmt" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" "github.com/lyft/flinkk8soperator/pkg/controller/common" "github.com/lyft/flinkk8soperator/pkg/controller/config" "github.com/lyft/flinkk8soperator/pkg/controller/k8" @@ -41,12 +41,12 @@ const ( FlinkInternalMetricPortName = "metrics" ) -func VersionedJobManagerServiceName(app *v1beta1.FlinkApplication, hash string) string { +func VersionedJobManagerServiceName(app *v1beta2.FlinkApplication, hash string) string { return fmt.Sprintf("%s-%s", app.Name, hash) } type JobManagerControllerInterface interface { - CreateIfNotExist(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) + CreateIfNotExist(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) } func NewJobManagerController(k8sCluster k8.ClusterInterface, config config.RuntimeConfig) JobManagerControllerInterface { @@ -85,7 +85,7 @@ type jobManagerMetrics struct { ingressCreationFailure labeled.Counter } -func (j 
*JobManagerController) CreateIfNotExist(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { +func (j *JobManagerController) CreateIfNotExist(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { hash := HashForApplication(application) newlyCreated := false @@ -168,21 +168,21 @@ var JobManagerDefaultResources = coreV1.ResourceRequirements{ }, } -func getJobManagerPodName(application *v1beta1.FlinkApplication, hash string) string { +func getJobManagerPodName(application *v1beta2.FlinkApplication, hash string) string { applicationName := application.Name - if v1beta1.IsBlueGreenDeploymentMode(application.Spec.DeploymentMode) { + if v1beta2.IsBlueGreenDeploymentMode(application.Spec.DeploymentMode) { applicationVersion := application.Status.UpdatingVersion return fmt.Sprintf(JobManagerVersionPodNameFormat, applicationName, hash, applicationVersion) } return fmt.Sprintf(JobManagerPodNameFormat, applicationName, hash) } -func getJobManagerName(application *v1beta1.FlinkApplication, hash string) string { +func getJobManagerName(application *v1beta2.FlinkApplication, hash string) string { applicationName := application.Name return fmt.Sprintf(JobManagerNameFormat, applicationName, hash) } -func FetchJobManagerServiceCreateObj(app *v1beta1.FlinkApplication, hash string) *coreV1.Service { +func FetchJobManagerServiceCreateObj(app *v1beta2.FlinkApplication, hash string) *coreV1.Service { jmServiceName := app.Name serviceLabels := getCommonAppLabels(app) serviceLabels[FlinkAppHash] = hash @@ -208,7 +208,7 @@ func FetchJobManagerServiceCreateObj(app *v1beta1.FlinkApplication, hash string) } } -func getJobManagerServicePorts(app *v1beta1.FlinkApplication) []coreV1.ServicePort { +func getJobManagerServicePorts(app *v1beta2.FlinkApplication) []coreV1.ServicePort { ports := getJobManagerPorts(app) servicePorts := make([]coreV1.ServicePort, 0, len(ports)) for _, p := range ports { @@ -220,7 +220,7 @@ func getJobManagerServicePorts(app 
*v1beta1.FlinkApplication) []coreV1.ServicePo return servicePorts } -func getJobManagerPorts(app *v1beta1.FlinkApplication) []coreV1.ContainerPort { +func getJobManagerPorts(app *v1beta2.FlinkApplication) []coreV1.ContainerPort { return []coreV1.ContainerPort{ { Name: FlinkRPCPortName, @@ -245,7 +245,7 @@ func getJobManagerPorts(app *v1beta1.FlinkApplication) []coreV1.ContainerPort { } } -func FetchJobManagerContainerObj(application *v1beta1.FlinkApplication) *coreV1.Container { +func FetchJobManagerContainerObj(application *v1beta2.FlinkApplication) *coreV1.Container { jmConfig := application.Spec.JobManagerConfig resources := jmConfig.Resources if resources == nil { @@ -294,7 +294,7 @@ func DeploymentIsJobmanager(deployment *v1.Deployment) bool { // made very carefully. Any new version v' that causes DeploymentsEqual(v(x), v'(x)) to be false // will cause redeployments for all applications, and should be considered a breaking change that // requires a new version of the CRD. -func jobmanagerTemplate(app *v1beta1.FlinkApplication) *v1.Deployment { +func jobmanagerTemplate(app *v1beta2.FlinkApplication) *v1.Deployment { labels := getCommonAppLabels(app) labels = common.CopyMap(labels, app.Labels) labels[FlinkDeploymentType] = FlinkDeploymentTypeJobmanager @@ -355,7 +355,7 @@ func jobmanagerTemplate(app *v1beta1.FlinkApplication) *v1.Deployment { return deployment } -func FetchJobMangerDeploymentCreateObj(app *v1beta1.FlinkApplication, hash string) *v1.Deployment { +func FetchJobMangerDeploymentCreateObj(app *v1beta2.FlinkApplication, hash string) *v1.Deployment { template := jobmanagerTemplate(app.DeepCopy()) template.Name = getJobManagerName(app, hash) @@ -369,7 +369,7 @@ func FetchJobMangerDeploymentCreateObj(app *v1beta1.FlinkApplication, hash strin return template } -func JobManagerDeploymentMatches(deployment *v1.Deployment, application *v1beta1.FlinkApplication, hash string) bool { +func JobManagerDeploymentMatches(deployment *v1.Deployment, application 
*v1beta2.FlinkApplication, hash string) bool { deploymentName := getJobManagerName(application, hash) return deployment.Name == deploymentName } diff --git a/pkg/controller/flink/job_manager_controller_test.go b/pkg/controller/flink/job_manager_controller_test.go index fc99fa81..64b9be0e 100644 --- a/pkg/controller/flink/job_manager_controller_test.go +++ b/pkg/controller/flink/job_manager_controller_test.go @@ -3,7 +3,7 @@ package flink import ( "testing" - v1beta12 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + v1beta22 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" "github.com/lyft/flinkk8soperator/pkg/controller/config" @@ -18,7 +18,7 @@ import ( "github.com/stretchr/testify/assert" v1 "k8s.io/api/apps/v1" coreV1 "k8s.io/api/core/v1" - "k8s.io/api/extensions/v1beta1" + "k8s.io/api/extensions/v1beta2" k8sErrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" @@ -46,7 +46,7 @@ func TestGetJobManagerPodName(t *testing.T) { func TestGetJobManagerPodNameWithVersion(t *testing.T) { app := getFlinkTestApp() - app.Spec.DeploymentMode = v1beta12.DeploymentModeBlueGreen + app.Spec.DeploymentMode = v1beta22.DeploymentModeBlueGreen app.Status.UpdatingVersion = testVersion assert.Equal(t, "app-name-"+testAppHash+"-jm-"+testVersion+"-pod", getJobManagerPodName(&app, testAppHash)) } @@ -86,7 +86,7 @@ func TestJobManagerCreateSuccess(t *testing.T) { assert.Equal(t, expectedLabels, deployment.Labels) assert.Equal(t, int32(1), *deployment.Spec.Replicas) assert.Equal(t, "app-name", deployment.OwnerReferences[0].Name) - assert.Equal(t, "flink.k8s.io/v1beta1", deployment.OwnerReferences[0].APIVersion) + assert.Equal(t, "flink.k8s.io/v1beta2", deployment.OwnerReferences[0].APIVersion) assert.Equal(t, "FlinkApplication", deployment.OwnerReferences[0].Kind) assert.Equal(t, "blob.server.port: 6125\njobmanager.heap.size: 1572864k\n"+ @@ -112,7 +112,7 @@ func TestJobManagerCreateSuccess(t *testing.T) { 
labels := map[string]string{ "flink-app": "app-name", } - ingress := object.(*v1beta1.Ingress) + ingress := object.(*v1beta2.Ingress) assert.Equal(t, app.Name, ingress.Name) assert.Equal(t, app.Namespace, ingress.Namespace) assert.Equal(t, labels, ingress.Labels) @@ -163,7 +163,7 @@ func TestJobManagerHACreateSuccess(t *testing.T) { assert.Equal(t, expectedLabels, deployment.Labels) assert.Equal(t, int32(1), *deployment.Spec.Replicas) assert.Equal(t, "app-name", deployment.OwnerReferences[0].Name) - assert.Equal(t, "flink.k8s.io/v1beta1", deployment.OwnerReferences[0].APIVersion) + assert.Equal(t, "flink.k8s.io/v1beta2", deployment.OwnerReferences[0].APIVersion) assert.Equal(t, "FlinkApplication", deployment.OwnerReferences[0].Kind) assert.Equal(t, "blob.server.port: 6125\nhigh-availability: zookeeper\njobmanager.heap.size: 1572864k\n"+ @@ -190,7 +190,7 @@ func TestJobManagerHACreateSuccess(t *testing.T) { labels := map[string]string{ "flink-app": "app-name", } - ingress := object.(*v1beta1.Ingress) + ingress := object.(*v1beta2.Ingress) assert.Equal(t, app.Name, ingress.Name) assert.Equal(t, app.Namespace, ingress.Namespace) assert.Equal(t, labels, ingress.Labels) @@ -305,7 +305,7 @@ func TestJobManagerCreateSuccessWithVersion(t *testing.T) { app.Spec.JarName = testJarName app.Spec.EntryClass = testEntryClass app.Spec.ProgramArgs = testProgramArgs - app.Spec.DeploymentMode = v1beta12.DeploymentModeBlueGreen + app.Spec.DeploymentMode = v1beta22.DeploymentModeBlueGreen app.Status.UpdatingVersion = testVersion annotations := map[string]string{ "key": "annotation", @@ -335,7 +335,7 @@ func TestJobManagerCreateSuccessWithVersion(t *testing.T) { assert.Equal(t, expectedLabels, deployment.Labels) assert.Equal(t, int32(1), *deployment.Spec.Replicas) assert.Equal(t, "app-name", deployment.OwnerReferences[0].Name) - assert.Equal(t, "flink.k8s.io/v1beta1", deployment.OwnerReferences[0].APIVersion) + assert.Equal(t, "flink.k8s.io/v1beta2", 
deployment.OwnerReferences[0].APIVersion) assert.Equal(t, "FlinkApplication", deployment.OwnerReferences[0].Kind) assert.Equal(t, "blob.server.port: 6125\njobmanager.heap.size: 1572864k\n"+ @@ -363,7 +363,7 @@ func TestJobManagerCreateSuccessWithVersion(t *testing.T) { labels := map[string]string{ "flink-app": "app-name", } - ingress := object.(*v1beta1.Ingress) + ingress := object.(*v1beta2.Ingress) assert.Equal(t, app.Name, ingress.Name) assert.Equal(t, app.Namespace, ingress.Namespace) assert.Equal(t, labels, ingress.Labels) diff --git a/pkg/controller/flink/mock/mock_flink.go b/pkg/controller/flink/mock/mock_flink.go index 1e5ade66..e1a070fa 100644 --- a/pkg/controller/flink/mock/mock_flink.go +++ b/pkg/controller/flink/mock/mock_flink.go @@ -3,32 +3,32 @@ package mock import ( "context" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" "github.com/lyft/flinkk8soperator/pkg/controller/common" "github.com/lyft/flinkk8soperator/pkg/controller/flink/client" corev1 "k8s.io/api/core/v1" ) -type CreateClusterFunc func(ctx context.Context, application *v1beta1.FlinkApplication) error -type DeleteOldResourcesForApp func(ctx context.Context, application *v1beta1.FlinkApplication) error -type CancelWithSavepointFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) -type ForceCancelFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) error -type StartFlinkJobFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string, +type CreateClusterFunc func(ctx context.Context, application *v1beta2.FlinkApplication) error +type DeleteOldResourcesForApp func(ctx context.Context, application *v1beta2.FlinkApplication) error +type CancelWithSavepointFunc func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (string, error) +type ForceCancelFunc func(ctx context.Context, application 
*v1beta2.FlinkApplication, hash string) error +type StartFlinkJobFunc func(ctx context.Context, application *v1beta2.FlinkApplication, hash string, jarName string, parallelism int32, entryClass string, programArgs string, allowNonRestoredState bool, savepointPath string) (string, error) -type GetSavepointStatusFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.SavepointResponse, error) -type IsClusterReadyFunc func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) -type IsServiceReadyFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) -type GetJobsForApplicationFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) ([]client.FlinkJob, error) -type GetJobForApplicationFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) -type GetCurrentDeploymentsForAppFunc func(ctx context.Context, application *v1beta1.FlinkApplication) (*common.FlinkDeployment, error) -type FindExternalizedCheckpointFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) -type CompareAndUpdateClusterStatusFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) -type CompareAndUpdateJobStatusFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) -type GetLatestClusterStatusFunc func(ctx context.Context, app *v1beta1.FlinkApplication) v1beta1.FlinkClusterStatus -type GetLatestJobStatusFunc func(ctx context.Context, app *v1beta1.FlinkApplication) v1beta1.FlinkJobStatus -type GetLatestJobIDFunc func(ctx context.Context, app *v1beta1.FlinkApplication) string -type UpdateLatestJobIDFunc func(ctx context.Context, app *v1beta1.FlinkApplication, jobID string) -type UpdateLatestJobStatusFunc func(ctx context.Context, app *v1beta1.FlinkApplication, jobStatus v1beta1.FlinkJobStatus) 
+type GetSavepointStatusFunc func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.SavepointResponse, error) +type IsClusterReadyFunc func(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) +type IsServiceReadyFunc func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) +type GetJobsForApplicationFunc func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) ([]client.FlinkJob, error) +type GetJobForApplicationFunc func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) +type GetCurrentDeploymentsForAppFunc func(ctx context.Context, application *v1beta2.FlinkApplication) (*common.FlinkDeployment, error) +type FindExternalizedCheckpointFunc func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (string, error) +type CompareAndUpdateClusterStatusFunc func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) +type CompareAndUpdateJobStatusFunc func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) +type GetLatestClusterStatusFunc func(ctx context.Context, app *v1beta2.FlinkApplication) v1beta2.FlinkClusterStatus +type GetLatestJobStatusFunc func(ctx context.Context, app *v1beta2.FlinkApplication) v1beta2.FlinkJobStatus +type GetLatestJobIDFunc func(ctx context.Context, app *v1beta2.FlinkApplication) string +type UpdateLatestJobIDFunc func(ctx context.Context, app *v1beta2.FlinkApplication, jobID string) +type UpdateLatestJobStatusFunc func(ctx context.Context, app *v1beta2.FlinkApplication, jobStatus v1beta2.FlinkJobStatus) type FlinkController struct { CreateClusterFunc CreateClusterFunc @@ -53,42 +53,42 @@ type FlinkController struct { UpdateLatestJobStatusFunc UpdateLatestJobStatusFunc } -func (m *FlinkController) GetCurrentDeploymentsForApp(ctx context.Context, application *v1beta1.FlinkApplication) 
(*common.FlinkDeployment, error) { +func (m *FlinkController) GetCurrentDeploymentsForApp(ctx context.Context, application *v1beta2.FlinkApplication) (*common.FlinkDeployment, error) { if m.GetCurrentDeploymentsForAppFunc != nil { return m.GetCurrentDeploymentsForAppFunc(ctx, application) } return nil, nil } -func (m *FlinkController) DeleteOldResourcesForApp(ctx context.Context, application *v1beta1.FlinkApplication) error { +func (m *FlinkController) DeleteOldResourcesForApp(ctx context.Context, application *v1beta2.FlinkApplication) error { if m.DeleteOldResourcesForAppFunc != nil { return m.DeleteOldResourcesForAppFunc(ctx, application) } return nil } -func (m *FlinkController) CreateCluster(ctx context.Context, application *v1beta1.FlinkApplication) error { +func (m *FlinkController) CreateCluster(ctx context.Context, application *v1beta2.FlinkApplication) error { if m.CreateClusterFunc != nil { return m.CreateClusterFunc(ctx, application) } return nil } -func (m *FlinkController) CancelWithSavepoint(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) { +func (m *FlinkController) CancelWithSavepoint(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (string, error) { if m.CancelWithSavepointFunc != nil { return m.CancelWithSavepointFunc(ctx, application, hash) } return "", nil } -func (m *FlinkController) ForceCancel(ctx context.Context, application *v1beta1.FlinkApplication, hash string) error { +func (m *FlinkController) ForceCancel(ctx context.Context, application *v1beta2.FlinkApplication, hash string) error { if m.ForceCancelFunc != nil { return m.ForceCancelFunc(ctx, application, hash) } return nil } -func (m *FlinkController) StartFlinkJob(ctx context.Context, application *v1beta1.FlinkApplication, hash string, +func (m *FlinkController) StartFlinkJob(ctx context.Context, application *v1beta2.FlinkApplication, hash string, jarName string, parallelism int32, entryClass string, programArgs string, 
allowNonRestoredState bool, savepointPath string) (string, error) { if m.StartFlinkJobFunc != nil { return m.StartFlinkJobFunc(ctx, application, hash, jarName, parallelism, entryClass, programArgs, allowNonRestoredState, savepointPath) @@ -96,49 +96,49 @@ func (m *FlinkController) StartFlinkJob(ctx context.Context, application *v1beta return "", nil } -func (m *FlinkController) GetSavepointStatus(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.SavepointResponse, error) { +func (m *FlinkController) GetSavepointStatus(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.SavepointResponse, error) { if m.GetSavepointStatusFunc != nil { return m.GetSavepointStatusFunc(ctx, application, hash) } return nil, nil } -func (m *FlinkController) IsClusterReady(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { +func (m *FlinkController) IsClusterReady(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { if m.IsClusterReadyFunc != nil { return m.IsClusterReadyFunc(ctx, application) } return false, nil } -func (m *FlinkController) IsServiceReady(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) { +func (m *FlinkController) IsServiceReady(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) { if m.IsServiceReadyFunc != nil { return m.IsServiceReadyFunc(ctx, application, hash) } return false, nil } -func (m *FlinkController) GetJobsForApplication(ctx context.Context, application *v1beta1.FlinkApplication, hash string) ([]client.FlinkJob, error) { +func (m *FlinkController) GetJobsForApplication(ctx context.Context, application *v1beta2.FlinkApplication, hash string) ([]client.FlinkJob, error) { if m.GetJobsForApplicationFunc != nil { return m.GetJobsForApplicationFunc(ctx, application, hash) } return nil, nil } -func (m *FlinkController) GetJobForApplication(ctx context.Context, application 
*v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { +func (m *FlinkController) GetJobForApplication(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { if m.GetJobForApplicationFunc != nil { return m.GetJobForApplicationFunc(ctx, application, hash) } return nil, nil } -func (m *FlinkController) FindExternalizedCheckpoint(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) { +func (m *FlinkController) FindExternalizedCheckpoint(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (string, error) { if m.FindExternalizedCheckpointFunc != nil { return m.FindExternalizedCheckpointFunc(ctx, application, hash) } return "", nil } -func (m *FlinkController) LogEvent(ctx context.Context, app *v1beta1.FlinkApplication, eventType string, reason string, message string) { +func (m *FlinkController) LogEvent(ctx context.Context, app *v1beta2.FlinkApplication, eventType string, reason string, message string) { m.Events = append(m.Events, corev1.Event{ InvolvedObject: corev1.ObjectReference{ Kind: app.Kind, @@ -151,7 +151,7 @@ func (m *FlinkController) LogEvent(ctx context.Context, app *v1beta1.FlinkApplic }) } -func (m *FlinkController) CompareAndUpdateClusterStatus(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) { +func (m *FlinkController) CompareAndUpdateClusterStatus(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) { if m.CompareAndUpdateClusterStatusFunc != nil { return m.CompareAndUpdateClusterStatusFunc(ctx, application, hash) } @@ -159,7 +159,7 @@ func (m *FlinkController) CompareAndUpdateClusterStatus(ctx context.Context, app return false, nil } -func (m *FlinkController) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, hash string) (bool, error) { +func (m *FlinkController) CompareAndUpdateJobStatus(ctx context.Context, app 
*v1beta2.FlinkApplication, hash string) (bool, error) { if m.CompareAndUpdateJobStatusFunc != nil { return m.CompareAndUpdateJobStatusFunc(ctx, app, hash) } @@ -167,7 +167,7 @@ func (m *FlinkController) CompareAndUpdateJobStatus(ctx context.Context, app *v1 return false, nil } -func (m *FlinkController) GetLatestClusterStatus(ctx context.Context, application *v1beta1.FlinkApplication) v1beta1.FlinkClusterStatus { +func (m *FlinkController) GetLatestClusterStatus(ctx context.Context, application *v1beta2.FlinkApplication) v1beta2.FlinkClusterStatus { if m.GetLatestClusterStatusFunc != nil { return m.GetLatestClusterStatusFunc(ctx, application) } @@ -175,7 +175,7 @@ func (m *FlinkController) GetLatestClusterStatus(ctx context.Context, applicatio return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].ClusterStatus } -func (m *FlinkController) GetLatestJobStatus(ctx context.Context, application *v1beta1.FlinkApplication) v1beta1.FlinkJobStatus { +func (m *FlinkController) GetLatestJobStatus(ctx context.Context, application *v1beta2.FlinkApplication) v1beta2.FlinkJobStatus { if m.GetLatestClusterStatusFunc != nil { return m.GetLatestJobStatusFunc(ctx, application) } @@ -183,7 +183,7 @@ func (m *FlinkController) GetLatestJobStatus(ctx context.Context, application *v return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus } -func (m *FlinkController) GetLatestJobID(ctx context.Context, application *v1beta1.FlinkApplication) string { +func (m *FlinkController) GetLatestJobID(ctx context.Context, application *v1beta2.FlinkApplication) string { if m.GetLatestClusterStatusFunc != nil { return m.GetLatestJobIDFunc(ctx, application) } @@ -191,7 +191,7 @@ func (m *FlinkController) GetLatestJobID(ctx context.Context, application *v1bet return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID } -func (m *FlinkController) UpdateLatestJobID(ctx context.Context, application 
*v1beta1.FlinkApplication, jobID string) { +func (m *FlinkController) UpdateLatestJobID(ctx context.Context, application *v1beta2.FlinkApplication, jobID string) { if m.UpdateLatestJobIDFunc != nil { m.UpdateLatestJobIDFunc(ctx, application, jobID) } @@ -199,7 +199,7 @@ func (m *FlinkController) UpdateLatestJobID(ctx context.Context, application *v1 application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID = jobID } -func (m *FlinkController) UpdateLatestJobStatus(ctx context.Context, application *v1beta1.FlinkApplication, jobStatus v1beta1.FlinkJobStatus) { +func (m *FlinkController) UpdateLatestJobStatus(ctx context.Context, application *v1beta2.FlinkApplication, jobStatus v1beta2.FlinkJobStatus) { if m.UpdateLatestJobStatusFunc != nil { m.UpdateLatestJobStatusFunc(ctx, application, jobStatus) } @@ -207,9 +207,9 @@ func (m *FlinkController) UpdateLatestJobStatus(ctx context.Context, application application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus = jobStatus } -func getCurrentStatusIndex(app *v1beta1.FlinkApplication) int32 { +func getCurrentStatusIndex(app *v1beta2.FlinkApplication) int32 { desiredCount := app.Status.DesiredApplicationCount - if v1beta1.IsRunningPhase(app.Status.Phase) { + if v1beta2.IsRunningPhase(app.Status.Phase) { return 0 } diff --git a/pkg/controller/flink/mock/mock_job_manager_controller.go b/pkg/controller/flink/mock/mock_job_manager_controller.go index 7814f37d..c9d511ad 100644 --- a/pkg/controller/flink/mock/mock_job_manager_controller.go +++ b/pkg/controller/flink/mock/mock_job_manager_controller.go @@ -3,7 +3,7 @@ package mock import ( "context" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" ) type JobManagerController struct { @@ -12,7 +12,7 @@ type JobManagerController struct { func (m *JobManagerController) CreateIfNotExist( ctx context.Context, - application *v1beta1.FlinkApplication) (bool, error) { + 
application *v1beta2.FlinkApplication) (bool, error) { if m.CreateIfNotExistFunc != nil { return m.CreateIfNotExistFunc(ctx, application) } diff --git a/pkg/controller/flink/task_manager_controller.go b/pkg/controller/flink/task_manager_controller.go index c7b9de84..c672ed17 100644 --- a/pkg/controller/flink/task_manager_controller.go +++ b/pkg/controller/flink/task_manager_controller.go @@ -5,7 +5,7 @@ import ( "fmt" "math" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" "github.com/lyft/flinkk8soperator/pkg/controller/common" "github.com/lyft/flinkk8soperator/pkg/controller/config" "github.com/lyft/flinkk8soperator/pkg/controller/k8" @@ -29,7 +29,7 @@ const ( ) type TaskManagerControllerInterface interface { - CreateIfNotExist(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) + CreateIfNotExist(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) } func NewTaskManagerController(k8sCluster k8.ClusterInterface, config config.RuntimeConfig) TaskManagerControllerInterface { @@ -71,7 +71,7 @@ var TaskManagerDefaultResources = coreV1.ResourceRequirements{ }, } -func (t *TaskManagerController) CreateIfNotExist(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { +func (t *TaskManagerController) CreateIfNotExist(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { hash := HashForApplication(application) taskManagerDeployment := FetchTaskMangerDeploymentCreateObj(application, hash) @@ -91,7 +91,7 @@ func (t *TaskManagerController) CreateIfNotExist(ctx context.Context, applicatio return false, nil } -func GetTaskManagerPorts(app *v1beta1.FlinkApplication) []coreV1.ContainerPort { +func GetTaskManagerPorts(app *v1beta2.FlinkApplication) []coreV1.ContainerPort { return []coreV1.ContainerPort{ { Name: FlinkRPCPortName, @@ -112,7 +112,7 @@ func GetTaskManagerPorts(app *v1beta1.FlinkApplication) []coreV1.ContainerPort { } 
} -func FetchTaskManagerContainerObj(application *v1beta1.FlinkApplication) *coreV1.Container { +func FetchTaskManagerContainerObj(application *v1beta2.FlinkApplication) *coreV1.Container { tmConfig := application.Spec.TaskManagerConfig ports := GetTaskManagerPorts(application) resources := tmConfig.Resources @@ -141,21 +141,21 @@ func FetchTaskManagerContainerObj(application *v1beta1.FlinkApplication) *coreV1 } } -func getTaskManagerPodName(application *v1beta1.FlinkApplication, hash string) string { +func getTaskManagerPodName(application *v1beta2.FlinkApplication, hash string) string { applicationName := application.Name - if v1beta1.IsBlueGreenDeploymentMode(application.Spec.DeploymentMode) { + if v1beta2.IsBlueGreenDeploymentMode(application.Spec.DeploymentMode) { applicationVersion := application.Status.UpdatingVersion return fmt.Sprintf(TaskManagerVersionPodNameFormat, applicationName, hash, applicationVersion) } return fmt.Sprintf(TaskManagerPodNameFormat, applicationName, hash) } -func getTaskManagerName(application *v1beta1.FlinkApplication, hash string) string { +func getTaskManagerName(application *v1beta2.FlinkApplication, hash string) string { applicationName := application.Name return fmt.Sprintf(TaskManagerNameFormat, applicationName, hash) } -func computeTaskManagerReplicas(application *v1beta1.FlinkApplication) int32 { +func computeTaskManagerReplicas(application *v1beta2.FlinkApplication) int32 { slots := getTaskmanagerSlots(application) parallelism := application.Spec.Parallelism return int32(math.Ceil(float64(parallelism) / float64(slots))) @@ -169,7 +169,7 @@ func DeploymentIsTaskmanager(deployment *v1.Deployment) bool { // made very carefully. Any new version v' that causes DeploymentsEqual(v(x), v'(x)) to be false // will cause redeployments for all applications, and should be considered a breaking change that // requires a new version of the CRD. 
-func taskmanagerTemplate(app *v1beta1.FlinkApplication) *v1.Deployment { +func taskmanagerTemplate(app *v1beta2.FlinkApplication) *v1.Deployment { labels := getCommonAppLabels(app) labels = common.CopyMap(labels, app.Labels) labels[FlinkDeploymentType] = FlinkDeploymentTypeTaskmanager @@ -231,7 +231,7 @@ func taskmanagerTemplate(app *v1beta1.FlinkApplication) *v1.Deployment { return deployment } -func FetchTaskMangerDeploymentCreateObj(app *v1beta1.FlinkApplication, hash string) *v1.Deployment { +func FetchTaskMangerDeploymentCreateObj(app *v1beta2.FlinkApplication, hash string) *v1.Deployment { template := taskmanagerTemplate(app.DeepCopy()) template.Name = getTaskManagerName(app, hash) @@ -245,7 +245,7 @@ func FetchTaskMangerDeploymentCreateObj(app *v1beta1.FlinkApplication, hash stri return template } -func TaskManagerDeploymentMatches(deployment *v1.Deployment, application *v1beta1.FlinkApplication, hash string) bool { +func TaskManagerDeploymentMatches(deployment *v1.Deployment, application *v1beta2.FlinkApplication, hash string) bool { deploymentName := getTaskManagerName(application, hash) return deployment.Name == deploymentName } diff --git a/pkg/controller/flink/task_manager_controller_test.go b/pkg/controller/flink/task_manager_controller_test.go index 19d8b3e1..a11f50ba 100644 --- a/pkg/controller/flink/task_manager_controller_test.go +++ b/pkg/controller/flink/task_manager_controller_test.go @@ -8,7 +8,7 @@ import ( "context" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" "github.com/lyft/flinkk8soperator/pkg/controller/common" "github.com/lyft/flytestdlib/promutils/labeled" "github.com/pkg/errors" @@ -31,7 +31,7 @@ func getTMControllerForTest() TaskManagerController { } func TestComputeTaskManagerReplicas(t *testing.T) { - app := v1beta1.FlinkApplication{} + app := v1beta2.FlinkApplication{} taskSlots := int32(4) app.Spec.TaskManagerConfig.TaskSlots = &taskSlots app.Spec.Parallelism 
= 9 @@ -52,7 +52,7 @@ func TestGetTaskManagerPodName(t *testing.T) { func TestGetTaskManagerPodNameWithVersion(t *testing.T) { app := getFlinkTestApp() - app.Spec.DeploymentMode = v1beta1.DeploymentModeBlueGreen + app.Spec.DeploymentMode = v1beta2.DeploymentModeBlueGreen app.Status.UpdatingVersion = testVersion assert.Equal(t, "app-name-"+testAppHash+"-tm-"+testVersion+"-pod", getTaskManagerPodName(&app, testAppHash)) } @@ -229,7 +229,7 @@ func TestTaskManagerCreateSuccessWithVersion(t *testing.T) { app.Spec.JarName = testJarName app.Spec.EntryClass = testEntryClass app.Spec.ProgramArgs = testProgramArgs - app.Spec.DeploymentMode = v1beta1.DeploymentModeBlueGreen + app.Spec.DeploymentMode = v1beta2.DeploymentModeBlueGreen app.Status.UpdatingVersion = testVersion annotations := map[string]string{ "key": "annotation", diff --git a/pkg/controller/flinkapplication/controller.go b/pkg/controller/flinkapplication/controller.go index 54513b78..79262ba4 100644 --- a/pkg/controller/flinkapplication/controller.go +++ b/pkg/controller/flinkapplication/controller.go @@ -6,7 +6,7 @@ import ( "github.com/lyft/flytestdlib/promutils" "github.com/lyft/flytestdlib/promutils/labeled" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" "github.com/lyft/flinkk8soperator/pkg/controller/config" "sigs.k8s.io/controller-runtime/pkg/controller" @@ -87,11 +87,11 @@ func (r *ReconcileFlinkApplication) Reconcile(request reconcile.Request) (reconc ctx = contextutils.WithNamespace(ctx, request.Namespace) ctx = contextutils.WithAppName(ctx, request.Name) typeMeta := metaV1.TypeMeta{ - Kind: v1beta1.FlinkApplicationKind, - APIVersion: v1beta1.SchemeGroupVersion.String(), + Kind: v1beta2.FlinkApplicationKind, + APIVersion: v1beta2.SchemeGroupVersion.String(), } // Fetch the FlinkApplication instance - instance := &v1beta1.FlinkApplication{ + instance := &v1beta2.FlinkApplication{ TypeMeta: typeMeta, } @@ -140,7 +140,7 @@ func Add(ctx 
context.Context, mgr manager.Manager, cfg config.RuntimeConfig) err return err } - if err = c.Watch(&source.Kind{Type: &v1beta1.FlinkApplication{}}, &handler.EnqueueRequestForObject{}); err != nil { + if err = c.Watch(&source.Kind{Type: &v1beta2.FlinkApplication{}}, &handler.EnqueueRequestForObject{}); err != nil { return err } @@ -157,8 +157,8 @@ func Add(ctx context.Context, mgr manager.Manager, cfg config.RuntimeConfig) err func isOwnedByFlinkApplication(ownerReferences []metaV1.OwnerReference) bool { for _, ownerReference := range ownerReferences { - if ownerReference.APIVersion == v1beta1.SchemeGroupVersion.String() && - ownerReference.Kind == v1beta1.FlinkApplicationKind { + if ownerReference.APIVersion == v1beta2.SchemeGroupVersion.String() && + ownerReference.Kind == v1beta2.FlinkApplicationKind { return true } } diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index 5ee94847..600c15b3 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -11,7 +11,7 @@ import ( "fmt" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" "github.com/lyft/flinkk8soperator/pkg/controller/config" "github.com/lyft/flinkk8soperator/pkg/controller/flink" "github.com/lyft/flinkk8soperator/pkg/controller/flink/client" @@ -33,7 +33,7 @@ const ( // The core state machine that manages Flink clusters and jobs. See docs/state_machine.md for a description of the // states and transitions. 
type FlinkHandlerInterface interface { - Handle(ctx context.Context, application *v1beta1.FlinkApplication) error + Handle(ctx context.Context, application *v1beta2.FlinkApplication) error } type FlinkStateMachine struct { @@ -46,18 +46,18 @@ type FlinkStateMachine struct { type stateMachineMetrics struct { scope promutils.Scope - stateMachineHandlePhaseMap map[v1beta1.FlinkApplicationPhase]labeled.StopWatch - stateMachineHandleSuccessPhaseMap map[v1beta1.FlinkApplicationPhase]labeled.StopWatch - errorCounterPhaseMap map[v1beta1.FlinkApplicationPhase]labeled.Counter + stateMachineHandlePhaseMap map[v1beta2.FlinkApplicationPhase]labeled.StopWatch + stateMachineHandleSuccessPhaseMap map[v1beta2.FlinkApplicationPhase]labeled.StopWatch + errorCounterPhaseMap map[v1beta2.FlinkApplicationPhase]labeled.Counter } func newStateMachineMetrics(scope promutils.Scope) *stateMachineMetrics { stateMachineScope := scope.NewSubScope("state_machine") - stateMachineHandlePhaseMap := map[v1beta1.FlinkApplicationPhase]labeled.StopWatch{} - stateMachineHandleSuccessPhaseMap := map[v1beta1.FlinkApplicationPhase]labeled.StopWatch{} - errorCounterPhaseMap := map[v1beta1.FlinkApplicationPhase]labeled.Counter{} + stateMachineHandlePhaseMap := map[v1beta2.FlinkApplicationPhase]labeled.StopWatch{} + stateMachineHandleSuccessPhaseMap := map[v1beta2.FlinkApplicationPhase]labeled.StopWatch{} + errorCounterPhaseMap := map[v1beta2.FlinkApplicationPhase]labeled.Counter{} - for _, phase := range v1beta1.FlinkApplicationPhases { + for _, phase := range v1beta2.FlinkApplicationPhases { phaseName := phase.VerboseString() stateMachineHandleSuccessPhaseMap[phase] = labeled.NewStopWatch(phaseName+"_"+"handle_time_success", fmt.Sprintf("Total time to handle the %s application state on success", phaseName), time.Millisecond, stateMachineScope) @@ -74,12 +74,12 @@ func newStateMachineMetrics(scope promutils.Scope) *stateMachineMetrics { } } -func (s *FlinkStateMachine) updateApplicationPhase(application 
*v1beta1.FlinkApplication, phase v1beta1.FlinkApplicationPhase) { +func (s *FlinkStateMachine) updateApplicationPhase(application *v1beta2.FlinkApplication, phase v1beta2.FlinkApplicationPhase) { application.Status.Phase = phase } -func (s *FlinkStateMachine) shouldRollback(ctx context.Context, application *v1beta1.FlinkApplication) (bool, string) { - if application.Spec.ForceRollback && application.Status.Phase != v1beta1.FlinkApplicationRollingBackJob { +func (s *FlinkStateMachine) shouldRollback(ctx context.Context, application *v1beta2.FlinkApplication) (bool, string) { + if application.Spec.ForceRollback && application.Status.Phase != v1beta2.FlinkApplicationRollingBackJob { return true, "forceRollback is set in the resource" } if application.Status.DeployHash == "" { @@ -118,7 +118,7 @@ func (s *FlinkStateMachine) shouldRollback(ctx context.Context, application *v1b return false, "" } -func (s *FlinkStateMachine) Handle(ctx context.Context, application *v1beta1.FlinkApplication) error { +func (s *FlinkStateMachine) Handle(ctx context.Context, application *v1beta2.FlinkApplication) error { currentPhase := application.Status.Phase if _, ok := s.metrics.stateMachineHandlePhaseMap[currentPhase]; !ok { errMsg := fmt.Sprintf("Invalid state %s for the application", currentPhase) @@ -148,7 +148,7 @@ func (s *FlinkStateMachine) Handle(ctx context.Context, application *v1beta1.Fli return err } -func (s *FlinkStateMachine) handle(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) handle(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { var appErr error updateApplication := false updateLastSeenError := false @@ -156,37 +156,37 @@ func (s *FlinkStateMachine) handle(ctx context.Context, application *v1beta1.Fli // initialize application status array if it's not yet been initialized initializeAppStatusIfEmpty(application) - if !application.ObjectMeta.DeletionTimestamp.IsZero() && appPhase != 
v1beta1.FlinkApplicationDeleting { - s.updateApplicationPhase(application, v1beta1.FlinkApplicationDeleting) + if !application.ObjectMeta.DeletionTimestamp.IsZero() && appPhase != v1beta2.FlinkApplicationDeleting { + s.updateApplicationPhase(application, v1beta2.FlinkApplicationDeleting) // Always perform a single application update per callback return statusChanged, nil } if s.IsTimeToHandlePhase(application, appPhase) { - if !v1beta1.IsRunningPhase(application.Status.Phase) { + if !v1beta2.IsRunningPhase(application.Status.Phase) { logger.Infof(ctx, "Handling state for application") } switch application.Status.Phase { - case v1beta1.FlinkApplicationNew, v1beta1.FlinkApplicationUpdating: + case v1beta2.FlinkApplicationNew, v1beta2.FlinkApplicationUpdating: // Currently just transitions to the next state updateApplication, appErr = s.handleNewOrUpdating(ctx, application) - case v1beta1.FlinkApplicationClusterStarting: + case v1beta2.FlinkApplicationClusterStarting: updateApplication, appErr = s.handleClusterStarting(ctx, application) - case v1beta1.FlinkApplicationSubmittingJob: + case v1beta2.FlinkApplicationSubmittingJob: updateApplication, appErr = s.handleSubmittingJob(ctx, application) - case v1beta1.FlinkApplicationRunning, v1beta1.FlinkApplicationDeployFailed: + case v1beta2.FlinkApplicationRunning, v1beta2.FlinkApplicationDeployFailed: updateApplication, appErr = s.handleApplicationRunning(ctx, application) - case v1beta1.FlinkApplicationSavepointing: + case v1beta2.FlinkApplicationSavepointing: updateApplication, appErr = s.handleApplicationSavepointing(ctx, application) - case v1beta1.FlinkApplicationRecovering: + case v1beta2.FlinkApplicationRecovering: updateApplication, appErr = s.handleApplicationRecovering(ctx, application) - case v1beta1.FlinkApplicationRollingBackJob: + case v1beta2.FlinkApplicationRollingBackJob: updateApplication, appErr = s.handleRollingBack(ctx, application) - case v1beta1.FlinkApplicationDeleting: + case 
v1beta2.FlinkApplicationDeleting: updateApplication, appErr = s.handleApplicationDeleting(ctx, application) } - if !v1beta1.IsRunningPhase(appPhase) { + if !v1beta2.IsRunningPhase(appPhase) { // Only update LastSeenError and thereby invoke error handling logic for // non-Running phases updateLastSeenError = s.compareAndUpdateError(application, appErr) @@ -197,8 +197,8 @@ func (s *FlinkStateMachine) handle(ctx context.Context, application *v1beta1.Fli return updateApplication || updateLastSeenError, appErr } -func (s *FlinkStateMachine) IsTimeToHandlePhase(application *v1beta1.FlinkApplication, phase v1beta1.FlinkApplicationPhase) bool { - if phase == v1beta1.FlinkApplicationDeleting { +func (s *FlinkStateMachine) IsTimeToHandlePhase(application *v1beta2.FlinkApplication, phase v1beta2.FlinkApplicationPhase) bool { + if phase == v1beta2.FlinkApplicationDeleting { // reset lastSeenError and retryCount in case the application was failing in its previous phase // We always want a Deleting phase to be handled application.Status.LastSeenError = nil @@ -227,7 +227,7 @@ func (s *FlinkStateMachine) IsTimeToHandlePhase(application *v1beta1.FlinkApplic } // In this state we create a new cluster, either due to an entirely new FlinkApplication or due to an update. 
-func (s *FlinkStateMachine) handleNewOrUpdating(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) handleNewOrUpdating(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { // TODO: add up-front validation on the FlinkApplication resource if rollback, reason := s.shouldRollback(ctx, application); rollback { // we've failed to make progress; move to deploy failed @@ -242,11 +242,11 @@ func (s *FlinkStateMachine) handleNewOrUpdating(ctx context.Context, application logger.Errorf(ctx, "Cluster creation failed with error: %v", err) return statusUnchanged, err } - s.updateApplicationPhase(application, v1beta1.FlinkApplicationClusterStarting) + s.updateApplicationPhase(application, v1beta2.FlinkApplicationClusterStarting) return statusChanged, nil } -func (s *FlinkStateMachine) deployFailed(ctx context.Context, app *v1beta1.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) deployFailed(ctx context.Context, app *v1beta2.FlinkApplication) (bool, error) { hash := flink.HashForApplication(app) s.flinkController.LogEvent(ctx, app, corev1.EventTypeWarning, "RolledBackDeploy", fmt.Sprintf("Successfully rolled back deploy %s", hash)) @@ -257,12 +257,12 @@ func (s *FlinkStateMachine) deployFailed(ctx context.Context, app *v1beta1.Flink app.Status.LastSeenError = nil app.Status.RetryCount = 0 - s.updateApplicationPhase(app, v1beta1.FlinkApplicationDeployFailed) + s.updateApplicationPhase(app, v1beta2.FlinkApplicationDeployFailed) return statusChanged, nil } // Create the underlying Kubernetes objects for the new cluster -func (s *FlinkStateMachine) handleClusterStarting(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) handleClusterStarting(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { if rollback, reason := s.shouldRollback(ctx, application); rollback { // we've failed to make progress; move to deploy failed 
// TODO: this will need different logic in single mode @@ -285,29 +285,29 @@ func (s *FlinkStateMachine) handleClusterStarting(ctx context.Context, applicati logger.Infof(ctx, "Flink cluster has started successfully") // TODO: in single mode move to submitting job - s.updateApplicationPhase(application, v1beta1.FlinkApplicationSavepointing) + s.updateApplicationPhase(application, v1beta2.FlinkApplicationSavepointing) return statusChanged, nil } -func initializeAppStatusIfEmpty(application *v1beta1.FlinkApplication) { +func initializeAppStatusIfEmpty(application *v1beta2.FlinkApplication) { // initialize the app status array to include 2 status elements in case of blue green deploys // else use a one element array - if application.Spec.DeploymentMode == v1beta1.DeploymentModeBlueGreen { + if application.Spec.DeploymentMode == v1beta2.DeploymentModeBlueGreen { application.Status.DesiredApplicationCount = 2 } else { application.Status.DesiredApplicationCount = 1 } if len(application.Status.ApplicationStatus) == 0 { - application.Status.ApplicationStatus = make([]v1beta1.FlinkApplicationVersionStatus, application.Status.DesiredApplicationCount) + application.Status.ApplicationStatus = make([]v1beta2.FlinkApplicationVersionStatus, application.Status.DesiredApplicationCount) } } -func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { // we've already savepointed (or this is our first deploy), continue on if application.Status.SavepointPath != "" || application.Status.DeployHash == "" { s.flinkController.UpdateLatestJobID(ctx, application, "") - s.updateApplicationPhase(application, v1beta1.FlinkApplicationSubmittingJob) + s.updateApplicationPhase(application, v1beta2.FlinkApplicationSubmittingJob) return statusChanged, nil } @@ -315,7 +315,7 @@ func (s 
*FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, a s.flinkController.LogEvent(ctx, application, corev1.EventTypeWarning, "SavepointFailed", fmt.Sprintf("Could not savepoint existing job: %s", reason)) application.Status.RetryCount = 0 - s.updateApplicationPhase(application, v1beta1.FlinkApplicationRecovering) + s.updateApplicationPhase(application, v1beta2.FlinkApplicationRecovering) return statusChanged, nil } @@ -348,7 +348,7 @@ func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, a fmt.Sprintf("Failed to take savepoint for job %s: %v", s.flinkController.GetLatestJobID(ctx, application), savepointStatusResponse.Operation.FailureCause)) application.Status.RetryCount = 0 - s.updateApplicationPhase(application, v1beta1.FlinkApplicationRecovering) + s.updateApplicationPhase(application, v1beta2.FlinkApplicationRecovering) return statusChanged, nil } else if savepointStatusResponse.SavepointStatus.Status == client.SavePointCompleted { s.flinkController.LogEvent(ctx, application, corev1.EventTypeNormal, "CanceledJob", @@ -356,21 +356,21 @@ func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, a savepointStatusResponse.Operation.Location)) application.Status.SavepointPath = savepointStatusResponse.Operation.Location s.flinkController.UpdateLatestJobID(ctx, application, "") - s.updateApplicationPhase(application, v1beta1.FlinkApplicationSubmittingJob) + s.updateApplicationPhase(application, v1beta2.FlinkApplicationSubmittingJob) return statusChanged, nil } return statusUnchanged, nil } -func (s *FlinkStateMachine) handleApplicationRecovering(ctx context.Context, app *v1beta1.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) handleApplicationRecovering(ctx context.Context, app *v1beta2.FlinkApplication) (bool, error) { // we're in the middle of a deploy, and savepointing has failed in some way... 
we're going to try to recover // and push through if possible if rollback, reason := s.shouldRollback(ctx, app); rollback { // we failed to recover, attempt to rollback s.flinkController.LogEvent(ctx, app, corev1.EventTypeWarning, "RecoveryFailed", fmt.Sprintf("Failed to recover with externalized checkpoint: %s", reason)) - s.updateApplicationPhase(app, v1beta1.FlinkApplicationRollingBackJob) + s.updateApplicationPhase(app, v1beta2.FlinkApplicationRollingBackJob) return statusChanged, nil } @@ -393,11 +393,11 @@ func (s *FlinkStateMachine) handleApplicationRecovering(ctx context.Context, app app.Status.SavepointPath = path s.flinkController.UpdateLatestJobID(ctx, app, "") - s.updateApplicationPhase(app, v1beta1.FlinkApplicationSubmittingJob) + s.updateApplicationPhase(app, v1beta2.FlinkApplicationSubmittingJob) return statusChanged, nil } -func (s *FlinkStateMachine) submitJobIfNeeded(ctx context.Context, app *v1beta1.FlinkApplication, hash string, +func (s *FlinkStateMachine) submitJobIfNeeded(ctx context.Context, app *v1beta2.FlinkApplication, hash string, jarName string, parallelism int32, entryClass string, programArgs string, allowNonRestoredState bool, savepointPath string) (string, error) { @@ -445,7 +445,7 @@ func (s *FlinkStateMachine) submitJobIfNeeded(ctx context.Context, app *v1beta1. 
} } -func (s *FlinkStateMachine) updateGenericService(ctx context.Context, app *v1beta1.FlinkApplication, newHash string) error { +func (s *FlinkStateMachine) updateGenericService(ctx context.Context, app *v1beta2.FlinkApplication, newHash string) error { service, err := s.k8Cluster.GetService(ctx, app.Namespace, app.Name) if err != nil { return err @@ -469,12 +469,12 @@ func (s *FlinkStateMachine) updateGenericService(ctx context.Context, app *v1bet return nil } -func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta1.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta2.FlinkApplication) (bool, error) { if rollback, reason := s.shouldRollback(ctx, app); rollback { // Something's gone wrong; roll back s.flinkController.LogEvent(ctx, app, corev1.EventTypeWarning, "JobSubmissionFailed", fmt.Sprintf("Failed to submit job: %s", reason)) - s.updateApplicationPhase(app, v1beta1.FlinkApplicationRollingBackJob) + s.updateApplicationPhase(app, v1beta2.FlinkApplicationRollingBackJob) return statusChanged, nil } @@ -545,7 +545,7 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta jobStatus.ProgramArgs = app.Spec.ProgramArgs jobStatus.AllowNonRestoredState = app.Spec.AllowNonRestoredState s.flinkController.UpdateLatestJobStatus(ctx, app, jobStatus) - s.updateApplicationPhase(app, v1beta1.FlinkApplicationRunning) + s.updateApplicationPhase(app, v1beta2.FlinkApplicationRunning) return statusChanged, nil } @@ -554,7 +554,7 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta // Something has gone wrong during the update, post job-cancellation (and cluster tear-down in single mode). 
We need // to try to get things back into a working state -func (s *FlinkStateMachine) handleRollingBack(ctx context.Context, app *v1beta1.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) handleRollingBack(ctx context.Context, app *v1beta2.FlinkApplication) (bool, error) { if rollback, reason := s.shouldRollback(ctx, app); rollback { // we've failed in our roll back attempt (presumably because something's now wrong with the original cluster) // move immediately to the DeployFailed state so that the user can recover. @@ -611,7 +611,7 @@ func (s *FlinkStateMachine) handleRollingBack(ctx context.Context, app *v1beta1. // Check if the application is Running. // This is a stable state. Keep monitoring if the underlying CRD reflects the Flink cluster -func (s *FlinkStateMachine) handleApplicationRunning(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) handleApplicationRunning(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { cur, err := s.flinkController.GetCurrentDeploymentsForApp(ctx, application) if err != nil { return statusUnchanged, err @@ -622,7 +622,7 @@ func (s *FlinkStateMachine) handleApplicationRunning(ctx context.Context, applic if cur == nil { logger.Infof(ctx, "Application resource has changed. 
Moving to Updating") // TODO: handle single mode - s.updateApplicationPhase(application, v1beta1.FlinkApplicationUpdating) + s.updateApplicationPhase(application, v1beta2.FlinkApplicationUpdating) return statusChanged, nil } @@ -664,7 +664,7 @@ func (s *FlinkStateMachine) handleApplicationRunning(ctx context.Context, applic return statusUnchanged, nil } -func (s *FlinkStateMachine) addFinalizerIfMissing(ctx context.Context, application *v1beta1.FlinkApplication, finalizer string) error { +func (s *FlinkStateMachine) addFinalizerIfMissing(ctx context.Context, application *v1beta2.FlinkApplication, finalizer string) error { for _, f := range application.Finalizers { if f == finalizer { return nil @@ -687,7 +687,7 @@ func removeString(list []string, target string) []string { return ret } -func (s *FlinkStateMachine) clearFinalizers(ctx context.Context, app *v1beta1.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) clearFinalizers(ctx context.Context, app *v1beta2.FlinkApplication) (bool, error) { app.Finalizers = removeString(app.Finalizers, jobFinalizer) return statusUnchanged, s.k8Cluster.UpdateK8Object(ctx, app) } @@ -699,7 +699,7 @@ func jobFinished(job *client.FlinkJobOverview) bool { job.State == client.Finished } -func (s *FlinkStateMachine) handleApplicationDeleting(ctx context.Context, app *v1beta1.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) handleApplicationDeleting(ctx context.Context, app *v1beta2.FlinkApplication) (bool, error) { // There should be a way for the user to force deletion (e.g., if the job is failing and they can't // savepoint). However, this seems dangerous to do automatically. 
// If https://github.com/kubernetes/kubernetes/issues/56567 is fixed users will be able to use @@ -707,7 +707,7 @@ func (s *FlinkStateMachine) handleApplicationDeleting(ctx context.Context, app * // If the delete mode is none or there's no deployhash set (which means we failed to submit the job on the // first deploy) just delete the finalizer so the cluster can be torn down - if app.Spec.DeleteMode == v1beta1.DeleteModeNone || app.Status.DeployHash == "" { + if app.Spec.DeleteMode == v1beta2.DeleteModeNone || app.Status.DeployHash == "" { return s.clearFinalizers(ctx, app) } @@ -717,7 +717,7 @@ func (s *FlinkStateMachine) handleApplicationDeleting(ctx context.Context, app * } switch app.Spec.DeleteMode { - case v1beta1.DeleteModeForceCancel: + case v1beta2.DeleteModeForceCancel: if job.State == client.Cancelling { // we've already cancelled the job, waiting for it to finish return statusUnchanged, nil @@ -728,7 +728,7 @@ func (s *FlinkStateMachine) handleApplicationDeleting(ctx context.Context, app * logger.Infof(ctx, "Force-cancelling job without a savepoint") return statusUnchanged, s.flinkController.ForceCancel(ctx, app, app.Status.DeployHash) - case v1beta1.DeleteModeSavepoint, "": + case v1beta2.DeleteModeSavepoint, "": if app.Status.SavepointPath != "" { // we've already created the savepoint, now just waiting for the job to be cancelled if jobFinished(job) { @@ -761,7 +761,7 @@ func (s *FlinkStateMachine) handleApplicationDeleting(ctx context.Context, app * // clear the trigger id so that we can try again app.Status.SavepointTriggerID = "" return true, client.GetRetryableError(errors.New("failed to take savepoint"), - v1beta1.CancelJobWithSavepoint, "500", math.MaxInt32) + v1beta2.CancelJobWithSavepoint, "500", math.MaxInt32) } else if status.SavepointStatus.Status == client.SavePointCompleted { // we're done, clean up s.flinkController.LogEvent(ctx, app, corev1.EventTypeNormal, "CanceledJob", @@ -779,7 +779,7 @@ func (s *FlinkStateMachine) 
handleApplicationDeleting(ctx context.Context, app * return statusUnchanged, nil } -func (s *FlinkStateMachine) compareAndUpdateError(application *v1beta1.FlinkApplication, err error) bool { +func (s *FlinkStateMachine) compareAndUpdateError(application *v1beta2.FlinkApplication, err error) bool { oldErr := application.Status.LastSeenError if err == nil && oldErr == nil { @@ -789,11 +789,11 @@ func (s *FlinkStateMachine) compareAndUpdateError(application *v1beta1.FlinkAppl if err == nil { application.Status.LastSeenError = nil } else { - if flinkAppError, ok := err.(*v1beta1.FlinkApplicationError); ok { + if flinkAppError, ok := err.(*v1beta2.FlinkApplicationError); ok { application.Status.LastSeenError = flinkAppError } else { err = client.GetRetryableError(err, "UnknownMethod", client.GlobalFailure, client.DefaultRetries) - application.Status.LastSeenError = err.(*v1beta1.FlinkApplicationError) + application.Status.LastSeenError = err.(*v1beta2.FlinkApplicationError) } now := v1.NewTime(s.clock.Now()) diff --git a/pkg/controller/flinkapplication/flink_state_machine_test.go b/pkg/controller/flinkapplication/flink_state_machine_test.go index d241b05c..865d6b8d 100644 --- a/pkg/controller/flinkapplication/flink_state_machine_test.go +++ b/pkg/controller/flinkapplication/flink_state_machine_test.go @@ -11,7 +11,7 @@ import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" "github.com/lyft/flinkk8soperator/pkg/controller/common" "github.com/lyft/flinkk8soperator/pkg/controller/flink/mock" k8mock "github.com/lyft/flinkk8soperator/pkg/controller/k8/mock" @@ -37,7 +37,7 @@ func getTestStateMachine() FlinkStateMachine { } } -func testFlinkDeployment(app *v1beta1.FlinkApplication) common.FlinkDeployment { +func testFlinkDeployment(app *v1beta2.FlinkApplication) common.FlinkDeployment { hash := flink.HashForApplication(app) return 
common.FlinkDeployment{ Jobmanager: flink.FetchJobMangerDeploymentCreateObj(app, hash), @@ -51,13 +51,13 @@ func TestHandleNewOrCreate(t *testing.T) { mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.UpdateK8ObjectFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta1.FlinkApplication) - assert.Equal(t, v1beta1.FlinkApplicationClusterStarting, application.Status.Phase) + application := object.(*v1beta2.FlinkApplication) + assert.Equal(t, v1beta2.FlinkApplicationClusterStarting, application.Status.Phase) return nil } - err := stateMachineForTest.Handle(context.Background(), &v1beta1.FlinkApplication{ - Spec: v1beta1.FlinkApplicationSpec{}, + err := stateMachineForTest.Handle(context.Background(), &v1beta2.FlinkApplication{ + Spec: v1beta2.FlinkApplicationSpec{}, }) assert.Nil(t, err) } @@ -65,7 +65,7 @@ func TestHandleNewOrCreate(t *testing.T) { func TestHandleStartingClusterStarting(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.IsClusterReadyFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { + mockFlinkController.IsClusterReadyFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { return false, nil } @@ -73,9 +73,9 @@ func TestHandleStartingClusterStarting(t *testing.T) { mockK8Cluster.UpdateK8ObjectFunc = func(ctx context.Context, object runtime.Object) error { return nil } - err := stateMachineForTest.Handle(context.Background(), &v1beta1.FlinkApplication{ - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationClusterStarting, + err := stateMachineForTest.Handle(context.Background(), &v1beta2.FlinkApplication{ + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationClusterStarting, }, }) assert.Nil(t, err) @@ -85,15 +85,15 @@ func TestHandleStartingDual(t 
*testing.T) { updateInvoked := false stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.IsClusterReadyFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { + mockFlinkController.IsClusterReadyFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { return true, nil } - mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (b bool, e error) { + mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (b bool, e error) { return true, nil } - mockFlinkController.GetCurrentDeploymentsForAppFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (*common.FlinkDeployment, error) { + mockFlinkController.GetCurrentDeploymentsForAppFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (*common.FlinkDeployment, error) { fd := testFlinkDeployment(application) fd.Taskmanager.Status.AvailableReplicas = 2 fd.Jobmanager.Status.AvailableReplicas = 1 @@ -102,14 +102,14 @@ func TestHandleStartingDual(t *testing.T) { mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta1.FlinkApplication) - assert.Equal(t, v1beta1.FlinkApplicationSavepointing, application.Status.Phase) + application := object.(*v1beta2.FlinkApplication) + assert.Equal(t, v1beta2.FlinkApplicationSavepointing, application.Status.Phase) updateInvoked = true return nil } - err := stateMachineForTest.Handle(context.Background(), &v1beta1.FlinkApplication{ - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationClusterStarting, + err := stateMachineForTest.Handle(context.Background(), &v1beta2.FlinkApplication{ + Status: v1beta2.FlinkApplicationStatus{ 
+ Phase: v1beta2.FlinkApplicationClusterStarting, }, }) assert.True(t, updateInvoked) @@ -122,7 +122,7 @@ func TestHandleApplicationSavepointingInitialDeploy(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.CancelWithSavepointFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (s string, e error) { + mockFlinkController.CancelWithSavepointFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (s string, e error) { // should not be called assert.False(t, true) return "", nil @@ -130,15 +130,15 @@ func TestHandleApplicationSavepointingInitialDeploy(t *testing.T) { mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta1.FlinkApplication) - assert.Equal(t, v1beta1.FlinkApplicationSubmittingJob, application.Status.Phase) + application := object.(*v1beta2.FlinkApplication) + assert.Equal(t, v1beta2.FlinkApplicationSubmittingJob, application.Status.Phase) updateInvoked = true return nil } - err := stateMachineForTest.Handle(context.Background(), &v1beta1.FlinkApplication{ - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationSavepointing, + err := stateMachineForTest.Handle(context.Background(), &v1beta2.FlinkApplication{ + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationSavepointing, }, }) assert.True(t, updateInvoked) @@ -146,9 +146,9 @@ func TestHandleApplicationSavepointingInitialDeploy(t *testing.T) { } func TestHandleApplicationSavepointingDual(t *testing.T) { - app := v1beta1.FlinkApplication{ - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationSavepointing, + app := v1beta2.FlinkApplication{ + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationSavepointing, 
DeployHash: "old-hash", }, } @@ -157,14 +157,14 @@ func TestHandleApplicationSavepointingDual(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.CancelWithSavepointFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (s string, e error) { + mockFlinkController.CancelWithSavepointFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (s string, e error) { assert.Equal(t, "old-hash", hash) cancelInvoked = true return "trigger", nil } - mockFlinkController.GetSavepointStatusFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.SavepointResponse, error) { + mockFlinkController.GetSavepointStatusFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.SavepointResponse, error) { assert.Equal(t, "old-hash", hash) return &client.SavepointResponse{ SavepointStatus: client.SavepointStatusResponse{ @@ -179,12 +179,12 @@ func TestHandleApplicationSavepointingDual(t *testing.T) { mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) updateCount := 0 mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta1.FlinkApplication) + application := object.(*v1beta2.FlinkApplication) if updateCount == 0 { assert.Equal(t, "trigger", application.Status.SavepointTriggerID) } else { assert.Equal(t, testSavepointLocation, application.Status.SavepointPath) - assert.Equal(t, v1beta1.FlinkApplicationSubmittingJob, application.Status.Phase) + assert.Equal(t, v1beta2.FlinkApplicationSubmittingJob, application.Status.Phase) } updateCount++ @@ -206,7 +206,7 @@ func TestHandleApplicationSavepointingFailed(t *testing.T) { updateInvoked := false stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - 
mockFlinkController.GetSavepointStatusFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.SavepointResponse, error) { + mockFlinkController.GetSavepointStatusFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.SavepointResponse, error) { return &client.SavepointResponse{ SavepointStatus: client.SavepointStatusResponse{ Status: client.SavePointCompleted, @@ -214,10 +214,10 @@ func TestHandleApplicationSavepointingFailed(t *testing.T) { }, nil } - app := v1beta1.FlinkApplication{ - Spec: v1beta1.FlinkApplicationSpec{}, - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationSavepointing, + app := v1beta2.FlinkApplication{ + Spec: v1beta2.FlinkApplicationSpec{}, + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationSavepointing, DeployHash: "blah", SavepointTriggerID: "trigger", }, @@ -225,9 +225,9 @@ func TestHandleApplicationSavepointingFailed(t *testing.T) { mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta1.FlinkApplication) + application := object.(*v1beta2.FlinkApplication) assert.Empty(t, application.Status.SavepointPath) - assert.Equal(t, v1beta1.FlinkApplicationRecovering, application.Status.Phase) + assert.Equal(t, v1beta2.FlinkApplicationRecovering, application.Status.Phase) updateInvoked = true return nil } @@ -239,10 +239,10 @@ func TestHandleApplicationSavepointingFailed(t *testing.T) { func TestRestoreFromExternalizedCheckpoint(t *testing.T) { updateInvoked := false - app := v1beta1.FlinkApplication{ - Spec: v1beta1.FlinkApplicationSpec{}, - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationRecovering, + app := v1beta2.FlinkApplication{ + Spec: v1beta2.FlinkApplicationSpec{}, + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationRecovering, 
DeployHash: "blah", SavepointTriggerID: "trigger", }, @@ -251,15 +251,15 @@ func TestRestoreFromExternalizedCheckpoint(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.FindExternalizedCheckpointFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) { + mockFlinkController.FindExternalizedCheckpointFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (string, error) { return "/tmp/checkpoint", nil } mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta1.FlinkApplication) + application := object.(*v1beta2.FlinkApplication) assert.Equal(t, "/tmp/checkpoint", application.Status.SavepointPath) - assert.Equal(t, v1beta1.FlinkApplicationSubmittingJob, application.Status.Phase) + assert.Equal(t, v1beta2.FlinkApplicationSubmittingJob, application.Status.Phase) updateInvoked = true return nil } @@ -271,19 +271,19 @@ func TestRestoreFromExternalizedCheckpoint(t *testing.T) { func TestSubmittingToRunning(t *testing.T) { jobID := "j1" - app := v1beta1.FlinkApplication{ + app := v1beta2.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Name: "test-app", Namespace: "flink", }, - Spec: v1beta1.FlinkApplicationSpec{ + Spec: v1beta2.FlinkApplicationSpec{ JarName: "job.jar", Parallelism: 5, EntryClass: "com.my.Class", ProgramArgs: "--test", }, - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationSubmittingJob, + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationSubmittingJob, DeployHash: "old-hash", }, } @@ -291,11 +291,11 @@ func TestSubmittingToRunning(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - 
mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) { + mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) { return true, nil } - mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { + mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { assert.Equal(t, appHash, hash) return &client.FlinkJobOverview{ JobID: jobID, @@ -304,7 +304,7 @@ func TestSubmittingToRunning(t *testing.T) { } startCount := 0 - mockFlinkController.StartFlinkJobFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string, + mockFlinkController.StartFlinkJobFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string, jarName string, parallelism int32, entryClass string, programArgs string, allowNonRestoredState bool, savepointPath string) (string, error) { assert.Equal(t, appHash, hash) @@ -352,7 +352,7 @@ func TestSubmittingToRunning(t *testing.T) { service := object.(*v1.Service) assert.Equal(t, appHash, service.Spec.Selector["flink-app-hash"]) } else if updateCount == 1 { - application := object.(*v1beta1.FlinkApplication) + application := object.(*v1beta2.FlinkApplication) assert.Equal(t, jobFinalizer, application.Finalizers[0]) } @@ -363,17 +363,17 @@ func TestSubmittingToRunning(t *testing.T) { statusUpdateCount := 0 mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { if statusUpdateCount == 0 { - application := object.(*v1beta1.FlinkApplication) + application := object.(*v1beta2.FlinkApplication) assert.Equal(t, jobID, mockFlinkController.GetLatestJobID(ctx, application)) } else if statusUpdateCount == 1 { - application := 
object.(*v1beta1.FlinkApplication) + application := object.(*v1beta2.FlinkApplication) assert.Equal(t, appHash, application.Status.DeployHash) jobStatus := mockFlinkController.GetLatestJobStatus(ctx, application) assert.Equal(t, app.Spec.JarName, jobStatus.JarName) assert.Equal(t, app.Spec.Parallelism, jobStatus.Parallelism) assert.Equal(t, app.Spec.EntryClass, jobStatus.EntryClass) assert.Equal(t, app.Spec.ProgramArgs, jobStatus.ProgramArgs) - assert.Equal(t, v1beta1.FlinkApplicationRunning, application.Status.Phase) + assert.Equal(t, v1beta2.FlinkApplicationRunning, application.Status.Phase) } statusUpdateCount++ return nil @@ -392,7 +392,7 @@ func TestSubmittingToRunning(t *testing.T) { func TestHandleApplicationRunning(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.GetCurrentDeploymentsForAppFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (*common.FlinkDeployment, error) { + mockFlinkController.GetCurrentDeploymentsForAppFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (*common.FlinkDeployment, error) { fd := testFlinkDeployment(application) return &fd, nil } @@ -402,9 +402,9 @@ func TestHandleApplicationRunning(t *testing.T) { assert.True(t, false) return nil } - err := stateMachineForTest.Handle(context.Background(), &v1beta1.FlinkApplication{ - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationRunning, + err := stateMachineForTest.Handle(context.Background(), &v1beta2.FlinkApplication{ + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationRunning, }, }) assert.Nil(t, err) @@ -414,20 +414,20 @@ func TestRunningToClusterStarting(t *testing.T) { updateInvoked := false stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.GetCurrentDeploymentsForAppFunc = 
func(ctx context.Context, application *v1beta1.FlinkApplication) (*common.FlinkDeployment, error) { + mockFlinkController.GetCurrentDeploymentsForAppFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (*common.FlinkDeployment, error) { return nil, nil } mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta1.FlinkApplication) - assert.Equal(t, v1beta1.FlinkApplicationUpdating, application.Status.Phase) + application := object.(*v1beta2.FlinkApplication) + assert.Equal(t, v1beta2.FlinkApplicationUpdating, application.Status.Phase) updateInvoked = true return nil } - err := stateMachineForTest.Handle(context.Background(), &v1beta1.FlinkApplication{ - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationRunning, + err := stateMachineForTest.Handle(context.Background(), &v1beta2.FlinkApplication{ + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationRunning, }, }) assert.True(t, updateInvoked) @@ -437,24 +437,24 @@ func TestRunningToClusterStarting(t *testing.T) { func TestRollingBack(t *testing.T) { jobID := "j1" - app := v1beta1.FlinkApplication{ + app := v1beta2.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Name: "test-app", Namespace: "flink", }, - Spec: v1beta1.FlinkApplicationSpec{ + Spec: v1beta2.FlinkApplicationSpec{ JarName: "job.jar", Parallelism: 5, EntryClass: "com.my.Class", ProgramArgs: "--test", }, - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationRollingBackJob, + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationRollingBackJob, DeployHash: "old-hash", SavepointPath: "file:///savepoint", - ApplicationStatus: []v1beta1.FlinkApplicationVersionStatus{ - v1beta1.FlinkApplicationVersionStatus{ - JobStatus: v1beta1.FlinkJobStatus{ + ApplicationStatus: []v1beta2.FlinkApplicationVersionStatus{ + 
v1beta2.FlinkApplicationVersionStatus{ + JobStatus: v1beta2.FlinkJobStatus{ JarName: "old-job.jar", Parallelism: 10, EntryClass: "com.my.OldClass", @@ -468,13 +468,13 @@ func TestRollingBack(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) { + mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) { assert.Equal(t, "old-hash", hash) return true, nil } startCalled := false - mockFlinkController.StartFlinkJobFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string, + mockFlinkController.StartFlinkJobFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string, jarName string, parallelism int32, entryClass string, programArgs string, allowNonRestoredState bool, savepointPath string) (string, error) { startCalled = true @@ -490,7 +490,7 @@ func TestRollingBack(t *testing.T) { } getCount := 0 - mockFlinkController.GetJobsForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) ([]client.FlinkJob, error) { + mockFlinkController.GetJobsForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) ([]client.FlinkJob, error) { assert.Equal(t, "old-hash", hash) var res []client.FlinkJob if getCount == 1 { @@ -537,7 +537,7 @@ func TestRollingBack(t *testing.T) { service := object.(*v1.Service) assert.Equal(t, "old-hash", service.Spec.Selector["flink-app-hash"]) } else if updateCount == 1 { - application := object.(*v1beta1.FlinkApplication) + application := object.(*v1beta2.FlinkApplication) assert.Equal(t, jobFinalizer, application.Finalizers[0]) } @@ -548,9 +548,9 @@ func TestRollingBack(t *testing.T) { statusUpdated := false 
mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { if !statusUpdated { - application := object.(*v1beta1.FlinkApplication) + application := object.(*v1beta2.FlinkApplication) assert.Equal(t, appHash, application.Status.FailedDeployHash) - assert.Equal(t, v1beta1.FlinkApplicationDeployFailed, application.Status.Phase) + assert.Equal(t, v1beta2.FlinkApplicationDeployFailed, application.Status.Phase) statusUpdated = true } return nil @@ -572,28 +572,28 @@ func TestIsApplicationStuck(t *testing.T) { retryableErr := client.GetRetryableError(errors.New("blah"), "GetClusterOverview", "FAILED", 3) failFastError := client.GetNonRetryableError(errors.New("blah"), "SubmitJob", "400BadRequest") - app := &v1beta1.FlinkApplication{ - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationClusterStarting, + app := &v1beta2.FlinkApplication{ + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationClusterStarting, DeployHash: "prevhash", - LastSeenError: retryableErr.(*v1beta1.FlinkApplicationError), + LastSeenError: retryableErr.(*v1beta2.FlinkApplicationError), }, } mockRetryHandler := stateMachineForTest.retryHandler.(*mock.RetryHandler) mockRetryHandler.IsErrorRetryableFunc = func(err error) bool { - ferr, ok := err.(*v1beta1.FlinkApplicationError) + ferr, ok := err.(*v1beta2.FlinkApplicationError) assert.True(t, ok) return ferr.IsRetryable } mockRetryHandler.IsRetryRemainingFunc = func(err error, retryCount int32) bool { - ferr, ok := err.(*v1beta1.FlinkApplicationError) + ferr, ok := err.(*v1beta2.FlinkApplicationError) assert.True(t, ok) return retryCount <= ferr.MaxRetries } mockRetryHandler.IsErrorFailFastFunc = func(err error) bool { - ferr, ok := err.(*v1beta1.FlinkApplicationError) + ferr, ok := err.(*v1beta2.FlinkApplicationError) assert.True(t, ok) return ferr.IsFailFast } @@ -607,7 +607,7 @@ func TestIsApplicationStuck(t *testing.T) { // Retryable error with retries exhausted 
app.Status.RetryCount = 100 - app.Status.LastSeenError = retryableErr.(*v1beta1.FlinkApplicationError) + app.Status.LastSeenError = retryableErr.(*v1beta2.FlinkApplicationError) shouldRollback, _ = stateMachineForTest.shouldRollback(context.Background(), app) assert.True(t, shouldRollback, app) assert.Nil(t, app.Status.LastSeenError) @@ -615,7 +615,7 @@ func TestIsApplicationStuck(t *testing.T) { // Fail fast error app.Status.RetryCount = 0 - app.Status.LastSeenError = failFastError.(*v1beta1.FlinkApplicationError) + app.Status.LastSeenError = failFastError.(*v1beta2.FlinkApplicationError) shouldRollback, _ = stateMachineForTest.shouldRollback(context.Background(), app) assert.True(t, shouldRollback) assert.Nil(t, app.Status.LastSeenError) @@ -627,17 +627,17 @@ func TestDeleteWithSavepoint(t *testing.T) { stateMachineForTest := getTestStateMachine() jobID := "j1" - app := v1beta1.FlinkApplication{ + app := v1beta2.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Finalizers: []string{jobFinalizer}, DeletionTimestamp: &metav1.Time{Time: time.Now()}, }, - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationDeleting, + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationDeleting, DeployHash: "deployhash", - ApplicationStatus: []v1beta1.FlinkApplicationVersionStatus{ - v1beta1.FlinkApplicationVersionStatus{ - JobStatus: v1beta1.FlinkJobStatus{ + ApplicationStatus: []v1beta2.FlinkApplicationVersionStatus{ + v1beta2.FlinkApplicationVersionStatus{ + JobStatus: v1beta2.FlinkJobStatus{ JobID: jobID, }, }, @@ -649,11 +649,11 @@ func TestDeleteWithSavepoint(t *testing.T) { savepointPath := "s3:///path/to/savepoint" mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.CancelWithSavepointFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) { + mockFlinkController.CancelWithSavepointFunc = func(ctx context.Context, application 
*v1beta2.FlinkApplication, hash string) (string, error) { return triggerID, nil } - mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (jobs *client.FlinkJobOverview, err error) { + mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (jobs *client.FlinkJobOverview, err error) { return &client.FlinkJobOverview{ JobID: jobID, State: "RUNNING", @@ -663,8 +663,8 @@ func TestDeleteWithSavepoint(t *testing.T) { mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) updateStatusCount := 0 mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta1.FlinkApplication) - assert.Equal(t, v1beta1.FlinkApplicationDeleting, application.Status.Phase) + application := object.(*v1beta2.FlinkApplication) + assert.Equal(t, v1beta2.FlinkApplicationDeleting, application.Status.Phase) if updateStatusCount == 0 { assert.Equal(t, triggerID, application.Status.SavepointTriggerID) @@ -689,7 +689,7 @@ func TestDeleteWithSavepoint(t *testing.T) { assert.NoError(t, err) savepointStatusCount := 0 - mockFlinkController.GetSavepointStatusFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.SavepointResponse, error) { + mockFlinkController.GetSavepointStatusFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.SavepointResponse, error) { savepointStatusCount++ if savepointStatusCount == 1 { @@ -724,7 +724,7 @@ func TestDeleteWithSavepoint(t *testing.T) { assert.Equal(t, 3, updateStatusCount) - mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (jobs *client.FlinkJobOverview, err error) { + mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (jobs 
*client.FlinkJobOverview, err error) { return &client.FlinkJobOverview{ JobID: jobID, State: "CANCELED", @@ -742,18 +742,18 @@ func TestDeleteWithSavepointAndFinishedJob(t *testing.T) { stateMachineForTest := getTestStateMachine() jobID := "j1" - app := v1beta1.FlinkApplication{ + app := v1beta2.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Finalizers: []string{jobFinalizer}, DeletionTimestamp: &metav1.Time{Time: time.Now()}, }, - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationDeleting, + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationDeleting, DeployHash: "deployhash", SavepointPath: "file:///savepoint", - ApplicationStatus: []v1beta1.FlinkApplicationVersionStatus{ - v1beta1.FlinkApplicationVersionStatus{ - JobStatus: v1beta1.FlinkJobStatus{ + ApplicationStatus: []v1beta2.FlinkApplicationVersionStatus{ + v1beta2.FlinkApplicationVersionStatus{ + JobStatus: v1beta2.FlinkJobStatus{ JobID: jobID, }, }, @@ -763,7 +763,7 @@ func TestDeleteWithSavepointAndFinishedJob(t *testing.T) { mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.GetJobsForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (jobs []client.FlinkJob, err error) { + mockFlinkController.GetJobsForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (jobs []client.FlinkJob, err error) { return []client.FlinkJob{ { JobID: jobID, @@ -775,8 +775,8 @@ func TestDeleteWithSavepointAndFinishedJob(t *testing.T) { mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.UpdateK8ObjectFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta1.FlinkApplication) - assert.Equal(t, v1beta1.FlinkApplicationDeleting, application.Status.Phase) + application := object.(*v1beta2.FlinkApplication) + assert.Equal(t, v1beta2.FlinkApplicationDeleting, 
application.Status.Phase) assert.Equal(t, 0, len(app.Finalizers)) @@ -792,19 +792,19 @@ func TestDeleteWithForceCancel(t *testing.T) { jobID := "j1" - app := v1beta1.FlinkApplication{ + app := v1beta2.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Finalizers: []string{jobFinalizer}, DeletionTimestamp: &metav1.Time{Time: time.Now()}, }, - Spec: v1beta1.FlinkApplicationSpec{ - DeleteMode: v1beta1.DeleteModeForceCancel, + Spec: v1beta2.FlinkApplicationSpec{ + DeleteMode: v1beta2.DeleteModeForceCancel, }, - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationDeleting, - ApplicationStatus: []v1beta1.FlinkApplicationVersionStatus{ - v1beta1.FlinkApplicationVersionStatus{ - JobStatus: v1beta1.FlinkJobStatus{ + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationDeleting, + ApplicationStatus: []v1beta2.FlinkApplicationVersionStatus{ + v1beta2.FlinkApplicationVersionStatus{ + JobStatus: v1beta2.FlinkJobStatus{ JobID: jobID, }, }, @@ -816,7 +816,7 @@ func TestDeleteWithForceCancel(t *testing.T) { mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { + mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { return &client.FlinkJobOverview{ JobID: jobID, State: "RUNNING", @@ -824,7 +824,7 @@ func TestDeleteWithForceCancel(t *testing.T) { } cancelled := false - mockFlinkController.ForceCancelFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) error { + mockFlinkController.ForceCancelFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) error { cancelled = true return nil } @@ -832,8 +832,8 @@ func TestDeleteWithForceCancel(t *testing.T) { mockK8Cluster := 
stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) updateCount := 1 mockK8Cluster.UpdateK8ObjectFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta1.FlinkApplication) - assert.Equal(t, v1beta1.FlinkApplicationDeleting, application.Status.Phase) + application := object.(*v1beta2.FlinkApplication) + assert.Equal(t, v1beta2.FlinkApplicationDeleting, application.Status.Phase) if updateCount == 1 { assert.Equal(t, 0, len(app.Finalizers)) @@ -848,7 +848,7 @@ func TestDeleteWithForceCancel(t *testing.T) { assert.Equal(t, 1, updateCount) assert.True(t, cancelled) - mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { + mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { return &client.FlinkJobOverview{ JobID: jobID, State: "CANCELED", @@ -863,16 +863,16 @@ func TestDeleteWithForceCancel(t *testing.T) { func TestDeleteModeNone(t *testing.T) { stateMachineForTest := getTestStateMachine() - app := v1beta1.FlinkApplication{ + app := v1beta2.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Finalizers: []string{jobFinalizer}, DeletionTimestamp: &metav1.Time{Time: time.Now()}, }, - Spec: v1beta1.FlinkApplicationSpec{ - DeleteMode: v1beta1.DeleteModeNone, + Spec: v1beta2.FlinkApplicationSpec{ + DeleteMode: v1beta2.DeleteModeNone, }, - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationDeleting, + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationDeleting, }, } @@ -880,7 +880,7 @@ func TestDeleteModeNone(t *testing.T) { mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.GetJobsForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (jobs []client.FlinkJob, err error) { + 
mockFlinkController.GetJobsForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (jobs []client.FlinkJob, err error) { return []client.FlinkJob{ { JobID: jobID, @@ -890,7 +890,7 @@ func TestDeleteModeNone(t *testing.T) { } cancelled := false - mockFlinkController.ForceCancelFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) error { + mockFlinkController.ForceCancelFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) error { cancelled = true return nil } @@ -898,8 +898,8 @@ func TestDeleteModeNone(t *testing.T) { mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) updateCount := 1 mockK8Cluster.UpdateK8ObjectFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta1.FlinkApplication) - assert.Equal(t, v1beta1.FlinkApplicationDeleting, application.Status.Phase) + application := object.(*v1beta2.FlinkApplication) + assert.Equal(t, v1beta2.FlinkApplicationDeleting, application.Status.Phase) if updateCount == 1 { assert.Equal(t, 0, len(app.Finalizers)) @@ -918,9 +918,9 @@ func TestDeleteModeNone(t *testing.T) { func TestHandleInvalidPhase(t *testing.T) { stateMachineForTest := getTestStateMachine() - err := stateMachineForTest.Handle(context.Background(), &v1beta1.FlinkApplication{ - Spec: v1beta1.FlinkApplicationSpec{}, - Status: v1beta1.FlinkApplicationStatus{ + err := stateMachineForTest.Handle(context.Background(), &v1beta2.FlinkApplication{ + Spec: v1beta2.FlinkApplicationSpec{}, + Status: v1beta2.FlinkApplicationStatus{ Phase: "asd", }, }) @@ -929,19 +929,19 @@ func TestHandleInvalidPhase(t *testing.T) { } func TestRollbackWithRetryableError(t *testing.T) { - app := v1beta1.FlinkApplication{ + app := v1beta2.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Name: "test-app", Namespace: "flink", }, - Spec: v1beta1.FlinkApplicationSpec{ + Spec: v1beta2.FlinkApplicationSpec{ JarName: "job.jar", 
Parallelism: 5, EntryClass: "com.my.Class", ProgramArgs: "--test", }, - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationSavepointing, + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationSavepointing, DeployHash: "old-hash-retry", }, } @@ -949,19 +949,19 @@ func TestRollbackWithRetryableError(t *testing.T) { retryableErr := client.GetRetryableError(errors.New("blah"), "GetClusterOverview", "FAILED", 3) stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.CancelWithSavepointFunc = func(ctx context.Context, app *v1beta1.FlinkApplication, hash string) (savepoint string, err error) { + mockFlinkController.CancelWithSavepointFunc = func(ctx context.Context, app *v1beta2.FlinkApplication, hash string) (savepoint string, err error) { return "", retryableErr } mockRetryHandler := stateMachineForTest.retryHandler.(*mock.RetryHandler) mockRetryHandler.IsErrorRetryableFunc = func(err error) bool { - ferr, ok := err.(*v1beta1.FlinkApplicationError) + ferr, ok := err.(*v1beta2.FlinkApplicationError) assert.True(t, ok) return ferr.IsRetryable } mockRetryHandler.IsRetryRemainingFunc = func(err error, retryCount int32) bool { - ferr, ok := err.(*v1beta1.FlinkApplicationError) + ferr, ok := err.(*v1beta2.FlinkApplicationError) assert.True(t, ok) return retryCount <= ferr.MaxRetries } @@ -983,8 +983,8 @@ func TestRollbackWithRetryableError(t *testing.T) { } retries := 0 - for ; app.Status.Phase != v1beta1.FlinkApplicationRecovering; retries++ { - assert.Equal(t, v1beta1.FlinkApplicationSavepointing, app.Status.Phase) + for ; app.Status.Phase != v1beta2.FlinkApplicationRecovering; retries++ { + assert.Equal(t, v1beta2.FlinkApplicationSavepointing, app.Status.Phase) err := stateMachineForTest.Handle(context.Background(), &app) // First attempt does not rollback @@ -998,25 +998,25 @@ func TestRollbackWithRetryableError(t *testing.T) { 
assert.Equal(t, 5, retries) assert.Equal(t, 5, updateErrCount) // Retries should have been exhausted and errors and retry counts reset - assert.Equal(t, v1beta1.FlinkApplicationRecovering, app.Status.Phase) + assert.Equal(t, v1beta2.FlinkApplicationRecovering, app.Status.Phase) assert.Equal(t, int32(0), app.Status.RetryCount) assert.Nil(t, app.Status.LastSeenError) } func TestRollbackWithFailFastError(t *testing.T) { - app := v1beta1.FlinkApplication{ + app := v1beta2.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Name: "test-app", Namespace: "flink", }, - Spec: v1beta1.FlinkApplicationSpec{ + Spec: v1beta2.FlinkApplicationSpec{ JarName: "job.jar", Parallelism: 5, EntryClass: "com.my.Class", ProgramArgs: "--test", }, - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationSubmittingJob, + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationSubmittingJob, DeployHash: "old-hash-retry-err", }, } @@ -1025,7 +1025,7 @@ func TestRollbackWithFailFastError(t *testing.T) { mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) getCount := 0 - mockFlinkController.GetJobsForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) ([]client.FlinkJob, error) { + mockFlinkController.GetJobsForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) ([]client.FlinkJob, error) { var res []client.FlinkJob if getCount == 1 { res = []client.FlinkJob{ @@ -1038,11 +1038,11 @@ func TestRollbackWithFailFastError(t *testing.T) { return res, nil } - mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) { + mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) { return true, nil } failFastError := client.GetNonRetryableError(errors.New("blah"), "SubmitJob", "400BadRequest") - 
mockFlinkController.StartFlinkJobFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string, + mockFlinkController.StartFlinkJobFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string, jarName string, parallelism int32, entryClass string, programArgs string, allowNonRestoredState bool, savepointPath string) (string, error) { return "", failFastError } @@ -1071,15 +1071,15 @@ func TestRollbackWithFailFastError(t *testing.T) { } mockRetryHandler := stateMachineForTest.retryHandler.(*mock.RetryHandler) mockRetryHandler.IsErrorFailFastFunc = func(err error) bool { - ferr, ok := err.(*v1beta1.FlinkApplicationError) + ferr, ok := err.(*v1beta2.FlinkApplicationError) assert.True(t, ok) return ferr.IsFailFast } retries := 0 var err error - for ; app.Status.Phase == v1beta1.FlinkApplicationSubmittingJob; retries++ { + for ; app.Status.Phase == v1beta2.FlinkApplicationSubmittingJob; retries++ { err = stateMachineForTest.Handle(context.Background(), &app) - if app.Status.Phase == v1beta1.FlinkApplicationSubmittingJob { + if app.Status.Phase == v1beta2.FlinkApplicationSubmittingJob { assert.NotNil(t, err) assert.Equal(t, int32(0), app.Status.RetryCount) assert.NotNil(t, app.Status.LastSeenError) @@ -1089,25 +1089,25 @@ func TestRollbackWithFailFastError(t *testing.T) { assert.Equal(t, 2, retries) // once in rollingback phase, errors no longer exist - assert.Equal(t, v1beta1.FlinkApplicationRollingBackJob, app.Status.Phase) + assert.Equal(t, v1beta2.FlinkApplicationRollingBackJob, app.Status.Phase) assert.Equal(t, int32(0), app.Status.RetryCount) assert.Nil(t, app.Status.LastSeenError) } func TestErrorHandlingInRunningPhase(t *testing.T) { - app := v1beta1.FlinkApplication{ + app := v1beta2.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Name: "test-app", Namespace: "flink", }, - Spec: v1beta1.FlinkApplicationSpec{ + Spec: v1beta2.FlinkApplicationSpec{ JarName: "job.jar", Parallelism: 5, EntryClass: "com.my.Class", 
ProgramArgs: "--test", }, - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationRunning, + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationRunning, DeployHash: "old-hash-retry-err", }, } @@ -1115,7 +1115,7 @@ func TestErrorHandlingInRunningPhase(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.GetCurrentDeploymentsForAppFunc = func(ctx context.Context, app *v1beta1.FlinkApplication) (*common.FlinkDeployment, error) { + mockFlinkController.GetCurrentDeploymentsForAppFunc = func(ctx context.Context, app *v1beta2.FlinkApplication) (*common.FlinkDeployment, error) { return &common.FlinkDeployment{ Jobmanager: nil, Taskmanager: nil, @@ -1123,7 +1123,7 @@ func TestErrorHandlingInRunningPhase(t *testing.T) { }, nil } - mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { + mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { return nil, client.GetNonRetryableError(errors.New("running phase error"), "TestError", "400") } @@ -1137,20 +1137,20 @@ func TestErrorHandlingInRunningPhase(t *testing.T) { func TestForceRollback(t *testing.T) { oldHash := "old-hash-force-rollback" - app := v1beta1.FlinkApplication{ + app := v1beta2.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Name: "test-app", Namespace: "flink", }, - Spec: v1beta1.FlinkApplicationSpec{ + Spec: v1beta2.FlinkApplicationSpec{ JarName: "job.jar", Parallelism: 5, EntryClass: "com.my.Class", ProgramArgs: "--test", ForceRollback: true, }, - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationSubmittingJob, + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationSubmittingJob, DeployHash: oldHash, }, } @@ 
-1187,14 +1187,14 @@ func TestForceRollback(t *testing.T) { } mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) { + mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) { return true, nil } err := stateMachineForTest.Handle(context.Background(), &app) assert.Nil(t, err) // rolled deploy while cluster is starting - assert.Equal(t, v1beta1.FlinkApplicationRollingBackJob, app.Status.Phase) + assert.Equal(t, v1beta2.FlinkApplicationRollingBackJob, app.Status.Phase) assert.True(t, app.Spec.ForceRollback) err = stateMachineForTest.Handle(context.Background(), &app) @@ -1206,21 +1206,21 @@ func TestForceRollback(t *testing.T) { func TestLastSeenErrTimeIsNil(t *testing.T) { oldHash := "old-hash-force-nil" retryableErr := client.GetRetryableError(errors.New("blah"), "GetClusterOverview", "FAILED", 3) - app := v1beta1.FlinkApplication{ + app := v1beta2.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Name: "test-app", Namespace: "flink", }, - Spec: v1beta1.FlinkApplicationSpec{ + Spec: v1beta2.FlinkApplicationSpec{ JarName: "job.jar", Parallelism: 5, EntryClass: "com.my.Class", ProgramArgs: "--test", }, - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationClusterStarting, + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationClusterStarting, DeployHash: oldHash, - LastSeenError: retryableErr.(*v1beta1.FlinkApplicationError), + LastSeenError: retryableErr.(*v1beta2.FlinkApplicationError), }, } app.Status.LastSeenError.LastErrorUpdateTime = nil @@ -1245,21 +1245,21 @@ func TestCheckSavepointStatusFailing(t *testing.T) { oldHash := "old-hash-fail" maxRetries := int32(1) retryableErr := client.GetRetryableError(errors.New("blah"), "CheckSavepointStatus", "FAILED", 1) - app := 
v1beta1.FlinkApplication{ + app := v1beta2.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Name: "test-app", Namespace: "flink", }, - Spec: v1beta1.FlinkApplicationSpec{ + Spec: v1beta2.FlinkApplicationSpec{ JarName: "job.jar", Parallelism: 5, EntryClass: "com.my.Class", ProgramArgs: "--test", }, - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationSavepointing, + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationSavepointing, DeployHash: oldHash, - LastSeenError: retryableErr.(*v1beta1.FlinkApplicationError), + LastSeenError: retryableErr.(*v1beta2.FlinkApplicationError), SavepointTriggerID: "trigger", }, } @@ -1267,11 +1267,11 @@ func TestCheckSavepointStatusFailing(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.GetSavepointStatusFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.SavepointResponse, error) { - return nil, retryableErr.(*v1beta1.FlinkApplicationError) + mockFlinkController.GetSavepointStatusFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.SavepointResponse, error) { + return nil, retryableErr.(*v1beta2.FlinkApplicationError) } - mockFlinkController.FindExternalizedCheckpointFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) { + mockFlinkController.FindExternalizedCheckpointFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (string, error) { return "/tmp/checkpoint", nil } mockRetryHandler := stateMachineForTest.retryHandler.(*mock.RetryHandler) @@ -1288,32 +1288,32 @@ func TestCheckSavepointStatusFailing(t *testing.T) { err := stateMachineForTest.Handle(context.Background(), &app) // 1 retry left assert.NotNil(t, err) - assert.Equal(t, v1beta1.FlinkApplicationSavepointing, app.Status.Phase) + assert.Equal(t, 
v1beta2.FlinkApplicationSavepointing, app.Status.Phase) // No retries left for CheckSavepointStatus // The app should hence try to recover from an externalized checkpoint err = stateMachineForTest.Handle(context.Background(), &app) assert.Nil(t, err) - assert.Equal(t, v1beta1.FlinkApplicationRecovering, app.Status.Phase) + assert.Equal(t, v1beta2.FlinkApplicationRecovering, app.Status.Phase) } func TestDeleteWhenCheckSavepointStatusFailing(t *testing.T) { retryableErr := client.GetRetryableError(errors.New("blah"), "CheckSavepointStatus", "FAILED", 1) - app := v1beta1.FlinkApplication{ + app := v1beta2.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Name: "test-app", Namespace: "flink", }, - Spec: v1beta1.FlinkApplicationSpec{ + Spec: v1beta2.FlinkApplicationSpec{ JarName: "job.jar", Parallelism: 5, EntryClass: "com.my.Class", ProgramArgs: "--test", }, - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationSavepointing, + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationSavepointing, DeployHash: "appHash", - LastSeenError: retryableErr.(*v1beta1.FlinkApplicationError), + LastSeenError: retryableErr.(*v1beta2.FlinkApplicationError), SavepointTriggerID: "trigger", }, } @@ -1321,10 +1321,10 @@ func TestDeleteWhenCheckSavepointStatusFailing(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.GetSavepointStatusFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.SavepointResponse, error) { - return nil, retryableErr.(*v1beta1.FlinkApplicationError) + mockFlinkController.GetSavepointStatusFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.SavepointResponse, error) { + return nil, retryableErr.(*v1beta2.FlinkApplicationError) } - mockFlinkController.CancelWithSavepointFunc = func(ctx context.Context, application 
*v1beta1.FlinkApplication, hash string) (s string, e error) { + mockFlinkController.CancelWithSavepointFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (s string, e error) { return "triggerId", nil } mockRetryHandler := stateMachineForTest.retryHandler.(*mock.RetryHandler) @@ -1336,14 +1336,14 @@ func TestDeleteWhenCheckSavepointStatusFailing(t *testing.T) { } err := stateMachineForTest.Handle(context.Background(), &app) assert.NotNil(t, err) - assert.Equal(t, v1beta1.FlinkApplicationSavepointing, app.Status.Phase) + assert.Equal(t, v1beta2.FlinkApplicationSavepointing, app.Status.Phase) assert.NotNil(t, app.Status.LastSeenError) // Try to force delete the app while it's in a savepointing state (with errors) // We should handle the delete here - app.Status.Phase = v1beta1.FlinkApplicationDeleting - app.Spec.DeleteMode = v1beta1.DeleteModeForceCancel + app.Status.Phase = v1beta2.FlinkApplicationDeleting + app.Spec.DeleteMode = v1beta2.DeleteModeForceCancel - mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { + mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { assert.Equal(t, "appHash", hash) return &client.FlinkJobOverview{ JobID: "jobID", @@ -1351,7 +1351,7 @@ func TestDeleteWhenCheckSavepointStatusFailing(t *testing.T) { }, nil } - mockFlinkController.ForceCancelFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) error { + mockFlinkController.ForceCancelFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) error { return nil } err = stateMachineForTest.Handle(context.Background(), &app) diff --git a/tmp/codegen/update-generated.sh b/tmp/codegen/update-generated.sh index 1bbdf169..2f8d9286 100755 --- a/tmp/codegen/update-generated.sh +++ 
b/tmp/codegen/update-generated.sh @@ -8,5 +8,5 @@ vendor/k8s.io/code-generator/generate-groups.sh \ deepcopy,client \ github.com/lyft/flinkk8soperator/pkg/client \ github.com/lyft/flinkk8soperator/pkg/apis \ -app:v1beta1 \ +app:v1beta2 \ --go-header-file "./tmp/codegen/boilerplate.go.txt" From cd4405740c1fe117443bcdcfb2641ae79e2e9248 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Sat, 7 Mar 2020 17:58:01 -0800 Subject: [PATCH 13/41] Update CRD to v1beta2 --- integ/checkpoint_failure_test.go | 6 ++--- integ/simple_test.go | 44 ++++++++++++++++---------------- integ/utils/utils.go | 12 ++++----- tmp/codegen/update-generated.sh | 1 + 4 files changed, 32 insertions(+), 31 deletions(-) diff --git a/integ/checkpoint_failure_test.go b/integ/checkpoint_failure_test.go index 3b4cf4a7..5d784a84 100644 --- a/integ/checkpoint_failure_test.go +++ b/integ/checkpoint_failure_test.go @@ -6,7 +6,7 @@ import ( "os" "time" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" "github.com/prometheus/common/log" . 
"gopkg.in/check.v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -27,7 +27,7 @@ func failingJobTest(s *IntegSuite, c *C, testName string, causeFailure func()) { // Cause it to fail causeFailure() - c.Assert(s.Util.WaitForPhase(config.Name, v1beta1.FlinkApplicationRunning, v1beta1.FlinkApplicationDeployFailed), IsNil) + c.Assert(s.Util.WaitForPhase(config.Name, v1beta2.FlinkApplicationRunning, v1beta2.FlinkApplicationDeployFailed), IsNil) // wait a bit for it to start failing time.Sleep(5 * time.Second) @@ -40,7 +40,7 @@ func failingJobTest(s *IntegSuite, c *C, testName string, causeFailure func()) { c.Assert(err, IsNil) // because the checkpoint will fail, the app should move to deploy failed - c.Assert(s.Util.WaitForPhase(config.Name, v1beta1.FlinkApplicationDeployFailed), IsNil) + c.Assert(s.Util.WaitForPhase(config.Name, v1beta2.FlinkApplicationDeployFailed), IsNil) // And the job should not have been updated newApp, err := s.Util.GetFlinkApplication(config.Name) diff --git a/integ/simple_test.go b/integ/simple_test.go index 4e86ff60..daf15e83 100644 --- a/integ/simple_test.go +++ b/integ/simple_test.go @@ -7,7 +7,7 @@ import ( "os" "time" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" "github.com/lyft/flinkk8soperator/pkg/controller/flink/client" "github.com/prometheus/common/log" . 
"gopkg.in/check.v1" @@ -17,12 +17,12 @@ import ( const NewImage = "lyft/operator-test-app:b1b3cb8e8f98bd41f44f9c89f8462ce255e0d13f.2" -func updateAndValidate(c *C, s *IntegSuite, name string, updateFn func(app *v1beta1.FlinkApplication), failurePhase v1beta1.FlinkApplicationPhase) *v1beta1.FlinkApplication { +func updateAndValidate(c *C, s *IntegSuite, name string, updateFn func(app *v1beta2.FlinkApplication), failurePhase v1beta2.FlinkApplicationPhase) *v1beta2.FlinkApplication { app, err := s.Util.Update(name, updateFn) c.Assert(err, IsNil) - c.Assert(s.Util.WaitForPhase(name, v1beta1.FlinkApplicationSavepointing, failurePhase), IsNil) - c.Assert(s.Util.WaitForPhase(name, v1beta1.FlinkApplicationRunning, failurePhase), IsNil) + c.Assert(s.Util.WaitForPhase(name, v1beta2.FlinkApplicationSavepointing, failurePhase), IsNil) + c.Assert(s.Util.WaitForPhase(name, v1beta2.FlinkApplicationRunning, failurePhase), IsNil) c.Assert(s.Util.WaitForAllTasksRunning(name), IsNil) // check that it really updated @@ -72,7 +72,7 @@ func (s *IntegSuite) TestSimple(c *C) { c.Assert(s.Util.CreateFlinkApplication(config), IsNil, Commentf("Failed to create flink application")) - c.Assert(s.Util.WaitForPhase(config.Name, v1beta1.FlinkApplicationRunning, v1beta1.FlinkApplicationDeployFailed), IsNil) + c.Assert(s.Util.WaitForPhase(config.Name, v1beta2.FlinkApplicationRunning, v1beta2.FlinkApplicationDeployFailed), IsNil) c.Assert(s.Util.WaitForAllTasksRunning(config.Name), IsNil) pods, err := s.Util.KubeClient.CoreV1().Pods(s.Util.Namespace.Name). 
@@ -86,9 +86,9 @@ func (s *IntegSuite) TestSimple(c *C) { log.Info("Application started successfully") // test updating the app with a new image - newApp := updateAndValidate(c, s, config.Name, func(app *v1beta1.FlinkApplication) { + newApp := updateAndValidate(c, s, config.Name, func(app *v1beta2.FlinkApplication) { app.Spec.Image = NewImage - }, v1beta1.FlinkApplicationDeployFailed) + }, v1beta2.FlinkApplicationDeployFailed) // check that the pods have the new image c.Assert(newApp.Spec.Image, Equals, NewImage) pods, err = s.Util.KubeClient.CoreV1().Pods(s.Util.Namespace.Name). @@ -100,9 +100,9 @@ func (s *IntegSuite) TestSimple(c *C) { } // test updating the app with a config change - newApp = updateAndValidate(c, s, config.Name, func(app *v1beta1.FlinkApplication) { + newApp = updateAndValidate(c, s, config.Name, func(app *v1beta2.FlinkApplication) { app.Spec.FlinkConfig["akka.client.timeout"] = "23 s" - }, v1beta1.FlinkApplicationDeployFailed) + }, v1beta2.FlinkApplicationDeployFailed) // validate the config has been applied res, err := s.Util.FlinkAPIGet(newApp, "/jobmanager/config") c.Assert(err, IsNil) @@ -122,7 +122,7 @@ func (s *IntegSuite) TestSimple(c *C) { { log.Info("Testing rollback") - newApp, err := s.Util.Update(config.Name, func(app *v1beta1.FlinkApplication) { + newApp, err := s.Util.Update(config.Name, func(app *v1beta2.FlinkApplication) { app.Spec.JarName = "nonexistent.jar" // this shouldn't be needed after STRMCMP-473 is fixed app.Spec.RestartNonce = "rollback" @@ -130,9 +130,9 @@ func (s *IntegSuite) TestSimple(c *C) { c.Assert(err, IsNil) - c.Assert(s.Util.WaitForPhase(newApp.Name, v1beta1.FlinkApplicationSavepointing, ""), IsNil) + c.Assert(s.Util.WaitForPhase(newApp.Name, v1beta2.FlinkApplicationSavepointing, ""), IsNil) // we should end up in the DeployFailed phase - c.Assert(s.Util.WaitForPhase(newApp.Name, v1beta1.FlinkApplicationDeployFailed, ""), IsNil) + c.Assert(s.Util.WaitForPhase(newApp.Name, 
v1beta2.FlinkApplicationDeployFailed, ""), IsNil) log.Info("Job is in deploy failed, waiting for tasks to start") @@ -159,7 +159,7 @@ func (s *IntegSuite) TestSimple(c *C) { log.Info("Attempting to roll forward") // and we should be able to roll forward by resubmitting with a fixed config - updateAndValidate(c, s, config.Name, func(app *v1beta1.FlinkApplication) { + updateAndValidate(c, s, config.Name, func(app *v1beta2.FlinkApplication) { app.Spec.JarName = config.Spec.JarName app.Spec.RestartNonce = "rollback2" }, "") @@ -169,12 +169,12 @@ func (s *IntegSuite) TestSimple(c *C) { { log.Info("Testing force rollback") - newApp, err := s.Util.Update(config.Name, func(app *v1beta1.FlinkApplication) { + newApp, err := s.Util.Update(config.Name, func(app *v1beta2.FlinkApplication) { app.Spec.Image = "lyft/badimage:latest" }) c.Assert(err, IsNil) - c.Assert(s.Util.WaitForPhase(newApp.Name, v1beta1.FlinkApplicationClusterStarting, ""), IsNil) + c.Assert(s.Util.WaitForPhase(newApp.Name, v1beta2.FlinkApplicationClusterStarting, ""), IsNil) // User realizes error and cancels the deploy log.Infof("Cancelling deploy...") @@ -186,17 +186,17 @@ func (s *IntegSuite) TestSimple(c *C) { c.Assert(err, IsNil) // we should end up in the DeployFailed phase - c.Assert(s.Util.WaitForPhase(newApp.Name, v1beta1.FlinkApplicationDeployFailed, ""), IsNil) + c.Assert(s.Util.WaitForPhase(newApp.Name, v1beta2.FlinkApplicationDeployFailed, ""), IsNil) c.Assert(newApp.Spec.ForceRollback, Equals, true) log.Info("User cancelled deploy. 
Job is in deploy failed, waiting for tasks to start") // but the job should still be running - c.Assert(newApp.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.State, Equals, v1beta1.Running) + c.Assert(newApp.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.State, Equals, v1beta2.Running) log.Info("Attempting to roll forward with fix") // Fixing update // and we should be able to roll forward by resubmitting with a fixed config - updateAndValidate(c, s, config.Name, func(app *v1beta1.FlinkApplication) { + updateAndValidate(c, s, config.Name, func(app *v1beta2.FlinkApplication) { app.Spec.Image = NewImage app.Spec.RestartNonce = "rollback3" app.Spec.ForceRollback = false @@ -207,7 +207,7 @@ func (s *IntegSuite) TestSimple(c *C) { c.Assert(s.Util.FlinkApps().Delete(config.Name, &v1.DeleteOptions{}), IsNil) // validate that a savepoint was taken and the job was cancelled - var app *v1beta1.FlinkApplication + var app *v1beta2.FlinkApplication for { app, err = s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) @@ -275,7 +275,7 @@ func (s *IntegSuite) TestRecovery(c *C) { log.Info("Application Created") // wait for it to be running - c.Assert(s.Util.WaitForPhase(config.Name, v1beta1.FlinkApplicationRunning, v1beta1.FlinkApplicationDeployFailed), IsNil) + c.Assert(s.Util.WaitForPhase(config.Name, v1beta2.FlinkApplicationRunning, v1beta2.FlinkApplicationDeployFailed), IsNil) c.Assert(s.Util.WaitForAllTasksRunning(config.Name), IsNil) log.Info("Application running") @@ -313,7 +313,7 @@ func (s *IntegSuite) TestRecovery(c *C) { time.Sleep(1 * time.Second) // try to update the job - app, err = s.Util.Update(config.Name, func(app *v1beta1.FlinkApplication) { + app, err = s.Util.Update(config.Name, func(app *v1beta2.FlinkApplication) { app.Spec.Image = NewImage }) c.Assert(err, IsNil) @@ -331,7 +331,7 @@ func (s *IntegSuite) TestRecovery(c *C) { } c.Assert(err, IsNil) - c.Assert(s.Util.WaitForPhase(config.Name, 
v1beta1.FlinkApplicationRunning, v1beta1.FlinkApplicationDeployFailed), IsNil) + c.Assert(s.Util.WaitForPhase(config.Name, v1beta2.FlinkApplicationRunning, v1beta2.FlinkApplicationDeployFailed), IsNil) // stop it from failing c.Assert(os.Remove(s.Util.CheckpointDir+"/fail"), IsNil) diff --git a/integ/utils/utils.go b/integ/utils/utils.go index fdf2150f..3d2f92c3 100644 --- a/integ/utils/utils.go +++ b/integ/utils/utils.go @@ -13,13 +13,13 @@ import ( errors2 "k8s.io/apimachinery/pkg/api/errors" "github.com/go-resty/resty" - flinkapp "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + flinkapp "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" clientset "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned" - client "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta1" + client "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta2" "github.com/prometheus/common/log" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" - "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1" + "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta2" apiextensionsClientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -122,7 +122,7 @@ func (f *TestUtil) CreateCRD() error { return err } - crd := v1beta1.CustomResourceDefinition{} + crd := v1beta2.CustomResourceDefinition{} err = yaml.NewYAMLOrJSONDecoder(file, 1024).Decode(&crd) if err != nil { return err @@ -130,7 +130,7 @@ func (f *TestUtil) CreateCRD() error { crd.Namespace = f.Namespace.Name - _, err = f.APIExtensionsClient.ApiextensionsV1beta1().CustomResourceDefinitions().Create(&crd) + _, err = f.APIExtensionsClient.Apiextensionsv1beta2().CustomResourceDefinitions().Create(&crd) if err != nil { return err } @@ -348,7 +348,7 @@ func (f *TestUtil) ReadFlinkApplication(path string) (*flinkapp.FlinkApplication } func (f *TestUtil) 
FlinkApps() client.FlinkApplicationInterface { - return f.FlinkApplicationClient.FlinkV1beta1().FlinkApplications(f.Namespace.Name) + return f.FlinkApplicationClient.Flinkv1beta2().FlinkApplications(f.Namespace.Name) } func (f *TestUtil) CreateFlinkApplication(application *flinkapp.FlinkApplication) error { diff --git a/tmp/codegen/update-generated.sh b/tmp/codegen/update-generated.sh index 2f8d9286..65fc6ace 100755 --- a/tmp/codegen/update-generated.sh +++ b/tmp/codegen/update-generated.sh @@ -10,3 +10,4 @@ github.com/lyft/flinkk8soperator/pkg/client \ github.com/lyft/flinkk8soperator/pkg/apis \ app:v1beta2 \ --go-header-file "./tmp/codegen/boilerplate.go.txt" + From 1fdf5a1a14cd0b6a3bd91e396b7ce6ccbcde404c Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Mon, 9 Mar 2020 15:12:57 -0700 Subject: [PATCH 14/41] Fix CRD update issues --- integ/utils/utils.go | 8 +- pkg/client/clientset/versioned/clientset.go | 14 ++ .../versioned/fake/clientset_generated.go | 7 + .../clientset/versioned/fake/register.go | 2 + .../clientset/versioned/scheme/register.go | 2 + .../typed/app/v1alpha1/app_client.go | 74 -------- .../versioned/typed/app/v1alpha1/doc.go | 4 - .../versioned/typed/app/v1alpha1/fake/doc.go | 4 - .../app/v1alpha1/fake/fake_app_client.go | 24 --- .../v1alpha1/fake/fake_flinkapplication.go | 112 ------------- .../typed/app/v1alpha1/flinkapplication.go | 158 ------------------ .../typed/app/v1alpha1/generated_expansion.go | 5 - pkg/controller/flink/ingress.go | 20 +-- .../flink/job_manager_controller_test.go | 8 +- .../mock/mock_task_manager_controller.go | 7 +- tmp/codegen/update-generated.sh | 2 +- 16 files changed, 47 insertions(+), 404 deletions(-) delete mode 100644 pkg/client/clientset/versioned/typed/app/v1alpha1/app_client.go delete mode 100644 pkg/client/clientset/versioned/typed/app/v1alpha1/doc.go delete mode 100644 pkg/client/clientset/versioned/typed/app/v1alpha1/fake/doc.go delete mode 100644 
pkg/client/clientset/versioned/typed/app/v1alpha1/fake/fake_app_client.go delete mode 100644 pkg/client/clientset/versioned/typed/app/v1alpha1/fake/fake_flinkapplication.go delete mode 100644 pkg/client/clientset/versioned/typed/app/v1alpha1/flinkapplication.go delete mode 100644 pkg/client/clientset/versioned/typed/app/v1alpha1/generated_expansion.go diff --git a/integ/utils/utils.go b/integ/utils/utils.go index 3d2f92c3..8b95d4d2 100644 --- a/integ/utils/utils.go +++ b/integ/utils/utils.go @@ -19,7 +19,7 @@ import ( "github.com/prometheus/common/log" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" - "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta2" + "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1" apiextensionsClientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -122,7 +122,7 @@ func (f *TestUtil) CreateCRD() error { return err } - crd := v1beta2.CustomResourceDefinition{} + crd := v1beta1.CustomResourceDefinition{} err = yaml.NewYAMLOrJSONDecoder(file, 1024).Decode(&crd) if err != nil { return err @@ -130,7 +130,7 @@ func (f *TestUtil) CreateCRD() error { crd.Namespace = f.Namespace.Name - _, err = f.APIExtensionsClient.Apiextensionsv1beta2().CustomResourceDefinitions().Create(&crd) + _, err = f.APIExtensionsClient.ApiextensionsV1beta1().CustomResourceDefinitions().Create(&crd) if err != nil { return err } @@ -348,7 +348,7 @@ func (f *TestUtil) ReadFlinkApplication(path string) (*flinkapp.FlinkApplication } func (f *TestUtil) FlinkApps() client.FlinkApplicationInterface { - return f.FlinkApplicationClient.Flinkv1beta2().FlinkApplications(f.Namespace.Name) + return f.FlinkApplicationClient.FlinkV1beta2().FlinkApplications(f.Namespace.Name) } func (f *TestUtil) CreateFlinkApplication(application *flinkapp.FlinkApplication) error { diff --git a/pkg/client/clientset/versioned/clientset.go 
b/pkg/client/clientset/versioned/clientset.go index ba4f7de7..8f818b29 100644 --- a/pkg/client/clientset/versioned/clientset.go +++ b/pkg/client/clientset/versioned/clientset.go @@ -3,6 +3,7 @@ package versioned import ( + flinkv1beta1 "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta1" flinkv1beta2 "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta2" discovery "k8s.io/client-go/discovery" rest "k8s.io/client-go/rest" @@ -11,6 +12,7 @@ import ( type Interface interface { Discovery() discovery.DiscoveryInterface + FlinkV1beta1() flinkv1beta1.FlinkV1beta1Interface FlinkV1beta2() flinkv1beta2.FlinkV1beta2Interface } @@ -18,9 +20,15 @@ type Interface interface { // version included in a Clientset. type Clientset struct { *discovery.DiscoveryClient + flinkV1beta1 *flinkv1beta1.FlinkV1beta1Client flinkV1beta2 *flinkv1beta2.FlinkV1beta2Client } +// FlinkV1beta1 retrieves the FlinkV1beta1Client +func (c *Clientset) FlinkV1beta1() flinkv1beta1.FlinkV1beta1Interface { + return c.flinkV1beta1 +} + // FlinkV1beta2 retrieves the FlinkV1beta2Client func (c *Clientset) FlinkV1beta2() flinkv1beta2.FlinkV1beta2Interface { return c.flinkV1beta2 @@ -42,6 +50,10 @@ func NewForConfig(c *rest.Config) (*Clientset, error) { } var cs Clientset var err error + cs.flinkV1beta1, err = flinkv1beta1.NewForConfig(&configShallowCopy) + if err != nil { + return nil, err + } cs.flinkV1beta2, err = flinkv1beta2.NewForConfig(&configShallowCopy) if err != nil { return nil, err @@ -58,6 +70,7 @@ func NewForConfig(c *rest.Config) (*Clientset, error) { // panics if there is an error in the config. 
func NewForConfigOrDie(c *rest.Config) *Clientset { var cs Clientset + cs.flinkV1beta1 = flinkv1beta1.NewForConfigOrDie(c) cs.flinkV1beta2 = flinkv1beta2.NewForConfigOrDie(c) cs.DiscoveryClient = discovery.NewDiscoveryClientForConfigOrDie(c) @@ -67,6 +80,7 @@ func NewForConfigOrDie(c *rest.Config) *Clientset { // New creates a new Clientset for the given RESTClient. func New(c rest.Interface) *Clientset { var cs Clientset + cs.flinkV1beta1 = flinkv1beta1.New(c) cs.flinkV1beta2 = flinkv1beta2.New(c) cs.DiscoveryClient = discovery.NewDiscoveryClient(c) diff --git a/pkg/client/clientset/versioned/fake/clientset_generated.go b/pkg/client/clientset/versioned/fake/clientset_generated.go index 200e43a5..68b1013a 100644 --- a/pkg/client/clientset/versioned/fake/clientset_generated.go +++ b/pkg/client/clientset/versioned/fake/clientset_generated.go @@ -4,6 +4,8 @@ package fake import ( clientset "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned" + flinkv1beta1 "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta1" + fakeflinkv1beta1 "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta1/fake" flinkv1beta2 "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta2" fakeflinkv1beta2 "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta2/fake" "k8s.io/apimachinery/pkg/runtime" @@ -55,6 +57,11 @@ func (c *Clientset) Discovery() discovery.DiscoveryInterface { var _ clientset.Interface = &Clientset{} +// FlinkV1beta1 retrieves the FlinkV1beta1Client +func (c *Clientset) FlinkV1beta1() flinkv1beta1.FlinkV1beta1Interface { + return &fakeflinkv1beta1.FakeFlinkV1beta1{Fake: &c.Fake} +} + // FlinkV1beta2 retrieves the FlinkV1beta2Client func (c *Clientset) FlinkV1beta2() flinkv1beta2.FlinkV1beta2Interface { return &fakeflinkv1beta2.FakeFlinkV1beta2{Fake: &c.Fake} diff --git a/pkg/client/clientset/versioned/fake/register.go 
b/pkg/client/clientset/versioned/fake/register.go index e84b7913..86b7a18d 100644 --- a/pkg/client/clientset/versioned/fake/register.go +++ b/pkg/client/clientset/versioned/fake/register.go @@ -3,6 +3,7 @@ package fake import ( + flinkv1beta1 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" flinkv1beta2 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" @@ -15,6 +16,7 @@ var scheme = runtime.NewScheme() var codecs = serializer.NewCodecFactory(scheme) var parameterCodec = runtime.NewParameterCodec(scheme) var localSchemeBuilder = runtime.SchemeBuilder{ + flinkv1beta1.AddToScheme, flinkv1beta2.AddToScheme, } diff --git a/pkg/client/clientset/versioned/scheme/register.go b/pkg/client/clientset/versioned/scheme/register.go index ceb19eec..ec838418 100644 --- a/pkg/client/clientset/versioned/scheme/register.go +++ b/pkg/client/clientset/versioned/scheme/register.go @@ -3,6 +3,7 @@ package scheme import ( + flinkv1beta1 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" flinkv1beta2 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" @@ -15,6 +16,7 @@ var Scheme = runtime.NewScheme() var Codecs = serializer.NewCodecFactory(Scheme) var ParameterCodec = runtime.NewParameterCodec(Scheme) var localSchemeBuilder = runtime.SchemeBuilder{ + flinkv1beta1.AddToScheme, flinkv1beta2.AddToScheme, } diff --git a/pkg/client/clientset/versioned/typed/app/v1alpha1/app_client.go b/pkg/client/clientset/versioned/typed/app/v1alpha1/app_client.go deleted file mode 100644 index 0b5e3fff..00000000 --- a/pkg/client/clientset/versioned/typed/app/v1alpha1/app_client.go +++ /dev/null @@ -1,74 +0,0 @@ -// Code generated by client-gen. DO NOT EDIT. 
- -package v1alpha1 - -import ( - v1alpha1 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1alpha1" - "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/scheme" - serializer "k8s.io/apimachinery/pkg/runtime/serializer" - rest "k8s.io/client-go/rest" -) - -type FlinkV1alpha1Interface interface { - RESTClient() rest.Interface - FlinkApplicationsGetter -} - -// FlinkV1alpha1Client is used to interact with features provided by the flink.k8s.io group. -type FlinkV1alpha1Client struct { - restClient rest.Interface -} - -func (c *FlinkV1alpha1Client) FlinkApplications(namespace string) FlinkApplicationInterface { - return newFlinkApplications(c, namespace) -} - -// NewForConfig creates a new FlinkV1alpha1Client for the given config. -func NewForConfig(c *rest.Config) (*FlinkV1alpha1Client, error) { - config := *c - if err := setConfigDefaults(&config); err != nil { - return nil, err - } - client, err := rest.RESTClientFor(&config) - if err != nil { - return nil, err - } - return &FlinkV1alpha1Client{client}, nil -} - -// NewForConfigOrDie creates a new FlinkV1alpha1Client for the given config and -// panics if there is an error in the config. -func NewForConfigOrDie(c *rest.Config) *FlinkV1alpha1Client { - client, err := NewForConfig(c) - if err != nil { - panic(err) - } - return client -} - -// New creates a new FlinkV1alpha1Client for the given RESTClient. -func New(c rest.Interface) *FlinkV1alpha1Client { - return &FlinkV1alpha1Client{c} -} - -func setConfigDefaults(config *rest.Config) error { - gv := v1alpha1.SchemeGroupVersion - config.GroupVersion = &gv - config.APIPath = "/apis" - config.NegotiatedSerializer = serializer.DirectCodecFactory{CodecFactory: scheme.Codecs} - - if config.UserAgent == "" { - config.UserAgent = rest.DefaultKubernetesUserAgent() - } - - return nil -} - -// RESTClient returns a RESTClient that is used to communicate -// with API server by this client implementation. 
-func (c *FlinkV1alpha1Client) RESTClient() rest.Interface { - if c == nil { - return nil - } - return c.restClient -} diff --git a/pkg/client/clientset/versioned/typed/app/v1alpha1/doc.go b/pkg/client/clientset/versioned/typed/app/v1alpha1/doc.go deleted file mode 100644 index 93a7ca4e..00000000 --- a/pkg/client/clientset/versioned/typed/app/v1alpha1/doc.go +++ /dev/null @@ -1,4 +0,0 @@ -// Code generated by client-gen. DO NOT EDIT. - -// This package has the automatically generated typed clients. -package v1alpha1 diff --git a/pkg/client/clientset/versioned/typed/app/v1alpha1/fake/doc.go b/pkg/client/clientset/versioned/typed/app/v1alpha1/fake/doc.go deleted file mode 100644 index 2b5ba4c8..00000000 --- a/pkg/client/clientset/versioned/typed/app/v1alpha1/fake/doc.go +++ /dev/null @@ -1,4 +0,0 @@ -// Code generated by client-gen. DO NOT EDIT. - -// Package fake has the automatically generated clients. -package fake diff --git a/pkg/client/clientset/versioned/typed/app/v1alpha1/fake/fake_app_client.go b/pkg/client/clientset/versioned/typed/app/v1alpha1/fake/fake_app_client.go deleted file mode 100644 index 017abeb5..00000000 --- a/pkg/client/clientset/versioned/typed/app/v1alpha1/fake/fake_app_client.go +++ /dev/null @@ -1,24 +0,0 @@ -// Code generated by client-gen. DO NOT EDIT. - -package fake - -import ( - v1alpha1 "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1alpha1" - rest "k8s.io/client-go/rest" - testing "k8s.io/client-go/testing" -) - -type FakeFlinkV1alpha1 struct { - *testing.Fake -} - -func (c *FakeFlinkV1alpha1) FlinkApplications(namespace string) v1alpha1.FlinkApplicationInterface { - return &FakeFlinkApplications{c, namespace} -} - -// RESTClient returns a RESTClient that is used to communicate -// with API server by this client implementation. 
-func (c *FakeFlinkV1alpha1) RESTClient() rest.Interface { - var ret *rest.RESTClient - return ret -} diff --git a/pkg/client/clientset/versioned/typed/app/v1alpha1/fake/fake_flinkapplication.go b/pkg/client/clientset/versioned/typed/app/v1alpha1/fake/fake_flinkapplication.go deleted file mode 100644 index 73435df3..00000000 --- a/pkg/client/clientset/versioned/typed/app/v1alpha1/fake/fake_flinkapplication.go +++ /dev/null @@ -1,112 +0,0 @@ -// Code generated by client-gen. DO NOT EDIT. - -package fake - -import ( - v1alpha1 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1alpha1" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - labels "k8s.io/apimachinery/pkg/labels" - schema "k8s.io/apimachinery/pkg/runtime/schema" - types "k8s.io/apimachinery/pkg/types" - watch "k8s.io/apimachinery/pkg/watch" - testing "k8s.io/client-go/testing" -) - -// FakeFlinkApplications implements FlinkApplicationInterface -type FakeFlinkApplications struct { - Fake *FakeFlinkV1alpha1 - ns string -} - -var flinkapplicationsResource = schema.GroupVersionResource{Group: "flink.k8s.io", Version: "v1alpha1", Resource: "flinkapplications"} - -var flinkapplicationsKind = schema.GroupVersionKind{Group: "flink.k8s.io", Version: "v1alpha1", Kind: "FlinkApplication"} - -// Get takes name of the flinkApplication, and returns the corresponding flinkApplication object, and an error if there is any. -func (c *FakeFlinkApplications) Get(name string, options v1.GetOptions) (result *v1alpha1.FlinkApplication, err error) { - obj, err := c.Fake. - Invokes(testing.NewGetAction(flinkapplicationsResource, c.ns, name), &v1alpha1.FlinkApplication{}) - - if obj == nil { - return nil, err - } - return obj.(*v1alpha1.FlinkApplication), err -} - -// List takes label and field selectors, and returns the list of FlinkApplications that match those selectors. -func (c *FakeFlinkApplications) List(opts v1.ListOptions) (result *v1alpha1.FlinkApplicationList, err error) { - obj, err := c.Fake. 
- Invokes(testing.NewListAction(flinkapplicationsResource, flinkapplicationsKind, c.ns, opts), &v1alpha1.FlinkApplicationList{}) - - if obj == nil { - return nil, err - } - - label, _, _ := testing.ExtractFromListOptions(opts) - if label == nil { - label = labels.Everything() - } - list := &v1alpha1.FlinkApplicationList{ListMeta: obj.(*v1alpha1.FlinkApplicationList).ListMeta} - for _, item := range obj.(*v1alpha1.FlinkApplicationList).Items { - if label.Matches(labels.Set(item.Labels)) { - list.Items = append(list.Items, item) - } - } - return list, err -} - -// Watch returns a watch.Interface that watches the requested flinkApplications. -func (c *FakeFlinkApplications) Watch(opts v1.ListOptions) (watch.Interface, error) { - return c.Fake. - InvokesWatch(testing.NewWatchAction(flinkapplicationsResource, c.ns, opts)) - -} - -// Create takes the representation of a flinkApplication and creates it. Returns the server's representation of the flinkApplication, and an error, if there is any. -func (c *FakeFlinkApplications) Create(flinkApplication *v1alpha1.FlinkApplication) (result *v1alpha1.FlinkApplication, err error) { - obj, err := c.Fake. - Invokes(testing.NewCreateAction(flinkapplicationsResource, c.ns, flinkApplication), &v1alpha1.FlinkApplication{}) - - if obj == nil { - return nil, err - } - return obj.(*v1alpha1.FlinkApplication), err -} - -// Update takes the representation of a flinkApplication and updates it. Returns the server's representation of the flinkApplication, and an error, if there is any. -func (c *FakeFlinkApplications) Update(flinkApplication *v1alpha1.FlinkApplication) (result *v1alpha1.FlinkApplication, err error) { - obj, err := c.Fake. - Invokes(testing.NewUpdateAction(flinkapplicationsResource, c.ns, flinkApplication), &v1alpha1.FlinkApplication{}) - - if obj == nil { - return nil, err - } - return obj.(*v1alpha1.FlinkApplication), err -} - -// Delete takes name of the flinkApplication and deletes it. Returns an error if one occurs. 
-func (c *FakeFlinkApplications) Delete(name string, options *v1.DeleteOptions) error { - _, err := c.Fake. - Invokes(testing.NewDeleteAction(flinkapplicationsResource, c.ns, name), &v1alpha1.FlinkApplication{}) - - return err -} - -// DeleteCollection deletes a collection of objects. -func (c *FakeFlinkApplications) DeleteCollection(options *v1.DeleteOptions, listOptions v1.ListOptions) error { - action := testing.NewDeleteCollectionAction(flinkapplicationsResource, c.ns, listOptions) - - _, err := c.Fake.Invokes(action, &v1alpha1.FlinkApplicationList{}) - return err -} - -// Patch applies the patch and returns the patched flinkApplication. -func (c *FakeFlinkApplications) Patch(name string, pt types.PatchType, data []byte, subresources ...string) (result *v1alpha1.FlinkApplication, err error) { - obj, err := c.Fake. - Invokes(testing.NewPatchSubresourceAction(flinkapplicationsResource, c.ns, name, pt, data, subresources...), &v1alpha1.FlinkApplication{}) - - if obj == nil { - return nil, err - } - return obj.(*v1alpha1.FlinkApplication), err -} diff --git a/pkg/client/clientset/versioned/typed/app/v1alpha1/flinkapplication.go b/pkg/client/clientset/versioned/typed/app/v1alpha1/flinkapplication.go deleted file mode 100644 index 73a064a9..00000000 --- a/pkg/client/clientset/versioned/typed/app/v1alpha1/flinkapplication.go +++ /dev/null @@ -1,158 +0,0 @@ -// Code generated by client-gen. DO NOT EDIT. - -package v1alpha1 - -import ( - "time" - - v1alpha1 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1alpha1" - scheme "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/scheme" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - types "k8s.io/apimachinery/pkg/types" - watch "k8s.io/apimachinery/pkg/watch" - rest "k8s.io/client-go/rest" -) - -// FlinkApplicationsGetter has a method to return a FlinkApplicationInterface. -// A group's client should implement this interface. 
-type FlinkApplicationsGetter interface { - FlinkApplications(namespace string) FlinkApplicationInterface -} - -// FlinkApplicationInterface has methods to work with FlinkApplication resources. -type FlinkApplicationInterface interface { - Create(*v1alpha1.FlinkApplication) (*v1alpha1.FlinkApplication, error) - Update(*v1alpha1.FlinkApplication) (*v1alpha1.FlinkApplication, error) - Delete(name string, options *v1.DeleteOptions) error - DeleteCollection(options *v1.DeleteOptions, listOptions v1.ListOptions) error - Get(name string, options v1.GetOptions) (*v1alpha1.FlinkApplication, error) - List(opts v1.ListOptions) (*v1alpha1.FlinkApplicationList, error) - Watch(opts v1.ListOptions) (watch.Interface, error) - Patch(name string, pt types.PatchType, data []byte, subresources ...string) (result *v1alpha1.FlinkApplication, err error) - FlinkApplicationExpansion -} - -// flinkApplications implements FlinkApplicationInterface -type flinkApplications struct { - client rest.Interface - ns string -} - -// newFlinkApplications returns a FlinkApplications -func newFlinkApplications(c *FlinkV1alpha1Client, namespace string) *flinkApplications { - return &flinkApplications{ - client: c.RESTClient(), - ns: namespace, - } -} - -// Get takes name of the flinkApplication, and returns the corresponding flinkApplication object, and an error if there is any. -func (c *flinkApplications) Get(name string, options v1.GetOptions) (result *v1alpha1.FlinkApplication, err error) { - result = &v1alpha1.FlinkApplication{} - err = c.client.Get(). - Namespace(c.ns). - Resource("flinkapplications"). - Name(name). - VersionedParams(&options, scheme.ParameterCodec). - Do(). - Into(result) - return -} - -// List takes label and field selectors, and returns the list of FlinkApplications that match those selectors. 
-func (c *flinkApplications) List(opts v1.ListOptions) (result *v1alpha1.FlinkApplicationList, err error) { - var timeout time.Duration - if opts.TimeoutSeconds != nil { - timeout = time.Duration(*opts.TimeoutSeconds) * time.Second - } - result = &v1alpha1.FlinkApplicationList{} - err = c.client.Get(). - Namespace(c.ns). - Resource("flinkapplications"). - VersionedParams(&opts, scheme.ParameterCodec). - Timeout(timeout). - Do(). - Into(result) - return -} - -// Watch returns a watch.Interface that watches the requested flinkApplications. -func (c *flinkApplications) Watch(opts v1.ListOptions) (watch.Interface, error) { - var timeout time.Duration - if opts.TimeoutSeconds != nil { - timeout = time.Duration(*opts.TimeoutSeconds) * time.Second - } - opts.Watch = true - return c.client.Get(). - Namespace(c.ns). - Resource("flinkapplications"). - VersionedParams(&opts, scheme.ParameterCodec). - Timeout(timeout). - Watch() -} - -// Create takes the representation of a flinkApplication and creates it. Returns the server's representation of the flinkApplication, and an error, if there is any. -func (c *flinkApplications) Create(flinkApplication *v1alpha1.FlinkApplication) (result *v1alpha1.FlinkApplication, err error) { - result = &v1alpha1.FlinkApplication{} - err = c.client.Post(). - Namespace(c.ns). - Resource("flinkapplications"). - Body(flinkApplication). - Do(). - Into(result) - return -} - -// Update takes the representation of a flinkApplication and updates it. Returns the server's representation of the flinkApplication, and an error, if there is any. -func (c *flinkApplications) Update(flinkApplication *v1alpha1.FlinkApplication) (result *v1alpha1.FlinkApplication, err error) { - result = &v1alpha1.FlinkApplication{} - err = c.client.Put(). - Namespace(c.ns). - Resource("flinkapplications"). - Name(flinkApplication.Name). - Body(flinkApplication). - Do(). - Into(result) - return -} - -// Delete takes name of the flinkApplication and deletes it. 
Returns an error if one occurs. -func (c *flinkApplications) Delete(name string, options *v1.DeleteOptions) error { - return c.client.Delete(). - Namespace(c.ns). - Resource("flinkapplications"). - Name(name). - Body(options). - Do(). - Error() -} - -// DeleteCollection deletes a collection of objects. -func (c *flinkApplications) DeleteCollection(options *v1.DeleteOptions, listOptions v1.ListOptions) error { - var timeout time.Duration - if listOptions.TimeoutSeconds != nil { - timeout = time.Duration(*listOptions.TimeoutSeconds) * time.Second - } - return c.client.Delete(). - Namespace(c.ns). - Resource("flinkapplications"). - VersionedParams(&listOptions, scheme.ParameterCodec). - Timeout(timeout). - Body(options). - Do(). - Error() -} - -// Patch applies the patch and returns the patched flinkApplication. -func (c *flinkApplications) Patch(name string, pt types.PatchType, data []byte, subresources ...string) (result *v1alpha1.FlinkApplication, err error) { - result = &v1alpha1.FlinkApplication{} - err = c.client.Patch(pt). - Namespace(c.ns). - Resource("flinkapplications"). - SubResource(subresources...). - Name(name). - Body(data). - Do(). - Into(result) - return -} diff --git a/pkg/client/clientset/versioned/typed/app/v1alpha1/generated_expansion.go b/pkg/client/clientset/versioned/typed/app/v1alpha1/generated_expansion.go deleted file mode 100644 index ab955a79..00000000 --- a/pkg/client/clientset/versioned/typed/app/v1alpha1/generated_expansion.go +++ /dev/null @@ -1,5 +0,0 @@ -// Code generated by client-gen. DO NOT EDIT. 
- -package v1alpha1 - -type FlinkApplicationExpansion interface{} diff --git a/pkg/controller/flink/ingress.go b/pkg/controller/flink/ingress.go index 2ab4e187..96001664 100644 --- a/pkg/controller/flink/ingress.go +++ b/pkg/controller/flink/ingress.go @@ -7,7 +7,7 @@ import ( "github.com/lyft/flinkk8soperator/pkg/controller/common" "github.com/lyft/flinkk8soperator/pkg/controller/config" "github.com/lyft/flinkk8soperator/pkg/controller/k8" - "k8s.io/api/extensions/v1beta2" + "k8s.io/api/extensions/v1beta1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" ) @@ -22,7 +22,7 @@ func GetFlinkUIIngressURL(jobName string) string { return ReplaceJobURL(config.GetConfig().FlinkIngressURLFormat, jobName) } -func FetchJobManagerIngressCreateObj(app *flinkapp.FlinkApplication) *v1beta2.Ingress { +func FetchJobManagerIngressCreateObj(app *flinkapp.FlinkApplication) *v1beta1.Ingress { podLabels := common.DuplicateMap(app.Labels) podLabels = common.CopyMap(podLabels, k8.GetAppLabel(app.Name)) @@ -35,7 +35,7 @@ func FetchJobManagerIngressCreateObj(app *flinkapp.FlinkApplication) *v1beta2.In }, } - backend := v1beta2.IngressBackend{ + backend := v1beta1.IngressBackend{ ServiceName: app.Name, ServicePort: intstr.IntOrString{ Type: intstr.Int, @@ -43,22 +43,22 @@ func FetchJobManagerIngressCreateObj(app *flinkapp.FlinkApplication) *v1beta2.In }, } - ingressSpec := v1beta2.IngressSpec{ - Rules: []v1beta2.IngressRule{{ + ingressSpec := v1beta1.IngressSpec{ + Rules: []v1beta1.IngressRule{{ Host: GetFlinkUIIngressURL(app.Name), - IngressRuleValue: v1beta2.IngressRuleValue{ - HTTP: &v1beta2.HTTPIngressRuleValue{ - Paths: []v1beta2.HTTPIngressPath{{ + IngressRuleValue: v1beta1.IngressRuleValue{ + HTTP: &v1beta1.HTTPIngressRuleValue{ + Paths: []v1beta1.HTTPIngressPath{{ Backend: backend, }}, }, }, }}, } - return &v1beta2.Ingress{ + return &v1beta1.Ingress{ ObjectMeta: ingressMeta, TypeMeta: v1.TypeMeta{ - APIVersion: v1beta2.SchemeGroupVersion.String(), + 
APIVersion: v1beta1.SchemeGroupVersion.String(), Kind: k8.Ingress, }, Spec: ingressSpec, diff --git a/pkg/controller/flink/job_manager_controller_test.go b/pkg/controller/flink/job_manager_controller_test.go index 64b9be0e..7c6bf334 100644 --- a/pkg/controller/flink/job_manager_controller_test.go +++ b/pkg/controller/flink/job_manager_controller_test.go @@ -18,7 +18,7 @@ import ( "github.com/stretchr/testify/assert" v1 "k8s.io/api/apps/v1" coreV1 "k8s.io/api/core/v1" - "k8s.io/api/extensions/v1beta2" + "k8s.io/api/extensions/v1beta1" k8sErrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" @@ -112,7 +112,7 @@ func TestJobManagerCreateSuccess(t *testing.T) { labels := map[string]string{ "flink-app": "app-name", } - ingress := object.(*v1beta2.Ingress) + ingress := object.(*v1beta1.Ingress) assert.Equal(t, app.Name, ingress.Name) assert.Equal(t, app.Namespace, ingress.Namespace) assert.Equal(t, labels, ingress.Labels) @@ -190,7 +190,7 @@ func TestJobManagerHACreateSuccess(t *testing.T) { labels := map[string]string{ "flink-app": "app-name", } - ingress := object.(*v1beta2.Ingress) + ingress := object.(*v1beta1.Ingress) assert.Equal(t, app.Name, ingress.Name) assert.Equal(t, app.Namespace, ingress.Namespace) assert.Equal(t, labels, ingress.Labels) @@ -363,7 +363,7 @@ func TestJobManagerCreateSuccessWithVersion(t *testing.T) { labels := map[string]string{ "flink-app": "app-name", } - ingress := object.(*v1beta2.Ingress) + ingress := object.(*v1beta1.Ingress) assert.Equal(t, app.Name, ingress.Name) assert.Equal(t, app.Namespace, ingress.Namespace) assert.Equal(t, labels, ingress.Labels) diff --git a/pkg/controller/flink/mock/mock_task_manager_controller.go b/pkg/controller/flink/mock/mock_task_manager_controller.go index 6857b088..817f0e39 100644 --- a/pkg/controller/flink/mock/mock_task_manager_controller.go +++ b/pkg/controller/flink/mock/mock_task_manager_controller.go @@ -2,18 +2,17 @@ package mock 
import ( "context" - - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" ) -type CreateIfNotExistFunc func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) +type CreateIfNotExistFunc func(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) type TaskManagerController struct { CreateIfNotExistFunc CreateIfNotExistFunc } func (m *TaskManagerController) CreateIfNotExist( - ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { + ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { if m.CreateIfNotExistFunc != nil { return m.CreateIfNotExistFunc(ctx, application) } diff --git a/tmp/codegen/update-generated.sh b/tmp/codegen/update-generated.sh index 65fc6ace..b0c63f4d 100755 --- a/tmp/codegen/update-generated.sh +++ b/tmp/codegen/update-generated.sh @@ -8,6 +8,6 @@ vendor/k8s.io/code-generator/generate-groups.sh \ deepcopy,client \ github.com/lyft/flinkk8soperator/pkg/client \ github.com/lyft/flinkk8soperator/pkg/apis \ -app:v1beta2 \ +app:v1beta1,v1beta2 \ --go-header-file "./tmp/codegen/boilerplate.go.txt" From 1cad6dd0545a9c27257a6a431a9c3e468549e7d2 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Mon, 9 Mar 2020 15:15:57 -0700 Subject: [PATCH 15/41] Fix lint --- pkg/apis/app/v1beta2/types.go | 1 + pkg/controller/flink/mock/mock_task_manager_controller.go | 1 + 2 files changed, 2 insertions(+) diff --git a/pkg/apis/app/v1beta2/types.go b/pkg/apis/app/v1beta2/types.go index a6e7f2c4..89d38dc0 100644 --- a/pkg/apis/app/v1beta2/types.go +++ b/pkg/apis/app/v1beta2/types.go @@ -2,6 +2,7 @@ package v1beta2 import ( "fmt" + apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) diff --git a/pkg/controller/flink/mock/mock_task_manager_controller.go b/pkg/controller/flink/mock/mock_task_manager_controller.go index 817f0e39..bc38311c 100644 --- a/pkg/controller/flink/mock/mock_task_manager_controller.go +++ 
b/pkg/controller/flink/mock/mock_task_manager_controller.go @@ -2,6 +2,7 @@ package mock import ( "context" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" ) From dcba167bb2c1abb7a6fc6c62a02980ad15ad81a1 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Mon, 9 Mar 2020 16:10:03 -0700 Subject: [PATCH 16/41] Merge master and restore v1beta1 to original version --- pkg/apis/app/v1beta1/zz_generated.deepcopy.go | 27 ++----------------- pkg/apis/app/v1beta2/zz_generated.deepcopy.go | 14 ++++++++++ .../flinkapplication/flink_state_machine.go | 4 +++ .../flink_state_machine_test.go | 22 ++++++++------- 4 files changed, 33 insertions(+), 34 deletions(-) diff --git a/pkg/apis/app/v1beta1/zz_generated.deepcopy.go b/pkg/apis/app/v1beta1/zz_generated.deepcopy.go index 02063ef2..1b23bd3a 100644 --- a/pkg/apis/app/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/app/v1beta1/zz_generated.deepcopy.go @@ -205,13 +205,8 @@ func (in *FlinkApplicationStatus) DeepCopyInto(out *FlinkApplicationStatus) { in, out := &in.LastUpdatedAt, &out.LastUpdatedAt *out = (*in).DeepCopy() } - if in.ApplicationStatus != nil { - in, out := &in.ApplicationStatus, &out.ApplicationStatus - *out = make([]FlinkApplicationVersionStatus, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } + out.ClusterStatus = in.ClusterStatus + in.JobStatus.DeepCopyInto(&out.JobStatus) if in.LastSeenError != nil { in, out := &in.LastSeenError, &out.LastSeenError *out = new(FlinkApplicationError) @@ -230,24 +225,6 @@ func (in *FlinkApplicationStatus) DeepCopy() *FlinkApplicationStatus { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *FlinkApplicationVersionStatus) DeepCopyInto(out *FlinkApplicationVersionStatus) { - *out = *in - out.ClusterStatus = in.ClusterStatus - in.JobStatus.DeepCopyInto(&out.JobStatus) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlinkApplicationVersionStatus. -func (in *FlinkApplicationVersionStatus) DeepCopy() *FlinkApplicationVersionStatus { - if in == nil { - return nil - } - out := new(FlinkApplicationVersionStatus) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *FlinkClusterStatus) DeepCopyInto(out *FlinkClusterStatus) { *out = *in diff --git a/pkg/apis/app/v1beta2/zz_generated.deepcopy.go b/pkg/apis/app/v1beta2/zz_generated.deepcopy.go index e6cca0d5..6f43fcf2 100644 --- a/pkg/apis/app/v1beta2/zz_generated.deepcopy.go +++ b/pkg/apis/app/v1beta2/zz_generated.deepcopy.go @@ -322,6 +322,13 @@ func (in *JobManagerConfig) DeepCopyInto(out *JobManagerConfig) { (*out)[key] = val } } + if in.Tolerations != nil { + in, out := &in.Tolerations, &out.Tolerations + *out = make([]v1.Toleration, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } return } @@ -377,6 +384,13 @@ func (in *TaskManagerConfig) DeepCopyInto(out *TaskManagerConfig) { (*out)[key] = val } } + if in.Tolerations != nil { + in, out := &in.Tolerations, &out.Tolerations + *out = make([]v1.Toleration, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } return } diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index b8230952..c43335b7 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -477,6 +477,7 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta // Something's gone wrong; roll back 
s.flinkController.LogEvent(ctx, app, corev1.EventTypeWarning, "JobSubmissionFailed", fmt.Sprintf("Failed to submit job: %s", reason)) + s.flinkController.UpdateLatestJobID(ctx, app, "") s.updateApplicationPhase(app, v1beta2.FlinkApplicationRollingBackJob) return statusChanged, nil } @@ -529,6 +530,9 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta if err != nil { return statusUnchanged, err } + if job == nil { + return statusUnchanged, errors.Errorf("Could not find job %s", s.flinkController.GetLatestJobID(ctx, app)) + } // wait until all vertices have been scheduled and started allVerticesStarted := true diff --git a/pkg/controller/flinkapplication/flink_state_machine_test.go b/pkg/controller/flinkapplication/flink_state_machine_test.go index 2c8d9721..bdeddf3a 100644 --- a/pkg/controller/flinkapplication/flink_state_machine_test.go +++ b/pkg/controller/flinkapplication/flink_state_machine_test.go @@ -1095,12 +1095,12 @@ func TestRollbackWithFailFastError(t *testing.T) { } func TestRollbackAfterJobSubmission(t *testing.T) { - app := v1beta1.FlinkApplication{ + app := v1beta2.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Name: "test-app", Namespace: "flink", }, - Spec: v1beta1.FlinkApplicationSpec{ + Spec: v1beta2.FlinkApplicationSpec{ JarName: "job.jar", Parallelism: 5, EntryClass: "com.my.Class", @@ -1109,22 +1109,26 @@ func TestRollbackAfterJobSubmission(t *testing.T) { // force a rollback ForceRollback: true, }, - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationSubmittingJob, + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationSubmittingJob, DeployHash: "old-hash-retry-err", - JobStatus: v1beta1.FlinkJobStatus{ - JobID: "jobid", + ApplicationStatus: []v1beta2.FlinkApplicationVersionStatus{ + { + JobStatus: v1beta2.FlinkJobStatus{ + JobID: "jobid", + }, + }, }, }, } stateMachineForTest := getTestStateMachine() - + mockFlinkController := 
stateMachineForTest.flinkController.(*mock.FlinkController) err := stateMachineForTest.Handle(context.Background(), &app) assert.Nil(t, err) - assert.Equal(t, v1beta1.FlinkApplicationRollingBackJob, app.Status.Phase) - assert.Equal(t, "", app.Status.JobStatus.JobID) + assert.Equal(t, v1beta2.FlinkApplicationRollingBackJob, app.Status.Phase) + assert.Equal(t, "", mockFlinkController.GetLatestJobID(context.Background(), &app)) } func TestErrorHandlingInRunningPhase(t *testing.T) { From 47833274e05dd40add82b3c26e130d9ea5bdc958 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Mon, 9 Mar 2020 16:10:31 -0700 Subject: [PATCH 17/41] Upgrade integ test to v1beta2 --- integ/test_app.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integ/test_app.yaml b/integ/test_app.yaml index 203afaa7..3532d267 100644 --- a/integ/test_app.yaml +++ b/integ/test_app.yaml @@ -1,4 +1,4 @@ -apiVersion: flink.k8s.io/v1beta1 +apiVersion: flink.k8s.io/v1beta2 kind: FlinkApplication metadata: name: operator-test-app From 32d4a608a66bb7ab3e2e537bc56d347f6db7c2dd Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Mon, 9 Mar 2020 20:21:56 -0700 Subject: [PATCH 18/41] Backward compatibility changes --- deploy/crd.yaml | 5 +++-- integ/test_app.yaml | 2 +- pkg/apis/app/addtoscheme_v1beta2.go | 4 +++- pkg/apis/app/v1beta2/types.go | 7 +++++++ pkg/controller/flink/flink.go | 7 +++++++ .../flinkapplication/flink_state_machine.go | 14 ++++++++++++-- pkg/controller/k8/cluster.go | 2 +- 7 files changed, 34 insertions(+), 7 deletions(-) diff --git a/deploy/crd.yaml b/deploy/crd.yaml index 2ea1457f..33889348 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -1,4 +1,4 @@ -apiVersion: apiextensions.k8s.io/v1beta2 +apiVersion: apiextensions.k8s.io/v1beta1 kind: CustomResourceDefinition metadata: name: flinkapplications.flink.k8s.io @@ -12,7 +12,6 @@ spec: shortNames: - flinkapp scope: Namespaced - version: v1beta2 versions: - name: v1beta2 served: true @@ -23,6 +22,8 @@ spec: - name: v1alpha1 
served: true storage: false + conversion: + strategy: None validation: # openAPIV3Schema is the schema for validating custom objects. openAPIV3Schema: diff --git a/integ/test_app.yaml b/integ/test_app.yaml index 3532d267..203afaa7 100644 --- a/integ/test_app.yaml +++ b/integ/test_app.yaml @@ -1,4 +1,4 @@ -apiVersion: flink.k8s.io/v1beta2 +apiVersion: flink.k8s.io/v1beta1 kind: FlinkApplication metadata: name: operator-test-app diff --git a/pkg/apis/app/addtoscheme_v1beta2.go b/pkg/apis/app/addtoscheme_v1beta2.go index d2fdcb90..790a392a 100644 --- a/pkg/apis/app/addtoscheme_v1beta2.go +++ b/pkg/apis/app/addtoscheme_v1beta2.go @@ -4,7 +4,9 @@ package apis -import "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" +import ( + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" +) func init() { // Register the types with the Scheme so the components can map objects to GroupVersionKinds and back diff --git a/pkg/apis/app/v1beta2/types.go b/pkg/apis/app/v1beta2/types.go index 4b77adf4..f2ca59c4 100644 --- a/pkg/apis/app/v1beta2/types.go +++ b/pkg/apis/app/v1beta2/types.go @@ -173,6 +173,9 @@ type FlinkApplicationStatus struct { DesiredApplicationCount int32 `json:"desiredApplicationCount,omitempty"` DeployVersion string `json:"deployVersion,omitempty"` UpdatingVersion string `json:"updatingVersion,omitempty"` + // To ensure backward compatibility allow repeat ClusterStatus and JobStatus + ClusterStatus FlinkClusterStatus `json:"clusterStatus,omitempty"` + JobStatus FlinkJobStatus `json:"jobStatus,omitempty"` ApplicationStatus []FlinkApplicationVersionStatus `json:"appStatus,omitempty"` FailedDeployHash string `json:"failedDeployHash,omitempty"` RollbackHash string `json:"rollbackHash,omitempty"` @@ -262,6 +265,10 @@ func IsRunningPhase(phase FlinkApplicationPhase) bool { } func IsBlueGreenDeploymentMode(mode DeploymentMode) bool { + // Backaward compatibility between v1beta1 and v1beta2 + if mode == DeploymentModeDual { + return false + } return mode == 
DeploymentModeBlueGreen } diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index 8aaae8af..9a436eaa 100644 --- a/pkg/controller/flink/flink.go +++ b/pkg/controller/flink/flink.go @@ -108,6 +108,9 @@ type ControllerInterface interface { // Update jobStatus on the latest ApplicationStatus UpdateLatestJobStatus(ctx context.Context, app *v1beta2.FlinkApplication, jobStatus v1beta2.FlinkJobStatus) + + // Update clusterStatus on the latest ApplicationStatus + UpdateLatestClusterStatus(ctx context.Context, app *v1beta2.FlinkApplication, jobStatus v1beta2.FlinkClusterStatus) } func NewController(k8sCluster k8.ClusterInterface, eventRecorder record.EventRecorder, config controllerConfig.RuntimeConfig) ControllerInterface { @@ -669,6 +672,10 @@ func (f *Controller) UpdateLatestJobStatus(ctx context.Context, app *v1beta2.Fli app.Status.ApplicationStatus[getCurrentStatusIndex(app)].JobStatus = jobStatus } +func (f *Controller) UpdateLatestClusterStatus(ctx context.Context, app *v1beta2.FlinkApplication, clusterStatus v1beta2.FlinkClusterStatus) { + app.Status.ApplicationStatus[getCurrentStatusIndex(app)].ClusterStatus = clusterStatus +} + func (f *Controller) GetLatestJobID(ctx context.Context, application *v1beta2.FlinkApplication) string { return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID } diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index c43335b7..88e0652c 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -154,7 +154,7 @@ func (s *FlinkStateMachine) handle(ctx context.Context, application *v1beta2.Fli updateLastSeenError := false appPhase := application.Status.Phase // initialize application status array if it's not yet been initialized - initializeAppStatusIfEmpty(application) + s.initializeAppStatusIfEmpty(ctx, application) if 
!application.ObjectMeta.DeletionTimestamp.IsZero() && appPhase != v1beta2.FlinkApplicationDeleting { s.updateApplicationPhase(application, v1beta2.FlinkApplicationDeleting) @@ -287,7 +287,7 @@ func (s *FlinkStateMachine) handleClusterStarting(ctx context.Context, applicati return statusChanged, nil } -func initializeAppStatusIfEmpty(application *v1beta2.FlinkApplication) { +func (s *FlinkStateMachine) initializeAppStatusIfEmpty(ctx context.Context, application *v1beta2.FlinkApplication) { // initialize the app status array to include 2 status elements in case of blue green deploys // else use a one element array if application.Spec.DeploymentMode == v1beta2.DeploymentModeBlueGreen { @@ -299,6 +299,16 @@ func initializeAppStatusIfEmpty(application *v1beta2.FlinkApplication) { if len(application.Status.ApplicationStatus) == 0 { application.Status.ApplicationStatus = make([]v1beta2.FlinkApplicationVersionStatus, application.Status.DesiredApplicationCount) } + + // If we're reading a v1beta1 app, populate the first element of the status array from + // the top-level jobStatus and clusteStatus + if application.Status.JobStatus != (v1beta2.FlinkJobStatus{}) { + s.flinkController.UpdateLatestJobStatus(ctx, application, application.Status.JobStatus) + } + + if application.Status.ClusterStatus != (v1beta2.FlinkClusterStatus{}) { + s.flinkController.UpdateLatestClusterStatus(ctx, application, application.Status.ClusterStatus) + } } func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { diff --git a/pkg/controller/k8/cluster.go b/pkg/controller/k8/cluster.go index ff4c0eed..4b738ba6 100644 --- a/pkg/controller/k8/cluster.go +++ b/pkg/controller/k8/cluster.go @@ -199,7 +199,7 @@ func (k *Cluster) UpdateK8Object(ctx context.Context, object runtime.Object) err func (k *Cluster) UpdateStatus(ctx context.Context, object runtime.Object) error { objectCopy := object.DeepCopyObject() - + logger.Debugf(ctx, 
"Version %s", objectCopy.GetObjectKind().GroupVersionKind().Version) err := k.client.Status().Update(ctx, objectCopy) if err != nil { if errors.IsConflict(err) { From 1a4b8c014ee6e6ba7e9bb7add09ffedc1a8e2428 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Tue, 10 Mar 2020 09:02:58 -0700 Subject: [PATCH 19/41] Work around status subresource bug --- pkg/controller/k8/cluster.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/controller/k8/cluster.go b/pkg/controller/k8/cluster.go index 4b738ba6..cc8e64a4 100644 --- a/pkg/controller/k8/cluster.go +++ b/pkg/controller/k8/cluster.go @@ -202,6 +202,10 @@ func (k *Cluster) UpdateStatus(ctx context.Context, object runtime.Object) error logger.Debugf(ctx, "Version %s", objectCopy.GetObjectKind().GroupVersionKind().Version) err := k.client.Status().Update(ctx, objectCopy) if err != nil { + if errors.IsInvalid(err) { + logger.Warn(ctx, "Status sub-resource update failed, attempting to update the entire resource instead") + return k.client.Update(ctx, object) + } if errors.IsConflict(err) { logger.Warnf(ctx, "Conflict while updating status") k.metrics.updateConflicts.Inc(ctx) From ea2c93bc3c49f5f815be4f1b6cacf1d27103bc9a Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Tue, 10 Mar 2020 10:11:17 -0700 Subject: [PATCH 20/41] Rename status array to VersionStatuses and add comment on k8s bug --- deploy/crd.yaml | 3 +- integ/checkpoint_failure_test.go | 4 +- integ/simple_test.go | 18 ++-- integ/utils/utils.go | 2 +- pkg/apis/app/v1beta2/types.go | 34 +++---- pkg/apis/app/v1beta2/zz_generated.deepcopy.go | 4 +- pkg/controller/flink/flink.go | 96 +++++++++---------- pkg/controller/flink/flink_test.go | 90 ++++++++--------- pkg/controller/flink/mock/mock_flink.go | 20 +++- .../flinkapplication/flink_state_machine.go | 4 +- .../flink_state_machine_test.go | 10 +- pkg/controller/k8/cluster.go | 15 ++- 12 files changed, 161 insertions(+), 139 deletions(-) diff --git a/deploy/crd.yaml b/deploy/crd.yaml index 33889348..2072282d 
100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -12,6 +12,7 @@ spec: shortNames: - flinkapp scope: Namespaced + version: v1beta2 versions: - name: v1beta2 served: true @@ -22,8 +23,6 @@ spec: - name: v1alpha1 served: true storage: false - conversion: - strategy: None validation: # openAPIV3Schema is the schema for validating custom objects. openAPIV3Schema: diff --git a/integ/checkpoint_failure_test.go b/integ/checkpoint_failure_test.go index 5d784a84..d95dd5b1 100644 --- a/integ/checkpoint_failure_test.go +++ b/integ/checkpoint_failure_test.go @@ -45,9 +45,9 @@ func failingJobTest(s *IntegSuite, c *C, testName string, causeFailure func()) { // And the job should not have been updated newApp, err := s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) - c.Assert(newApp.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID, Equals, app.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID) + c.Assert(newApp.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID, Equals, app.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID) - endpoint := fmt.Sprintf("jobs/%s", app.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s", app.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID) _, err = s.Util.FlinkAPIGet(app, endpoint) c.Assert(err, IsNil) diff --git a/integ/simple_test.go b/integ/simple_test.go index daf15e83..1780c454 100644 --- a/integ/simple_test.go +++ b/integ/simple_test.go @@ -28,12 +28,12 @@ func updateAndValidate(c *C, s *IntegSuite, name string, updateFn func(app *v1be // check that it really updated newApp, err := s.Util.GetFlinkApplication(name) c.Assert(err, IsNil) - c.Assert(newApp.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID, Not(Equals), app.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID) + 
c.Assert(newApp.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID, Not(Equals), app.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID) log.Info("New job started successfully") // check that we savepointed and restored correctly - endpoint := fmt.Sprintf("jobs/%s/checkpoints", newApp.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s/checkpoints", newApp.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID) res, err := s.Util.FlinkAPIGet(newApp, endpoint) c.Assert(err, IsNil) @@ -140,13 +140,13 @@ func (s *IntegSuite) TestSimple(c *C) { c.Assert(s.Util.WaitForAllTasksRunning(newApp.Name), IsNil) // the job id should have changed - jobID := newApp.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID + jobID := newApp.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID newApp, err = s.Util.GetFlinkApplication(newApp.Name) c.Assert(err, IsNil) - c.Assert(newApp.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID, Not(Equals), jobID) + c.Assert(newApp.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID, Not(Equals), jobID) // we should have restored from our savepoint - endpoint := fmt.Sprintf("jobs/%s/checkpoints", newApp.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s/checkpoints", newApp.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID) res, err := s.Util.FlinkAPIGet(newApp, endpoint) c.Assert(err, IsNil) @@ -191,7 +191,7 @@ func (s *IntegSuite) TestSimple(c *C) { log.Info("User cancelled deploy. 
Job is in deploy failed, waiting for tasks to start") // but the job should still be running - c.Assert(newApp.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.State, Equals, v1beta2.Running) + c.Assert(newApp.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.State, Equals, v1beta2.Running) log.Info("Attempting to roll forward with fix") // Fixing update @@ -225,7 +225,7 @@ func (s *IntegSuite) TestSimple(c *C) { jobList := jobMap["jobs"].([]interface{}) for _, j := range jobList { job := j.(map[string]interface{}) - if job["id"] == app.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID { + if job["id"] == app.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID { return job } } @@ -284,7 +284,7 @@ func (s *IntegSuite) TestRecovery(c *C) { app, err := s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) - endpoint := fmt.Sprintf("jobs/%s/checkpoints", app.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s/checkpoints", app.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID) for { res, err := s.Util.FlinkAPIGet(app, endpoint) c.Assert(err, IsNil) @@ -324,7 +324,7 @@ func (s *IntegSuite) TestRecovery(c *C) { // wait until the new job is launched newApp, err := s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) - if newApp.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID != app.Status.ApplicationStatus[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID { + if newApp.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID != app.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID { break } time.Sleep(100 * time.Millisecond) diff --git a/integ/utils/utils.go b/integ/utils/utils.go index 8b95d4d2..d7ff39d4 100644 --- a/integ/utils/utils.go +++ b/integ/utils/utils.go @@ -429,7 +429,7 @@ func (f *TestUtil) 
WaitForAllTasksRunning(name string) error { return err } - endpoint := fmt.Sprintf("jobs/%s", flinkApp.Status.ApplicationStatus[f.GetCurrentStatusIndex(flinkApp)].JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s", flinkApp.Status.VersionStatuses[f.GetCurrentStatusIndex(flinkApp)].JobStatus.JobID) for { res, err := f.FlinkAPIGet(flinkApp, endpoint) if err != nil { diff --git a/pkg/apis/app/v1beta2/types.go b/pkg/apis/app/v1beta2/types.go index f2ca59c4..f551176d 100644 --- a/pkg/apis/app/v1beta2/types.go +++ b/pkg/apis/app/v1beta2/types.go @@ -166,24 +166,24 @@ type FlinkJobStatus struct { } type FlinkApplicationStatus struct { - Phase FlinkApplicationPhase `json:"phase"` - StartedAt *metav1.Time `json:"startedAt,omitempty"` - LastUpdatedAt *metav1.Time `json:"lastUpdatedAt,omitempty"` - Reason string `json:"reason,omitempty"` - DesiredApplicationCount int32 `json:"desiredApplicationCount,omitempty"` - DeployVersion string `json:"deployVersion,omitempty"` - UpdatingVersion string `json:"updatingVersion,omitempty"` + Phase FlinkApplicationPhase `json:"phase"` + StartedAt *metav1.Time `json:"startedAt,omitempty"` + LastUpdatedAt *metav1.Time `json:"lastUpdatedAt,omitempty"` + Reason string `json:"reason,omitempty"` + DesiredApplicationCount int32 `json:"desiredApplicationCount,omitempty"` + DeployVersion string `json:"deployVersion,omitempty"` + UpdatingVersion string `json:"updatingVersion,omitempty"` // To ensure backward compatibility allow repeat ClusterStatus and JobStatus - ClusterStatus FlinkClusterStatus `json:"clusterStatus,omitempty"` - JobStatus FlinkJobStatus `json:"jobStatus,omitempty"` - ApplicationStatus []FlinkApplicationVersionStatus `json:"appStatus,omitempty"` - FailedDeployHash string `json:"failedDeployHash,omitempty"` - RollbackHash string `json:"rollbackHash,omitempty"` - DeployHash string `json:"deployHash"` - SavepointTriggerID string `json:"savepointTriggerId,omitempty"` - SavepointPath string `json:"savepointPath,omitempty"` - RetryCount 
int32 `json:"retryCount,omitempty"` - LastSeenError *FlinkApplicationError `json:"lastSeenError,omitempty"` + ClusterStatus FlinkClusterStatus `json:"clusterStatus,omitempty"` + JobStatus FlinkJobStatus `json:"jobStatus,omitempty"` + VersionStatuses []FlinkApplicationVersionStatus `json:"appStatus,omitempty"` + FailedDeployHash string `json:"failedDeployHash,omitempty"` + RollbackHash string `json:"rollbackHash,omitempty"` + DeployHash string `json:"deployHash"` + SavepointTriggerID string `json:"savepointTriggerId,omitempty"` + SavepointPath string `json:"savepointPath,omitempty"` + RetryCount int32 `json:"retryCount,omitempty"` + LastSeenError *FlinkApplicationError `json:"lastSeenError,omitempty"` } type FlinkApplicationVersion string diff --git a/pkg/apis/app/v1beta2/zz_generated.deepcopy.go b/pkg/apis/app/v1beta2/zz_generated.deepcopy.go index 6f43fcf2..ec18e83c 100644 --- a/pkg/apis/app/v1beta2/zz_generated.deepcopy.go +++ b/pkg/apis/app/v1beta2/zz_generated.deepcopy.go @@ -205,8 +205,8 @@ func (in *FlinkApplicationStatus) DeepCopyInto(out *FlinkApplicationStatus) { in, out := &in.LastUpdatedAt, &out.LastUpdatedAt *out = (*in).DeepCopy() } - if in.ApplicationStatus != nil { - in, out := &in.ApplicationStatus, &out.ApplicationStatus + if in.VersionStatuses != nil { + in, out := &in.VersionStatuses, &out.VersionStatuses *out = make([]FlinkApplicationVersionStatus, len(*in)) for i := range *in { (*in)[i].DeepCopyInto(&(*out)[i]) diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index 9a436eaa..d52cf70a 100644 --- a/pkg/controller/flink/flink.go +++ b/pkg/controller/flink/flink.go @@ -106,10 +106,10 @@ type ControllerInterface interface { // Updates the jobID on the latest jobStatus UpdateLatestJobID(ctx context.Context, app *v1beta2.FlinkApplication, jobID string) - // Update jobStatus on the latest ApplicationStatus + // Update jobStatus on the latest VersionStatuses UpdateLatestJobStatus(ctx context.Context, app 
*v1beta2.FlinkApplication, jobStatus v1beta2.FlinkJobStatus) - // Update clusterStatus on the latest ApplicationStatus + // Update clusterStatus on the latest VersionStatuses UpdateLatestClusterStatus(ctx context.Context, app *v1beta2.FlinkApplication, jobStatus v1beta2.FlinkClusterStatus) } @@ -171,7 +171,7 @@ func getClusterOverviewURL(app *v1beta2.FlinkApplication) string { func getJobOverviewURL(app *v1beta2.FlinkApplication) string { externalURL := getExternalURLFromApp(app) if externalURL != "" { - return fmt.Sprintf(externalURL+client.WebUIAnchor+client.GetJobsOverviewURL, app.Status.ApplicationStatus[getCurrentStatusIndex(app)].JobStatus.JobID) + return fmt.Sprintf(externalURL+client.WebUIAnchor+client.GetJobsOverviewURL, app.Status.VersionStatuses[getCurrentStatusIndex(app)].JobStatus.JobID) } return "" } @@ -223,7 +223,7 @@ func (f *Controller) GetJobsForApplication(ctx context.Context, application *v1b } func (f *Controller) GetJobForApplication(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { - if application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID == "" { + if application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID == "" { return nil, nil } @@ -238,8 +238,8 @@ func (f *Controller) GetJobForApplication(ctx context.Context, application *v1be // The operator for now assumes and is intended to run single application per Flink Cluster. 
// Once we move to run multiple applications, this has to be removed/updated func (f *Controller) getJobIDForApplication(application *v1beta2.FlinkApplication) (string, error) { - if application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID != "" { - return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID, nil + if application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID != "" { + return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID, nil } return "", errors.New("active job id not available") @@ -508,42 +508,42 @@ func (f *Controller) CompareAndUpdateClusterStatus(ctx context.Context, applicat // Error retrieving cluster / taskmanagers overview (after startup/readiness) --> Red // If there is an error this loop will return with Health set to Red currIndex := getCurrentStatusIndex(application) - oldClusterStatus := application.Status.ApplicationStatus[currIndex].ClusterStatus - application.Status.ApplicationStatus[currIndex].ClusterStatus.Health = v1beta2.Red + oldClusterStatus := application.Status.VersionStatuses[currIndex].ClusterStatus + application.Status.VersionStatuses[currIndex].ClusterStatus.Health = v1beta2.Red deployment, err := f.GetCurrentDeploymentsForApp(ctx, application) if deployment == nil || err != nil { return false, err } - application.Status.ApplicationStatus[currIndex].ClusterStatus.ClusterOverviewURL = getClusterOverviewURL(application) - application.Status.ApplicationStatus[currIndex].ClusterStatus.NumberOfTaskManagers = deployment.Taskmanager.Status.AvailableReplicas + application.Status.VersionStatuses[currIndex].ClusterStatus.ClusterOverviewURL = getClusterOverviewURL(application) + application.Status.VersionStatuses[currIndex].ClusterStatus.NumberOfTaskManagers = deployment.Taskmanager.Status.AvailableReplicas // Get Cluster overview response, err := f.flinkClient.GetClusterOverview(ctx, 
getURLFromApp(application, hash)) if err != nil { return false, err } // Update cluster overview - application.Status.ApplicationStatus[currIndex].ClusterStatus.AvailableTaskSlots = response.SlotsAvailable - application.Status.ApplicationStatus[currIndex].ClusterStatus.NumberOfTaskSlots = response.NumberOfTaskSlots + application.Status.VersionStatuses[currIndex].ClusterStatus.AvailableTaskSlots = response.SlotsAvailable + application.Status.VersionStatuses[currIndex].ClusterStatus.NumberOfTaskSlots = response.NumberOfTaskSlots // Get Healthy Taskmanagers tmResponse, tmErr := f.flinkClient.GetTaskManagers(ctx, getURLFromApp(application, hash)) if tmErr != nil { return false, tmErr } - application.Status.ApplicationStatus[currIndex].ClusterStatus.HealthyTaskManagers = getHealthyTaskManagerCount(tmResponse) + application.Status.VersionStatuses[currIndex].ClusterStatus.HealthyTaskManagers = getHealthyTaskManagerCount(tmResponse) // Determine Health of the cluster. // Healthy TaskManagers == Number of taskmanagers --> Green // Else --> Yellow - if application.Status.ApplicationStatus[currIndex].ClusterStatus.HealthyTaskManagers == deployment.Taskmanager.Status.Replicas { - application.Status.ApplicationStatus[currIndex].ClusterStatus.Health = v1beta2.Green + if application.Status.VersionStatuses[currIndex].ClusterStatus.HealthyTaskManagers == deployment.Taskmanager.Status.Replicas { + application.Status.VersionStatuses[currIndex].ClusterStatus.Health = v1beta2.Green } else { - application.Status.ApplicationStatus[currIndex].ClusterStatus.Health = v1beta2.Yellow + application.Status.VersionStatuses[currIndex].ClusterStatus.Health = v1beta2.Yellow } - return !apiequality.Semantic.DeepEqual(oldClusterStatus, application.Status.ApplicationStatus[currIndex].ClusterStatus), nil + return !apiequality.Semantic.DeepEqual(oldClusterStatus, application.Status.VersionStatuses[currIndex].ClusterStatus), nil } func getHealthyTaskManagerCount(response *client.TaskManagersResponse) 
int32 { @@ -561,13 +561,13 @@ func getHealthyTaskManagerCount(response *client.TaskManagersResponse) int32 { func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta2.FlinkApplication, hash string) (bool, error) { currIndex := getCurrentStatusIndex(app) - if app.Status.ApplicationStatus[currIndex].JobStatus.LastFailingTime == nil { + if app.Status.VersionStatuses[currIndex].JobStatus.LastFailingTime == nil { initTime := metav1.NewTime(time.Time{}) - app.Status.ApplicationStatus[currIndex].JobStatus.LastFailingTime = &initTime + app.Status.VersionStatuses[currIndex].JobStatus.LastFailingTime = &initTime } - oldJobStatus := app.Status.ApplicationStatus[currIndex].JobStatus - app.Status.ApplicationStatus[currIndex].JobStatus.JobID = oldJobStatus.JobID + oldJobStatus := app.Status.VersionStatuses[currIndex].JobStatus + app.Status.VersionStatuses[currIndex].JobStatus.JobID = oldJobStatus.JobID jobResponse, err := f.flinkClient.GetJobOverview(ctx, getURLFromApp(app, hash), f.GetLatestJobID(ctx, app)) if err != nil { return false, err @@ -578,29 +578,29 @@ func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta2 } // Job status - app.Status.ApplicationStatus[currIndex].JobStatus.JobOverviewURL = getJobOverviewURL(app) - app.Status.ApplicationStatus[currIndex].JobStatus.State = v1beta2.JobState(jobResponse.State) + app.Status.VersionStatuses[currIndex].JobStatus.JobOverviewURL = getJobOverviewURL(app) + app.Status.VersionStatuses[currIndex].JobStatus.State = v1beta2.JobState(jobResponse.State) jobStartTime := metav1.NewTime(time.Unix(jobResponse.StartTime/1000, 0)) - app.Status.ApplicationStatus[currIndex].JobStatus.StartTime = &jobStartTime + app.Status.VersionStatuses[currIndex].JobStatus.StartTime = &jobStartTime // Checkpoints status - app.Status.ApplicationStatus[currIndex].JobStatus.FailedCheckpointCount = checkpoints.Counts["failed"] - app.Status.ApplicationStatus[currIndex].JobStatus.CompletedCheckpointCount = 
checkpoints.Counts["completed"] - app.Status.ApplicationStatus[currIndex].JobStatus.JobRestartCount = checkpoints.Counts["restored"] + app.Status.VersionStatuses[currIndex].JobStatus.FailedCheckpointCount = checkpoints.Counts["failed"] + app.Status.VersionStatuses[currIndex].JobStatus.CompletedCheckpointCount = checkpoints.Counts["completed"] + app.Status.VersionStatuses[currIndex].JobStatus.JobRestartCount = checkpoints.Counts["restored"] latestCheckpoint := checkpoints.Latest.Completed var lastCheckpointAgeSeconds int if latestCheckpoint != nil { lastCheckpointTimeMillis := metav1.NewTime(time.Unix(latestCheckpoint.LatestAckTimestamp/1000, 0)) - app.Status.ApplicationStatus[currIndex].JobStatus.LastCheckpointTime = &lastCheckpointTimeMillis - app.Status.ApplicationStatus[currIndex].JobStatus.LastCheckpointPath = latestCheckpoint.ExternalPath - lastCheckpointAgeSeconds = app.Status.ApplicationStatus[currIndex].JobStatus.LastCheckpointTime.Second() + app.Status.VersionStatuses[currIndex].JobStatus.LastCheckpointTime = &lastCheckpointTimeMillis + app.Status.VersionStatuses[currIndex].JobStatus.LastCheckpointPath = latestCheckpoint.ExternalPath + lastCheckpointAgeSeconds = app.Status.VersionStatuses[currIndex].JobStatus.LastCheckpointTime.Second() } if checkpoints.Latest.Restored != nil { - app.Status.ApplicationStatus[currIndex].JobStatus.RestorePath = checkpoints.Latest.Restored.ExternalPath + app.Status.VersionStatuses[currIndex].JobStatus.RestorePath = checkpoints.Latest.Restored.ExternalPath restoreTime := metav1.NewTime(time.Unix(checkpoints.Latest.Restored.RestoredTimeStamp/1000, 0)) - app.Status.ApplicationStatus[currIndex].JobStatus.RestoreTime = &restoreTime + app.Status.VersionStatuses[currIndex].JobStatus.RestoreTime = &restoreTime } runningTasks := int32(0) @@ -620,30 +620,30 @@ func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta2 } } - app.Status.ApplicationStatus[currIndex].JobStatus.RunningTasks = runningTasks - 
app.Status.ApplicationStatus[currIndex].JobStatus.TotalTasks = totalTasks + app.Status.VersionStatuses[currIndex].JobStatus.RunningTasks = runningTasks + app.Status.VersionStatuses[currIndex].JobStatus.TotalTasks = totalTasks // Health Status for job // Job is in FAILING state --> RED // Time since last successful checkpoint > maxCheckpointTime --> YELLOW // Else --> Green - if app.Status.ApplicationStatus[currIndex].JobStatus.State == v1beta2.Failing || - time.Since(app.Status.ApplicationStatus[currIndex].JobStatus.LastFailingTime.Time) < failingIntervalThreshold || + if app.Status.VersionStatuses[currIndex].JobStatus.State == v1beta2.Failing || + time.Since(app.Status.VersionStatuses[currIndex].JobStatus.LastFailingTime.Time) < failingIntervalThreshold || verticesInCreated > 0 { - app.Status.ApplicationStatus[currIndex].JobStatus.Health = v1beta2.Red + app.Status.VersionStatuses[currIndex].JobStatus.Health = v1beta2.Red } else if time.Since(time.Unix(int64(lastCheckpointAgeSeconds), 0)) < maxCheckpointTime || runningTasks < totalTasks { - app.Status.ApplicationStatus[currIndex].JobStatus.Health = v1beta2.Yellow + app.Status.VersionStatuses[currIndex].JobStatus.Health = v1beta2.Yellow } else { - app.Status.ApplicationStatus[currIndex].JobStatus.Health = v1beta2.Green + app.Status.VersionStatuses[currIndex].JobStatus.Health = v1beta2.Green } // Update LastFailingTime - if app.Status.ApplicationStatus[currIndex].JobStatus.State == v1beta2.Failing { + if app.Status.VersionStatuses[currIndex].JobStatus.State == v1beta2.Failing { currTime := metav1.Now() - app.Status.ApplicationStatus[currIndex].JobStatus.LastFailingTime = &currTime + app.Status.VersionStatuses[currIndex].JobStatus.LastFailingTime = &currTime } - return !apiequality.Semantic.DeepEqual(oldJobStatus, app.Status.ApplicationStatus[currIndex].JobStatus), err + return !apiequality.Semantic.DeepEqual(oldJobStatus, app.Status.VersionStatuses[currIndex].JobStatus), err } func getCurrentStatusIndex(app 
*v1beta2.FlinkApplication) int32 { @@ -659,27 +659,27 @@ func getCurrentStatusIndex(app *v1beta2.FlinkApplication) int32 { } func (f *Controller) GetLatestClusterStatus(ctx context.Context, application *v1beta2.FlinkApplication) v1beta2.FlinkClusterStatus { - return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].ClusterStatus + return application.Status.VersionStatuses[getCurrentStatusIndex(application)].ClusterStatus } func (f *Controller) GetLatestJobStatus(ctx context.Context, application *v1beta2.FlinkApplication) v1beta2.FlinkJobStatus { - return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus + return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus } func (f *Controller) UpdateLatestJobStatus(ctx context.Context, app *v1beta2.FlinkApplication, jobStatus v1beta2.FlinkJobStatus) { - app.Status.ApplicationStatus[getCurrentStatusIndex(app)].JobStatus = jobStatus + app.Status.VersionStatuses[getCurrentStatusIndex(app)].JobStatus = jobStatus } func (f *Controller) UpdateLatestClusterStatus(ctx context.Context, app *v1beta2.FlinkApplication, clusterStatus v1beta2.FlinkClusterStatus) { - app.Status.ApplicationStatus[getCurrentStatusIndex(app)].ClusterStatus = clusterStatus + app.Status.VersionStatuses[getCurrentStatusIndex(app)].ClusterStatus = clusterStatus } func (f *Controller) GetLatestJobID(ctx context.Context, application *v1beta2.FlinkApplication) string { - return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID + return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID } func (f *Controller) UpdateLatestJobID(ctx context.Context, app *v1beta2.FlinkApplication, jobID string) { - app.Status.ApplicationStatus[getCurrentStatusIndex(app)].JobStatus.JobID = jobID + app.Status.VersionStatuses[getCurrentStatusIndex(app)].JobStatus.JobID = jobID } diff --git a/pkg/controller/flink/flink_test.go 
b/pkg/controller/flink/flink_test.go index 33555d78..23962eb7 100644 --- a/pkg/controller/flink/flink_test.go +++ b/pkg/controller/flink/flink_test.go @@ -67,12 +67,12 @@ func getFlinkTestApp() v1beta2.FlinkApplication { app.Spec.Parallelism = 8 app.Name = testAppName app.Namespace = testNamespace - statuses := append(app.Status.ApplicationStatus, v1beta2.FlinkApplicationVersionStatus{ + statuses := append(app.Status.VersionStatuses, v1beta2.FlinkApplicationVersionStatus{ JobStatus: v1beta2.FlinkJobStatus{ JobID: testJobID, }, }) - app.Status.ApplicationStatus = statuses + app.Status.VersionStatuses = statuses app.Spec.Image = testImage app.Spec.FlinkVersion = testFlinkVersion app.Status.DesiredApplicationCount = 1 @@ -599,10 +599,10 @@ func TestFindExternalizedCheckpoint(t *testing.T) { func TestFindExternalizedCheckpointFromStatus(t *testing.T) { flinkControllerForTest := getTestFlinkController() flinkApp := getFlinkTestApp() - flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.JobID = "jobid" - flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.LastCheckpointPath = "/tmp/checkpoint" + flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.JobID = "jobid" + flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.LastCheckpointPath = "/tmp/checkpoint" checkpointTime := metaV1.Now() - flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.LastCheckpointTime = &checkpointTime + flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.LastCheckpointTime = &checkpointTime mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) mockJmClient.GetLatestCheckpointFunc = func(ctx context.Context, url string, jobId string) (*client.CheckpointStatistics, error) { @@ -668,22 +668,22 @@ func TestClusterStatusUpdated(t *testing.T) { _, err = flinkControllerForTest.CompareAndUpdateClusterStatus(context.Background(), 
&flinkApp, "hash") assert.Nil(t, err) - assert.Equal(t, int32(1), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskSlots) - assert.Equal(t, int32(0), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.AvailableTaskSlots) - assert.Equal(t, int32(1), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.HealthyTaskManagers) - assert.Equal(t, v1beta2.Green, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health) - assert.Equal(t, "app-name.lyft.xyz/#/overview", flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.ClusterOverviewURL) + assert.Equal(t, int32(1), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskSlots) + assert.Equal(t, int32(0), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.AvailableTaskSlots) + assert.Equal(t, int32(1), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.HealthyTaskManagers) + assert.Equal(t, v1beta2.Green, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health) + assert.Equal(t, "app-name.lyft.xyz/#/overview", flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.ClusterOverviewURL) } func TestNoClusterStatusChange(t *testing.T) { flinkControllerForTest := getTestFlinkController() flinkApp := getFlinkTestApp() - flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskSlots = int32(1) - flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.AvailableTaskSlots = int32(0) - flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.HealthyTaskManagers = int32(1) - flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health = v1beta2.Green - 
flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskManagers = int32(1) + flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskSlots = int32(1) + flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.AvailableTaskSlots = int32(0) + flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.HealthyTaskManagers = int32(1) + flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health = v1beta2.Green + flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskManagers = int32(1) mockK8Cluster := flinkControllerForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.GetDeploymentsWithLabelFunc = func(ctx context.Context, namespace string, labelMap map[string]string) (*v1.DeploymentList, error) { tmDeployment := FetchTaskMangerDeploymentCreateObj(&flinkApp, testAppHash) @@ -772,10 +772,10 @@ func TestHealthyTaskmanagers(t *testing.T) { _, err := flinkControllerForTest.CompareAndUpdateClusterStatus(context.Background(), &flinkApp, hash) assert.Nil(t, err) - assert.Equal(t, int32(1), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskSlots) - assert.Equal(t, int32(0), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.AvailableTaskSlots) - assert.Equal(t, int32(0), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.HealthyTaskManagers) - assert.Equal(t, v1beta2.Yellow, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health) + assert.Equal(t, int32(1), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskSlots) + assert.Equal(t, int32(0), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.AvailableTaskSlots) + assert.Equal(t, int32(0), 
flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.HealthyTaskManagers) + assert.Equal(t, v1beta2.Yellow, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health) } @@ -832,26 +832,26 @@ func TestJobStatusUpdated(t *testing.T) { }, nil } - flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.JobID = "abc" + flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.JobID = "abc" expectedTime := metaV1.NewTime(time.Unix(startTime/1000, 0)) _, err = flinkControllerForTest.CompareAndUpdateJobStatus(context.Background(), &flinkApp, "hash") assert.Nil(t, err) - assert.Equal(t, v1beta2.Running, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.State) - assert.Equal(t, &expectedTime, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.StartTime) - assert.Equal(t, v1beta2.Yellow, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.Health) + assert.Equal(t, v1beta2.Running, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.State) + assert.Equal(t, &expectedTime, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.StartTime) + assert.Equal(t, v1beta2.Yellow, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.Health) - assert.Equal(t, int32(0), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.FailedCheckpointCount) - assert.Equal(t, int32(4), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.CompletedCheckpointCount) - assert.Equal(t, int32(1), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.JobRestartCount) - assert.Equal(t, &expectedTime, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.RestoreTime) + assert.Equal(t, int32(0), 
flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.FailedCheckpointCount) + assert.Equal(t, int32(4), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.CompletedCheckpointCount) + assert.Equal(t, int32(1), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.JobRestartCount) + assert.Equal(t, &expectedTime, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.RestoreTime) - assert.Equal(t, "/test/externalpath", flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.RestorePath) - assert.Equal(t, &expectedTime, flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.LastCheckpointTime) - assert.Equal(t, "app-name.lyft.xyz/#/jobs/abc", flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.JobOverviewURL) + assert.Equal(t, "/test/externalpath", flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.RestorePath) + assert.Equal(t, &expectedTime, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.LastCheckpointTime) + assert.Equal(t, "app-name.lyft.xyz/#/jobs/abc", flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.JobOverviewURL) - assert.Equal(t, int32(2), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.RunningTasks) - assert.Equal(t, int32(7), flinkApp.Status.ApplicationStatus[getCurrentStatusIndex(&flinkApp)].JobStatus.TotalTasks) + assert.Equal(t, int32(2), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.RunningTasks) + assert.Equal(t, int32(7), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.TotalTasks) } @@ -866,16 +866,16 @@ func TestNoJobStatusChange(t *testing.T) { app1 := getFlinkTestApp() mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) - 
app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.State = v1beta2.Running - app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.StartTime = &metaTime - app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.LastCheckpointTime = &metaTime - app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.CompletedCheckpointCount = int32(4) - app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.JobRestartCount = int32(1) - app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.FailedCheckpointCount = int32(0) - app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.Health = v1beta2.Green - app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.RestoreTime = &metaTime - app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.RestorePath = "/test/externalpath" - app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.JobOverviewURL = "" + app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.State = v1beta2.Running + app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.StartTime = &metaTime + app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.LastCheckpointTime = &metaTime + app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.CompletedCheckpointCount = int32(4) + app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.JobRestartCount = int32(1) + app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.FailedCheckpointCount = int32(0) + app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.Health = v1beta2.Green + app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.RestoreTime = &metaTime + app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.RestorePath = "/test/externalpath" + app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.JobOverviewURL = "" mockJmClient.GetJobOverviewFunc 
= func(ctx context.Context, url string, jobID string) (*client.FlinkJobOverview, error) { assert.Equal(t, url, "http://app-name-hash.ns:8081") @@ -918,8 +918,8 @@ func TestGetAndUpdateJobStatusHealth(t *testing.T) { app1 := getFlinkTestApp() mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) - app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.State = v1beta2.Failing - app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.LastFailingTime = &lastFailedTime + app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.State = v1beta2.Failing + app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.LastFailingTime = &lastFailedTime mockJmClient.GetJobOverviewFunc = func(ctx context.Context, url string, jobID string) (*client.FlinkJobOverview, error) { assert.Equal(t, url, "http://app-name-hash.ns:8081") @@ -944,7 +944,7 @@ func TestGetAndUpdateJobStatusHealth(t *testing.T) { assert.Nil(t, err) // Job is in a RUNNING state but was in a FAILING state in the last 1 minute, so we expect // JobStatus.Health to be Red - assert.Equal(t, app1.Status.ApplicationStatus[getCurrentStatusIndex(&app1)].JobStatus.Health, v1beta2.Red) + assert.Equal(t, app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.Health, v1beta2.Red) } diff --git a/pkg/controller/flink/mock/mock_flink.go b/pkg/controller/flink/mock/mock_flink.go index e1a070fa..6b6cdd5e 100644 --- a/pkg/controller/flink/mock/mock_flink.go +++ b/pkg/controller/flink/mock/mock_flink.go @@ -29,6 +29,7 @@ type GetLatestJobStatusFunc func(ctx context.Context, app *v1beta2.FlinkApplicat type GetLatestJobIDFunc func(ctx context.Context, app *v1beta2.FlinkApplication) string type UpdateLatestJobIDFunc func(ctx context.Context, app *v1beta2.FlinkApplication, jobID string) type UpdateLatestJobStatusFunc func(ctx context.Context, app *v1beta2.FlinkApplication, jobStatus v1beta2.FlinkJobStatus) +type UpdateLatestClusterStatusFunc 
func(ctx context.Context, app *v1beta2.FlinkApplication, clusterStatus v1beta2.FlinkClusterStatus) type FlinkController struct { CreateClusterFunc CreateClusterFunc @@ -51,6 +52,7 @@ type FlinkController struct { GetLatestJobIDFunc GetLatestJobIDFunc UpdateLatestJobIDFunc UpdateLatestJobIDFunc UpdateLatestJobStatusFunc UpdateLatestJobStatusFunc + UpdateLatestClusterStatusFunc UpdateLatestClusterStatusFunc } func (m *FlinkController) GetCurrentDeploymentsForApp(ctx context.Context, application *v1beta2.FlinkApplication) (*common.FlinkDeployment, error) { @@ -172,7 +174,7 @@ func (m *FlinkController) GetLatestClusterStatus(ctx context.Context, applicatio return m.GetLatestClusterStatusFunc(ctx, application) } - return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].ClusterStatus + return application.Status.VersionStatuses[getCurrentStatusIndex(application)].ClusterStatus } func (m *FlinkController) GetLatestJobStatus(ctx context.Context, application *v1beta2.FlinkApplication) v1beta2.FlinkJobStatus { @@ -180,7 +182,7 @@ func (m *FlinkController) GetLatestJobStatus(ctx context.Context, application *v return m.GetLatestJobStatusFunc(ctx, application) } - return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus + return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus } func (m *FlinkController) GetLatestJobID(ctx context.Context, application *v1beta2.FlinkApplication) string { @@ -188,7 +190,7 @@ func (m *FlinkController) GetLatestJobID(ctx context.Context, application *v1bet return m.GetLatestJobIDFunc(ctx, application) } - return application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID + return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID } func (m *FlinkController) UpdateLatestJobID(ctx context.Context, application *v1beta2.FlinkApplication, jobID string) { @@ -196,7 +198,7 @@ func (m *FlinkController) 
UpdateLatestJobID(ctx context.Context, application *v1 m.UpdateLatestJobIDFunc(ctx, application, jobID) } - application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus.JobID = jobID + application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID = jobID } func (m *FlinkController) UpdateLatestJobStatus(ctx context.Context, application *v1beta2.FlinkApplication, jobStatus v1beta2.FlinkJobStatus) { @@ -204,7 +206,15 @@ func (m *FlinkController) UpdateLatestJobStatus(ctx context.Context, application m.UpdateLatestJobStatusFunc(ctx, application, jobStatus) } - application.Status.ApplicationStatus[getCurrentStatusIndex(application)].JobStatus = jobStatus + application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus = jobStatus +} + +func (m *FlinkController) UpdateLatestClusterStatus(ctx context.Context, application *v1beta2.FlinkApplication, clusterStatus v1beta2.FlinkClusterStatus) { + if m.UpdateLatestClusterStatusFunc != nil { + m.UpdateLatestClusterStatusFunc(ctx, application, clusterStatus) + } + + application.Status.VersionStatuses[getCurrentStatusIndex(application)].ClusterStatus = clusterStatus } func getCurrentStatusIndex(app *v1beta2.FlinkApplication) int32 { diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index 88e0652c..788a154c 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -296,8 +296,8 @@ func (s *FlinkStateMachine) initializeAppStatusIfEmpty(ctx context.Context, appl application.Status.DesiredApplicationCount = 1 } - if len(application.Status.ApplicationStatus) == 0 { - application.Status.ApplicationStatus = make([]v1beta2.FlinkApplicationVersionStatus, application.Status.DesiredApplicationCount) + if len(application.Status.VersionStatuses) == 0 { + application.Status.VersionStatuses = 
make([]v1beta2.FlinkApplicationVersionStatus, application.Status.DesiredApplicationCount) } // If we're reading a v1beta1 app, populate the first element of the status array from diff --git a/pkg/controller/flinkapplication/flink_state_machine_test.go b/pkg/controller/flinkapplication/flink_state_machine_test.go index bdeddf3a..5549a793 100644 --- a/pkg/controller/flinkapplication/flink_state_machine_test.go +++ b/pkg/controller/flinkapplication/flink_state_machine_test.go @@ -452,7 +452,7 @@ func TestRollingBack(t *testing.T) { Phase: v1beta2.FlinkApplicationRollingBackJob, DeployHash: "old-hash", SavepointPath: "file:///savepoint", - ApplicationStatus: []v1beta2.FlinkApplicationVersionStatus{ + VersionStatuses: []v1beta2.FlinkApplicationVersionStatus{ v1beta2.FlinkApplicationVersionStatus{ JobStatus: v1beta2.FlinkJobStatus{ JarName: "old-job.jar", @@ -635,7 +635,7 @@ func TestDeleteWithSavepoint(t *testing.T) { Status: v1beta2.FlinkApplicationStatus{ Phase: v1beta2.FlinkApplicationDeleting, DeployHash: "deployhash", - ApplicationStatus: []v1beta2.FlinkApplicationVersionStatus{ + VersionStatuses: []v1beta2.FlinkApplicationVersionStatus{ v1beta2.FlinkApplicationVersionStatus{ JobStatus: v1beta2.FlinkJobStatus{ JobID: jobID, @@ -751,7 +751,7 @@ func TestDeleteWithSavepointAndFinishedJob(t *testing.T) { Phase: v1beta2.FlinkApplicationDeleting, DeployHash: "deployhash", SavepointPath: "file:///savepoint", - ApplicationStatus: []v1beta2.FlinkApplicationVersionStatus{ + VersionStatuses: []v1beta2.FlinkApplicationVersionStatus{ v1beta2.FlinkApplicationVersionStatus{ JobStatus: v1beta2.FlinkJobStatus{ JobID: jobID, @@ -802,7 +802,7 @@ func TestDeleteWithForceCancel(t *testing.T) { }, Status: v1beta2.FlinkApplicationStatus{ Phase: v1beta2.FlinkApplicationDeleting, - ApplicationStatus: []v1beta2.FlinkApplicationVersionStatus{ + VersionStatuses: []v1beta2.FlinkApplicationVersionStatus{ v1beta2.FlinkApplicationVersionStatus{ JobStatus: v1beta2.FlinkJobStatus{ JobID: jobID, @@ 
-1112,7 +1112,7 @@ func TestRollbackAfterJobSubmission(t *testing.T) { Status: v1beta2.FlinkApplicationStatus{ Phase: v1beta2.FlinkApplicationSubmittingJob, DeployHash: "old-hash-retry-err", - ApplicationStatus: []v1beta2.FlinkApplicationVersionStatus{ + VersionStatuses: []v1beta2.FlinkApplicationVersionStatus{ { JobStatus: v1beta2.FlinkJobStatus{ JobID: "jobid", diff --git a/pkg/controller/k8/cluster.go b/pkg/controller/k8/cluster.go index cc8e64a4..66e050ff 100644 --- a/pkg/controller/k8/cluster.go +++ b/pkg/controller/k8/cluster.go @@ -81,6 +81,7 @@ type k8ClusterMetrics struct { updateSuccess labeled.Counter updateFailure labeled.Counter updateConflicts labeled.Counter + updateInvalidVersion labeled.Counter deleteSuccess labeled.Counter deleteFailure labeled.Counter getDeploymentCacheHit labeled.Counter @@ -199,11 +200,23 @@ func (k *Cluster) UpdateK8Object(ctx context.Context, object runtime.Object) err func (k *Cluster) UpdateStatus(ctx context.Context, object runtime.Object) error { objectCopy := object.DeepCopyObject() - logger.Debugf(ctx, "Version %s", objectCopy.GetObjectKind().GroupVersionKind().Version) + logger.Debugf(ctx, "Version %s", objectCopy.GetObjectKind().GroupVersionKind().Version) err := k.client.Status().Update(ctx, objectCopy) if err != nil { if errors.IsInvalid(err) { + // This is a Kubernetes bug that has been fixed in k8s 1.15 + // https://github.com/kubernetes/kubernetes/pull/78713 + // The bug prevents status sub-resources from being updated when + // the stored version of the CRD changes + // Example of error: + // K8s object update failed FlinkApplication.flink.k8s.io "operator-test-app" is invalid: + // apiVersion: Invalid value: "flink.k8s.io/v1beta1": must be flink.k8s.io/v1beta2 + // app_name=operator-test-app ns=default phase=Running src="cluster.go:209" + // This should only ever be encountered once (per application) + // when a new CRD version is deployed + // TODO Remove this block when we upgrade to k8s 1.15 logger.Warn(ctx, 
"Status sub-resource update failed, attempting to update the entire resource instead") + k.metrics.updateInvalidVersion.Inc(ctx) return k.client.Update(ctx, object) } if errors.IsConflict(err) { From 937b965f2da8b141f741bebabb5c164119388b85 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Tue, 10 Mar 2020 10:23:12 -0700 Subject: [PATCH 21/41] Remove DesiredApplicationCount --- integ/utils/utils.go | 2 +- pkg/apis/app/v1beta2/types.go | 20 ++++++++++++------- pkg/controller/flink/flink.go | 2 +- pkg/controller/flink/flink_test.go | 1 - pkg/controller/flink/mock/mock_flink.go | 2 +- .../flinkapplication/flink_state_machine.go | 9 ++++----- 6 files changed, 20 insertions(+), 16 deletions(-) diff --git a/integ/utils/utils.go b/integ/utils/utils.go index d7ff39d4..f13c3e53 100644 --- a/integ/utils/utils.go +++ b/integ/utils/utils.go @@ -484,7 +484,7 @@ func (f *TestUtil) Update(name string, updateFn func(app *flinkapp.FlinkApplicat } func (f *TestUtil) GetCurrentStatusIndex(app *flinkapp.FlinkApplication) int32 { - desiredCount := app.Status.DesiredApplicationCount + desiredCount := flinkapp.GetMaxRunningJobs(app.Spec.DeploymentMode) if app.Status.Phase != "Running" { return 0 } diff --git a/pkg/apis/app/v1beta2/types.go b/pkg/apis/app/v1beta2/types.go index f551176d..51384577 100644 --- a/pkg/apis/app/v1beta2/types.go +++ b/pkg/apis/app/v1beta2/types.go @@ -166,13 +166,12 @@ type FlinkJobStatus struct { } type FlinkApplicationStatus struct { - Phase FlinkApplicationPhase `json:"phase"` - StartedAt *metav1.Time `json:"startedAt,omitempty"` - LastUpdatedAt *metav1.Time `json:"lastUpdatedAt,omitempty"` - Reason string `json:"reason,omitempty"` - DesiredApplicationCount int32 `json:"desiredApplicationCount,omitempty"` - DeployVersion string `json:"deployVersion,omitempty"` - UpdatingVersion string `json:"updatingVersion,omitempty"` + Phase FlinkApplicationPhase `json:"phase"` + StartedAt *metav1.Time `json:"startedAt,omitempty"` + LastUpdatedAt *metav1.Time 
`json:"lastUpdatedAt,omitempty"` + Reason string `json:"reason,omitempty"` + DeployVersion string `json:"deployVersion,omitempty"` + UpdatingVersion string `json:"updatingVersion,omitempty"` // To ensure backward compatibility allow repeat ClusterStatus and JobStatus ClusterStatus FlinkClusterStatus `json:"clusterStatus,omitempty"` JobStatus FlinkJobStatus `json:"jobStatus,omitempty"` @@ -272,6 +271,13 @@ func IsBlueGreenDeploymentMode(mode DeploymentMode) bool { return mode == DeploymentModeBlueGreen } +func GetMaxRunningJobs(mode DeploymentMode) int32 { + if IsBlueGreenDeploymentMode(mode) { + return int32(2) + } + return int32(1) +} + type DeploymentMode string const ( diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index d52cf70a..7821923c 100644 --- a/pkg/controller/flink/flink.go +++ b/pkg/controller/flink/flink.go @@ -655,7 +655,7 @@ func getCurrentStatusIndex(app *v1beta2.FlinkApplication) int32 { // In every other state, we either have // Dual mode --> One Application status object // BlueGreen mode --> Two Application status objects - return app.Status.DesiredApplicationCount - indexOffset + return v1beta2.GetMaxRunningJobs(app.Spec.DeploymentMode) - indexOffset } func (f *Controller) GetLatestClusterStatus(ctx context.Context, application *v1beta2.FlinkApplication) v1beta2.FlinkClusterStatus { diff --git a/pkg/controller/flink/flink_test.go b/pkg/controller/flink/flink_test.go index 23962eb7..13319470 100644 --- a/pkg/controller/flink/flink_test.go +++ b/pkg/controller/flink/flink_test.go @@ -75,7 +75,6 @@ func getFlinkTestApp() v1beta2.FlinkApplication { app.Status.VersionStatuses = statuses app.Spec.Image = testImage app.Spec.FlinkVersion = testFlinkVersion - app.Status.DesiredApplicationCount = 1 return app } diff --git a/pkg/controller/flink/mock/mock_flink.go b/pkg/controller/flink/mock/mock_flink.go index 6b6cdd5e..12f8ffbe 100644 --- a/pkg/controller/flink/mock/mock_flink.go +++ b/pkg/controller/flink/mock/mock_flink.go 
@@ -218,7 +218,7 @@ func (m *FlinkController) UpdateLatestClusterStatus(ctx context.Context, applica } func getCurrentStatusIndex(app *v1beta2.FlinkApplication) int32 { - desiredCount := app.Status.DesiredApplicationCount + desiredCount := v1beta2.GetMaxRunningJobs(app.Spec.DeploymentMode) if v1beta2.IsRunningPhase(app.Status.Phase) { return 0 } diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index 788a154c..535a40b6 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -290,18 +290,17 @@ func (s *FlinkStateMachine) handleClusterStarting(ctx context.Context, applicati func (s *FlinkStateMachine) initializeAppStatusIfEmpty(ctx context.Context, application *v1beta2.FlinkApplication) { // initialize the app status array to include 2 status elements in case of blue green deploys // else use a one element array + arraySize := 1 if application.Spec.DeploymentMode == v1beta2.DeploymentModeBlueGreen { - application.Status.DesiredApplicationCount = 2 - } else { - application.Status.DesiredApplicationCount = 1 + arraySize = 2 } if len(application.Status.VersionStatuses) == 0 { - application.Status.VersionStatuses = make([]v1beta2.FlinkApplicationVersionStatus, application.Status.DesiredApplicationCount) + application.Status.VersionStatuses = make([]v1beta2.FlinkApplicationVersionStatus, arraySize) } // If we're reading a v1beta1 app, populate the first element of the status array from - // the top-level jobStatus and clusteStatus + // the top-level jobStatus and clusterStatus if application.Status.JobStatus != (v1beta2.FlinkJobStatus{}) { s.flinkController.UpdateLatestJobStatus(ctx, application, application.Status.JobStatus) } From 612de704bd82d4e3511e60711e72600fc041c4ca Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Tue, 10 Mar 2020 10:39:27 -0700 Subject: [PATCH 22/41] Minor updates --- pkg/apis/app/v1beta2/types.go | 2 
+- pkg/controller/k8/cluster.go | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/pkg/apis/app/v1beta2/types.go b/pkg/apis/app/v1beta2/types.go index 51384577..c110027c 100644 --- a/pkg/apis/app/v1beta2/types.go +++ b/pkg/apis/app/v1beta2/types.go @@ -172,7 +172,7 @@ type FlinkApplicationStatus struct { Reason string `json:"reason,omitempty"` DeployVersion string `json:"deployVersion,omitempty"` UpdatingVersion string `json:"updatingVersion,omitempty"` - // To ensure backward compatibility allow repeat ClusterStatus and JobStatus + // To ensure backward compatibility, repeat ClusterStatus and JobStatus ClusterStatus FlinkClusterStatus `json:"clusterStatus,omitempty"` JobStatus FlinkJobStatus `json:"jobStatus,omitempty"` VersionStatuses []FlinkApplicationVersionStatus `json:"appStatus,omitempty"` diff --git a/pkg/controller/k8/cluster.go b/pkg/controller/k8/cluster.go index 66e050ff..cf9d9f23 100644 --- a/pkg/controller/k8/cluster.go +++ b/pkg/controller/k8/cluster.go @@ -213,11 +213,17 @@ func (k *Cluster) UpdateStatus(ctx context.Context, object runtime.Object) error // apiVersion: Invalid value: "flink.k8s.io/v1beta1": must be flink.k8s.io/v1beta2 // app_name=operator-test-app ns=default phase=Running src="cluster.go:209" // This should only ever be encountered once (per application) - // when a new CRD version is deployed + // when a new CRD version is deployed and an older version of the application exists + // As a workaround, we try to update the entire resource instead of only the status // TODO Remove this block when we upgrade to k8s 1.15 logger.Warn(ctx, "Status sub-resource update failed, attempting to update the entire resource instead") k.metrics.updateInvalidVersion.Inc(ctx) - return k.client.Update(ctx, object) + err = k.client.Update(ctx, object) + if err != nil { + logger.Errorf(ctx, "K8s object update failed %v", err) + k.metrics.updateFailure.Inc(ctx) + return err + } } if errors.IsConflict(err) { logger.Warnf(ctx, 
"Conflict while updating status") From 5c1798388c0e36438332952420612372c41e2cf4 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Tue, 10 Mar 2020 10:42:53 -0700 Subject: [PATCH 23/41] Minor updates --- pkg/apis/app/v1beta2/types.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/apis/app/v1beta2/types.go b/pkg/apis/app/v1beta2/types.go index c110027c..26787158 100644 --- a/pkg/apis/app/v1beta2/types.go +++ b/pkg/apis/app/v1beta2/types.go @@ -175,7 +175,7 @@ type FlinkApplicationStatus struct { // To ensure backward compatibility, repeat ClusterStatus and JobStatus ClusterStatus FlinkClusterStatus `json:"clusterStatus,omitempty"` JobStatus FlinkJobStatus `json:"jobStatus,omitempty"` - VersionStatuses []FlinkApplicationVersionStatus `json:"appStatus,omitempty"` + VersionStatuses []FlinkApplicationVersionStatus `json:"versionStatuses,omitempty"` FailedDeployHash string `json:"failedDeployHash,omitempty"` RollbackHash string `json:"rollbackHash,omitempty"` DeployHash string `json:"deployHash"` From b543ab0d585e28bca2d266541efeeb0e58cd0ba9 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Tue, 10 Mar 2020 10:51:48 -0700 Subject: [PATCH 24/41] Initialize counter --- pkg/controller/k8/cluster.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pkg/controller/k8/cluster.go b/pkg/controller/k8/cluster.go index cf9d9f23..c270c5bf 100644 --- a/pkg/controller/k8/cluster.go +++ b/pkg/controller/k8/cluster.go @@ -60,6 +60,7 @@ func newK8ClusterMetrics(scope promutils.Scope) *k8ClusterMetrics { updateSuccess: labeled.NewCounter("update_success", "K8 object updated successfully", k8ClusterScope), updateFailure: labeled.NewCounter("update_failure", "K8 object update failed", k8ClusterScope), updateConflicts: labeled.NewCounter("update_conflict", "K8 object update failed due to a conflict", k8ClusterScope), + updateInvalidVersion: labeled.NewCounter("update_invalide_version", "K8 object update failed due to an invalid version", 
k8ClusterScope), deleteSuccess: labeled.NewCounter("delete_success", "K8 object deleted successfully", k8ClusterScope), deleteFailure: labeled.NewCounter("delete_failure", "K8 object deletion failed", k8ClusterScope), getDeploymentCacheHit: labeled.NewCounter("get_deployment_cache_hit", "Deployment fetched from cache", k8ClusterScope), @@ -218,11 +219,11 @@ func (k *Cluster) UpdateStatus(ctx context.Context, object runtime.Object) error // TODO Remove this block when we upgrade to k8s 1.15 logger.Warn(ctx, "Status sub-resource update failed, attempting to update the entire resource instead") k.metrics.updateInvalidVersion.Inc(ctx) - err = k.client.Update(ctx, object) - if err != nil { - logger.Errorf(ctx, "K8s object update failed %v", err) + updateErr := k.client.Update(ctx, object) + if updateErr != nil { + logger.Errorf(ctx, "K8s object update failed %v", updateErr) k.metrics.updateFailure.Inc(ctx) - return err + return updateErr } } if errors.IsConflict(err) { From 4e2d93eaa89ced9825c34a6ff6e20c65c9f0b4e6 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Tue, 10 Mar 2020 11:35:14 -0700 Subject: [PATCH 25/41] Handle edge case for jobId --- pkg/controller/flink/flink.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index 7821923c..1e4eb90c 100644 --- a/pkg/controller/flink/flink.go +++ b/pkg/controller/flink/flink.go @@ -677,7 +677,12 @@ func (f *Controller) UpdateLatestClusterStatus(ctx context.Context, app *v1beta2 } func (f *Controller) GetLatestJobID(ctx context.Context, application *v1beta2.FlinkApplication) string { - return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID + jobId := application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID + // TODO Remove when all applications have moved to v1beta2 + if jobId == "" && application.Status.JobStatus != (v1beta2.FlinkJobStatus{}) { + jobId = 
application.Status.JobStatus.JobID + } + return jobId } func (f *Controller) UpdateLatestJobID(ctx context.Context, app *v1beta2.FlinkApplication, jobID string) { From 6ef5216d5f96bea59d410f295407bf9486ec521f Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Tue, 10 Mar 2020 11:53:52 -0700 Subject: [PATCH 26/41] Debug --- pkg/controller/flink/flink.go | 4 ++-- .../flinkapplication/flink_state_machine.go | 13 +++++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index 1e4eb90c..d9e47387 100644 --- a/pkg/controller/flink/flink.go +++ b/pkg/controller/flink/flink.go @@ -223,7 +223,7 @@ func (f *Controller) GetJobsForApplication(ctx context.Context, application *v1b } func (f *Controller) GetJobForApplication(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { - if application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID == "" { + if f.GetLatestJobID(ctx, application) == "" { return nil, nil } @@ -570,7 +570,7 @@ func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta2 app.Status.VersionStatuses[currIndex].JobStatus.JobID = oldJobStatus.JobID jobResponse, err := f.flinkClient.GetJobOverview(ctx, getURLFromApp(app, hash), f.GetLatestJobID(ctx, app)) if err != nil { - return false, err + return false, fmt.Errorf("Error in Job Status Update!!!",err) } checkpoints, err := f.flinkClient.GetCheckpointCounts(ctx, getURLFromApp(app, hash), f.GetLatestJobID(ctx, app)) if err != nil { diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index 535a40b6..05efd7bb 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -155,7 +155,10 @@ func (s *FlinkStateMachine) handle(ctx context.Context, application *v1beta2.Fli appPhase := 
application.Status.Phase // initialize application status array if it's not yet been initialized s.initializeAppStatusIfEmpty(ctx, application) - + //jobId := s.flinkController.GetLatestJobID(ctx, application) + //logger.Errorf(ctx, "JOB ID!!!", jobId) + jobOverview, _ := s.flinkController.GetJobForApplication(ctx, application, application.Status.DeployHash) + logger.Errorf(ctx, "JOB ID!!!", jobOverview.JobID) if !application.ObjectMeta.DeletionTimestamp.IsZero() && appPhase != v1beta2.FlinkApplicationDeleting { s.updateApplicationPhase(application, v1beta2.FlinkApplicationDeleting) // Always perform a single application update per callback @@ -417,10 +420,11 @@ func (s *FlinkStateMachine) submitJobIfNeeded(ctx context.Context, app *v1beta2. if err := s.addFinalizerIfMissing(ctx, app, jobFinalizer); err != nil { return "", err } - + jobId := s.flinkController.GetLatestJobID(ctx, app) + logger.Errorf(ctx, "JOB ID!!!", jobId) // Check if the job id has already been set on our application - if s.flinkController.GetLatestJobID(ctx, app) != "" { - return s.flinkController.GetLatestJobID(ctx, app), nil + if jobId!= "" { + return jobId, nil } // Check that there are no jobs running before starting the job @@ -536,6 +540,7 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta // get the state of the current application job, err := s.flinkController.GetJobForApplication(ctx, app, hash) + logger.Errorf(ctx, "Handle submitting job ID", job) if err != nil { return statusUnchanged, err } From facab3469d3e1f1e1afebca979f74367e26c3ba1 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Tue, 10 Mar 2020 13:05:59 -0700 Subject: [PATCH 27/41] Debug --- pkg/controller/flink/flink.go | 2 ++ pkg/controller/flinkapplication/flink_state_machine.go | 7 +------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index d9e47387..ea646c90 100644 --- a/pkg/controller/flink/flink.go +++ 
b/pkg/controller/flink/flink.go @@ -223,11 +223,13 @@ func (f *Controller) GetJobsForApplication(ctx context.Context, application *v1b } func (f *Controller) GetJobForApplication(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { + logger.Infof(ctx, "GetJobForApplication", f.GetLatestJobID(ctx, application)) if f.GetLatestJobID(ctx, application) == "" { return nil, nil } jobResponse, err := f.flinkClient.GetJobOverview(ctx, getURLFromApp(application, hash), f.GetLatestJobID(ctx, application)) + logger.Infof(ctx, "GetJobOverview URL", getURLFromApp(application, hash)) if err != nil { return nil, err } diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index 05efd7bb..280c767f 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -155,10 +155,6 @@ func (s *FlinkStateMachine) handle(ctx context.Context, application *v1beta2.Fli appPhase := application.Status.Phase // initialize application status array if it's not yet been initialized s.initializeAppStatusIfEmpty(ctx, application) - //jobId := s.flinkController.GetLatestJobID(ctx, application) - //logger.Errorf(ctx, "JOB ID!!!", jobId) - jobOverview, _ := s.flinkController.GetJobForApplication(ctx, application, application.Status.DeployHash) - logger.Errorf(ctx, "JOB ID!!!", jobOverview.JobID) if !application.ObjectMeta.DeletionTimestamp.IsZero() && appPhase != v1beta2.FlinkApplicationDeleting { s.updateApplicationPhase(application, v1beta2.FlinkApplicationDeleting) // Always perform a single application update per callback @@ -421,7 +417,7 @@ func (s *FlinkStateMachine) submitJobIfNeeded(ctx context.Context, app *v1beta2. 
return "", err } jobId := s.flinkController.GetLatestJobID(ctx, app) - logger.Errorf(ctx, "JOB ID!!!", jobId) + logger.Errorf(ctx, "Inside submitjobIfneeded") // Check if the job id has already been set on our application if jobId!= "" { return jobId, nil @@ -540,7 +536,6 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta // get the state of the current application job, err := s.flinkController.GetJobForApplication(ctx, app, hash) - logger.Errorf(ctx, "Handle submitting job ID", job) if err != nil { return statusUnchanged, err } From dc630578969161bbffe41229856e07433dd97040 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Tue, 10 Mar 2020 13:35:23 -0700 Subject: [PATCH 28/41] fixes --- pkg/controller/flink/flink.go | 16 ++++++---------- pkg/controller/flink/mock/mock_flink.go | 2 +- .../flinkapplication/flink_state_machine.go | 2 ++ pkg/controller/k8/cluster.go | 1 - 4 files changed, 9 insertions(+), 12 deletions(-) diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index ea646c90..29abfdf4 100644 --- a/pkg/controller/flink/flink.go +++ b/pkg/controller/flink/flink.go @@ -239,16 +239,16 @@ func (f *Controller) GetJobForApplication(ctx context.Context, application *v1be // The operator for now assumes and is intended to run single application per Flink Cluster. 
// Once we move to run multiple applications, this has to be removed/updated -func (f *Controller) getJobIDForApplication(application *v1beta2.FlinkApplication) (string, error) { - if application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID != "" { - return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID, nil +func (f *Controller) getJobIDForApplication(ctx context.Context, application *v1beta2.FlinkApplication) (string, error) { + if f.GetLatestJobID(ctx, application) != "" { + return f.GetLatestJobID(ctx, application), nil } return "", errors.New("active job id not available") } func (f *Controller) CancelWithSavepoint(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (string, error) { - jobID, err := f.getJobIDForApplication(application) + jobID, err := f.getJobIDForApplication(ctx, application) if err != nil { return "", err } @@ -256,7 +256,7 @@ func (f *Controller) CancelWithSavepoint(ctx context.Context, application *v1bet } func (f *Controller) ForceCancel(ctx context.Context, application *v1beta2.FlinkApplication, hash string) error { - jobID, err := f.getJobIDForApplication(application) + jobID, err := f.getJobIDForApplication(ctx, application) if err != nil { return err } @@ -314,7 +314,7 @@ func (f *Controller) StartFlinkJob(ctx context.Context, application *v1beta2.Fli } func (f *Controller) GetSavepointStatus(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.SavepointResponse, error) { - jobID, err := f.getJobIDForApplication(application) + jobID, err := f.getJobIDForApplication(ctx, application) if err != nil { return nil, err } @@ -680,10 +680,6 @@ func (f *Controller) UpdateLatestClusterStatus(ctx context.Context, app *v1beta2 func (f *Controller) GetLatestJobID(ctx context.Context, application *v1beta2.FlinkApplication) string { jobId := application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID - 
// TODO Remove when all applications have moved to v1beta2 - if jobId == "" && application.Status.JobStatus != (v1beta2.FlinkJobStatus{}) { - jobId = application.Status.JobStatus.JobID - } return jobId } diff --git a/pkg/controller/flink/mock/mock_flink.go b/pkg/controller/flink/mock/mock_flink.go index 12f8ffbe..0e931745 100644 --- a/pkg/controller/flink/mock/mock_flink.go +++ b/pkg/controller/flink/mock/mock_flink.go @@ -185,7 +185,7 @@ func (m *FlinkController) GetLatestJobStatus(ctx context.Context, application *v return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus } -func (m *FlinkController) GetLatestJobID(ctx context.Context, application *v1beta2.FlinkApplication) string { +func (m *FlinkController) GetLatestJobID(app *v1beta2.FlinkApplication) string { if m.GetLatestClusterStatusFunc != nil { return m.GetLatestJobIDFunc(ctx, application) } diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index 280c767f..d38219d3 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -492,6 +492,8 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta } // switch the service to point to the new jobmanager + // Clear job ID before this + s.flinkController.UpdateLatestJobID(ctx, app, "") hash := flink.HashForApplication(app) err := s.updateGenericService(ctx, app, hash) if err != nil { diff --git a/pkg/controller/k8/cluster.go b/pkg/controller/k8/cluster.go index c270c5bf..1c4db4fd 100644 --- a/pkg/controller/k8/cluster.go +++ b/pkg/controller/k8/cluster.go @@ -201,7 +201,6 @@ func (k *Cluster) UpdateK8Object(ctx context.Context, object runtime.Object) err func (k *Cluster) UpdateStatus(ctx context.Context, object runtime.Object) error { objectCopy := object.DeepCopyObject() - logger.Debugf(ctx, "Version %s", objectCopy.GetObjectKind().GroupVersionKind().Version) 
err := k.client.Status().Update(ctx, objectCopy) if err != nil { if errors.IsInvalid(err) { From 8d33782396a54f09a37b76dec7b039e3002f56ec Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Tue, 10 Mar 2020 14:24:43 -0700 Subject: [PATCH 29/41] Fix edge case --- pkg/controller/flink/flink.go | 5 +---- .../flinkapplication/flink_state_machine.go | 17 ++++++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index 29abfdf4..57b656e1 100644 --- a/pkg/controller/flink/flink.go +++ b/pkg/controller/flink/flink.go @@ -223,13 +223,11 @@ func (f *Controller) GetJobsForApplication(ctx context.Context, application *v1b } func (f *Controller) GetJobForApplication(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { - logger.Infof(ctx, "GetJobForApplication", f.GetLatestJobID(ctx, application)) if f.GetLatestJobID(ctx, application) == "" { return nil, nil } jobResponse, err := f.flinkClient.GetJobOverview(ctx, getURLFromApp(application, hash), f.GetLatestJobID(ctx, application)) - logger.Infof(ctx, "GetJobOverview URL", getURLFromApp(application, hash)) if err != nil { return nil, err } @@ -679,8 +677,7 @@ func (f *Controller) UpdateLatestClusterStatus(ctx context.Context, app *v1beta2 } func (f *Controller) GetLatestJobID(ctx context.Context, application *v1beta2.FlinkApplication) string { - jobId := application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID - return jobId + return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID } func (f *Controller) UpdateLatestJobID(ctx context.Context, app *v1beta2.FlinkApplication, jobID string) { diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index d38219d3..01ebff3c 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ 
b/pkg/controller/flinkapplication/flink_state_machine.go @@ -155,6 +155,7 @@ func (s *FlinkStateMachine) handle(ctx context.Context, application *v1beta2.Fli appPhase := application.Status.Phase // initialize application status array if it's not yet been initialized s.initializeAppStatusIfEmpty(ctx, application) + if !application.ObjectMeta.DeletionTimestamp.IsZero() && appPhase != v1beta2.FlinkApplicationDeleting { s.updateApplicationPhase(application, v1beta2.FlinkApplicationDeleting) // Always perform a single application update per callback @@ -416,11 +417,10 @@ func (s *FlinkStateMachine) submitJobIfNeeded(ctx context.Context, app *v1beta2. if err := s.addFinalizerIfMissing(ctx, app, jobFinalizer); err != nil { return "", err } - jobId := s.flinkController.GetLatestJobID(ctx, app) - logger.Errorf(ctx, "Inside submitjobIfneeded") + // Check if the job id has already been set on our application - if jobId!= "" { - return jobId, nil + if s.flinkController.GetLatestJobID(ctx, app) != "" { + return s.flinkController.GetLatestJobID(ctx, app), nil } // Check that there are no jobs running before starting the job @@ -492,8 +492,6 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta } // switch the service to point to the new jobmanager - // Clear job ID before this - s.flinkController.UpdateLatestJobID(ctx, app, "") hash := flink.HashForApplication(app) err := s.updateGenericService(ctx, app, hash) if err != nil { @@ -506,7 +504,12 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta logger.Errorf(ctx, "Updating cluster status failed with error: %v", clusterErr) } - if s.flinkController.GetLatestJobID(ctx, app) == "" { + // Reset jobId if for some reason it's populated but there are no jobs running + jobs, _ := s.flinkController.GetJobsForApplication(ctx,app, hash) + if len(flink.GetActiveFlinkJobs(jobs)) == 0 { + s.flinkController.UpdateLatestJobID(ctx, app, "") + } + if 
s.flinkController.GetLatestJobID(ctx, app) == "" || len(flink.GetActiveFlinkJobs(jobs)) == 0 { savepointPath := "" if app.Status.DeployHash == "" { // this is the first deploy, use the user-provided savepoint From 1577b778f81d8825b0b1126a958d1d9fbe3cf5f6 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Tue, 10 Mar 2020 15:30:14 -0700 Subject: [PATCH 30/41] Fix unit tests --- pkg/controller/flink/flink.go | 2 +- pkg/controller/flink/mock/mock_flink.go | 2 +- .../flinkapplication/flink_state_machine.go | 6 +++--- .../flinkapplication/flink_state_machine_test.go | 16 +++++++++++++++- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index 57b656e1..700a9ea1 100644 --- a/pkg/controller/flink/flink.go +++ b/pkg/controller/flink/flink.go @@ -570,7 +570,7 @@ func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta2 app.Status.VersionStatuses[currIndex].JobStatus.JobID = oldJobStatus.JobID jobResponse, err := f.flinkClient.GetJobOverview(ctx, getURLFromApp(app, hash), f.GetLatestJobID(ctx, app)) if err != nil { - return false, fmt.Errorf("Error in Job Status Update!!!",err) + return false, err } checkpoints, err := f.flinkClient.GetCheckpointCounts(ctx, getURLFromApp(app, hash), f.GetLatestJobID(ctx, app)) if err != nil { diff --git a/pkg/controller/flink/mock/mock_flink.go b/pkg/controller/flink/mock/mock_flink.go index 0e931745..12f8ffbe 100644 --- a/pkg/controller/flink/mock/mock_flink.go +++ b/pkg/controller/flink/mock/mock_flink.go @@ -185,7 +185,7 @@ func (m *FlinkController) GetLatestJobStatus(ctx context.Context, application *v return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus } -func (m *FlinkController) GetLatestJobID(app *v1beta2.FlinkApplication) string { +func (m *FlinkController) GetLatestJobID(ctx context.Context, application *v1beta2.FlinkApplication) string { if m.GetLatestClusterStatusFunc != nil { return 
m.GetLatestJobIDFunc(ctx, application) } diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index 01ebff3c..c0deb6fe 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -505,11 +505,11 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta } // Reset jobId if for some reason it's populated but there are no jobs running - jobs, _ := s.flinkController.GetJobsForApplication(ctx,app, hash) - if len(flink.GetActiveFlinkJobs(jobs)) == 0 { + jobs, _ := s.flinkController.GetJobsForApplication(ctx, app, hash) + if s.flinkController.GetLatestJobID(ctx, app) != "" && len(flink.GetActiveFlinkJobs(jobs)) == 0 { s.flinkController.UpdateLatestJobID(ctx, app, "") } - if s.flinkController.GetLatestJobID(ctx, app) == "" || len(flink.GetActiveFlinkJobs(jobs)) == 0 { + if s.flinkController.GetLatestJobID(ctx, app) == "" { savepointPath := "" if app.Status.DeployHash == "" { // this is the first deploy, use the user-provided savepoint diff --git a/pkg/controller/flinkapplication/flink_state_machine_test.go b/pkg/controller/flinkapplication/flink_state_machine_test.go index 5549a793..145c922d 100644 --- a/pkg/controller/flinkapplication/flink_state_machine_test.go +++ b/pkg/controller/flinkapplication/flink_state_machine_test.go @@ -319,6 +319,20 @@ func TestSubmittingToRunning(t *testing.T) { return jobID, nil } + mockFlinkController.GetJobsForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) ([]client.FlinkJob, error) { + assert.Equal(t, appHash, hash) + if startCount > 0 { + return []client.FlinkJob{ + { + JobID: jobID, + Status: client.Running, + }, + }, nil + } + return nil, nil + + } + mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) getServiceCount := 0 @@ -1027,7 +1041,7 @@ func TestRollbackWithFailFastError(t *testing.T) { getCount := 
0 mockFlinkController.GetJobsForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) ([]client.FlinkJob, error) { var res []client.FlinkJob - if getCount == 1 { + if getCount == 2 { res = []client.FlinkJob{ { JobID: "jid1", From 9e1159200e9fe140d08d0ac0dbc45f428cacb59b Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Tue, 10 Mar 2020 16:05:23 -0700 Subject: [PATCH 31/41] Debug logs --- deploy/crd.yaml | 12 ++++++------ pkg/controller/flink/client/api.go | 1 + pkg/controller/flink/flink.go | 2 ++ .../flinkapplication/flink_state_machine.go | 1 + 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/deploy/crd.yaml b/deploy/crd.yaml index 2072282d..a9d6bd87 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -451,27 +451,27 @@ spec: - name: Application Version type: string description: The version of the Flink cluster - JSONPath: .status.appStatus[*].version + JSONPath: .status.versionStatuses[*].version - name: Cluster Health type: string description: The health of the Flink cluster - JSONPath: .status.appStatus[*].clusterStatus.health + JSONPath: .status.versionStatuses[*].clusterStatus.health - name: Job Health type: string description: The health of the Flink job - JSONPath: .status.appStatus[*].jobStatus.health + JSONPath: .status.versionStatuses[*].jobStatus.health - name: Healthy TMs type: string - JSONPath: .status.appStatus[*].clusterStatus.healthyTaskManagers + JSONPath: .status.versionStatuses[*].clusterStatus.healthyTaskManagers priority: 1 - name: Total TMs type: string - JSONPath: .status.appStatus[*].clusterStatus.numberOfTaskManagers + JSONPath: .status.versionStatuses[*].clusterStatus.numberOfTaskManagers priority: 1 - name: Job Restarts type: integer description: Number of times the job has restarted - JSONPath: .status.appStatus[*].jobStatus.jobRestartCount + JSONPath: .status.versionStatuses[*].jobStatus.jobRestartCount - name: Age type: date JSONPath: .metadata.creationTimestamp diff --git 
a/pkg/controller/flink/client/api.go b/pkg/controller/flink/client/api.go index 38941a38..10dab721 100644 --- a/pkg/controller/flink/client/api.go +++ b/pkg/controller/flink/client/api.go @@ -368,6 +368,7 @@ func (c *FlinkJobManagerClient) GetCheckpointCounts(ctx context.Context, url str func (c *FlinkJobManagerClient) GetJobOverview(ctx context.Context, url string, jobID string) (*FlinkJobOverview, error) { endpoint := fmt.Sprintf(url+GetJobsOverviewURL, jobID) + logger.Infof(ctx, "GetJobOverview endpoint %v", endpoint) response, err := c.executeRequest(ctx, httpGet, endpoint, nil) if err != nil { return nil, GetRetryableError(err, v1beta2.GetJobOverview, GlobalFailure, DefaultRetries) diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index 700a9ea1..7a267ee0 100644 --- a/pkg/controller/flink/flink.go +++ b/pkg/controller/flink/flink.go @@ -223,11 +223,13 @@ func (f *Controller) GetJobsForApplication(ctx context.Context, application *v1b } func (f *Controller) GetJobForApplication(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { + logger.Infof(ctx, "Latest Job ID: %v", f.GetLatestJobID(ctx, application)) if f.GetLatestJobID(ctx, application) == "" { return nil, nil } jobResponse, err := f.flinkClient.GetJobOverview(ctx, getURLFromApp(application, hash), f.GetLatestJobID(ctx, application)) + logger.Infof(ctx, "Response: %v", jobResponse) if err != nil { return nil, err } diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index c0deb6fe..d280a47e 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -531,6 +531,7 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta } if appJobID != "" { + logger.Infof(ctx, "Updated job ID %v", appJobID) s.flinkController.UpdateLatestJobID(ctx, app, appJobID) return 
statusChanged, nil } From 3c4d0be5082793e3b72f5bad23d331bf9a844a1e Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Tue, 10 Mar 2020 16:24:30 -0700 Subject: [PATCH 32/41] Fix overwriting of versionstatuses --- pkg/controller/flinkapplication/flink_state_machine.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index d280a47e..29d25065 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -303,10 +303,12 @@ func (s *FlinkStateMachine) initializeAppStatusIfEmpty(ctx context.Context, appl // the top-level jobStatus and clusterStatus if application.Status.JobStatus != (v1beta2.FlinkJobStatus{}) { s.flinkController.UpdateLatestJobStatus(ctx, application, application.Status.JobStatus) + application.Status.JobStatus = v1beta2.FlinkJobStatus{} } if application.Status.ClusterStatus != (v1beta2.FlinkClusterStatus{}) { s.flinkController.UpdateLatestClusterStatus(ctx, application, application.Status.ClusterStatus) + application.Status.ClusterStatus = v1beta2.FlinkClusterStatus{} } } From bd689d9ae4e143f748b128d6eeb2a221c1ae17c9 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Tue, 10 Mar 2020 16:30:26 -0700 Subject: [PATCH 33/41] Remove debug logs --- pkg/controller/flink/client/api.go | 1 - pkg/controller/flink/flink.go | 2 -- pkg/controller/flinkapplication/flink_state_machine.go | 1 - 3 files changed, 4 deletions(-) diff --git a/pkg/controller/flink/client/api.go b/pkg/controller/flink/client/api.go index 10dab721..38941a38 100644 --- a/pkg/controller/flink/client/api.go +++ b/pkg/controller/flink/client/api.go @@ -368,7 +368,6 @@ func (c *FlinkJobManagerClient) GetCheckpointCounts(ctx context.Context, url str func (c *FlinkJobManagerClient) GetJobOverview(ctx context.Context, url string, jobID string) (*FlinkJobOverview, error) { endpoint := fmt.Sprintf(url+GetJobsOverviewURL, jobID) - 
logger.Infof(ctx, "GetJobOverview endpoint %v", endpoint) response, err := c.executeRequest(ctx, httpGet, endpoint, nil) if err != nil { return nil, GetRetryableError(err, v1beta2.GetJobOverview, GlobalFailure, DefaultRetries) diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index 7a267ee0..700a9ea1 100644 --- a/pkg/controller/flink/flink.go +++ b/pkg/controller/flink/flink.go @@ -223,13 +223,11 @@ func (f *Controller) GetJobsForApplication(ctx context.Context, application *v1b } func (f *Controller) GetJobForApplication(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { - logger.Infof(ctx, "Latest Job ID: %v", f.GetLatestJobID(ctx, application)) if f.GetLatestJobID(ctx, application) == "" { return nil, nil } jobResponse, err := f.flinkClient.GetJobOverview(ctx, getURLFromApp(application, hash), f.GetLatestJobID(ctx, application)) - logger.Infof(ctx, "Response: %v", jobResponse) if err != nil { return nil, err } diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index 29d25065..8cf150a7 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -533,7 +533,6 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta } if appJobID != "" { - logger.Infof(ctx, "Updated job ID %v", appJobID) s.flinkController.UpdateLatestJobID(ctx, app, appJobID) return statusChanged, nil } From 88c535d4e3b0e29d67f27fb964ab3c74272089b0 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Thu, 19 Mar 2020 10:44:48 -0700 Subject: [PATCH 34/41] Merge master --- integ/checkpoint_failure_test.go | 2 +- integ/job_cancellation_test.go | 36 ++++++------- pkg/apis/app/v1beta2/types.go | 4 ++ pkg/apis/app/v1beta2/zz_generated.deepcopy.go | 2 + .../flinkapplication/flink_state_machine.go | 5 +- .../flink_state_machine_test.go | 50 +++++++++---------- 
6 files changed, 52 insertions(+), 47 deletions(-) diff --git a/integ/checkpoint_failure_test.go b/integ/checkpoint_failure_test.go index a9478c87..24137716 100644 --- a/integ/checkpoint_failure_test.go +++ b/integ/checkpoint_failure_test.go @@ -17,7 +17,7 @@ func failingJobTest(s *IntegSuite, c *C, testName string, causeFailure func()) { config, err := s.Util.ReadFlinkApplication("test_app.yaml") c.Assert(err, IsNil, Commentf("Failed to read test app yaml")) config.Name = testName + "job" - config.Spec.DeleteMode = v1beta1.DeleteModeForceCancel + config.Spec.DeleteMode = v1beta2.DeleteModeForceCancel config.ObjectMeta.Labels["integTest"] = testName diff --git a/integ/job_cancellation_test.go b/integ/job_cancellation_test.go index 6c92a3e0..7cc42d6b 100644 --- a/integ/job_cancellation_test.go +++ b/integ/job_cancellation_test.go @@ -4,13 +4,13 @@ import ( "fmt" "time" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" "github.com/prometheus/common/log" . "gopkg.in/check.v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -func WaitUpdateAndValidate(c *C, s *IntegSuite, name string, updateFn func(app *v1beta1.FlinkApplication), failurePhase v1beta1.FlinkApplicationPhase) *v1beta1.FlinkApplication { +func WaitUpdateAndValidate(c *C, s *IntegSuite, name string, updateFn func(app *v1beta2.FlinkApplication), failurePhase v1beta2.FlinkApplicationPhase) *v1beta2.FlinkApplication { // update with new appln image. app, err := s.Util.Update(name, updateFn) @@ -27,7 +27,7 @@ func WaitUpdateAndValidate(c *C, s *IntegSuite, name string, updateFn func(app * time.Sleep(100 * time.Millisecond) } - c.Assert(s.Util.WaitForPhase(name, v1beta1.FlinkApplicationRunning, failurePhase), IsNil) + c.Assert(s.Util.WaitForPhase(name, v1beta2.FlinkApplicationRunning, failurePhase), IsNil) c.Assert(s.Util.WaitForAllTasksRunning(name), IsNil) // check that the new job started from an empty savepoint. 
@@ -59,7 +59,7 @@ func (s *IntegSuite) TestJobCancellationWithoutSavepoint(c *C) { c.Assert(err, IsNil, Commentf("Failed to read test app yaml")) config.Name = testName + "job" - config.Spec.DeleteMode = v1beta1.DeleteModeForceCancel + config.Spec.DeleteMode = v1beta2.DeleteModeForceCancel config.Spec.SavepointDisabled = true config.ObjectMeta.Labels["integTest"] = testName config.Finalizers = append(config.Finalizers, finalizer) @@ -67,7 +67,7 @@ func (s *IntegSuite) TestJobCancellationWithoutSavepoint(c *C) { c.Assert(s.Util.CreateFlinkApplication(config), IsNil, Commentf("Failed to create flink application")) - c.Assert(s.Util.WaitForPhase(config.Name, v1beta1.FlinkApplicationRunning, v1beta1.FlinkApplicationDeployFailed), IsNil) + c.Assert(s.Util.WaitForPhase(config.Name, v1beta2.FlinkApplicationRunning, v1beta2.FlinkApplicationDeployFailed), IsNil) c.Assert(s.Util.WaitForAllTasksRunning(config.Name), IsNil) pods, err := s.Util.KubeClient.CoreV1().Pods(s.Util.Namespace.Name). @@ -79,9 +79,9 @@ func (s *IntegSuite) TestJobCancellationWithoutSavepoint(c *C) { } // test updating the app with a new image - newApp := WaitUpdateAndValidate(c, s, config.Name, func(app *v1beta1.FlinkApplication) { + newApp := WaitUpdateAndValidate(c, s, config.Name, func(app *v1beta2.FlinkApplication) { app.Spec.Image = NewImage - }, v1beta1.FlinkApplicationDeployFailed) + }, v1beta2.FlinkApplicationDeployFailed) c.Assert(newApp.Spec.Image, Equals, NewImage) c.Assert(newApp.Status.SavepointPath, Equals, "") @@ -96,7 +96,7 @@ func (s *IntegSuite) TestJobCancellationWithoutSavepoint(c *C) { // cleanup c.Assert(s.Util.FlinkApps().Delete(newApp.Name, &v1.DeleteOptions{}), IsNil) - var app *v1beta1.FlinkApplication + var app *v1beta2.FlinkApplication for { app, err = s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) @@ -134,14 +134,14 @@ func (s *IntegSuite) TestCancelledJobWithoutSavepoint(c *C) { c.Assert(err, IsNil, Commentf("Failed to read test app yaml")) config.Name = 
testName + "job" - config.Spec.DeleteMode = v1beta1.DeleteModeForceCancel + config.Spec.DeleteMode = v1beta2.DeleteModeForceCancel config.Spec.SavepointDisabled = true config.ObjectMeta.Labels["integTest"] = testName c.Assert(s.Util.CreateFlinkApplication(config), IsNil, Commentf("Failed to create flink application")) - c.Assert(s.Util.WaitForPhase(config.Name, v1beta1.FlinkApplicationRunning, v1beta1.FlinkApplicationDeployFailed), IsNil) + c.Assert(s.Util.WaitForPhase(config.Name, v1beta2.FlinkApplicationRunning, v1beta2.FlinkApplicationDeployFailed), IsNil) c.Assert(s.Util.WaitForAllTasksRunning(config.Name), IsNil) currApp, _ := s.Util.GetFlinkApplication(config.Name) @@ -160,7 +160,7 @@ func (s *IntegSuite) TestCancelledJobWithoutSavepoint(c *C) { job = s.Util.GetJobOverview(currApp) c.Assert(job["status"], Equals, "CANCELED") - newApp, err := s.Util.Update(config.Name, func(app *v1beta1.FlinkApplication) { + newApp, err := s.Util.Update(config.Name, func(app *v1beta2.FlinkApplication) { app.Spec.Image = NewImage }) c.Assert(err, IsNil) @@ -177,7 +177,7 @@ func (s *IntegSuite) TestCancelledJobWithoutSavepoint(c *C) { } // we should end up in the Running of the new job - c.Assert(s.Util.WaitForPhase(newApp.Name, v1beta1.FlinkApplicationRunning, v1beta1.FlinkApplicationDeployFailed), IsNil) + c.Assert(s.Util.WaitForPhase(newApp.Name, v1beta2.FlinkApplicationRunning, v1beta2.FlinkApplicationDeployFailed), IsNil) newApp, err = s.Util.GetFlinkApplication(newApp.Name) c.Assert(err, IsNil) @@ -211,25 +211,25 @@ func (s *IntegSuite) TestJobRecoveryWithoutSavepoint(c *C) { config.Name = testName config.ObjectMeta.Labels["integTest"] = testName config.Finalizers = append(config.Finalizers, finalizer) - config.Spec.DeleteMode = v1beta1.DeleteModeForceCancel + config.Spec.DeleteMode = v1beta2.DeleteModeForceCancel config.Spec.SavepointDisabled = true c.Assert(s.Util.CreateFlinkApplication(config), IsNil, Commentf("Failed to create flink application")) - 
c.Assert(s.Util.WaitForPhase(config.Name, v1beta1.FlinkApplicationRunning, v1beta1.FlinkApplicationSavepointing), IsNil) + c.Assert(s.Util.WaitForPhase(config.Name, v1beta2.FlinkApplicationRunning, v1beta2.FlinkApplicationSavepointing), IsNil) c.Assert(s.Util.WaitForAllTasksRunning(config.Name), IsNil) currApp, _ := s.Util.GetFlinkApplication(config.Name) c.Assert(currApp.Status.SavepointPath, Equals, "") // Test updating the app with a bad jar name -- this should cause a failed deploy and roll back - _, err = s.Util.Update(config.Name, func(app *v1beta1.FlinkApplication) { + _, err = s.Util.Update(config.Name, func(app *v1beta2.FlinkApplication) { app.Spec.JarName = "nonexistent.jar" app.Spec.RestartNonce = "rollback" }) c.Assert(err, IsNil) - c.Assert(s.Util.WaitForPhase(config.Name, v1beta1.FlinkApplicationDeployFailed, ""), IsNil) + c.Assert(s.Util.WaitForPhase(config.Name, v1beta2.FlinkApplicationDeployFailed, ""), IsNil) c.Assert(s.Util.WaitForAllTasksRunning(config.Name), IsNil) // assert the restart of the job with a new job id and old deploy hash. @@ -249,7 +249,7 @@ func (s *IntegSuite) TestJobRecoveryWithoutSavepoint(c *C) { c.Assert(restored, IsNil) // roll forward with the right config. 
- _ = WaitUpdateAndValidate(c, s, config.Name, func(app *v1beta1.FlinkApplication) { + _ = WaitUpdateAndValidate(c, s, config.Name, func(app *v1beta2.FlinkApplication) { app.Spec.JarName = config.Spec.JarName app.Spec.RestartNonce = "rollback2" app.Spec.Image = NewImage @@ -265,7 +265,7 @@ func (s *IntegSuite) TestJobRecoveryWithoutSavepoint(c *C) { // delete the application and ensure everything is cleaned up successfully c.Assert(s.Util.FlinkApps().Delete(config.Name, &v1.DeleteOptions{}), IsNil) - var app *v1beta1.FlinkApplication + var app *v1beta2.FlinkApplication for { app, err = s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) diff --git a/pkg/apis/app/v1beta2/types.go b/pkg/apis/app/v1beta2/types.go index 26787158..b2846e4c 100644 --- a/pkg/apis/app/v1beta2/types.go +++ b/pkg/apis/app/v1beta2/types.go @@ -4,6 +4,7 @@ import ( "fmt" apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -43,6 +44,7 @@ type FlinkApplicationSpec struct { // Deprecated: use SavepointPath instead SavepointInfo SavepointInfo `json:"savepointInfo,omitempty"` SavepointPath string `json:"savepointPath,omitempty"` + SavepointDisabled bool `json:"savepointDisabled"` DeploymentMode DeploymentMode `json:"deploymentMode,omitempty"` RPCPort *int32 `json:"rpcPort,omitempty"` BlobPort *int32 `json:"blobPort,omitempty"` @@ -236,6 +238,7 @@ const ( FlinkApplicationSubmittingJob FlinkApplicationPhase = "SubmittingJob" FlinkApplicationRunning FlinkApplicationPhase = "Running" FlinkApplicationSavepointing FlinkApplicationPhase = "Savepointing" + FlinkApplicationCancelling FlinkApplicationPhase = "Cancelling" FlinkApplicationDeleting FlinkApplicationPhase = "Deleting" FlinkApplicationRecovering FlinkApplicationPhase = "Recovering" FlinkApplicationRollingBackJob FlinkApplicationPhase = "RollingBackJob" @@ -251,6 +254,7 @@ var FlinkApplicationPhases = []FlinkApplicationPhase{ FlinkApplicationSubmittingJob, FlinkApplicationRunning, FlinkApplicationSavepointing, + 
FlinkApplicationCancelling, FlinkApplicationDeleting, FlinkApplicationRecovering, FlinkApplicationDeployFailed, diff --git a/pkg/apis/app/v1beta2/zz_generated.deepcopy.go b/pkg/apis/app/v1beta2/zz_generated.deepcopy.go index ec18e83c..c0c3c04c 100644 --- a/pkg/apis/app/v1beta2/zz_generated.deepcopy.go +++ b/pkg/apis/app/v1beta2/zz_generated.deepcopy.go @@ -205,6 +205,8 @@ func (in *FlinkApplicationStatus) DeepCopyInto(out *FlinkApplicationStatus) { in, out := &in.LastUpdatedAt, &out.LastUpdatedAt *out = (*in).DeepCopy() } + out.ClusterStatus = in.ClusterStatus + in.JobStatus.DeepCopyInto(&out.JobStatus) if in.VersionStatuses != nil { in, out := &in.VersionStatuses, &out.VersionStatuses *out = make([]FlinkApplicationVersionStatus, len(*in)) diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index f48f692d..5ed099d1 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -176,10 +176,9 @@ func (s *FlinkStateMachine) handle(ctx context.Context, application *v1beta2.Fli updateApplication, appErr = s.handleSubmittingJob(ctx, application) case v1beta2.FlinkApplicationRunning, v1beta2.FlinkApplicationDeployFailed: updateApplication, appErr = s.handleApplicationRunning(ctx, application) - case v1beta2.FlinkApplicationSavepointing: - case v1beta1.FlinkApplicationCancelling: + case v1beta2.FlinkApplicationCancelling: updateApplication, appErr = s.handleApplicationCancelling(ctx, application) - case v1beta1.FlinkApplicationSavepointing: + case v1beta2.FlinkApplicationSavepointing: updateApplication, appErr = s.handleApplicationSavepointing(ctx, application) case v1beta2.FlinkApplicationRecovering: updateApplication, appErr = s.handleApplicationRecovering(ctx, application) diff --git a/pkg/controller/flinkapplication/flink_state_machine_test.go b/pkg/controller/flinkapplication/flink_state_machine_test.go index 9d998707..d9d1dbc9 
100644 --- a/pkg/controller/flinkapplication/flink_state_machine_test.go +++ b/pkg/controller/flinkapplication/flink_state_machine_test.go @@ -83,12 +83,12 @@ func TestHandleStartingClusterStarting(t *testing.T) { func TestHandleNewOrCreateWithSavepointDisabled(t *testing.T) { updateInvoked := false - app := v1beta1.FlinkApplication{ - Spec: v1beta1.FlinkApplicationSpec{ + app := v1beta2.FlinkApplication{ + Spec: v1beta2.FlinkApplicationSpec{ SavepointDisabled: true, }, - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationClusterStarting, + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationClusterStarting, DeployHash: "old-hash", }, } @@ -96,13 +96,13 @@ func TestHandleNewOrCreateWithSavepointDisabled(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.IsClusterReadyFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { + mockFlinkController.IsClusterReadyFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { return true, nil } - mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (b bool, e error) { + mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (b bool, e error) { return true, nil } - mockFlinkController.GetCurrentDeploymentsForAppFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (*common.FlinkDeployment, error) { + mockFlinkController.GetCurrentDeploymentsForAppFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (*common.FlinkDeployment, error) { fd := testFlinkDeployment(application) fd.Taskmanager.Status.AvailableReplicas = 2 fd.Jobmanager.Status.AvailableReplicas = 1 @@ -115,8 +115,8 @@ func TestHandleNewOrCreateWithSavepointDisabled(t *testing.T) { } 
mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta1.FlinkApplication) - assert.Equal(t, v1beta1.FlinkApplicationCancelling, application.Status.Phase) + application := object.(*v1beta2.FlinkApplication) + assert.Equal(t, v1beta2.FlinkApplicationCancelling, application.Status.Phase) updateInvoked = true return nil } @@ -128,12 +128,12 @@ func TestHandleNewOrCreateWithSavepointDisabled(t *testing.T) { func TestHandleApplicationCancel(t *testing.T) { jobID := "j1" - app := v1beta1.FlinkApplication{ - Spec: v1beta1.FlinkApplicationSpec{ + app := v1beta2.FlinkApplication{ + Spec: v1beta2.FlinkApplicationSpec{ SavepointDisabled: true, }, - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationCancelling, + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationCancelling, DeployHash: "old-hash", }, } @@ -141,7 +141,7 @@ func TestHandleApplicationCancel(t *testing.T) { cancelInvoked := false stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { + mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { assert.Equal(t, "old-hash", hash) return &client.FlinkJobOverview{ JobID: jobID, @@ -149,7 +149,7 @@ func TestHandleApplicationCancel(t *testing.T) { }, nil } - mockFlinkController.ForceCancelFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (e error) { + mockFlinkController.ForceCancelFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (e error) { assert.Equal(t, "old-hash", hash) cancelInvoked = true @@ -158,8 +158,8 @@ func TestHandleApplicationCancel(t 
*testing.T) { mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta1.FlinkApplication) - assert.Equal(t, v1beta1.FlinkApplicationSubmittingJob, application.Status.Phase) + application := object.(*v1beta2.FlinkApplication) + assert.Equal(t, v1beta2.FlinkApplicationSubmittingJob, application.Status.Phase) return nil } @@ -172,14 +172,14 @@ func TestHandleApplicationCancel(t *testing.T) { func TestHandleApplicationCancelFailedWithMaxRetries(t *testing.T) { retryableErr := client.GetRetryableError(errors.New("blah"), "ForceCancelJob", "FAILED", 5) - app := v1beta1.FlinkApplication{ - Spec: v1beta1.FlinkApplicationSpec{ + app := v1beta2.FlinkApplication{ + Spec: v1beta2.FlinkApplicationSpec{ SavepointDisabled: true, }, - Status: v1beta1.FlinkApplicationStatus{ - Phase: v1beta1.FlinkApplicationCancelling, + Status: v1beta2.FlinkApplicationStatus{ + Phase: v1beta2.FlinkApplicationCancelling, DeployHash: "old-hash", - LastSeenError: retryableErr.(*v1beta1.FlinkApplicationError), + LastSeenError: retryableErr.(*v1beta2.FlinkApplicationError), }, } @@ -187,7 +187,7 @@ func TestHandleApplicationCancelFailedWithMaxRetries(t *testing.T) { updateInvoked := false stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.ForceCancelFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) error { + mockFlinkController.ForceCancelFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) error { // given we maxed out on retries, we should never have come here assert.False(t, true) return nil @@ -196,8 +196,8 @@ func TestHandleApplicationCancelFailedWithMaxRetries(t *testing.T) { mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, 
object runtime.Object) error { updateInvoked = true - application := object.(*v1beta1.FlinkApplication) - assert.Equal(t, v1beta1.FlinkApplicationRollingBackJob, application.Status.Phase) + application := object.(*v1beta2.FlinkApplication) + assert.Equal(t, v1beta2.FlinkApplicationRollingBackJob, application.Status.Phase) return nil } From 9e0a682816efeee3f6a9b6acde19ca1c8403e078 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Thu, 19 Mar 2020 10:51:37 -0700 Subject: [PATCH 35/41] Revert CRD upgrade --- deploy/crd.yaml | 7 +- integ/checkpoint_failure_test.go | 8 +- integ/job_cancellation_test.go | 36 +- integ/simple_test.go | 44 +- integ/utils/utils.go | 6 +- pkg/apis/app/addtoscheme_v1beta2.go | 4 +- pkg/apis/app/v1beta1/types.go | 67 ++- pkg/apis/app/v1beta1/zz_generated.deepcopy.go | 25 + pkg/apis/app/v1beta2/doc.go | 3 - pkg/apis/app/v1beta2/register.go | 42 -- pkg/apis/app/v1beta2/types.go | 353 -------------- pkg/apis/app/v1beta2/zz_generated.deepcopy.go | 407 ---------------- pkg/client/clientset/versioned/clientset.go | 14 - .../versioned/fake/clientset_generated.go | 7 - .../clientset/versioned/fake/register.go | 2 - .../clientset/versioned/scheme/register.go | 2 - .../versioned/typed/app/v1beta2/app_client.go | 6 +- .../versioned/typed/app/v1beta2/doc.go | 2 +- .../typed/app/v1beta2/fake/fake_app_client.go | 4 +- .../app/v1beta2/fake/fake_flinkapplication.go | 42 +- .../typed/app/v1beta2/flinkapplication.go | 34 +- .../typed/app/v1beta2/generated_expansion.go | 2 +- pkg/controller/flink/client/api.go | 62 +-- pkg/controller/flink/client/api_test.go | 6 +- pkg/controller/flink/client/error_handler.go | 20 +- pkg/controller/flink/config.go | 34 +- pkg/controller/flink/config_test.go | 32 +- pkg/controller/flink/container_utils.go | 22 +- pkg/controller/flink/container_utils_test.go | 8 +- pkg/controller/flink/flink.go | 124 ++--- pkg/controller/flink/flink_test.go | 54 +-- pkg/controller/flink/ingress.go | 2 +- .../flink/job_manager_controller.go | 28 +- 
.../flink/job_manager_controller_test.go | 12 +- pkg/controller/flink/mock/mock_flink.go | 90 ++-- .../flink/mock/mock_job_manager_controller.go | 4 +- .../mock/mock_task_manager_controller.go | 6 +- .../flink/task_manager_controller.go | 24 +- .../flink/task_manager_controller_test.go | 8 +- pkg/controller/flinkapplication/controller.go | 14 +- .../flinkapplication/flink_state_machine.go | 146 +++--- .../flink_state_machine_test.go | 456 +++++++++--------- pkg/controller/k8/cluster.go | 2 +- tmp/codegen/update-generated.sh | 2 +- 44 files changed, 751 insertions(+), 1522 deletions(-) delete mode 100644 pkg/apis/app/v1beta2/doc.go delete mode 100644 pkg/apis/app/v1beta2/register.go delete mode 100644 pkg/apis/app/v1beta2/types.go delete mode 100644 pkg/apis/app/v1beta2/zz_generated.deepcopy.go diff --git a/deploy/crd.yaml b/deploy/crd.yaml index f687f054..5bd941ae 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -12,14 +12,11 @@ spec: shortNames: - flinkapp scope: Namespaced - version: v1beta2 + version: v1beta1 versions: - - name: v1beta2 - served: true - storage: true - name: v1beta1 served: true - storage: false + storage: true - name: v1alpha1 served: true storage: false diff --git a/integ/checkpoint_failure_test.go b/integ/checkpoint_failure_test.go index 24137716..0f97e2bd 100644 --- a/integ/checkpoint_failure_test.go +++ b/integ/checkpoint_failure_test.go @@ -6,7 +6,7 @@ import ( "os" "time" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "github.com/prometheus/common/log" . 
"gopkg.in/check.v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -17,7 +17,7 @@ func failingJobTest(s *IntegSuite, c *C, testName string, causeFailure func()) { config, err := s.Util.ReadFlinkApplication("test_app.yaml") c.Assert(err, IsNil, Commentf("Failed to read test app yaml")) config.Name = testName + "job" - config.Spec.DeleteMode = v1beta2.DeleteModeForceCancel + config.Spec.DeleteMode = v1beta1.DeleteModeForceCancel config.ObjectMeta.Labels["integTest"] = testName @@ -27,7 +27,7 @@ func failingJobTest(s *IntegSuite, c *C, testName string, causeFailure func()) { // Cause it to fail causeFailure() - c.Assert(s.Util.WaitForPhase(config.Name, v1beta2.FlinkApplicationRunning, v1beta2.FlinkApplicationDeployFailed), IsNil) + c.Assert(s.Util.WaitForPhase(config.Name, v1beta1.FlinkApplicationRunning, v1beta1.FlinkApplicationDeployFailed), IsNil) // wait a bit for it to start failing time.Sleep(5 * time.Second) @@ -40,7 +40,7 @@ func failingJobTest(s *IntegSuite, c *C, testName string, causeFailure func()) { c.Assert(err, IsNil) // because the checkpoint will fail, the app should move to deploy failed - c.Assert(s.Util.WaitForPhase(config.Name, v1beta2.FlinkApplicationDeployFailed), IsNil) + c.Assert(s.Util.WaitForPhase(config.Name, v1beta1.FlinkApplicationDeployFailed), IsNil) // And the job should not have been updated newApp, err := s.Util.GetFlinkApplication(config.Name) diff --git a/integ/job_cancellation_test.go b/integ/job_cancellation_test.go index 7cc42d6b..6c92a3e0 100644 --- a/integ/job_cancellation_test.go +++ b/integ/job_cancellation_test.go @@ -4,13 +4,13 @@ import ( "fmt" "time" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "github.com/prometheus/common/log" . 
"gopkg.in/check.v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -func WaitUpdateAndValidate(c *C, s *IntegSuite, name string, updateFn func(app *v1beta2.FlinkApplication), failurePhase v1beta2.FlinkApplicationPhase) *v1beta2.FlinkApplication { +func WaitUpdateAndValidate(c *C, s *IntegSuite, name string, updateFn func(app *v1beta1.FlinkApplication), failurePhase v1beta1.FlinkApplicationPhase) *v1beta1.FlinkApplication { // update with new appln image. app, err := s.Util.Update(name, updateFn) @@ -27,7 +27,7 @@ func WaitUpdateAndValidate(c *C, s *IntegSuite, name string, updateFn func(app * time.Sleep(100 * time.Millisecond) } - c.Assert(s.Util.WaitForPhase(name, v1beta2.FlinkApplicationRunning, failurePhase), IsNil) + c.Assert(s.Util.WaitForPhase(name, v1beta1.FlinkApplicationRunning, failurePhase), IsNil) c.Assert(s.Util.WaitForAllTasksRunning(name), IsNil) // check that the new job started from an empty savepoint. @@ -59,7 +59,7 @@ func (s *IntegSuite) TestJobCancellationWithoutSavepoint(c *C) { c.Assert(err, IsNil, Commentf("Failed to read test app yaml")) config.Name = testName + "job" - config.Spec.DeleteMode = v1beta2.DeleteModeForceCancel + config.Spec.DeleteMode = v1beta1.DeleteModeForceCancel config.Spec.SavepointDisabled = true config.ObjectMeta.Labels["integTest"] = testName config.Finalizers = append(config.Finalizers, finalizer) @@ -67,7 +67,7 @@ func (s *IntegSuite) TestJobCancellationWithoutSavepoint(c *C) { c.Assert(s.Util.CreateFlinkApplication(config), IsNil, Commentf("Failed to create flink application")) - c.Assert(s.Util.WaitForPhase(config.Name, v1beta2.FlinkApplicationRunning, v1beta2.FlinkApplicationDeployFailed), IsNil) + c.Assert(s.Util.WaitForPhase(config.Name, v1beta1.FlinkApplicationRunning, v1beta1.FlinkApplicationDeployFailed), IsNil) c.Assert(s.Util.WaitForAllTasksRunning(config.Name), IsNil) pods, err := s.Util.KubeClient.CoreV1().Pods(s.Util.Namespace.Name). 
@@ -79,9 +79,9 @@ func (s *IntegSuite) TestJobCancellationWithoutSavepoint(c *C) { } // test updating the app with a new image - newApp := WaitUpdateAndValidate(c, s, config.Name, func(app *v1beta2.FlinkApplication) { + newApp := WaitUpdateAndValidate(c, s, config.Name, func(app *v1beta1.FlinkApplication) { app.Spec.Image = NewImage - }, v1beta2.FlinkApplicationDeployFailed) + }, v1beta1.FlinkApplicationDeployFailed) c.Assert(newApp.Spec.Image, Equals, NewImage) c.Assert(newApp.Status.SavepointPath, Equals, "") @@ -96,7 +96,7 @@ func (s *IntegSuite) TestJobCancellationWithoutSavepoint(c *C) { // cleanup c.Assert(s.Util.FlinkApps().Delete(newApp.Name, &v1.DeleteOptions{}), IsNil) - var app *v1beta2.FlinkApplication + var app *v1beta1.FlinkApplication for { app, err = s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) @@ -134,14 +134,14 @@ func (s *IntegSuite) TestCancelledJobWithoutSavepoint(c *C) { c.Assert(err, IsNil, Commentf("Failed to read test app yaml")) config.Name = testName + "job" - config.Spec.DeleteMode = v1beta2.DeleteModeForceCancel + config.Spec.DeleteMode = v1beta1.DeleteModeForceCancel config.Spec.SavepointDisabled = true config.ObjectMeta.Labels["integTest"] = testName c.Assert(s.Util.CreateFlinkApplication(config), IsNil, Commentf("Failed to create flink application")) - c.Assert(s.Util.WaitForPhase(config.Name, v1beta2.FlinkApplicationRunning, v1beta2.FlinkApplicationDeployFailed), IsNil) + c.Assert(s.Util.WaitForPhase(config.Name, v1beta1.FlinkApplicationRunning, v1beta1.FlinkApplicationDeployFailed), IsNil) c.Assert(s.Util.WaitForAllTasksRunning(config.Name), IsNil) currApp, _ := s.Util.GetFlinkApplication(config.Name) @@ -160,7 +160,7 @@ func (s *IntegSuite) TestCancelledJobWithoutSavepoint(c *C) { job = s.Util.GetJobOverview(currApp) c.Assert(job["status"], Equals, "CANCELED") - newApp, err := s.Util.Update(config.Name, func(app *v1beta2.FlinkApplication) { + newApp, err := s.Util.Update(config.Name, func(app 
*v1beta1.FlinkApplication) { app.Spec.Image = NewImage }) c.Assert(err, IsNil) @@ -177,7 +177,7 @@ func (s *IntegSuite) TestCancelledJobWithoutSavepoint(c *C) { } // we should end up in the Running of the new job - c.Assert(s.Util.WaitForPhase(newApp.Name, v1beta2.FlinkApplicationRunning, v1beta2.FlinkApplicationDeployFailed), IsNil) + c.Assert(s.Util.WaitForPhase(newApp.Name, v1beta1.FlinkApplicationRunning, v1beta1.FlinkApplicationDeployFailed), IsNil) newApp, err = s.Util.GetFlinkApplication(newApp.Name) c.Assert(err, IsNil) @@ -211,25 +211,25 @@ func (s *IntegSuite) TestJobRecoveryWithoutSavepoint(c *C) { config.Name = testName config.ObjectMeta.Labels["integTest"] = testName config.Finalizers = append(config.Finalizers, finalizer) - config.Spec.DeleteMode = v1beta2.DeleteModeForceCancel + config.Spec.DeleteMode = v1beta1.DeleteModeForceCancel config.Spec.SavepointDisabled = true c.Assert(s.Util.CreateFlinkApplication(config), IsNil, Commentf("Failed to create flink application")) - c.Assert(s.Util.WaitForPhase(config.Name, v1beta2.FlinkApplicationRunning, v1beta2.FlinkApplicationSavepointing), IsNil) + c.Assert(s.Util.WaitForPhase(config.Name, v1beta1.FlinkApplicationRunning, v1beta1.FlinkApplicationSavepointing), IsNil) c.Assert(s.Util.WaitForAllTasksRunning(config.Name), IsNil) currApp, _ := s.Util.GetFlinkApplication(config.Name) c.Assert(currApp.Status.SavepointPath, Equals, "") // Test updating the app with a bad jar name -- this should cause a failed deploy and roll back - _, err = s.Util.Update(config.Name, func(app *v1beta2.FlinkApplication) { + _, err = s.Util.Update(config.Name, func(app *v1beta1.FlinkApplication) { app.Spec.JarName = "nonexistent.jar" app.Spec.RestartNonce = "rollback" }) c.Assert(err, IsNil) - c.Assert(s.Util.WaitForPhase(config.Name, v1beta2.FlinkApplicationDeployFailed, ""), IsNil) + c.Assert(s.Util.WaitForPhase(config.Name, v1beta1.FlinkApplicationDeployFailed, ""), IsNil) c.Assert(s.Util.WaitForAllTasksRunning(config.Name), 
IsNil) // assert the restart of the job with a new job id and old deploy hash. @@ -249,7 +249,7 @@ func (s *IntegSuite) TestJobRecoveryWithoutSavepoint(c *C) { c.Assert(restored, IsNil) // roll forward with the right config. - _ = WaitUpdateAndValidate(c, s, config.Name, func(app *v1beta2.FlinkApplication) { + _ = WaitUpdateAndValidate(c, s, config.Name, func(app *v1beta1.FlinkApplication) { app.Spec.JarName = config.Spec.JarName app.Spec.RestartNonce = "rollback2" app.Spec.Image = NewImage @@ -265,7 +265,7 @@ func (s *IntegSuite) TestJobRecoveryWithoutSavepoint(c *C) { // delete the application and ensure everything is cleaned up successfully c.Assert(s.Util.FlinkApps().Delete(config.Name, &v1.DeleteOptions{}), IsNil) - var app *v1beta2.FlinkApplication + var app *v1beta1.FlinkApplication for { app, err = s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) diff --git a/integ/simple_test.go b/integ/simple_test.go index 1780c454..13de736a 100644 --- a/integ/simple_test.go +++ b/integ/simple_test.go @@ -7,7 +7,7 @@ import ( "os" "time" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "github.com/lyft/flinkk8soperator/pkg/controller/flink/client" "github.com/prometheus/common/log" . 
"gopkg.in/check.v1" @@ -17,12 +17,12 @@ import ( const NewImage = "lyft/operator-test-app:b1b3cb8e8f98bd41f44f9c89f8462ce255e0d13f.2" -func updateAndValidate(c *C, s *IntegSuite, name string, updateFn func(app *v1beta2.FlinkApplication), failurePhase v1beta2.FlinkApplicationPhase) *v1beta2.FlinkApplication { +func updateAndValidate(c *C, s *IntegSuite, name string, updateFn func(app *v1beta1.FlinkApplication), failurePhase v1beta1.FlinkApplicationPhase) *v1beta1.FlinkApplication { app, err := s.Util.Update(name, updateFn) c.Assert(err, IsNil) - c.Assert(s.Util.WaitForPhase(name, v1beta2.FlinkApplicationSavepointing, failurePhase), IsNil) - c.Assert(s.Util.WaitForPhase(name, v1beta2.FlinkApplicationRunning, failurePhase), IsNil) + c.Assert(s.Util.WaitForPhase(name, v1beta1.FlinkApplicationSavepointing, failurePhase), IsNil) + c.Assert(s.Util.WaitForPhase(name, v1beta1.FlinkApplicationRunning, failurePhase), IsNil) c.Assert(s.Util.WaitForAllTasksRunning(name), IsNil) // check that it really updated @@ -72,7 +72,7 @@ func (s *IntegSuite) TestSimple(c *C) { c.Assert(s.Util.CreateFlinkApplication(config), IsNil, Commentf("Failed to create flink application")) - c.Assert(s.Util.WaitForPhase(config.Name, v1beta2.FlinkApplicationRunning, v1beta2.FlinkApplicationDeployFailed), IsNil) + c.Assert(s.Util.WaitForPhase(config.Name, v1beta1.FlinkApplicationRunning, v1beta1.FlinkApplicationDeployFailed), IsNil) c.Assert(s.Util.WaitForAllTasksRunning(config.Name), IsNil) pods, err := s.Util.KubeClient.CoreV1().Pods(s.Util.Namespace.Name). 
@@ -86,9 +86,9 @@ func (s *IntegSuite) TestSimple(c *C) { log.Info("Application started successfully") // test updating the app with a new image - newApp := updateAndValidate(c, s, config.Name, func(app *v1beta2.FlinkApplication) { + newApp := updateAndValidate(c, s, config.Name, func(app *v1beta1.FlinkApplication) { app.Spec.Image = NewImage - }, v1beta2.FlinkApplicationDeployFailed) + }, v1beta1.FlinkApplicationDeployFailed) // check that the pods have the new image c.Assert(newApp.Spec.Image, Equals, NewImage) pods, err = s.Util.KubeClient.CoreV1().Pods(s.Util.Namespace.Name). @@ -100,9 +100,9 @@ func (s *IntegSuite) TestSimple(c *C) { } // test updating the app with a config change - newApp = updateAndValidate(c, s, config.Name, func(app *v1beta2.FlinkApplication) { + newApp = updateAndValidate(c, s, config.Name, func(app *v1beta1.FlinkApplication) { app.Spec.FlinkConfig["akka.client.timeout"] = "23 s" - }, v1beta2.FlinkApplicationDeployFailed) + }, v1beta1.FlinkApplicationDeployFailed) // validate the config has been applied res, err := s.Util.FlinkAPIGet(newApp, "/jobmanager/config") c.Assert(err, IsNil) @@ -122,7 +122,7 @@ func (s *IntegSuite) TestSimple(c *C) { { log.Info("Testing rollback") - newApp, err := s.Util.Update(config.Name, func(app *v1beta2.FlinkApplication) { + newApp, err := s.Util.Update(config.Name, func(app *v1beta1.FlinkApplication) { app.Spec.JarName = "nonexistent.jar" // this shouldn't be needed after STRMCMP-473 is fixed app.Spec.RestartNonce = "rollback" @@ -130,9 +130,9 @@ func (s *IntegSuite) TestSimple(c *C) { c.Assert(err, IsNil) - c.Assert(s.Util.WaitForPhase(newApp.Name, v1beta2.FlinkApplicationSavepointing, ""), IsNil) + c.Assert(s.Util.WaitForPhase(newApp.Name, v1beta1.FlinkApplicationSavepointing, ""), IsNil) // we should end up in the DeployFailed phase - c.Assert(s.Util.WaitForPhase(newApp.Name, v1beta2.FlinkApplicationDeployFailed, ""), IsNil) + c.Assert(s.Util.WaitForPhase(newApp.Name, 
v1beta1.FlinkApplicationDeployFailed, ""), IsNil) log.Info("Job is in deploy failed, waiting for tasks to start") @@ -159,7 +159,7 @@ func (s *IntegSuite) TestSimple(c *C) { log.Info("Attempting to roll forward") // and we should be able to roll forward by resubmitting with a fixed config - updateAndValidate(c, s, config.Name, func(app *v1beta2.FlinkApplication) { + updateAndValidate(c, s, config.Name, func(app *v1beta1.FlinkApplication) { app.Spec.JarName = config.Spec.JarName app.Spec.RestartNonce = "rollback2" }, "") @@ -169,12 +169,12 @@ func (s *IntegSuite) TestSimple(c *C) { { log.Info("Testing force rollback") - newApp, err := s.Util.Update(config.Name, func(app *v1beta2.FlinkApplication) { + newApp, err := s.Util.Update(config.Name, func(app *v1beta1.FlinkApplication) { app.Spec.Image = "lyft/badimage:latest" }) c.Assert(err, IsNil) - c.Assert(s.Util.WaitForPhase(newApp.Name, v1beta2.FlinkApplicationClusterStarting, ""), IsNil) + c.Assert(s.Util.WaitForPhase(newApp.Name, v1beta1.FlinkApplicationClusterStarting, ""), IsNil) // User realizes error and cancels the deploy log.Infof("Cancelling deploy...") @@ -186,17 +186,17 @@ func (s *IntegSuite) TestSimple(c *C) { c.Assert(err, IsNil) // we should end up in the DeployFailed phase - c.Assert(s.Util.WaitForPhase(newApp.Name, v1beta2.FlinkApplicationDeployFailed, ""), IsNil) + c.Assert(s.Util.WaitForPhase(newApp.Name, v1beta1.FlinkApplicationDeployFailed, ""), IsNil) c.Assert(newApp.Spec.ForceRollback, Equals, true) log.Info("User cancelled deploy. 
Job is in deploy failed, waiting for tasks to start") // but the job should still be running - c.Assert(newApp.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.State, Equals, v1beta2.Running) + c.Assert(newApp.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.State, Equals, v1beta1.Running) log.Info("Attempting to roll forward with fix") // Fixing update // and we should be able to roll forward by resubmitting with a fixed config - updateAndValidate(c, s, config.Name, func(app *v1beta2.FlinkApplication) { + updateAndValidate(c, s, config.Name, func(app *v1beta1.FlinkApplication) { app.Spec.Image = NewImage app.Spec.RestartNonce = "rollback3" app.Spec.ForceRollback = false @@ -207,7 +207,7 @@ func (s *IntegSuite) TestSimple(c *C) { c.Assert(s.Util.FlinkApps().Delete(config.Name, &v1.DeleteOptions{}), IsNil) // validate that a savepoint was taken and the job was cancelled - var app *v1beta2.FlinkApplication + var app *v1beta1.FlinkApplication for { app, err = s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) @@ -275,7 +275,7 @@ func (s *IntegSuite) TestRecovery(c *C) { log.Info("Application Created") // wait for it to be running - c.Assert(s.Util.WaitForPhase(config.Name, v1beta2.FlinkApplicationRunning, v1beta2.FlinkApplicationDeployFailed), IsNil) + c.Assert(s.Util.WaitForPhase(config.Name, v1beta1.FlinkApplicationRunning, v1beta1.FlinkApplicationDeployFailed), IsNil) c.Assert(s.Util.WaitForAllTasksRunning(config.Name), IsNil) log.Info("Application running") @@ -313,7 +313,7 @@ func (s *IntegSuite) TestRecovery(c *C) { time.Sleep(1 * time.Second) // try to update the job - app, err = s.Util.Update(config.Name, func(app *v1beta2.FlinkApplication) { + app, err = s.Util.Update(config.Name, func(app *v1beta1.FlinkApplication) { app.Spec.Image = NewImage }) c.Assert(err, IsNil) @@ -331,7 +331,7 @@ func (s *IntegSuite) TestRecovery(c *C) { } c.Assert(err, IsNil) - c.Assert(s.Util.WaitForPhase(config.Name, 
v1beta2.FlinkApplicationRunning, v1beta2.FlinkApplicationDeployFailed), IsNil) + c.Assert(s.Util.WaitForPhase(config.Name, v1beta1.FlinkApplicationRunning, v1beta1.FlinkApplicationDeployFailed), IsNil) // stop it from failing c.Assert(os.Remove(s.Util.CheckpointDir+"/fail"), IsNil) diff --git a/integ/utils/utils.go b/integ/utils/utils.go index 7cf8bc5c..aaed4c6d 100644 --- a/integ/utils/utils.go +++ b/integ/utils/utils.go @@ -13,9 +13,9 @@ import ( errors2 "k8s.io/apimachinery/pkg/api/errors" "github.com/go-resty/resty" - flinkapp "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + flinkapp "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" clientset "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned" - client "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta2" + client "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta1" "github.com/prometheus/common/log" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" @@ -348,7 +348,7 @@ func (f *TestUtil) ReadFlinkApplication(path string) (*flinkapp.FlinkApplication } func (f *TestUtil) FlinkApps() client.FlinkApplicationInterface { - return f.FlinkApplicationClient.FlinkV1beta2().FlinkApplications(f.Namespace.Name) + return f.FlinkApplicationClient.FlinkV1beta1().FlinkApplications(f.Namespace.Name) } func (f *TestUtil) CreateFlinkApplication(application *flinkapp.FlinkApplication) error { diff --git a/pkg/apis/app/addtoscheme_v1beta2.go b/pkg/apis/app/addtoscheme_v1beta2.go index 790a392a..28922b80 100644 --- a/pkg/apis/app/addtoscheme_v1beta2.go +++ b/pkg/apis/app/addtoscheme_v1beta2.go @@ -5,10 +5,10 @@ package apis import ( - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" ) func init() { // Register the types with the Scheme so the components can map objects to GroupVersionKinds and back - AddToSchemes = append(AddToSchemes, v1beta2.SchemeBuilder.AddToScheme) + 
AddToSchemes = append(AddToSchemes, v1beta1.SchemeBuilder.AddToScheme) } diff --git a/pkg/apis/app/v1beta1/types.go b/pkg/apis/app/v1beta1/types.go index 35a20dd1..0026fabc 100644 --- a/pkg/apis/app/v1beta1/types.go +++ b/pkg/apis/app/v1beta1/types.go @@ -168,19 +168,36 @@ type FlinkJobStatus struct { } type FlinkApplicationStatus struct { - Phase FlinkApplicationPhase `json:"phase"` - StartedAt *metav1.Time `json:"startedAt,omitempty"` - LastUpdatedAt *metav1.Time `json:"lastUpdatedAt,omitempty"` - Reason string `json:"reason,omitempty"` - ClusterStatus FlinkClusterStatus `json:"clusterStatus,omitempty"` - JobStatus FlinkJobStatus `json:"jobStatus"` - FailedDeployHash string `json:"failedDeployHash,omitempty"` - RollbackHash string `json:"rollbackHash,omitempty"` - DeployHash string `json:"deployHash"` - SavepointTriggerID string `json:"savepointTriggerId,omitempty"` - SavepointPath string `json:"savepointPath,omitempty"` - RetryCount int32 `json:"retryCount,omitempty"` - LastSeenError *FlinkApplicationError `json:"lastSeenError,omitempty"` + Phase FlinkApplicationPhase `json:"phase"` + StartedAt *metav1.Time `json:"startedAt,omitempty"` + LastUpdatedAt *metav1.Time `json:"lastUpdatedAt,omitempty"` + Reason string `json:"reason,omitempty"` + DeployVersion string `json:"deployVersion,omitempty"` + UpdatingVersion string `json:"updatingVersion,omitempty"` + // To ensure backward compatibility, repeat ClusterStatus and JobStatus + ClusterStatus FlinkClusterStatus `json:"clusterStatus,omitempty"` + JobStatus FlinkJobStatus `json:"jobStatus,omitempty"` + VersionStatuses []FlinkApplicationVersionStatus `json:"versionStatuses,omitempty"` + FailedDeployHash string `json:"failedDeployHash,omitempty"` + RollbackHash string `json:"rollbackHash,omitempty"` + DeployHash string `json:"deployHash"` + SavepointTriggerID string `json:"savepointTriggerId,omitempty"` + SavepointPath string `json:"savepointPath,omitempty"` + RetryCount int32 `json:"retryCount,omitempty"` + 
LastSeenError *FlinkApplicationError `json:"lastSeenError,omitempty"` +} + +type FlinkApplicationVersion string + +const ( + BlueFlinkApplication FlinkApplicationVersion = "Blue" + GreenFlinkApplication FlinkApplicationVersion = "Green" +) + +type FlinkApplicationVersionStatus struct { + Version FlinkApplicationVersion `json:"appVersion,omitempty"` + ClusterStatus FlinkClusterStatus `json:"clusterStatus,omitempty"` + JobStatus FlinkJobStatus `json:"jobStatus,omitempty"` } func (in *FlinkApplicationStatus) GetPhase() FlinkApplicationPhase { @@ -226,6 +243,8 @@ const ( FlinkApplicationRecovering FlinkApplicationPhase = "Recovering" FlinkApplicationRollingBackJob FlinkApplicationPhase = "RollingBackJob" FlinkApplicationDeployFailed FlinkApplicationPhase = "DeployFailed" + FlinkApplicationDualRunning FlinkApplicationPhase = "DualRunning" + FlinkApplicationTeardown FlinkApplicationPhase = "Teardown" ) var FlinkApplicationPhases = []FlinkApplicationPhase{ @@ -240,17 +259,35 @@ var FlinkApplicationPhases = []FlinkApplicationPhase{ FlinkApplicationRecovering, FlinkApplicationDeployFailed, FlinkApplicationRollingBackJob, + FlinkApplicationDualRunning, + FlinkApplicationTeardown, } func IsRunningPhase(phase FlinkApplicationPhase) bool { return phase == FlinkApplicationRunning || phase == FlinkApplicationDeployFailed } +func IsBlueGreenDeploymentMode(mode DeploymentMode) bool { + // Backward compatibility: DeploymentModeDual is never treated as blue-green + if mode == DeploymentModeDual { + return false + } + return mode == DeploymentModeBlueGreen +} + +func GetMaxRunningJobs(mode DeploymentMode) int32 { + if IsBlueGreenDeploymentMode(mode) { + return int32(2) + } + return int32(1) +} + type DeploymentMode string const ( - DeploymentModeSingle DeploymentMode = "Single" - DeploymentModeDual DeploymentMode = "Dual" + DeploymentModeSingle DeploymentMode = "Single" + DeploymentModeDual DeploymentMode = "Dual" + DeploymentModeBlueGreen DeploymentMode = "BlueGreen" ) type DeleteMode string diff --git
a/pkg/apis/app/v1beta1/zz_generated.deepcopy.go b/pkg/apis/app/v1beta1/zz_generated.deepcopy.go index 1b23bd3a..b0e1c005 100644 --- a/pkg/apis/app/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/app/v1beta1/zz_generated.deepcopy.go @@ -207,6 +207,13 @@ func (in *FlinkApplicationStatus) DeepCopyInto(out *FlinkApplicationStatus) { } out.ClusterStatus = in.ClusterStatus in.JobStatus.DeepCopyInto(&out.JobStatus) + if in.VersionStatuses != nil { + in, out := &in.VersionStatuses, &out.VersionStatuses + *out = make([]FlinkApplicationVersionStatus, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } if in.LastSeenError != nil { in, out := &in.LastSeenError, &out.LastSeenError *out = new(FlinkApplicationError) @@ -225,6 +232,24 @@ func (in *FlinkApplicationStatus) DeepCopy() *FlinkApplicationStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FlinkApplicationVersionStatus) DeepCopyInto(out *FlinkApplicationVersionStatus) { + *out = *in + out.ClusterStatus = in.ClusterStatus + in.JobStatus.DeepCopyInto(&out.JobStatus) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlinkApplicationVersionStatus. +func (in *FlinkApplicationVersionStatus) DeepCopy() *FlinkApplicationVersionStatus { + if in == nil { + return nil + } + out := new(FlinkApplicationVersionStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *FlinkClusterStatus) DeepCopyInto(out *FlinkClusterStatus) { *out = *in diff --git a/pkg/apis/app/v1beta2/doc.go b/pkg/apis/app/v1beta2/doc.go deleted file mode 100644 index eb56232b..00000000 --- a/pkg/apis/app/v1beta2/doc.go +++ /dev/null @@ -1,3 +0,0 @@ -// +k8s:deepcopy-gen=package -// +groupName=flink.k8s.io -package v1beta2 diff --git a/pkg/apis/app/v1beta2/register.go b/pkg/apis/app/v1beta2/register.go deleted file mode 100644 index 1cfce161..00000000 --- a/pkg/apis/app/v1beta2/register.go +++ /dev/null @@ -1,42 +0,0 @@ -package v1beta2 - -import ( - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" -) - -const ( - version = "v1beta2" - groupName = "flink.k8s.io" - - FlinkApplicationKind = "FlinkApplication" -) - -var ( - SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes) - AddToScheme = SchemeBuilder.AddToScheme - // SchemeGroupVersion is the group version used to register these objects. - SchemeGroupVersion = schema.GroupVersion{Group: groupName, Version: version} -) - -// GetKind takes an unqualified kind and returns back a Group qualified GroupKind -func Kind(kind string) schema.GroupKind { - return SchemeGroupVersion.WithKind(kind).GroupKind() -} - -// Resource takes an unqualified resource and returns a Group qualified GroupResource -func Resource(resource string) schema.GroupResource { - return SchemeGroupVersion.WithResource(resource).GroupResource() -} - -// addKnownTypes adds the set of types defined in this package to the supplied scheme. 
-func addKnownTypes(scheme *runtime.Scheme) error { - scheme.AddKnownTypes(SchemeGroupVersion, - &FlinkApplication{}, - &FlinkApplicationList{}, - ) - - metav1.AddToGroupVersion(scheme, SchemeGroupVersion) - return nil -} diff --git a/pkg/apis/app/v1beta2/types.go b/pkg/apis/app/v1beta2/types.go deleted file mode 100644 index b2846e4c..00000000 --- a/pkg/apis/app/v1beta2/types.go +++ /dev/null @@ -1,353 +0,0 @@ -package v1beta2 - -import ( - "fmt" - - apiv1 "k8s.io/api/core/v1" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object - -type FlinkApplicationList struct { - metav1.TypeMeta `json:",inline"` - metav1.ListMeta `json:"metadata"` - Items []FlinkApplication `json:"items"` -} - -// +genclient -// +genclient:noStatus -// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object -// +k8s:defaulter-gen=true -type FlinkApplication struct { - metav1.TypeMeta `json:",inline"` - metav1.ObjectMeta `json:"metadata"` - Spec FlinkApplicationSpec `json:"spec"` - Status FlinkApplicationStatus `json:"status,omitempty"` -} - -type FlinkApplicationSpec struct { - Image string `json:"image,omitempty" protobuf:"bytes,2,opt,name=image"` - ImagePullPolicy apiv1.PullPolicy `json:"imagePullPolicy,omitempty" protobuf:"bytes,14,opt,name=imagePullPolicy,casttype=PullPolicy"` - ImagePullSecrets []apiv1.LocalObjectReference `json:"imagePullSecrets,omitempty" patchStrategy:"merge" patchMergeKey:"name" protobuf:"bytes,15,rep,name=imagePullSecrets"` - ServiceAccountName string `json:"serviceAccountName,omitempty"` - SecurityContext *apiv1.PodSecurityContext `json:"securityContext,omitempty"` - FlinkConfig FlinkConfig `json:"flinkConfig"` - FlinkVersion string `json:"flinkVersion"` - TaskManagerConfig TaskManagerConfig `json:"taskManagerConfig,omitempty"` - JobManagerConfig JobManagerConfig `json:"jobManagerConfig,omitempty"` - JarName string `json:"jarName"` - Parallelism int32 `json:"parallelism"` - 
EntryClass string `json:"entryClass,omitempty"` - ProgramArgs string `json:"programArgs,omitempty"` - // Deprecated: use SavepointPath instead - SavepointInfo SavepointInfo `json:"savepointInfo,omitempty"` - SavepointPath string `json:"savepointPath,omitempty"` - SavepointDisabled bool `json:"savepointDisabled"` - DeploymentMode DeploymentMode `json:"deploymentMode,omitempty"` - RPCPort *int32 `json:"rpcPort,omitempty"` - BlobPort *int32 `json:"blobPort,omitempty"` - QueryPort *int32 `json:"queryPort,omitempty"` - UIPort *int32 `json:"uiPort,omitempty"` - MetricsQueryPort *int32 `json:"metricsQueryPort,omitempty"` - Volumes []apiv1.Volume `json:"volumes,omitempty"` - VolumeMounts []apiv1.VolumeMount `json:"volumeMounts,omitempty"` - RestartNonce string `json:"restartNonce"` - DeleteMode DeleteMode `json:"deleteMode,omitempty"` - AllowNonRestoredState bool `json:"allowNonRestoredState,omitempty"` - ForceRollback bool `json:"forceRollback"` - MaxCheckpointRestoreAgeSeconds *int32 `json:"maxCheckpointRestoreAgeSeconds,omitempty"` -} - -type FlinkConfig map[string]interface{} - -// Workaround for https://github.com/kubernetes-sigs/kubebuilder/issues/528 -func (in *FlinkConfig) DeepCopyInto(out *FlinkConfig) { - if in == nil { - *out = nil - } else { - *out = make(map[string]interface{}, len(*in)) - for k, v := range *in { - (*out)[k] = deepCopyJSONValue(v) - } - } -} - -func deepCopyJSONValue(x interface{}) interface{} { - switch x := x.(type) { - case map[string]interface{}: - clone := make(map[string]interface{}, len(x)) - for k, v := range x { - clone[k] = deepCopyJSONValue(v) - } - return clone - case []interface{}: - clone := make([]interface{}, len(x)) - for i, v := range x { - clone[i] = deepCopyJSONValue(v) - } - return clone - case string, int, uint, int32, uint32, int64, uint64, bool, float32, float64, nil: - return x - default: - panic(fmt.Errorf("cannot deep copy %T", x)) - } -} - -func (in *FlinkConfig) DeepCopy() *FlinkConfig { - if in == nil { - return 
nil - } - out := new(FlinkConfig) - in.DeepCopyInto(out) - return out -} - -type JobManagerConfig struct { - Resources *apiv1.ResourceRequirements `json:"resources,omitempty"` - EnvConfig EnvironmentConfig `json:"envConfig"` - Replicas *int32 `json:"replicas,omitempty"` - OffHeapMemoryFraction *float64 `json:"offHeapMemoryFraction,omitempty"` - NodeSelector map[string]string `json:"nodeSelector,omitempty"` - Tolerations []apiv1.Toleration `json:"tolerations,omitempty"` -} - -type TaskManagerConfig struct { - Resources *apiv1.ResourceRequirements `json:"resources,omitempty"` - EnvConfig EnvironmentConfig `json:"envConfig"` - TaskSlots *int32 `json:"taskSlots,omitempty"` - OffHeapMemoryFraction *float64 `json:"offHeapMemoryFraction,omitempty"` - NodeSelector map[string]string `json:"nodeSelector,omitempty"` - Tolerations []apiv1.Toleration `json:"tolerations,omitempty"` -} - -type EnvironmentConfig struct { - EnvFrom []apiv1.EnvFromSource `json:"envFrom,omitempty"` - Env []apiv1.EnvVar `json:"env,omitempty"` -} - -type SavepointInfo struct { - SavepointLocation string `json:"savepointLocation,omitempty"` -} - -type FlinkClusterStatus struct { - ClusterOverviewURL string `json:"clusterOverviewURL,omitempty"` - Health HealthStatus `json:"health,omitempty"` - NumberOfTaskManagers int32 `json:"numberOfTaskManagers,omitempty"` - HealthyTaskManagers int32 `json:"healthyTaskManagers,omitempty"` - NumberOfTaskSlots int32 `json:"numberOfTaskSlots,omitempty"` - AvailableTaskSlots int32 `json:"availableTaskSlots"` -} - -type FlinkJobStatus struct { - JobOverviewURL string `json:"jobOverviewURL,omitempty"` - JobID string `json:"jobID,omitempty"` - Health HealthStatus `json:"health,omitempty"` - State JobState `json:"state,omitempty"` - - JarName string `json:"jarName"` - Parallelism int32 `json:"parallelism"` - EntryClass string `json:"entryClass,omitempty"` - ProgramArgs string `json:"programArgs,omitempty"` - AllowNonRestoredState bool `json:"allowNonRestoredState,omitempty"` 
- - StartTime *metav1.Time `json:"startTime,omitempty"` - JobRestartCount int32 `json:"jobRestartCount,omitempty"` - CompletedCheckpointCount int32 `json:"completedCheckpointCount,omitempty"` - FailedCheckpointCount int32 `json:"failedCheckpointCount,omitempty"` - RestorePath string `json:"restorePath,omitempty"` - RestoreTime *metav1.Time `json:"restoreTime,omitempty"` - LastFailingTime *metav1.Time `json:"lastFailingTime,omitempty"` - - LastCheckpointPath string `json:"lastCheckpoint,omitempty"` - LastCheckpointTime *metav1.Time `json:"lastCheckpointTime,omitempty"` - - RunningTasks int32 `json:"runningTasks,omitempty"` - TotalTasks int32 `json:"totalTasks,omitempty"` -} - -type FlinkApplicationStatus struct { - Phase FlinkApplicationPhase `json:"phase"` - StartedAt *metav1.Time `json:"startedAt,omitempty"` - LastUpdatedAt *metav1.Time `json:"lastUpdatedAt,omitempty"` - Reason string `json:"reason,omitempty"` - DeployVersion string `json:"deployVersion,omitempty"` - UpdatingVersion string `json:"updatingVersion,omitempty"` - // To ensure backward compatibility, repeat ClusterStatus and JobStatus - ClusterStatus FlinkClusterStatus `json:"clusterStatus,omitempty"` - JobStatus FlinkJobStatus `json:"jobStatus,omitempty"` - VersionStatuses []FlinkApplicationVersionStatus `json:"versionStatuses,omitempty"` - FailedDeployHash string `json:"failedDeployHash,omitempty"` - RollbackHash string `json:"rollbackHash,omitempty"` - DeployHash string `json:"deployHash"` - SavepointTriggerID string `json:"savepointTriggerId,omitempty"` - SavepointPath string `json:"savepointPath,omitempty"` - RetryCount int32 `json:"retryCount,omitempty"` - LastSeenError *FlinkApplicationError `json:"lastSeenError,omitempty"` -} - -type FlinkApplicationVersion string - -const ( - BlueFlinkApplication FlinkApplicationVersion = "Blue" - GreenFlinkApplication FlinkApplicationVersion = "Green" -) - -type FlinkApplicationVersionStatus struct { - Version FlinkApplicationVersion 
`json:"appVersion,omitempty"` - ClusterStatus FlinkClusterStatus `json:"clusterStatus,omitempty"` - JobStatus FlinkJobStatus `json:"jobStatus,omitempty"` -} - -func (in *FlinkApplicationStatus) GetPhase() FlinkApplicationPhase { - return in.Phase -} - -func (in *FlinkApplicationStatus) UpdatePhase(phase FlinkApplicationPhase, reason string) { - now := metav1.Now() - if in.StartedAt == nil { - in.StartedAt = &now - in.LastUpdatedAt = &now - } - in.Reason = reason - in.Phase = phase -} - -func (in *FlinkApplicationStatus) TouchResource(reason string) { - now := metav1.Now() - in.LastUpdatedAt = &now - in.Reason = reason -} - -type FlinkApplicationPhase string - -func (p FlinkApplicationPhase) VerboseString() string { - phaseName := string(p) - if p == FlinkApplicationNew { - phaseName = "New" - } - return phaseName -} - -// As you add more ApplicationPhase please add it to FlinkApplicationPhases list -const ( - FlinkApplicationNew FlinkApplicationPhase = "" - FlinkApplicationUpdating FlinkApplicationPhase = "Updating" - FlinkApplicationClusterStarting FlinkApplicationPhase = "ClusterStarting" - FlinkApplicationSubmittingJob FlinkApplicationPhase = "SubmittingJob" - FlinkApplicationRunning FlinkApplicationPhase = "Running" - FlinkApplicationSavepointing FlinkApplicationPhase = "Savepointing" - FlinkApplicationCancelling FlinkApplicationPhase = "Cancelling" - FlinkApplicationDeleting FlinkApplicationPhase = "Deleting" - FlinkApplicationRecovering FlinkApplicationPhase = "Recovering" - FlinkApplicationRollingBackJob FlinkApplicationPhase = "RollingBackJob" - FlinkApplicationDeployFailed FlinkApplicationPhase = "DeployFailed" - FlinkApplicationDualRunning FlinkApplicationPhase = "DualRunning" - FlinkApplicationTeardown FlinkApplicationPhase = "Teardown" -) - -var FlinkApplicationPhases = []FlinkApplicationPhase{ - FlinkApplicationNew, - FlinkApplicationUpdating, - FlinkApplicationClusterStarting, - FlinkApplicationSubmittingJob, - FlinkApplicationRunning, - 
FlinkApplicationSavepointing, - FlinkApplicationCancelling, - FlinkApplicationDeleting, - FlinkApplicationRecovering, - FlinkApplicationDeployFailed, - FlinkApplicationRollingBackJob, - FlinkApplicationDualRunning, - FlinkApplicationTeardown, -} - -func IsRunningPhase(phase FlinkApplicationPhase) bool { - return phase == FlinkApplicationRunning || phase == FlinkApplicationDeployFailed -} - -func IsBlueGreenDeploymentMode(mode DeploymentMode) bool { - // Backaward compatibility between v1beta1 and v1beta2 - if mode == DeploymentModeDual { - return false - } - return mode == DeploymentModeBlueGreen -} - -func GetMaxRunningJobs(mode DeploymentMode) int32 { - if IsBlueGreenDeploymentMode(mode) { - return int32(2) - } - return int32(1) -} - -type DeploymentMode string - -const ( - DeploymentModeSingle DeploymentMode = "Single" - DeploymentModeDual DeploymentMode = "Dual" - DeploymentModeBlueGreen DeploymentMode = "BlueGreen" -) - -type DeleteMode string - -const ( - DeleteModeSavepoint DeleteMode = "Savepoint" - DeleteModeForceCancel DeleteMode = "ForceCancel" - DeleteModeNone DeleteMode = "None" -) - -type HealthStatus string - -const ( - Green HealthStatus = "Green" - Yellow HealthStatus = "Yellow" - Red HealthStatus = "Red" -) - -type JobState string - -const ( - Created JobState = "CREATED" - Running JobState = "RUNNING" - Failing JobState = "FAILING" - Failed JobState = "FAILED" - Cancelling JobState = "CANCELLING" - Canceled JobState = "CANCELED" - Finished JobState = "FINISHED" - Restarting JobState = "RESTARTING" - Suspended JobState = "SUSPENDED" - Reconciling JobState = "RECONCILING" -) - -// FlinkApplicationError implements the error interface to make error handling more structured -type FlinkApplicationError struct { - AppError string `json:"appError,omitempty"` - Method FlinkMethod `json:"method,omitempty"` - ErrorCode string `json:"errorCode,omitempty"` - IsRetryable bool `json:"isRetryable,omitempty"` - IsFailFast bool `json:"isFailFast,omitempty"` - 
MaxRetries int32 `json:"maxRetries,omitempty"` - LastErrorUpdateTime *metav1.Time `json:"lastErrorUpdateTime,omitempty"` -} - -func (f *FlinkApplicationError) Error() string { - return f.AppError -} - -type FlinkMethod string - -const ( - CancelJobWithSavepoint FlinkMethod = "CancelJobWithSavepoint" - ForceCancelJob FlinkMethod = "ForceCancelJob" - SubmitJob FlinkMethod = "SubmitJob" - CheckSavepointStatus FlinkMethod = "CheckSavepointStatus" - GetJobs FlinkMethod = "GetJobs" - GetClusterOverview FlinkMethod = "GetClusterOverview" - GetLatestCheckpoint FlinkMethod = "GetLatestCheckpoint" - GetJobConfig FlinkMethod = "GetJobConfig" - GetTaskManagers FlinkMethod = "GetTaskManagers" - GetCheckpointCounts FlinkMethod = "GetCheckpointCounts" - GetJobOverview FlinkMethod = "GetJobOverview" -) diff --git a/pkg/apis/app/v1beta2/zz_generated.deepcopy.go b/pkg/apis/app/v1beta2/zz_generated.deepcopy.go deleted file mode 100644 index c0c3c04c..00000000 --- a/pkg/apis/app/v1beta2/zz_generated.deepcopy.go +++ /dev/null @@ -1,407 +0,0 @@ -// +build !ignore_autogenerated - -// Code generated by deepcopy-gen. DO NOT EDIT. - -package v1beta2 - -import ( - v1 "k8s.io/api/core/v1" - runtime "k8s.io/apimachinery/pkg/runtime" -) - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *EnvironmentConfig) DeepCopyInto(out *EnvironmentConfig) { - *out = *in - if in.EnvFrom != nil { - in, out := &in.EnvFrom, &out.EnvFrom - *out = make([]v1.EnvFromSource, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - if in.Env != nil { - in, out := &in.Env, &out.Env - *out = make([]v1.EnvVar, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EnvironmentConfig. 
-func (in *EnvironmentConfig) DeepCopy() *EnvironmentConfig { - if in == nil { - return nil - } - out := new(EnvironmentConfig) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *FlinkApplication) DeepCopyInto(out *FlinkApplication) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - in.Spec.DeepCopyInto(&out.Spec) - in.Status.DeepCopyInto(&out.Status) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlinkApplication. -func (in *FlinkApplication) DeepCopy() *FlinkApplication { - if in == nil { - return nil - } - out := new(FlinkApplication) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *FlinkApplication) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *FlinkApplicationError) DeepCopyInto(out *FlinkApplicationError) { - *out = *in - if in.LastErrorUpdateTime != nil { - in, out := &in.LastErrorUpdateTime, &out.LastErrorUpdateTime - *out = (*in).DeepCopy() - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlinkApplicationError. -func (in *FlinkApplicationError) DeepCopy() *FlinkApplicationError { - if in == nil { - return nil - } - out := new(FlinkApplicationError) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *FlinkApplicationList) DeepCopyInto(out *FlinkApplicationList) { - *out = *in - out.TypeMeta = in.TypeMeta - out.ListMeta = in.ListMeta - if in.Items != nil { - in, out := &in.Items, &out.Items - *out = make([]FlinkApplication, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlinkApplicationList. -func (in *FlinkApplicationList) DeepCopy() *FlinkApplicationList { - if in == nil { - return nil - } - out := new(FlinkApplicationList) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *FlinkApplicationList) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *FlinkApplicationSpec) DeepCopyInto(out *FlinkApplicationSpec) { - *out = *in - if in.ImagePullSecrets != nil { - in, out := &in.ImagePullSecrets, &out.ImagePullSecrets - *out = make([]v1.LocalObjectReference, len(*in)) - copy(*out, *in) - } - if in.SecurityContext != nil { - in, out := &in.SecurityContext, &out.SecurityContext - *out = new(v1.PodSecurityContext) - (*in).DeepCopyInto(*out) - } - in.FlinkConfig.DeepCopyInto(&out.FlinkConfig) - in.TaskManagerConfig.DeepCopyInto(&out.TaskManagerConfig) - in.JobManagerConfig.DeepCopyInto(&out.JobManagerConfig) - out.SavepointInfo = in.SavepointInfo - if in.RPCPort != nil { - in, out := &in.RPCPort, &out.RPCPort - *out = new(int32) - **out = **in - } - if in.BlobPort != nil { - in, out := &in.BlobPort, &out.BlobPort - *out = new(int32) - **out = **in - } - if in.QueryPort != nil { - in, out := &in.QueryPort, &out.QueryPort - *out = new(int32) - **out = **in - } - if in.UIPort != nil { - in, out := &in.UIPort, &out.UIPort - *out = new(int32) - **out 
= **in - } - if in.MetricsQueryPort != nil { - in, out := &in.MetricsQueryPort, &out.MetricsQueryPort - *out = new(int32) - **out = **in - } - if in.Volumes != nil { - in, out := &in.Volumes, &out.Volumes - *out = make([]v1.Volume, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - if in.VolumeMounts != nil { - in, out := &in.VolumeMounts, &out.VolumeMounts - *out = make([]v1.VolumeMount, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - if in.MaxCheckpointRestoreAgeSeconds != nil { - in, out := &in.MaxCheckpointRestoreAgeSeconds, &out.MaxCheckpointRestoreAgeSeconds - *out = new(int32) - **out = **in - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlinkApplicationSpec. -func (in *FlinkApplicationSpec) DeepCopy() *FlinkApplicationSpec { - if in == nil { - return nil - } - out := new(FlinkApplicationSpec) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *FlinkApplicationStatus) DeepCopyInto(out *FlinkApplicationStatus) { - *out = *in - if in.StartedAt != nil { - in, out := &in.StartedAt, &out.StartedAt - *out = (*in).DeepCopy() - } - if in.LastUpdatedAt != nil { - in, out := &in.LastUpdatedAt, &out.LastUpdatedAt - *out = (*in).DeepCopy() - } - out.ClusterStatus = in.ClusterStatus - in.JobStatus.DeepCopyInto(&out.JobStatus) - if in.VersionStatuses != nil { - in, out := &in.VersionStatuses, &out.VersionStatuses - *out = make([]FlinkApplicationVersionStatus, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - if in.LastSeenError != nil { - in, out := &in.LastSeenError, &out.LastSeenError - *out = new(FlinkApplicationError) - (*in).DeepCopyInto(*out) - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlinkApplicationStatus. 
-func (in *FlinkApplicationStatus) DeepCopy() *FlinkApplicationStatus { - if in == nil { - return nil - } - out := new(FlinkApplicationStatus) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *FlinkApplicationVersionStatus) DeepCopyInto(out *FlinkApplicationVersionStatus) { - *out = *in - out.ClusterStatus = in.ClusterStatus - in.JobStatus.DeepCopyInto(&out.JobStatus) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlinkApplicationVersionStatus. -func (in *FlinkApplicationVersionStatus) DeepCopy() *FlinkApplicationVersionStatus { - if in == nil { - return nil - } - out := new(FlinkApplicationVersionStatus) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *FlinkClusterStatus) DeepCopyInto(out *FlinkClusterStatus) { - *out = *in - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlinkClusterStatus. -func (in *FlinkClusterStatus) DeepCopy() *FlinkClusterStatus { - if in == nil { - return nil - } - out := new(FlinkClusterStatus) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *FlinkJobStatus) DeepCopyInto(out *FlinkJobStatus) { - *out = *in - if in.StartTime != nil { - in, out := &in.StartTime, &out.StartTime - *out = (*in).DeepCopy() - } - if in.RestoreTime != nil { - in, out := &in.RestoreTime, &out.RestoreTime - *out = (*in).DeepCopy() - } - if in.LastFailingTime != nil { - in, out := &in.LastFailingTime, &out.LastFailingTime - *out = (*in).DeepCopy() - } - if in.LastCheckpointTime != nil { - in, out := &in.LastCheckpointTime, &out.LastCheckpointTime - *out = (*in).DeepCopy() - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlinkJobStatus. -func (in *FlinkJobStatus) DeepCopy() *FlinkJobStatus { - if in == nil { - return nil - } - out := new(FlinkJobStatus) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *JobManagerConfig) DeepCopyInto(out *JobManagerConfig) { - *out = *in - if in.Resources != nil { - in, out := &in.Resources, &out.Resources - *out = new(v1.ResourceRequirements) - (*in).DeepCopyInto(*out) - } - in.EnvConfig.DeepCopyInto(&out.EnvConfig) - if in.Replicas != nil { - in, out := &in.Replicas, &out.Replicas - *out = new(int32) - **out = **in - } - if in.OffHeapMemoryFraction != nil { - in, out := &in.OffHeapMemoryFraction, &out.OffHeapMemoryFraction - *out = new(float64) - **out = **in - } - if in.NodeSelector != nil { - in, out := &in.NodeSelector, &out.NodeSelector - *out = make(map[string]string, len(*in)) - for key, val := range *in { - (*out)[key] = val - } - } - if in.Tolerations != nil { - in, out := &in.Tolerations, &out.Tolerations - *out = make([]v1.Toleration, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new JobManagerConfig. 
-func (in *JobManagerConfig) DeepCopy() *JobManagerConfig { - if in == nil { - return nil - } - out := new(JobManagerConfig) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *SavepointInfo) DeepCopyInto(out *SavepointInfo) { - *out = *in - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SavepointInfo. -func (in *SavepointInfo) DeepCopy() *SavepointInfo { - if in == nil { - return nil - } - out := new(SavepointInfo) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *TaskManagerConfig) DeepCopyInto(out *TaskManagerConfig) { - *out = *in - if in.Resources != nil { - in, out := &in.Resources, &out.Resources - *out = new(v1.ResourceRequirements) - (*in).DeepCopyInto(*out) - } - in.EnvConfig.DeepCopyInto(&out.EnvConfig) - if in.TaskSlots != nil { - in, out := &in.TaskSlots, &out.TaskSlots - *out = new(int32) - **out = **in - } - if in.OffHeapMemoryFraction != nil { - in, out := &in.OffHeapMemoryFraction, &out.OffHeapMemoryFraction - *out = new(float64) - **out = **in - } - if in.NodeSelector != nil { - in, out := &in.NodeSelector, &out.NodeSelector - *out = make(map[string]string, len(*in)) - for key, val := range *in { - (*out)[key] = val - } - } - if in.Tolerations != nil { - in, out := &in.Tolerations, &out.Tolerations - *out = make([]v1.Toleration, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TaskManagerConfig. 
-func (in *TaskManagerConfig) DeepCopy() *TaskManagerConfig { - if in == nil { - return nil - } - out := new(TaskManagerConfig) - in.DeepCopyInto(out) - return out -} diff --git a/pkg/client/clientset/versioned/clientset.go b/pkg/client/clientset/versioned/clientset.go index 8f818b29..9aa48cc5 100644 --- a/pkg/client/clientset/versioned/clientset.go +++ b/pkg/client/clientset/versioned/clientset.go @@ -4,7 +4,6 @@ package versioned import ( flinkv1beta1 "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta1" - flinkv1beta2 "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta2" discovery "k8s.io/client-go/discovery" rest "k8s.io/client-go/rest" flowcontrol "k8s.io/client-go/util/flowcontrol" @@ -13,7 +12,6 @@ import ( type Interface interface { Discovery() discovery.DiscoveryInterface FlinkV1beta1() flinkv1beta1.FlinkV1beta1Interface - FlinkV1beta2() flinkv1beta2.FlinkV1beta2Interface } // Clientset contains the clients for groups. 
Each group has exactly one @@ -21,7 +19,6 @@ type Interface interface { type Clientset struct { *discovery.DiscoveryClient flinkV1beta1 *flinkv1beta1.FlinkV1beta1Client - flinkV1beta2 *flinkv1beta2.FlinkV1beta2Client } // FlinkV1beta1 retrieves the FlinkV1beta1Client @@ -29,11 +26,6 @@ func (c *Clientset) FlinkV1beta1() flinkv1beta1.FlinkV1beta1Interface { return c.flinkV1beta1 } -// FlinkV1beta2 retrieves the FlinkV1beta2Client -func (c *Clientset) FlinkV1beta2() flinkv1beta2.FlinkV1beta2Interface { - return c.flinkV1beta2 -} - // Discovery retrieves the DiscoveryClient func (c *Clientset) Discovery() discovery.DiscoveryInterface { if c == nil { @@ -54,10 +46,6 @@ func NewForConfig(c *rest.Config) (*Clientset, error) { if err != nil { return nil, err } - cs.flinkV1beta2, err = flinkv1beta2.NewForConfig(&configShallowCopy) - if err != nil { - return nil, err - } cs.DiscoveryClient, err = discovery.NewDiscoveryClientForConfig(&configShallowCopy) if err != nil { @@ -71,7 +59,6 @@ func NewForConfig(c *rest.Config) (*Clientset, error) { func NewForConfigOrDie(c *rest.Config) *Clientset { var cs Clientset cs.flinkV1beta1 = flinkv1beta1.NewForConfigOrDie(c) - cs.flinkV1beta2 = flinkv1beta2.NewForConfigOrDie(c) cs.DiscoveryClient = discovery.NewDiscoveryClientForConfigOrDie(c) return &cs @@ -81,7 +68,6 @@ func NewForConfigOrDie(c *rest.Config) *Clientset { func New(c rest.Interface) *Clientset { var cs Clientset cs.flinkV1beta1 = flinkv1beta1.New(c) - cs.flinkV1beta2 = flinkv1beta2.New(c) cs.DiscoveryClient = discovery.NewDiscoveryClient(c) return &cs diff --git a/pkg/client/clientset/versioned/fake/clientset_generated.go b/pkg/client/clientset/versioned/fake/clientset_generated.go index 68b1013a..664662d9 100644 --- a/pkg/client/clientset/versioned/fake/clientset_generated.go +++ b/pkg/client/clientset/versioned/fake/clientset_generated.go @@ -6,8 +6,6 @@ import ( clientset "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned" flinkv1beta1 
"github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta1" fakeflinkv1beta1 "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta1/fake" - flinkv1beta2 "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta2" - fakeflinkv1beta2 "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta2/fake" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/watch" "k8s.io/client-go/discovery" @@ -61,8 +59,3 @@ var _ clientset.Interface = &Clientset{} func (c *Clientset) FlinkV1beta1() flinkv1beta1.FlinkV1beta1Interface { return &fakeflinkv1beta1.FakeFlinkV1beta1{Fake: &c.Fake} } - -// FlinkV1beta2 retrieves the FlinkV1beta2Client -func (c *Clientset) FlinkV1beta2() flinkv1beta2.FlinkV1beta2Interface { - return &fakeflinkv1beta2.FakeFlinkV1beta2{Fake: &c.Fake} -} diff --git a/pkg/client/clientset/versioned/fake/register.go b/pkg/client/clientset/versioned/fake/register.go index 86b7a18d..61bc01de 100644 --- a/pkg/client/clientset/versioned/fake/register.go +++ b/pkg/client/clientset/versioned/fake/register.go @@ -4,7 +4,6 @@ package fake import ( flinkv1beta1 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" - flinkv1beta2 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" schema "k8s.io/apimachinery/pkg/runtime/schema" @@ -17,7 +16,6 @@ var codecs = serializer.NewCodecFactory(scheme) var parameterCodec = runtime.NewParameterCodec(scheme) var localSchemeBuilder = runtime.SchemeBuilder{ flinkv1beta1.AddToScheme, - flinkv1beta2.AddToScheme, } // AddToScheme adds all types of this clientset into the given scheme. 
This allows composition diff --git a/pkg/client/clientset/versioned/scheme/register.go b/pkg/client/clientset/versioned/scheme/register.go index ec838418..8b472789 100644 --- a/pkg/client/clientset/versioned/scheme/register.go +++ b/pkg/client/clientset/versioned/scheme/register.go @@ -4,7 +4,6 @@ package scheme import ( flinkv1beta1 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" - flinkv1beta2 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" schema "k8s.io/apimachinery/pkg/runtime/schema" @@ -17,7 +16,6 @@ var Codecs = serializer.NewCodecFactory(Scheme) var ParameterCodec = runtime.NewParameterCodec(Scheme) var localSchemeBuilder = runtime.SchemeBuilder{ flinkv1beta1.AddToScheme, - flinkv1beta2.AddToScheme, } // AddToScheme adds all types of this clientset into the given scheme. This allows composition diff --git a/pkg/client/clientset/versioned/typed/app/v1beta2/app_client.go b/pkg/client/clientset/versioned/typed/app/v1beta2/app_client.go index a9c7d360..5fdf3da5 100644 --- a/pkg/client/clientset/versioned/typed/app/v1beta2/app_client.go +++ b/pkg/client/clientset/versioned/typed/app/v1beta2/app_client.go @@ -1,9 +1,9 @@ // Code generated by client-gen. DO NOT EDIT. 
-package v1beta2 +package v1beta1 import ( - v1beta2 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + v1beta1 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/scheme" serializer "k8s.io/apimachinery/pkg/runtime/serializer" rest "k8s.io/client-go/rest" @@ -52,7 +52,7 @@ func New(c rest.Interface) *FlinkV1beta2Client { } func setConfigDefaults(config *rest.Config) error { - gv := v1beta2.SchemeGroupVersion + gv := v1beta1.SchemeGroupVersion config.GroupVersion = &gv config.APIPath = "/apis" config.NegotiatedSerializer = serializer.DirectCodecFactory{CodecFactory: scheme.Codecs} diff --git a/pkg/client/clientset/versioned/typed/app/v1beta2/doc.go b/pkg/client/clientset/versioned/typed/app/v1beta2/doc.go index ebe38377..897c0995 100644 --- a/pkg/client/clientset/versioned/typed/app/v1beta2/doc.go +++ b/pkg/client/clientset/versioned/typed/app/v1beta2/doc.go @@ -1,4 +1,4 @@ // Code generated by client-gen. DO NOT EDIT. // This package has the automatically generated typed clients. 
-package v1beta2 +package v1beta1 diff --git a/pkg/client/clientset/versioned/typed/app/v1beta2/fake/fake_app_client.go b/pkg/client/clientset/versioned/typed/app/v1beta2/fake/fake_app_client.go index 4011f240..d32b1655 100644 --- a/pkg/client/clientset/versioned/typed/app/v1beta2/fake/fake_app_client.go +++ b/pkg/client/clientset/versioned/typed/app/v1beta2/fake/fake_app_client.go @@ -3,7 +3,7 @@ package fake import ( - v1beta2 "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta2" + v1beta1 "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/typed/app/v1beta1" rest "k8s.io/client-go/rest" testing "k8s.io/client-go/testing" ) @@ -12,7 +12,7 @@ type FakeFlinkV1beta2 struct { *testing.Fake } -func (c *FakeFlinkV1beta2) FlinkApplications(namespace string) v1beta2.FlinkApplicationInterface { +func (c *FakeFlinkV1beta2) FlinkApplications(namespace string) v1beta1.FlinkApplicationInterface { return &FakeFlinkApplications{c, namespace} } diff --git a/pkg/client/clientset/versioned/typed/app/v1beta2/fake/fake_flinkapplication.go b/pkg/client/clientset/versioned/typed/app/v1beta2/fake/fake_flinkapplication.go index c9c40a10..1ee1d653 100644 --- a/pkg/client/clientset/versioned/typed/app/v1beta2/fake/fake_flinkapplication.go +++ b/pkg/client/clientset/versioned/typed/app/v1beta2/fake/fake_flinkapplication.go @@ -3,7 +3,7 @@ package fake import ( - v1beta2 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + v1beta1 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" labels "k8s.io/apimachinery/pkg/labels" schema "k8s.io/apimachinery/pkg/runtime/schema" @@ -18,25 +18,25 @@ type FakeFlinkApplications struct { ns string } -var flinkapplicationsResource = schema.GroupVersionResource{Group: "flink.k8s.io", Version: "v1beta2", Resource: "flinkapplications"} +var flinkapplicationsResource = schema.GroupVersionResource{Group: "flink.k8s.io", Version: "v1beta1", Resource: 
"flinkapplications"} -var flinkapplicationsKind = schema.GroupVersionKind{Group: "flink.k8s.io", Version: "v1beta2", Kind: "FlinkApplication"} +var flinkapplicationsKind = schema.GroupVersionKind{Group: "flink.k8s.io", Version: "v1beta1", Kind: "FlinkApplication"} // Get takes name of the flinkApplication, and returns the corresponding flinkApplication object, and an error if there is any. -func (c *FakeFlinkApplications) Get(name string, options v1.GetOptions) (result *v1beta2.FlinkApplication, err error) { +func (c *FakeFlinkApplications) Get(name string, options v1.GetOptions) (result *v1beta1.FlinkApplication, err error) { obj, err := c.Fake. - Invokes(testing.NewGetAction(flinkapplicationsResource, c.ns, name), &v1beta2.FlinkApplication{}) + Invokes(testing.NewGetAction(flinkapplicationsResource, c.ns, name), &v1beta1.FlinkApplication{}) if obj == nil { return nil, err } - return obj.(*v1beta2.FlinkApplication), err + return obj.(*v1beta1.FlinkApplication), err } // List takes label and field selectors, and returns the list of FlinkApplications that match those selectors. -func (c *FakeFlinkApplications) List(opts v1.ListOptions) (result *v1beta2.FlinkApplicationList, err error) { +func (c *FakeFlinkApplications) List(opts v1.ListOptions) (result *v1beta1.FlinkApplicationList, err error) { obj, err := c.Fake. 
- Invokes(testing.NewListAction(flinkapplicationsResource, flinkapplicationsKind, c.ns, opts), &v1beta2.FlinkApplicationList{}) + Invokes(testing.NewListAction(flinkapplicationsResource, flinkapplicationsKind, c.ns, opts), &v1beta1.FlinkApplicationList{}) if obj == nil { return nil, err @@ -46,8 +46,8 @@ func (c *FakeFlinkApplications) List(opts v1.ListOptions) (result *v1beta2.Flink if label == nil { label = labels.Everything() } - list := &v1beta2.FlinkApplicationList{ListMeta: obj.(*v1beta2.FlinkApplicationList).ListMeta} - for _, item := range obj.(*v1beta2.FlinkApplicationList).Items { + list := &v1beta1.FlinkApplicationList{ListMeta: obj.(*v1beta1.FlinkApplicationList).ListMeta} + for _, item := range obj.(*v1beta1.FlinkApplicationList).Items { if label.Matches(labels.Set(item.Labels)) { list.Items = append(list.Items, item) } @@ -63,31 +63,31 @@ func (c *FakeFlinkApplications) Watch(opts v1.ListOptions) (watch.Interface, err } // Create takes the representation of a flinkApplication and creates it. Returns the server's representation of the flinkApplication, and an error, if there is any. -func (c *FakeFlinkApplications) Create(flinkApplication *v1beta2.FlinkApplication) (result *v1beta2.FlinkApplication, err error) { +func (c *FakeFlinkApplications) Create(flinkApplication *v1beta1.FlinkApplication) (result *v1beta1.FlinkApplication, err error) { obj, err := c.Fake. - Invokes(testing.NewCreateAction(flinkapplicationsResource, c.ns, flinkApplication), &v1beta2.FlinkApplication{}) + Invokes(testing.NewCreateAction(flinkapplicationsResource, c.ns, flinkApplication), &v1beta1.FlinkApplication{}) if obj == nil { return nil, err } - return obj.(*v1beta2.FlinkApplication), err + return obj.(*v1beta1.FlinkApplication), err } // Update takes the representation of a flinkApplication and updates it. Returns the server's representation of the flinkApplication, and an error, if there is any. 
-func (c *FakeFlinkApplications) Update(flinkApplication *v1beta2.FlinkApplication) (result *v1beta2.FlinkApplication, err error) { +func (c *FakeFlinkApplications) Update(flinkApplication *v1beta1.FlinkApplication) (result *v1beta1.FlinkApplication, err error) { obj, err := c.Fake. - Invokes(testing.NewUpdateAction(flinkapplicationsResource, c.ns, flinkApplication), &v1beta2.FlinkApplication{}) + Invokes(testing.NewUpdateAction(flinkapplicationsResource, c.ns, flinkApplication), &v1beta1.FlinkApplication{}) if obj == nil { return nil, err } - return obj.(*v1beta2.FlinkApplication), err + return obj.(*v1beta1.FlinkApplication), err } // Delete takes name of the flinkApplication and deletes it. Returns an error if one occurs. func (c *FakeFlinkApplications) Delete(name string, options *v1.DeleteOptions) error { _, err := c.Fake. - Invokes(testing.NewDeleteAction(flinkapplicationsResource, c.ns, name), &v1beta2.FlinkApplication{}) + Invokes(testing.NewDeleteAction(flinkapplicationsResource, c.ns, name), &v1beta1.FlinkApplication{}) return err } @@ -96,17 +96,17 @@ func (c *FakeFlinkApplications) Delete(name string, options *v1.DeleteOptions) e func (c *FakeFlinkApplications) DeleteCollection(options *v1.DeleteOptions, listOptions v1.ListOptions) error { action := testing.NewDeleteCollectionAction(flinkapplicationsResource, c.ns, listOptions) - _, err := c.Fake.Invokes(action, &v1beta2.FlinkApplicationList{}) + _, err := c.Fake.Invokes(action, &v1beta1.FlinkApplicationList{}) return err } // Patch applies the patch and returns the patched flinkApplication. -func (c *FakeFlinkApplications) Patch(name string, pt types.PatchType, data []byte, subresources ...string) (result *v1beta2.FlinkApplication, err error) { +func (c *FakeFlinkApplications) Patch(name string, pt types.PatchType, data []byte, subresources ...string) (result *v1beta1.FlinkApplication, err error) { obj, err := c.Fake. 
- Invokes(testing.NewPatchSubresourceAction(flinkapplicationsResource, c.ns, name, pt, data, subresources...), &v1beta2.FlinkApplication{}) + Invokes(testing.NewPatchSubresourceAction(flinkapplicationsResource, c.ns, name, pt, data, subresources...), &v1beta1.FlinkApplication{}) if obj == nil { return nil, err } - return obj.(*v1beta2.FlinkApplication), err + return obj.(*v1beta1.FlinkApplication), err } diff --git a/pkg/client/clientset/versioned/typed/app/v1beta2/flinkapplication.go b/pkg/client/clientset/versioned/typed/app/v1beta2/flinkapplication.go index 02201294..d71015d2 100644 --- a/pkg/client/clientset/versioned/typed/app/v1beta2/flinkapplication.go +++ b/pkg/client/clientset/versioned/typed/app/v1beta2/flinkapplication.go @@ -1,11 +1,11 @@ // Code generated by client-gen. DO NOT EDIT. -package v1beta2 +package v1beta1 import ( "time" - v1beta2 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + v1beta1 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" scheme "github.com/lyft/flinkk8soperator/pkg/client/clientset/versioned/scheme" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" types "k8s.io/apimachinery/pkg/types" @@ -21,14 +21,14 @@ type FlinkApplicationsGetter interface { // FlinkApplicationInterface has methods to work with FlinkApplication resources. 
type FlinkApplicationInterface interface { - Create(*v1beta2.FlinkApplication) (*v1beta2.FlinkApplication, error) - Update(*v1beta2.FlinkApplication) (*v1beta2.FlinkApplication, error) + Create(*v1beta1.FlinkApplication) (*v1beta1.FlinkApplication, error) + Update(*v1beta1.FlinkApplication) (*v1beta1.FlinkApplication, error) Delete(name string, options *v1.DeleteOptions) error DeleteCollection(options *v1.DeleteOptions, listOptions v1.ListOptions) error - Get(name string, options v1.GetOptions) (*v1beta2.FlinkApplication, error) - List(opts v1.ListOptions) (*v1beta2.FlinkApplicationList, error) + Get(name string, options v1.GetOptions) (*v1beta1.FlinkApplication, error) + List(opts v1.ListOptions) (*v1beta1.FlinkApplicationList, error) Watch(opts v1.ListOptions) (watch.Interface, error) - Patch(name string, pt types.PatchType, data []byte, subresources ...string) (result *v1beta2.FlinkApplication, err error) + Patch(name string, pt types.PatchType, data []byte, subresources ...string) (result *v1beta1.FlinkApplication, err error) FlinkApplicationExpansion } @@ -47,8 +47,8 @@ func newFlinkApplications(c *FlinkV1beta2Client, namespace string) *flinkApplica } // Get takes name of the flinkApplication, and returns the corresponding flinkApplication object, and an error if there is any. -func (c *flinkApplications) Get(name string, options v1.GetOptions) (result *v1beta2.FlinkApplication, err error) { - result = &v1beta2.FlinkApplication{} +func (c *flinkApplications) Get(name string, options v1.GetOptions) (result *v1beta1.FlinkApplication, err error) { + result = &v1beta1.FlinkApplication{} err = c.client.Get(). Namespace(c.ns). Resource("flinkapplications"). @@ -60,12 +60,12 @@ func (c *flinkApplications) Get(name string, options v1.GetOptions) (result *v1b } // List takes label and field selectors, and returns the list of FlinkApplications that match those selectors. 
-func (c *flinkApplications) List(opts v1.ListOptions) (result *v1beta2.FlinkApplicationList, err error) { +func (c *flinkApplications) List(opts v1.ListOptions) (result *v1beta1.FlinkApplicationList, err error) { var timeout time.Duration if opts.TimeoutSeconds != nil { timeout = time.Duration(*opts.TimeoutSeconds) * time.Second } - result = &v1beta2.FlinkApplicationList{} + result = &v1beta1.FlinkApplicationList{} err = c.client.Get(). Namespace(c.ns). Resource("flinkapplications"). @@ -92,8 +92,8 @@ func (c *flinkApplications) Watch(opts v1.ListOptions) (watch.Interface, error) } // Create takes the representation of a flinkApplication and creates it. Returns the server's representation of the flinkApplication, and an error, if there is any. -func (c *flinkApplications) Create(flinkApplication *v1beta2.FlinkApplication) (result *v1beta2.FlinkApplication, err error) { - result = &v1beta2.FlinkApplication{} +func (c *flinkApplications) Create(flinkApplication *v1beta1.FlinkApplication) (result *v1beta1.FlinkApplication, err error) { + result = &v1beta1.FlinkApplication{} err = c.client.Post(). Namespace(c.ns). Resource("flinkapplications"). @@ -104,8 +104,8 @@ func (c *flinkApplications) Create(flinkApplication *v1beta2.FlinkApplication) ( } // Update takes the representation of a flinkApplication and updates it. Returns the server's representation of the flinkApplication, and an error, if there is any. -func (c *flinkApplications) Update(flinkApplication *v1beta2.FlinkApplication) (result *v1beta2.FlinkApplication, err error) { - result = &v1beta2.FlinkApplication{} +func (c *flinkApplications) Update(flinkApplication *v1beta1.FlinkApplication) (result *v1beta1.FlinkApplication, err error) { + result = &v1beta1.FlinkApplication{} err = c.client.Put(). Namespace(c.ns). Resource("flinkapplications"). 
@@ -144,8 +144,8 @@ func (c *flinkApplications) DeleteCollection(options *v1.DeleteOptions, listOpti } // Patch applies the patch and returns the patched flinkApplication. -func (c *flinkApplications) Patch(name string, pt types.PatchType, data []byte, subresources ...string) (result *v1beta2.FlinkApplication, err error) { - result = &v1beta2.FlinkApplication{} +func (c *flinkApplications) Patch(name string, pt types.PatchType, data []byte, subresources ...string) (result *v1beta1.FlinkApplication, err error) { + result = &v1beta1.FlinkApplication{} err = c.client.Patch(pt). Namespace(c.ns). Resource("flinkapplications"). diff --git a/pkg/client/clientset/versioned/typed/app/v1beta2/generated_expansion.go b/pkg/client/clientset/versioned/typed/app/v1beta2/generated_expansion.go index 28228bbf..eaf95aa7 100644 --- a/pkg/client/clientset/versioned/typed/app/v1beta2/generated_expansion.go +++ b/pkg/client/clientset/versioned/typed/app/v1beta2/generated_expansion.go @@ -1,5 +1,5 @@ // Code generated by client-gen. DO NOT EDIT. 
-package v1beta2 +package v1beta1 type FlinkApplicationExpansion interface{} diff --git a/pkg/controller/flink/client/api.go b/pkg/controller/flink/client/api.go index 8e02f3f4..76048d0b 100644 --- a/pkg/controller/flink/client/api.go +++ b/pkg/controller/flink/client/api.go @@ -7,7 +7,7 @@ import ( "strings" "time" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "net/http" @@ -114,18 +114,18 @@ func (c *FlinkJobManagerClient) GetJobConfig(ctx context.Context, url, jobID str response, err := c.executeRequest(ctx, httpGet, url, nil) if err != nil { c.metrics.getJobConfigFailureCounter.Inc(ctx) - return nil, GetRetryableError(err, v1beta2.GetJobConfig, GlobalFailure, DefaultRetries) + return nil, GetRetryableError(err, v1beta1.GetJobConfig, GlobalFailure, DefaultRetries) } if response != nil && !response.IsSuccess() { c.metrics.getJobConfigFailureCounter.Inc(ctx) logger.Errorf(ctx, fmt.Sprintf("Get Jobconfig failed with response %v", response)) - return nil, GetRetryableError(err, v1beta2.GetJobConfig, response.Status(), DefaultRetries) + return nil, GetRetryableError(err, v1beta1.GetJobConfig, response.Status(), DefaultRetries) } var jobConfigResponse JobConfigResponse if err := json.Unmarshal(response.Body(), &jobConfigResponse); err != nil { logger.Errorf(ctx, "Unable to Unmarshal jobPlanResponse %v, err: %v", response, err) - return nil, GetRetryableError(err, v1beta2.GetJobConfig, JSONUnmarshalError, DefaultRetries) + return nil, GetRetryableError(err, v1beta1.GetJobConfig, JSONUnmarshalError, DefaultRetries) } c.metrics.getJobConfigSuccessCounter.Inc(ctx) return &jobConfigResponse, nil @@ -136,19 +136,19 @@ func (c *FlinkJobManagerClient) GetClusterOverview(ctx context.Context, url stri response, err := c.executeRequest(ctx, httpGet, url, nil) if err != nil { c.metrics.getClusterFailureCounter.Inc(ctx) - return nil, GetRetryableError(err, v1beta2.GetClusterOverview, GlobalFailure, 
DefaultRetries) + return nil, GetRetryableError(err, v1beta1.GetClusterOverview, GlobalFailure, DefaultRetries) } if response != nil && !response.IsSuccess() { c.metrics.getClusterFailureCounter.Inc(ctx) if response.StatusCode() != int(http.StatusNotFound) && response.StatusCode() != int(http.StatusServiceUnavailable) { logger.Errorf(ctx, fmt.Sprintf("Get cluster overview failed with response %v", response)) } - return nil, GetRetryableError(err, v1beta2.GetClusterOverview, response.Status(), DefaultRetries) + return nil, GetRetryableError(err, v1beta1.GetClusterOverview, response.Status(), DefaultRetries) } var clusterOverviewResponse ClusterOverviewResponse if err = json.Unmarshal(response.Body(), &clusterOverviewResponse); err != nil { logger.Errorf(ctx, "Unable to Unmarshal clusterOverviewResponse %v, err: %v", response, err) - return nil, GetRetryableError(err, v1beta2.GetClusterOverview, JSONUnmarshalError, DefaultRetries) + return nil, GetRetryableError(err, v1beta1.GetClusterOverview, JSONUnmarshalError, DefaultRetries) } c.metrics.getClusterSuccessCounter.Inc(ctx) return &clusterOverviewResponse, nil @@ -187,17 +187,17 @@ func (c *FlinkJobManagerClient) CancelJobWithSavepoint(ctx context.Context, url response, err := c.executeRequest(ctx, httpPost, url, cancelJobRequest) if err != nil { c.metrics.cancelJobFailureCounter.Inc(ctx) - return "", GetRetryableError(err, v1beta2.CancelJobWithSavepoint, GlobalFailure, 5) + return "", GetRetryableError(err, v1beta1.CancelJobWithSavepoint, GlobalFailure, 5) } if response != nil && !response.IsSuccess() { c.metrics.cancelJobFailureCounter.Inc(ctx) logger.Errorf(ctx, fmt.Sprintf("Cancel job failed with response %v", response)) - return "", GetRetryableError(err, v1beta2.CancelJobWithSavepoint, response.Status(), 5) + return "", GetRetryableError(err, v1beta1.CancelJobWithSavepoint, response.Status(), 5) } var cancelJobResponse CancelJobResponse if err = json.Unmarshal(response.Body(), &cancelJobResponse); err != nil { 
logger.Errorf(ctx, "Unable to Unmarshal cancelJobResponse %v, err: %v", response, err) - return "", GetRetryableError(err, v1beta2.CancelJobWithSavepoint, JSONUnmarshalError, 5) + return "", GetRetryableError(err, v1beta1.CancelJobWithSavepoint, JSONUnmarshalError, 5) } c.metrics.cancelJobSuccessCounter.Inc(ctx) return cancelJobResponse.TriggerID, nil @@ -212,12 +212,12 @@ func (c *FlinkJobManagerClient) ForceCancelJob(ctx context.Context, url string, if err != nil { c.metrics.forceCancelJobFailureCounter.Inc(ctx) logger.Errorf(ctx, fmt.Sprintf("Force cancel job failed with error %v", err)) - return GetRetryableError(err, v1beta2.ForceCancelJob, GlobalFailure, DefaultRetries) + return GetRetryableError(err, v1beta1.ForceCancelJob, GlobalFailure, DefaultRetries) } if response != nil && !response.IsSuccess() { c.metrics.forceCancelJobFailureCounter.Inc(ctx) logger.Errorf(ctx, fmt.Sprintf("Force cancel job failed with response %v", response)) - return GetRetryableError(err, v1beta2.ForceCancelJob, response.Status(), DefaultRetries) + return GetRetryableError(err, v1beta1.ForceCancelJob, response.Status(), DefaultRetries) } c.metrics.forceCancelJobSuccessCounter.Inc(ctx) @@ -231,7 +231,7 @@ func (c *FlinkJobManagerClient) SubmitJob(ctx context.Context, url string, jarID response, err := c.executeRequest(ctx, httpPost, url, submitJobRequest) if err != nil { c.metrics.submitJobFailureCounter.Inc(ctx) - return nil, GetRetryableError(err, v1beta2.SubmitJob, GlobalFailure, DefaultRetries) + return nil, GetRetryableError(err, v1beta1.SubmitJob, GlobalFailure, DefaultRetries) } if response != nil && !response.IsSuccess() { c.metrics.submitJobFailureCounter.Inc(ctx) @@ -241,18 +241,18 @@ func (c *FlinkJobManagerClient) SubmitJob(ctx context.Context, url string, jarID // in those cases body := response.String() if strings.Contains(body, programInvocationException) || strings.Contains(body, jobSubmissionException) { - return nil, GetNonRetryableErrorWithMessage(err, 
v1beta2.SubmitJob, response.Status(), body) + return nil, GetNonRetryableErrorWithMessage(err, v1beta1.SubmitJob, response.Status(), body) } - return nil, GetRetryableErrorWithMessage(err, v1beta2.SubmitJob, response.Status(), DefaultRetries, string(response.Body())) + return nil, GetRetryableErrorWithMessage(err, v1beta1.SubmitJob, response.Status(), DefaultRetries, string(response.Body())) } - return nil, GetNonRetryableErrorWithMessage(err, v1beta2.SubmitJob, response.Status(), string(response.Body())) + return nil, GetNonRetryableErrorWithMessage(err, v1beta1.SubmitJob, response.Status(), string(response.Body())) } var submitJobResponse SubmitJobResponse if err = json.Unmarshal(response.Body(), &submitJobResponse); err != nil { logger.Errorf(ctx, "Unable to Unmarshal submitJobResponse %v, err: %v", response, err) - return nil, GetRetryableErrorWithMessage(err, v1beta2.SubmitJob, response.Status(), DefaultRetries, JSONUnmarshalError) + return nil, GetRetryableErrorWithMessage(err, v1beta1.SubmitJob, response.Status(), DefaultRetries, JSONUnmarshalError) } c.metrics.submitJobSuccessCounter.Inc(ctx) @@ -266,17 +266,17 @@ func (c *FlinkJobManagerClient) CheckSavepointStatus(ctx context.Context, url st response, err := c.executeRequest(ctx, httpGet, url, nil) if err != nil { c.metrics.checkSavepointFailureCounter.Inc(ctx) - return nil, GetRetryableError(err, v1beta2.CheckSavepointStatus, GlobalFailure, checkSavepointStatusRetries) + return nil, GetRetryableError(err, v1beta1.CheckSavepointStatus, GlobalFailure, checkSavepointStatusRetries) } if response != nil && !response.IsSuccess() { c.metrics.checkSavepointFailureCounter.Inc(ctx) logger.Errorf(ctx, fmt.Sprintf("Check savepoint status failed with response %v", response)) - return nil, GetRetryableError(err, v1beta2.CheckSavepointStatus, response.Status(), checkSavepointStatusRetries) + return nil, GetRetryableError(err, v1beta1.CheckSavepointStatus, response.Status(), checkSavepointStatusRetries) } var 
savepointResponse SavepointResponse if err = json.Unmarshal(response.Body(), &savepointResponse); err != nil { logger.Errorf(ctx, "Unable to Unmarshal savepointResponse %v, err: %v", response, err) - return nil, GetRetryableError(err, v1beta2.CheckSavepointStatus, JSONUnmarshalError, checkSavepointStatusRetries) + return nil, GetRetryableError(err, v1beta1.CheckSavepointStatus, JSONUnmarshalError, checkSavepointStatusRetries) } c.metrics.cancelJobSuccessCounter.Inc(ctx) return &savepointResponse, nil @@ -287,18 +287,18 @@ func (c *FlinkJobManagerClient) GetJobs(ctx context.Context, url string) (*GetJo response, err := c.executeRequest(ctx, httpGet, url, nil) if err != nil { c.metrics.getJobsFailureCounter.Inc(ctx) - return nil, GetRetryableError(err, v1beta2.GetJobs, GlobalFailure, DefaultRetries) + return nil, GetRetryableError(err, v1beta1.GetJobs, GlobalFailure, DefaultRetries) } if response != nil && !response.IsSuccess() { c.metrics.getJobsFailureCounter.Inc(ctx) logger.Errorf(ctx, fmt.Sprintf("GetJobs failed with response %v", response)) - return nil, GetRetryableError(err, v1beta2.GetJobs, response.Status(), DefaultRetries) + return nil, GetRetryableError(err, v1beta1.GetJobs, response.Status(), DefaultRetries) } var getJobsResponse GetJobsResponse if err = json.Unmarshal(response.Body(), &getJobsResponse); err != nil { logger.Errorf(ctx, "%v", getJobsResponse) logger.Errorf(ctx, "Unable to Unmarshal getJobsResponse %v, err: %v", response, err) - return nil, GetRetryableError(err, v1beta2.GetJobs, response.Status(), DefaultRetries) + return nil, GetRetryableError(err, v1beta1.GetJobs, response.Status(), DefaultRetries) } c.metrics.getJobsSuccessCounter.Inc(ctx) return &getJobsResponse, nil @@ -309,11 +309,11 @@ func (c *FlinkJobManagerClient) GetLatestCheckpoint(ctx context.Context, url str response, err := c.executeRequest(ctx, httpGet, endpoint, nil) if err != nil { c.metrics.getCheckpointsFailureCounter.Inc(ctx) - return nil, GetRetryableError(err, 
v1beta2.GetLatestCheckpoint, GlobalFailure, DefaultRetries) + return nil, GetRetryableError(err, v1beta1.GetLatestCheckpoint, GlobalFailure, DefaultRetries) } if response != nil && !response.IsSuccess() { c.metrics.getCheckpointsFailureCounter.Inc(ctx) - return nil, GetRetryableError(err, v1beta2.GetLatestCheckpoint, response.Status(), DefaultRetries) + return nil, GetRetryableError(err, v1beta1.GetLatestCheckpoint, response.Status(), DefaultRetries) } var checkpointResponse CheckpointResponse @@ -329,11 +329,11 @@ func (c *FlinkJobManagerClient) GetTaskManagers(ctx context.Context, url string) endpoint := url + taskmanagersURL response, err := c.executeRequest(ctx, httpGet, endpoint, nil) if err != nil { - return nil, GetRetryableError(err, v1beta2.GetTaskManagers, GlobalFailure, DefaultRetries) + return nil, GetRetryableError(err, v1beta1.GetTaskManagers, GlobalFailure, DefaultRetries) } if response != nil && !response.IsSuccess() { - return nil, GetRetryableError(err, v1beta2.GetTaskManagers, response.Status(), DefaultRetries) + return nil, GetRetryableError(err, v1beta1.GetTaskManagers, response.Status(), DefaultRetries) } var taskmanagerResponse TaskManagersResponse @@ -350,11 +350,11 @@ func (c *FlinkJobManagerClient) GetCheckpointCounts(ctx context.Context, url str response, err := c.executeRequest(ctx, httpGet, endpoint, nil) if err != nil { c.metrics.getCheckpointsFailureCounter.Inc(ctx) - return nil, GetRetryableError(err, v1beta2.GetCheckpointCounts, GlobalFailure, DefaultRetries) + return nil, GetRetryableError(err, v1beta1.GetCheckpointCounts, GlobalFailure, DefaultRetries) } if response != nil && !response.IsSuccess() { c.metrics.getCheckpointsFailureCounter.Inc(ctx) - return nil, GetRetryableError(err, v1beta2.GetCheckpointCounts, response.Status(), DefaultRetries) + return nil, GetRetryableError(err, v1beta1.GetCheckpointCounts, response.Status(), DefaultRetries) } var checkpointResponse CheckpointResponse @@ -370,11 +370,11 @@ func (c 
*FlinkJobManagerClient) GetJobOverview(ctx context.Context, url string, endpoint := fmt.Sprintf(url+GetJobsOverviewURL, jobID) response, err := c.executeRequest(ctx, httpGet, endpoint, nil) if err != nil { - return nil, GetRetryableError(err, v1beta2.GetJobOverview, GlobalFailure, DefaultRetries) + return nil, GetRetryableError(err, v1beta1.GetJobOverview, GlobalFailure, DefaultRetries) } if response != nil && !response.IsSuccess() { c.metrics.getCheckpointsFailureCounter.Inc(ctx) - return nil, GetRetryableError(err, v1beta2.GetJobOverview, response.Status(), DefaultRetries) + return nil, GetRetryableError(err, v1beta1.GetJobOverview, response.Status(), DefaultRetries) } var jobOverviewResponse FlinkJobOverview diff --git a/pkg/controller/flink/client/api_test.go b/pkg/controller/flink/client/api_test.go index f6ce2fa1..f2fd582e 100644 --- a/pkg/controller/flink/client/api_test.go +++ b/pkg/controller/flink/client/api_test.go @@ -4,7 +4,7 @@ import ( "context" "testing" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "github.com/jarcoal/httpmock" mockScope "github.com/lyft/flytestdlib/promutils" @@ -359,7 +359,7 @@ func TestSubmitStartupFail(t *testing.T) { Parallelism: 10, }) assert.Nil(t, resp) - flinkAppError, _ := err.(*v1beta2.FlinkApplicationError) + flinkAppError, _ := err.(*v1beta1.FlinkApplicationError) assert.True(t, flinkAppError.IsFailFast) assert.EqualError(t, err, "SubmitJob call failed with status 500 and message '"+ @@ -379,7 +379,7 @@ func TestIncompatibleSavepointFail(t *testing.T) { Parallelism: 10, }) assert.Nil(t, resp) - flinkAppError, _ := err.(*v1beta2.FlinkApplicationError) + flinkAppError, _ := err.(*v1beta1.FlinkApplicationError) assert.True(t, flinkAppError.IsFailFast) assert.EqualError(t, err, "SubmitJob call failed with status 500 and message '"+ diff --git a/pkg/controller/flink/client/error_handler.go b/pkg/controller/flink/client/error_handler.go index 
0e059e45..01bbf33e 100644 --- a/pkg/controller/flink/client/error_handler.go +++ b/pkg/controller/flink/client/error_handler.go @@ -7,7 +7,7 @@ import ( v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "github.com/pkg/errors" "k8s.io/apimachinery/pkg/util/clock" @@ -21,25 +21,25 @@ const ( NoRetries = 0 ) -func GetRetryableError(err error, method v1beta2.FlinkMethod, errorCode string, maxRetries int32) error { +func GetRetryableError(err error, method v1beta1.FlinkMethod, errorCode string, maxRetries int32) error { return GetRetryableErrorWithMessage(err, method, errorCode, maxRetries, "") } -func GetRetryableErrorWithMessage(err error, method v1beta2.FlinkMethod, errorCode string, maxRetries int32, message string) error { +func GetRetryableErrorWithMessage(err error, method v1beta1.FlinkMethod, errorCode string, maxRetries int32, message string) error { appError := getErrorValue(err, method, errorCode, message) return NewFlinkApplicationError(appError.Error(), method, errorCode, true, false, maxRetries) } -func GetNonRetryableError(err error, method v1beta2.FlinkMethod, errorCode string) error { +func GetNonRetryableError(err error, method v1beta1.FlinkMethod, errorCode string) error { return GetNonRetryableErrorWithMessage(err, method, errorCode, "") } -func GetNonRetryableErrorWithMessage(err error, method v1beta2.FlinkMethod, errorCode string, message string) error { +func GetNonRetryableErrorWithMessage(err error, method v1beta1.FlinkMethod, errorCode string, message string) error { appError := getErrorValue(err, method, errorCode, message) return NewFlinkApplicationError(appError.Error(), method, errorCode, false, true, NoRetries) } -func getErrorValue(err error, method v1beta2.FlinkMethod, errorCode string, message string) error { +func getErrorValue(err error, method v1beta1.FlinkMethod, errorCode string, message string) error { if err == nil { 
return errors.New(fmt.Sprintf("%v call failed with status %v and message '%s'", method, errorCode, message)) } @@ -76,7 +76,7 @@ func (r RetryHandler) IsErrorRetryable(err error) bool { if err == nil { return false } - flinkAppError, ok := err.(*v1beta2.FlinkApplicationError) + flinkAppError, ok := err.(*v1beta1.FlinkApplicationError) if ok && flinkAppError != nil { return flinkAppError.IsRetryable } @@ -85,7 +85,7 @@ func (r RetryHandler) IsErrorRetryable(err error) bool { } func (r RetryHandler) IsRetryRemaining(err error, retryCount int32) bool { - flinkAppError, ok := err.(*v1beta2.FlinkApplicationError) + flinkAppError, ok := err.(*v1beta1.FlinkApplicationError) if ok && flinkAppError != nil { return retryCount <= flinkAppError.MaxRetries } @@ -112,7 +112,7 @@ func (r RetryHandler) IsTimeToRetry(clock clock.Clock, lastUpdatedTime time.Time return elapsedTime >= r.GetRetryDelay(retryCount) } -func NewFlinkApplicationError(appError string, method v1beta2.FlinkMethod, errorCode string, isRetryable bool, isFailFast bool, maxRetries int32) *v1beta2.FlinkApplicationError { +func NewFlinkApplicationError(appError string, method v1beta1.FlinkMethod, errorCode string, isRetryable bool, isFailFast bool, maxRetries int32) *v1beta1.FlinkApplicationError { now := v1.Now() - return &v1beta2.FlinkApplicationError{AppError: appError, Method: method, ErrorCode: errorCode, IsRetryable: isRetryable, IsFailFast: isFailFast, MaxRetries: maxRetries, LastErrorUpdateTime: &now} + return &v1beta1.FlinkApplicationError{AppError: appError, Method: method, ErrorCode: errorCode, IsRetryable: isRetryable, IsFailFast: isFailFast, MaxRetries: maxRetries, LastErrorUpdateTime: &now} } diff --git a/pkg/controller/flink/config.go b/pkg/controller/flink/config.go index 78ffca14..1e375b32 100644 --- a/pkg/controller/flink/config.go +++ b/pkg/controller/flink/config.go @@ -6,7 +6,7 @@ import ( "sort" "strings" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + 
"github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" ) const ( @@ -36,43 +36,43 @@ func getValidFraction(x *float64, y float64) float64 { return y } -func getTaskmanagerSlots(app *v1beta2.FlinkApplication) int32 { +func getTaskmanagerSlots(app *v1beta1.FlinkApplication) int32 { return firstNonNil(app.Spec.TaskManagerConfig.TaskSlots, TaskManagerDefaultSlots) } -func getJobmanagerReplicas(app *v1beta2.FlinkApplication) int32 { +func getJobmanagerReplicas(app *v1beta1.FlinkApplication) int32 { return firstNonNil(app.Spec.JobManagerConfig.Replicas, JobManagerDefaultReplicaCount) } -func getServiceAccountName(app *v1beta2.FlinkApplication) string { +func getServiceAccountName(app *v1beta1.FlinkApplication) string { return app.Spec.ServiceAccountName } -func getRPCPort(app *v1beta2.FlinkApplication) int32 { +func getRPCPort(app *v1beta1.FlinkApplication) int32 { return firstNonNil(app.Spec.RPCPort, RPCDefaultPort) } -func getUIPort(app *v1beta2.FlinkApplication) int32 { +func getUIPort(app *v1beta1.FlinkApplication) int32 { return firstNonNil(app.Spec.UIPort, UIDefaultPort) } -func getQueryPort(app *v1beta2.FlinkApplication) int32 { +func getQueryPort(app *v1beta1.FlinkApplication) int32 { return firstNonNil(app.Spec.QueryPort, QueryDefaultPort) } -func getBlobPort(app *v1beta2.FlinkApplication) int32 { +func getBlobPort(app *v1beta1.FlinkApplication) int32 { return firstNonNil(app.Spec.BlobPort, BlobDefaultPort) } -func getInternalMetricsQueryPort(app *v1beta2.FlinkApplication) int32 { +func getInternalMetricsQueryPort(app *v1beta1.FlinkApplication) int32 { return firstNonNil(app.Spec.MetricsQueryPort, MetricsQueryDefaultPort) } -func getMaxCheckpointRestoreAgeSeconds(app *v1beta2.FlinkApplication) int32 { +func getMaxCheckpointRestoreAgeSeconds(app *v1beta1.FlinkApplication) int32 { return firstNonNil(app.Spec.MaxCheckpointRestoreAgeSeconds, MaxCheckpointRestoreAgeSeconds) } -func getTaskManagerMemory(application *v1beta2.FlinkApplication) int64 { +func 
getTaskManagerMemory(application *v1beta1.FlinkApplication) int64 { tmResources := application.Spec.TaskManagerConfig.Resources if tmResources == nil { tmResources = &TaskManagerDefaultResources @@ -81,7 +81,7 @@ func getTaskManagerMemory(application *v1beta2.FlinkApplication) int64 { return tmMemory } -func getJobManagerMemory(application *v1beta2.FlinkApplication) int64 { +func getJobManagerMemory(application *v1beta1.FlinkApplication) int64 { jmResources := application.Spec.JobManagerConfig.Resources if jmResources == nil { jmResources = &JobManagerDefaultResources @@ -95,13 +95,13 @@ func computeHeap(memoryInBytes float64, fraction float64) string { return fmt.Sprintf("%dk", kbs) } -func getTaskManagerHeapMemory(app *v1beta2.FlinkApplication) string { +func getTaskManagerHeapMemory(app *v1beta1.FlinkApplication) string { offHeapMemoryFrac := getValidFraction(app.Spec.TaskManagerConfig.OffHeapMemoryFraction, OffHeapMemoryDefaultFraction) tmMemory := float64(getTaskManagerMemory(app)) return computeHeap(tmMemory, offHeapMemoryFrac) } -func getJobManagerHeapMemory(app *v1beta2.FlinkApplication) string { +func getJobManagerHeapMemory(app *v1beta1.FlinkApplication) string { offHeapMemoryFrac := getValidFraction(app.Spec.JobManagerConfig.OffHeapMemoryFraction, OffHeapMemoryDefaultFraction) jmMemory := float64(getJobManagerMemory(app)) return computeHeap(jmMemory, offHeapMemoryFrac) @@ -109,10 +109,10 @@ func getJobManagerHeapMemory(app *v1beta2.FlinkApplication) string { // Renders the flink configuration overrides stored in FlinkApplication.FlinkConfig into a // YAML string suitable for interpolating into flink-conf.yaml. 
-func renderFlinkConfig(app *v1beta2.FlinkApplication) (string, error) { +func renderFlinkConfig(app *v1beta1.FlinkApplication) (string, error) { config := app.Spec.FlinkConfig.DeepCopy() if config == nil { - config = &v1beta2.FlinkConfig{} + config = &v1beta1.FlinkConfig{} } // we will fill this in later using the versioned service @@ -158,7 +158,7 @@ func renderFlinkConfig(app *v1beta2.FlinkApplication) (string, error) { return s.String(), nil } -func isHAEnabled(flinkConfig v1beta2.FlinkConfig) bool { +func isHAEnabled(flinkConfig v1beta1.FlinkConfig) bool { if val, ok := flinkConfig[HighAvailabilityKey]; ok { value := val.(string) if strings.ToLower(strings.TrimSpace(value)) != "none" { diff --git a/pkg/controller/flink/config_test.go b/pkg/controller/flink/config_test.go index 6bda8130..6f15baca 100644 --- a/pkg/controller/flink/config_test.go +++ b/pkg/controller/flink/config_test.go @@ -6,7 +6,7 @@ import ( "strings" "testing" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "github.com/stretchr/testify/assert" coreV1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -18,11 +18,11 @@ func TestRenderFlinkConfigOverrides(t *testing.T) { blobPort := int32(1000) offHeapMemoryFrac := 0.5 - yaml, err := renderFlinkConfig(&v1beta2.FlinkApplication{ + yaml, err := renderFlinkConfig(&v1beta1.FlinkApplication{ ObjectMeta: v1.ObjectMeta{ Name: "test-app", }, - Spec: v1beta2.FlinkApplicationSpec{ + Spec: v1beta1.FlinkApplicationSpec{ FlinkConfig: map[string]interface{}{ "akka.timeout": "5s", "taskmanager.network.memory.fraction": 0.1, @@ -30,17 +30,17 @@ func TestRenderFlinkConfigOverrides(t *testing.T) { "jobmanager.rpc.address": "wrong-address", "env.java.opts.jobmanager": "-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=39000 -XX:+UseG1GC", }, - TaskManagerConfig: v1beta2.TaskManagerConfig{ + TaskManagerConfig: v1beta1.TaskManagerConfig{ TaskSlots: &taskSlots, 
OffHeapMemoryFraction: &offHeapMemoryFrac, }, - JobManagerConfig: v1beta2.JobManagerConfig{ + JobManagerConfig: v1beta1.JobManagerConfig{ OffHeapMemoryFraction: &offHeapMemoryFrac, }, BlobPort: &blobPort, }, - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationNew, + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationNew, }, }) @@ -70,22 +70,22 @@ func TestRenderFlinkConfigOverrides(t *testing.T) { } func TestGetTaskSlots(t *testing.T) { - app1 := v1beta2.FlinkApplication{} + app1 := v1beta1.FlinkApplication{} assert.Equal(t, int32(TaskManagerDefaultSlots), getTaskmanagerSlots(&app1)) - app2 := v1beta2.FlinkApplication{} + app2 := v1beta1.FlinkApplication{} taskSlots := int32(4) app2.Spec.TaskManagerConfig.TaskSlots = &taskSlots assert.Equal(t, int32(4), getTaskmanagerSlots(&app2)) } func TestGetJobManagerReplicas(t *testing.T) { - app1 := v1beta2.FlinkApplication{} + app1 := v1beta1.FlinkApplication{} assert.Equal(t, int32(JobManagerDefaultReplicaCount), getJobmanagerReplicas(&app1)) } func TestGetJobManagerReplicasNonZero(t *testing.T) { - app1 := v1beta2.FlinkApplication{} + app1 := v1beta1.FlinkApplication{} replicas := int32(4) app1.Spec.JobManagerConfig.Replicas = &replicas @@ -93,7 +93,7 @@ func TestGetJobManagerReplicasNonZero(t *testing.T) { } func TestGetTaskManagerMemory(t *testing.T) { - app := v1beta2.FlinkApplication{} + app := v1beta1.FlinkApplication{} tmResources := coreV1.ResourceRequirements{ Requests: coreV1.ResourceList{ coreV1.ResourceCPU: resource.MustParse("2"), @@ -111,7 +111,7 @@ func TestGetTaskManagerMemory(t *testing.T) { } func TestGetJobManagerMemory(t *testing.T) { - app := v1beta2.FlinkApplication{} + app := v1beta1.FlinkApplication{} tmResources := coreV1.ResourceRequirements{ Requests: coreV1.ResourceList{ coreV1.ResourceCPU: resource.MustParse("2"), @@ -129,7 +129,7 @@ func TestGetJobManagerMemory(t *testing.T) { } func TestEnsureNoFractionalHeapMemory(t *testing.T) { - app := 
v1beta2.FlinkApplication{} + app := v1beta1.FlinkApplication{} tmResources := coreV1.ResourceRequirements{ Requests: coreV1.ResourceList{ coreV1.ResourceCPU: resource.MustParse("2"), @@ -148,7 +148,7 @@ func TestEnsureNoFractionalHeapMemory(t *testing.T) { } func TestGetTaskManagerHeapMemory(t *testing.T) { - app := v1beta2.FlinkApplication{} + app := v1beta1.FlinkApplication{} tmResources := coreV1.ResourceRequirements{ Requests: coreV1.ResourceList{ coreV1.ResourceCPU: resource.MustParse("2"), @@ -167,7 +167,7 @@ func TestGetTaskManagerHeapMemory(t *testing.T) { } func TestGetJobManagerHeapMemory(t *testing.T) { - app := v1beta2.FlinkApplication{} + app := v1beta1.FlinkApplication{} jmResources := coreV1.ResourceRequirements{ Requests: coreV1.ResourceList{ coreV1.ResourceCPU: resource.MustParse("2"), diff --git a/pkg/controller/flink/container_utils.go b/pkg/controller/flink/container_utils.go index c576eb10..e760b6f9 100644 --- a/pkg/controller/flink/container_utils.go +++ b/pkg/controller/flink/container_utils.go @@ -6,7 +6,7 @@ import ( "github.com/benlaurie/objecthash/go/objecthash" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "github.com/lyft/flinkk8soperator/pkg/controller/common" "github.com/lyft/flinkk8soperator/pkg/controller/config" "github.com/lyft/flinkk8soperator/pkg/controller/k8" @@ -46,11 +46,11 @@ func getFlinkContainerName(containerName string) string { return containerName } -func getCommonAppLabels(app *v1beta2.FlinkApplication) map[string]string { +func getCommonAppLabels(app *v1beta1.FlinkApplication) map[string]string { return k8.GetAppLabel(app.Name) } -func getCommonAnnotations(app *v1beta2.FlinkApplication) map[string]string { +func getCommonAnnotations(app *v1beta1.FlinkApplication) map[string]string { annotations := common.DuplicateMap(app.Annotations) annotations[FlinkJobProperties] = fmt.Sprintf( "jarName: %s\nparallelism: %d\nentryClass:%s\nprogramArgs:\"%s\"", 
@@ -58,7 +58,7 @@ func getCommonAnnotations(app *v1beta2.FlinkApplication) map[string]string { if app.Spec.RestartNonce != "" { annotations[RestartNonce] = app.Spec.RestartNonce } - if v1beta2.IsBlueGreenDeploymentMode(app.Spec.DeploymentMode) { + if v1beta1.IsBlueGreenDeploymentMode(app.Spec.DeploymentMode) { annotations[FlinkApplicationVersion] = app.Status.UpdatingVersion } return annotations @@ -77,7 +77,7 @@ func GetAWSServiceEnv() []v1.EnvVar { } } -func getFlinkEnv(app *v1beta2.FlinkApplication) ([]v1.EnvVar, error) { +func getFlinkEnv(app *v1beta1.FlinkApplication) ([]v1.EnvVar, error) { env := []v1.EnvVar{} appName := app.Name @@ -115,7 +115,7 @@ func getFlinkEnv(app *v1beta2.FlinkApplication) ([]v1.EnvVar, error) { return env, nil } -func GetFlinkContainerEnv(app *v1beta2.FlinkApplication) []v1.EnvVar { +func GetFlinkContainerEnv(app *v1beta1.FlinkApplication) []v1.EnvVar { env := []v1.EnvVar{} env = append(env, GetAWSServiceEnv()...) flinkEnv, err := getFlinkEnv(app) @@ -126,7 +126,7 @@ func GetFlinkContainerEnv(app *v1beta2.FlinkApplication) []v1.EnvVar { return env } -func ImagePullPolicy(app *v1beta2.FlinkApplication) v1.PullPolicy { +func ImagePullPolicy(app *v1beta1.FlinkApplication) v1.PullPolicy { if app.Spec.ImagePullPolicy == "" { return v1.PullIfNotPresent } @@ -162,7 +162,7 @@ func ComputeDeploymentHash(deployment appsv1.Deployment) ([]byte, error) { // Returns an 8 character hash sensitive to the application name, labels, annotations, and spec. 
// TODO: we may need to add collision-avoidance to this -func HashForApplication(app *v1beta2.FlinkApplication) string { +func HashForApplication(app *v1beta1.FlinkApplication) string { // we round-trip through json to normalize the deployment objects jmDeployment := jobmanagerTemplate(app) jmDeployment.OwnerReferences = make([]metav1.OwnerReference, 0) @@ -197,7 +197,7 @@ func HashForApplication(app *v1beta2.FlinkApplication) string { return fmt.Sprintf("%08x", hasher.Sum32()) } -func InjectOperatorCustomizedConfig(deployment *appsv1.Deployment, app *v1beta2.FlinkApplication, hash string, deploymentType string) { +func InjectOperatorCustomizedConfig(deployment *appsv1.Deployment, app *v1beta1.FlinkApplication, hash string, deploymentType string) { var newContainers []v1.Container for _, container := range deployment.Spec.Template.Spec.Containers { var newEnv []v1.EnvVar @@ -226,8 +226,8 @@ func InjectOperatorCustomizedConfig(deployment *appsv1.Deployment, app *v1beta2. } // Injects labels and environment variables required for blue green deploys -func GetDeploySpecificEnv(app *v1beta2.FlinkApplication) []v1.EnvVar { - if !v1beta2.IsBlueGreenDeploymentMode(app.Spec.DeploymentMode) { +func GetDeploySpecificEnv(app *v1beta1.FlinkApplication) []v1.EnvVar { + if !v1beta1.IsBlueGreenDeploymentMode(app.Spec.DeploymentMode) { return []v1.EnvVar{} } diff --git a/pkg/controller/flink/container_utils_test.go b/pkg/controller/flink/container_utils_test.go index 85bb7a5f..a6f4abb7 100644 --- a/pkg/controller/flink/container_utils_test.go +++ b/pkg/controller/flink/container_utils_test.go @@ -3,14 +3,14 @@ package flink import ( "testing" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "github.com/stretchr/testify/assert" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" ) func TestHashForApplication(t *testing.T) { - app := v1beta2.FlinkApplication{} + app := v1beta1.FlinkApplication{} taskSlots 
:= int32(8) app.Spec.TaskManagerConfig.TaskSlots = &taskSlots app.Spec.Parallelism = 4 @@ -49,7 +49,7 @@ func TestHashForApplication(t *testing.T) { } func TestHashForDifferentResourceScales(t *testing.T) { - app1 := v1beta2.FlinkApplication{} + app1 := v1beta1.FlinkApplication{} app1.Spec.TaskManagerConfig.Resources = &v1.ResourceRequirements{ Requests: v1.ResourceList{ v1.ResourceCPU: resource.MustParse("0.5"), @@ -61,7 +61,7 @@ func TestHashForDifferentResourceScales(t *testing.T) { }, } - app2 := v1beta2.FlinkApplication{} + app2 := v1beta1.FlinkApplication{} app2.Spec.TaskManagerConfig.Resources = &v1.ResourceRequirements{ Requests: v1.ResourceList{ v1.ResourceCPU: resource.MustParse("500m"), diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index 700a9ea1..8d7c86ef 100644 --- a/pkg/controller/flink/flink.go +++ b/pkg/controller/flink/flink.go @@ -14,7 +14,7 @@ import ( controllerConfig "github.com/lyft/flinkk8soperator/pkg/controller/config" "github.com/lyft/flytestdlib/logger" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "github.com/lyft/flinkk8soperator/pkg/controller/flink/client" "github.com/lyft/flinkk8soperator/pkg/controller/k8" "github.com/lyft/flytestdlib/promutils" @@ -43,74 +43,74 @@ const failingIntervalThreshold = 1 * time.Minute // Interface to manage Flink Application in Kubernetes type ControllerInterface interface { // Creates a Flink cluster with necessary Job Manager, Task Managers and services for UI - CreateCluster(ctx context.Context, application *v1beta2.FlinkApplication) error + CreateCluster(ctx context.Context, application *v1beta1.FlinkApplication) error // Cancels the running/active jobs in the Cluster for the Application after savepoint is created - CancelWithSavepoint(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (string, error) + CancelWithSavepoint(ctx context.Context, application *v1beta1.FlinkApplication, 
hash string) (string, error) // Force cancels the running/active job without taking a savepoint - ForceCancel(ctx context.Context, application *v1beta2.FlinkApplication, hash string) error + ForceCancel(ctx context.Context, application *v1beta1.FlinkApplication, hash string) error // Starts the Job in the Flink Cluster - StartFlinkJob(ctx context.Context, application *v1beta2.FlinkApplication, hash string, + StartFlinkJob(ctx context.Context, application *v1beta1.FlinkApplication, hash string, jarName string, parallelism int32, entryClass string, programArgs string, allowNonRestoredState bool, savepointPath string) (string, error) // Savepoint creation is asynchronous. // Polls the status of the Savepoint, using the triggerID - GetSavepointStatus(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.SavepointResponse, error) + GetSavepointStatus(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.SavepointResponse, error) // Check if the Flink Kubernetes Cluster is Ready. // Checks if all the pods of task and job managers are ready. 
- IsClusterReady(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) + IsClusterReady(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) // Checks to see if the Flink Cluster is ready to handle API requests - IsServiceReady(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) + IsServiceReady(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) // Returns the list of Jobs running on the Flink Cluster for the Application - GetJobsForApplication(ctx context.Context, application *v1beta2.FlinkApplication, hash string) ([]client.FlinkJob, error) + GetJobsForApplication(ctx context.Context, application *v1beta1.FlinkApplication, hash string) ([]client.FlinkJob, error) // Returns the current job for the application, if one exists in the cluster - GetJobForApplication(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) + GetJobForApplication(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) // Returns the pair of deployments (tm/jm) for the current version of the application - GetCurrentDeploymentsForApp(ctx context.Context, application *v1beta2.FlinkApplication) (*common.FlinkDeployment, error) + GetCurrentDeploymentsForApp(ctx context.Context, application *v1beta1.FlinkApplication) (*common.FlinkDeployment, error) // Deletes all old resources (deployments and services) for the app - DeleteOldResourcesForApp(ctx context.Context, app *v1beta2.FlinkApplication) error + DeleteOldResourcesForApp(ctx context.Context, app *v1beta1.FlinkApplication) error // Attempts to find an externalized checkpoint for the job. This can be used to recover an application that is not // able to savepoint for some reason. 
- FindExternalizedCheckpoint(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (string, error) + FindExternalizedCheckpoint(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) // Logs an event to the FlinkApplication resource and to the operator log - LogEvent(ctx context.Context, app *v1beta2.FlinkApplication, eventType string, reason string, message string) + LogEvent(ctx context.Context, app *v1beta1.FlinkApplication, eventType string, reason string, message string) // Compares and updates new cluster status with current cluster status // Returns true if there is a change in ClusterStatus - CompareAndUpdateClusterStatus(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) + CompareAndUpdateClusterStatus(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) // Compares and updates new job status with current job status // Returns true if there is a change in JobStatus - CompareAndUpdateJobStatus(ctx context.Context, app *v1beta2.FlinkApplication, hash string) (bool, error) + CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, hash string) (bool, error) // Gets the last updated cluster status - GetLatestClusterStatus(ctx context.Context, app *v1beta2.FlinkApplication) v1beta2.FlinkClusterStatus + GetLatestClusterStatus(ctx context.Context, app *v1beta1.FlinkApplication) v1beta1.FlinkClusterStatus // Gets the last updated job status - GetLatestJobStatus(ctx context.Context, app *v1beta2.FlinkApplication) v1beta2.FlinkJobStatus + GetLatestJobStatus(ctx context.Context, app *v1beta1.FlinkApplication) v1beta1.FlinkJobStatus // Gets the last updated job ID - GetLatestJobID(ctx context.Context, app *v1beta2.FlinkApplication) string + GetLatestJobID(ctx context.Context, app *v1beta1.FlinkApplication) string // Updates the jobID on the latest jobStatus - UpdateLatestJobID(ctx context.Context, app 
*v1beta2.FlinkApplication, jobID string) + UpdateLatestJobID(ctx context.Context, app *v1beta1.FlinkApplication, jobID string) // Update jobStatus on the latest VersionStatuses - UpdateLatestJobStatus(ctx context.Context, app *v1beta2.FlinkApplication, jobStatus v1beta2.FlinkJobStatus) + UpdateLatestJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, jobStatus v1beta1.FlinkJobStatus) // Update clusterStatus on the latest VersionStatuses - UpdateLatestClusterStatus(ctx context.Context, app *v1beta2.FlinkApplication, jobStatus v1beta2.FlinkClusterStatus) + UpdateLatestClusterStatus(ctx context.Context, app *v1beta1.FlinkApplication, jobStatus v1beta1.FlinkClusterStatus) } func NewController(k8sCluster k8.ClusterInterface, eventRecorder record.EventRecorder, config controllerConfig.RuntimeConfig) ControllerInterface { @@ -151,7 +151,7 @@ type Controller struct { eventRecorder record.EventRecorder } -func getURLFromApp(application *v1beta2.FlinkApplication, hash string) string { +func getURLFromApp(application *v1beta1.FlinkApplication, hash string) string { service := VersionedJobManagerServiceName(application, hash) cfg := controllerConfig.GetConfig() if cfg.UseProxy { @@ -160,7 +160,7 @@ func getURLFromApp(application *v1beta2.FlinkApplication, hash string) string { return fmt.Sprintf("http://%s.%s:%d", service, application.Namespace, port) } -func getClusterOverviewURL(app *v1beta2.FlinkApplication) string { +func getClusterOverviewURL(app *v1beta1.FlinkApplication) string { externalURL := getExternalURLFromApp(app) if externalURL != "" { return fmt.Sprintf(externalURL + client.WebUIAnchor + client.GetClusterOverviewURL) @@ -168,7 +168,7 @@ func getClusterOverviewURL(app *v1beta2.FlinkApplication) string { return "" } -func getJobOverviewURL(app *v1beta2.FlinkApplication) string { +func getJobOverviewURL(app *v1beta1.FlinkApplication) string { externalURL := getExternalURLFromApp(app) if externalURL != "" { return 
fmt.Sprintf(externalURL+client.WebUIAnchor+client.GetJobsOverviewURL, app.Status.VersionStatuses[getCurrentStatusIndex(app)].JobStatus.JobID) @@ -176,7 +176,7 @@ func getJobOverviewURL(app *v1beta2.FlinkApplication) string { return "" } -func getExternalURLFromApp(application *v1beta2.FlinkApplication) string { +func getExternalURLFromApp(application *v1beta1.FlinkApplication) string { cfg := controllerConfig.GetConfig() // Local environment if cfg.UseProxy { @@ -201,7 +201,7 @@ func GetActiveFlinkJobs(jobs []client.FlinkJob) []client.FlinkJob { // Returns true iff the deployment exactly matches the flink application // This check only validates that the name of the deployment is as expected. // This is to add extra protection, as labels to any deployments -func (f *Controller) deploymentMatches(ctx context.Context, deployment *v1.Deployment, application *v1beta2.FlinkApplication, hash string) bool { +func (f *Controller) deploymentMatches(ctx context.Context, deployment *v1.Deployment, application *v1beta1.FlinkApplication, hash string) bool { if DeploymentIsTaskmanager(deployment) { return TaskManagerDeploymentMatches(deployment, application, hash) } @@ -213,7 +213,7 @@ func (f *Controller) deploymentMatches(ctx context.Context, deployment *v1.Deplo return false } -func (f *Controller) GetJobsForApplication(ctx context.Context, application *v1beta2.FlinkApplication, hash string) ([]client.FlinkJob, error) { +func (f *Controller) GetJobsForApplication(ctx context.Context, application *v1beta1.FlinkApplication, hash string) ([]client.FlinkJob, error) { jobResponse, err := f.flinkClient.GetJobs(ctx, getURLFromApp(application, hash)) if err != nil { return nil, err @@ -222,7 +222,7 @@ func (f *Controller) GetJobsForApplication(ctx context.Context, application *v1b return jobResponse.Jobs, nil } -func (f *Controller) GetJobForApplication(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { +func (f *Controller) 
GetJobForApplication(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { if f.GetLatestJobID(ctx, application) == "" { return nil, nil } @@ -237,7 +237,7 @@ func (f *Controller) GetJobForApplication(ctx context.Context, application *v1be // The operator for now assumes and is intended to run single application per Flink Cluster. // Once we move to run multiple applications, this has to be removed/updated -func (f *Controller) getJobIDForApplication(ctx context.Context, application *v1beta2.FlinkApplication) (string, error) { +func (f *Controller) getJobIDForApplication(ctx context.Context, application *v1beta1.FlinkApplication) (string, error) { if f.GetLatestJobID(ctx, application) != "" { return f.GetLatestJobID(ctx, application), nil } @@ -245,7 +245,7 @@ func (f *Controller) getJobIDForApplication(ctx context.Context, application *v1 return "", errors.New("active job id not available") } -func (f *Controller) CancelWithSavepoint(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (string, error) { +func (f *Controller) CancelWithSavepoint(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) { jobID, err := f.getJobIDForApplication(ctx, application) if err != nil { return "", err @@ -253,7 +253,7 @@ func (f *Controller) CancelWithSavepoint(ctx context.Context, application *v1bet return f.flinkClient.CancelJobWithSavepoint(ctx, getURLFromApp(application, hash), jobID) } -func (f *Controller) ForceCancel(ctx context.Context, application *v1beta2.FlinkApplication, hash string) error { +func (f *Controller) ForceCancel(ctx context.Context, application *v1beta1.FlinkApplication, hash string) error { jobID, err := f.getJobIDForApplication(ctx, application) if err != nil { return err @@ -261,7 +261,7 @@ func (f *Controller) ForceCancel(ctx context.Context, application *v1beta2.Flink return f.flinkClient.ForceCancelJob(ctx, getURLFromApp(application, hash), 
jobID) } -func (f *Controller) CreateCluster(ctx context.Context, application *v1beta2.FlinkApplication) error { +func (f *Controller) CreateCluster(ctx context.Context, application *v1beta1.FlinkApplication) error { newlyCreatedJm, err := f.jobManager.CreateIfNotExist(ctx, application) if err != nil { logger.Errorf(ctx, "Job manager cluster creation did not succeed %v", err) @@ -287,7 +287,7 @@ func (f *Controller) CreateCluster(ctx context.Context, application *v1beta2.Fli return nil } -func (f *Controller) StartFlinkJob(ctx context.Context, application *v1beta2.FlinkApplication, hash string, +func (f *Controller) StartFlinkJob(ctx context.Context, application *v1beta1.FlinkApplication, hash string, jarName string, parallelism int32, entryClass string, programArgs string, allowNonRestoredState bool, savepointPath string) (string, error) { response, err := f.flinkClient.SubmitJob( @@ -311,7 +311,7 @@ func (f *Controller) StartFlinkJob(ctx context.Context, application *v1beta2.Fli return response.JobID, nil } -func (f *Controller) GetSavepointStatus(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.SavepointResponse, error) { +func (f *Controller) GetSavepointStatus(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.SavepointResponse, error) { jobID, err := f.getJobIDForApplication(ctx, application) if err != nil { return nil, err @@ -319,7 +319,7 @@ func (f *Controller) GetSavepointStatus(ctx context.Context, application *v1beta return f.flinkClient.CheckSavepointStatus(ctx, getURLFromApp(application, hash), jobID, application.Status.SavepointTriggerID) } -func (f *Controller) IsClusterReady(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { +func (f *Controller) IsClusterReady(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { deployments, err := f.GetCurrentDeploymentsForApp(ctx, application) if deployments == nil || err != nil { return false, err @@ 
-337,7 +337,7 @@ func (f *Controller) IsClusterReady(ctx context.Context, application *v1beta2.Fl return true, nil } -func (f *Controller) IsServiceReady(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) { +func (f *Controller) IsServiceReady(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) { resp, err := f.flinkClient.GetClusterOverview(ctx, getURLFromApp(application, hash)) if err != nil { logger.Infof(ctx, "Error response indicating flink API is not ready to handle request %v", err) @@ -377,7 +377,7 @@ func listToFlinkDeployment(ds []v1.Deployment, hash string) *common.FlinkDeploym return &fd } -func getCurrentHash(app *v1beta2.FlinkApplication) string { +func getCurrentHash(app *v1beta1.FlinkApplication) string { appHash := HashForApplication(app) if appHash == app.Status.FailedDeployHash { @@ -389,7 +389,7 @@ func getCurrentHash(app *v1beta2.FlinkApplication) string { // Gets the current deployment and any other deployments for the application. The current deployment will be the one // that matches the FlinkApplication, unless the FailedDeployHash is set, in which case it will be the one with that // hash. 
-func (f *Controller) GetCurrentDeploymentsForApp(ctx context.Context, application *v1beta2.FlinkApplication) (*common.FlinkDeployment, error) { +func (f *Controller) GetCurrentDeploymentsForApp(ctx context.Context, application *v1beta1.FlinkApplication) (*common.FlinkDeployment, error) { labels := k8.GetAppLabel(application.Name) curHash := getCurrentHash(application) labels[FlinkAppHash] = curHash @@ -410,7 +410,7 @@ func (f *Controller) GetCurrentDeploymentsForApp(ctx context.Context, applicatio return cur, nil } -func (f *Controller) DeleteOldResourcesForApp(ctx context.Context, app *v1beta2.FlinkApplication) error { +func (f *Controller) DeleteOldResourcesForApp(ctx context.Context, app *v1beta1.FlinkApplication) error { curHash := getCurrentHash(app) appLabel := k8.GetAppLabel(app.Name) @@ -464,7 +464,7 @@ func (f *Controller) DeleteOldResourcesForApp(ctx context.Context, app *v1beta2. return nil } -func (f *Controller) FindExternalizedCheckpoint(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (string, error) { +func (f *Controller) FindExternalizedCheckpoint(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) { checkpoint, err := f.flinkClient.GetLatestCheckpoint(ctx, getURLFromApp(application, hash), f.GetLatestJobID(ctx, application)) var checkpointPath string var checkpointTime int64 @@ -498,18 +498,18 @@ func isCheckpointOldToRecover(checkpointTime int64, maxCheckpointRecoveryAgeSec return time.Since(time.Unix(checkpointTime, 0)) > (time.Duration(maxCheckpointRecoveryAgeSec) * time.Second) } -func (f *Controller) LogEvent(ctx context.Context, app *v1beta2.FlinkApplication, eventType string, reason string, message string) { +func (f *Controller) LogEvent(ctx context.Context, app *v1beta1.FlinkApplication, eventType string, reason string, message string) { f.eventRecorder.Event(app, eventType, reason, message) logger.Infof(ctx, "Logged %s event: %s: %s", eventType, reason, message) } // Gets and 
updates the cluster status -func (f *Controller) CompareAndUpdateClusterStatus(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) { +func (f *Controller) CompareAndUpdateClusterStatus(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) { // Error retrieving cluster / taskmanagers overview (after startup/readiness) --> Red // If there is an error this loop will return with Health set to Red currIndex := getCurrentStatusIndex(application) oldClusterStatus := application.Status.VersionStatuses[currIndex].ClusterStatus - application.Status.VersionStatuses[currIndex].ClusterStatus.Health = v1beta2.Red + application.Status.VersionStatuses[currIndex].ClusterStatus.Health = v1beta1.Red deployment, err := f.GetCurrentDeploymentsForApp(ctx, application) if deployment == nil || err != nil { @@ -538,9 +538,9 @@ func (f *Controller) CompareAndUpdateClusterStatus(ctx context.Context, applicat // Healthy TaskManagers == Number of taskmanagers --> Green // Else --> Yellow if application.Status.VersionStatuses[currIndex].ClusterStatus.HealthyTaskManagers == deployment.Taskmanager.Status.Replicas { - application.Status.VersionStatuses[currIndex].ClusterStatus.Health = v1beta2.Green + application.Status.VersionStatuses[currIndex].ClusterStatus.Health = v1beta1.Green } else { - application.Status.VersionStatuses[currIndex].ClusterStatus.Health = v1beta2.Yellow + application.Status.VersionStatuses[currIndex].ClusterStatus.Health = v1beta1.Yellow } return !apiequality.Semantic.DeepEqual(oldClusterStatus, application.Status.VersionStatuses[currIndex].ClusterStatus), nil @@ -559,7 +559,7 @@ func getHealthyTaskManagerCount(response *client.TaskManagersResponse) int32 { } -func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta2.FlinkApplication, hash string) (bool, error) { +func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, hash string) (bool, 
error) { currIndex := getCurrentStatusIndex(app) if app.Status.VersionStatuses[currIndex].JobStatus.LastFailingTime == nil { initTime := metav1.NewTime(time.Time{}) @@ -579,7 +579,7 @@ func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta2 // Job status app.Status.VersionStatuses[currIndex].JobStatus.JobOverviewURL = getJobOverviewURL(app) - app.Status.VersionStatuses[currIndex].JobStatus.State = v1beta2.JobState(jobResponse.State) + app.Status.VersionStatuses[currIndex].JobStatus.State = v1beta1.JobState(jobResponse.State) jobStartTime := metav1.NewTime(time.Unix(jobResponse.StartTime/1000, 0)) app.Status.VersionStatuses[currIndex].JobStatus.StartTime = &jobStartTime @@ -628,58 +628,58 @@ func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta2 // Time since last successful checkpoint > maxCheckpointTime --> YELLOW // Else --> Green - if app.Status.VersionStatuses[currIndex].JobStatus.State == v1beta2.Failing || + if app.Status.VersionStatuses[currIndex].JobStatus.State == v1beta1.Failing || time.Since(app.Status.VersionStatuses[currIndex].JobStatus.LastFailingTime.Time) < failingIntervalThreshold || verticesInCreated > 0 { - app.Status.VersionStatuses[currIndex].JobStatus.Health = v1beta2.Red + app.Status.VersionStatuses[currIndex].JobStatus.Health = v1beta1.Red } else if time.Since(time.Unix(int64(lastCheckpointAgeSeconds), 0)) < maxCheckpointTime || runningTasks < totalTasks { - app.Status.VersionStatuses[currIndex].JobStatus.Health = v1beta2.Yellow + app.Status.VersionStatuses[currIndex].JobStatus.Health = v1beta1.Yellow } else { - app.Status.VersionStatuses[currIndex].JobStatus.Health = v1beta2.Green + app.Status.VersionStatuses[currIndex].JobStatus.Health = v1beta1.Green } // Update LastFailingTime - if app.Status.VersionStatuses[currIndex].JobStatus.State == v1beta2.Failing { + if app.Status.VersionStatuses[currIndex].JobStatus.State == v1beta1.Failing { currTime := metav1.Now() 
app.Status.VersionStatuses[currIndex].JobStatus.LastFailingTime = &currTime } return !apiequality.Semantic.DeepEqual(oldJobStatus, app.Status.VersionStatuses[currIndex].JobStatus), err } -func getCurrentStatusIndex(app *v1beta2.FlinkApplication) int32 { +func getCurrentStatusIndex(app *v1beta1.FlinkApplication) int32 { // In the Running phase, we always have only 1 job - if v1beta2.IsRunningPhase(app.Status.Phase) { + if v1beta1.IsRunningPhase(app.Status.Phase) { return 0 } // In every other state, we either have // Dual mode --> One Application status object // BlueGreen mode --> Two Application status objects - return v1beta2.GetMaxRunningJobs(app.Spec.DeploymentMode) - indexOffset + return v1beta1.GetMaxRunningJobs(app.Spec.DeploymentMode) - indexOffset } -func (f *Controller) GetLatestClusterStatus(ctx context.Context, application *v1beta2.FlinkApplication) v1beta2.FlinkClusterStatus { +func (f *Controller) GetLatestClusterStatus(ctx context.Context, application *v1beta1.FlinkApplication) v1beta1.FlinkClusterStatus { return application.Status.VersionStatuses[getCurrentStatusIndex(application)].ClusterStatus } -func (f *Controller) GetLatestJobStatus(ctx context.Context, application *v1beta2.FlinkApplication) v1beta2.FlinkJobStatus { +func (f *Controller) GetLatestJobStatus(ctx context.Context, application *v1beta1.FlinkApplication) v1beta1.FlinkJobStatus { return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus } -func (f *Controller) UpdateLatestJobStatus(ctx context.Context, app *v1beta2.FlinkApplication, jobStatus v1beta2.FlinkJobStatus) { +func (f *Controller) UpdateLatestJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, jobStatus v1beta1.FlinkJobStatus) { app.Status.VersionStatuses[getCurrentStatusIndex(app)].JobStatus = jobStatus } -func (f *Controller) UpdateLatestClusterStatus(ctx context.Context, app *v1beta2.FlinkApplication, clusterStatus v1beta2.FlinkClusterStatus) { +func (f *Controller) 
UpdateLatestClusterStatus(ctx context.Context, app *v1beta1.FlinkApplication, clusterStatus v1beta1.FlinkClusterStatus) { app.Status.VersionStatuses[getCurrentStatusIndex(app)].ClusterStatus = clusterStatus } -func (f *Controller) GetLatestJobID(ctx context.Context, application *v1beta2.FlinkApplication) string { +func (f *Controller) GetLatestJobID(ctx context.Context, application *v1beta1.FlinkApplication) string { return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID } -func (f *Controller) UpdateLatestJobID(ctx context.Context, app *v1beta2.FlinkApplication, jobID string) { +func (f *Controller) UpdateLatestJobID(ctx context.Context, app *v1beta1.FlinkApplication, jobID string) { app.Status.VersionStatuses[getCurrentStatusIndex(app)].JobStatus.JobID = jobID } diff --git a/pkg/controller/flink/flink_test.go b/pkg/controller/flink/flink_test.go index 13319470..381fffd2 100644 --- a/pkg/controller/flink/flink_test.go +++ b/pkg/controller/flink/flink_test.go @@ -11,7 +11,7 @@ import ( "time" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "github.com/lyft/flinkk8soperator/pkg/controller/common" "github.com/lyft/flinkk8soperator/pkg/controller/flink/client" clientMock "github.com/lyft/flinkk8soperator/pkg/controller/flink/client/mock" @@ -57,18 +57,18 @@ func getTestFlinkController() Controller { } } -func getFlinkTestApp() v1beta2.FlinkApplication { - app := v1beta2.FlinkApplication{ +func getFlinkTestApp() v1beta1.FlinkApplication { + app := v1beta1.FlinkApplication{ TypeMeta: metaV1.TypeMeta{ - Kind: v1beta2.FlinkApplicationKind, - APIVersion: v1beta2.SchemeGroupVersion.String(), + Kind: v1beta1.FlinkApplicationKind, + APIVersion: v1beta1.SchemeGroupVersion.String(), }, } app.Spec.Parallelism = 8 app.Name = testAppName app.Namespace = testNamespace - statuses := append(app.Status.VersionStatuses, v1beta2.FlinkApplicationVersionStatus{ - JobStatus: 
v1beta2.FlinkJobStatus{ + statuses := append(app.Status.VersionStatuses, v1beta1.FlinkApplicationVersionStatus{ + JobStatus: v1beta1.FlinkJobStatus{ JobID: testJobID, }, }) @@ -155,7 +155,7 @@ func TestFlinkApplicationChanged(t *testing.T) { assert.Nil(t, err) } -func testJobPropTriggersChange(t *testing.T, changeFun func(application *v1beta2.FlinkApplication)) { +func testJobPropTriggersChange(t *testing.T, changeFun func(application *v1beta1.FlinkApplication)) { flinkControllerForTest := getTestFlinkController() flinkApp := getFlinkTestApp() @@ -190,19 +190,19 @@ func testJobPropTriggersChange(t *testing.T, changeFun func(application *v1beta2 } func TestFlinkApplicationChangedJobProps(t *testing.T) { - testJobPropTriggersChange(t, func(app *v1beta2.FlinkApplication) { + testJobPropTriggersChange(t, func(app *v1beta1.FlinkApplication) { app.Spec.Parallelism = 3 }) - testJobPropTriggersChange(t, func(app *v1beta2.FlinkApplication) { + testJobPropTriggersChange(t, func(app *v1beta1.FlinkApplication) { app.Spec.JarName = "another.jar" }) - testJobPropTriggersChange(t, func(app *v1beta2.FlinkApplication) { + testJobPropTriggersChange(t, func(app *v1beta1.FlinkApplication) { app.Spec.ProgramArgs = "--test-change" }) - testJobPropTriggersChange(t, func(app *v1beta2.FlinkApplication) { + testJobPropTriggersChange(t, func(app *v1beta1.FlinkApplication) { app.Spec.EntryClass = "com.another.Class" }) } @@ -388,10 +388,10 @@ func TestCreateCluster(t *testing.T) { mockJobManager := flinkControllerForTest.jobManager.(*mock.JobManagerController) mockTaskManager := flinkControllerForTest.taskManager.(*mock.TaskManagerController) - mockJobManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { + mockJobManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { return true, nil } - mockTaskManager.CreateIfNotExistFunc = func(ctx context.Context, application 
*v1beta2.FlinkApplication) (bool, error) { + mockTaskManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { return true, nil } err := flinkControllerForTest.CreateCluster(context.Background(), &flinkApp) @@ -404,10 +404,10 @@ func TestCreateClusterJmErr(t *testing.T) { mockJobManager := flinkControllerForTest.jobManager.(*mock.JobManagerController) mockTaskManager := flinkControllerForTest.taskManager.(*mock.TaskManagerController) - mockJobManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { + mockJobManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { return false, errors.New("jm failed") } - mockTaskManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { + mockTaskManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { assert.False(t, true) return false, nil } @@ -421,10 +421,10 @@ func TestCreateClusterTmErr(t *testing.T) { mockJobManager := flinkControllerForTest.jobManager.(*mock.JobManagerController) mockTaskManager := flinkControllerForTest.taskManager.(*mock.TaskManagerController) - mockJobManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { + mockJobManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { return true, nil } - mockTaskManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { + mockTaskManager.CreateIfNotExistFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { return false, errors.New("tm failed") } err := flinkControllerForTest.CreateCluster(context.Background(), &flinkApp) @@ -670,7 +670,7 @@ func TestClusterStatusUpdated(t *testing.T) { assert.Equal(t, 
int32(1), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskSlots) assert.Equal(t, int32(0), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.AvailableTaskSlots) assert.Equal(t, int32(1), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.HealthyTaskManagers) - assert.Equal(t, v1beta2.Green, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health) + assert.Equal(t, v1beta1.Green, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health) assert.Equal(t, "app-name.lyft.xyz/#/overview", flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.ClusterOverviewURL) } @@ -681,7 +681,7 @@ func TestNoClusterStatusChange(t *testing.T) { flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskSlots = int32(1) flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.AvailableTaskSlots = int32(0) flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.HealthyTaskManagers = int32(1) - flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health = v1beta2.Green + flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health = v1beta1.Green flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskManagers = int32(1) mockK8Cluster := flinkControllerForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.GetDeploymentsWithLabelFunc = func(ctx context.Context, namespace string, labelMap map[string]string) (*v1.DeploymentList, error) { @@ -774,7 +774,7 @@ func TestHealthyTaskmanagers(t *testing.T) { assert.Equal(t, int32(1), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskSlots) assert.Equal(t, int32(0), 
flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.AvailableTaskSlots) assert.Equal(t, int32(0), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.HealthyTaskManagers) - assert.Equal(t, v1beta2.Yellow, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health) + assert.Equal(t, v1beta1.Yellow, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health) } @@ -836,9 +836,9 @@ func TestJobStatusUpdated(t *testing.T) { _, err = flinkControllerForTest.CompareAndUpdateJobStatus(context.Background(), &flinkApp, "hash") assert.Nil(t, err) - assert.Equal(t, v1beta2.Running, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.State) + assert.Equal(t, v1beta1.Running, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.State) assert.Equal(t, &expectedTime, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.StartTime) - assert.Equal(t, v1beta2.Yellow, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.Health) + assert.Equal(t, v1beta1.Yellow, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.Health) assert.Equal(t, int32(0), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.FailedCheckpointCount) assert.Equal(t, int32(4), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.CompletedCheckpointCount) @@ -865,13 +865,13 @@ func TestNoJobStatusChange(t *testing.T) { app1 := getFlinkTestApp() mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) - app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.State = v1beta2.Running + app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.State = v1beta1.Running app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.StartTime = &metaTime 
app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.LastCheckpointTime = &metaTime app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.CompletedCheckpointCount = int32(4) app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.JobRestartCount = int32(1) app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.FailedCheckpointCount = int32(0) - app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.Health = v1beta2.Green + app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.Health = v1beta1.Green app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.RestoreTime = &metaTime app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.RestorePath = "/test/externalpath" app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.JobOverviewURL = "" @@ -917,7 +917,7 @@ func TestGetAndUpdateJobStatusHealth(t *testing.T) { app1 := getFlinkTestApp() mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) - app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.State = v1beta2.Failing + app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.State = v1beta1.Failing app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.LastFailingTime = &lastFailedTime mockJmClient.GetJobOverviewFunc = func(ctx context.Context, url string, jobID string) (*client.FlinkJobOverview, error) { @@ -943,7 +943,7 @@ func TestGetAndUpdateJobStatusHealth(t *testing.T) { assert.Nil(t, err) // Job is in a RUNNING state but was in a FAILING state in the last 1 minute, so we expect // JobStatus.Health to be Red - assert.Equal(t, app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.Health, v1beta2.Red) + assert.Equal(t, app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.Health, v1beta1.Red) } diff --git a/pkg/controller/flink/ingress.go b/pkg/controller/flink/ingress.go index 96001664..e45e4614 
100644 --- a/pkg/controller/flink/ingress.go +++ b/pkg/controller/flink/ingress.go @@ -3,7 +3,7 @@ package flink import ( "regexp" - flinkapp "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + flinkapp "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "github.com/lyft/flinkk8soperator/pkg/controller/common" "github.com/lyft/flinkk8soperator/pkg/controller/config" "github.com/lyft/flinkk8soperator/pkg/controller/k8" diff --git a/pkg/controller/flink/job_manager_controller.go b/pkg/controller/flink/job_manager_controller.go index 4bbd9b76..279c0a44 100644 --- a/pkg/controller/flink/job_manager_controller.go +++ b/pkg/controller/flink/job_manager_controller.go @@ -4,7 +4,7 @@ import ( "context" "fmt" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "github.com/lyft/flinkk8soperator/pkg/controller/common" "github.com/lyft/flinkk8soperator/pkg/controller/config" "github.com/lyft/flinkk8soperator/pkg/controller/k8" @@ -41,12 +41,12 @@ const ( FlinkInternalMetricPortName = "metrics" ) -func VersionedJobManagerServiceName(app *v1beta2.FlinkApplication, hash string) string { +func VersionedJobManagerServiceName(app *v1beta1.FlinkApplication, hash string) string { return fmt.Sprintf("%s-%s", app.Name, hash) } type JobManagerControllerInterface interface { - CreateIfNotExist(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) + CreateIfNotExist(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) } func NewJobManagerController(k8sCluster k8.ClusterInterface, config config.RuntimeConfig) JobManagerControllerInterface { @@ -85,7 +85,7 @@ type jobManagerMetrics struct { ingressCreationFailure labeled.Counter } -func (j *JobManagerController) CreateIfNotExist(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { +func (j *JobManagerController) CreateIfNotExist(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { 
hash := HashForApplication(application) newlyCreated := false @@ -168,21 +168,21 @@ var JobManagerDefaultResources = coreV1.ResourceRequirements{ }, } -func getJobManagerPodName(application *v1beta2.FlinkApplication, hash string) string { +func getJobManagerPodName(application *v1beta1.FlinkApplication, hash string) string { applicationName := application.Name - if v1beta2.IsBlueGreenDeploymentMode(application.Spec.DeploymentMode) { + if v1beta1.IsBlueGreenDeploymentMode(application.Spec.DeploymentMode) { applicationVersion := application.Status.UpdatingVersion return fmt.Sprintf(JobManagerVersionPodNameFormat, applicationName, hash, applicationVersion) } return fmt.Sprintf(JobManagerPodNameFormat, applicationName, hash) } -func getJobManagerName(application *v1beta2.FlinkApplication, hash string) string { +func getJobManagerName(application *v1beta1.FlinkApplication, hash string) string { applicationName := application.Name return fmt.Sprintf(JobManagerNameFormat, applicationName, hash) } -func FetchJobManagerServiceCreateObj(app *v1beta2.FlinkApplication, hash string) *coreV1.Service { +func FetchJobManagerServiceCreateObj(app *v1beta1.FlinkApplication, hash string) *coreV1.Service { jmServiceName := app.Name serviceLabels := getCommonAppLabels(app) serviceLabels[FlinkAppHash] = hash @@ -208,7 +208,7 @@ func FetchJobManagerServiceCreateObj(app *v1beta2.FlinkApplication, hash string) } } -func getJobManagerServicePorts(app *v1beta2.FlinkApplication) []coreV1.ServicePort { +func getJobManagerServicePorts(app *v1beta1.FlinkApplication) []coreV1.ServicePort { ports := getJobManagerPorts(app) servicePorts := make([]coreV1.ServicePort, 0, len(ports)) for _, p := range ports { @@ -220,7 +220,7 @@ func getJobManagerServicePorts(app *v1beta2.FlinkApplication) []coreV1.ServicePo return servicePorts } -func getJobManagerPorts(app *v1beta2.FlinkApplication) []coreV1.ContainerPort { +func getJobManagerPorts(app *v1beta1.FlinkApplication) []coreV1.ContainerPort { return 
[]coreV1.ContainerPort{ { Name: FlinkRPCPortName, @@ -245,7 +245,7 @@ func getJobManagerPorts(app *v1beta2.FlinkApplication) []coreV1.ContainerPort { } } -func FetchJobManagerContainerObj(application *v1beta2.FlinkApplication) *coreV1.Container { +func FetchJobManagerContainerObj(application *v1beta1.FlinkApplication) *coreV1.Container { jmConfig := application.Spec.JobManagerConfig resources := jmConfig.Resources if resources == nil { @@ -294,7 +294,7 @@ func DeploymentIsJobmanager(deployment *v1.Deployment) bool { // made very carefully. Any new version v' that causes DeploymentsEqual(v(x), v'(x)) to be false // will cause redeployments for all applications, and should be considered a breaking change that // requires a new version of the CRD. -func jobmanagerTemplate(app *v1beta2.FlinkApplication) *v1.Deployment { +func jobmanagerTemplate(app *v1beta1.FlinkApplication) *v1.Deployment { labels := getCommonAppLabels(app) labels = common.CopyMap(labels, app.Labels) labels[FlinkDeploymentType] = FlinkDeploymentTypeJobmanager @@ -356,7 +356,7 @@ func jobmanagerTemplate(app *v1beta2.FlinkApplication) *v1.Deployment { return deployment } -func FetchJobMangerDeploymentCreateObj(app *v1beta2.FlinkApplication, hash string) *v1.Deployment { +func FetchJobMangerDeploymentCreateObj(app *v1beta1.FlinkApplication, hash string) *v1.Deployment { template := jobmanagerTemplate(app.DeepCopy()) template.Name = getJobManagerName(app, hash) @@ -370,7 +370,7 @@ func FetchJobMangerDeploymentCreateObj(app *v1beta2.FlinkApplication, hash strin return template } -func JobManagerDeploymentMatches(deployment *v1.Deployment, application *v1beta2.FlinkApplication, hash string) bool { +func JobManagerDeploymentMatches(deployment *v1.Deployment, application *v1beta1.FlinkApplication, hash string) bool { deploymentName := getJobManagerName(application, hash) return deployment.Name == deploymentName } diff --git a/pkg/controller/flink/job_manager_controller_test.go 
b/pkg/controller/flink/job_manager_controller_test.go index 0b0cb0ce..7e54de54 100644 --- a/pkg/controller/flink/job_manager_controller_test.go +++ b/pkg/controller/flink/job_manager_controller_test.go @@ -3,7 +3,7 @@ package flink import ( "testing" - v1beta22 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + v1beta12 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "github.com/lyft/flinkk8soperator/pkg/controller/config" @@ -46,7 +46,7 @@ func TestGetJobManagerPodName(t *testing.T) { func TestGetJobManagerPodNameWithVersion(t *testing.T) { app := getFlinkTestApp() - app.Spec.DeploymentMode = v1beta22.DeploymentModeBlueGreen + app.Spec.DeploymentMode = v1beta12.DeploymentModeBlueGreen app.Status.UpdatingVersion = testVersion assert.Equal(t, "app-name-"+testAppHash+"-jm-"+testVersion+"-pod", getJobManagerPodName(&app, testAppHash)) } @@ -94,7 +94,7 @@ func TestJobManagerCreateSuccess(t *testing.T) { assert.Equal(t, app.Spec.JobManagerConfig.Tolerations, deployment.Spec.Template.Spec.Tolerations) assert.Equal(t, int32(1), *deployment.Spec.Replicas) assert.Equal(t, "app-name", deployment.OwnerReferences[0].Name) - assert.Equal(t, "flink.k8s.io/v1beta2", deployment.OwnerReferences[0].APIVersion) + assert.Equal(t, "flink.k8s.io/v1beta1", deployment.OwnerReferences[0].APIVersion) assert.Equal(t, "FlinkApplication", deployment.OwnerReferences[0].Kind) assert.Equal(t, "blob.server.port: 6125\njobmanager.heap.size: 1572864k\n"+ @@ -171,7 +171,7 @@ func TestJobManagerHACreateSuccess(t *testing.T) { assert.Equal(t, expectedLabels, deployment.Labels) assert.Equal(t, int32(1), *deployment.Spec.Replicas) assert.Equal(t, "app-name", deployment.OwnerReferences[0].Name) - assert.Equal(t, "flink.k8s.io/v1beta2", deployment.OwnerReferences[0].APIVersion) + assert.Equal(t, "flink.k8s.io/v1beta1", deployment.OwnerReferences[0].APIVersion) assert.Equal(t, "FlinkApplication", deployment.OwnerReferences[0].Kind) assert.Equal(t, "blob.server.port: 6125\nhigh-availability: 
zookeeper\njobmanager.heap.size: 1572864k\n"+ @@ -313,7 +313,7 @@ func TestJobManagerCreateSuccessWithVersion(t *testing.T) { app.Spec.JarName = testJarName app.Spec.EntryClass = testEntryClass app.Spec.ProgramArgs = testProgramArgs - app.Spec.DeploymentMode = v1beta22.DeploymentModeBlueGreen + app.Spec.DeploymentMode = v1beta12.DeploymentModeBlueGreen app.Status.UpdatingVersion = testVersion annotations := map[string]string{ "key": "annotation", @@ -343,7 +343,7 @@ func TestJobManagerCreateSuccessWithVersion(t *testing.T) { assert.Equal(t, expectedLabels, deployment.Labels) assert.Equal(t, int32(1), *deployment.Spec.Replicas) assert.Equal(t, "app-name", deployment.OwnerReferences[0].Name) - assert.Equal(t, "flink.k8s.io/v1beta2", deployment.OwnerReferences[0].APIVersion) + assert.Equal(t, "flink.k8s.io/v1beta1", deployment.OwnerReferences[0].APIVersion) assert.Equal(t, "FlinkApplication", deployment.OwnerReferences[0].Kind) assert.Equal(t, "blob.server.port: 6125\njobmanager.heap.size: 1572864k\n"+ diff --git a/pkg/controller/flink/mock/mock_flink.go b/pkg/controller/flink/mock/mock_flink.go index 12f8ffbe..88678d4b 100644 --- a/pkg/controller/flink/mock/mock_flink.go +++ b/pkg/controller/flink/mock/mock_flink.go @@ -3,33 +3,33 @@ package mock import ( "context" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "github.com/lyft/flinkk8soperator/pkg/controller/common" "github.com/lyft/flinkk8soperator/pkg/controller/flink/client" corev1 "k8s.io/api/core/v1" ) -type CreateClusterFunc func(ctx context.Context, application *v1beta2.FlinkApplication) error -type DeleteOldResourcesForApp func(ctx context.Context, application *v1beta2.FlinkApplication) error -type CancelWithSavepointFunc func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (string, error) -type ForceCancelFunc func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) error -type 
StartFlinkJobFunc func(ctx context.Context, application *v1beta2.FlinkApplication, hash string, +type CreateClusterFunc func(ctx context.Context, application *v1beta1.FlinkApplication) error +type DeleteOldResourcesForApp func(ctx context.Context, application *v1beta1.FlinkApplication) error +type CancelWithSavepointFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) +type ForceCancelFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) error +type StartFlinkJobFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string, jarName string, parallelism int32, entryClass string, programArgs string, allowNonRestoredState bool, savepointPath string) (string, error) -type GetSavepointStatusFunc func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.SavepointResponse, error) -type IsClusterReadyFunc func(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) -type IsServiceReadyFunc func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) -type GetJobsForApplicationFunc func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) ([]client.FlinkJob, error) -type GetJobForApplicationFunc func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) -type GetCurrentDeploymentsForAppFunc func(ctx context.Context, application *v1beta2.FlinkApplication) (*common.FlinkDeployment, error) -type FindExternalizedCheckpointFunc func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (string, error) -type CompareAndUpdateClusterStatusFunc func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) -type CompareAndUpdateJobStatusFunc func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) -type GetLatestClusterStatusFunc func(ctx context.Context, app 
*v1beta2.FlinkApplication) v1beta2.FlinkClusterStatus -type GetLatestJobStatusFunc func(ctx context.Context, app *v1beta2.FlinkApplication) v1beta2.FlinkJobStatus -type GetLatestJobIDFunc func(ctx context.Context, app *v1beta2.FlinkApplication) string -type UpdateLatestJobIDFunc func(ctx context.Context, app *v1beta2.FlinkApplication, jobID string) -type UpdateLatestJobStatusFunc func(ctx context.Context, app *v1beta2.FlinkApplication, jobStatus v1beta2.FlinkJobStatus) -type UpdateLatestClusterStatusFunc func(ctx context.Context, app *v1beta2.FlinkApplication, clusterStatus v1beta2.FlinkClusterStatus) +type GetSavepointStatusFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.SavepointResponse, error) +type IsClusterReadyFunc func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) +type IsServiceReadyFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) +type GetJobsForApplicationFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) ([]client.FlinkJob, error) +type GetJobForApplicationFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) +type GetCurrentDeploymentsForAppFunc func(ctx context.Context, application *v1beta1.FlinkApplication) (*common.FlinkDeployment, error) +type FindExternalizedCheckpointFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) +type CompareAndUpdateClusterStatusFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) +type CompareAndUpdateJobStatusFunc func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) +type GetLatestClusterStatusFunc func(ctx context.Context, app *v1beta1.FlinkApplication) v1beta1.FlinkClusterStatus +type GetLatestJobStatusFunc func(ctx context.Context, app *v1beta1.FlinkApplication) 
v1beta1.FlinkJobStatus +type GetLatestJobIDFunc func(ctx context.Context, app *v1beta1.FlinkApplication) string +type UpdateLatestJobIDFunc func(ctx context.Context, app *v1beta1.FlinkApplication, jobID string) +type UpdateLatestJobStatusFunc func(ctx context.Context, app *v1beta1.FlinkApplication, jobStatus v1beta1.FlinkJobStatus) +type UpdateLatestClusterStatusFunc func(ctx context.Context, app *v1beta1.FlinkApplication, clusterStatus v1beta1.FlinkClusterStatus) type FlinkController struct { CreateClusterFunc CreateClusterFunc @@ -55,42 +55,42 @@ type FlinkController struct { UpdateLatestClusterStatusFunc UpdateLatestClusterStatusFunc } -func (m *FlinkController) GetCurrentDeploymentsForApp(ctx context.Context, application *v1beta2.FlinkApplication) (*common.FlinkDeployment, error) { +func (m *FlinkController) GetCurrentDeploymentsForApp(ctx context.Context, application *v1beta1.FlinkApplication) (*common.FlinkDeployment, error) { if m.GetCurrentDeploymentsForAppFunc != nil { return m.GetCurrentDeploymentsForAppFunc(ctx, application) } return nil, nil } -func (m *FlinkController) DeleteOldResourcesForApp(ctx context.Context, application *v1beta2.FlinkApplication) error { +func (m *FlinkController) DeleteOldResourcesForApp(ctx context.Context, application *v1beta1.FlinkApplication) error { if m.DeleteOldResourcesForAppFunc != nil { return m.DeleteOldResourcesForAppFunc(ctx, application) } return nil } -func (m *FlinkController) CreateCluster(ctx context.Context, application *v1beta2.FlinkApplication) error { +func (m *FlinkController) CreateCluster(ctx context.Context, application *v1beta1.FlinkApplication) error { if m.CreateClusterFunc != nil { return m.CreateClusterFunc(ctx, application) } return nil } -func (m *FlinkController) CancelWithSavepoint(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (string, error) { +func (m *FlinkController) CancelWithSavepoint(ctx context.Context, application *v1beta1.FlinkApplication, hash string) 
(string, error) { if m.CancelWithSavepointFunc != nil { return m.CancelWithSavepointFunc(ctx, application, hash) } return "", nil } -func (m *FlinkController) ForceCancel(ctx context.Context, application *v1beta2.FlinkApplication, hash string) error { +func (m *FlinkController) ForceCancel(ctx context.Context, application *v1beta1.FlinkApplication, hash string) error { if m.ForceCancelFunc != nil { return m.ForceCancelFunc(ctx, application, hash) } return nil } -func (m *FlinkController) StartFlinkJob(ctx context.Context, application *v1beta2.FlinkApplication, hash string, +func (m *FlinkController) StartFlinkJob(ctx context.Context, application *v1beta1.FlinkApplication, hash string, jarName string, parallelism int32, entryClass string, programArgs string, allowNonRestoredState bool, savepointPath string) (string, error) { if m.StartFlinkJobFunc != nil { return m.StartFlinkJobFunc(ctx, application, hash, jarName, parallelism, entryClass, programArgs, allowNonRestoredState, savepointPath) @@ -98,49 +98,49 @@ func (m *FlinkController) StartFlinkJob(ctx context.Context, application *v1beta return "", nil } -func (m *FlinkController) GetSavepointStatus(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.SavepointResponse, error) { +func (m *FlinkController) GetSavepointStatus(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.SavepointResponse, error) { if m.GetSavepointStatusFunc != nil { return m.GetSavepointStatusFunc(ctx, application, hash) } return nil, nil } -func (m *FlinkController) IsClusterReady(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { +func (m *FlinkController) IsClusterReady(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { if m.IsClusterReadyFunc != nil { return m.IsClusterReadyFunc(ctx, application) } return false, nil } -func (m *FlinkController) IsServiceReady(ctx context.Context, application *v1beta2.FlinkApplication, hash string) 
(bool, error) { +func (m *FlinkController) IsServiceReady(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) { if m.IsServiceReadyFunc != nil { return m.IsServiceReadyFunc(ctx, application, hash) } return false, nil } -func (m *FlinkController) GetJobsForApplication(ctx context.Context, application *v1beta2.FlinkApplication, hash string) ([]client.FlinkJob, error) { +func (m *FlinkController) GetJobsForApplication(ctx context.Context, application *v1beta1.FlinkApplication, hash string) ([]client.FlinkJob, error) { if m.GetJobsForApplicationFunc != nil { return m.GetJobsForApplicationFunc(ctx, application, hash) } return nil, nil } -func (m *FlinkController) GetJobForApplication(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { +func (m *FlinkController) GetJobForApplication(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { if m.GetJobForApplicationFunc != nil { return m.GetJobForApplicationFunc(ctx, application, hash) } return nil, nil } -func (m *FlinkController) FindExternalizedCheckpoint(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (string, error) { +func (m *FlinkController) FindExternalizedCheckpoint(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) { if m.FindExternalizedCheckpointFunc != nil { return m.FindExternalizedCheckpointFunc(ctx, application, hash) } return "", nil } -func (m *FlinkController) LogEvent(ctx context.Context, app *v1beta2.FlinkApplication, eventType string, reason string, message string) { +func (m *FlinkController) LogEvent(ctx context.Context, app *v1beta1.FlinkApplication, eventType string, reason string, message string) { m.Events = append(m.Events, corev1.Event{ InvolvedObject: corev1.ObjectReference{ Kind: app.Kind, @@ -153,7 +153,7 @@ func (m *FlinkController) LogEvent(ctx context.Context, app 
*v1beta2.FlinkApplic }) } -func (m *FlinkController) CompareAndUpdateClusterStatus(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) { +func (m *FlinkController) CompareAndUpdateClusterStatus(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) { if m.CompareAndUpdateClusterStatusFunc != nil { return m.CompareAndUpdateClusterStatusFunc(ctx, application, hash) } @@ -161,7 +161,7 @@ func (m *FlinkController) CompareAndUpdateClusterStatus(ctx context.Context, app return false, nil } -func (m *FlinkController) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta2.FlinkApplication, hash string) (bool, error) { +func (m *FlinkController) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, hash string) (bool, error) { if m.CompareAndUpdateJobStatusFunc != nil { return m.CompareAndUpdateJobStatusFunc(ctx, app, hash) } @@ -169,7 +169,7 @@ func (m *FlinkController) CompareAndUpdateJobStatus(ctx context.Context, app *v1 return false, nil } -func (m *FlinkController) GetLatestClusterStatus(ctx context.Context, application *v1beta2.FlinkApplication) v1beta2.FlinkClusterStatus { +func (m *FlinkController) GetLatestClusterStatus(ctx context.Context, application *v1beta1.FlinkApplication) v1beta1.FlinkClusterStatus { if m.GetLatestClusterStatusFunc != nil { return m.GetLatestClusterStatusFunc(ctx, application) } @@ -177,7 +177,7 @@ func (m *FlinkController) GetLatestClusterStatus(ctx context.Context, applicatio return application.Status.VersionStatuses[getCurrentStatusIndex(application)].ClusterStatus } -func (m *FlinkController) GetLatestJobStatus(ctx context.Context, application *v1beta2.FlinkApplication) v1beta2.FlinkJobStatus { +func (m *FlinkController) GetLatestJobStatus(ctx context.Context, application *v1beta1.FlinkApplication) v1beta1.FlinkJobStatus { if m.GetLatestClusterStatusFunc != nil { return m.GetLatestJobStatusFunc(ctx, application) } @@ -185,7 +185,7 @@ func (m 
*FlinkController) GetLatestJobStatus(ctx context.Context, application *v return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus } -func (m *FlinkController) GetLatestJobID(ctx context.Context, application *v1beta2.FlinkApplication) string { +func (m *FlinkController) GetLatestJobID(ctx context.Context, application *v1beta1.FlinkApplication) string { if m.GetLatestClusterStatusFunc != nil { return m.GetLatestJobIDFunc(ctx, application) } @@ -193,7 +193,7 @@ func (m *FlinkController) GetLatestJobID(ctx context.Context, application *v1bet return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID } -func (m *FlinkController) UpdateLatestJobID(ctx context.Context, application *v1beta2.FlinkApplication, jobID string) { +func (m *FlinkController) UpdateLatestJobID(ctx context.Context, application *v1beta1.FlinkApplication, jobID string) { if m.UpdateLatestJobIDFunc != nil { m.UpdateLatestJobIDFunc(ctx, application, jobID) } @@ -201,7 +201,7 @@ func (m *FlinkController) UpdateLatestJobID(ctx context.Context, application *v1 application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID = jobID } -func (m *FlinkController) UpdateLatestJobStatus(ctx context.Context, application *v1beta2.FlinkApplication, jobStatus v1beta2.FlinkJobStatus) { +func (m *FlinkController) UpdateLatestJobStatus(ctx context.Context, application *v1beta1.FlinkApplication, jobStatus v1beta1.FlinkJobStatus) { if m.UpdateLatestJobStatusFunc != nil { m.UpdateLatestJobStatusFunc(ctx, application, jobStatus) } @@ -209,7 +209,7 @@ func (m *FlinkController) UpdateLatestJobStatus(ctx context.Context, application application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus = jobStatus } -func (m *FlinkController) UpdateLatestClusterStatus(ctx context.Context, application *v1beta2.FlinkApplication, clusterStatus v1beta2.FlinkClusterStatus) { +func (m *FlinkController) UpdateLatestClusterStatus(ctx 
context.Context, application *v1beta1.FlinkApplication, clusterStatus v1beta1.FlinkClusterStatus) { if m.UpdateLatestClusterStatusFunc != nil { m.UpdateLatestClusterStatusFunc(ctx, application, clusterStatus) } @@ -217,9 +217,9 @@ func (m *FlinkController) UpdateLatestClusterStatus(ctx context.Context, applica application.Status.VersionStatuses[getCurrentStatusIndex(application)].ClusterStatus = clusterStatus } -func getCurrentStatusIndex(app *v1beta2.FlinkApplication) int32 { - desiredCount := v1beta2.GetMaxRunningJobs(app.Spec.DeploymentMode) - if v1beta2.IsRunningPhase(app.Status.Phase) { +func getCurrentStatusIndex(app *v1beta1.FlinkApplication) int32 { + desiredCount := v1beta1.GetMaxRunningJobs(app.Spec.DeploymentMode) + if v1beta1.IsRunningPhase(app.Status.Phase) { return 0 } diff --git a/pkg/controller/flink/mock/mock_job_manager_controller.go b/pkg/controller/flink/mock/mock_job_manager_controller.go index c9d511ad..7814f37d 100644 --- a/pkg/controller/flink/mock/mock_job_manager_controller.go +++ b/pkg/controller/flink/mock/mock_job_manager_controller.go @@ -3,7 +3,7 @@ package mock import ( "context" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" ) type JobManagerController struct { @@ -12,7 +12,7 @@ type JobManagerController struct { func (m *JobManagerController) CreateIfNotExist( ctx context.Context, - application *v1beta2.FlinkApplication) (bool, error) { + application *v1beta1.FlinkApplication) (bool, error) { if m.CreateIfNotExistFunc != nil { return m.CreateIfNotExistFunc(ctx, application) } diff --git a/pkg/controller/flink/mock/mock_task_manager_controller.go b/pkg/controller/flink/mock/mock_task_manager_controller.go index bc38311c..6857b088 100644 --- a/pkg/controller/flink/mock/mock_task_manager_controller.go +++ b/pkg/controller/flink/mock/mock_task_manager_controller.go @@ -3,17 +3,17 @@ package mock import ( "context" - 
"github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" ) -type CreateIfNotExistFunc func(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) +type CreateIfNotExistFunc func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) type TaskManagerController struct { CreateIfNotExistFunc CreateIfNotExistFunc } func (m *TaskManagerController) CreateIfNotExist( - ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { + ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { if m.CreateIfNotExistFunc != nil { return m.CreateIfNotExistFunc(ctx, application) } diff --git a/pkg/controller/flink/task_manager_controller.go b/pkg/controller/flink/task_manager_controller.go index fb75ddea..3b0d9d30 100644 --- a/pkg/controller/flink/task_manager_controller.go +++ b/pkg/controller/flink/task_manager_controller.go @@ -5,7 +5,7 @@ import ( "fmt" "math" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "github.com/lyft/flinkk8soperator/pkg/controller/common" "github.com/lyft/flinkk8soperator/pkg/controller/config" "github.com/lyft/flinkk8soperator/pkg/controller/k8" @@ -29,7 +29,7 @@ const ( ) type TaskManagerControllerInterface interface { - CreateIfNotExist(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) + CreateIfNotExist(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) } func NewTaskManagerController(k8sCluster k8.ClusterInterface, config config.RuntimeConfig) TaskManagerControllerInterface { @@ -71,7 +71,7 @@ var TaskManagerDefaultResources = coreV1.ResourceRequirements{ }, } -func (t *TaskManagerController) CreateIfNotExist(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { +func (t *TaskManagerController) CreateIfNotExist(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { 
hash := HashForApplication(application) taskManagerDeployment := FetchTaskMangerDeploymentCreateObj(application, hash) @@ -91,7 +91,7 @@ func (t *TaskManagerController) CreateIfNotExist(ctx context.Context, applicatio return false, nil } -func GetTaskManagerPorts(app *v1beta2.FlinkApplication) []coreV1.ContainerPort { +func GetTaskManagerPorts(app *v1beta1.FlinkApplication) []coreV1.ContainerPort { return []coreV1.ContainerPort{ { Name: FlinkRPCPortName, @@ -112,7 +112,7 @@ func GetTaskManagerPorts(app *v1beta2.FlinkApplication) []coreV1.ContainerPort { } } -func FetchTaskManagerContainerObj(application *v1beta2.FlinkApplication) *coreV1.Container { +func FetchTaskManagerContainerObj(application *v1beta1.FlinkApplication) *coreV1.Container { tmConfig := application.Spec.TaskManagerConfig ports := GetTaskManagerPorts(application) resources := tmConfig.Resources @@ -141,21 +141,21 @@ func FetchTaskManagerContainerObj(application *v1beta2.FlinkApplication) *coreV1 } } -func getTaskManagerPodName(application *v1beta2.FlinkApplication, hash string) string { +func getTaskManagerPodName(application *v1beta1.FlinkApplication, hash string) string { applicationName := application.Name - if v1beta2.IsBlueGreenDeploymentMode(application.Spec.DeploymentMode) { + if v1beta1.IsBlueGreenDeploymentMode(application.Spec.DeploymentMode) { applicationVersion := application.Status.UpdatingVersion return fmt.Sprintf(TaskManagerVersionPodNameFormat, applicationName, hash, applicationVersion) } return fmt.Sprintf(TaskManagerPodNameFormat, applicationName, hash) } -func getTaskManagerName(application *v1beta2.FlinkApplication, hash string) string { +func getTaskManagerName(application *v1beta1.FlinkApplication, hash string) string { applicationName := application.Name return fmt.Sprintf(TaskManagerNameFormat, applicationName, hash) } -func computeTaskManagerReplicas(application *v1beta2.FlinkApplication) int32 { +func computeTaskManagerReplicas(application *v1beta1.FlinkApplication) int32 
{ slots := getTaskmanagerSlots(application) parallelism := application.Spec.Parallelism return int32(math.Ceil(float64(parallelism) / float64(slots))) @@ -169,7 +169,7 @@ func DeploymentIsTaskmanager(deployment *v1.Deployment) bool { // made very carefully. Any new version v' that causes DeploymentsEqual(v(x), v'(x)) to be false // will cause redeployments for all applications, and should be considered a breaking change that // requires a new version of the CRD. -func taskmanagerTemplate(app *v1beta2.FlinkApplication) *v1.Deployment { +func taskmanagerTemplate(app *v1beta1.FlinkApplication) *v1.Deployment { labels := getCommonAppLabels(app) labels = common.CopyMap(labels, app.Labels) labels[FlinkDeploymentType] = FlinkDeploymentTypeTaskmanager @@ -232,7 +232,7 @@ func taskmanagerTemplate(app *v1beta2.FlinkApplication) *v1.Deployment { return deployment } -func FetchTaskMangerDeploymentCreateObj(app *v1beta2.FlinkApplication, hash string) *v1.Deployment { +func FetchTaskMangerDeploymentCreateObj(app *v1beta1.FlinkApplication, hash string) *v1.Deployment { template := taskmanagerTemplate(app.DeepCopy()) template.Name = getTaskManagerName(app, hash) @@ -246,7 +246,7 @@ func FetchTaskMangerDeploymentCreateObj(app *v1beta2.FlinkApplication, hash stri return template } -func TaskManagerDeploymentMatches(deployment *v1.Deployment, application *v1beta2.FlinkApplication, hash string) bool { +func TaskManagerDeploymentMatches(deployment *v1.Deployment, application *v1beta1.FlinkApplication, hash string) bool { deploymentName := getTaskManagerName(application, hash) return deployment.Name == deploymentName } diff --git a/pkg/controller/flink/task_manager_controller_test.go b/pkg/controller/flink/task_manager_controller_test.go index 3edf62be..9f3edca6 100644 --- a/pkg/controller/flink/task_manager_controller_test.go +++ b/pkg/controller/flink/task_manager_controller_test.go @@ -8,7 +8,7 @@ import ( "context" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + 
"github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "github.com/lyft/flinkk8soperator/pkg/controller/common" "github.com/lyft/flytestdlib/promutils/labeled" "github.com/pkg/errors" @@ -31,7 +31,7 @@ func getTMControllerForTest() TaskManagerController { } func TestComputeTaskManagerReplicas(t *testing.T) { - app := v1beta2.FlinkApplication{} + app := v1beta1.FlinkApplication{} taskSlots := int32(4) app.Spec.TaskManagerConfig.TaskSlots = &taskSlots app.Spec.Parallelism = 9 @@ -52,7 +52,7 @@ func TestGetTaskManagerPodName(t *testing.T) { func TestGetTaskManagerPodNameWithVersion(t *testing.T) { app := getFlinkTestApp() - app.Spec.DeploymentMode = v1beta2.DeploymentModeBlueGreen + app.Spec.DeploymentMode = v1beta1.DeploymentModeBlueGreen app.Status.UpdatingVersion = testVersion assert.Equal(t, "app-name-"+testAppHash+"-tm-"+testVersion+"-pod", getTaskManagerPodName(&app, testAppHash)) } @@ -237,7 +237,7 @@ func TestTaskManagerCreateSuccessWithVersion(t *testing.T) { app.Spec.JarName = testJarName app.Spec.EntryClass = testEntryClass app.Spec.ProgramArgs = testProgramArgs - app.Spec.DeploymentMode = v1beta2.DeploymentModeBlueGreen + app.Spec.DeploymentMode = v1beta1.DeploymentModeBlueGreen app.Status.UpdatingVersion = testVersion annotations := map[string]string{ "key": "annotation", diff --git a/pkg/controller/flinkapplication/controller.go b/pkg/controller/flinkapplication/controller.go index 79262ba4..54513b78 100644 --- a/pkg/controller/flinkapplication/controller.go +++ b/pkg/controller/flinkapplication/controller.go @@ -6,7 +6,7 @@ import ( "github.com/lyft/flytestdlib/promutils" "github.com/lyft/flytestdlib/promutils/labeled" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "github.com/lyft/flinkk8soperator/pkg/controller/config" "sigs.k8s.io/controller-runtime/pkg/controller" @@ -87,11 +87,11 @@ func (r *ReconcileFlinkApplication) Reconcile(request reconcile.Request) (reconc ctx = 
contextutils.WithNamespace(ctx, request.Namespace) ctx = contextutils.WithAppName(ctx, request.Name) typeMeta := metaV1.TypeMeta{ - Kind: v1beta2.FlinkApplicationKind, - APIVersion: v1beta2.SchemeGroupVersion.String(), + Kind: v1beta1.FlinkApplicationKind, + APIVersion: v1beta1.SchemeGroupVersion.String(), } // Fetch the FlinkApplication instance - instance := &v1beta2.FlinkApplication{ + instance := &v1beta1.FlinkApplication{ TypeMeta: typeMeta, } @@ -140,7 +140,7 @@ func Add(ctx context.Context, mgr manager.Manager, cfg config.RuntimeConfig) err return err } - if err = c.Watch(&source.Kind{Type: &v1beta2.FlinkApplication{}}, &handler.EnqueueRequestForObject{}); err != nil { + if err = c.Watch(&source.Kind{Type: &v1beta1.FlinkApplication{}}, &handler.EnqueueRequestForObject{}); err != nil { return err } @@ -157,8 +157,8 @@ func Add(ctx context.Context, mgr manager.Manager, cfg config.RuntimeConfig) err func isOwnedByFlinkApplication(ownerReferences []metaV1.OwnerReference) bool { for _, ownerReference := range ownerReferences { - if ownerReference.APIVersion == v1beta2.SchemeGroupVersion.String() && - ownerReference.Kind == v1beta2.FlinkApplicationKind { + if ownerReference.APIVersion == v1beta1.SchemeGroupVersion.String() && + ownerReference.Kind == v1beta1.FlinkApplicationKind { return true } } diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index 5ed099d1..c23c07d3 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -11,7 +11,7 @@ import ( "fmt" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "github.com/lyft/flinkk8soperator/pkg/controller/config" "github.com/lyft/flinkk8soperator/pkg/controller/flink" "github.com/lyft/flinkk8soperator/pkg/controller/flink/client" @@ -33,7 +33,7 @@ const ( // The core state machine that manages Flink 
clusters and jobs. See docs/state_machine.md for a description of the // states and transitions. type FlinkHandlerInterface interface { - Handle(ctx context.Context, application *v1beta2.FlinkApplication) error + Handle(ctx context.Context, application *v1beta1.FlinkApplication) error } type FlinkStateMachine struct { @@ -46,18 +46,18 @@ type FlinkStateMachine struct { type stateMachineMetrics struct { scope promutils.Scope - stateMachineHandlePhaseMap map[v1beta2.FlinkApplicationPhase]labeled.StopWatch - stateMachineHandleSuccessPhaseMap map[v1beta2.FlinkApplicationPhase]labeled.StopWatch - errorCounterPhaseMap map[v1beta2.FlinkApplicationPhase]labeled.Counter + stateMachineHandlePhaseMap map[v1beta1.FlinkApplicationPhase]labeled.StopWatch + stateMachineHandleSuccessPhaseMap map[v1beta1.FlinkApplicationPhase]labeled.StopWatch + errorCounterPhaseMap map[v1beta1.FlinkApplicationPhase]labeled.Counter } func newStateMachineMetrics(scope promutils.Scope) *stateMachineMetrics { stateMachineScope := scope.NewSubScope("state_machine") - stateMachineHandlePhaseMap := map[v1beta2.FlinkApplicationPhase]labeled.StopWatch{} - stateMachineHandleSuccessPhaseMap := map[v1beta2.FlinkApplicationPhase]labeled.StopWatch{} - errorCounterPhaseMap := map[v1beta2.FlinkApplicationPhase]labeled.Counter{} + stateMachineHandlePhaseMap := map[v1beta1.FlinkApplicationPhase]labeled.StopWatch{} + stateMachineHandleSuccessPhaseMap := map[v1beta1.FlinkApplicationPhase]labeled.StopWatch{} + errorCounterPhaseMap := map[v1beta1.FlinkApplicationPhase]labeled.Counter{} - for _, phase := range v1beta2.FlinkApplicationPhases { + for _, phase := range v1beta1.FlinkApplicationPhases { phaseName := phase.VerboseString() stateMachineHandleSuccessPhaseMap[phase] = labeled.NewStopWatch(phaseName+"_"+"handle_time_success", fmt.Sprintf("Total time to handle the %s application state on success", phaseName), time.Millisecond, stateMachineScope) @@ -74,12 +74,12 @@ func newStateMachineMetrics(scope promutils.Scope) 
*stateMachineMetrics { } } -func (s *FlinkStateMachine) updateApplicationPhase(application *v1beta2.FlinkApplication, phase v1beta2.FlinkApplicationPhase) { +func (s *FlinkStateMachine) updateApplicationPhase(application *v1beta1.FlinkApplication, phase v1beta1.FlinkApplicationPhase) { application.Status.Phase = phase } -func (s *FlinkStateMachine) shouldRollback(ctx context.Context, application *v1beta2.FlinkApplication) (bool, string) { - if application.Spec.ForceRollback && application.Status.Phase != v1beta2.FlinkApplicationRollingBackJob { +func (s *FlinkStateMachine) shouldRollback(ctx context.Context, application *v1beta1.FlinkApplication) (bool, string) { + if application.Spec.ForceRollback && application.Status.Phase != v1beta1.FlinkApplicationRollingBackJob { return true, "forceRollback is set in the resource" } if application.Status.DeployHash == "" { @@ -118,7 +118,7 @@ func (s *FlinkStateMachine) shouldRollback(ctx context.Context, application *v1b return false, "" } -func (s *FlinkStateMachine) Handle(ctx context.Context, application *v1beta2.FlinkApplication) error { +func (s *FlinkStateMachine) Handle(ctx context.Context, application *v1beta1.FlinkApplication) error { currentPhase := application.Status.Phase if _, ok := s.metrics.stateMachineHandlePhaseMap[currentPhase]; !ok { errMsg := fmt.Sprintf("Invalid state %s for the application", currentPhase) @@ -148,7 +148,7 @@ func (s *FlinkStateMachine) Handle(ctx context.Context, application *v1beta2.Fli return err } -func (s *FlinkStateMachine) handle(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) handle(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { var appErr error updateApplication := false updateLastSeenError := false @@ -156,39 +156,39 @@ func (s *FlinkStateMachine) handle(ctx context.Context, application *v1beta2.Fli // initialize application status array if it's not yet been initialized 
s.initializeAppStatusIfEmpty(ctx, application) - if !application.ObjectMeta.DeletionTimestamp.IsZero() && appPhase != v1beta2.FlinkApplicationDeleting { - s.updateApplicationPhase(application, v1beta2.FlinkApplicationDeleting) + if !application.ObjectMeta.DeletionTimestamp.IsZero() && appPhase != v1beta1.FlinkApplicationDeleting { + s.updateApplicationPhase(application, v1beta1.FlinkApplicationDeleting) // Always perform a single application update per callback return statusChanged, nil } if s.IsTimeToHandlePhase(application, appPhase) { - if !v1beta2.IsRunningPhase(application.Status.Phase) { + if !v1beta1.IsRunningPhase(application.Status.Phase) { logger.Infof(ctx, "Handling state for application") } switch application.Status.Phase { - case v1beta2.FlinkApplicationNew, v1beta2.FlinkApplicationUpdating: + case v1beta1.FlinkApplicationNew, v1beta1.FlinkApplicationUpdating: // Currently just transitions to the next state updateApplication, appErr = s.handleNewOrUpdating(ctx, application) - case v1beta2.FlinkApplicationClusterStarting: + case v1beta1.FlinkApplicationClusterStarting: updateApplication, appErr = s.handleClusterStarting(ctx, application) - case v1beta2.FlinkApplicationSubmittingJob: + case v1beta1.FlinkApplicationSubmittingJob: updateApplication, appErr = s.handleSubmittingJob(ctx, application) - case v1beta2.FlinkApplicationRunning, v1beta2.FlinkApplicationDeployFailed: + case v1beta1.FlinkApplicationRunning, v1beta1.FlinkApplicationDeployFailed: updateApplication, appErr = s.handleApplicationRunning(ctx, application) - case v1beta2.FlinkApplicationCancelling: + case v1beta1.FlinkApplicationCancelling: updateApplication, appErr = s.handleApplicationCancelling(ctx, application) - case v1beta2.FlinkApplicationSavepointing: + case v1beta1.FlinkApplicationSavepointing: updateApplication, appErr = s.handleApplicationSavepointing(ctx, application) - case v1beta2.FlinkApplicationRecovering: + case v1beta1.FlinkApplicationRecovering: updateApplication, appErr 
= s.handleApplicationRecovering(ctx, application) - case v1beta2.FlinkApplicationRollingBackJob: + case v1beta1.FlinkApplicationRollingBackJob: updateApplication, appErr = s.handleRollingBack(ctx, application) - case v1beta2.FlinkApplicationDeleting: + case v1beta1.FlinkApplicationDeleting: updateApplication, appErr = s.handleApplicationDeleting(ctx, application) } - if !v1beta2.IsRunningPhase(appPhase) { + if !v1beta1.IsRunningPhase(appPhase) { // Only update LastSeenError and thereby invoke error handling logic for // non-Running phases updateLastSeenError = s.compareAndUpdateError(application, appErr) @@ -199,8 +199,8 @@ func (s *FlinkStateMachine) handle(ctx context.Context, application *v1beta2.Fli return updateApplication || updateLastSeenError, appErr } -func (s *FlinkStateMachine) IsTimeToHandlePhase(application *v1beta2.FlinkApplication, phase v1beta2.FlinkApplicationPhase) bool { - if phase == v1beta2.FlinkApplicationDeleting { +func (s *FlinkStateMachine) IsTimeToHandlePhase(application *v1beta1.FlinkApplication, phase v1beta1.FlinkApplicationPhase) bool { + if phase == v1beta1.FlinkApplicationDeleting { // reset lastSeenError and retryCount in case the application was failing in its previous phase // We always want a Deleting phase to be handled application.Status.LastSeenError = nil @@ -229,7 +229,7 @@ func (s *FlinkStateMachine) IsTimeToHandlePhase(application *v1beta2.FlinkApplic } // In this state we create a new cluster, either due to an entirely new FlinkApplication or due to an update. 
-func (s *FlinkStateMachine) handleNewOrUpdating(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) handleNewOrUpdating(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { // TODO: add up-front validation on the FlinkApplication resource if rollback, reason := s.shouldRollback(ctx, application); rollback { // we've failed to make progress; move to deploy failed @@ -244,11 +244,11 @@ func (s *FlinkStateMachine) handleNewOrUpdating(ctx context.Context, application logger.Errorf(ctx, "Cluster creation failed with error: %v", err) return statusUnchanged, err } - s.updateApplicationPhase(application, v1beta2.FlinkApplicationClusterStarting) + s.updateApplicationPhase(application, v1beta1.FlinkApplicationClusterStarting) return statusChanged, nil } -func (s *FlinkStateMachine) deployFailed(app *v1beta2.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) deployFailed(app *v1beta1.FlinkApplication) (bool, error) { hash := flink.HashForApplication(app) app.Status.FailedDeployHash = hash // set rollbackHash to deployHash @@ -257,12 +257,12 @@ func (s *FlinkStateMachine) deployFailed(app *v1beta2.FlinkApplication) (bool, e app.Status.LastSeenError = nil app.Status.RetryCount = 0 - s.updateApplicationPhase(app, v1beta2.FlinkApplicationDeployFailed) + s.updateApplicationPhase(app, v1beta1.FlinkApplicationDeployFailed) return statusChanged, nil } // Create the underlying Kubernetes objects for the new cluster -func (s *FlinkStateMachine) handleClusterStarting(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) handleClusterStarting(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { if rollback, reason := s.shouldRollback(ctx, application); rollback { // we've failed to make progress; move to deploy failed // TODO: this will need different logic in single mode @@ -287,43 +287,43 @@ func (s *FlinkStateMachine) 
handleClusterStarting(ctx context.Context, applicati logger.Infof(ctx, "Flink cluster has started successfully") // TODO: in single mode move to submitting job if application.Spec.SavepointDisabled { - s.updateApplicationPhase(application, v1beta2.FlinkApplicationCancelling) + s.updateApplicationPhase(application, v1beta1.FlinkApplicationCancelling) } else { - s.updateApplicationPhase(application, v1beta2.FlinkApplicationSavepointing) + s.updateApplicationPhase(application, v1beta1.FlinkApplicationSavepointing) } return statusChanged, nil } -func (s *FlinkStateMachine) initializeAppStatusIfEmpty(ctx context.Context, application *v1beta2.FlinkApplication) { +func (s *FlinkStateMachine) initializeAppStatusIfEmpty(ctx context.Context, application *v1beta1.FlinkApplication) { // initialize the app status array to include 2 status elements in case of blue green deploys // else use a one element array arraySize := 1 - if application.Spec.DeploymentMode == v1beta2.DeploymentModeBlueGreen { + if application.Spec.DeploymentMode == v1beta1.DeploymentModeBlueGreen { arraySize = 2 } if len(application.Status.VersionStatuses) == 0 { - application.Status.VersionStatuses = make([]v1beta2.FlinkApplicationVersionStatus, arraySize) + application.Status.VersionStatuses = make([]v1beta1.FlinkApplicationVersionStatus, arraySize) } // If we're reading a v1beta1 app, populate the first element of the status array from // the top-level jobStatus and clusterStatus - if application.Status.JobStatus != (v1beta2.FlinkJobStatus{}) { + if application.Status.JobStatus != (v1beta1.FlinkJobStatus{}) { s.flinkController.UpdateLatestJobStatus(ctx, application, application.Status.JobStatus) - application.Status.JobStatus = v1beta2.FlinkJobStatus{} + application.Status.JobStatus = v1beta1.FlinkJobStatus{} } - if application.Status.ClusterStatus != (v1beta2.FlinkClusterStatus{}) { + if application.Status.ClusterStatus != (v1beta1.FlinkClusterStatus{}) { s.flinkController.UpdateLatestClusterStatus(ctx, 
application, application.Status.ClusterStatus) - application.Status.ClusterStatus = v1beta2.FlinkClusterStatus{} + application.Status.ClusterStatus = v1beta1.FlinkClusterStatus{} } } -func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { // we've already savepointed (or this is our first deploy), continue on if application.Status.SavepointPath != "" || application.Status.DeployHash == "" { s.flinkController.UpdateLatestJobID(ctx, application, "") - s.updateApplicationPhase(application, v1beta2.FlinkApplicationSubmittingJob) + s.updateApplicationPhase(application, v1beta1.FlinkApplicationSubmittingJob) return statusChanged, nil } @@ -331,7 +331,7 @@ func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, a s.flinkController.LogEvent(ctx, application, corev1.EventTypeWarning, "SavepointFailed", fmt.Sprintf("Could not savepoint existing job: %s", reason)) application.Status.RetryCount = 0 - s.updateApplicationPhase(application, v1beta2.FlinkApplicationRecovering) + s.updateApplicationPhase(application, v1beta1.FlinkApplicationRecovering) return statusChanged, nil } @@ -364,7 +364,7 @@ func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, a fmt.Sprintf("Failed to take savepoint for job %s: %v", s.flinkController.GetLatestJobID(ctx, application), savepointStatusResponse.Operation.FailureCause)) application.Status.RetryCount = 0 - s.updateApplicationPhase(application, v1beta2.FlinkApplicationRecovering) + s.updateApplicationPhase(application, v1beta1.FlinkApplicationRecovering) return statusChanged, nil } else if savepointStatusResponse.SavepointStatus.Status == client.SavePointCompleted { s.flinkController.LogEvent(ctx, application, corev1.EventTypeNormal, "CanceledJob", @@ -372,18 +372,18 @@ func (s 
*FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, a savepointStatusResponse.Operation.Location)) application.Status.SavepointPath = savepointStatusResponse.Operation.Location s.flinkController.UpdateLatestJobID(ctx, application, "") - s.updateApplicationPhase(application, v1beta2.FlinkApplicationSubmittingJob) + s.updateApplicationPhase(application, v1beta1.FlinkApplicationSubmittingJob) return statusChanged, nil } return statusUnchanged, nil } -func (s *FlinkStateMachine) handleApplicationCancelling(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) handleApplicationCancelling(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { // this is the first deploy if application.Status.DeployHash == "" { - s.updateApplicationPhase(application, v1beta2.FlinkApplicationSubmittingJob) + s.updateApplicationPhase(application, v1beta1.FlinkApplicationSubmittingJob) return statusChanged, nil } @@ -392,7 +392,7 @@ func (s *FlinkStateMachine) handleApplicationCancelling(ctx context.Context, app fmt.Sprintf("Could not cancel existing job: %s", reason)) application.Status.RetryCount = 0 application.Status.JobStatus.JobID = "" - s.updateApplicationPhase(application, v1beta2.FlinkApplicationRollingBackJob) + s.updateApplicationPhase(application, v1beta1.FlinkApplicationRollingBackJob) return statusChanged, nil } @@ -410,18 +410,18 @@ func (s *FlinkStateMachine) handleApplicationCancelling(ctx context.Context, app } application.Status.JobStatus.JobID = "" - s.updateApplicationPhase(application, v1beta2.FlinkApplicationSubmittingJob) + s.updateApplicationPhase(application, v1beta1.FlinkApplicationSubmittingJob) return statusChanged, nil } -func (s *FlinkStateMachine) handleApplicationRecovering(ctx context.Context, app *v1beta2.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) handleApplicationRecovering(ctx context.Context, app *v1beta1.FlinkApplication) (bool, error) { // 
we're in the middle of a deploy, and savepointing has failed in some way... we're going to try to recover // and push through if possible if rollback, reason := s.shouldRollback(ctx, app); rollback { // we failed to recover, attempt to rollback s.flinkController.LogEvent(ctx, app, corev1.EventTypeWarning, "RecoveryFailed", fmt.Sprintf("Failed to recover with externalized checkpoint: %s", reason)) - s.updateApplicationPhase(app, v1beta2.FlinkApplicationRollingBackJob) + s.updateApplicationPhase(app, v1beta1.FlinkApplicationRollingBackJob) return statusChanged, nil } @@ -449,11 +449,11 @@ func (s *FlinkStateMachine) handleApplicationRecovering(ctx context.Context, app app.Status.SavepointPath = path s.flinkController.UpdateLatestJobID(ctx, app, "") - s.updateApplicationPhase(app, v1beta2.FlinkApplicationSubmittingJob) + s.updateApplicationPhase(app, v1beta1.FlinkApplicationSubmittingJob) return statusChanged, nil } -func (s *FlinkStateMachine) submitJobIfNeeded(ctx context.Context, app *v1beta2.FlinkApplication, hash string, +func (s *FlinkStateMachine) submitJobIfNeeded(ctx context.Context, app *v1beta1.FlinkApplication, hash string, jarName string, parallelism int32, entryClass string, programArgs string, allowNonRestoredState bool, savepointPath string) (string, error) { @@ -501,7 +501,7 @@ func (s *FlinkStateMachine) submitJobIfNeeded(ctx context.Context, app *v1beta2. 
} } -func (s *FlinkStateMachine) updateGenericService(ctx context.Context, app *v1beta2.FlinkApplication, newHash string) error { +func (s *FlinkStateMachine) updateGenericService(ctx context.Context, app *v1beta1.FlinkApplication, newHash string) error { service, err := s.k8Cluster.GetService(ctx, app.Namespace, app.Name) if err != nil { return err @@ -525,13 +525,13 @@ func (s *FlinkStateMachine) updateGenericService(ctx context.Context, app *v1bet return nil } -func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta2.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta1.FlinkApplication) (bool, error) { if rollback, reason := s.shouldRollback(ctx, app); rollback { // Something's gone wrong; roll back s.flinkController.LogEvent(ctx, app, corev1.EventTypeWarning, "JobSubmissionFailed", fmt.Sprintf("Failed to submit job: %s", reason)) s.flinkController.UpdateLatestJobID(ctx, app, "") - s.updateApplicationPhase(app, v1beta2.FlinkApplicationRollingBackJob) + s.updateApplicationPhase(app, v1beta1.FlinkApplicationRollingBackJob) return statusChanged, nil } @@ -610,7 +610,7 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta jobStatus.ProgramArgs = app.Spec.ProgramArgs jobStatus.AllowNonRestoredState = app.Spec.AllowNonRestoredState s.flinkController.UpdateLatestJobStatus(ctx, app, jobStatus) - s.updateApplicationPhase(app, v1beta2.FlinkApplicationRunning) + s.updateApplicationPhase(app, v1beta1.FlinkApplicationRunning) return statusChanged, nil } @@ -619,7 +619,7 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta // Something has gone wrong during the update, post job-cancellation (and cluster tear-down in single mode). 
We need // to try to get things back into a working state -func (s *FlinkStateMachine) handleRollingBack(ctx context.Context, app *v1beta2.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) handleRollingBack(ctx context.Context, app *v1beta1.FlinkApplication) (bool, error) { if rollback, reason := s.shouldRollback(ctx, app); rollback { // we've failed in our roll back attempt (presumably because something's now wrong with the original cluster) // move immediately to the DeployFailed state so that the user can recover. @@ -678,7 +678,7 @@ func (s *FlinkStateMachine) handleRollingBack(ctx context.Context, app *v1beta2. // Check if the application is Running. // This is a stable state. Keep monitoring if the underlying CRD reflects the Flink cluster -func (s *FlinkStateMachine) handleApplicationRunning(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) handleApplicationRunning(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { cur, err := s.flinkController.GetCurrentDeploymentsForApp(ctx, application) if err != nil { return statusUnchanged, err @@ -689,7 +689,7 @@ func (s *FlinkStateMachine) handleApplicationRunning(ctx context.Context, applic if cur == nil { logger.Infof(ctx, "Application resource has changed. 
Moving to Updating") // TODO: handle single mode - s.updateApplicationPhase(application, v1beta2.FlinkApplicationUpdating) + s.updateApplicationPhase(application, v1beta1.FlinkApplicationUpdating) return statusChanged, nil } @@ -731,7 +731,7 @@ func (s *FlinkStateMachine) handleApplicationRunning(ctx context.Context, applic return statusUnchanged, nil } -func (s *FlinkStateMachine) addFinalizerIfMissing(ctx context.Context, application *v1beta2.FlinkApplication, finalizer string) error { +func (s *FlinkStateMachine) addFinalizerIfMissing(ctx context.Context, application *v1beta1.FlinkApplication, finalizer string) error { for _, f := range application.Finalizers { if f == finalizer { return nil @@ -754,7 +754,7 @@ func removeString(list []string, target string) []string { return ret } -func (s *FlinkStateMachine) clearFinalizers(ctx context.Context, app *v1beta2.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) clearFinalizers(ctx context.Context, app *v1beta1.FlinkApplication) (bool, error) { app.Finalizers = removeString(app.Finalizers, jobFinalizer) return statusUnchanged, s.k8Cluster.UpdateK8Object(ctx, app) } @@ -766,7 +766,7 @@ func jobFinished(job *client.FlinkJobOverview) bool { job.State == client.Finished } -func (s *FlinkStateMachine) handleApplicationDeleting(ctx context.Context, app *v1beta2.FlinkApplication) (bool, error) { +func (s *FlinkStateMachine) handleApplicationDeleting(ctx context.Context, app *v1beta1.FlinkApplication) (bool, error) { // There should be a way for the user to force deletion (e.g., if the job is failing and they can't // savepoint). However, this seems dangerous to do automatically. 
// If https://github.com/kubernetes/kubernetes/issues/56567 is fixed users will be able to use @@ -774,7 +774,7 @@ func (s *FlinkStateMachine) handleApplicationDeleting(ctx context.Context, app * // If the delete mode is none or there's no deployhash set (which means we failed to submit the job on the // first deploy) just delete the finalizer so the cluster can be torn down - if app.Spec.DeleteMode == v1beta2.DeleteModeNone || app.Status.DeployHash == "" { + if app.Spec.DeleteMode == v1beta1.DeleteModeNone || app.Status.DeployHash == "" { return s.clearFinalizers(ctx, app) } @@ -788,7 +788,7 @@ func (s *FlinkStateMachine) handleApplicationDeleting(ctx context.Context, app * } switch app.Spec.DeleteMode { - case v1beta2.DeleteModeForceCancel: + case v1beta1.DeleteModeForceCancel: if job.State == client.Cancelling { // we've already cancelled the job, waiting for it to finish return statusUnchanged, nil @@ -799,7 +799,7 @@ func (s *FlinkStateMachine) handleApplicationDeleting(ctx context.Context, app * logger.Infof(ctx, "Force-cancelling job without a savepoint") return statusUnchanged, s.flinkController.ForceCancel(ctx, app, app.Status.DeployHash) - case v1beta2.DeleteModeSavepoint, "": + case v1beta1.DeleteModeSavepoint, "": if app.Status.SavepointPath != "" { // we've already created the savepoint, now just waiting for the job to be cancelled if jobFinished(job) { @@ -832,7 +832,7 @@ func (s *FlinkStateMachine) handleApplicationDeleting(ctx context.Context, app * // clear the trigger id so that we can try again app.Status.SavepointTriggerID = "" return true, client.GetRetryableError(errors.New("failed to take savepoint"), - v1beta2.CancelJobWithSavepoint, "500", math.MaxInt32) + v1beta1.CancelJobWithSavepoint, "500", math.MaxInt32) } else if status.SavepointStatus.Status == client.SavePointCompleted { // we're done, clean up s.flinkController.LogEvent(ctx, app, corev1.EventTypeNormal, "CanceledJob", @@ -850,7 +850,7 @@ func (s *FlinkStateMachine) 
handleApplicationDeleting(ctx context.Context, app * return statusUnchanged, nil } -func (s *FlinkStateMachine) compareAndUpdateError(application *v1beta2.FlinkApplication, err error) bool { +func (s *FlinkStateMachine) compareAndUpdateError(application *v1beta1.FlinkApplication, err error) bool { oldErr := application.Status.LastSeenError if err == nil && oldErr == nil { @@ -860,11 +860,11 @@ func (s *FlinkStateMachine) compareAndUpdateError(application *v1beta2.FlinkAppl if err == nil { application.Status.LastSeenError = nil } else { - if flinkAppError, ok := err.(*v1beta2.FlinkApplicationError); ok { + if flinkAppError, ok := err.(*v1beta1.FlinkApplicationError); ok { application.Status.LastSeenError = flinkAppError } else { err = client.GetRetryableError(err, "UnknownMethod", client.GlobalFailure, client.DefaultRetries) - application.Status.LastSeenError = err.(*v1beta2.FlinkApplicationError) + application.Status.LastSeenError = err.(*v1beta1.FlinkApplicationError) } now := v1.NewTime(s.clock.Now()) diff --git a/pkg/controller/flinkapplication/flink_state_machine_test.go b/pkg/controller/flinkapplication/flink_state_machine_test.go index d9d1dbc9..fc15c62c 100644 --- a/pkg/controller/flinkapplication/flink_state_machine_test.go +++ b/pkg/controller/flinkapplication/flink_state_machine_test.go @@ -11,7 +11,7 @@ import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta2" + "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "github.com/lyft/flinkk8soperator/pkg/controller/common" "github.com/lyft/flinkk8soperator/pkg/controller/flink/mock" k8mock "github.com/lyft/flinkk8soperator/pkg/controller/k8/mock" @@ -37,7 +37,7 @@ func getTestStateMachine() FlinkStateMachine { } } -func testFlinkDeployment(app *v1beta2.FlinkApplication) common.FlinkDeployment { +func testFlinkDeployment(app *v1beta1.FlinkApplication) common.FlinkDeployment { hash := flink.HashForApplication(app) return 
common.FlinkDeployment{ Jobmanager: flink.FetchJobMangerDeploymentCreateObj(app, hash), @@ -51,13 +51,13 @@ func TestHandleNewOrCreate(t *testing.T) { mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.UpdateK8ObjectFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta2.FlinkApplication) - assert.Equal(t, v1beta2.FlinkApplicationClusterStarting, application.Status.Phase) + application := object.(*v1beta1.FlinkApplication) + assert.Equal(t, v1beta1.FlinkApplicationClusterStarting, application.Status.Phase) return nil } - err := stateMachineForTest.Handle(context.Background(), &v1beta2.FlinkApplication{ - Spec: v1beta2.FlinkApplicationSpec{}, + err := stateMachineForTest.Handle(context.Background(), &v1beta1.FlinkApplication{ + Spec: v1beta1.FlinkApplicationSpec{}, }) assert.Nil(t, err) } @@ -65,7 +65,7 @@ func TestHandleNewOrCreate(t *testing.T) { func TestHandleStartingClusterStarting(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.IsClusterReadyFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { + mockFlinkController.IsClusterReadyFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { return false, nil } @@ -73,9 +73,9 @@ func TestHandleStartingClusterStarting(t *testing.T) { mockK8Cluster.UpdateK8ObjectFunc = func(ctx context.Context, object runtime.Object) error { return nil } - err := stateMachineForTest.Handle(context.Background(), &v1beta2.FlinkApplication{ - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationClusterStarting, + err := stateMachineForTest.Handle(context.Background(), &v1beta1.FlinkApplication{ + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationClusterStarting, }, }) assert.Nil(t, err) @@ -83,12 +83,12 @@ func 
TestHandleStartingClusterStarting(t *testing.T) { func TestHandleNewOrCreateWithSavepointDisabled(t *testing.T) { updateInvoked := false - app := v1beta2.FlinkApplication{ - Spec: v1beta2.FlinkApplicationSpec{ + app := v1beta1.FlinkApplication{ + Spec: v1beta1.FlinkApplicationSpec{ SavepointDisabled: true, }, - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationClusterStarting, + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationClusterStarting, DeployHash: "old-hash", }, } @@ -96,13 +96,13 @@ func TestHandleNewOrCreateWithSavepointDisabled(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.IsClusterReadyFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { + mockFlinkController.IsClusterReadyFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { return true, nil } - mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (b bool, e error) { + mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (b bool, e error) { return true, nil } - mockFlinkController.GetCurrentDeploymentsForAppFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (*common.FlinkDeployment, error) { + mockFlinkController.GetCurrentDeploymentsForAppFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (*common.FlinkDeployment, error) { fd := testFlinkDeployment(application) fd.Taskmanager.Status.AvailableReplicas = 2 fd.Jobmanager.Status.AvailableReplicas = 1 @@ -115,8 +115,8 @@ func TestHandleNewOrCreateWithSavepointDisabled(t *testing.T) { } mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta2.FlinkApplication) - assert.Equal(t, 
v1beta2.FlinkApplicationCancelling, application.Status.Phase) + application := object.(*v1beta1.FlinkApplication) + assert.Equal(t, v1beta1.FlinkApplicationCancelling, application.Status.Phase) updateInvoked = true return nil } @@ -128,12 +128,12 @@ func TestHandleNewOrCreateWithSavepointDisabled(t *testing.T) { func TestHandleApplicationCancel(t *testing.T) { jobID := "j1" - app := v1beta2.FlinkApplication{ - Spec: v1beta2.FlinkApplicationSpec{ + app := v1beta1.FlinkApplication{ + Spec: v1beta1.FlinkApplicationSpec{ SavepointDisabled: true, }, - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationCancelling, + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationCancelling, DeployHash: "old-hash", }, } @@ -141,7 +141,7 @@ func TestHandleApplicationCancel(t *testing.T) { cancelInvoked := false stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { + mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { assert.Equal(t, "old-hash", hash) return &client.FlinkJobOverview{ JobID: jobID, @@ -149,7 +149,7 @@ func TestHandleApplicationCancel(t *testing.T) { }, nil } - mockFlinkController.ForceCancelFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (e error) { + mockFlinkController.ForceCancelFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (e error) { assert.Equal(t, "old-hash", hash) cancelInvoked = true @@ -158,8 +158,8 @@ func TestHandleApplicationCancel(t *testing.T) { mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { 
- application := object.(*v1beta2.FlinkApplication) - assert.Equal(t, v1beta2.FlinkApplicationSubmittingJob, application.Status.Phase) + application := object.(*v1beta1.FlinkApplication) + assert.Equal(t, v1beta1.FlinkApplicationSubmittingJob, application.Status.Phase) return nil } @@ -172,14 +172,14 @@ func TestHandleApplicationCancel(t *testing.T) { func TestHandleApplicationCancelFailedWithMaxRetries(t *testing.T) { retryableErr := client.GetRetryableError(errors.New("blah"), "ForceCancelJob", "FAILED", 5) - app := v1beta2.FlinkApplication{ - Spec: v1beta2.FlinkApplicationSpec{ + app := v1beta1.FlinkApplication{ + Spec: v1beta1.FlinkApplicationSpec{ SavepointDisabled: true, }, - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationCancelling, + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationCancelling, DeployHash: "old-hash", - LastSeenError: retryableErr.(*v1beta2.FlinkApplicationError), + LastSeenError: retryableErr.(*v1beta1.FlinkApplicationError), }, } @@ -187,7 +187,7 @@ func TestHandleApplicationCancelFailedWithMaxRetries(t *testing.T) { updateInvoked := false stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.ForceCancelFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) error { + mockFlinkController.ForceCancelFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) error { // given we maxed out on retries, we should never have come here assert.False(t, true) return nil @@ -196,8 +196,8 @@ func TestHandleApplicationCancelFailedWithMaxRetries(t *testing.T) { mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { updateInvoked = true - application := object.(*v1beta2.FlinkApplication) - assert.Equal(t, v1beta2.FlinkApplicationRollingBackJob, 
application.Status.Phase) + application := object.(*v1beta1.FlinkApplication) + assert.Equal(t, v1beta1.FlinkApplicationRollingBackJob, application.Status.Phase) return nil } @@ -219,15 +219,15 @@ func TestHandleStartingDual(t *testing.T) { updateInvoked := false stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.IsClusterReadyFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (bool, error) { + mockFlinkController.IsClusterReadyFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { return true, nil } - mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (b bool, e error) { + mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (b bool, e error) { return true, nil } - mockFlinkController.GetCurrentDeploymentsForAppFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (*common.FlinkDeployment, error) { + mockFlinkController.GetCurrentDeploymentsForAppFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (*common.FlinkDeployment, error) { fd := testFlinkDeployment(application) fd.Taskmanager.Status.AvailableReplicas = 2 fd.Jobmanager.Status.AvailableReplicas = 1 @@ -236,14 +236,14 @@ func TestHandleStartingDual(t *testing.T) { mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta2.FlinkApplication) - assert.Equal(t, v1beta2.FlinkApplicationSavepointing, application.Status.Phase) + application := object.(*v1beta1.FlinkApplication) + assert.Equal(t, v1beta1.FlinkApplicationSavepointing, application.Status.Phase) updateInvoked = true return nil } - err := stateMachineForTest.Handle(context.Background(), 
&v1beta2.FlinkApplication{ - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationClusterStarting, + err := stateMachineForTest.Handle(context.Background(), &v1beta1.FlinkApplication{ + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationClusterStarting, }, }) assert.True(t, updateInvoked) @@ -256,7 +256,7 @@ func TestHandleApplicationSavepointingInitialDeploy(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.CancelWithSavepointFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (s string, e error) { + mockFlinkController.CancelWithSavepointFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (s string, e error) { // should not be called assert.False(t, true) return "", nil @@ -264,15 +264,15 @@ func TestHandleApplicationSavepointingInitialDeploy(t *testing.T) { mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta2.FlinkApplication) - assert.Equal(t, v1beta2.FlinkApplicationSubmittingJob, application.Status.Phase) + application := object.(*v1beta1.FlinkApplication) + assert.Equal(t, v1beta1.FlinkApplicationSubmittingJob, application.Status.Phase) updateInvoked = true return nil } - err := stateMachineForTest.Handle(context.Background(), &v1beta2.FlinkApplication{ - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationSavepointing, + err := stateMachineForTest.Handle(context.Background(), &v1beta1.FlinkApplication{ + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationSavepointing, }, }) assert.True(t, updateInvoked) @@ -280,9 +280,9 @@ func TestHandleApplicationSavepointingInitialDeploy(t *testing.T) { } func TestHandleApplicationSavepointingDual(t *testing.T) { - app := 
v1beta2.FlinkApplication{ - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationSavepointing, + app := v1beta1.FlinkApplication{ + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationSavepointing, DeployHash: "old-hash", }, } @@ -291,14 +291,14 @@ func TestHandleApplicationSavepointingDual(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.CancelWithSavepointFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (s string, e error) { + mockFlinkController.CancelWithSavepointFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (s string, e error) { assert.Equal(t, "old-hash", hash) cancelInvoked = true return "trigger", nil } - mockFlinkController.GetSavepointStatusFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.SavepointResponse, error) { + mockFlinkController.GetSavepointStatusFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.SavepointResponse, error) { assert.Equal(t, "old-hash", hash) return &client.SavepointResponse{ SavepointStatus: client.SavepointStatusResponse{ @@ -313,12 +313,12 @@ func TestHandleApplicationSavepointingDual(t *testing.T) { mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) updateCount := 0 mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta2.FlinkApplication) + application := object.(*v1beta1.FlinkApplication) if updateCount == 0 { assert.Equal(t, "trigger", application.Status.SavepointTriggerID) } else { assert.Equal(t, testSavepointLocation, application.Status.SavepointPath) - assert.Equal(t, v1beta2.FlinkApplicationSubmittingJob, application.Status.Phase) + assert.Equal(t, v1beta1.FlinkApplicationSubmittingJob, application.Status.Phase) } 
updateCount++ @@ -340,7 +340,7 @@ func TestHandleApplicationSavepointingFailed(t *testing.T) { updateInvoked := false stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.GetSavepointStatusFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.SavepointResponse, error) { + mockFlinkController.GetSavepointStatusFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.SavepointResponse, error) { return &client.SavepointResponse{ SavepointStatus: client.SavepointStatusResponse{ Status: client.SavePointCompleted, @@ -348,10 +348,10 @@ func TestHandleApplicationSavepointingFailed(t *testing.T) { }, nil } - app := v1beta2.FlinkApplication{ - Spec: v1beta2.FlinkApplicationSpec{}, - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationSavepointing, + app := v1beta1.FlinkApplication{ + Spec: v1beta1.FlinkApplicationSpec{}, + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationSavepointing, DeployHash: "blah", SavepointTriggerID: "trigger", }, @@ -359,9 +359,9 @@ func TestHandleApplicationSavepointingFailed(t *testing.T) { mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta2.FlinkApplication) + application := object.(*v1beta1.FlinkApplication) assert.Empty(t, application.Status.SavepointPath) - assert.Equal(t, v1beta2.FlinkApplicationRecovering, application.Status.Phase) + assert.Equal(t, v1beta1.FlinkApplicationRecovering, application.Status.Phase) updateInvoked = true return nil } @@ -373,10 +373,10 @@ func TestHandleApplicationSavepointingFailed(t *testing.T) { func TestRestoreFromExternalizedCheckpoint(t *testing.T) { updateInvoked := false - app := v1beta2.FlinkApplication{ - Spec: v1beta2.FlinkApplicationSpec{}, - Status: 
v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationRecovering, + app := v1beta1.FlinkApplication{ + Spec: v1beta1.FlinkApplicationSpec{}, + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationRecovering, DeployHash: "blah", SavepointTriggerID: "trigger", }, @@ -385,15 +385,15 @@ func TestRestoreFromExternalizedCheckpoint(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.FindExternalizedCheckpointFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (string, error) { + mockFlinkController.FindExternalizedCheckpointFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) { return "/tmp/checkpoint", nil } mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta2.FlinkApplication) + application := object.(*v1beta1.FlinkApplication) assert.Equal(t, "/tmp/checkpoint", application.Status.SavepointPath) - assert.Equal(t, v1beta2.FlinkApplicationSubmittingJob, application.Status.Phase) + assert.Equal(t, v1beta1.FlinkApplicationSubmittingJob, application.Status.Phase) updateInvoked = true return nil } @@ -405,19 +405,19 @@ func TestRestoreFromExternalizedCheckpoint(t *testing.T) { func TestSubmittingToRunning(t *testing.T) { jobID := "j1" - app := v1beta2.FlinkApplication{ + app := v1beta1.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Name: "test-app", Namespace: "flink", }, - Spec: v1beta2.FlinkApplicationSpec{ + Spec: v1beta1.FlinkApplicationSpec{ JarName: "job.jar", Parallelism: 5, EntryClass: "com.my.Class", ProgramArgs: "--test", }, - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationSubmittingJob, + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationSubmittingJob, 
DeployHash: "old-hash", }, } @@ -425,11 +425,11 @@ func TestSubmittingToRunning(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) { + mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) { return true, nil } - mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { + mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { assert.Equal(t, appHash, hash) return &client.FlinkJobOverview{ JobID: jobID, @@ -438,7 +438,7 @@ func TestSubmittingToRunning(t *testing.T) { } startCount := 0 - mockFlinkController.StartFlinkJobFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string, + mockFlinkController.StartFlinkJobFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string, jarName string, parallelism int32, entryClass string, programArgs string, allowNonRestoredState bool, savepointPath string) (string, error) { assert.Equal(t, appHash, hash) @@ -453,7 +453,7 @@ func TestSubmittingToRunning(t *testing.T) { return jobID, nil } - mockFlinkController.GetJobsForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) ([]client.FlinkJob, error) { + mockFlinkController.GetJobsForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) ([]client.FlinkJob, error) { assert.Equal(t, appHash, hash) if startCount > 0 { return []client.FlinkJob{ @@ -500,7 +500,7 @@ func TestSubmittingToRunning(t *testing.T) { service := object.(*v1.Service) 
assert.Equal(t, appHash, service.Spec.Selector["flink-app-hash"]) } else if updateCount == 1 { - application := object.(*v1beta2.FlinkApplication) + application := object.(*v1beta1.FlinkApplication) assert.Equal(t, jobFinalizer, application.Finalizers[0]) } @@ -511,17 +511,17 @@ func TestSubmittingToRunning(t *testing.T) { statusUpdateCount := 0 mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { if statusUpdateCount == 0 { - application := object.(*v1beta2.FlinkApplication) + application := object.(*v1beta1.FlinkApplication) assert.Equal(t, jobID, mockFlinkController.GetLatestJobID(ctx, application)) } else if statusUpdateCount == 1 { - application := object.(*v1beta2.FlinkApplication) + application := object.(*v1beta1.FlinkApplication) assert.Equal(t, appHash, application.Status.DeployHash) jobStatus := mockFlinkController.GetLatestJobStatus(ctx, application) assert.Equal(t, app.Spec.JarName, jobStatus.JarName) assert.Equal(t, app.Spec.Parallelism, jobStatus.Parallelism) assert.Equal(t, app.Spec.EntryClass, jobStatus.EntryClass) assert.Equal(t, app.Spec.ProgramArgs, jobStatus.ProgramArgs) - assert.Equal(t, v1beta2.FlinkApplicationRunning, application.Status.Phase) + assert.Equal(t, v1beta1.FlinkApplicationRunning, application.Status.Phase) } statusUpdateCount++ return nil @@ -540,7 +540,7 @@ func TestSubmittingToRunning(t *testing.T) { func TestHandleApplicationRunning(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.GetCurrentDeploymentsForAppFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (*common.FlinkDeployment, error) { + mockFlinkController.GetCurrentDeploymentsForAppFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (*common.FlinkDeployment, error) { fd := testFlinkDeployment(application) return &fd, nil } @@ -550,9 +550,9 @@ func 
TestHandleApplicationRunning(t *testing.T) { assert.True(t, false) return nil } - err := stateMachineForTest.Handle(context.Background(), &v1beta2.FlinkApplication{ - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationRunning, + err := stateMachineForTest.Handle(context.Background(), &v1beta1.FlinkApplication{ + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationRunning, }, }) assert.Nil(t, err) @@ -562,20 +562,20 @@ func TestRunningToClusterStarting(t *testing.T) { updateInvoked := false stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.GetCurrentDeploymentsForAppFunc = func(ctx context.Context, application *v1beta2.FlinkApplication) (*common.FlinkDeployment, error) { + mockFlinkController.GetCurrentDeploymentsForAppFunc = func(ctx context.Context, application *v1beta1.FlinkApplication) (*common.FlinkDeployment, error) { return nil, nil } mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta2.FlinkApplication) - assert.Equal(t, v1beta2.FlinkApplicationUpdating, application.Status.Phase) + application := object.(*v1beta1.FlinkApplication) + assert.Equal(t, v1beta1.FlinkApplicationUpdating, application.Status.Phase) updateInvoked = true return nil } - err := stateMachineForTest.Handle(context.Background(), &v1beta2.FlinkApplication{ - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationRunning, + err := stateMachineForTest.Handle(context.Background(), &v1beta1.FlinkApplication{ + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationRunning, }, }) assert.True(t, updateInvoked) @@ -585,24 +585,24 @@ func TestRunningToClusterStarting(t *testing.T) { func TestRollingBack(t *testing.T) { jobID := "j1" - app := v1beta2.FlinkApplication{ + app := 
v1beta1.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Name: "test-app", Namespace: "flink", }, - Spec: v1beta2.FlinkApplicationSpec{ + Spec: v1beta1.FlinkApplicationSpec{ JarName: "job.jar", Parallelism: 5, EntryClass: "com.my.Class", ProgramArgs: "--test", }, - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationRollingBackJob, + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationRollingBackJob, DeployHash: "old-hash", SavepointPath: "file:///savepoint", - VersionStatuses: []v1beta2.FlinkApplicationVersionStatus{ - v1beta2.FlinkApplicationVersionStatus{ - JobStatus: v1beta2.FlinkJobStatus{ + VersionStatuses: []v1beta1.FlinkApplicationVersionStatus{ + v1beta1.FlinkApplicationVersionStatus{ + JobStatus: v1beta1.FlinkJobStatus{ JarName: "old-job.jar", Parallelism: 10, EntryClass: "com.my.OldClass", @@ -616,13 +616,13 @@ func TestRollingBack(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) { + mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) { assert.Equal(t, "old-hash", hash) return true, nil } startCalled := false - mockFlinkController.StartFlinkJobFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string, + mockFlinkController.StartFlinkJobFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string, jarName string, parallelism int32, entryClass string, programArgs string, allowNonRestoredState bool, savepointPath string) (string, error) { startCalled = true @@ -638,7 +638,7 @@ func TestRollingBack(t *testing.T) { } getCount := 0 - mockFlinkController.GetJobsForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) 
([]client.FlinkJob, error) { + mockFlinkController.GetJobsForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) ([]client.FlinkJob, error) { assert.Equal(t, "old-hash", hash) var res []client.FlinkJob if getCount == 1 { @@ -685,7 +685,7 @@ func TestRollingBack(t *testing.T) { service := object.(*v1.Service) assert.Equal(t, "old-hash", service.Spec.Selector["flink-app-hash"]) } else if updateCount == 1 { - application := object.(*v1beta2.FlinkApplication) + application := object.(*v1beta1.FlinkApplication) assert.Equal(t, jobFinalizer, application.Finalizers[0]) } @@ -696,9 +696,9 @@ func TestRollingBack(t *testing.T) { statusUpdated := false mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { if !statusUpdated { - application := object.(*v1beta2.FlinkApplication) + application := object.(*v1beta1.FlinkApplication) assert.Equal(t, appHash, application.Status.FailedDeployHash) - assert.Equal(t, v1beta2.FlinkApplicationDeployFailed, application.Status.Phase) + assert.Equal(t, v1beta1.FlinkApplicationDeployFailed, application.Status.Phase) statusUpdated = true } return nil @@ -720,28 +720,28 @@ func TestIsApplicationStuck(t *testing.T) { retryableErr := client.GetRetryableError(errors.New("blah"), "GetClusterOverview", "FAILED", 3) failFastError := client.GetNonRetryableError(errors.New("blah"), "SubmitJob", "400BadRequest") - app := &v1beta2.FlinkApplication{ - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationClusterStarting, + app := &v1beta1.FlinkApplication{ + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationClusterStarting, DeployHash: "prevhash", - LastSeenError: retryableErr.(*v1beta2.FlinkApplicationError), + LastSeenError: retryableErr.(*v1beta1.FlinkApplicationError), }, } mockRetryHandler := stateMachineForTest.retryHandler.(*mock.RetryHandler) mockRetryHandler.IsErrorRetryableFunc = func(err error) bool { - ferr, ok := 
err.(*v1beta2.FlinkApplicationError) + ferr, ok := err.(*v1beta1.FlinkApplicationError) assert.True(t, ok) return ferr.IsRetryable } mockRetryHandler.IsRetryRemainingFunc = func(err error, retryCount int32) bool { - ferr, ok := err.(*v1beta2.FlinkApplicationError) + ferr, ok := err.(*v1beta1.FlinkApplicationError) assert.True(t, ok) return retryCount <= ferr.MaxRetries } mockRetryHandler.IsErrorFailFastFunc = func(err error) bool { - ferr, ok := err.(*v1beta2.FlinkApplicationError) + ferr, ok := err.(*v1beta1.FlinkApplicationError) assert.True(t, ok) return ferr.IsFailFast } @@ -755,7 +755,7 @@ func TestIsApplicationStuck(t *testing.T) { // Retryable error with retries exhausted app.Status.RetryCount = 100 - app.Status.LastSeenError = retryableErr.(*v1beta2.FlinkApplicationError) + app.Status.LastSeenError = retryableErr.(*v1beta1.FlinkApplicationError) shouldRollback, _ = stateMachineForTest.shouldRollback(context.Background(), app) assert.True(t, shouldRollback, app) assert.Nil(t, app.Status.LastSeenError) @@ -763,7 +763,7 @@ func TestIsApplicationStuck(t *testing.T) { // Fail fast error app.Status.RetryCount = 0 - app.Status.LastSeenError = failFastError.(*v1beta2.FlinkApplicationError) + app.Status.LastSeenError = failFastError.(*v1beta1.FlinkApplicationError) shouldRollback, _ = stateMachineForTest.shouldRollback(context.Background(), app) assert.True(t, shouldRollback) assert.Nil(t, app.Status.LastSeenError) @@ -775,17 +775,17 @@ func TestDeleteWithSavepoint(t *testing.T) { stateMachineForTest := getTestStateMachine() jobID := "j1" - app := v1beta2.FlinkApplication{ + app := v1beta1.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Finalizers: []string{jobFinalizer}, DeletionTimestamp: &metav1.Time{Time: time.Now()}, }, - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationDeleting, + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationDeleting, DeployHash: "deployhash", - VersionStatuses: 
[]v1beta2.FlinkApplicationVersionStatus{ - v1beta2.FlinkApplicationVersionStatus{ - JobStatus: v1beta2.FlinkJobStatus{ + VersionStatuses: []v1beta1.FlinkApplicationVersionStatus{ + v1beta1.FlinkApplicationVersionStatus{ + JobStatus: v1beta1.FlinkJobStatus{ JobID: jobID, }, }, @@ -797,11 +797,11 @@ func TestDeleteWithSavepoint(t *testing.T) { savepointPath := "s3:///path/to/savepoint" mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.CancelWithSavepointFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (string, error) { + mockFlinkController.CancelWithSavepointFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) { return triggerID, nil } - mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (jobs *client.FlinkJobOverview, err error) { + mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (jobs *client.FlinkJobOverview, err error) { return &client.FlinkJobOverview{ JobID: jobID, State: "RUNNING", @@ -811,8 +811,8 @@ func TestDeleteWithSavepoint(t *testing.T) { mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) updateStatusCount := 0 mockK8Cluster.UpdateStatusFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta2.FlinkApplication) - assert.Equal(t, v1beta2.FlinkApplicationDeleting, application.Status.Phase) + application := object.(*v1beta1.FlinkApplication) + assert.Equal(t, v1beta1.FlinkApplicationDeleting, application.Status.Phase) if updateStatusCount == 0 { assert.Equal(t, triggerID, application.Status.SavepointTriggerID) @@ -837,7 +837,7 @@ func TestDeleteWithSavepoint(t *testing.T) { assert.NoError(t, err) savepointStatusCount := 0 - mockFlinkController.GetSavepointStatusFunc = func(ctx context.Context, application 
*v1beta2.FlinkApplication, hash string) (*client.SavepointResponse, error) { + mockFlinkController.GetSavepointStatusFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.SavepointResponse, error) { savepointStatusCount++ if savepointStatusCount == 1 { @@ -872,7 +872,7 @@ func TestDeleteWithSavepoint(t *testing.T) { assert.Equal(t, 3, updateStatusCount) - mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (jobs *client.FlinkJobOverview, err error) { + mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (jobs *client.FlinkJobOverview, err error) { return &client.FlinkJobOverview{ JobID: jobID, State: "CANCELED", @@ -890,18 +890,18 @@ func TestDeleteWithSavepointAndFinishedJob(t *testing.T) { stateMachineForTest := getTestStateMachine() jobID := "j1" - app := v1beta2.FlinkApplication{ + app := v1beta1.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Finalizers: []string{jobFinalizer}, DeletionTimestamp: &metav1.Time{Time: time.Now()}, }, - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationDeleting, + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationDeleting, DeployHash: "deployhash", SavepointPath: "file:///savepoint", - VersionStatuses: []v1beta2.FlinkApplicationVersionStatus{ - v1beta2.FlinkApplicationVersionStatus{ - JobStatus: v1beta2.FlinkJobStatus{ + VersionStatuses: []v1beta1.FlinkApplicationVersionStatus{ + v1beta1.FlinkApplicationVersionStatus{ + JobStatus: v1beta1.FlinkJobStatus{ JobID: jobID, }, }, @@ -911,7 +911,7 @@ func TestDeleteWithSavepointAndFinishedJob(t *testing.T) { mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.GetJobsForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (jobs []client.FlinkJob, err error) { + 
mockFlinkController.GetJobsForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (jobs []client.FlinkJob, err error) { return []client.FlinkJob{ { JobID: jobID, @@ -923,8 +923,8 @@ func TestDeleteWithSavepointAndFinishedJob(t *testing.T) { mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.UpdateK8ObjectFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta2.FlinkApplication) - assert.Equal(t, v1beta2.FlinkApplicationDeleting, application.Status.Phase) + application := object.(*v1beta1.FlinkApplication) + assert.Equal(t, v1beta1.FlinkApplicationDeleting, application.Status.Phase) assert.Equal(t, 0, len(app.Finalizers)) @@ -940,19 +940,19 @@ func TestDeleteWithForceCancel(t *testing.T) { jobID := "j1" - app := v1beta2.FlinkApplication{ + app := v1beta1.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Finalizers: []string{jobFinalizer}, DeletionTimestamp: &metav1.Time{Time: time.Now()}, }, - Spec: v1beta2.FlinkApplicationSpec{ - DeleteMode: v1beta2.DeleteModeForceCancel, + Spec: v1beta1.FlinkApplicationSpec{ + DeleteMode: v1beta1.DeleteModeForceCancel, }, - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationDeleting, - VersionStatuses: []v1beta2.FlinkApplicationVersionStatus{ - v1beta2.FlinkApplicationVersionStatus{ - JobStatus: v1beta2.FlinkJobStatus{ + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationDeleting, + VersionStatuses: []v1beta1.FlinkApplicationVersionStatus{ + v1beta1.FlinkApplicationVersionStatus{ + JobStatus: v1beta1.FlinkJobStatus{ JobID: jobID, }, }, @@ -964,7 +964,7 @@ func TestDeleteWithForceCancel(t *testing.T) { mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { + 
mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { return &client.FlinkJobOverview{ JobID: jobID, State: "RUNNING", @@ -972,7 +972,7 @@ func TestDeleteWithForceCancel(t *testing.T) { } cancelled := false - mockFlinkController.ForceCancelFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) error { + mockFlinkController.ForceCancelFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) error { cancelled = true return nil } @@ -980,8 +980,8 @@ func TestDeleteWithForceCancel(t *testing.T) { mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) updateCount := 1 mockK8Cluster.UpdateK8ObjectFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta2.FlinkApplication) - assert.Equal(t, v1beta2.FlinkApplicationDeleting, application.Status.Phase) + application := object.(*v1beta1.FlinkApplication) + assert.Equal(t, v1beta1.FlinkApplicationDeleting, application.Status.Phase) if updateCount == 1 { assert.Equal(t, 0, len(app.Finalizers)) @@ -996,7 +996,7 @@ func TestDeleteWithForceCancel(t *testing.T) { assert.Equal(t, 1, updateCount) assert.True(t, cancelled) - mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { + mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { return &client.FlinkJobOverview{ JobID: jobID, State: "CANCELED", @@ -1011,16 +1011,16 @@ func TestDeleteWithForceCancel(t *testing.T) { func TestDeleteModeNone(t *testing.T) { stateMachineForTest := getTestStateMachine() - app := v1beta2.FlinkApplication{ + app := v1beta1.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Finalizers: []string{jobFinalizer}, DeletionTimestamp: 
&metav1.Time{Time: time.Now()}, }, - Spec: v1beta2.FlinkApplicationSpec{ - DeleteMode: v1beta2.DeleteModeNone, + Spec: v1beta1.FlinkApplicationSpec{ + DeleteMode: v1beta1.DeleteModeNone, }, - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationDeleting, + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationDeleting, }, } @@ -1028,7 +1028,7 @@ func TestDeleteModeNone(t *testing.T) { mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.GetJobsForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (jobs []client.FlinkJob, err error) { + mockFlinkController.GetJobsForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (jobs []client.FlinkJob, err error) { return []client.FlinkJob{ { JobID: jobID, @@ -1038,7 +1038,7 @@ func TestDeleteModeNone(t *testing.T) { } cancelled := false - mockFlinkController.ForceCancelFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) error { + mockFlinkController.ForceCancelFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) error { cancelled = true return nil } @@ -1046,8 +1046,8 @@ func TestDeleteModeNone(t *testing.T) { mockK8Cluster := stateMachineForTest.k8Cluster.(*k8mock.K8Cluster) updateCount := 1 mockK8Cluster.UpdateK8ObjectFunc = func(ctx context.Context, object runtime.Object) error { - application := object.(*v1beta2.FlinkApplication) - assert.Equal(t, v1beta2.FlinkApplicationDeleting, application.Status.Phase) + application := object.(*v1beta1.FlinkApplication) + assert.Equal(t, v1beta1.FlinkApplicationDeleting, application.Status.Phase) if updateCount == 1 { assert.Equal(t, 0, len(app.Finalizers)) @@ -1066,9 +1066,9 @@ func TestDeleteModeNone(t *testing.T) { func TestHandleInvalidPhase(t *testing.T) { stateMachineForTest := getTestStateMachine() - err := 
stateMachineForTest.Handle(context.Background(), &v1beta2.FlinkApplication{ - Spec: v1beta2.FlinkApplicationSpec{}, - Status: v1beta2.FlinkApplicationStatus{ + err := stateMachineForTest.Handle(context.Background(), &v1beta1.FlinkApplication{ + Spec: v1beta1.FlinkApplicationSpec{}, + Status: v1beta1.FlinkApplicationStatus{ Phase: "asd", }, }) @@ -1077,19 +1077,19 @@ func TestHandleInvalidPhase(t *testing.T) { } func TestRollbackWithRetryableError(t *testing.T) { - app := v1beta2.FlinkApplication{ + app := v1beta1.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Name: "test-app", Namespace: "flink", }, - Spec: v1beta2.FlinkApplicationSpec{ + Spec: v1beta1.FlinkApplicationSpec{ JarName: "job.jar", Parallelism: 5, EntryClass: "com.my.Class", ProgramArgs: "--test", }, - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationSavepointing, + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationSavepointing, DeployHash: "old-hash-retry", }, } @@ -1097,19 +1097,19 @@ func TestRollbackWithRetryableError(t *testing.T) { retryableErr := client.GetRetryableError(errors.New("blah"), "GetClusterOverview", "FAILED", 3) stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.CancelWithSavepointFunc = func(ctx context.Context, app *v1beta2.FlinkApplication, hash string) (savepoint string, err error) { + mockFlinkController.CancelWithSavepointFunc = func(ctx context.Context, app *v1beta1.FlinkApplication, hash string) (savepoint string, err error) { return "", retryableErr } mockRetryHandler := stateMachineForTest.retryHandler.(*mock.RetryHandler) mockRetryHandler.IsErrorRetryableFunc = func(err error) bool { - ferr, ok := err.(*v1beta2.FlinkApplicationError) + ferr, ok := err.(*v1beta1.FlinkApplicationError) assert.True(t, ok) return ferr.IsRetryable } mockRetryHandler.IsRetryRemainingFunc = func(err error, retryCount int32) bool { - ferr, ok := 
err.(*v1beta2.FlinkApplicationError) + ferr, ok := err.(*v1beta1.FlinkApplicationError) assert.True(t, ok) return retryCount <= ferr.MaxRetries } @@ -1131,8 +1131,8 @@ func TestRollbackWithRetryableError(t *testing.T) { } retries := 0 - for ; app.Status.Phase != v1beta2.FlinkApplicationRecovering; retries++ { - assert.Equal(t, v1beta2.FlinkApplicationSavepointing, app.Status.Phase) + for ; app.Status.Phase != v1beta1.FlinkApplicationRecovering; retries++ { + assert.Equal(t, v1beta1.FlinkApplicationSavepointing, app.Status.Phase) err := stateMachineForTest.Handle(context.Background(), &app) // First attempt does not rollback @@ -1146,25 +1146,25 @@ func TestRollbackWithRetryableError(t *testing.T) { assert.Equal(t, 5, retries) assert.Equal(t, 5, updateErrCount) // Retries should have been exhausted and errors and retry counts reset - assert.Equal(t, v1beta2.FlinkApplicationRecovering, app.Status.Phase) + assert.Equal(t, v1beta1.FlinkApplicationRecovering, app.Status.Phase) assert.Equal(t, int32(0), app.Status.RetryCount) assert.Nil(t, app.Status.LastSeenError) } func TestRollbackWithFailFastError(t *testing.T) { - app := v1beta2.FlinkApplication{ + app := v1beta1.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Name: "test-app", Namespace: "flink", }, - Spec: v1beta2.FlinkApplicationSpec{ + Spec: v1beta1.FlinkApplicationSpec{ JarName: "job.jar", Parallelism: 5, EntryClass: "com.my.Class", ProgramArgs: "--test", }, - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationSubmittingJob, + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationSubmittingJob, DeployHash: "old-hash-retry-err", }, } @@ -1173,7 +1173,7 @@ func TestRollbackWithFailFastError(t *testing.T) { mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) getCount := 0 - mockFlinkController.GetJobsForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) ([]client.FlinkJob, error) { + 
mockFlinkController.GetJobsForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) ([]client.FlinkJob, error) { var res []client.FlinkJob if getCount == 2 { res = []client.FlinkJob{ @@ -1186,11 +1186,11 @@ func TestRollbackWithFailFastError(t *testing.T) { return res, nil } - mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) { + mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) { return true, nil } failFastError := client.GetNonRetryableError(errors.New("blah"), "SubmitJob", "400BadRequest") - mockFlinkController.StartFlinkJobFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string, + mockFlinkController.StartFlinkJobFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string, jarName string, parallelism int32, entryClass string, programArgs string, allowNonRestoredState bool, savepointPath string) (string, error) { return "", failFastError } @@ -1219,15 +1219,15 @@ func TestRollbackWithFailFastError(t *testing.T) { } mockRetryHandler := stateMachineForTest.retryHandler.(*mock.RetryHandler) mockRetryHandler.IsErrorFailFastFunc = func(err error) bool { - ferr, ok := err.(*v1beta2.FlinkApplicationError) + ferr, ok := err.(*v1beta1.FlinkApplicationError) assert.True(t, ok) return ferr.IsFailFast } retries := 0 var err error - for ; app.Status.Phase == v1beta2.FlinkApplicationSubmittingJob; retries++ { + for ; app.Status.Phase == v1beta1.FlinkApplicationSubmittingJob; retries++ { err = stateMachineForTest.Handle(context.Background(), &app) - if app.Status.Phase == v1beta2.FlinkApplicationSubmittingJob { + if app.Status.Phase == v1beta1.FlinkApplicationSubmittingJob { assert.NotNil(t, err) assert.Equal(t, int32(0), app.Status.RetryCount) assert.NotNil(t, app.Status.LastSeenError) @@ -1237,18 +1237,18 @@ func 
TestRollbackWithFailFastError(t *testing.T) { assert.Equal(t, 2, retries) // once in rollingback phase, errors no longer exist - assert.Equal(t, v1beta2.FlinkApplicationRollingBackJob, app.Status.Phase) + assert.Equal(t, v1beta1.FlinkApplicationRollingBackJob, app.Status.Phase) assert.Equal(t, int32(0), app.Status.RetryCount) assert.Nil(t, app.Status.LastSeenError) } func TestRollbackAfterJobSubmission(t *testing.T) { - app := v1beta2.FlinkApplication{ + app := v1beta1.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Name: "test-app", Namespace: "flink", }, - Spec: v1beta2.FlinkApplicationSpec{ + Spec: v1beta1.FlinkApplicationSpec{ JarName: "job.jar", Parallelism: 5, EntryClass: "com.my.Class", @@ -1257,12 +1257,12 @@ func TestRollbackAfterJobSubmission(t *testing.T) { // force a rollback ForceRollback: true, }, - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationSubmittingJob, + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationSubmittingJob, DeployHash: "old-hash-retry-err", - VersionStatuses: []v1beta2.FlinkApplicationVersionStatus{ + VersionStatuses: []v1beta1.FlinkApplicationVersionStatus{ { - JobStatus: v1beta2.FlinkJobStatus{ + JobStatus: v1beta1.FlinkJobStatus{ JobID: "jobid", }, }, @@ -1275,24 +1275,24 @@ func TestRollbackAfterJobSubmission(t *testing.T) { err := stateMachineForTest.Handle(context.Background(), &app) assert.Nil(t, err) - assert.Equal(t, v1beta2.FlinkApplicationRollingBackJob, app.Status.Phase) + assert.Equal(t, v1beta1.FlinkApplicationRollingBackJob, app.Status.Phase) assert.Equal(t, "", mockFlinkController.GetLatestJobID(context.Background(), &app)) } func TestErrorHandlingInRunningPhase(t *testing.T) { - app := v1beta2.FlinkApplication{ + app := v1beta1.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Name: "test-app", Namespace: "flink", }, - Spec: v1beta2.FlinkApplicationSpec{ + Spec: v1beta1.FlinkApplicationSpec{ JarName: "job.jar", Parallelism: 5, EntryClass: "com.my.Class", ProgramArgs: 
"--test", }, - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationRunning, + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationRunning, DeployHash: "old-hash-retry-err", }, } @@ -1300,7 +1300,7 @@ func TestErrorHandlingInRunningPhase(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.GetCurrentDeploymentsForAppFunc = func(ctx context.Context, app *v1beta2.FlinkApplication) (*common.FlinkDeployment, error) { + mockFlinkController.GetCurrentDeploymentsForAppFunc = func(ctx context.Context, app *v1beta1.FlinkApplication) (*common.FlinkDeployment, error) { return &common.FlinkDeployment{ Jobmanager: nil, Taskmanager: nil, @@ -1308,7 +1308,7 @@ func TestErrorHandlingInRunningPhase(t *testing.T) { }, nil } - mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { + mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { return nil, client.GetNonRetryableError(errors.New("running phase error"), "TestError", "400") } @@ -1322,20 +1322,20 @@ func TestErrorHandlingInRunningPhase(t *testing.T) { func TestForceRollback(t *testing.T) { oldHash := "old-hash-force-rollback" - app := v1beta2.FlinkApplication{ + app := v1beta1.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Name: "test-app", Namespace: "flink", }, - Spec: v1beta2.FlinkApplicationSpec{ + Spec: v1beta1.FlinkApplicationSpec{ JarName: "job.jar", Parallelism: 5, EntryClass: "com.my.Class", ProgramArgs: "--test", ForceRollback: true, }, - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationSubmittingJob, + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationSubmittingJob, DeployHash: oldHash, }, } @@ -1372,14 
+1372,14 @@ func TestForceRollback(t *testing.T) { } mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (bool, error) { + mockFlinkController.IsServiceReadyFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) { return true, nil } err := stateMachineForTest.Handle(context.Background(), &app) assert.Nil(t, err) // rolled deploy while cluster is starting - assert.Equal(t, v1beta2.FlinkApplicationRollingBackJob, app.Status.Phase) + assert.Equal(t, v1beta1.FlinkApplicationRollingBackJob, app.Status.Phase) assert.True(t, app.Spec.ForceRollback) err = stateMachineForTest.Handle(context.Background(), &app) @@ -1391,21 +1391,21 @@ func TestForceRollback(t *testing.T) { func TestLastSeenErrTimeIsNil(t *testing.T) { oldHash := "old-hash-force-nil" retryableErr := client.GetRetryableError(errors.New("blah"), "GetClusterOverview", "FAILED", 3) - app := v1beta2.FlinkApplication{ + app := v1beta1.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Name: "test-app", Namespace: "flink", }, - Spec: v1beta2.FlinkApplicationSpec{ + Spec: v1beta1.FlinkApplicationSpec{ JarName: "job.jar", Parallelism: 5, EntryClass: "com.my.Class", ProgramArgs: "--test", }, - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationClusterStarting, + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationClusterStarting, DeployHash: oldHash, - LastSeenError: retryableErr.(*v1beta2.FlinkApplicationError), + LastSeenError: retryableErr.(*v1beta1.FlinkApplicationError), }, } app.Status.LastSeenError.LastErrorUpdateTime = nil @@ -1430,21 +1430,21 @@ func TestCheckSavepointStatusFailing(t *testing.T) { oldHash := "old-hash-fail" maxRetries := int32(1) retryableErr := client.GetRetryableError(errors.New("blah"), "CheckSavepointStatus", "FAILED", 1) - app := v1beta2.FlinkApplication{ 
+ app := v1beta1.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Name: "test-app", Namespace: "flink", }, - Spec: v1beta2.FlinkApplicationSpec{ + Spec: v1beta1.FlinkApplicationSpec{ JarName: "job.jar", Parallelism: 5, EntryClass: "com.my.Class", ProgramArgs: "--test", }, - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationSavepointing, + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationSavepointing, DeployHash: oldHash, - LastSeenError: retryableErr.(*v1beta2.FlinkApplicationError), + LastSeenError: retryableErr.(*v1beta1.FlinkApplicationError), SavepointTriggerID: "trigger", }, } @@ -1452,11 +1452,11 @@ func TestCheckSavepointStatusFailing(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.GetSavepointStatusFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.SavepointResponse, error) { - return nil, retryableErr.(*v1beta2.FlinkApplicationError) + mockFlinkController.GetSavepointStatusFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.SavepointResponse, error) { + return nil, retryableErr.(*v1beta1.FlinkApplicationError) } - mockFlinkController.FindExternalizedCheckpointFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (string, error) { + mockFlinkController.FindExternalizedCheckpointFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) { return "/tmp/checkpoint", nil } mockRetryHandler := stateMachineForTest.retryHandler.(*mock.RetryHandler) @@ -1473,32 +1473,32 @@ func TestCheckSavepointStatusFailing(t *testing.T) { err := stateMachineForTest.Handle(context.Background(), &app) // 1 retry left assert.NotNil(t, err) - assert.Equal(t, v1beta2.FlinkApplicationSavepointing, app.Status.Phase) + assert.Equal(t, 
v1beta1.FlinkApplicationSavepointing, app.Status.Phase) // No retries left for CheckSavepointStatus // The app should hence try to recover from an externalized checkpoint err = stateMachineForTest.Handle(context.Background(), &app) assert.Nil(t, err) - assert.Equal(t, v1beta2.FlinkApplicationRecovering, app.Status.Phase) + assert.Equal(t, v1beta1.FlinkApplicationRecovering, app.Status.Phase) } func TestDeleteWhenCheckSavepointStatusFailing(t *testing.T) { retryableErr := client.GetRetryableError(errors.New("blah"), "CheckSavepointStatus", "FAILED", 1) - app := v1beta2.FlinkApplication{ + app := v1beta1.FlinkApplication{ ObjectMeta: metav1.ObjectMeta{ Name: "test-app", Namespace: "flink", }, - Spec: v1beta2.FlinkApplicationSpec{ + Spec: v1beta1.FlinkApplicationSpec{ JarName: "job.jar", Parallelism: 5, EntryClass: "com.my.Class", ProgramArgs: "--test", }, - Status: v1beta2.FlinkApplicationStatus{ - Phase: v1beta2.FlinkApplicationSavepointing, + Status: v1beta1.FlinkApplicationStatus{ + Phase: v1beta1.FlinkApplicationSavepointing, DeployHash: "appHash", - LastSeenError: retryableErr.(*v1beta2.FlinkApplicationError), + LastSeenError: retryableErr.(*v1beta1.FlinkApplicationError), SavepointTriggerID: "trigger", }, } @@ -1506,10 +1506,10 @@ func TestDeleteWhenCheckSavepointStatusFailing(t *testing.T) { stateMachineForTest := getTestStateMachine() mockFlinkController := stateMachineForTest.flinkController.(*mock.FlinkController) - mockFlinkController.GetSavepointStatusFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.SavepointResponse, error) { - return nil, retryableErr.(*v1beta2.FlinkApplicationError) + mockFlinkController.GetSavepointStatusFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.SavepointResponse, error) { + return nil, retryableErr.(*v1beta1.FlinkApplicationError) } - mockFlinkController.CancelWithSavepointFunc = func(ctx context.Context, application 
*v1beta2.FlinkApplication, hash string) (s string, e error) { + mockFlinkController.CancelWithSavepointFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (s string, e error) { return "triggerId", nil } mockRetryHandler := stateMachineForTest.retryHandler.(*mock.RetryHandler) @@ -1521,14 +1521,14 @@ func TestDeleteWhenCheckSavepointStatusFailing(t *testing.T) { } err := stateMachineForTest.Handle(context.Background(), &app) assert.NotNil(t, err) - assert.Equal(t, v1beta2.FlinkApplicationSavepointing, app.Status.Phase) + assert.Equal(t, v1beta1.FlinkApplicationSavepointing, app.Status.Phase) assert.NotNil(t, app.Status.LastSeenError) // Try to force delete the app while it's in a savepointing state (with errors) // We should handle the delete here - app.Status.Phase = v1beta2.FlinkApplicationDeleting - app.Spec.DeleteMode = v1beta2.DeleteModeForceCancel + app.Status.Phase = v1beta1.FlinkApplicationDeleting + app.Spec.DeleteMode = v1beta1.DeleteModeForceCancel - mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { + mockFlinkController.GetJobForApplicationFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (*client.FlinkJobOverview, error) { assert.Equal(t, "appHash", hash) return &client.FlinkJobOverview{ JobID: "jobID", @@ -1536,7 +1536,7 @@ func TestDeleteWhenCheckSavepointStatusFailing(t *testing.T) { }, nil } - mockFlinkController.ForceCancelFunc = func(ctx context.Context, application *v1beta2.FlinkApplication, hash string) error { + mockFlinkController.ForceCancelFunc = func(ctx context.Context, application *v1beta1.FlinkApplication, hash string) error { return nil } err = stateMachineForTest.Handle(context.Background(), &app) diff --git a/pkg/controller/k8/cluster.go b/pkg/controller/k8/cluster.go index 1c4db4fd..1da90a1c 100644 --- a/pkg/controller/k8/cluster.go +++ 
b/pkg/controller/k8/cluster.go @@ -210,7 +210,7 @@ func (k *Cluster) UpdateStatus(ctx context.Context, object runtime.Object) error // the stored version of the CRD changes // Example of error: // K8s object update failed FlinkApplication.flink.k8s.io "operator-test-app" is invalid: - // apiVersion: Invalid value: "flink.k8s.io/v1beta1": must be flink.k8s.io/v1beta2 + // apiVersion: Invalid value: "flink.k8s.io/v1beta1": must be flink.k8s.io/v1beta1 // app_name=operator-test-app ns=default phase=Running src="cluster.go:209" // This should only ever be encountered once (per application) // when a new CRD version is deployed and an older version of the application exists diff --git a/tmp/codegen/update-generated.sh b/tmp/codegen/update-generated.sh index b0c63f4d..ae656b5b 100755 --- a/tmp/codegen/update-generated.sh +++ b/tmp/codegen/update-generated.sh @@ -8,6 +8,6 @@ vendor/k8s.io/code-generator/generate-groups.sh \ deepcopy,client \ github.com/lyft/flinkk8soperator/pkg/client \ github.com/lyft/flinkk8soperator/pkg/apis \ -app:v1beta1,v1beta2 \ +app:v1beta1 \ --go-header-file "./tmp/codegen/boilerplate.go.txt" From d5fbbc0e7ab2cde4f19c6c4f86b5711c39d056e1 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Thu, 19 Mar 2020 12:06:32 -0700 Subject: [PATCH 36/41] Keep Status.ClusterStatus and Status.JobStatus unchanged for Dual mode --- integ/checkpoint_failure_test.go | 4 +- integ/simple_test.go | 20 +- integ/utils/utils.go | 11 +- pkg/apis/app/v1beta1/types.go | 1 + pkg/controller/flink/flink.go | 292 ++++++++++++++---- pkg/controller/flink/flink_test.go | 93 +++--- pkg/controller/flink/mock/mock_flink.go | 39 ++- .../flinkapplication/flink_state_machine.go | 33 +- 8 files changed, 328 insertions(+), 165 deletions(-) diff --git a/integ/checkpoint_failure_test.go b/integ/checkpoint_failure_test.go index 0f97e2bd..9f7a596c 100644 --- a/integ/checkpoint_failure_test.go +++ b/integ/checkpoint_failure_test.go @@ -45,9 +45,9 @@ func failingJobTest(s *IntegSuite, c *C, 
testName string, causeFailure func()) { // And the job should not have been updated newApp, err := s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) - c.Assert(newApp.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID, Equals, app.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID) + c.Assert(newApp.Status.JobStatus.JobID, Equals, app.Status.JobStatus.JobID) - endpoint := fmt.Sprintf("jobs/%s", app.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s", app.Status.JobStatus.JobID) _, err = s.Util.FlinkAPIGet(app, endpoint) c.Assert(err, IsNil) diff --git a/integ/simple_test.go b/integ/simple_test.go index 13de736a..ca6ccda7 100644 --- a/integ/simple_test.go +++ b/integ/simple_test.go @@ -28,12 +28,12 @@ func updateAndValidate(c *C, s *IntegSuite, name string, updateFn func(app *v1be // check that it really updated newApp, err := s.Util.GetFlinkApplication(name) c.Assert(err, IsNil) - c.Assert(newApp.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID, Not(Equals), app.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID) + c.Assert(newApp.Status.JobStatus.JobID, Not(Equals), app.Status.JobStatus.JobID) log.Info("New job started successfully") // check that we savepointed and restored correctly - endpoint := fmt.Sprintf("jobs/%s/checkpoints", newApp.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s/checkpoints", newApp.Status.JobStatus.JobID) res, err := s.Util.FlinkAPIGet(newApp, endpoint) c.Assert(err, IsNil) @@ -140,13 +140,13 @@ func (s *IntegSuite) TestSimple(c *C) { c.Assert(s.Util.WaitForAllTasksRunning(newApp.Name), IsNil) // the job id should have changed - jobID := newApp.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID + jobID := newApp.Status.JobStatus.JobID newApp, err = 
s.Util.GetFlinkApplication(newApp.Name) c.Assert(err, IsNil) - c.Assert(newApp.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID, Not(Equals), jobID) + c.Assert(newApp.Status.JobStatus.JobID, Not(Equals), jobID) // we should have restored from our savepoint - endpoint := fmt.Sprintf("jobs/%s/checkpoints", newApp.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s/checkpoints", newApp.Status.JobStatus.JobID) res, err := s.Util.FlinkAPIGet(newApp, endpoint) c.Assert(err, IsNil) @@ -191,7 +191,7 @@ func (s *IntegSuite) TestSimple(c *C) { log.Info("User cancelled deploy. Job is in deploy failed, waiting for tasks to start") // but the job should still be running - c.Assert(newApp.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.State, Equals, v1beta1.Running) + c.Assert(newApp.Status.JobStatus.State, Equals, v1beta1.Running) log.Info("Attempting to roll forward with fix") // Fixing update @@ -212,7 +212,7 @@ func (s *IntegSuite) TestSimple(c *C) { app, err = s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) - if len(app.Finalizers) == 1 && app.Finalizers[s.Util.GetCurrentStatusIndex(app)] == finalizer { + if len(app.Finalizers) == 1 && app.Finalizers[0] == finalizer { break } time.Sleep(100 * time.Millisecond) @@ -225,7 +225,7 @@ func (s *IntegSuite) TestSimple(c *C) { jobList := jobMap["jobs"].([]interface{}) for _, j := range jobList { job := j.(map[string]interface{}) - if job["id"] == app.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID { + if job["id"] == app.Status.JobStatus.JobID { return job } } @@ -284,7 +284,7 @@ func (s *IntegSuite) TestRecovery(c *C) { app, err := s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) - endpoint := fmt.Sprintf("jobs/%s/checkpoints", app.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s/checkpoints", 
app.Status.JobStatus.JobID) for { res, err := s.Util.FlinkAPIGet(app, endpoint) c.Assert(err, IsNil) @@ -324,7 +324,7 @@ func (s *IntegSuite) TestRecovery(c *C) { // wait until the new job is launched newApp, err := s.Util.GetFlinkApplication(config.Name) c.Assert(err, IsNil) - if newApp.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(newApp)].JobStatus.JobID != app.Status.VersionStatuses[s.Util.GetCurrentStatusIndex(app)].JobStatus.JobID { + if newApp.Status.JobStatus.JobID != app.Status.JobStatus.JobID { break } time.Sleep(100 * time.Millisecond) diff --git a/integ/utils/utils.go b/integ/utils/utils.go index aaed4c6d..fb632179 100644 --- a/integ/utils/utils.go +++ b/integ/utils/utils.go @@ -453,7 +453,7 @@ func (f *TestUtil) WaitForAllTasksRunning(name string) error { return err } - endpoint := fmt.Sprintf("jobs/%s", flinkApp.Status.VersionStatuses[f.GetCurrentStatusIndex(flinkApp)].JobStatus.JobID) + endpoint := fmt.Sprintf("jobs/%s", flinkApp.Status.JobStatus.JobID) for { res, err := f.FlinkAPIGet(flinkApp, endpoint) if err != nil { @@ -520,12 +520,3 @@ func (f *TestUtil) GetJobOverview(app *flinkapp.FlinkApplication) map[string]int } return nil } - -func (f *TestUtil) GetCurrentStatusIndex(app *flinkapp.FlinkApplication) int32 { - desiredCount := flinkapp.GetMaxRunningJobs(app.Spec.DeploymentMode) - if app.Status.Phase != "Running" { - return 0 - } - - return desiredCount - 1 -} diff --git a/pkg/apis/app/v1beta1/types.go b/pkg/apis/app/v1beta1/types.go index 0026fabc..14e47f37 100644 --- a/pkg/apis/app/v1beta1/types.go +++ b/pkg/apis/app/v1beta1/types.go @@ -196,6 +196,7 @@ const ( type FlinkApplicationVersionStatus struct { Version FlinkApplicationVersion `json:"appVersion,omitempty"` + VersionHash string `json:"versionHash,omitempty"` ClusterStatus FlinkClusterStatus `json:"clusterStatus,omitempty"` JobStatus FlinkJobStatus `json:"jobStatus,omitempty"` } diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index 8d7c86ef..678df7de 
100644 --- a/pkg/controller/flink/flink.go +++ b/pkg/controller/flink/flink.go @@ -151,7 +151,7 @@ type Controller struct { eventRecorder record.EventRecorder } -func getURLFromApp(application *v1beta1.FlinkApplication, hash string) string { +func (f *Controller) getURLFromApp(application *v1beta1.FlinkApplication, hash string) string { service := VersionedJobManagerServiceName(application, hash) cfg := controllerConfig.GetConfig() if cfg.UseProxy { @@ -160,23 +160,23 @@ func getURLFromApp(application *v1beta1.FlinkApplication, hash string) string { return fmt.Sprintf("http://%s.%s:%d", service, application.Namespace, port) } -func getClusterOverviewURL(app *v1beta1.FlinkApplication) string { - externalURL := getExternalURLFromApp(app) +func (f *Controller) getClusterOverviewURL(app *v1beta1.FlinkApplication) string { + externalURL := f.getExternalURLFromApp(app) if externalURL != "" { return fmt.Sprintf(externalURL + client.WebUIAnchor + client.GetClusterOverviewURL) } return "" } -func getJobOverviewURL(app *v1beta1.FlinkApplication) string { - externalURL := getExternalURLFromApp(app) +func (f *Controller) getJobOverviewURL(ctx context.Context, app *v1beta1.FlinkApplication) string { + externalURL := f.getExternalURLFromApp(app) if externalURL != "" { - return fmt.Sprintf(externalURL+client.WebUIAnchor+client.GetJobsOverviewURL, app.Status.VersionStatuses[getCurrentStatusIndex(app)].JobStatus.JobID) + return fmt.Sprintf(externalURL+client.WebUIAnchor+client.GetJobsOverviewURL, f.GetLatestJobID(ctx, app)) } return "" } -func getExternalURLFromApp(application *v1beta1.FlinkApplication) string { +func (f *Controller) getExternalURLFromApp(application *v1beta1.FlinkApplication) string { cfg := controllerConfig.GetConfig() // Local environment if cfg.UseProxy { @@ -214,7 +214,7 @@ func (f *Controller) deploymentMatches(ctx context.Context, deployment *v1.Deplo } func (f *Controller) GetJobsForApplication(ctx context.Context, application *v1beta1.FlinkApplication, 
hash string) ([]client.FlinkJob, error) { - jobResponse, err := f.flinkClient.GetJobs(ctx, getURLFromApp(application, hash)) + jobResponse, err := f.flinkClient.GetJobs(ctx, f.getURLFromApp(application, hash)) if err != nil { return nil, err } @@ -227,7 +227,7 @@ func (f *Controller) GetJobForApplication(ctx context.Context, application *v1be return nil, nil } - jobResponse, err := f.flinkClient.GetJobOverview(ctx, getURLFromApp(application, hash), f.GetLatestJobID(ctx, application)) + jobResponse, err := f.flinkClient.GetJobOverview(ctx, f.getURLFromApp(application, hash), f.GetLatestJobID(ctx, application)) if err != nil { return nil, err } @@ -250,7 +250,7 @@ func (f *Controller) CancelWithSavepoint(ctx context.Context, application *v1bet if err != nil { return "", err } - return f.flinkClient.CancelJobWithSavepoint(ctx, getURLFromApp(application, hash), jobID) + return f.flinkClient.CancelJobWithSavepoint(ctx, f.getURLFromApp(application, hash), jobID) } func (f *Controller) ForceCancel(ctx context.Context, application *v1beta1.FlinkApplication, hash string) error { @@ -258,7 +258,7 @@ func (f *Controller) ForceCancel(ctx context.Context, application *v1beta1.Flink if err != nil { return err } - return f.flinkClient.ForceCancelJob(ctx, getURLFromApp(application, hash), jobID) + return f.flinkClient.ForceCancelJob(ctx, f.getURLFromApp(application, hash), jobID) } func (f *Controller) CreateCluster(ctx context.Context, application *v1beta1.FlinkApplication) error { @@ -292,7 +292,7 @@ func (f *Controller) StartFlinkJob(ctx context.Context, application *v1beta1.Fli savepointPath string) (string, error) { response, err := f.flinkClient.SubmitJob( ctx, - getURLFromApp(application, hash), + f.getURLFromApp(application, hash), jarName, client.SubmitJobRequest{ Parallelism: parallelism, @@ -316,7 +316,7 @@ func (f *Controller) GetSavepointStatus(ctx context.Context, application *v1beta if err != nil { return nil, err } - return f.flinkClient.CheckSavepointStatus(ctx, 
getURLFromApp(application, hash), jobID, application.Status.SavepointTriggerID) + return f.flinkClient.CheckSavepointStatus(ctx, f.getURLFromApp(application, hash), jobID, application.Status.SavepointTriggerID) } func (f *Controller) IsClusterReady(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { @@ -338,7 +338,7 @@ func (f *Controller) IsClusterReady(ctx context.Context, application *v1beta1.Fl } func (f *Controller) IsServiceReady(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) { - resp, err := f.flinkClient.GetClusterOverview(ctx, getURLFromApp(application, hash)) + resp, err := f.flinkClient.GetClusterOverview(ctx, f.getURLFromApp(application, hash)) if err != nil { logger.Infof(ctx, "Error response indicating flink API is not ready to handle request %v", err) return false, err @@ -465,7 +465,7 @@ func (f *Controller) DeleteOldResourcesForApp(ctx context.Context, app *v1beta1. } func (f *Controller) FindExternalizedCheckpoint(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (string, error) { - checkpoint, err := f.flinkClient.GetLatestCheckpoint(ctx, getURLFromApp(application, hash), f.GetLatestJobID(ctx, application)) + checkpoint, err := f.flinkClient.GetLatestCheckpoint(ctx, f.getURLFromApp(application, hash), f.GetLatestJobID(ctx, application)) var checkpointPath string var checkpointTime int64 if err != nil { @@ -505,45 +505,47 @@ func (f *Controller) LogEvent(ctx context.Context, app *v1beta1.FlinkApplication // Gets and updates the cluster status func (f *Controller) CompareAndUpdateClusterStatus(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) { + if v1beta1.IsBlueGreenDeploymentMode(application.Spec.DeploymentMode) { + return f.compareAndUpdateBlueGreenClusterStatus(ctx, application, hash) + } // Error retrieving cluster / taskmanagers overview (after startup/readiness) --> Red // If there is an error this loop will return 
with Health set to Red - currIndex := getCurrentStatusIndex(application) - oldClusterStatus := application.Status.VersionStatuses[currIndex].ClusterStatus - application.Status.VersionStatuses[currIndex].ClusterStatus.Health = v1beta1.Red + oldClusterStatus := application.Status.ClusterStatus + application.Status.ClusterStatus.Health = v1beta1.Red deployment, err := f.GetCurrentDeploymentsForApp(ctx, application) if deployment == nil || err != nil { return false, err } - application.Status.VersionStatuses[currIndex].ClusterStatus.ClusterOverviewURL = getClusterOverviewURL(application) - application.Status.VersionStatuses[currIndex].ClusterStatus.NumberOfTaskManagers = deployment.Taskmanager.Status.AvailableReplicas + application.Status.ClusterStatus.ClusterOverviewURL = f.getClusterOverviewURL(application) + application.Status.ClusterStatus.NumberOfTaskManagers = deployment.Taskmanager.Status.AvailableReplicas // Get Cluster overview - response, err := f.flinkClient.GetClusterOverview(ctx, getURLFromApp(application, hash)) + response, err := f.flinkClient.GetClusterOverview(ctx, f.getURLFromApp(application, hash)) if err != nil { return false, err } // Update cluster overview - application.Status.VersionStatuses[currIndex].ClusterStatus.AvailableTaskSlots = response.SlotsAvailable - application.Status.VersionStatuses[currIndex].ClusterStatus.NumberOfTaskSlots = response.NumberOfTaskSlots + application.Status.ClusterStatus.AvailableTaskSlots = response.SlotsAvailable + application.Status.ClusterStatus.NumberOfTaskSlots = response.NumberOfTaskSlots // Get Healthy Taskmanagers - tmResponse, tmErr := f.flinkClient.GetTaskManagers(ctx, getURLFromApp(application, hash)) + tmResponse, tmErr := f.flinkClient.GetTaskManagers(ctx, f.getURLFromApp(application, hash)) if tmErr != nil { return false, tmErr } - application.Status.VersionStatuses[currIndex].ClusterStatus.HealthyTaskManagers = getHealthyTaskManagerCount(tmResponse) + 
application.Status.ClusterStatus.HealthyTaskManagers = getHealthyTaskManagerCount(tmResponse) // Determine Health of the cluster. // Healthy TaskManagers == Number of taskmanagers --> Green // Else --> Yellow - if application.Status.VersionStatuses[currIndex].ClusterStatus.HealthyTaskManagers == deployment.Taskmanager.Status.Replicas { - application.Status.VersionStatuses[currIndex].ClusterStatus.Health = v1beta1.Green + if application.Status.ClusterStatus.HealthyTaskManagers == deployment.Taskmanager.Status.Replicas { + application.Status.ClusterStatus.Health = v1beta1.Green } else { - application.Status.VersionStatuses[currIndex].ClusterStatus.Health = v1beta1.Yellow + application.Status.ClusterStatus.Health = v1beta1.Yellow } - return !apiequality.Semantic.DeepEqual(oldClusterStatus, application.Status.VersionStatuses[currIndex].ClusterStatus), nil + return !apiequality.Semantic.DeepEqual(oldClusterStatus, application.Status.ClusterStatus), nil } func getHealthyTaskManagerCount(response *client.TaskManagersResponse) int32 { @@ -560,47 +562,49 @@ func getHealthyTaskManagerCount(response *client.TaskManagersResponse) int32 { } func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, hash string) (bool, error) { - currIndex := getCurrentStatusIndex(app) - if app.Status.VersionStatuses[currIndex].JobStatus.LastFailingTime == nil { + if v1beta1.IsBlueGreenDeploymentMode(app.Spec.DeploymentMode) { + return f.compareAndUpdateBlueGreenJobStatus(ctx, app, hash) + } + if app.Status.JobStatus.LastFailingTime == nil { initTime := metav1.NewTime(time.Time{}) - app.Status.VersionStatuses[currIndex].JobStatus.LastFailingTime = &initTime + app.Status.JobStatus.LastFailingTime = &initTime } - oldJobStatus := app.Status.VersionStatuses[currIndex].JobStatus - app.Status.VersionStatuses[currIndex].JobStatus.JobID = oldJobStatus.JobID - jobResponse, err := f.flinkClient.GetJobOverview(ctx, getURLFromApp(app, hash), f.GetLatestJobID(ctx, 
app)) + oldJobStatus := app.Status.JobStatus + app.Status.JobStatus.JobID = oldJobStatus.JobID + jobResponse, err := f.flinkClient.GetJobOverview(ctx, f.getURLFromApp(app, hash), f.GetLatestJobID(ctx, app)) if err != nil { return false, err } - checkpoints, err := f.flinkClient.GetCheckpointCounts(ctx, getURLFromApp(app, hash), f.GetLatestJobID(ctx, app)) + checkpoints, err := f.flinkClient.GetCheckpointCounts(ctx, f.getURLFromApp(app, hash), f.GetLatestJobID(ctx, app)) if err != nil { return false, err } // Job status - app.Status.VersionStatuses[currIndex].JobStatus.JobOverviewURL = getJobOverviewURL(app) - app.Status.VersionStatuses[currIndex].JobStatus.State = v1beta1.JobState(jobResponse.State) + app.Status.JobStatus.JobOverviewURL = f.getJobOverviewURL(ctx, app) + app.Status.JobStatus.State = v1beta1.JobState(jobResponse.State) jobStartTime := metav1.NewTime(time.Unix(jobResponse.StartTime/1000, 0)) - app.Status.VersionStatuses[currIndex].JobStatus.StartTime = &jobStartTime + app.Status.JobStatus.StartTime = &jobStartTime // Checkpoints status - app.Status.VersionStatuses[currIndex].JobStatus.FailedCheckpointCount = checkpoints.Counts["failed"] - app.Status.VersionStatuses[currIndex].JobStatus.CompletedCheckpointCount = checkpoints.Counts["completed"] - app.Status.VersionStatuses[currIndex].JobStatus.JobRestartCount = checkpoints.Counts["restored"] + app.Status.JobStatus.FailedCheckpointCount = checkpoints.Counts["failed"] + app.Status.JobStatus.CompletedCheckpointCount = checkpoints.Counts["completed"] + app.Status.JobStatus.JobRestartCount = checkpoints.Counts["restored"] latestCheckpoint := checkpoints.Latest.Completed var lastCheckpointAgeSeconds int if latestCheckpoint != nil { lastCheckpointTimeMillis := metav1.NewTime(time.Unix(latestCheckpoint.LatestAckTimestamp/1000, 0)) - app.Status.VersionStatuses[currIndex].JobStatus.LastCheckpointTime = &lastCheckpointTimeMillis - app.Status.VersionStatuses[currIndex].JobStatus.LastCheckpointPath = 
latestCheckpoint.ExternalPath - lastCheckpointAgeSeconds = app.Status.VersionStatuses[currIndex].JobStatus.LastCheckpointTime.Second() + app.Status.JobStatus.LastCheckpointTime = &lastCheckpointTimeMillis + app.Status.JobStatus.LastCheckpointPath = latestCheckpoint.ExternalPath + lastCheckpointAgeSeconds = app.Status.JobStatus.LastCheckpointTime.Second() } if checkpoints.Latest.Restored != nil { - app.Status.VersionStatuses[currIndex].JobStatus.RestorePath = checkpoints.Latest.Restored.ExternalPath + app.Status.JobStatus.RestorePath = checkpoints.Latest.Restored.ExternalPath restoreTime := metav1.NewTime(time.Unix(checkpoints.Latest.Restored.RestoredTimeStamp/1000, 0)) - app.Status.VersionStatuses[currIndex].JobStatus.RestoreTime = &restoreTime + app.Status.JobStatus.RestoreTime = &restoreTime } runningTasks := int32(0) @@ -620,30 +624,30 @@ func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1 } } - app.Status.VersionStatuses[currIndex].JobStatus.RunningTasks = runningTasks - app.Status.VersionStatuses[currIndex].JobStatus.TotalTasks = totalTasks + app.Status.JobStatus.RunningTasks = runningTasks + app.Status.JobStatus.TotalTasks = totalTasks // Health Status for job // Job is in FAILING state --> RED // Time since last successful checkpoint > maxCheckpointTime --> YELLOW // Else --> Green - if app.Status.VersionStatuses[currIndex].JobStatus.State == v1beta1.Failing || - time.Since(app.Status.VersionStatuses[currIndex].JobStatus.LastFailingTime.Time) < failingIntervalThreshold || + if app.Status.JobStatus.State == v1beta1.Failing || + time.Since(app.Status.JobStatus.LastFailingTime.Time) < failingIntervalThreshold || verticesInCreated > 0 { - app.Status.VersionStatuses[currIndex].JobStatus.Health = v1beta1.Red + app.Status.JobStatus.Health = v1beta1.Red } else if time.Since(time.Unix(int64(lastCheckpointAgeSeconds), 0)) < maxCheckpointTime || runningTasks < totalTasks { - app.Status.VersionStatuses[currIndex].JobStatus.Health = 
v1beta1.Yellow + app.Status.JobStatus.Health = v1beta1.Yellow } else { - app.Status.VersionStatuses[currIndex].JobStatus.Health = v1beta1.Green + app.Status.JobStatus.Health = v1beta1.Green } // Update LastFailingTime - if app.Status.VersionStatuses[currIndex].JobStatus.State == v1beta1.Failing { + if app.Status.JobStatus.State == v1beta1.Failing { currTime := metav1.Now() - app.Status.VersionStatuses[currIndex].JobStatus.LastFailingTime = &currTime + app.Status.JobStatus.LastFailingTime = &currTime } - return !apiequality.Semantic.DeepEqual(oldJobStatus, app.Status.VersionStatuses[currIndex].JobStatus), err + return !apiequality.Semantic.DeepEqual(oldJobStatus, app.Status.JobStatus), err } func getCurrentStatusIndex(app *v1beta1.FlinkApplication) int32 { @@ -659,27 +663,187 @@ func getCurrentStatusIndex(app *v1beta1.FlinkApplication) int32 { } func (f *Controller) GetLatestClusterStatus(ctx context.Context, application *v1beta1.FlinkApplication) v1beta1.FlinkClusterStatus { - return application.Status.VersionStatuses[getCurrentStatusIndex(application)].ClusterStatus - + if v1beta1.IsBlueGreenDeploymentMode(application.Spec.DeploymentMode) { + return application.Status.VersionStatuses[getCurrentStatusIndex(application)].ClusterStatus + } + return application.Status.ClusterStatus } func (f *Controller) GetLatestJobStatus(ctx context.Context, application *v1beta1.FlinkApplication) v1beta1.FlinkJobStatus { - return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus + if v1beta1.IsBlueGreenDeploymentMode(application.Spec.DeploymentMode) { + return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus + } + return application.Status.JobStatus } func (f *Controller) UpdateLatestJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, jobStatus v1beta1.FlinkJobStatus) { - app.Status.VersionStatuses[getCurrentStatusIndex(app)].JobStatus = jobStatus + if v1beta1.IsBlueGreenDeploymentMode(app.Spec.DeploymentMode) 
{ + app.Status.VersionStatuses[getCurrentStatusIndex(app)].JobStatus = jobStatus + return + } + app.Status.JobStatus = jobStatus } func (f *Controller) UpdateLatestClusterStatus(ctx context.Context, app *v1beta1.FlinkApplication, clusterStatus v1beta1.FlinkClusterStatus) { - app.Status.VersionStatuses[getCurrentStatusIndex(app)].ClusterStatus = clusterStatus + if v1beta1.IsBlueGreenDeploymentMode(app.Spec.DeploymentMode) { + app.Status.VersionStatuses[getCurrentStatusIndex(app)].ClusterStatus = clusterStatus + return + } + app.Status.ClusterStatus = clusterStatus } func (f *Controller) GetLatestJobID(ctx context.Context, application *v1beta1.FlinkApplication) string { - return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID + if v1beta1.IsBlueGreenDeploymentMode(application.Spec.DeploymentMode) { + return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID + } + return application.Status.JobStatus.JobID } func (f *Controller) UpdateLatestJobID(ctx context.Context, app *v1beta1.FlinkApplication, jobID string) { - app.Status.VersionStatuses[getCurrentStatusIndex(app)].JobStatus.JobID = jobID + if v1beta1.IsBlueGreenDeploymentMode(app.Spec.DeploymentMode) { + app.Status.VersionStatuses[getCurrentStatusIndex(app)].JobStatus.JobID = jobID + } + app.Status.JobStatus.JobID = jobID +} + +func (f *Controller) compareAndUpdateBlueGreenClusterStatus(ctx context.Context, application *v1beta1.FlinkApplication, hash string) (bool, error) { + isEqual := false + for currIndex := range application.Status.VersionStatuses { + if application.Status.VersionStatuses[currIndex].VersionHash == "" { + continue + } + + oldClusterStatus := application.Status.VersionStatuses[currIndex].ClusterStatus + application.Status.VersionStatuses[currIndex].ClusterStatus.Health = v1beta1.Red + + deployment, err := f.GetCurrentDeploymentsForApp(ctx, application) + if deployment == nil || err != nil { + return false, err + } + + 
application.Status.VersionStatuses[currIndex].ClusterStatus.ClusterOverviewURL = f.getClusterOverviewURL(application) + application.Status.VersionStatuses[currIndex].ClusterStatus.NumberOfTaskManagers = deployment.Taskmanager.Status.AvailableReplicas + // Get Cluster overview + response, err := f.flinkClient.GetClusterOverview(ctx, f.getURLFromApp(application, hash)) + if err != nil { + return false, err + } + // Update cluster overview + application.Status.VersionStatuses[currIndex].ClusterStatus.AvailableTaskSlots = response.SlotsAvailable + application.Status.VersionStatuses[currIndex].ClusterStatus.NumberOfTaskSlots = response.NumberOfTaskSlots + + // Get Healthy Taskmanagers + tmResponse, tmErr := f.flinkClient.GetTaskManagers(ctx, f.getURLFromApp(application, hash)) + if tmErr != nil { + return false, tmErr + } + application.Status.VersionStatuses[currIndex].ClusterStatus.HealthyTaskManagers = getHealthyTaskManagerCount(tmResponse) + + // Determine Health of the cluster. + // Healthy TaskManagers == Number of taskmanagers --> Green + // Else --> Yellow + if application.Status.VersionStatuses[currIndex].ClusterStatus.HealthyTaskManagers == deployment.Taskmanager.Status.Replicas { + application.Status.VersionStatuses[currIndex].ClusterStatus.Health = v1beta1.Green + } else { + application.Status.VersionStatuses[currIndex].ClusterStatus.Health = v1beta1.Yellow + } + isEqual = isEqual || !apiequality.Semantic.DeepEqual(oldClusterStatus, application.Status.VersionStatuses[currIndex].ClusterStatus) + } + + return isEqual, nil +} + +func (f *Controller) compareAndUpdateBlueGreenJobStatus(ctx context.Context, app *v1beta1.FlinkApplication, hash string) (bool, error) { + isEqual := false + var err error + for statusIndex := range app.Status.VersionStatuses { + if app.Status.VersionStatuses[statusIndex].JobStatus.JobID == "" { + continue + } + + if app.Status.VersionStatuses[statusIndex].JobStatus.LastFailingTime == nil { + initTime := metav1.NewTime(time.Time{}) + 
app.Status.VersionStatuses[statusIndex].JobStatus.LastFailingTime = &initTime + } + oldJobStatus := app.Status.VersionStatuses[statusIndex].JobStatus + app.Status.VersionStatuses[statusIndex].JobStatus.JobID = oldJobStatus.JobID + jobResponse, err := f.flinkClient.GetJobOverview(ctx, f.getURLFromApp(app, hash), app.Status.VersionStatuses[statusIndex].JobStatus.JobID) + if err != nil { + return false, err + } + checkpoints, err := f.flinkClient.GetCheckpointCounts(ctx, f.getURLFromApp(app, hash), app.Status.VersionStatuses[statusIndex].JobStatus.JobID) + if err != nil { + return false, err + } + + // Job status + app.Status.VersionStatuses[statusIndex].JobStatus.JobOverviewURL = f.getJobOverviewURL(ctx, app) + app.Status.VersionStatuses[statusIndex].JobStatus.State = v1beta1.JobState(jobResponse.State) + jobStartTime := metav1.NewTime(time.Unix(jobResponse.StartTime/1000, 0)) + app.Status.VersionStatuses[statusIndex].JobStatus.StartTime = &jobStartTime + + // Checkpoints status + app.Status.VersionStatuses[statusIndex].JobStatus.FailedCheckpointCount = checkpoints.Counts["failed"] + app.Status.VersionStatuses[statusIndex].JobStatus.CompletedCheckpointCount = checkpoints.Counts["completed"] + app.Status.VersionStatuses[statusIndex].JobStatus.JobRestartCount = checkpoints.Counts["restored"] + + latestCheckpoint := checkpoints.Latest.Completed + var lastCheckpointAgeSeconds int + if latestCheckpoint != nil { + lastCheckpointTimeMillis := metav1.NewTime(time.Unix(latestCheckpoint.LatestAckTimestamp/1000, 0)) + app.Status.VersionStatuses[statusIndex].JobStatus.LastCheckpointTime = &lastCheckpointTimeMillis + app.Status.VersionStatuses[statusIndex].JobStatus.LastCheckpointPath = latestCheckpoint.ExternalPath + lastCheckpointAgeSeconds = app.Status.VersionStatuses[statusIndex].JobStatus.LastCheckpointTime.Second() + } + + if checkpoints.Latest.Restored != nil { + app.Status.VersionStatuses[statusIndex].JobStatus.RestorePath = checkpoints.Latest.Restored.ExternalPath + 
restoreTime := metav1.NewTime(time.Unix(checkpoints.Latest.Restored.RestoredTimeStamp/1000, 0)) + app.Status.VersionStatuses[statusIndex].JobStatus.RestoreTime = &restoreTime + } + + runningTasks := int32(0) + totalTasks := int32(0) + verticesInCreated := int32(0) + + for _, v := range jobResponse.Vertices { + if v.Status == client.Created { + verticesInCreated++ + } + + for k, v := range v.Tasks { + if k == "RUNNING" { + runningTasks += int32(v) + } + totalTasks += int32(v) + } + } + + app.Status.VersionStatuses[statusIndex].JobStatus.RunningTasks = runningTasks + app.Status.VersionStatuses[statusIndex].JobStatus.TotalTasks = totalTasks + + // Health Status for job + // Job is in FAILING state --> RED + // Time since last successful checkpoint > maxCheckpointTime --> YELLOW + // Else --> Green + + if app.Status.VersionStatuses[statusIndex].JobStatus.State == v1beta1.Failing || + time.Since(app.Status.VersionStatuses[statusIndex].JobStatus.LastFailingTime.Time) < failingIntervalThreshold || + verticesInCreated > 0 { + app.Status.VersionStatuses[statusIndex].JobStatus.Health = v1beta1.Red + } else if time.Since(time.Unix(int64(lastCheckpointAgeSeconds), 0)) < maxCheckpointTime || + runningTasks < totalTasks { + app.Status.VersionStatuses[statusIndex].JobStatus.Health = v1beta1.Yellow + } else { + app.Status.VersionStatuses[statusIndex].JobStatus.Health = v1beta1.Green + } + // Update LastFailingTime + if app.Status.VersionStatuses[statusIndex].JobStatus.State == v1beta1.Failing { + currTime := metav1.Now() + app.Status.VersionStatuses[statusIndex].JobStatus.LastFailingTime = &currTime + } + isEqual = isEqual || !apiequality.Semantic.DeepEqual(oldJobStatus, app.Status.VersionStatuses[statusIndex].JobStatus) + } + return isEqual, err } diff --git a/pkg/controller/flink/flink_test.go b/pkg/controller/flink/flink_test.go index 381fffd2..57e4c575 100644 --- a/pkg/controller/flink/flink_test.go +++ b/pkg/controller/flink/flink_test.go @@ -67,12 +67,7 @@ func 
getFlinkTestApp() v1beta1.FlinkApplication { app.Spec.Parallelism = 8 app.Name = testAppName app.Namespace = testNamespace - statuses := append(app.Status.VersionStatuses, v1beta1.FlinkApplicationVersionStatus{ - JobStatus: v1beta1.FlinkJobStatus{ - JobID: testJobID, - }, - }) - app.Status.VersionStatuses = statuses + app.Status.JobStatus.JobID = testJobID app.Spec.Image = testImage app.Spec.FlinkVersion = testFlinkVersion return app @@ -598,10 +593,10 @@ func TestFindExternalizedCheckpoint(t *testing.T) { func TestFindExternalizedCheckpointFromStatus(t *testing.T) { flinkControllerForTest := getTestFlinkController() flinkApp := getFlinkTestApp() - flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.JobID = "jobid" - flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.LastCheckpointPath = "/tmp/checkpoint" + flinkApp.Status.JobStatus.JobID = "jobid" + flinkApp.Status.JobStatus.LastCheckpointPath = "/tmp/checkpoint" checkpointTime := metaV1.Now() - flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.LastCheckpointTime = &checkpointTime + flinkApp.Status.JobStatus.LastCheckpointTime = &checkpointTime mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) mockJmClient.GetLatestCheckpointFunc = func(ctx context.Context, url string, jobId string) (*client.CheckpointStatistics, error) { @@ -667,22 +662,22 @@ func TestClusterStatusUpdated(t *testing.T) { _, err = flinkControllerForTest.CompareAndUpdateClusterStatus(context.Background(), &flinkApp, "hash") assert.Nil(t, err) - assert.Equal(t, int32(1), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskSlots) - assert.Equal(t, int32(0), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.AvailableTaskSlots) - assert.Equal(t, int32(1), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.HealthyTaskManagers) - assert.Equal(t, 
v1beta1.Green, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health) - assert.Equal(t, "app-name.lyft.xyz/#/overview", flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.ClusterOverviewURL) + assert.Equal(t, int32(1), flinkApp.Status.ClusterStatus.NumberOfTaskSlots) + assert.Equal(t, int32(0), flinkApp.Status.ClusterStatus.AvailableTaskSlots) + assert.Equal(t, int32(1), flinkApp.Status.ClusterStatus.HealthyTaskManagers) + assert.Equal(t, v1beta1.Green, flinkApp.Status.ClusterStatus.Health) + assert.Equal(t, "app-name.lyft.xyz/#/overview", flinkApp.Status.ClusterStatus.ClusterOverviewURL) } func TestNoClusterStatusChange(t *testing.T) { flinkControllerForTest := getTestFlinkController() flinkApp := getFlinkTestApp() - flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskSlots = int32(1) - flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.AvailableTaskSlots = int32(0) - flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.HealthyTaskManagers = int32(1) - flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health = v1beta1.Green - flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskManagers = int32(1) + flinkApp.Status.ClusterStatus.NumberOfTaskSlots = int32(1) + flinkApp.Status.ClusterStatus.AvailableTaskSlots = int32(0) + flinkApp.Status.ClusterStatus.HealthyTaskManagers = int32(1) + flinkApp.Status.ClusterStatus.Health = v1beta1.Green + flinkApp.Status.ClusterStatus.NumberOfTaskManagers = int32(1) mockK8Cluster := flinkControllerForTest.k8Cluster.(*k8mock.K8Cluster) mockK8Cluster.GetDeploymentsWithLabelFunc = func(ctx context.Context, namespace string, labelMap map[string]string) (*v1.DeploymentList, error) { tmDeployment := FetchTaskMangerDeploymentCreateObj(&flinkApp, testAppHash) @@ -771,10 +766,10 @@ func TestHealthyTaskmanagers(t 
*testing.T) { _, err := flinkControllerForTest.CompareAndUpdateClusterStatus(context.Background(), &flinkApp, hash) assert.Nil(t, err) - assert.Equal(t, int32(1), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.NumberOfTaskSlots) - assert.Equal(t, int32(0), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.AvailableTaskSlots) - assert.Equal(t, int32(0), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.HealthyTaskManagers) - assert.Equal(t, v1beta1.Yellow, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].ClusterStatus.Health) + assert.Equal(t, int32(1), flinkApp.Status.ClusterStatus.NumberOfTaskSlots) + assert.Equal(t, int32(0), flinkApp.Status.ClusterStatus.AvailableTaskSlots) + assert.Equal(t, int32(0), flinkApp.Status.ClusterStatus.HealthyTaskManagers) + assert.Equal(t, v1beta1.Yellow, flinkApp.Status.ClusterStatus.Health) } @@ -831,26 +826,26 @@ func TestJobStatusUpdated(t *testing.T) { }, nil } - flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.JobID = "abc" + flinkApp.Status.JobStatus.JobID = "abc" expectedTime := metaV1.NewTime(time.Unix(startTime/1000, 0)) _, err = flinkControllerForTest.CompareAndUpdateJobStatus(context.Background(), &flinkApp, "hash") assert.Nil(t, err) - assert.Equal(t, v1beta1.Running, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.State) - assert.Equal(t, &expectedTime, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.StartTime) - assert.Equal(t, v1beta1.Yellow, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.Health) + assert.Equal(t, v1beta1.Running, flinkApp.Status.JobStatus.State) + assert.Equal(t, &expectedTime, flinkApp.Status.JobStatus.StartTime) + assert.Equal(t, v1beta1.Yellow, flinkApp.Status.JobStatus.Health) - assert.Equal(t, int32(0), 
flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.FailedCheckpointCount) - assert.Equal(t, int32(4), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.CompletedCheckpointCount) - assert.Equal(t, int32(1), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.JobRestartCount) - assert.Equal(t, &expectedTime, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.RestoreTime) + assert.Equal(t, int32(0), flinkApp.Status.JobStatus.FailedCheckpointCount) + assert.Equal(t, int32(4), flinkApp.Status.JobStatus.CompletedCheckpointCount) + assert.Equal(t, int32(1), flinkApp.Status.JobStatus.JobRestartCount) + assert.Equal(t, &expectedTime, flinkApp.Status.JobStatus.RestoreTime) - assert.Equal(t, "/test/externalpath", flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.RestorePath) - assert.Equal(t, &expectedTime, flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.LastCheckpointTime) - assert.Equal(t, "app-name.lyft.xyz/#/jobs/abc", flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.JobOverviewURL) + assert.Equal(t, "/test/externalpath", flinkApp.Status.JobStatus.RestorePath) + assert.Equal(t, &expectedTime, flinkApp.Status.JobStatus.LastCheckpointTime) + assert.Equal(t, "app-name.lyft.xyz/#/jobs/abc", flinkApp.Status.JobStatus.JobOverviewURL) - assert.Equal(t, int32(2), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.RunningTasks) - assert.Equal(t, int32(7), flinkApp.Status.VersionStatuses[getCurrentStatusIndex(&flinkApp)].JobStatus.TotalTasks) + assert.Equal(t, int32(2), flinkApp.Status.JobStatus.RunningTasks) + assert.Equal(t, int32(7), flinkApp.Status.JobStatus.TotalTasks) } @@ -865,16 +860,16 @@ func TestNoJobStatusChange(t *testing.T) { app1 := getFlinkTestApp() mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) - 
app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.State = v1beta1.Running - app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.StartTime = &metaTime - app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.LastCheckpointTime = &metaTime - app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.CompletedCheckpointCount = int32(4) - app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.JobRestartCount = int32(1) - app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.FailedCheckpointCount = int32(0) - app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.Health = v1beta1.Green - app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.RestoreTime = &metaTime - app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.RestorePath = "/test/externalpath" - app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.JobOverviewURL = "" + app1.Status.JobStatus.State = v1beta1.Running + app1.Status.JobStatus.StartTime = &metaTime + app1.Status.JobStatus.LastCheckpointTime = &metaTime + app1.Status.JobStatus.CompletedCheckpointCount = int32(4) + app1.Status.JobStatus.JobRestartCount = int32(1) + app1.Status.JobStatus.FailedCheckpointCount = int32(0) + app1.Status.JobStatus.Health = v1beta1.Green + app1.Status.JobStatus.RestoreTime = &metaTime + app1.Status.JobStatus.RestorePath = "/test/externalpath" + app1.Status.JobStatus.JobOverviewURL = "" mockJmClient.GetJobOverviewFunc = func(ctx context.Context, url string, jobID string) (*client.FlinkJobOverview, error) { assert.Equal(t, url, "http://app-name-hash.ns:8081") @@ -917,8 +912,8 @@ func TestGetAndUpdateJobStatusHealth(t *testing.T) { app1 := getFlinkTestApp() mockJmClient := flinkControllerForTest.flinkClient.(*clientMock.JobManagerClient) - app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.State = v1beta1.Failing - 
app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.LastFailingTime = &lastFailedTime + app1.Status.JobStatus.State = v1beta1.Failing + app1.Status.JobStatus.LastFailingTime = &lastFailedTime mockJmClient.GetJobOverviewFunc = func(ctx context.Context, url string, jobID string) (*client.FlinkJobOverview, error) { assert.Equal(t, url, "http://app-name-hash.ns:8081") @@ -943,7 +938,7 @@ func TestGetAndUpdateJobStatusHealth(t *testing.T) { assert.Nil(t, err) // Job is in a RUNNING state but was in a FAILING state in the last 1 minute, so we expect // JobStatus.Health to be Red - assert.Equal(t, app1.Status.VersionStatuses[getCurrentStatusIndex(&app1)].JobStatus.Health, v1beta1.Red) + assert.Equal(t, app1.Status.JobStatus.Health, v1beta1.Red) } diff --git a/pkg/controller/flink/mock/mock_flink.go b/pkg/controller/flink/mock/mock_flink.go index 88678d4b..15b64c91 100644 --- a/pkg/controller/flink/mock/mock_flink.go +++ b/pkg/controller/flink/mock/mock_flink.go @@ -173,48 +173,63 @@ func (m *FlinkController) GetLatestClusterStatus(ctx context.Context, applicatio if m.GetLatestClusterStatusFunc != nil { return m.GetLatestClusterStatusFunc(ctx, application) } - - return application.Status.VersionStatuses[getCurrentStatusIndex(application)].ClusterStatus + if v1beta1.IsBlueGreenDeploymentMode(application.Spec.DeploymentMode) { + return application.Status.VersionStatuses[getCurrentStatusIndex(application)].ClusterStatus + } + return application.Status.ClusterStatus } func (m *FlinkController) GetLatestJobStatus(ctx context.Context, application *v1beta1.FlinkApplication) v1beta1.FlinkJobStatus { if m.GetLatestClusterStatusFunc != nil { return m.GetLatestJobStatusFunc(ctx, application) } - - return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus + if v1beta1.IsBlueGreenDeploymentMode(application.Spec.DeploymentMode) { + return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus + } + return 
application.Status.JobStatus } func (m *FlinkController) GetLatestJobID(ctx context.Context, application *v1beta1.FlinkApplication) string { if m.GetLatestClusterStatusFunc != nil { return m.GetLatestJobIDFunc(ctx, application) } - - return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID + if v1beta1.IsBlueGreenDeploymentMode(application.Spec.DeploymentMode) { + return application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID + } + return application.Status.JobStatus.JobID } func (m *FlinkController) UpdateLatestJobID(ctx context.Context, application *v1beta1.FlinkApplication, jobID string) { if m.UpdateLatestJobIDFunc != nil { m.UpdateLatestJobIDFunc(ctx, application, jobID) } - - application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID = jobID + if v1beta1.IsBlueGreenDeploymentMode(application.Spec.DeploymentMode) { + application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus.JobID = jobID + return + } + application.Status.JobStatus.JobID = jobID } func (m *FlinkController) UpdateLatestJobStatus(ctx context.Context, application *v1beta1.FlinkApplication, jobStatus v1beta1.FlinkJobStatus) { if m.UpdateLatestJobStatusFunc != nil { m.UpdateLatestJobStatusFunc(ctx, application, jobStatus) } - - application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus = jobStatus + if v1beta1.IsBlueGreenDeploymentMode(application.Spec.DeploymentMode) { + application.Status.VersionStatuses[getCurrentStatusIndex(application)].JobStatus = jobStatus + return + } + application.Status.JobStatus = jobStatus } func (m *FlinkController) UpdateLatestClusterStatus(ctx context.Context, application *v1beta1.FlinkApplication, clusterStatus v1beta1.FlinkClusterStatus) { if m.UpdateLatestClusterStatusFunc != nil { m.UpdateLatestClusterStatusFunc(ctx, application, clusterStatus) } - - 
application.Status.VersionStatuses[getCurrentStatusIndex(application)].ClusterStatus = clusterStatus + if v1beta1.IsBlueGreenDeploymentMode(application.Spec.DeploymentMode) { + application.Status.VersionStatuses[getCurrentStatusIndex(application)].ClusterStatus = clusterStatus + return + } + application.Status.ClusterStatus = clusterStatus } func getCurrentStatusIndex(app *v1beta1.FlinkApplication) int32 { diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index c23c07d3..20eae187 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -297,25 +297,22 @@ func (s *FlinkStateMachine) handleClusterStarting(ctx context.Context, applicati func (s *FlinkStateMachine) initializeAppStatusIfEmpty(ctx context.Context, application *v1beta1.FlinkApplication) { // initialize the app status array to include 2 status elements in case of blue green deploys // else use a one element array - arraySize := 1 - if application.Spec.DeploymentMode == v1beta1.DeploymentModeBlueGreen { - arraySize = 2 - } - - if len(application.Status.VersionStatuses) == 0 { - application.Status.VersionStatuses = make([]v1beta1.FlinkApplicationVersionStatus, arraySize) - } - - // If we're reading a v1beta1 app, populate the first element of the status array from - // the top-level jobStatus and clusterStatus - if application.Status.JobStatus != (v1beta1.FlinkJobStatus{}) { - s.flinkController.UpdateLatestJobStatus(ctx, application, application.Status.JobStatus) - application.Status.JobStatus = v1beta1.FlinkJobStatus{} - } + if v1beta1.IsBlueGreenDeploymentMode(application.Spec.DeploymentMode) { + application.Status.VersionStatuses = make([]v1beta1.FlinkApplicationVersionStatus, v1beta1.GetMaxRunningJobs(application.Spec.DeploymentMode)) + + // If an application is moving from a Dual to BlueGreen deployment mode, + // We pre-populate the version statuses array 
with the current Job and Cluster Status + // And reset top-level ClusterStatus and JobStatus to empty structs + // as they'll no longer get updated + if application.Status.JobStatus != (v1beta1.FlinkJobStatus{}) { + s.flinkController.UpdateLatestJobStatus(ctx, application, application.Status.JobStatus) + application.Status.JobStatus = v1beta1.FlinkJobStatus{} + } - if application.Status.ClusterStatus != (v1beta1.FlinkClusterStatus{}) { - s.flinkController.UpdateLatestClusterStatus(ctx, application, application.Status.ClusterStatus) - application.Status.ClusterStatus = v1beta1.FlinkClusterStatus{} + if application.Status.ClusterStatus != (v1beta1.FlinkClusterStatus{}) { + s.flinkController.UpdateLatestClusterStatus(ctx, application, application.Status.ClusterStatus) + application.Status.ClusterStatus = v1beta1.FlinkClusterStatus{} + } } } From 007340192f2b76ca184157dc8f2d75d3662b0552 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Thu, 19 Mar 2020 14:11:49 -0700 Subject: [PATCH 37/41] Remove unwarranted changes --- config/config.yaml | 2 +- deploy/crd.yaml | 14 +++++--------- deploy/role-binding.yaml | 2 +- pkg/controller/flink/flink.go | 31 +++++++++++++++++++++++++------ tmp/codegen/update-generated.sh | 1 - 5 files changed, 32 insertions(+), 18 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 12040969..371f0756 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,3 +1,3 @@ -apiVersion: flink.k8s.io/v1beta2 +apiVersion: flink.k8s.io/v1beta1 kind: FlinkApplication projectName: flinkk8soperator diff --git a/deploy/crd.yaml b/deploy/crd.yaml index 5bd941ae..4be01739 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -447,30 +447,26 @@ spec: type: string description: The current state machine phase for this FlinkApplication JSONPath: .status.phase - - name: Application Version - type: string - description: The version of the Flink cluster - JSONPath: .status.versionStatuses[*].version - name: Cluster Health type: string description: 
The health of the Flink cluster - JSONPath: .status.versionStatuses[*].clusterStatus.health + JSONPath: .status.clusterStatus.health - name: Job Health type: string description: The health of the Flink job - JSONPath: .status.versionStatuses[*].jobStatus.health + JSONPath: .status.jobStatus.health - name: Healthy TMs type: string - JSONPath: .status.versionStatuses[*].clusterStatus.healthyTaskManagers + JSONPath: ".status.clusterStatus.healthyTaskManagers" priority: 1 - name: Total TMs type: string - JSONPath: .status.versionStatuses[*].clusterStatus.numberOfTaskManagers + JSONPath: ".status.clusterStatus.numberOfTaskManagers" priority: 1 - name: Job Restarts type: integer description: Number of times the job has restarted - JSONPath: .status.versionStatuses[*].jobStatus.jobRestartCount + JSONPath: .status.jobStatus.jobRestartCount - name: Age type: date JSONPath: .metadata.creationTimestamp diff --git a/deploy/role-binding.yaml b/deploy/role-binding.yaml index 163455f5..e46a1f91 100644 --- a/deploy/role-binding.yaml +++ b/deploy/role-binding.yaml @@ -1,6 +1,6 @@ # Create a binding from Role -> ServiceAccount kind: ClusterRoleBinding -apiVersion: rbac.authorization.k8s.io/v1beta2 +apiVersion: rbac.authorization.k8s.io/v1beta1 metadata: name: flinkoperator roleRef: diff --git a/pkg/controller/flink/flink.go b/pkg/controller/flink/flink.go index 678df7de..e8f36ed6 100644 --- a/pkg/controller/flink/flink.go +++ b/pkg/controller/flink/flink.go @@ -650,16 +650,35 @@ func (f *Controller) CompareAndUpdateJobStatus(ctx context.Context, app *v1beta1 return !apiequality.Semantic.DeepEqual(oldJobStatus, app.Status.JobStatus), err } +// Only used with the BlueGreen DeploymentMode +// A method to identify the current VersionStatus func getCurrentStatusIndex(app *v1beta1.FlinkApplication) int32 { - // In the Running phase, we always have only 1 job - if v1beta1.IsRunningPhase(app.Status.Phase) { + // The current VersionStatus is the first (or earlier) version when + // 1. 
The application is a Running phase and there's only one job running + // 2. First deploy ever + // 3. When the savepoint is being taken on the existing job + if v1beta1.IsRunningPhase(app.Status.Phase) || app.Status.DeployHash == "" || + app.Status.Phase == v1beta1.FlinkApplicationSavepointing { return 0 } - // In every other state, we either have - // Dual mode --> One Application status object - // BlueGreen mode --> Two Application status objects - return v1beta1.GetMaxRunningJobs(app.Spec.DeploymentMode) - indexOffset + if app.Status.Phase == v1beta1.FlinkApplicationDualRunning { + return 1 + } + + // activeJobs and maxRunningJobs would be different once a Teardown has happened and + // the app has moved back to a Running state. + activeJobs := int32(len(app.Status.VersionStatuses)) + maxRunningJobs := v1beta1.GetMaxRunningJobs(app.Spec.DeploymentMode) + index := Min(activeJobs, maxRunningJobs) - indexOffset + return index +} + +func Min(x, y int32) int32 { + if x < y { + return x + } + return y } func (f *Controller) GetLatestClusterStatus(ctx context.Context, application *v1beta1.FlinkApplication) v1beta1.FlinkClusterStatus { diff --git a/tmp/codegen/update-generated.sh b/tmp/codegen/update-generated.sh index ae656b5b..1bbdf169 100755 --- a/tmp/codegen/update-generated.sh +++ b/tmp/codegen/update-generated.sh @@ -10,4 +10,3 @@ github.com/lyft/flinkk8soperator/pkg/client \ github.com/lyft/flinkk8soperator/pkg/apis \ app:v1beta1 \ --go-header-file "./tmp/codegen/boilerplate.go.txt" - From 527fab0504aae2289c0a25a0113bead884d64c4e Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Tue, 24 Mar 2020 09:29:54 -0700 Subject: [PATCH 38/41] Make import name more descriptive --- pkg/controller/flink/job_manager_controller_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/controller/flink/job_manager_controller_test.go b/pkg/controller/flink/job_manager_controller_test.go index 7e54de54..fe5376ae 100644 --- 
a/pkg/controller/flink/job_manager_controller_test.go +++ b/pkg/controller/flink/job_manager_controller_test.go @@ -3,7 +3,7 @@ package flink import ( "testing" - v1beta12 "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" + flinkapp "github.com/lyft/flinkk8soperator/pkg/apis/app/v1beta1" "github.com/lyft/flinkk8soperator/pkg/controller/config" @@ -46,7 +46,7 @@ func TestGetJobManagerPodName(t *testing.T) { func TestGetJobManagerPodNameWithVersion(t *testing.T) { app := getFlinkTestApp() - app.Spec.DeploymentMode = v1beta12.DeploymentModeBlueGreen + app.Spec.DeploymentMode = flinkapp.DeploymentModeBlueGreen app.Status.UpdatingVersion = testVersion assert.Equal(t, "app-name-"+testAppHash+"-jm-"+testVersion+"-pod", getJobManagerPodName(&app, testAppHash)) } @@ -313,7 +313,7 @@ func TestJobManagerCreateSuccessWithVersion(t *testing.T) { app.Spec.JarName = testJarName app.Spec.EntryClass = testEntryClass app.Spec.ProgramArgs = testProgramArgs - app.Spec.DeploymentMode = v1beta12.DeploymentModeBlueGreen + app.Spec.DeploymentMode = flinkapp.DeploymentModeBlueGreen app.Status.UpdatingVersion = testVersion annotations := map[string]string{ "key": "annotation", From 719e881b5843cd41d3e784a2382cd99fbcd8ddf7 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Tue, 24 Mar 2020 09:31:10 -0700 Subject: [PATCH 39/41] Revert file name to add_v1beta1 --- pkg/apis/app/{addtoscheme_v1beta2.go => addtoscheme_v1beta1.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pkg/apis/app/{addtoscheme_v1beta2.go => addtoscheme_v1beta1.go} (100%) diff --git a/pkg/apis/app/addtoscheme_v1beta2.go b/pkg/apis/app/addtoscheme_v1beta1.go similarity index 100% rename from pkg/apis/app/addtoscheme_v1beta2.go rename to pkg/apis/app/addtoscheme_v1beta1.go From 578a82be3eb7526db30a769b768efdb3597bbdc3 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Thu, 26 Mar 2020 08:46:30 -0700 Subject: [PATCH 40/41] Remove an unnecessary diff --- pkg/controller/flinkapplication/flink_state_machine.go | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index 20eae187..59f999b3 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -244,6 +244,7 @@ func (s *FlinkStateMachine) handleNewOrUpdating(ctx context.Context, application logger.Errorf(ctx, "Cluster creation failed with error: %v", err) return statusUnchanged, err } + s.updateApplicationPhase(application, v1beta1.FlinkApplicationClusterStarting) return statusChanged, nil } @@ -319,7 +320,6 @@ func (s *FlinkStateMachine) initializeAppStatusIfEmpty(ctx context.Context, appl func (s *FlinkStateMachine) handleApplicationSavepointing(ctx context.Context, application *v1beta1.FlinkApplication) (bool, error) { // we've already savepointed (or this is our first deploy), continue on if application.Status.SavepointPath != "" || application.Status.DeployHash == "" { - s.flinkController.UpdateLatestJobID(ctx, application, "") s.updateApplicationPhase(application, v1beta1.FlinkApplicationSubmittingJob) return statusChanged, nil } From 0b381e25e15ce89a4fa647a970916f23ee0398a8 Mon Sep 17 00:00:00 2001 From: glaksh100 Date: Thu, 26 Mar 2020 08:51:06 -0700 Subject: [PATCH 41/41] Remove an unnecessary diff --- pkg/controller/flinkapplication/flink_state_machine.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pkg/controller/flinkapplication/flink_state_machine.go b/pkg/controller/flinkapplication/flink_state_machine.go index 59f999b3..8ecd201d 100644 --- a/pkg/controller/flinkapplication/flink_state_machine.go +++ b/pkg/controller/flinkapplication/flink_state_machine.go @@ -545,11 +545,6 @@ func (s *FlinkStateMachine) handleSubmittingJob(ctx context.Context, app *v1beta logger.Errorf(ctx, "Updating cluster status failed with error: %v", clusterErr) } - // Reset jobId if for some reason it's populated but there are no jobs 
running - jobs, _ := s.flinkController.GetJobsForApplication(ctx, app, hash) - if s.flinkController.GetLatestJobID(ctx, app) != "" && len(flink.GetActiveFlinkJobs(jobs)) == 0 { - s.flinkController.UpdateLatestJobID(ctx, app, "") - } if s.flinkController.GetLatestJobID(ctx, app) == "" { savepointPath := "" if app.Status.DeployHash == "" {