Skip to content

Commit

Permalink
Fixed re-ip error before start_db
Browse files Browse the repository at this point in the history
  • Loading branch information
cchen-vertica committed Jul 11, 2024
1 parent 2560667 commit ddd9ef8
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 9 deletions.
10 changes: 9 additions & 1 deletion pkg/controllers/vdb/podfacts.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,9 @@ type PodFact struct {
// true if the pod's spec includes a sidecar to run the NMA
hasNMASidecar bool

// true if the NMA sidecar container is ready
isNMAContainerReady bool

// The name of the container to run exec commands on.
execContainerName string

Expand Down Expand Up @@ -389,6 +392,7 @@ func (p *PodFacts) collectPodByStsIndex(ctx context.Context, vdb *vapi.VerticaDB
return err
}
pf.hasNMASidecar = vk8s.HasNMAContainer(&pod.Spec)
pf.isNMAContainerReady = vk8s.IsNMAContainerReady(pod)
// we get the sandbox name from the sts labels if the subcluster
// belongs to a sandbox. If the node is up, we will later retrieve
// the sandbox state from the catalog
Expand Down Expand Up @@ -932,12 +936,16 @@ func (p *PodFacts) findInstalledPods() []*PodFact {

// findReIPPods returns a list of pod facts that may need their IPs to be refreshed with re-ip.
// An empty list implies there are no pods that match the criteria.
func (p *PodFacts) findReIPPods(chk dBCheckType) []*PodFact {
func (p *PodFacts) findReIPPods(chk dBCheckType, useVClusterOps bool) []*PodFact {
return p.filterPods(func(pod *PodFact) bool {
// Only consider running pods that exist and have an installation
if !pod.exists || !pod.isPodRunning || !pod.isInstalled {
return false
}
// NMA needs to be running before re-ip
if useVClusterOps && !pod.isNMAContainerReady {
return false
}
switch chk {
case dBCheckOnlyWithDBs:
return pod.dbExists
Expand Down
13 changes: 9 additions & 4 deletions pkg/controllers/vdb/podfacts_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ var _ = Describe("podfacts", func() {
It("should correctly return re-ip pods", func() {
pf := MakePodFacts(vdbRec, &cmds.FakePodRunner{}, logger, TestPassword)
pf.Detail[types.NamespacedName{Name: "p1"}] = &PodFact{
dnsName: "p1", vnodeName: "node1", dbExists: true, exists: true, isPodRunning: true, isInstalled: true,
dnsName: "p1", vnodeName: "node1", dbExists: true, exists: true, isPodRunning: true, isInstalled: true, isNMAContainerReady: true,
}
pf.Detail[types.NamespacedName{Name: "p2"}] = &PodFact{
dnsName: "p2", vnodeName: "node2", dbExists: false, exists: true, isPodRunning: true, isInstalled: true,
Expand Down Expand Up @@ -363,18 +363,23 @@ var _ = Describe("podfacts", func() {

func verifyReIP(pf *PodFacts) {
By("finding any installed pod")
pods := pf.findReIPPods(dBCheckAny)
pods := pf.findReIPPods(dBCheckAny, false)
Ω(pods).Should(HaveLen(2))
Ω(pods[0].dnsName).Should(Equal("p1"))
Ω(pods[1].dnsName).Should(Equal("p2"))

By("finding pods with a db")
pods = pf.findReIPPods(dBCheckOnlyWithDBs)
pods = pf.findReIPPods(dBCheckOnlyWithDBs, false)
Ω(pods).Should(HaveLen(1))
Ω(pods[0].dnsName).Should(Equal("p1"))

By("finding pods without a db")
pods = pf.findReIPPods(dBCheckOnlyWithoutDBs)
pods = pf.findReIPPods(dBCheckOnlyWithoutDBs, false)
Ω(pods).Should(HaveLen(1))
Ω(pods[0].dnsName).Should(Equal("p2"))

By("finding any installed pod that uses vclusterOps")
pods = pf.findReIPPods(dBCheckAny, true)
Ω(pods).Should(HaveLen(1))
Ω(pods[0].dnsName).Should(Equal("p1"))
}
13 changes: 9 additions & 4 deletions pkg/controllers/vdb/restart_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,12 @@ func (r *RestartReconciler) reconcileCluster(ctx context.Context) (ctrl.Result,

// Re-ip nodes. This is done ahead of the db check in case we need to update
// the IP of nodes that have been installed but not yet added to the db.
if res, err := r.reipNodes(ctx, r.getReIPPods(false)); verrors.IsReconcileAborted(res, err) {
reIPPods := r.getReIPPods(false)
if len(reIPPods) != len(r.PFacts.Detail) {
r.Log.Info("Not all pods are running. Need to requeue restart reconciler")
return ctrl.Result{Requeue: true}, nil
}
if res, err := r.reipNodes(ctx, reIPPods); verrors.IsReconcileAborted(res, err) {
return res, err
}

Expand Down Expand Up @@ -708,13 +713,13 @@ func (r *RestartReconciler) getReIPPods(isRestartNode bool) []*PodFact {
if vmeta.UseVClusterOps(r.Vdb.Annotations) {
return nil
}
return r.PFacts.findReIPPods(dBCheckOnlyWithoutDBs)
return r.PFacts.findReIPPods(dBCheckOnlyWithoutDBs, false)
}
// For cluster restart, we re-ip all nodes that have been added to the DB.
// If using admintools, we also need to re-ip installed pods that
// haven't been added to the db to keep admintools.conf in sync.
if vmeta.UseVClusterOps(r.Vdb.Annotations) {
return r.PFacts.findReIPPods(dBCheckOnlyWithDBs)
return r.PFacts.findReIPPods(dBCheckOnlyWithDBs, true)
}
return r.PFacts.findReIPPods(dBCheckAny)
return r.PFacts.findReIPPods(dBCheckAny, false)
}

0 comments on commit ddd9ef8

Please sign in to comment.