Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cmd/bosun: (wip) link previous incidents and next incident #2323

Merged
merged 2 commits into from
Oct 3, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cmd/bosun/conf/rule/rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,9 @@ var defaultFuncs = template.FuncMap{
}
return &d
},
"append": func(a []interface{}, b interface{}) interface{} {
return append(a, b)
},
"makeSlice": func(vals ...interface{}) interface{} {
return vals
},
Expand Down
58 changes: 58 additions & 0 deletions cmd/bosun/database/migrate.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package database

import (
"encoding/json"
"sort"

"bosun.org/models"
"bosun.org/slog"
Expand All @@ -25,6 +26,11 @@ var tasks = []Migration{
Task: migrateRenderedTemplates,
Version: 1,
},
{
UID: "Populate Previous IncidentIds",
Task: populatePreviousIncidents,
Version: 2,
},
}

type oldIncidentState struct {
Expand Down Expand Up @@ -75,6 +81,58 @@ func migrateRenderedTemplates(d *dataAccess) error {
return nil
}

func populatePreviousIncidents(d *dataAccess) error {
slog.Infoln("Adding fields for previous incidents and next incident on all incidents in order to link incidents together. This is a one time operation that can take several minutes.")

ids, err := d.getAllIncidentIdsByKeys()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possibly collect Alert Keys up front, and run inner loop once per alert key, not once per incident.

slog.Infof("migrating %v incidents", len(ids))
if err != nil {
return err
}

conn := d.Get()
defer conn.Close()

prevIdCache := make(map[models.AlertKey]*[]int64)

for _, id := range ids {
incident, err := d.State().GetIncidentState(id)
if err != nil {
return err
}
if _, ok := prevIdCache[incident.AlertKey]; !ok {
prevList, err := d.State().GetAllIncidentIdsByAlertKey(incident.AlertKey)
if err != nil {
return err
}
sort.Slice(prevList, func(i, j int) bool {
return prevList[i] < prevList[j]
})
prevIdCache[incident.AlertKey] = &prevList
}
for _, pid := range *prevIdCache[incident.AlertKey] {
if incident.Id > pid {
incident.PreviousIds = append([]int64{pid}, incident.PreviousIds...)
continue
}
break
}

err = d.setIncident(incident, conn)
if err != nil {
return err
}
if len(incident.PreviousIds) > 0 {
err := d.State().SetIncidentNext(incident.PreviousIds[0], incident.Id)
if err != nil {
return err
}
}

}
return nil
}

func (d *dataAccess) Migrate() error {
slog.Infoln("checking migrations")
conn := d.Get()
Expand Down
44 changes: 44 additions & 0 deletions cmd/bosun/database/state_data.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,14 @@ type StateDataAccess interface {
GetIncidentState(incidentId int64) (*models.IncidentState, error)

GetAllIncidentsByAlertKey(ak models.AlertKey) ([]*models.IncidentState, error)
GetAllIncidentIdsByAlertKey(ak models.AlertKey) ([]int64, error)

UpdateIncidentState(s *models.IncidentState) (int64, error)
ImportIncidentState(s *models.IncidentState) error

// SetIncidentNext gets the incident for previousIncidentId, and sets its NextId field to be nextIncidentId and then saves the incident
SetIncidentNext(incidentId, nextIncidentId int64) error

SetRenderedTemplates(incidentId int64, rt *models.RenderedTemplates) error
GetRenderedTemplates(incidentId int64) (*models.RenderedTemplates, error)
GetRenderedTemplateKeys() ([]string, error)
Expand Down Expand Up @@ -259,6 +263,17 @@ func (d *dataAccess) GetAllIncidentsByAlertKey(ak models.AlertKey) ([]*models.In
return d.incidentMultiGet(conn, ids)
}

func (d *dataAccess) GetAllIncidentIdsByAlertKey(ak models.AlertKey) ([]int64, error) {
conn := d.Get()
defer conn.Close()

ids, err := int64s(conn.Do("LRANGE", incidentsForAlertKeyKey(ak), 0, -1))
if err != nil {
return nil, slog.Wrap(err)
}
return ids, nil
}

// In general one should not use the redis KEYS command. So this is only used
// in migration. If we want to use a proper index of all incidents
// then issues with allIncidents must be fixed. Currently it is planned
Expand Down Expand Up @@ -318,12 +333,41 @@ func (d *dataAccess) getIncident(incidentId int64, conn redis.Conn) (*models.Inc
return state, nil
}

// setIncident directly sets the incident as is to the datastore
func (d *dataAccess) setIncident(incident *models.IncidentState, conn redis.Conn) error {
data, err := json.Marshal(incident)
if err != nil {
return slog.Wrap(err)
}
if _, err = conn.Do("SET", incidentStateKey(incident.Id), data); err != nil {
return err
}
return nil
}

func (d *dataAccess) GetIncidentState(incidentId int64) (*models.IncidentState, error) {
conn := d.Get()
defer conn.Close()
return d.getIncident(incidentId, conn)
}

// SetIncidentNext gets the incident for previousIncidentId, and sets its NextId field
// to be nextIncidentId and then saves the incident
func (d *dataAccess) SetIncidentNext(previousIncidentId, nextIncidentId int64) error {
conn := d.Get()
defer conn.Close()
previousIncident, err := d.getIncident(previousIncidentId, conn)
if err != nil {
return err
}
previousIncident.NextId = nextIncidentId
err = d.setIncident(previousIncident, conn)
if err != nil {
return err
}
return nil
}

func (d *dataAccess) UpdateIncidentState(s *models.IncidentState) (int64, error) {
return d.save(s, false)
}
Expand Down
27 changes: 26 additions & 1 deletion cmd/bosun/sched/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package sched
import (
"fmt"
"math"
"sort"
"time"

"bosun.org/cmd/bosun/cache"
Expand Down Expand Up @@ -244,10 +245,34 @@ func (s *Schedule) runHistory(r *RunHistory, ak models.AlertKey, event *models.E
if a.Log || silencedOrIgnored(a, event, si) {
//a log or silenced/ignored alert will not need to be saved
} else {
incident.Id, err = s.DataAccess.State().UpdateIncidentState(incident)
daState := s.DataAccess.State()
incident.Id, err = daState.UpdateIncidentState(incident)
if err != nil {
return
}
previousIds := []int64{}
previousIds, err = daState.GetAllIncidentIdsByAlertKey(ak)
if err != nil {
return
}
for _, id := range previousIds {
if incident.Id > id {
incident.PreviousIds = append(incident.PreviousIds, id)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wait, we're storing all previous alert keys on every alert? Gross. Why?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@captncraig To make it easier to either:

  1. Display more than just the previous incident in a view
  2. Make it easier to iterate over all previous incidents in templates without having to follow a linked list.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also to clarify not the previous alert keys, just an array of incident id numbers (int64)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another Also :P alert keys are the alert name plus the tagset (not all alerts under the name, unless the tagset is {} or of len 1. So the list generally should not be crazy long.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess I'm just uncomfortable anytime I see potentially large, and also redundant, and also repetitive collections or lists.

I guess though I'd rather a template could show them all without having to recursively traverse the chain, which was gonna be the alternative I suggested. So ok.

}
}
sort.Slice(incident.PreviousIds, func(i, j int) bool {
return incident.PreviousIds[i] > incident.PreviousIds[j]
})
_, err = daState.UpdateIncidentState(incident)
if err != nil {
return
}
if len(incident.PreviousIds) > 0 {
err = daState.SetIncidentNext(incident.PreviousIds[0], incident.Id)
if err != nil {
return
}
}
}
}

Expand Down
11 changes: 11 additions & 0 deletions cmd/bosun/sched/template.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,17 @@ func (c *Context) Last() interface{} {
}{c.IncidentState.Last(), c.Id}
}

// GetIncidentState returns an IncidentState so users can
// include information about previous or other Incidents in alert notifications
func (c *Context) GetIncidentState(id int64) *models.IncidentState {
is, err := c.schedule.DataAccess.State().GetIncidentState(id)
if err != nil {
c.addError(err)
return nil
}
return is
}

// Expr takes an expression in the form of a string, changes the tags to
// match the context of the alert, and returns a link to the expression page.
func (c *Context) Expr(v string) string {
Expand Down
6 changes: 6 additions & 0 deletions cmd/bosun/web/expr.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,12 @@ func procRule(t miniprofiler.Timer, ruleConf conf.RuleConfProvider, a *conf.Aler
}
var errs []error
primaryIncident.Id = int64(incidentID)
// See if the incidentID corresponds to a real Incident, and if so
// get some information from the real incident
if realIncident, err := schedule.DataAccess.State().GetIncidentState(primaryIncident.Id); err == nil {
primaryIncident.PreviousIds = realIncident.PreviousIds
}

primaryIncident.Start = time.Now().UTC()
primaryIncident.CurrentStatus = e.Status
primaryIncident.LastAbnormalStatus = e.Status
Expand Down
Loading