Skip to content

Commit

Permalink
add slack webhook alert plugin (#105)
Browse files Browse the repository at this point in the history
  • Loading branch information
LexLuthr committed Jul 19, 2024
1 parent 29fc9fa commit 2651fd7
Show file tree
Hide file tree
Showing 6 changed files with 220 additions and 4 deletions.
3 changes: 3 additions & 0 deletions alertmanager/plugin/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,8 @@ func LoadAlertPlugins(cfg config.CurioAlertingConfig) []Plugin {
if cfg.PrometheusAlertManager.Enable {
plugins = append(plugins, NewPrometheusAlertManager(cfg.PrometheusAlertManager))
}
if cfg.SlackWebhook.Enable {
plugins = append(plugins, NewSlackWebhook(cfg.SlackWebhook))
}
return plugins
}
164 changes: 164 additions & 0 deletions alertmanager/plugin/slack_webhook.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
package plugin

import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
"time"

"github.com/samber/lo"
"golang.org/x/xerrors"

"github.com/filecoin-project/curio/deps/config"
)

// SlackWebhook is the alert plugin that delivers alerts to a Slack webhook.
// It is created by NewSlackWebhook and sends alerts through its SendAlert method.
type SlackWebhook struct {
// cfg is the Slack webhook configuration; SendAlert posts to cfg.WebHookURL.
cfg config.SlackWebhookConfig
}

// NewSlackWebhook returns a Plugin backed by a SlackWebhook that keeps a copy
// of cfg for later use when sending alerts.
func NewSlackWebhook(cfg config.SlackWebhookConfig) Plugin {
	hook := new(SlackWebhook)
	hook.cfg = cfg
	return hook
}

// SendAlert posts the alert described by data to the configured Slack webhook.
//
// The message body is a Slack Block Kit payload: a section carrying
// data.Summary, a divider, and then a header/section/divider triple for every
// entry of data.Details. Details is ranged over directly, so the order of the
// entries in the message is unspecified.
//
// Delivery is attempted up to 5 times. A fresh request is built for every
// attempt so the JSON body is always sent in full. Transport errors, HTTP 429
// and HTTP 5xx responses are retried with exponential backoff (1s, 2s, 4s, 8s
// between attempts). Any other non-success response is permanent and is
// returned immediately without further attempts.
//
// It returns nil when Slack accepts the message (HTTP 200 or 202), an error
// describing the rejection for permanent failures, or the last error wrapped
// with the number of attempts once all attempts are exhausted.
func (s *SlackWebhook) SendAlert(data *AlertPayload) error {
	if data == nil {
		return xerrors.Errorf("slack webhook: alert payload is nil")
	}

	type TextBlock struct {
		Type string `json:"type"`
		Text string `json:"text"`
	}

	type Block struct {
		Type string     `json:"type"`
		Text *TextBlock `json:"text,omitempty"`
	}

	type Payload struct {
		Blocks []Block `json:"blocks"`
	}

	// Initialize the payload with the alert summary and the first divider.
	payload := Payload{
		Blocks: []Block{
			{
				Type: "section",
				Text: &TextBlock{
					Type: "mrkdwn",
					Text: ":alert: " + data.Summary,
				},
			},
			{
				Type: "divider",
			},
		},
	}

	// One header/section/divider triple per detail entry.
	for key, value := range data.Details {
		payload.Blocks = append(payload.Blocks,
			Block{
				Type: "header",
				Text: &TextBlock{
					Type: "plain_text",
					Text: key,
				},
			},
			Block{
				Type: "section",
				Text: &TextBlock{
					Type: "plain_text",
					Text: fmt.Sprintf("%v", value),
				},
			},
			Block{
				Type: "divider",
			},
		)
	}

	jsonData, err := json.Marshal(payload)
	if err != nil {
		return xerrors.Errorf("Error marshaling JSON: %w", err)
	}

	const (
		maxAttempts = 5
		baseDelay   = time.Second
	)

	// lo.AttemptWithDelay already sleeps baseDelay between attempts; backoff
	// adds the remainder so the total pause after attempt i is baseDelay * 2^i.
	// Nothing is added after the last attempt because no retry follows it.
	backoff := func(index int) {
		if index+1 < maxAttempts {
			time.Sleep(baseDelay<<index - baseDelay)
		}
	}

	client := &http.Client{
		Timeout: 15 * time.Second,
	}

	// permanentErr carries a non-retryable failure out of the retry callback;
	// the callback returns nil in that case so that retrying stops.
	var permanentErr error

	iter, _, err := lo.AttemptWithDelay(maxAttempts, baseDelay,
		func(index int, _ time.Duration) error {
			// Build the request per attempt: a request body can only be read once.
			req, err := http.NewRequest(http.MethodPost, s.cfg.WebHookURL, bytes.NewReader(jsonData))
			if err != nil {
				permanentErr = xerrors.Errorf("error creating request: %w", err)
				return nil
			}
			req.Header.Set("Content-Type", "application/json")

			resp, err := client.Do(req)
			if err != nil {
				backoff(index)
				return err
			}
			defer func() { _ = resp.Body.Close() }()

			statusErr := slackWebhookStatusError(resp)
			if statusErr != nil && (resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode >= 500) {
				// Rate limiting and server-side failures may be transient.
				backoff(index)
				return statusErr
			}

			permanentErr = statusErr
			return nil
		})
	if err != nil {
		return xerrors.Errorf("after %d attempts, last error: %w", iter, err)
	}
	return permanentErr
}

// slackWebhookStatusError translates a Slack webhook HTTP response into an
// error, or nil when the status code is 200 or 202. For statuses whose meaning
// depends on the response body, at most 4 KiB of the body is read.
func slackWebhookStatusError(resp *http.Response) error {
	// reason returns the whitespace-trimmed response body.
	reason := func() (string, error) {
		raw, err := io.ReadAll(io.LimitReader(resp.Body, 4096))
		if err != nil {
			return "", xerrors.Errorf("slack webhook returned %s; failed to read the body: %w", resp.Status, err)
		}
		return string(bytes.TrimSpace(raw)), nil
	}

	switch resp.StatusCode {
	case http.StatusOK, http.StatusAccepted:
		log.Debug("Accepted: The event has been accepted by Slack Webhook.")
		return nil
	case http.StatusBadRequest:
		body, err := reason()
		if err != nil {
			return err
		}
		switch body {
		case "invalid_payload":
			return xerrors.Errorf("Bad request: the data sent in your request cannot be understood as presented; verify your content body matches your content type and is structurally valid.")
		case "user_not_found":
			return xerrors.Errorf("Bad request: the user used in your request does not actually exist.")
		default:
			return xerrors.Errorf("Bad request: payload JSON is invalid %s", body)
		}
	case http.StatusForbidden:
		body, err := reason()
		if err != nil {
			return err
		}
		switch body {
		case "action_prohibited":
			return xerrors.Errorf("Forbidden: the team associated with your request has some kind of restriction on the webhook posting in this context.")
		default:
			return xerrors.Errorf("Unexpected 403 error: %s", body)
		}
	case http.StatusNotFound:
		return xerrors.Errorf("Not Found: the channel associated with your request does not exist.")
	case http.StatusGone:
		return xerrors.Errorf("Gone: the channel has been archived and doesn't accept further messages, even from your incoming webhook.")
	case http.StatusInternalServerError:
		body, err := reason()
		if err != nil {
			return err
		}
		switch body {
		case "rollup_error":
			return xerrors.Errorf("Server error: something strange and unusual happened that was likely not your fault at all.")
		default:
			return xerrors.Errorf("Unexpected 500 error: %s", body)
		}
	default:
		log.Errorw("unexpected Slack webhook response", "status", resp.Status)
		return xerrors.Errorf("Unexpected HTTP response: %s", resp.Status)
	}
}
12 changes: 8 additions & 4 deletions alertmanager/task_alert.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,15 @@ var alertFuncs = []alertFunc{

// NewAlertTask builds an AlertTask from the given API, database handle,
// alerting configuration and alerting system. The task's plugins are obtained
// by calling plugin.LoadAlertPlugins with alertingCfg.
// NOTE(review): this span comes from a rendered diff, so the api/db/cfg/al
// field lines appear twice (removed, then re-added) — confirm against the real file.
func NewAlertTask(
api AlertAPI, db *harmonydb.DB, alertingCfg config.CurioAlertingConfig, al *curioalerting.AlertingSystem) *AlertTask {

plugins := plugin.LoadAlertPlugins(alertingCfg)

return &AlertTask{
api: api,
db: db,
cfg: alertingCfg,
al: al,
api: api,
db: db,
cfg: alertingCfg,
al: al,
plugins: plugins,
}
}

Expand Down
21 changes: 21 additions & 0 deletions deps/config/doc_gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions deps/config/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,9 @@ type CurioAlertingConfig struct {

// PrometheusAlertManagerConfig is the configuration for the Prometheus AlertManager alerting integration.
PrometheusAlertManager PrometheusAlertManagerConfig

// SlackWebhookConfig is a configuration type for Slack webhook integration.
SlackWebhook SlackWebhookConfig
}

type PagerDutyConfig struct {
Expand All @@ -458,6 +461,15 @@ type PrometheusAlertManagerConfig struct {
AlertManagerURL string
}

// SlackWebhookConfig is the configuration for the Slack webhook alerting integration.
type SlackWebhookConfig struct {
// Enable is a flag to enable or disable the Slack webhook integration.
Enable bool

// WebHookURL is the URL of the Slack webhook that alerts are posted to.
// Example: https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX
WebHookURL string
}

type JournalConfig struct {
//Events of the form: "system1:event1,system1:event2[,...]"
DisabledEvents string
Expand Down
12 changes: 12 additions & 0 deletions documentation/en/configuration/default-curio-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -477,4 +477,16 @@ description: The default curio configuration
# type: string
#AlertManagerURL = "http://localhost:9093/api/v2/alerts"

[Alerting.SlackWebhook]
# Enable is a flag to enable or disable the Slack webhook integration.
#
# type: bool
#Enable = false

# WebHookURL is the URL of the Slack webhook that alerts are posted to.
# Example: https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX
#
# type: string
#WebHookURL = ""

```

0 comments on commit 2651fd7

Please sign in to comment.