diff --git a/alertmanager/plugin/plugin.go b/alertmanager/plugin/plugin.go
index a6a7b7057..6c5355434 100644
--- a/alertmanager/plugin/plugin.go
+++ b/alertmanager/plugin/plugin.go
@@ -30,5 +30,8 @@ func LoadAlertPlugins(cfg config.CurioAlertingConfig) []Plugin {
 	if cfg.PrometheusAlertManager.Enable {
 		plugins = append(plugins, NewPrometheusAlertManager(cfg.PrometheusAlertManager))
 	}
+	if cfg.SlackWebhook.Enable {
+		plugins = append(plugins, NewSlackWebhook(cfg.SlackWebhook))
+	}
 	return plugins
 }
diff --git a/alertmanager/plugin/slack_webhook.go b/alertmanager/plugin/slack_webhook.go
new file mode 100644
index 000000000..47768c510
--- /dev/null
+++ b/alertmanager/plugin/slack_webhook.go
@@ -0,0 +1,208 @@
+package plugin
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"sort"
+	"time"
+
+	"github.com/samber/lo"
+	"golang.org/x/xerrors"
+
+	"github.com/filecoin-project/curio/deps/config"
+)
+
+const (
+	// slackMaxAttempts is the total number of delivery attempts SendAlert makes.
+	slackMaxAttempts = 5
+	// slackRetryDelay is the fixed pause between attempts and the unit of the
+	// extra backoff added after network errors.
+	slackRetryDelay = time.Second
+	// slackRequestTimeout bounds a single HTTP round trip to the webhook.
+	slackRequestTimeout = 15 * time.Second
+)
+
+// SlackWebhook is an alert plugin that posts alerts to a Slack incoming webhook.
+type SlackWebhook struct {
+	cfg config.SlackWebhookConfig
+}
+
+// NewSlackWebhook returns a Plugin that delivers alerts to the webhook URL in cfg.
+func NewSlackWebhook(cfg config.SlackWebhookConfig) Plugin {
+	return &SlackWebhook{
+		cfg: cfg,
+	}
+}
+
+// SendAlert posts the alert to the configured Slack webhook.
+//
+// The summary becomes the leading section of the message and every entry of
+// data.Details is rendered as a header/section/divider triple, ordered by key.
+// Up to slackMaxAttempts attempts are made. Every failed attempt is followed by
+// a pause of slackRetryDelay; network errors additionally back off so that the
+// total wait doubles with each attempt (1s, 2s, 4s, 8s). HTTP 200 and 202 are
+// treated as success; any other status is mapped to a descriptive error.
+// If all attempts fail, the last error is returned wrapped with the attempt count.
+func (s *SlackWebhook) SendAlert(data *AlertPayload) error {
+	body, err := slackPayloadJSON(data)
+	if err != nil {
+		return xerrors.Errorf("Error marshaling JSON: %w", err)
+	}
+
+	req, err := http.NewRequest(http.MethodPost, s.cfg.WebHookURL, bytes.NewReader(body))
+	if err != nil {
+		return xerrors.Errorf("error creating request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+
+	client := &http.Client{
+		Timeout: slackRequestTimeout,
+	}
+	iter, _, err := lo.AttemptWithDelay(slackMaxAttempts, slackRetryDelay,
+		func(index int, _ time.Duration) error {
+			// The previous attempt may have drained the body, so rewind it;
+			// otherwise a retry would send an empty payload.
+			req.Body = io.NopCloser(bytes.NewReader(body))
+
+			resp, err := client.Do(req)
+			if err != nil {
+				// lo already sleeps slackRetryDelay between attempts; add
+				// (2^index - 1) units on top so the total wait doubles each
+				// time. The callback's duration argument is the elapsed time,
+				// not the delay, so it must not be used as a backoff unit.
+				if index < slackMaxAttempts-1 {
+					time.Sleep(slackRetryDelay * time.Duration((1<<index)-1))
+				}
+				return err
+			}
+			defer func() { _ = resp.Body.Close() }()
+
+			return slackResponseError(resp)
+		})
+	if err != nil {
+		return xerrors.Errorf("after %d attempts, last error: %w", iter, err)
+	}
+	return nil
+}
+
+// slackPayloadJSON renders the alert as a Slack Block Kit message and returns
+// its JSON encoding.
+func slackPayloadJSON(data *AlertPayload) ([]byte, error) {
+	type textBlock struct {
+		Type string `json:"type"`
+		Text string `json:"text"`
+	}
+
+	type block struct {
+		Type string     `json:"type"`
+		Text *textBlock `json:"text,omitempty"`
+	}
+
+	type payload struct {
+		Blocks []block `json:"blocks"`
+	}
+
+	// Map iteration order is random; sort the keys so the message layout is stable.
+	keys := make([]string, 0, len(data.Details))
+	for key := range data.Details {
+		keys = append(keys, key)
+	}
+	sort.Strings(keys)
+
+	// Start with the alert summary and the first divider.
+	blocks := make([]block, 0, 2+3*len(keys))
+	blocks = append(blocks,
+		block{
+			Type: "section",
+			Text: &textBlock{
+				Type: "mrkdwn",
+				Text: ":alert: " + data.Summary,
+			},
+		},
+		block{
+			Type: "divider",
+		},
+	)
+
+	// One header/section/divider triple per detail entry.
+	for _, key := range keys {
+		blocks = append(blocks,
+			block{
+				Type: "header",
+				Text: &textBlock{
+					Type: "plain_text",
+					Text: key,
+				},
+			},
+			block{
+				Type: "section",
+				Text: &textBlock{
+					Type: "plain_text",
+					Text: fmt.Sprintf("%v", data.Details[key]),
+				},
+			},
+			block{
+				Type: "divider",
+			},
+		)
+	}
+
+	return json.Marshal(payload{Blocks: blocks})
+}
+
+// slackResponseError maps the webhook's HTTP response to nil on success or to
+// an error describing the failure.
+func slackResponseError(resp *http.Response) error {
+	switch resp.StatusCode {
+	case http.StatusOK, http.StatusAccepted:
+		// Slack incoming webhooks answer 200 with the body "ok"; 202 is kept
+		// for backward compatibility with the previous check.
+		log.Debug("Accepted: The event has been accepted by Slack Webhook.")
+		return nil
+	case http.StatusBadRequest:
+		bd, rerr := io.ReadAll(resp.Body)
+		if rerr != nil {
+			return xerrors.Errorf("Bad request: invalid payload. Failed to read the body: %w", rerr)
+		}
+		switch string(bd) {
+		case "invalid_payload":
+			return xerrors.Errorf("Bad request: the data sent in your request cannot be understood as presented; verify your content body matches your content type and is structurally valid.")
+		case "user_not_found":
+			return xerrors.Errorf("Bad request: the user used in your request does not actually exist.")
+		default:
+			return xerrors.Errorf("Bad request: payload JSON is invalid %s", string(bd))
+		}
+	case http.StatusForbidden:
+		bd, rerr := io.ReadAll(resp.Body)
+		if rerr != nil {
+			return xerrors.Errorf("Forbidden: failed to read the body: %w", rerr)
+		}
+		switch string(bd) {
+		case "action_prohibited":
+			return xerrors.Errorf("Forbidden: the team associated with your request has some kind of restriction on the webhook posting in this context.")
+		default:
+			return xerrors.Errorf("Unexpected 403 error: %s", string(bd))
+		}
+	case http.StatusNotFound:
+		return xerrors.Errorf("Not Found: the channel associated with your request does not exist.")
+	case http.StatusGone:
+		return xerrors.Errorf("Gone: the channel has been archived and doesn't accept further messages, even from your incoming webhook.")
+	case http.StatusInternalServerError:
+		bd, rerr := io.ReadAll(resp.Body)
+		if rerr != nil {
+			return xerrors.Errorf("Server error: failed to read the body: %w", rerr)
+		}
+		switch string(bd) {
+		case "rollup_error":
+			return xerrors.Errorf("Server error: something strange and unusual happened that was likely not your fault at all.")
+		default:
+			return xerrors.Errorf("Unexpected 500 error: %s", string(bd))
+		}
+	default:
+		log.Errorw("Unexpected Slack webhook response", "status", resp.Status)
+		return xerrors.Errorf("Unexpected HTTP response: %s", resp.Status)
+	}
+}
diff --git a/alertmanager/task_alert.go b/alertmanager/task_alert.go
index f016725dd..bf6d4bb39 100644
--- a/alertmanager/task_alert.go
+++ b/alertmanager/task_alert.go
@@ -71,11 +71,15 @@ var alertFuncs = []alertFunc{
 
 func NewAlertTask(
 	api AlertAPI, db *harmonydb.DB, alertingCfg config.CurioAlertingConfig, al *curioalerting.AlertingSystem) *AlertTask {
+
+	plugins := plugin.LoadAlertPlugins(alertingCfg)
+
 	return &AlertTask{
-		api: api,
-		db:  db,
-		cfg: alertingCfg,
-		al:  al,
+		api:     api,
+		db:      db,
+		cfg:     alertingCfg,
+		al:      al,
+		plugins: plugins,
 	}
 }
 
diff --git a/deps/config/doc_gen.go b/deps/config/doc_gen.go
index b1aaef6fa..93df613cb 100644
--- a/deps/config/doc_gen.go
+++ b/deps/config/doc_gen.go
@@ -101,6 +101,12 @@ alerts will be triggered for the wallet`,
 
 			Comment: `PrometheusAlertManagerConfig is the configuration for the Prometheus AlertManager alerting integration.`,
 		},
+		{
+			Name: "SlackWebhook",
+			Type: "SlackWebhookConfig",
+
+			Comment: `SlackWebhookConfig is a configuration type for Slack webhook integration.`,
+		},
 	},
 	"CurioConfig": {
 		{
@@ -684,4 +690,19 @@ identifier in the integration page for the service.`,
 			Comment: `AlertManagerURL is the URL for the Prometheus AlertManager API v2 URL.`,
 		},
 	},
+	"SlackWebhookConfig": {
+		{
+			Name: "Enable",
+			Type: "bool",
+
+			Comment: `Enable is a flag to enable or disable the Slack webhook integration.`,
+		},
+		{
+			Name: "WebHookURL",
+			Type: "string",
+
+			Comment: `WebHookURL is the URL of the Slack incoming webhook.
+Example: https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX`,
+		},
+	},
 }
diff --git a/deps/config/types.go b/deps/config/types.go
index c330c9c05..028979a97 100644
--- a/deps/config/types.go
+++ b/deps/config/types.go
@@ -434,6 +434,9 @@ type CurioAlertingConfig struct {
 
 	// PrometheusAlertManagerConfig is the configuration for the Prometheus AlertManager alerting integration.
 	PrometheusAlertManager PrometheusAlertManagerConfig
+
+	// SlackWebhookConfig is a configuration type for Slack webhook integration.
+	SlackWebhook SlackWebhookConfig
 }
 
 type PagerDutyConfig struct {
@@ -458,6 +461,15 @@ type PrometheusAlertManagerConfig struct {
 	AlertManagerURL string
 }
 
+type SlackWebhookConfig struct {
+	// Enable is a flag to enable or disable the Slack webhook integration.
+	Enable bool
+
+	// WebHookURL is the URL of the Slack incoming webhook.
+	// Example: https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX
+	WebHookURL string
+}
+
 type JournalConfig struct {
 	//Events of the form: "system1:event1,system1:event2[,...]"
 	DisabledEvents string
diff --git a/documentation/en/configuration/default-curio-configuration.md b/documentation/en/configuration/default-curio-configuration.md
index 2a37866dc..2c8b99cec 100644
--- a/documentation/en/configuration/default-curio-configuration.md
+++ b/documentation/en/configuration/default-curio-configuration.md
@@ -477,4 +477,16 @@ description: The default curio configuration
     # type: string
     #AlertManagerURL = "http://localhost:9093/api/v2/alerts"
 
+  [Alerting.SlackWebhook]
+    # Enable is a flag to enable or disable the Slack webhook integration.
+    #
+    # type: bool
+    #Enable = false
+
+    # WebHookURL is the URL of the Slack incoming webhook.
+    # Example: https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX
+    #
+    # type: string
+    #WebHookURL = ""
+
 ```