diff --git a/alerter/src/main/java/org/dromara/hertzbeat/alert/calculate/CalculateAlarm.java b/alerter/src/main/java/org/dromara/hertzbeat/alert/calculate/CalculateAlarm.java index e6d9fadc515..218a1b4dc63 100644 --- a/alerter/src/main/java/org/dromara/hertzbeat/alert/calculate/CalculateAlarm.java +++ b/alerter/src/main/java/org/dromara/hertzbeat/alert/calculate/CalculateAlarm.java @@ -35,6 +35,7 @@ import org.dromara.hertzbeat.common.entity.manager.Monitor; import org.dromara.hertzbeat.common.entity.message.CollectRep; import org.dromara.hertzbeat.common.constants.CommonConstants; +import org.dromara.hertzbeat.common.support.event.MonitorDeletedEvent; import org.dromara.hertzbeat.common.support.event.SystemConfigChangeEvent; import org.dromara.hertzbeat.common.util.CommonUtil; import org.dromara.hertzbeat.common.util.ResourceBundleUtil; @@ -70,8 +71,12 @@ public class CalculateAlarm { * key - monitorId+alertDefineId 为普通阈值告警 | The alarm is a common threshold alarm * key - monitorId 为监控状态可用性可达性告警 | Indicates the monitoring status availability reachability alarm */ - public Map triggeredAlertMap; - public Set unAvailableMonitors; + private final Map triggeredAlertMap; + /** + * The not recover alert + * key - monitorId + alertDefineId + (instance) + */ + private final Map notRecoveredAlertMap; private final AlerterWorkerPool workerPool; private final CommonDataQueue dataQueue; private final AlertDefineService alertDefineService; @@ -89,13 +94,13 @@ public CalculateAlarm(AlerterWorkerPool workerPool, CommonDataQueue dataQueue, this.alertService = alertService; this.bundle = ResourceBundleUtil.getBundle("alerter"); this.triggeredAlertMap = new ConcurrentHashMap<>(128); - this.unAvailableMonitors = Collections.synchronizedSet(new HashSet<>(16)); + this.notRecoveredAlertMap = new ConcurrentHashMap<>(128); // Initialize stateAlertMap // 初始化stateAlertMap List monitors = monitorDao.findMonitorsByStatus(CommonConstants.UN_AVAILABLE_CODE); if (monitors != null) { for (Monitor 
monitor : monitors) { - this.unAvailableMonitors.add(monitor.getId()); + this.notRecoveredAlertMap.put(monitor.getId() + CommonConstants.AVAILABILITY, Alert.builder().build()); } } startCalculate(); @@ -127,7 +132,7 @@ private void calculate(CollectRep.MetricsData metricsData) { // If the indicator group whose scheduling priority is 0 has the status of collecting response data UN_REACHABLE/UN_CONNECTABLE, the highest severity alarm is generated to monitor the status change // 先判断调度优先级为0的指标组采集响应数据状态 UN_REACHABLE/UN_CONNECTABLE 则需发最高级别告警进行监控状态变更 if (metricsData.getPriority() == 0) { - handlerAvailableMetrics(monitorId, app, metrics, metricsData); + handlerAvailableMetrics(monitorId, app, metricsData); } // Query the alarm definitions associated with the indicator set of the monitoring type // 查出此监控类型下的此指标集合下关联配置的告警定义信息 @@ -156,6 +161,9 @@ private void calculate(CollectRep.MetricsData metricsData) { afterThresholdRuleMatch(currentTimeMilli, monitorId, app, metrics, fieldValueMap, define); // 若此阈值已被触发,则其它数据行的触发忽略 continue; + } else if (define.isRecoverNotice()) { + String notResolvedAlertKey = String.valueOf(monitorId) + define.getId() + null; + handleRecoveredAlert(currentTimeMilli, monitorId, app, define, expr, notResolvedAlertKey); } } catch (Exception e) { log.warn(e.getMessage(), e); @@ -193,6 +201,9 @@ private void calculate(CollectRep.MetricsData metricsData) { afterThresholdRuleMatch(currentTimeMilli, monitorId, app, metrics, fieldValueMap, define); // 若此阈值已被触发,则其它数据行的触发忽略 break; + } else if (define.isRecoverNotice()) { + String notResolvedAlertKey = String.valueOf(monitorId) + define.getId() + (!"".equals(instance) ? 
instance : null); + handleRecoveredAlert(currentTimeMilli, monitorId, app, define, expr, notResolvedAlertKey); } } catch (Exception e) { log.warn(e.getMessage(), e); @@ -203,6 +214,26 @@ private void calculate(CollectRep.MetricsData metricsData) { } } + private void handleRecoveredAlert(long currentTimeMilli, long monitorId, String app, AlertDefine define, String expr, String notResolvedAlertKey) { + Alert notResolvedAlert = notRecoveredAlertMap.remove(notResolvedAlertKey); + if (notResolvedAlert != null) { + // Sending an alarm Restore + Map tags = notResolvedAlert.getTags(); + String content = this.bundle.getString("alerter.alarm.recover") + " : " + expr; + Alert resumeAlert = Alert.builder() + .tags(tags) + .target(define.getApp() + "." + define.getMetric() + "." + define.getField()) + .content(content) + .priority(CommonConstants.ALERT_PRIORITY_CODE_WARNING) + .status(CommonConstants.ALERT_STATUS_CODE_RESTORED) + .firstAlarmTime(currentTimeMilli) + .lastAlarmTime(currentTimeMilli) + .triggerTimes(1) + .build(); + alarmCommonReduce.reduceAndSendAlarm(resumeAlert); + } + } + private void afterThresholdRuleMatch(long currentTimeMilli, long monitorId, String app, String metrics, Map fieldValueMap, AlertDefine define) { String monitorAlertKey = String.valueOf(monitorId) + define.getId(); Alert triggeredAlert = triggeredAlertMap.get(monitorAlertKey); @@ -213,7 +244,9 @@ private void afterThresholdRuleMatch(long currentTimeMilli, long monitorId, Stri triggeredAlert.setLastAlarmTime(currentTimeMilli); int defineTimes = define.getTimes() == null ? 
1 : define.getTimes(); if (times >= defineTimes) { + String notResolvedAlertKey = String.valueOf(monitorId) + define.getId() + fieldValueMap.get("instance"); triggeredAlertMap.remove(monitorAlertKey); + notRecoveredAlertMap.put(notResolvedAlertKey, triggeredAlert); alarmCommonReduce.reduceAndSendAlarm(triggeredAlert.clone()); } } else { @@ -244,6 +277,8 @@ private void afterThresholdRuleMatch(long currentTimeMilli, long monitorId, Stri .build(); int defineTimes = define.getTimes() == null ? 1 : define.getTimes(); if (1 >= defineTimes) { + String notResolvedAlertKey = String.valueOf(monitorId) + define.getId() + fieldValueMap.get("instance"); + notRecoveredAlertMap.put(notResolvedAlertKey, alert); alarmCommonReduce.reduceAndSendAlarm(alert); } else { triggeredAlertMap.put(monitorAlertKey, alert); @@ -267,52 +302,93 @@ private boolean execAlertExpression(Map fieldValueMap, String ex return match; } - private void handlerAvailableMetrics(long monitorId, String app, String metrics, CollectRep.MetricsData metricsData) { + private void handlerAvailableMetrics(long monitorId, String app, CollectRep.MetricsData metricsData) { + AlertDefine avaAlertDefine = alertDefineService.getMonitorBindAlertAvaDefine(monitorId, app, CommonConstants.AVAILABILITY); + if (avaAlertDefine == null) { + return; + } + long currentTimeMill = System.currentTimeMillis(); if (metricsData.getCode() != CollectRep.Code.SUCCESS) { - // Collection and abnormal - // 采集异常 - if (metricsData.getCode() == CollectRep.Code.UN_AVAILABLE) { - // The todo collector is unavailable - // todo 采集器不可用 - return; - } else if (metricsData.getCode() == CollectRep.Code.UN_REACHABLE) { - // UN_REACHABLE Peer unreachable (Network layer icmp) - // UN_REACHABLE 对端不可达(网络层icmp) - handlerMonitorAvailableAlert(monitorId, app, metricsData.getCode()); - } else if (metricsData.getCode() == CollectRep.Code.UN_CONNECTABLE) { - // UN_CONNECTABLE Peer connection failure (transport layer tcp,udp) - // UN_CONNECTABLE 对端连接失败(传输层tcp,udp) - 
handlerMonitorAvailableAlert(monitorId, app, metricsData.getCode()); + Alert preAlert = triggeredAlertMap.get(String.valueOf(monitorId)); + Map tags = new HashMap<>(6); + tags.put(CommonConstants.TAG_MONITOR_ID, String.valueOf(monitorId)); + tags.put(CommonConstants.TAG_MONITOR_APP, app); + tags.put("metrics", CommonConstants.AVAILABILITY); + tags.put("code", metricsData.getCode().name()); + Map valueMap = tags.entrySet().stream() + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + if (avaAlertDefine.getTags() != null && !avaAlertDefine.getTags().isEmpty()) { + for (TagItem tagItem : avaAlertDefine.getTags()) { + valueMap.put(tagItem.getName(), tagItem.getValue()); + tags.put(tagItem.getName(), tagItem.getValue()); + } + } + if (preAlert == null) { + Alert.AlertBuilder alertBuilder = Alert.builder() + .tags(tags) + .priority(CommonConstants.ALERT_PRIORITY_CODE_EMERGENCY) + .status(ALERT_STATUS_CODE_PENDING) + .target(CommonConstants.AVAILABILITY) + .content(AlertTemplateUtil.render(avaAlertDefine.getTemplate(), valueMap)) + .firstAlarmTime(currentTimeMill) + .lastAlarmTime(currentTimeMill) + .triggerTimes(1); + if (avaAlertDefine.getTimes() == null || avaAlertDefine.getTimes() <= 1) { + String notResolvedAlertKey = monitorId + CommonConstants.AVAILABILITY; + notRecoveredAlertMap.put(notResolvedAlertKey, alertBuilder.build()); + alarmCommonReduce.reduceAndSendAlarm(alertBuilder.build().clone()); + } else { + alertBuilder.status(CommonConstants.ALERT_STATUS_CODE_NOT_REACH); + } + triggeredAlertMap.put(String.valueOf(monitorId), alertBuilder.build()); } else { - // Other exceptions - // 其他异常 - handlerMonitorAvailableAlert(monitorId, app, metricsData.getCode()); + int times = preAlert.getTriggerTimes() + 1; + if (preAlert.getStatus() == ALERT_STATUS_CODE_PENDING) { + times = 1; + preAlert.setContent(AlertTemplateUtil.render(avaAlertDefine.getTemplate(), valueMap)); + preAlert.setTags(tags); + } + preAlert.setTriggerTimes(times); + 
preAlert.setFirstAlarmTime(currentTimeMill); + preAlert.setLastAlarmTime(currentTimeMill); + int defineTimes = avaAlertDefine.getTimes() == null ? 1 : avaAlertDefine.getTimes(); + if (times >= defineTimes) { + preAlert.setStatus(ALERT_STATUS_CODE_PENDING); + String notResolvedAlertKey = monitorId + CommonConstants.AVAILABILITY; + notRecoveredAlertMap.put(notResolvedAlertKey, preAlert.clone()); + alarmCommonReduce.reduceAndSendAlarm(preAlert.clone()); + } else { + preAlert.setStatus(CommonConstants.ALERT_STATUS_CODE_NOT_REACH); + } } - return; - } else { - // Check whether an availability or unreachable alarm is generated before the association monitoring, and send a clear alarm to clear the monitoring status + } else if (avaAlertDefine.isRecoverNotice()) { + // Check whether an availability or unreachable alarm is generated before the association monitoring + // and send a clear alarm to clear the monitoring status // 判断关联监控之前是否有可用性或者不可达告警,发送恢复告警进行监控状态恢复 - triggeredAlertMap.remove(String.valueOf(monitorId)); - boolean isRestartUnavailable = unAvailableMonitors.remove(monitorId); - if (isRestartUnavailable) { + String notResolvedAlertKey = monitorId + CommonConstants.AVAILABILITY; + Alert notResolvedAlert = notRecoveredAlertMap.remove(notResolvedAlertKey); + if (notResolvedAlert != null) { // Sending an alarm Restore Map tags = new HashMap<>(6); tags.put(CommonConstants.TAG_MONITOR_ID, String.valueOf(monitorId)); tags.put(CommonConstants.TAG_MONITOR_APP, app); - String content = this.bundle.getString("alerter.availability.resolved"); - long currentTimeMilli = System.currentTimeMillis(); + if (notResolvedAlert.getTags() != null) { + tags.putAll(notResolvedAlert.getTags()); + } + String content = this.bundle.getString("alerter.availability.recover"); Alert resumeAlert = Alert.builder() - .tags(tags) - .target(CommonConstants.AVAILABILITY) - .content(content) - .priority(CommonConstants.ALERT_PRIORITY_CODE_WARNING) - .status(CommonConstants.ALERT_STATUS_CODE_RESTORED) 
- .firstAlarmTime(currentTimeMilli) - .lastAlarmTime(currentTimeMilli) - .triggerTimes(1) - .build(); + .tags(tags) + .target(CommonConstants.AVAILABILITY) + .content(content) + .priority(CommonConstants.ALERT_PRIORITY_CODE_WARNING) + .status(CommonConstants.ALERT_STATUS_CODE_RESTORED) + .firstAlarmTime(currentTimeMill) + .lastAlarmTime(currentTimeMill) + .triggerTimes(1) + .build(); alarmCommonReduce.reduceAndSendAlarm(resumeAlert); Runnable updateStatusJob = () -> { + // todo update pre all type alarm status updateAvailabilityAlertStatus(monitorId, resumeAlert); }; workerPool.executeJob(updateStatusJob); @@ -320,69 +396,10 @@ private void handlerAvailableMetrics(long monitorId, String app, String metrics, } } - private void handlerMonitorAvailableAlert(long monitorId, String app, CollectRep.Code code) { - AlertDefine avaAlertDefine = alertDefineService.getMonitorBindAlertAvaDefine(monitorId, app, CommonConstants.AVAILABILITY); - if (avaAlertDefine == null) { - return; - } - Alert preAlert = triggeredAlertMap.get(String.valueOf(monitorId)); - long currentTimeMill = System.currentTimeMillis(); - Map tags = new HashMap<>(6); - tags.put(CommonConstants.TAG_MONITOR_ID, String.valueOf(monitorId)); - tags.put(CommonConstants.TAG_MONITOR_APP, app); - tags.put("metrics", CommonConstants.AVAILABILITY); - tags.put("code", code.name()); - Map valueMap = tags.entrySet() - .stream().collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - if (avaAlertDefine.getTags() != null && !avaAlertDefine.getTags().isEmpty()) { - for (TagItem tagItem : avaAlertDefine.getTags()) { - valueMap.put(tagItem.getName(), tagItem.getValue()); - tags.put(tagItem.getName(), tagItem.getValue()); - } - } - if (preAlert == null) { - Alert.AlertBuilder alertBuilder = Alert.builder() - .tags(tags) - .priority(CommonConstants.ALERT_PRIORITY_CODE_EMERGENCY) - .status(ALERT_STATUS_CODE_PENDING) - .target(CommonConstants.AVAILABILITY) - 
.content(AlertTemplateUtil.render(avaAlertDefine.getTemplate(), valueMap)) - .firstAlarmTime(currentTimeMill) - .lastAlarmTime(currentTimeMill) - .triggerTimes(1); - if (avaAlertDefine.getTimes() == null || avaAlertDefine.getTimes() <= 1) { - alarmCommonReduce.reduceAndSendAlarm(alertBuilder.build().clone()); - unAvailableMonitors.add(monitorId); - } else { - alertBuilder.status(CommonConstants.ALERT_STATUS_CODE_NOT_REACH); - } - triggeredAlertMap.put(String.valueOf(monitorId), alertBuilder.build()); - } else { - int times = preAlert.getTriggerTimes() + 1; - if (preAlert.getStatus() == ALERT_STATUS_CODE_PENDING) { - times = 1; - preAlert.setContent(AlertTemplateUtil.render(avaAlertDefine.getTemplate(), valueMap)); - preAlert.setTags(tags); - } - preAlert.setTriggerTimes(times); - preAlert.setFirstAlarmTime(currentTimeMill); - preAlert.setLastAlarmTime(currentTimeMill); - int defineTimes = avaAlertDefine.getTimes() == null ? 1 : avaAlertDefine.getTimes(); - if (times >= defineTimes) { - preAlert.setStatus(ALERT_STATUS_CODE_PENDING); - alarmCommonReduce.reduceAndSendAlarm(preAlert.clone()); - unAvailableMonitors.add(monitorId); - } else { - preAlert.setStatus(CommonConstants.ALERT_STATUS_CODE_NOT_REACH); - } - } - } - - private void updateAvailabilityAlertStatus(long monitorId, Alert restoreAlert) { List availabilityAlerts = queryAvailabilityAlerts(monitorId, restoreAlert); availabilityAlerts.stream().parallel().forEach(alert -> { - log.info("updating alert id:{}",alert.getId()); + log.info("updating alert status solved id: {}", alert.getId()); alertService.editAlertStatus(ALERT_STATUS_CODE_SOLVED, List.of(alert.getId())); }); } @@ -413,8 +430,15 @@ private List queryAvailabilityAlerts(long monitorId, Alert restoreAlert) } @EventListener(SystemConfigChangeEvent.class) - public void onEvent(SystemConfigChangeEvent event) { + public void onSystemConfigChangeEvent(SystemConfigChangeEvent event) { log.info("calculate alarm receive system config change event: {}.", 
event.getSource()); this.bundle = ResourceBundleUtil.getBundle("alerter"); } + + @EventListener(MonitorDeletedEvent.class) + public void onMonitorDeletedEvent(MonitorDeletedEvent event) { + log.info("calculate alarm receive monitor {} has been deleted.", event.getMonitorId()); + this.triggeredAlertMap.remove(String.valueOf(event.getMonitorId())); + } + } diff --git a/alerter/src/main/resources/alerter_en_US.properties b/alerter/src/main/resources/alerter_en_US.properties index 34cb6b965b2..ce9216ab4cc 100644 --- a/alerter/src/main/resources/alerter_en_US.properties +++ b/alerter/src/main/resources/alerter_en_US.properties @@ -13,10 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -alerter.availability.emergency = Monitoring Availability Emergency Alert -alerter.reachability.emergency = Monitoring Reachability Emergency Alert -alerter.availability.resolved = Availability Alert Resolved, Monitor Status Normal Now -alerter.reachability.resolved = Reachability Alert Resolved, Monitor Status Normal Now +alerter.availability.recover = Availability Alert Resolved, Monitor Status Normal Now +alerter.alarm.recover = Alert Resolved Notice alerter.notify.title = HertzBeat Alert Notify alerter.notify.target = Monitor Target alerter.notify.monitorId = Monitor ID @@ -27,4 +25,4 @@ alerter.notify.content = Alert Content alerter.notify.console = Console Login alerter.priority.0 = Emergency Alert alerter.priority.1 = Critical Alert -alerter.priority.2 = Warning Alert \ No newline at end of file +alerter.priority.2 = Warning Alert diff --git a/alerter/src/main/resources/alerter_zh_CN.properties b/alerter/src/main/resources/alerter_zh_CN.properties index 30bce35c033..656bf8fd806 100644 --- a/alerter/src/main/resources/alerter_zh_CN.properties +++ b/alerter/src/main/resources/alerter_zh_CN.properties @@ -13,10 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-alerter.availability.emergency = 监控紧急可用性告警 -alerter.reachability.emergency = 监控紧急可达性告警 -alerter.availability.resolved = 可用性告警恢复通知, 监控状态已恢复正常 -alerter.reachability.resolved = 可达性告警恢复通知, 监控状态已恢复正常 +alerter.availability.recover = 可用性告警恢复通知, 监控状态已恢复正常 +alerter.alarm.recover = 告警恢复通知 alerter.notify.title = HertzBeat告警通知 alerter.notify.target = 告警目标对象 alerter.notify.monitorId = 所属监控ID @@ -27,4 +25,4 @@ alerter.notify.content = 内容详情 alerter.notify.console = 登入控制台 alerter.priority.0 = 紧急告警 alerter.priority.1 = 严重告警 -alerter.priority.2 = 警告告警 \ No newline at end of file +alerter.priority.2 = 警告告警 diff --git a/common/src/main/java/org/dromara/hertzbeat/common/entity/alerter/AlertDefine.java b/common/src/main/java/org/dromara/hertzbeat/common/entity/alerter/AlertDefine.java index 0e6806784e3..5d4c6f70f36 100644 --- a/common/src/main/java/org/dromara/hertzbeat/common/entity/alerter/AlertDefine.java +++ b/common/src/main/java/org/dromara/hertzbeat/common/entity/alerter/AlertDefine.java @@ -105,6 +105,10 @@ public class AlertDefine { @Schema(title = "告警阈值开关", example = "true", accessMode = READ_WRITE) private boolean enable = true; + + @Schema(title = "Is send alarm recover notice | 是否发送告警恢复通知", example = "false", accessMode = READ_WRITE) + @Column(columnDefinition = "boolean default false") + private boolean recoverNotice = false; @Schema(title = "告警通知内容模版", example = "linux {monitor_name}: {monitor_id} cpu usage high", accessMode = READ_WRITE) diff --git a/common/src/main/java/org/dromara/hertzbeat/common/support/event/MonitorDeletedEvent.java b/common/src/main/java/org/dromara/hertzbeat/common/support/event/MonitorDeletedEvent.java new file mode 100644 index 00000000000..ad9e0e14c90 --- /dev/null +++ b/common/src/main/java/org/dromara/hertzbeat/common/support/event/MonitorDeletedEvent.java @@ -0,0 +1,24 @@ +package org.dromara.hertzbeat.common.support.event; + +import org.springframework.context.ApplicationEvent; + +/** + * the event published when a monitor is deleted + * @author tom 
+ */ +public class MonitorDeletedEvent extends ApplicationEvent { + + /** + * monitoring id + */ + private final Long monitorId; + + public MonitorDeletedEvent(Object source, Long monitorId) { + super(source); + this.monitorId = monitorId; + } + + public Long getMonitorId() { + return monitorId; + } +} diff --git a/manager/src/main/java/org/dromara/hertzbeat/manager/component/alerter/impl/DbAlertStoreHandlerImpl.java b/manager/src/main/java/org/dromara/hertzbeat/manager/component/alerter/impl/DbAlertStoreHandlerImpl.java index e8dfd5da07b..24f6c523f9e 100644 --- a/manager/src/main/java/org/dromara/hertzbeat/manager/component/alerter/impl/DbAlertStoreHandlerImpl.java +++ b/manager/src/main/java/org/dromara/hertzbeat/manager/component/alerter/impl/DbAlertStoreHandlerImpl.java @@ -32,8 +32,8 @@ /** * 报警持久化 - 落地到数据库 * Alarm data persistence - landing in the database - * @author Musk.Chen * + * @author Musk.Chen */ @Component @RequiredArgsConstructor @@ -43,7 +43,7 @@ final class DbAlertStoreHandlerImpl implements AlertStoreHandler { private final MonitorService monitorService; private final AlertService alertService; - + @Override public void store(Alert alert) { Map tags = alert.getTags(); @@ -60,19 +60,17 @@ public void store(Alert alert) { // 当监控未管理时 忽略静默其告警信息 return; } - if (monitor.getStatus() == CommonConstants.AVAILABLE_CODE) { - if (CommonConstants.AVAILABILITY.equals(alert.getTarget())) { + if (CommonConstants.AVAILABILITY.equals(alert.getTarget())) { + if (alert.getStatus() == CommonConstants.ALERT_STATUS_CODE_PENDING && monitor.getStatus() == CommonConstants.AVAILABLE_CODE) { // Availability Alarm Need to change the monitoring status to unavailable // 可用性告警 需变更监控状态为不可用 monitorService.updateMonitorStatus(monitor.getId(), CommonConstants.UN_AVAILABLE_CODE); - } - } else { - // If the alarm is restored, the monitoring state needs to be restored - // 若是恢复告警 需对监控状态进行恢复 - if (alert.getStatus() == CommonConstants.ALERT_STATUS_CODE_RESTORED) { + } else if 
(alert.getStatus() == CommonConstants.ALERT_STATUS_CODE_RESTORED && monitor.getStatus() == CommonConstants.UN_AVAILABLE_CODE) { + // If the alarm is restored, the monitoring state needs to be restored + // 若是恢复告警 需对监控状态进行恢复 monitorService.updateMonitorStatus(monitorId, CommonConstants.AVAILABLE_CODE); } - } + } } else { log.debug("store extern alert content: {}.", alert); } diff --git a/manager/src/main/java/org/dromara/hertzbeat/manager/service/AvailableAlertDefineInit.java b/manager/src/main/java/org/dromara/hertzbeat/manager/service/AvailableAlertDefineInit.java index f7d7635bd53..342605d8696 100644 --- a/manager/src/main/java/org/dromara/hertzbeat/manager/service/AvailableAlertDefineInit.java +++ b/manager/src/main/java/org/dromara/hertzbeat/manager/service/AvailableAlertDefineInit.java @@ -41,6 +41,7 @@ public void run(String... args) throws Exception { .preset(true) .times(1) .enable(true) + .recoverNotice(true) .priority(CommonConstants.ALERT_PRIORITY_CODE_EMERGENCY) .template("${app} monitoring availability alert, code is ${code}") .build(); diff --git a/manager/src/main/java/org/dromara/hertzbeat/manager/service/impl/MonitorServiceImpl.java b/manager/src/main/java/org/dromara/hertzbeat/manager/service/impl/MonitorServiceImpl.java index 0075f0641a7..717943d1496 100644 --- a/manager/src/main/java/org/dromara/hertzbeat/manager/service/impl/MonitorServiceImpl.java +++ b/manager/src/main/java/org/dromara/hertzbeat/manager/service/impl/MonitorServiceImpl.java @@ -20,7 +20,6 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.google.gson.Gson; import lombok.extern.slf4j.Slf4j; -import org.dromara.hertzbeat.alert.calculate.CalculateAlarm; import org.dromara.hertzbeat.alert.dao.AlertDefineBindDao; import org.dromara.hertzbeat.common.constants.CommonConstants; import org.dromara.hertzbeat.common.entity.job.Configmap; @@ -33,6 +32,7 @@ import org.dromara.hertzbeat.common.entity.manager.ParamDefine; import 
org.dromara.hertzbeat.common.entity.manager.Tag; import org.dromara.hertzbeat.common.entity.message.CollectRep; +import org.dromara.hertzbeat.common.support.event.MonitorDeletedEvent; import org.dromara.hertzbeat.common.util.*; import org.dromara.hertzbeat.manager.dao.CollectorDao; import org.dromara.hertzbeat.manager.dao.CollectorMonitorBindDao; @@ -50,6 +50,7 @@ import org.dromara.hertzbeat.manager.support.exception.MonitorDetectException; import org.dromara.hertzbeat.manager.support.exception.MonitorMetricsException; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.ApplicationContext; import org.springframework.data.domain.Page; import org.springframework.data.domain.PageRequest; import org.springframework.data.jpa.domain.Specification; @@ -112,9 +113,9 @@ public class MonitorServiceImpl implements MonitorService { @Autowired private TagMonitorBindDao tagMonitorBindDao; - + @Autowired - private CalculateAlarm calculateAlarm; + private ApplicationContext applicationContext; private final Map imExportServiceMap = new HashMap<>(); @@ -545,7 +546,6 @@ public void modifyMonitor(Monitor monitor, List params, String collector) if (params != null) { paramDao.saveAll(params); } - calculateAlarm.triggeredAlertMap.remove(String.valueOf(monitorId)); } catch (Exception e) { log.error(e.getMessage(), e); // Repository brushing abnormally cancels the previously delivered task @@ -568,7 +568,7 @@ public void deleteMonitor(long id) throws RuntimeException { tagMonitorBindDao.deleteTagMonitorBindsByMonitorId(id); alertDefineBindDao.deleteAlertDefineMonitorBindsByMonitorIdEquals(id); collectJobScheduling.cancelAsyncCollectJob(monitor.getJobId()); - calculateAlarm.triggeredAlertMap.remove(String.valueOf(monitor.getId())); + applicationContext.publishEvent(new MonitorDeletedEvent(applicationContext, monitor.getId())); } } @@ -586,7 +586,7 @@ public void deleteMonitors(Set ids) throws RuntimeException { // delete tag 删除监控对应的标签 
tagService.deleteMonitorSystemTags(monitor); collectJobScheduling.cancelAsyncCollectJob(monitor.getJobId()); - calculateAlarm.triggeredAlertMap.remove(String.valueOf(monitor.getId())); + applicationContext.publishEvent(new MonitorDeletedEvent(applicationContext, monitor.getId())); } } } @@ -674,7 +674,7 @@ public void enableManageMonitors(HashSet ids) { // Issue collection tasks 下发采集任务 long newJobId = collectJobScheduling.addAsyncCollectJob(appDefine); monitor.setJobId(newJobId); - calculateAlarm.triggeredAlertMap.remove(String.valueOf(monitor.getId())); + applicationContext.publishEvent(new MonitorDeletedEvent(applicationContext, monitor.getId())); } monitorDao.saveAll(unManagedMonitors); } @@ -773,7 +773,6 @@ public void updateAppCollectJob(Job job) { // 下发采集任务 long newJobId = collectJobScheduling.addAsyncCollectJob(appDefine); monitor.setJobId(newJobId); - calculateAlarm.triggeredAlertMap.remove(String.valueOf(monitor.getId())); monitorDao.save(monitor); } } diff --git a/manager/src/test/java/org/dromara/hertzbeat/manager/service/MonitorServiceTest.java b/manager/src/test/java/org/dromara/hertzbeat/manager/service/MonitorServiceTest.java index 72623532b9f..205493c6a4e 100644 --- a/manager/src/test/java/org/dromara/hertzbeat/manager/service/MonitorServiceTest.java +++ b/manager/src/test/java/org/dromara/hertzbeat/manager/service/MonitorServiceTest.java @@ -1,6 +1,5 @@ package org.dromara.hertzbeat.manager.service; -import org.dromara.hertzbeat.alert.calculate.CalculateAlarm; import org.dromara.hertzbeat.alert.dao.AlertDefineBindDao; import org.dromara.hertzbeat.common.entity.alerter.Alert; import org.dromara.hertzbeat.common.entity.job.Job; @@ -32,6 +31,7 @@ import org.mockito.InjectMocks; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.springframework.context.ApplicationContext; import org.springframework.data.domain.Page; import org.springframework.data.domain.PageRequest; import 
org.springframework.data.jpa.domain.Specification; @@ -96,7 +96,7 @@ class MonitorServiceTest { private CollectorMonitorBindDao collectorMonitorBindDao; @Mock - private CalculateAlarm calculateAlarm; + private ApplicationContext applicationContext; @Mock Map triggeredAlertMap = spy(new HashMap<>()); @@ -105,9 +105,7 @@ class MonitorServiceTest { * 属性无法直接mock,测试执行前-手动赋值 */ @BeforeEach - public void setUp() { - calculateAlarm.triggeredAlertMap = triggeredAlertMap; - } + public void setUp() {} @Test void detectMonitorEmpty() { diff --git a/script/sql/schema.sql b/script/sql/schema.sql index f89a286f614..b32199b336a 100644 --- a/script/sql/schema.sql +++ b/script/sql/schema.sql @@ -125,21 +125,22 @@ CREATE TABLE hzb_tag_monitor_bind DROP TABLE IF EXISTS hzb_alert_define ; CREATE TABLE hzb_alert_define ( - id bigint not null auto_increment comment '告警定义ID', - app varchar(100) not null comment '配置告警的监控类型:linux,mysql,jvm...', - metric varchar(100) not null comment '配置告警的指标集合:cpu,memory,info...', - field varchar(100) comment '配置告警的指标:usage,cores...', - preset boolean not null default false comment '是否是全局默认告警,是则所有此类型监控默认关联此告警', - expr varchar(255) comment '告警触发条件表达式', - priority tinyint not null default 0 comment '告警级别 0:高-emergency-紧急告警-红色 1:中-critical-严重告警-橙色 2:低-warning-警告告警-黄色', - times int not null default 1 comment '触发次数,即达到触发阈值次数要求后才算触发告警', - tags varchar(4000) comment '附加告警标签(status:success,env:prod)', - enable boolean not null default true comment '告警阈值开关', - template varchar(255) not null comment '告警通知模板内容', - creator varchar(100) comment '创建者', - modifier varchar(100) comment '最新修改者', - gmt_create timestamp default current_timestamp comment 'create time', - gmt_update datetime default current_timestamp on update current_timestamp comment 'update time', + id bigint not null auto_increment comment '告警定义ID', + app varchar(100) not null comment '配置告警的监控类型:linux,mysql,jvm...', + metric varchar(100) not null comment '配置告警的指标集合:cpu,memory,info...', + field varchar(100) 
comment '配置告警的指标:usage,cores...', + preset boolean not null default false comment '是否是全局默认告警,是则所有此类型监控默认关联此告警', + expr varchar(255) comment '告警触发条件表达式', + priority tinyint not null default 0 comment '告警级别 0:高-emergency-紧急告警-红色 1:中-critical-严重告警-橙色 2:低-warning-警告告警-黄色', + times int not null default 1 comment '触发次数,即达到触发阈值次数要求后才算触发告警', + tags varchar(4000) comment '附加告警标签(status:success,env:prod)', + enable boolean not null default true comment '告警阈值开关', + template varchar(255) not null comment '告警通知模板内容', + recover_notice boolean not null default false comment 'Is send alarm recovered notice | 是否发送告警恢复通知', + creator varchar(100) comment '创建者', + modifier varchar(100) comment '最新修改者', + gmt_create timestamp default current_timestamp comment 'create time', + gmt_update datetime default current_timestamp on update current_timestamp comment 'update time', primary key (id) ) ENGINE = InnoDB DEFAULT CHARSET=utf8mb4; diff --git a/web-app/src/app/pojo/AlertDefine.ts b/web-app/src/app/pojo/AlertDefine.ts index 529801f164f..0c287836559 100644 --- a/web-app/src/app/pojo/AlertDefine.ts +++ b/web-app/src/app/pojo/AlertDefine.ts @@ -13,6 +13,7 @@ export class AlertDefine { times: number = 3; tags!: TagItem[]; enable: boolean = true; + recoverNotice: boolean = false; template!: string; creator!: string; modifier!: string; diff --git a/web-app/src/app/routes/alert/alert-setting/alert-setting.component.html b/web-app/src/app/routes/alert/alert-setting/alert-setting.component.html index 7acc98d9587..2002b156c59 100644 --- a/web-app/src/app/routes/alert/alert-setting/alert-setting.component.html +++ b/web-app/src/app/routes/alert/alert-setting/alert-setting.component.html @@ -449,6 +449,19 @@ + + + {{ 'alert.setting.recover-notice' | i18n }} + + + + + {{ 'alert.setting.enable' | i18n }} diff --git a/web-app/src/assets/i18n/en-US.json b/web-app/src/assets/i18n/en-US.json index e9966637233..65d14a1b101 100644 --- a/web-app/src/assets/i18n/en-US.json +++ 
b/web-app/src/assets/i18n/en-US.json @@ -146,6 +146,8 @@ "alert.setting.default.tip": "Whether this alarm threshold configuration applies to all this type of monitoring globally", "alert.setting.enable": "Enable Alert", "alert.setting.enable.tip": "This alarm threshold configuration is enabled or disabled", + "alert.setting.recover-notice": "Recover Notice", + "alert.setting.recover-notice.tip": "Whether to send the corresponding recovered notification when the alarm is resolved under this threshold rule", "alert.setting.connect": "Alert Associate Monitors", "alert.setting.connect.left": "No Associate", "alert.setting.connect.right": "Associated", diff --git a/web-app/src/assets/i18n/zh-CN.json b/web-app/src/assets/i18n/zh-CN.json index 8d071ef6020..a14cbc7ff07 100644 --- a/web-app/src/assets/i18n/zh-CN.json +++ b/web-app/src/assets/i18n/zh-CN.json @@ -146,6 +146,8 @@ "alert.setting.default.tip": "此告警阈值配置是否应用于全局所有此类型监控", "alert.setting.enable": "启用告警", "alert.setting.enable.tip": "此告警阈值配置开启生效或关闭", + "alert.setting.recover-notice": "恢复通知", + "alert.setting.recover-notice.tip": "是否在此阈值规则下告警恢复时发送对应的恢复通知", "alert.setting.connect": "告警定义关联监控", "alert.setting.connect.left": "未关联监控", "alert.setting.connect.right": "已关联监控", diff --git a/web-app/src/assets/i18n/zh-TW.json b/web-app/src/assets/i18n/zh-TW.json index da7b7a3cedd..41da7293330 100644 --- a/web-app/src/assets/i18n/zh-TW.json +++ b/web-app/src/assets/i18n/zh-TW.json @@ -146,6 +146,8 @@ "alert.setting.default.tip": "此告警阈值配置是否應用于全局所有此類型監控", "alert.setting.enable": "啓用告警", "alert.setting.enable.tip": "此告警阈值配置開啓生效或關閉", + "alert.setting.recover-notice": "恢復通知", + "alert.setting.recover-notice.tip": "是否在此閾值規則下告警恢復時發送對應的恢復通知", "alert.setting.connect": "告警定義關聯監控", "alert.setting.connect.left": "未關聯監控", "alert.setting.connect.right": "已關聯監控",