Skip to content

Commit

Permalink
support enable alert threshold auto resolved notice (#1185)
Browse files Browse the repository at this point in the history
Signed-off-by: tomsun28 <tomsun28@outlook.com>
  • Loading branch information
tomsun28 authored Aug 16, 2023
1 parent 40f5d82 commit e26e6b6
Show file tree
Hide file tree
Showing 14 changed files with 197 additions and 133 deletions.

Large diffs are not rendered by default.

8 changes: 3 additions & 5 deletions alerter/src/main/resources/alerter_en_US.properties
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

alerter.availability.emergency = Monitoring Availability Emergency Alert
alerter.reachability.emergency = Monitoring Reachability Emergency Alert
alerter.availability.resolved = Availability Alert Resolved, Monitor Status Normal Now
alerter.reachability.resolved = Reachability Alert Resolved, Monitor Status Normal Now
alerter.availability.recover = Availability Alert Resolved, Monitor Status Normal Now
alerter.alarm.recover = Alert Resolved Notice
alerter.notify.title = HertzBeat Alert Notify
alerter.notify.target = Monitor Target
alerter.notify.monitorId = Monitor ID
Expand All @@ -27,4 +25,4 @@ alerter.notify.content = Alert Content
alerter.notify.console = Console Login
alerter.priority.0 = Emergency Alert
alerter.priority.1 = Critical Alert
alerter.priority.2 = Warning Alert
alerter.priority.2 = Warning Alert
8 changes: 3 additions & 5 deletions alerter/src/main/resources/alerter_zh_CN.properties
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

alerter.availability.emergency = 监控紧急可用性告警
alerter.reachability.emergency = 监控紧急可达性告警
alerter.availability.resolved = 可用性告警恢复通知, 监控状态已恢复正常
alerter.reachability.resolved = 可达性告警恢复通知, 监控状态已恢复正常
alerter.availability.recover = 可用性告警恢复通知, 监控状态已恢复正常
alerter.alarm.recover = 告警恢复通知
alerter.notify.title = HertzBeat告警通知
alerter.notify.target = 告警目标对象
alerter.notify.monitorId = 所属监控ID
Expand All @@ -27,4 +25,4 @@ alerter.notify.content = 内容详情
alerter.notify.console = 登入控制台
alerter.priority.0 = 紧急告警
alerter.priority.1 = 严重告警
alerter.priority.2 = 警告告警
alerter.priority.2 = 警告告警
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ public class AlertDefine {

@Schema(title = "告警阈值开关", example = "true", accessMode = READ_WRITE)
private boolean enable = true;

@Schema(title = "Is send alarm recover notice | 是否发送告警恢复通知", example = "false", accessMode = READ_WRITE)
@Column(columnDefinition = "boolean default false")
private boolean recoverNotice = false;

@Schema(title = "告警通知内容模版", example = "linux {monitor_name}: {monitor_id} cpu usage high",
accessMode = READ_WRITE)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package org.dromara.hertzbeat.common.support.event;

import org.springframework.context.ApplicationEvent;

/**
* Application event that carries the id of a monitor which has been deleted.
*
* <p>The monitor id is fixed at construction time (the field is final and no setter exists).
*
* <p>NOTE(review): MonitorServiceImpl also publishes this event from enableManageMonitors,
* i.e. when a monitor is re-enabled rather than deleted - confirm that listeners expect this.
*/
public class MonitorDeletedEvent extends ApplicationEvent {

/**
* id of the monitor this event refers to
*/
private final Long monitorId;

/**
* Creates the event.
*
* @param source object passed to the {@link ApplicationEvent} constructor as the event source
* @param monitorId id of the monitor this event refers to
*/
public MonitorDeletedEvent(Object source, Long monitorId) {
super(source);
this.monitorId = monitorId;
}

/**
* @return the monitor id supplied at construction
*/
public Long getMonitorId() {
return monitorId;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ final class DbAlertStoreHandlerImpl implements AlertStoreHandler {
private final MonitorService monitorService;

private final AlertService alertService;

@Override
public void store(Alert alert) {
Map<String, String> tags = alert.getTags();
Expand All @@ -60,19 +60,17 @@ public void store(Alert alert) {
// 当监控未管理时 忽略静默其告警信息
return;
}
if (monitor.getStatus() == CommonConstants.AVAILABLE_CODE) {
if (CommonConstants.AVAILABILITY.equals(alert.getTarget())) {
if (CommonConstants.AVAILABILITY.equals(alert.getTarget())) {
if (alert.getStatus() == CommonConstants.ALERT_STATUS_CODE_PENDING && monitor.getStatus() == CommonConstants.AVAILABLE_CODE) {
// Availability Alarm Need to change the monitoring status to unavailable
// 可用性告警 需变更监控状态为不可用
monitorService.updateMonitorStatus(monitor.getId(), CommonConstants.UN_AVAILABLE_CODE);
}
} else {
// If the alarm is restored, the monitoring state needs to be restored
// 若是恢复告警 需对监控状态进行恢复
if (alert.getStatus() == CommonConstants.ALERT_STATUS_CODE_RESTORED) {
} else if (alert.getStatus() == CommonConstants.ALERT_STATUS_CODE_RESTORED && monitor.getStatus() == CommonConstants.UN_AVAILABLE_CODE) {
// If the alarm is restored, the monitoring state needs to be restored
// 若是恢复告警 需对监控状态进行恢复
monitorService.updateMonitorStatus(monitorId, CommonConstants.AVAILABLE_CODE);
}
}
}
} else {
log.debug("store extern alert content: {}.", alert);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ public void run(String... args) throws Exception {
.preset(true)
.times(1)
.enable(true)
.recoverNotice(true)
.priority(CommonConstants.ALERT_PRIORITY_CODE_EMERGENCY)
.template("${app} monitoring availability alert, code is ${code}")
.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import com.fasterxml.jackson.core.type.TypeReference;
import com.google.gson.Gson;
import lombok.extern.slf4j.Slf4j;
import org.dromara.hertzbeat.alert.calculate.CalculateAlarm;
import org.dromara.hertzbeat.alert.dao.AlertDefineBindDao;
import org.dromara.hertzbeat.common.constants.CommonConstants;
import org.dromara.hertzbeat.common.entity.job.Configmap;
Expand All @@ -33,6 +32,7 @@
import org.dromara.hertzbeat.common.entity.manager.ParamDefine;
import org.dromara.hertzbeat.common.entity.manager.Tag;
import org.dromara.hertzbeat.common.entity.message.CollectRep;
import org.dromara.hertzbeat.common.support.event.MonitorDeletedEvent;
import org.dromara.hertzbeat.common.util.*;
import org.dromara.hertzbeat.manager.dao.CollectorDao;
import org.dromara.hertzbeat.manager.dao.CollectorMonitorBindDao;
Expand All @@ -50,6 +50,7 @@
import org.dromara.hertzbeat.manager.support.exception.MonitorDetectException;
import org.dromara.hertzbeat.manager.support.exception.MonitorMetricsException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.jpa.domain.Specification;
Expand Down Expand Up @@ -112,9 +113,9 @@ public class MonitorServiceImpl implements MonitorService {

@Autowired
private TagMonitorBindDao tagMonitorBindDao;

@Autowired
private CalculateAlarm calculateAlarm;
private ApplicationContext applicationContext;

private final Map<String, ImExportService> imExportServiceMap = new HashMap<>();

Expand Down Expand Up @@ -545,7 +546,6 @@ public void modifyMonitor(Monitor monitor, List<Param> params, String collector)
if (params != null) {
paramDao.saveAll(params);
}
calculateAlarm.triggeredAlertMap.remove(String.valueOf(monitorId));
} catch (Exception e) {
log.error(e.getMessage(), e);
// Repository brushing abnormally cancels the previously delivered task
Expand All @@ -568,7 +568,7 @@ public void deleteMonitor(long id) throws RuntimeException {
tagMonitorBindDao.deleteTagMonitorBindsByMonitorId(id);
alertDefineBindDao.deleteAlertDefineMonitorBindsByMonitorIdEquals(id);
collectJobScheduling.cancelAsyncCollectJob(monitor.getJobId());
calculateAlarm.triggeredAlertMap.remove(String.valueOf(monitor.getId()));
applicationContext.publishEvent(new MonitorDeletedEvent(applicationContext, monitor.getId()));
}
}

Expand All @@ -586,7 +586,7 @@ public void deleteMonitors(Set<Long> ids) throws RuntimeException {
// delete tag 删除监控对应的标签
tagService.deleteMonitorSystemTags(monitor);
collectJobScheduling.cancelAsyncCollectJob(monitor.getJobId());
calculateAlarm.triggeredAlertMap.remove(String.valueOf(monitor.getId()));
applicationContext.publishEvent(new MonitorDeletedEvent(applicationContext, monitor.getId()));
}
}
}
Expand Down Expand Up @@ -674,7 +674,7 @@ public void enableManageMonitors(HashSet<Long> ids) {
// Issue collection tasks 下发采集任务
long newJobId = collectJobScheduling.addAsyncCollectJob(appDefine);
monitor.setJobId(newJobId);
calculateAlarm.triggeredAlertMap.remove(String.valueOf(monitor.getId()));
applicationContext.publishEvent(new MonitorDeletedEvent(applicationContext, monitor.getId()));
}
monitorDao.saveAll(unManagedMonitors);
}
Expand Down Expand Up @@ -773,7 +773,6 @@ public void updateAppCollectJob(Job job) {
// 下发采集任务
long newJobId = collectJobScheduling.addAsyncCollectJob(appDefine);
monitor.setJobId(newJobId);
calculateAlarm.triggeredAlertMap.remove(String.valueOf(monitor.getId()));
monitorDao.save(monitor);
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.dromara.hertzbeat.manager.service;

import org.dromara.hertzbeat.alert.calculate.CalculateAlarm;
import org.dromara.hertzbeat.alert.dao.AlertDefineBindDao;
import org.dromara.hertzbeat.common.entity.alerter.Alert;
import org.dromara.hertzbeat.common.entity.job.Job;
Expand Down Expand Up @@ -32,6 +31,7 @@
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.springframework.context.ApplicationContext;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.jpa.domain.Specification;
Expand Down Expand Up @@ -96,7 +96,7 @@ class MonitorServiceTest {
private CollectorMonitorBindDao collectorMonitorBindDao;

@Mock
private CalculateAlarm calculateAlarm;
private ApplicationContext applicationContext;

@Mock
Map<String, Alert> triggeredAlertMap = spy(new HashMap<>());
Expand All @@ -105,9 +105,7 @@ class MonitorServiceTest {
* 属性无法直接mock,测试执行前-手动赋值
*/
@BeforeEach
public void setUp() {
calculateAlarm.triggeredAlertMap = triggeredAlertMap;
}
public void setUp() {}

@Test
void detectMonitorEmpty() {
Expand Down
1 change: 1 addition & 0 deletions web-app/src/app/pojo/AlertDefine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export class AlertDefine {
times: number = 3;
tags!: TagItem[];
enable: boolean = true;
recoverNotice: boolean = false;
template!: string;
creator!: string;
modifier!: string;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,19 @@
<nz-switch [(ngModel)]="define.preset" name="preset" id="preset"></nz-switch>
</nz-form-control>
</nz-form-item>
<nz-form-item>
<nz-form-label nzSpan="7" nzFor="recoverNotice" [nzTooltipTitle]="'alert.setting.recover-notice.tip' | i18n">
{{ 'alert.setting.recover-notice' | i18n }}
</nz-form-label>
<nz-form-control nzSpan="12">
<nz-switch
[(ngModel)]="define.recoverNotice"
[ngModelOptions]="{ standalone: true }"
name="recoverNotice"
id="recoverNotice"
></nz-switch>
</nz-form-control>
</nz-form-item>
<nz-form-item>
<nz-form-label nzSpan="7" nzRequired="true" nzFor="enable" [nzTooltipTitle]="'alert.setting.enable.tip' | i18n">
{{ 'alert.setting.enable' | i18n }}
Expand Down
2 changes: 2 additions & 0 deletions web-app/src/assets/i18n/en-US.json
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@
"alert.setting.default.tip": "Whether this alarm threshold configuration applies to all this type of monitoring globally",
"alert.setting.enable": "Enable Alert",
"alert.setting.enable.tip": "This alarm threshold configuration is enabled or disabled",
"alert.setting.recover-notice": "Recover Notice",
"alert.setting.recover-notice.tip": "Whether to send the corresponding recovered notification when the alarm is resolved under this threshold rule",
"alert.setting.connect": "Alert Associate Monitors",
"alert.setting.connect.left": "No Associate",
"alert.setting.connect.right": "Associated",
Expand Down
2 changes: 2 additions & 0 deletions web-app/src/assets/i18n/zh-CN.json
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@
"alert.setting.default.tip": "此告警阈值配置是否应用于全局所有此类型监控",
"alert.setting.enable": "启用告警",
"alert.setting.enable.tip": "此告警阈值配置开启生效或关闭",
"alert.setting.recover-notice": "恢复通知",
"alert.setting.recover-notice.tip": "是否在此阈值规则下告警恢复时发送对应的恢复通知",
"alert.setting.connect": "告警定义关联监控",
"alert.setting.connect.left": "未关联监控",
"alert.setting.connect.right": "已关联监控",
Expand Down
2 changes: 2 additions & 0 deletions web-app/src/assets/i18n/zh-TW.json
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@
"alert.setting.default.tip": "此告警阈值配置是否應用于全局所有此類型監控",
"alert.setting.enable": "啓用告警",
"alert.setting.enable.tip": "此告警阈值配置開啓生效或關閉",
"alert.setting.recover-notice": "恢復通知",
"alert.setting.recover-notice.tip": "是否在此閾值規則下告警恢復時發送對應的恢復通知",
"alert.setting.connect": "告警定義關聯監控",
"alert.setting.connect.left": "未關聯監控",
"alert.setting.connect.right": "已關聯監控",
Expand Down

0 comments on commit e26e6b6

Please sign in to comment.