Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support enable alert threshold auto resolved notice #1185

Merged
merged 16 commits into from
Aug 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

8 changes: 3 additions & 5 deletions alerter/src/main/resources/alerter_en_US.properties
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

alerter.availability.emergency = Monitoring Availability Emergency Alert
alerter.reachability.emergency = Monitoring Reachability Emergency Alert
alerter.availability.resolved = Availability Alert Resolved, Monitor Status Normal Now
alerter.reachability.resolved = Reachability Alert Resolved, Monitor Status Normal Now
alerter.availability.recover = Availability Alert Resolved, Monitor Status Normal Now
alerter.alarm.recover = Alert Resolved Notice
alerter.notify.title = HertzBeat Alert Notify
alerter.notify.target = Monitor Target
alerter.notify.monitorId = Monitor ID
Expand All @@ -27,4 +25,4 @@ alerter.notify.content = Alert Content
alerter.notify.console = Console Login
alerter.priority.0 = Emergency Alert
alerter.priority.1 = Critical Alert
alerter.priority.2 = Warning Alert
alerter.priority.2 = Warning Alert
8 changes: 3 additions & 5 deletions alerter/src/main/resources/alerter_zh_CN.properties
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

alerter.availability.emergency = 监控紧急可用性告警
alerter.reachability.emergency = 监控紧急可达性告警
alerter.availability.resolved = 可用性告警恢复通知, 监控状态已恢复正常
alerter.reachability.resolved = 可达性告警恢复通知, 监控状态已恢复正常
alerter.availability.recover = 可用性告警恢复通知, 监控状态已恢复正常
alerter.alarm.recover = 告警恢复通知
alerter.notify.title = HertzBeat告警通知
alerter.notify.target = 告警目标对象
alerter.notify.monitorId = 所属监控ID
Expand All @@ -27,4 +25,4 @@ alerter.notify.content = 内容详情
alerter.notify.console = 登入控制台
alerter.priority.0 = 紧急告警
alerter.priority.1 = 严重告警
alerter.priority.2 = 警告告警
alerter.priority.2 = 警告告警
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ public class AlertDefine {

@Schema(title = "告警阈值开关", example = "true", accessMode = READ_WRITE)
private boolean enable = true;

@Schema(title = "Is send alarm recover notice | 是否发送告警恢复通知", example = "false", accessMode = READ_WRITE)
@Column(columnDefinition = "boolean default false")
private boolean recoverNotice = false;

@Schema(title = "告警通知内容模版", example = "linux {monitor_name}: {monitor_id} cpu usage high",
accessMode = READ_WRITE)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package org.dromara.hertzbeat.common.support.event;

import org.springframework.context.ApplicationEvent;

/**
 * Application event signalling that a monitor has been deleted.
 * Listeners read the affected monitor through {@link #getMonitorId()}.
 *
 * @author tom
 */
public class MonitorDeletedEvent extends ApplicationEvent {

    /** Id of the monitor this event refers to. */
    private final Long monitorId;

    /**
     * Creates the event.
     *
     * @param source    the object publishing the event, handed to {@code ApplicationEvent}
     * @param monitorId id of the monitor this event refers to
     */
    public MonitorDeletedEvent(Object source, Long monitorId) {
        super(source);
        this.monitorId = monitorId;
    }

    /**
     * @return the monitor id supplied when the event was created
     */
    public Long getMonitorId() {
        return this.monitorId;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@
/**
* 报警持久化 - 落地到数据库
* Alarm data persistence - landing in the database
* @author <a href="mailto:Musk.Chen@fanruan.com">Musk.Chen</a>
*
* @author <a href="mailto:Musk.Chen@fanruan.com">Musk.Chen</a>
*/
@Component
@RequiredArgsConstructor
Expand All @@ -43,7 +43,7 @@ final class DbAlertStoreHandlerImpl implements AlertStoreHandler {
private final MonitorService monitorService;

private final AlertService alertService;

@Override
public void store(Alert alert) {
Map<String, String> tags = alert.getTags();
Expand All @@ -60,19 +60,17 @@ public void store(Alert alert) {
// 当监控未管理时 忽略静默其告警信息
return;
}
if (monitor.getStatus() == CommonConstants.AVAILABLE_CODE) {
if (CommonConstants.AVAILABILITY.equals(alert.getTarget())) {
if (CommonConstants.AVAILABILITY.equals(alert.getTarget())) {
if (alert.getStatus() == CommonConstants.ALERT_STATUS_CODE_PENDING && monitor.getStatus() == CommonConstants.AVAILABLE_CODE) {
// Availability Alarm Need to change the monitoring status to unavailable
// 可用性告警 需变更监控状态为不可用
monitorService.updateMonitorStatus(monitor.getId(), CommonConstants.UN_AVAILABLE_CODE);
}
} else {
// If the alarm is restored, the monitoring state needs to be restored
// 若是恢复告警 需对监控状态进行恢复
if (alert.getStatus() == CommonConstants.ALERT_STATUS_CODE_RESTORED) {
} else if (alert.getStatus() == CommonConstants.ALERT_STATUS_CODE_RESTORED && monitor.getStatus() == CommonConstants.UN_AVAILABLE_CODE) {
// If the alarm is restored, the monitoring state needs to be restored
// 若是恢复告警 需对监控状态进行恢复
monitorService.updateMonitorStatus(monitorId, CommonConstants.AVAILABLE_CODE);
}
}
}
} else {
log.debug("store extern alert content: {}.", alert);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ public void run(String... args) throws Exception {
.preset(true)
.times(1)
.enable(true)
.recoverNotice(true)
.priority(CommonConstants.ALERT_PRIORITY_CODE_EMERGENCY)
.template("${app} monitoring availability alert, code is ${code}")
.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import com.fasterxml.jackson.core.type.TypeReference;
import com.google.gson.Gson;
import lombok.extern.slf4j.Slf4j;
import org.dromara.hertzbeat.alert.calculate.CalculateAlarm;
import org.dromara.hertzbeat.alert.dao.AlertDefineBindDao;
import org.dromara.hertzbeat.common.constants.CommonConstants;
import org.dromara.hertzbeat.common.entity.job.Configmap;
Expand All @@ -33,6 +32,7 @@
import org.dromara.hertzbeat.common.entity.manager.ParamDefine;
import org.dromara.hertzbeat.common.entity.manager.Tag;
import org.dromara.hertzbeat.common.entity.message.CollectRep;
import org.dromara.hertzbeat.common.support.event.MonitorDeletedEvent;
import org.dromara.hertzbeat.common.util.*;
import org.dromara.hertzbeat.manager.dao.CollectorDao;
import org.dromara.hertzbeat.manager.dao.CollectorMonitorBindDao;
Expand All @@ -50,6 +50,7 @@
import org.dromara.hertzbeat.manager.support.exception.MonitorDetectException;
import org.dromara.hertzbeat.manager.support.exception.MonitorMetricsException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.jpa.domain.Specification;
Expand Down Expand Up @@ -112,9 +113,9 @@ public class MonitorServiceImpl implements MonitorService {

@Autowired
private TagMonitorBindDao tagMonitorBindDao;

@Autowired
private CalculateAlarm calculateAlarm;
private ApplicationContext applicationContext;

private final Map<String, ImExportService> imExportServiceMap = new HashMap<>();

Expand Down Expand Up @@ -545,7 +546,6 @@ public void modifyMonitor(Monitor monitor, List<Param> params, String collector)
if (params != null) {
paramDao.saveAll(params);
}
calculateAlarm.triggeredAlertMap.remove(String.valueOf(monitorId));
} catch (Exception e) {
log.error(e.getMessage(), e);
// Repository brushing abnormally cancels the previously delivered task
Expand All @@ -568,7 +568,7 @@ public void deleteMonitor(long id) throws RuntimeException {
tagMonitorBindDao.deleteTagMonitorBindsByMonitorId(id);
alertDefineBindDao.deleteAlertDefineMonitorBindsByMonitorIdEquals(id);
collectJobScheduling.cancelAsyncCollectJob(monitor.getJobId());
calculateAlarm.triggeredAlertMap.remove(String.valueOf(monitor.getId()));
applicationContext.publishEvent(new MonitorDeletedEvent(applicationContext, monitor.getId()));
}
}

Expand All @@ -586,7 +586,7 @@ public void deleteMonitors(Set<Long> ids) throws RuntimeException {
// delete tag 删除监控对应的标签
tagService.deleteMonitorSystemTags(monitor);
collectJobScheduling.cancelAsyncCollectJob(monitor.getJobId());
calculateAlarm.triggeredAlertMap.remove(String.valueOf(monitor.getId()));
applicationContext.publishEvent(new MonitorDeletedEvent(applicationContext, monitor.getId()));
}
}
}
Expand Down Expand Up @@ -674,7 +674,7 @@ public void enableManageMonitors(HashSet<Long> ids) {
// Issue collection tasks 下发采集任务
long newJobId = collectJobScheduling.addAsyncCollectJob(appDefine);
monitor.setJobId(newJobId);
calculateAlarm.triggeredAlertMap.remove(String.valueOf(monitor.getId()));
applicationContext.publishEvent(new MonitorDeletedEvent(applicationContext, monitor.getId()));
}
monitorDao.saveAll(unManagedMonitors);
}
Expand Down Expand Up @@ -773,7 +773,6 @@ public void updateAppCollectJob(Job job) {
// 下发采集任务
long newJobId = collectJobScheduling.addAsyncCollectJob(appDefine);
monitor.setJobId(newJobId);
calculateAlarm.triggeredAlertMap.remove(String.valueOf(monitor.getId()));
monitorDao.save(monitor);
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.dromara.hertzbeat.manager.service;

import org.dromara.hertzbeat.alert.calculate.CalculateAlarm;
import org.dromara.hertzbeat.alert.dao.AlertDefineBindDao;
import org.dromara.hertzbeat.common.entity.alerter.Alert;
import org.dromara.hertzbeat.common.entity.job.Job;
Expand Down Expand Up @@ -32,6 +31,7 @@
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.springframework.context.ApplicationContext;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.jpa.domain.Specification;
Expand Down Expand Up @@ -96,7 +96,7 @@ class MonitorServiceTest {
private CollectorMonitorBindDao collectorMonitorBindDao;

@Mock
private CalculateAlarm calculateAlarm;
private ApplicationContext applicationContext;

@Mock
Map<String, Alert> triggeredAlertMap = spy(new HashMap<>());
Expand All @@ -105,9 +105,7 @@ class MonitorServiceTest {
* 属性无法直接mock,测试执行前-手动赋值
*/
@BeforeEach
public void setUp() {
calculateAlarm.triggeredAlertMap = triggeredAlertMap;
}
public void setUp() {}

@Test
void detectMonitorEmpty() {
Expand Down
31 changes: 16 additions & 15 deletions script/sql/schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -125,21 +125,22 @@ CREATE TABLE hzb_tag_monitor_bind
DROP TABLE IF EXISTS hzb_alert_define ;
CREATE TABLE hzb_alert_define
(
id bigint not null auto_increment comment '告警定义ID',
app varchar(100) not null comment '配置告警的监控类型:linux,mysql,jvm...',
metric varchar(100) not null comment '配置告警的指标集合:cpu,memory,info...',
field varchar(100) comment '配置告警的指标:usage,cores...',
preset boolean not null default false comment '是否是全局默认告警,是则所有此类型监控默认关联此告警',
expr varchar(255) comment '告警触发条件表达式',
priority tinyint not null default 0 comment '告警级别 0:高-emergency-紧急告警-红色 1:中-critical-严重告警-橙色 2:低-warning-警告告警-黄色',
times int not null default 1 comment '触发次数,即达到触发阈值次数要求后才算触发告警',
tags varchar(4000) comment '附加告警标签(status:success,env:prod)',
enable boolean not null default true comment '告警阈值开关',
template varchar(255) not null comment '告警通知模板内容',
creator varchar(100) comment '创建者',
modifier varchar(100) comment '最新修改者',
gmt_create timestamp default current_timestamp comment 'create time',
gmt_update datetime default current_timestamp on update current_timestamp comment 'update time',
id bigint not null auto_increment comment '告警定义ID',
app varchar(100) not null comment '配置告警的监控类型:linux,mysql,jvm...',
metric varchar(100) not null comment '配置告警的指标集合:cpu,memory,info...',
field varchar(100) comment '配置告警的指标:usage,cores...',
preset boolean not null default false comment '是否是全局默认告警,是则所有此类型监控默认关联此告警',
expr varchar(255) comment '告警触发条件表达式',
priority tinyint not null default 0 comment '告警级别 0:高-emergency-紧急告警-红色 1:中-critical-严重告警-橙色 2:低-warning-警告告警-黄色',
times int not null default 1 comment '触发次数,即达到触发阈值次数要求后才算触发告警',
tags varchar(4000) comment '附加告警标签(status:success,env:prod)',
enable boolean not null default true comment '告警阈值开关',
template varchar(255) not null comment '告警通知模板内容',
recover_notice boolean not null default false comment 'Is send alarm recovered notice | 是否发送告警恢复通知',
creator varchar(100) comment '创建者',
modifier varchar(100) comment '最新修改者',
gmt_create timestamp default current_timestamp comment 'create time',
gmt_update datetime default current_timestamp on update current_timestamp comment 'update time',
primary key (id)
) ENGINE = InnoDB DEFAULT CHARSET=utf8mb4;

Expand Down
1 change: 1 addition & 0 deletions web-app/src/app/pojo/AlertDefine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export class AlertDefine {
times: number = 3;
tags!: TagItem[];
enable: boolean = true;
recoverNotice: boolean = false;
template!: string;
creator!: string;
modifier!: string;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,19 @@
<nz-switch [(ngModel)]="define.preset" name="preset" id="preset"></nz-switch>
</nz-form-control>
</nz-form-item>
<nz-form-item>
<nz-form-label nzSpan="7" nzFor="recoverNotice" [nzTooltipTitle]="'alert.setting.recover-notice.tip' | i18n">
{{ 'alert.setting.recover-notice' | i18n }}
</nz-form-label>
<nz-form-control nzSpan="12">
<nz-switch
[(ngModel)]="define.recoverNotice"
[ngModelOptions]="{ standalone: true }"
name="recoverNotice"
id="recoverNotice"
></nz-switch>
</nz-form-control>
</nz-form-item>
<nz-form-item>
<nz-form-label nzSpan="7" nzRequired="true" nzFor="enable" [nzTooltipTitle]="'alert.setting.enable.tip' | i18n">
{{ 'alert.setting.enable' | i18n }}
Expand Down
2 changes: 2 additions & 0 deletions web-app/src/assets/i18n/en-US.json
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@
"alert.setting.default.tip": "Whether this alarm threshold configuration applies to all this type of monitoring globally",
"alert.setting.enable": "Enable Alert",
"alert.setting.enable.tip": "This alarm threshold configuration is enabled or disabled",
"alert.setting.recover-notice": "Recover Notice",
"alert.setting.recover-notice.tip": "Whether to send the corresponding recovered notification when the alarm is resolved under this threshold rule",
"alert.setting.connect": "Alert Associate Monitors",
"alert.setting.connect.left": "No Associate",
"alert.setting.connect.right": "Associated",
Expand Down
2 changes: 2 additions & 0 deletions web-app/src/assets/i18n/zh-CN.json
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@
"alert.setting.default.tip": "此告警阈值配置是否应用于全局所有此类型监控",
"alert.setting.enable": "启用告警",
"alert.setting.enable.tip": "此告警阈值配置开启生效或关闭",
"alert.setting.recover-notice": "恢复通知",
"alert.setting.recover-notice.tip": "是否在此阈值规则下告警恢复时发送对应的恢复通知",
"alert.setting.connect": "告警定义关联监控",
"alert.setting.connect.left": "未关联监控",
"alert.setting.connect.right": "已关联监控",
Expand Down
2 changes: 2 additions & 0 deletions web-app/src/assets/i18n/zh-TW.json
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@
"alert.setting.default.tip": "此告警阈值配置是否應用于全局所有此類型監控",
"alert.setting.enable": "啓用告警",
"alert.setting.enable.tip": "此告警阈值配置開啓生效或關閉",
"alert.setting.recover-notice": "恢復通知",
"alert.setting.recover-notice.tip": "是否在此閾值規則下告警恢復時發送對應的恢復通知",
"alert.setting.connect": "告警定義關聯監控",
"alert.setting.connect.left": "未關聯監控",
"alert.setting.connect.right": "已關聯監控",
Expand Down
Loading