Skip to content

Commit

Permalink
Bugfix: monitor status cannot auto-update; remove status UN_REACHABLE
Browse files Browse the repository at this point in the history
  • Loading branch information
tomsun28 authored May 12, 2023
1 parent bc6cebf commit 794e5e9
Show file tree
Hide file tree
Showing 10 changed files with 32 additions and 75 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ public class CalculateAlarm {
* key - monitorId 为监控状态可用性可达性告警 | A monitorId key indicates an availability/reachability alarm on that monitor's status
*/
public Map<String, Alert> triggeredAlertMap;

public Set<Long> unAvailableMonitors;

private final AlerterWorkerPool workerPool;
private final CommonDataQueue dataQueue;
Expand All @@ -77,21 +79,13 @@ public CalculateAlarm (AlerterWorkerPool workerPool, CommonDataQueue dataQueue,
this.alerterProperties = alerterProperties;
this.bundle = ResourceBundleUtil.getBundle("alerter");
this.triggeredAlertMap = new ConcurrentHashMap<>(128);
this.unAvailableMonitors = Collections.synchronizedSet(new HashSet<>(16));
// Initialize stateAlertMap
// 初始化stateAlertMap
List<Monitor> monitors = monitorDao.findMonitorsByStatusIn(Arrays.asList(CommonConstants.UN_AVAILABLE_CODE,
CommonConstants.UN_REACHABLE_CODE));
List<Monitor> monitors = monitorDao.findMonitorsByStatus(CommonConstants.UN_AVAILABLE_CODE);
if (monitors != null) {
for (Monitor monitor : monitors) {
Alert.AlertBuilder alertBuilder = Alert.builder()
.priority(CommonConstants.ALERT_PRIORITY_CODE_EMERGENCY)
.status(CommonConstants.ALERT_STATUS_CODE_PENDING)
.target(CommonConstants.AVAILABILITY)
.firstTriggerTime(System.currentTimeMillis())
.lastTriggerTime(System.currentTimeMillis())
.nextEvalInterval(0L)
.times(0);
this.triggeredAlertMap.put(String.valueOf(monitor.getId()), alertBuilder.build());
this.unAvailableMonitors.add(monitor.getId());
}
}
startCalculate();
Expand Down Expand Up @@ -252,17 +246,14 @@ private void handlerAvailableMetrics(long monitorId, String app, String metrics,
} else {
// Check whether the associated monitor previously had an availability or unreachable alarm; if so, send a recovery alarm to restore the monitor status
// 判断关联监控之前是否有可用性或者不可达告警,发送恢复告警进行监控状态恢复
Alert preAlert = triggeredAlertMap.remove(String.valueOf(monitorId));
if (preAlert != null && preAlert.getStatus() == CommonConstants.ALERT_STATUS_CODE_PENDING) {
// Sending an alarm cleared
// 发送告警恢复
triggeredAlertMap.remove(String.valueOf(monitorId));
boolean isRestartUnavailable = unAvailableMonitors.remove(monitorId);
if (isRestartUnavailable) {
// Sending an alarm Restore
Map<String, String> tags = new HashMap<>(6);
tags.put(CommonConstants.TAG_MONITOR_ID, String.valueOf(monitorId));
tags.put(CommonConstants.TAG_MONITOR_APP, app);
String content = this.bundle.getString("alerter.availability.resolved");
if (CommonConstants.REACHABLE.equals(preAlert.getTarget())) {
content = this.bundle.getString("alerter.reachability.resolved");
}
long currentTimeMilli = System.currentTimeMillis();
Alert resumeAlert = Alert.builder()
.tags(tags)
Expand All @@ -284,7 +275,6 @@ private void handlerMonitorAvailableAlert(long monitorId, String app, CollectRep
if (avaAlertDefine == null) {
return;
}
String monitorKey = String.valueOf(monitorId);
Alert preAlert = triggeredAlertMap.get(String.valueOf(monitorId));
long currentTimeMill = System.currentTimeMillis();
Map<String, String> tags = new HashMap<>(6);
Expand All @@ -307,10 +297,11 @@ private void handlerMonitorAvailableAlert(long monitorId, String app, CollectRep
.times(1);
if (avaAlertDefine.getTimes() == null || avaAlertDefine.getTimes() <= 1) {
silenceAlarm.filterSilenceAndSendData(alertBuilder.build().clone());
unAvailableMonitors.add(monitorId);
} else {
alertBuilder.status(CommonConstants.ALERT_STATUS_CODE_NOT_REACH);
}
triggeredAlertMap.put(monitorKey, alertBuilder.build());
triggeredAlertMap.put(String.valueOf(monitorId), alertBuilder.build());
} else {
int times = preAlert.getTimes() + 1;
if (preAlert.getStatus() == CommonConstants.ALERT_STATUS_CODE_PENDING) {
Expand All @@ -324,6 +315,7 @@ private void handlerMonitorAvailableAlert(long monitorId, String app, CollectRep
if (times >= defineTimes) {
preAlert.setStatus(CommonConstants.ALERT_STATUS_CODE_PENDING);
silenceAlarm.filterSilenceAndSendData(preAlert);
unAvailableMonitors.add(monitorId);
} else {
preAlert.setStatus(CommonConstants.ALERT_STATUS_CODE_NOT_REACH);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,13 @@ public interface AlertMonitorDao extends JpaRepository<Monitor, Long>, JpaSpecif
* @return Monitor the list | 监控列表
*/
List<Monitor> findMonitorsByStatusIn(List<Byte> status);


/**
* Query the monitors that are in the specified monitoring status.
* @param status the monitoring status to match
* @return the list of monitors in that status
*/
List<Monitor> findMonitorsByStatus(Byte status);

}
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ private Alert buildAlertData(AlertReport alertReport){
.priority(alertReport.getPriority().byteValue())
.status(CommonConstants.ALERT_STATUS_CODE_PENDING)
.tags(alertReport.getLabels())
.target(CommonConstants.AVAILABLE)
.target(alertReport.getAlertName())
.times(3)
.gmtCreate(LocalDateTime.ofInstant(Instant.ofEpochMilli(alertReport.getAlertTime()), ZoneId.systemDefault()))
.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,17 +188,6 @@ public interface CommonConstants {
*/
String AVAILABILITY = "availability";

/**
* Availability Object
* 可用性对象
*/
String AVAILABLE = "available";

/**
* Reachability Object可达性对象
*/
String REACHABLE = "reachable";

/**
* Parameter Type Number
* 参数类型 数字
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,10 @@ public void store(Alert alert) {
return;
}
if (monitor.getStatus() == CommonConstants.AVAILABLE_CODE) {
if (CommonConstants.AVAILABLE.equals(alert.getTarget())) {
if (CommonConstants.AVAILABILITY.equals(alert.getTarget())) {
// Availability alarm: the monitor status needs to be changed to unavailable
// 可用性告警 需变更监控状态为不可用
monitorService.updateMonitorStatus(monitor.getId(), CommonConstants.UN_AVAILABLE_CODE);
} else if (CommonConstants.REACHABLE.equals(alert.getTarget())) {
// Reachability alarm The monitoring status needs to be changed to unreachable
// 可达性告警 需变更监控状态为不可达
monitorService.updateMonitorStatus(monitor.getId(), CommonConstants.UN_REACHABLE_CODE);
}
} else {
// If the alarm is restored, the monitoring state needs to be restored
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,4 @@ public AppCount(String app, byte status, Long size) {
* 监控状态不可用的数量
*/
private long unAvailableSize;
/**
* 监控状态不可达的数量
*/
private long unReachableSize;
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@
import org.springframework.web.multipart.MultipartFile;

import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.time.LocalDateTime;
Expand Down Expand Up @@ -450,6 +449,7 @@ public void modifyMonitor(Monitor monitor, List<Param> params) throws RuntimeExc
// Construct the collection task Job entity
// 构造采集任务Job实体
Job appDefine = appService.getAppDefine(monitor.getApp());
appDefine.setId(preMonitor.getJobId());
appDefine.setMonitorId(monitorId);
appDefine.setInterval(monitor.getIntervals());
appDefine.setCyclic(true);
Expand Down Expand Up @@ -613,9 +613,6 @@ public List<AppCount> getAllAppMonitorsCount() {
case CommonConstants.UN_MANAGE_CODE:
appCount.setUnManageSize(appCount.getUnManageSize() + item.getSize());
break;
case CommonConstants.UN_REACHABLE_CODE:
appCount.setUnReachableSize(appCount.getUnReachableSize() + item.getSize());
break;
default:
break;
}
Expand All @@ -624,8 +621,7 @@ public List<AppCount> getAllAppMonitorsCount() {
//Traverse the map obtained by statistics and convert it into a List<App Count> result set
//遍历统计得到的map,转换成List<App Count>结果集
return appCountMap.values().stream().map(item -> {
item.setSize(item.getAvailableSize() + item.getUnManageSize()
+ item.getUnReachableSize() + item.getUnAvailableSize());
item.setSize(item.getAvailableSize() + item.getUnManageSize() + item.getUnAvailableSize());
try {
Job job = appService.getAppDefine(item.getApp());
item.setCategory(job.getCategory());
Expand Down
1 change: 0 additions & 1 deletion web-app/src/app/pojo/AppCount.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,4 @@ export class AppCount {
availableSize: number = 0;
unManageSize: number = 0;
unAvailableSize: number = 0;
unReachableSize: number = 0;
}
30 changes: 8 additions & 22 deletions web-app/src/app/routes/dashboard/dashboard.component.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
<div nz-col nzXs="24" nzSm="12" nzMd="6" class="mb-md hoverCard">
<a [routerLink]="['/monitors']" [queryParams]="{ app: 'website' }">
<div nz-row nzAlign="middle" class="bg-primary rounded-lg">
<div nz-col nzSpan="10" class="p-md text-white">
<div nz-col nzSpan="9" class="p-md text-white">
<div class="h2 mt0 font-weight-bold">{{ appCountService.size }}</div>
<p class="h5 text-nowrap mb0">
<i nz-icon nzType="cloud" nzTheme="outline"></i>
{{ 'monitor.category.service' | i18n }}
</p>
</div>
<div nz-col nzSpan="14">
<div nz-col nzSpan="15">
<nz-tag class="mb-xs">
<span>{{ 'monitor.status.available' | i18n }} </span
><span style="font-weight: bolder">{{ appCountService.availableSize }}</span>
Expand All @@ -18,10 +18,6 @@
<span>{{ 'monitor.status.unavailable' | i18n }} </span
><span style="font-weight: bolder">{{ appCountService.unAvailableSize }}</span>
</nz-tag>
<nz-tag class="mb-xs">
<span>{{ 'monitor.status.unreachable' | i18n }} </span
><span style="font-weight: bolder">{{ appCountService.unReachableSize }}</span>
</nz-tag>
<nz-tag class="mb-xs">
<span>{{ 'monitor.status.un-manage' | i18n }} </span><span style="font-weight: bolder">{{ appCountService.unManageSize }}</span>
</nz-tag>
Expand All @@ -32,23 +28,20 @@
<div nz-col nzXs="24" nzSm="12" nzMd="6" class="mb-md hoverCard">
<a [routerLink]="['/monitors']" [queryParams]="{ app: 'mysql' }">
<div nz-row nzAlign="middle" class="bg-primary rounded-lg">
<div nz-col nzSpan="10" class="p-md text-white">
<div nz-col nzSpan="9" class="p-md text-white">
<div class="h2 mt0 font-weight-bold">{{ appCountDb.size }}</div>
<p class="h5 text-nowrap mb0">
<i nz-icon nzType="database" nzTheme="outline"></i>
{{ 'monitor.category.db' | i18n }}
</p>
</div>
<div nz-col nzSpan="14">
<div nz-col nzSpan="15">
<nz-tag class="mb-xs">
<span>{{ 'monitor.status.available' | i18n }} </span><span style="font-weight: bolder">{{ appCountDb.availableSize }}</span>
</nz-tag>
<nz-tag class="mb-xs">
<span>{{ 'monitor.status.unavailable' | i18n }} </span><span style="font-weight: bolder">{{ appCountDb.unAvailableSize }}</span>
</nz-tag>
<nz-tag class="mb-xs">
<span>{{ 'monitor.status.unreachable' | i18n }} </span><span style="font-weight: bolder">{{ appCountDb.unReachableSize }}</span>
</nz-tag>
<nz-tag class="mb-xs">
<span>{{ 'monitor.status.un-manage' | i18n }} </span><span style="font-weight: bolder">{{ appCountDb.unManageSize }}</span>
</nz-tag>
Expand All @@ -59,23 +52,20 @@
<div nz-col nzXs="24" nzSm="12" nzMd="6" class="mb-md hoverCard">
<a [routerLink]="['/monitors']" [queryParams]="{ app: 'linux' }">
<div nz-row nzAlign="middle" class="bg-primary rounded-lg">
<div nz-col nzSpan="10" class="p-md text-white">
<div nz-col nzSpan="9" class="p-md text-white">
<div class="h2 mt0 font-weight-bold">{{ appCountOs.size }}</div>
<p class="h5 text-nowrap mb0">
<i nz-icon nzType="windows" nzTheme="outline"></i>
{{ 'monitor.category.os' | i18n }}
</p>
</div>
<div nz-col nzSpan="14">
<div nz-col nzSpan="15">
<nz-tag class="mb-xs">
<span>{{ 'monitor.status.available' | i18n }} </span><span style="font-weight: bolder">{{ appCountOs.availableSize }}</span>
</nz-tag>
<nz-tag class="mb-xs">
<span>{{ 'monitor.status.unavailable' | i18n }} </span><span style="font-weight: bolder">{{ appCountOs.unAvailableSize }}</span>
</nz-tag>
<nz-tag class="mb-xs">
<span>{{ 'monitor.status.unreachable' | i18n }} </span><span style="font-weight: bolder">{{ appCountOs.unReachableSize }}</span>
</nz-tag>
<nz-tag class="mb-xs">
<span>{{ 'monitor.status.un-manage' | i18n }} </span><span style="font-weight: bolder">{{ appCountOs.unManageSize }}</span>
</nz-tag>
Expand All @@ -86,25 +76,21 @@
<div nz-col nzXs="24" nzSm="12" nzMd="6" class="mb-md hoverCard">
<a [routerLink]="['/monitors']" [queryParams]="{ app: 'kafka' }">
<div nz-row nzAlign="middle" class="bg-primary rounded-lg">
<div nz-col nzSpan="10" class="p-md text-white">
<div nz-col nzSpan="9" class="p-md text-white">
<div class="h2 mt0 font-weight-bold">{{ appCountMid.size }}</div>
<p class="h5 text-nowrap mb0">
<i nz-icon nzType="merge-cells" nzTheme="outline"></i>
{{ 'monitor.category.mid' | i18n }}
</p>
</div>
<div nz-col nzSpan="14">
<div nz-col nzSpan="15">
<nz-tag class="mb-xs">
<span>{{ 'monitor.status.available' | i18n }} </span><span style="font-weight: bolder">{{ appCountMid.availableSize }}</span>
</nz-tag>
<nz-tag class="mb-xs">
<span>{{ 'monitor.status.unavailable' | i18n }} </span
><span style="font-weight: bolder">{{ appCountMid.unAvailableSize }}</span>
</nz-tag>
<nz-tag class="mb-xs">
<span>{{ 'monitor.status.unreachable' | i18n }} </span
><span style="font-weight: bolder">{{ appCountMid.unReachableSize }}</span>
</nz-tag>
<nz-tag class="mb-xs">
<span>{{ 'monitor.status.un-manage' | i18n }} </span><span style="font-weight: bolder">{{ appCountMid.unManageSize }}</span>
</nz-tag>
Expand Down
5 changes: 0 additions & 5 deletions web-app/src/app/routes/dashboard/dashboard.component.ts
Original file line number Diff line number Diff line change
Expand Up @@ -292,35 +292,30 @@ export class DashboardComponent implements OnInit, OnDestroy {
this.appCountService.availableSize += app.availableSize;
this.appCountService.unAvailableSize += app.unAvailableSize;
this.appCountService.unManageSize += app.unManageSize;
this.appCountService.unReachableSize += app.unReachableSize;
break;
case 'db':
this.appCountDb.size += app.size;
this.appCountDb.availableSize += app.availableSize;
this.appCountDb.unAvailableSize += app.unAvailableSize;
this.appCountDb.unManageSize += app.unManageSize;
this.appCountDb.unReachableSize += app.unReachableSize;
break;
case 'os':
this.appCountOs.size += app.size;
this.appCountOs.availableSize += app.availableSize;
this.appCountOs.unAvailableSize += app.unAvailableSize;
this.appCountOs.unManageSize += app.unManageSize;
this.appCountOs.unReachableSize += app.unReachableSize;
break;
case 'mid':
this.appCountMid.size += app.size;
this.appCountMid.availableSize += app.availableSize;
this.appCountMid.unAvailableSize += app.unAvailableSize;
this.appCountMid.unManageSize += app.unManageSize;
this.appCountMid.unReachableSize += app.unReachableSize;
break;
case 'custom':
this.appCountCustom.size += app.size;
this.appCountCustom.availableSize += app.availableSize;
this.appCountCustom.unAvailableSize += app.unAvailableSize;
this.appCountCustom.unManageSize += app.unManageSize;
this.appCountCustom.unReachableSize += app.unReachableSize;
break;
}
});
Expand Down

0 comments on commit 794e5e9

Please sign in to comment.