Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Onecond 2342-Collect list of Conductor alerts in Splunk #1014

Open
wants to merge 10 commits into
base: release-v2.67.0
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ private void start() throws Exception {
jobs.add(new EventPubsJob(dataSource));
jobs.add(new WorkflowJob(dataSource));
jobs.add(new DbLogJob(dataSource));
jobs.add(new AlertsJob(dataSource));

// Run jobs in threads
CountDownLatch countDown = new CountDownLatch(jobs.size());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package com.netflix.conductor.archiver.job;

import com.netflix.conductor.archiver.config.AppConfig;
import com.zaxxer.hikari.HikariDataSource;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.sql.Timestamp;
import java.util.List;
import java.util.concurrent.TimeUnit;

import static org.apache.commons.collections.CollectionUtils.isNotEmpty;

public class AlertsJob extends AbstractJob {
private static final Logger logger = LogManager.getLogger(AlertsJob.class);

public AlertsJob(HikariDataSource dataSource) {
super(dataSource);
}

@Override
public void cleanup() {
logger.info("Starting alerts cleanup job");
try {
AppConfig config = AppConfig.getInstance();
int batchSize = config.batchSize();

// Calculate the start of the current day
Timestamp startOfToday = new Timestamp(System.currentTimeMillis() - TimeUnit.MILLISECONDS.toMillis(System.currentTimeMillis() % TimeUnit.DAYS.toMillis(1)));
logger.info("Deleting records earlier than " + startOfToday + ", batch size = " + batchSize);

int deleted = 0;
List<Long> ids = fetchIds("SELECT id FROM alerts WHERE timestamp < ? LIMIT ?", startOfToday, batchSize);
while (isNotEmpty(ids)) {
deleted += deleteByIds("alerts", ids);
logger.debug("Alerts cleanup job deleted " + deleted + " records");

ids = fetchIds("SELECT id FROM alerts WHERE timestamp < ? LIMIT ?", startOfToday, batchSize);
}
logger.info("Finished alerts cleanup job. Total records deleted: " + deleted);
} catch (Exception ex) {
logger.error("Alerts cleanup job failed: " + ex.getMessage(), ex);
}
}
}
47 changes: 47 additions & 0 deletions common/src/main/java/com/netflix/conductor/common/run/Alert.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/**
* Copyright 2016 Netflix, Inc.
* <p>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.netflix.conductor.common.run;

public class Alert {

private String message;
private int alertLookUpId;
private int isProcessed;

public String getMessage() {
return message;
}

public void setMessage(String message) {
this.message = message;
}

public int getAlertLookUpId() {
return alertLookUpId;
}

public void setAlertLookUpId(int alertLookUpId) {
this.alertLookUpId = alertLookUpId;
}

public int getIsProcessed() {
return isProcessed;
}

public void setIsProcessed(int isProcessed) {
this.isProcessed = isProcessed;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/**
* Copyright 2016 Netflix, Inc.
* <p>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.netflix.conductor.common.run;

public class AlertRegistry {

private int id;
private String lookup;
private String generalMessage;
private int alertCount;

public int getId() {
return id;
}

public void setId(int id) {
this.id = id;
}

public String getLookup() {
return lookup;
}

public void setLookup(String lookup) {
this.lookup = lookup;
}

public String getGeneralMessage() {
return generalMessage;
}

public void setGeneralMessage(String generalMessage) {
this.generalMessage = generalMessage;
}

public int getAlertCount() {
return alertCount;
}

public void setAlertCount(int alertCount) {
this.alertCount = alertCount;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import com.netflix.conductor.core.events.EventProcessor;
import com.netflix.conductor.core.events.queue.dyno.DynoEventQueueProvider;
import com.netflix.conductor.core.execution.WorkflowSweeper;
import com.netflix.conductor.core.execution.alerts.AlertScheduler;
import com.netflix.conductor.core.execution.appconfig.cache.AppConfig;
import com.netflix.conductor.core.execution.appconfig.cache.PriorityConfig;
import com.netflix.conductor.core.execution.batch.BatchSweeper;
Expand Down Expand Up @@ -64,6 +65,7 @@ protected void configure() {
bind(SetVariable.class).asEagerSingleton();
bind(AppConfig.class).asEagerSingleton();
bind(PriorityConfig.class).asEagerSingleton();
bind(AlertScheduler.class).asEagerSingleton();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@
import com.netflix.conductor.common.metadata.events.EventExecution.Status;
import com.netflix.conductor.common.metadata.events.EventHandler;
import com.netflix.conductor.common.metadata.events.EventHandler.Action;
import com.netflix.conductor.common.run.Alert;
import com.netflix.conductor.core.config.Configuration;
import com.netflix.conductor.core.events.queue.Message;
import com.netflix.conductor.core.events.queue.ObservableQueue;
import com.netflix.conductor.core.execution.ParametersUtils;
import com.netflix.conductor.dao.ExecutionDAO;
import com.netflix.conductor.service.ExecutionService;
import com.netflix.conductor.service.MetadataService;
import com.netflix.conductor.service.MetricService;
Expand Down Expand Up @@ -61,13 +63,16 @@ public class EventProcessor {
private ExecutionService es;
private ActionProcessor ap;
private ObjectMapper om;
private ExecutionDAO edao;


@Inject
public EventProcessor(ExecutionService es, MetadataService ms, ActionProcessor ap, Configuration config, ObjectMapper om) {
public EventProcessor(ExecutionService es, MetadataService ms, ActionProcessor ap, Configuration config, ObjectMapper om, ExecutionDAO edao) {
this.es = es;
this.ms = ms;
this.ap = ap;
this.om = om;
this.edao = edao;

boolean disabled = Boolean.parseBoolean(config.getProperty("workflow.event.processor.disabled", "false"));
if (!disabled) {
Expand Down Expand Up @@ -492,6 +497,9 @@ public void validateHandlerConditions(EventHandler handler) {
evalCondition(condition, conditionClass, payloadObj);
} catch (Exception ex) {
logger.error(handler.getName() + " event handler condition validation failed " + ex.getMessage(), ex);
Alert alert = new Alert();
alert.setMessage("event handler condition validation failed");
edao.addAlert(alert);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
package com.netflix.conductor.core.execution.alerts;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.netflix.conductor.common.run.AlertRegistry;
import com.netflix.conductor.core.DNSLookup;
import com.netflix.conductor.core.config.Configuration;
import com.netflix.conductor.dao.ExecutionDAO;
import com.sun.jersey.api.client.Client;
import com.sun.jersey.api.client.ClientResponse;
import com.sun.jersey.api.client.WebResource;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.inject.Inject;
import javax.ws.rs.core.MediaType;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;


public class AlertProcessor {
private static final Logger logger = LoggerFactory.getLogger(AlertProcessor.class);

private ExecutionDAO edao;

private final String notficationUrl;

private final String environment;

public static final String PROPERTY_URL = "conductor.notify.url";


private static final ObjectMapper mapper = new ObjectMapper();


@Inject
public AlertProcessor(ExecutionDAO edao, Configuration config) {
this.edao = edao;
notficationUrl = config.getProperty(PROPERTY_URL, null);
environment = config.getProperty("TLD", "local");
}

public void processAlerts() {
Map<Integer, Integer> groupedAlerts = edao.getGroupedAlerts();

groupedAlerts.forEach((alertLookupId, alertCount) -> {
AlertRegistry alertRegistry = edao.getAlertRegistryFromLookupId(alertLookupId);
if (alertRegistry != null && alertCount > alertRegistry.getAlertCount()) {
logger.info("Alert threshold exceeded for lookup ID: {}. Count: {}, Threshold: {}",
alertLookupId, alertCount, alertRegistry.getAlertCount());
try {
notifyService(alertLookupId, alertCount, alertRegistry.getGeneralMessage());
edao.markAlertsAsProcessed(alertLookupId);
} catch (Exception e) {
e.printStackTrace();
}
}
});
}

private void notifyService(Integer alertLookupId, int alertCount, String message) throws Exception {
String serviceDiscoveryQuery = "notify.service."+ Optional.ofNullable(System.getenv("TLD")).orElse("default");
String uri = "/v1/notify";
String method = "POST";
String contentType = MediaType.APPLICATION_JSON;

Map<String, Object> body = new HashMap<>();
body.put("message", "");
Map<String, Object> outputs = new HashMap<>();

Map<String, Object> slack = new HashMap<>();
slack.put("username", "conductor:"+environment);
slack.put("channel", "#conductor-"+environment);
slack.put("text", message);
outputs.put("slack", slack);

body.put("outputs", outputs);

String response = post(
uri,
method,
contentType,
body,
logger,
serviceDiscoveryQuery
);
}



public String post(
String uri,
String method,
String contentType,
Object body,
Logger logger,
String serviceDiscoveryQuery) throws Exception {

String hostAndPort = DNSLookup.lookup(serviceDiscoveryQuery);
String url = (StringUtils.isEmpty(hostAndPort) ? this.notficationUrl : hostAndPort) + uri;

Client client = Client.create();
WebResource.Builder webResource = client.resource(url)
.type(contentType != null ? contentType : MediaType.APPLICATION_JSON);

ClientResponse response;
if ("POST".equalsIgnoreCase(method)) {
response = webResource.post(ClientResponse.class, mapper.writeValueAsString(body));
} else if ("PUT".equalsIgnoreCase(method)) {
response = webResource.put(ClientResponse.class, mapper.writeValueAsString(body));
} else if ("DELETE".equalsIgnoreCase(method)) {
response = webResource.delete(ClientResponse.class);
} else {
response = webResource.get(ClientResponse.class);
}

String entity = response.getEntity(String.class);
if (response.getStatus() >= 200 && response.getStatus() < 300) {
return entity;
} else {
logger.error("HTTP Error {}: {}", response.getStatus(), entity);
throw new Exception("HTTP Error " + response.getStatus() + ": " + entity);
}
}
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package com.netflix.conductor.core.execution.alerts;

import com.netflix.conductor.core.config.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.inject.Inject;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

public class AlertScheduler {
private static final Logger logger = LoggerFactory.getLogger(AlertScheduler.class);

private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
private final AlertProcessor alertProcessor;
private final Configuration config;

@Inject
public AlertScheduler(AlertProcessor alertProcessor, Configuration config) {
this.config = config;
int alertInitDelay = config.getIntProperty("alert.init.delay", 0);
int alertFrequency = config.getIntProperty("alert.frequency", 10);
this.alertProcessor = alertProcessor;
scheduler.scheduleAtFixedRate(() -> {
try {
alertProcessor.processAlerts();
} catch (Exception e) {
logger.error("Error processing alerts", e);
}
}, alertInitDelay, alertFrequency, TimeUnit.MINUTES);
}


public void stop() {
logger.info("Stopping AlertScheduler...");
scheduler.shutdown();
}
}
Loading
Loading