Skip to content

Commit

Permalink
Watcher: Ensure TriggerEngine start replaces existing watches (#33157)
Browse files Browse the repository at this point in the history
This commit ensures that when `TriggerService.start()` is called,
the trigger engine implementations remove their current watches
instead of adding to the existing ones in
`TickerScheduleTriggerEngine.start()`

Two additional minor fixes, where the result remains the same but less code gets executed.

1. If the node is not a data node, we forgot to set the status to
STARTING when watcher is being started. This should not be a big issue,
because a non-data node does not spend a lot of time loading, as there
are no watches which need loading.
2. If a new cluster state came in during a reload, we had two checks in
place to abort loading the current one: the first one before we load all
the watches of the local node, and the second before watcher starts
with those new watches. It turned out that the first check was not
returning, which meant we always tried to load all the watches and would
then fail on the second check. This has been fixed here.
  • Loading branch information
spinscale committed Aug 30, 2018
1 parent 6a699ad commit e5f8a22
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ public void clusterChanged(ClusterChangedEvent event) {
// if this is not a data node, we need to start it ourselves possibly
if (event.state().nodes().getLocalNode().isDataNode() == false &&
isWatcherStoppedManually == false && this.state.get() == WatcherState.STOPPED) {
this.state.set(WatcherState.STARTING);
watcherService.start(event.state(), () -> this.state.set(WatcherState.STARTED));
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,9 +183,6 @@ void reload(ClusterState state, String reason) {
// by checking the cluster state version before and after loading the watches we can potentially just exit without applying the
// changes
processedClusterStateVersion.set(state.getVersion());
triggerService.pauseExecution();
int cancelledTaskCount = executionService.clearExecutionsAndQueue();
logger.info("reloading watcher, reason [{}], cancelled [{}] queued tasks", reason, cancelledTaskCount);

executor.execute(wrapWatcherService(() -> reloadInner(state, reason, false),
e -> logger.error("error reloading watcher", e)));
Expand Down Expand Up @@ -221,6 +218,7 @@ private synchronized boolean reloadInner(ClusterState state, String reason, bool
if (processedClusterStateVersion.get() != state.getVersion()) {
logger.debug("watch service has not been reloaded for state [{}], another reload for state [{}] in progress",
state.getVersion(), processedClusterStateVersion.get());
return false;
}

Collection<Watch> watches = loadWatches(state);
Expand All @@ -231,7 +229,13 @@ private synchronized boolean reloadInner(ClusterState state, String reason, bool

// if we had another state coming in the meantime, we will not start the trigger engines with these watches, but wait
// until the others are loaded
// also this is the place where we pause the trigger service execution and clear the current execution service, so that we make sure
// that existing executions finish, but no new ones are executed
if (processedClusterStateVersion.get() == state.getVersion()) {
triggerService.pauseExecution();
int cancelledTaskCount = executionService.clearExecutionsAndQueue();
logger.info("reloading watcher, reason [{}], cancelled [{}] queued tasks", reason, cancelledTaskCount);

executionService.unPause();
triggerService.start(watches);
if (triggeredWatches.isEmpty() == false) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public synchronized void start(Collection<Watch> jobs) {
schedules.put(job.id(), new ActiveSchedule(job.id(), trigger.getSchedule(), startTime));
}
}
this.schedules.putAll(schedules);
this.schedules = schedules;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@
import static org.elasticsearch.xpack.watcher.trigger.schedule.Schedules.daily;
import static org.elasticsearch.xpack.watcher.trigger.schedule.Schedules.interval;
import static org.elasticsearch.xpack.watcher.trigger.schedule.Schedules.weekly;
import static org.hamcrest.Matchers.everyItem;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.startsWith;
import static org.joda.time.DateTimeZone.UTC;
import static org.mockito.Mockito.mock;

Expand All @@ -50,8 +52,12 @@ public void init() throws Exception {
}

/**
 * Creates a ticker schedule trigger engine for testing.
 *
 * The scraped diff left the removed pre-change lines (the old unconditional
 * {@code Settings.EMPTY} return) interleaved with the added lines, making every
 * statement after the first {@code return} unreachable; this keeps only the
 * new-side implementation.
 *
 * @return a fresh {@link TickerScheduleTriggerEngine} backed by a mocked schedule registry
 */
private TriggerEngine createEngine() {
    Settings settings = Settings.EMPTY;
    // having a low value here speeds up the tests tremendously, we still want to run with the defaults every now and then
    if (usually()) {
        settings = Settings.builder().put(TickerScheduleTriggerEngine.TICKER_INTERVAL_SETTING.getKey(), "10ms").build();
    }
    return new TickerScheduleTriggerEngine(settings, mock(ScheduleRegistry.class), clock);
}

private void advanceClockIfNeeded(DateTime newCurrentDateTime) {
Expand Down Expand Up @@ -104,6 +110,40 @@ public void accept(Iterable<TriggerEvent> events) {
assertThat(bits.cardinality(), is(count));
}

/**
 * Verifies that calling {@code start()} a second time replaces the previously
 * registered watches instead of adding to them: after the second start, only
 * watches from the second batch may fire.
 */
public void testStartClearsExistingSchedules() throws Exception {
    // listener records the id of every fired watch and releases the latch
    final CountDownLatch firedLatch = new CountDownLatch(1);
    final List<String> triggeredIds = new ArrayList<>();
    engine.register(events -> {
        events.forEach(event -> triggeredIds.add(event.jobName()));
        firedLatch.countDown();
    });

    int numWatches = randomIntBetween(2, 5);
    List<Watch> firstBatch = new ArrayList<>();
    for (int i = 0; i < numWatches; i++) {
        firstBatch.add(createWatch(String.valueOf(i), interval("1s")));
    }
    engine.start(firstBatch);

    // starting again with a different set of watches must replace the first set
    firstBatch.clear();
    for (int i = 0; i < numWatches; i++) {
        firstBatch.add(createWatch("another_id" + i, interval("1s")));
    }
    engine.start(firstBatch);

    advanceClockIfNeeded(new DateTime(clock.millis(), UTC).plusMillis(1100));
    if (firedLatch.await(3 * numWatches, TimeUnit.SECONDS) == false) {
        fail("waiting too long for all watches to be triggered");
    }

    // only watches registered by the second start() call may have fired
    assertThat(triggeredIds, everyItem(startsWith("another_id")));
}

public void testAddHourly() throws Exception {
final String name = "job_name";
final CountDownLatch latch = new CountDownLatch(1);
Expand Down

0 comments on commit e5f8a22

Please sign in to comment.