Skip to content

Commit

Permalink
Don't break allocation if resize source index is missing (#29311)
Browse files Browse the repository at this point in the history
DiskThresholdDecider currently assumes that the source index of a resize operation (e.g. shrink)
is available, and throws an IndexNotFoundException otherwise, thereby breaking any kind of shard
allocation. This can be quite harmful if the source index is deleted during a shrink, or if the source
index is unavailable during state recovery.

While this behavior has been partly fixed in 6.1 and above (due to #26931), it relies on the order in
which AllocationDeciders are executed (i.e. that ResizeAllocationDecider returns NO, ensuring that
DiskThresholdDecider does not run, something that for example does not hold for the allocation
explain API).

This change adds a more complete fix, and also solves the situation for 5.6.
  • Loading branch information
ywelsch authored Apr 3, 2018
1 parent 989e465 commit 2dc546c
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -409,11 +409,14 @@ public static long getExpectedShardSize(ShardRouting shard, RoutingAllocation al
// the worst case
long targetShardSize = 0;
final Index mergeSourceIndex = metaData.getResizeSourceIndex();
final IndexMetaData sourceIndexMeta = allocation.metaData().getIndexSafe(mergeSourceIndex);
final Set<ShardId> shardIds = IndexMetaData.selectRecoverFromShards(shard.id(), sourceIndexMeta, metaData.getNumberOfShards());
for (IndexShardRoutingTable shardRoutingTable : allocation.routingTable().index(mergeSourceIndex.getName())) {
if (shardIds.contains(shardRoutingTable.shardId())) {
targetShardSize += info.getShardSize(shardRoutingTable.primaryShard(), 0);
final IndexMetaData sourceIndexMeta = allocation.metaData().index(mergeSourceIndex);
if (sourceIndexMeta != null) {
final Set<ShardId> shardIds = IndexMetaData.selectRecoverFromShards(shard.id(),
sourceIndexMeta, metaData.getNumberOfShards());
for (IndexShardRoutingTable shardRoutingTable : allocation.routingTable().index(mergeSourceIndex.getName())) {
if (shardIds.contains(shardRoutingTable.shardId())) {
targetShardSize += info.getShardSize(shardRoutingTable.primaryShard(), 0);
}
}
}
return targetShardSize == 0 ? defaultValue : targetShardSize;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,20 @@ public void testSizeShrinkIndex() {
target2 = ShardRouting.newUnassigned(new ShardId(new Index("target2", "9101112"), 1),
true, LocalShardsRecoverySource.INSTANCE, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo"));
assertEquals(1000L, DiskThresholdDecider.getExpectedShardSize(target2, allocation, 0));

// check that the DiskThresholdDecider still works even if the source index has been deleted
ClusterState clusterStateWithMissingSourceIndex = ClusterState.builder(clusterState)
.metaData(MetaData.builder(metaData).remove("test"))
.routingTable(RoutingTable.builder(clusterState.routingTable()).remove("test").build())
.build();

allocationService.reroute(clusterState, "foo");

RoutingAllocation allocationWithMissingSourceIndex = new RoutingAllocation(null,
clusterStateWithMissingSourceIndex.getRoutingNodes(), clusterStateWithMissingSourceIndex, info, 0);

assertEquals(42L, DiskThresholdDecider.getExpectedShardSize(target, allocationWithMissingSourceIndex, 42L));
assertEquals(42L, DiskThresholdDecider.getExpectedShardSize(target2, allocationWithMissingSourceIndex, 42L));
}

}

0 comments on commit 2dc546c

Please sign in to comment.