Skip to content

Commit

Permalink
Avoid string out of bounds error in snapshot delete (opensearch-proje…
Browse files Browse the repository at this point in the history
…ct#12337)

* Avoid string out of bounds error in snapshot delete

Test failure opensearch-project#8771 shows cases where certain random seeds trigger this
case. The bug is clear: the substring() call should happen after the
startsWith() check in case the blob name is shorter than the prefix
length being used as the start index of the substring call. I don't yet
know if/how this manifests in real deployments.

Signed-off-by: Andrew Ross <andrross@amazon.com>

* Extract common UUID parsing method

Signed-off-by: Andrew Ross <andrross@amazon.com>

---------

Signed-off-by: Andrew Ross <andrross@amazon.com>
  • Loading branch information
andrross authored and Peter Alfonsi committed Mar 1, 2024
1 parent df88b56 commit ba22437
Showing 1 changed file with 45 additions and 47 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1116,10 +1116,7 @@ private void executeStaleShardDelete(
String indexId = fileToDeletePath[1];
String shardId = fileToDeletePath[2];
String shallowSnapBlob = fileToDeletePath[3];
String snapshotUUID = shallowSnapBlob.substring(
SHALLOW_SNAPSHOT_PREFIX.length(),
shallowSnapBlob.length() - ".dat".length()
);
String snapshotUUID = extractShallowSnapshotUUID(shallowSnapBlob).orElseThrow();
BlobContainer shardContainer = blobStore().blobContainer(indicesPath().add(indexId).add(shardId));
RemoteStoreShardShallowCopySnapshot remoteStoreShardShallowCopySnapshot =
REMOTE_STORE_SHARD_SHALLOW_COPY_SNAPSHOT_FORMAT.read(
Expand Down Expand Up @@ -1586,44 +1583,43 @@ private void executeOneStaleIndexDelete(
try {
logger.debug("[{}] Found stale index [{}]. Cleaning it up", metadata.name(), indexSnId);
if (remoteStoreLockManagerFactory != null) {
Map<String, BlobContainer> shardBlobs = indexEntry.getValue().children();
if (!shardBlobs.isEmpty()) {
for (Map.Entry<String, BlobContainer> shardBlob : shardBlobs.entrySet()) {
Map<String, BlobMetadata> shardLevelBlobs = shardBlob.getValue().listBlobs();
for (Map.Entry<String, BlobMetadata> shardLevelBlob : shardLevelBlobs.entrySet()) {
String blob = shardLevelBlob.getKey();
String snapshotUUID = blob.substring(SHALLOW_SNAPSHOT_PREFIX.length(), blob.length() - ".dat".length());
if (blob.startsWith(SHALLOW_SNAPSHOT_PREFIX) && blob.endsWith(".dat")) {
RemoteStoreShardShallowCopySnapshot remoteStoreShardShallowCopySnapshot =
REMOTE_STORE_SHARD_SHALLOW_COPY_SNAPSHOT_FORMAT.read(
shardBlob.getValue(),
snapshotUUID,
namedXContentRegistry
);
String indexUUID = remoteStoreShardShallowCopySnapshot.getIndexUUID();
String remoteStoreRepoForIndex = remoteStoreShardShallowCopySnapshot.getRemoteStoreRepository();
// Releasing lock files before deleting the shallow-snap-UUID file because in case of any failure
// while releasing the lock file, we would still have the corresponding shallow-snap-UUID file
// and that would be used during next delete operation for releasing this stale lock file
RemoteStoreLockManager remoteStoreMetadataLockManager = remoteStoreLockManagerFactory
.newLockManager(remoteStoreRepoForIndex, indexUUID, shardBlob.getKey());
remoteStoreMetadataLockManager.release(
FileLockInfo.getLockInfoBuilder().withAcquirerId(snapshotUUID).build()
final Map<String, BlobContainer> shardBlobs = indexEntry.getValue().children();
for (Map.Entry<String, BlobContainer> shardBlob : shardBlobs.entrySet()) {
for (String blob : shardBlob.getValue().listBlobs().keySet()) {
final Optional<String> snapshotUUID = extractShallowSnapshotUUID(blob);
if (snapshotUUID.isPresent()) {
RemoteStoreShardShallowCopySnapshot remoteStoreShardShallowCopySnapshot =
REMOTE_STORE_SHARD_SHALLOW_COPY_SNAPSHOT_FORMAT.read(
shardBlob.getValue(),
snapshotUUID.get(),
namedXContentRegistry
);
if (!isIndexPresent(clusterService, indexUUID)) {
// this is a temporary solution where snapshot deletion triggers remote store side
// cleanup if index is already deleted. We will add a poller in future to take
// care of remote store side cleanup.
// see https://github.com/opensearch-project/OpenSearch/issues/8469
new RemoteSegmentStoreDirectoryFactory(
remoteStoreLockManagerFactory.getRepositoriesService(),
threadPool
).newDirectory(
remoteStoreRepoForIndex,
indexUUID,
new ShardId(Index.UNKNOWN_INDEX_NAME, indexUUID, Integer.valueOf(shardBlob.getKey()))
).close();
}
String indexUUID = remoteStoreShardShallowCopySnapshot.getIndexUUID();
String remoteStoreRepoForIndex = remoteStoreShardShallowCopySnapshot.getRemoteStoreRepository();
// Releasing lock files before deleting the shallow-snap-UUID file because in case of any failure
// while releasing the lock file, we would still have the corresponding shallow-snap-UUID file
// and that would be used during next delete operation for releasing this stale lock file
RemoteStoreLockManager remoteStoreMetadataLockManager = remoteStoreLockManagerFactory.newLockManager(
remoteStoreRepoForIndex,
indexUUID,
shardBlob.getKey()
);
remoteStoreMetadataLockManager.release(
FileLockInfo.getLockInfoBuilder().withAcquirerId(snapshotUUID.get()).build()
);
if (!isIndexPresent(clusterService, indexUUID)) {
// this is a temporary solution where snapshot deletion triggers remote store side
// cleanup if index is already deleted. We will add a poller in future to take
// care of remote store side cleanup.
// see https://github.com/opensearch-project/OpenSearch/issues/8469
new RemoteSegmentStoreDirectoryFactory(
remoteStoreLockManagerFactory.getRepositoriesService(),
threadPool
).newDirectory(
remoteStoreRepoForIndex,
indexUUID,
new ShardId(Index.UNKNOWN_INDEX_NAME, indexUUID, Integer.parseInt(shardBlob.getKey()))
).close();
}
}
}
Expand Down Expand Up @@ -3362,12 +3358,7 @@ private static List<String> unusedBlobs(
blob.substring(SNAPSHOT_PREFIX.length(), blob.length() - ".dat".length())
) == false)
|| (remoteStoreLockManagerFactory != null
? (blob.startsWith(SHALLOW_SNAPSHOT_PREFIX)
&& blob.endsWith(".dat")
&& survivingSnapshotUUIDs.contains(
blob.substring(SHALLOW_SNAPSHOT_PREFIX.length(), blob.length() - ".dat".length())
) == false)
: false)
&& extractShallowSnapshotUUID(blob).map(survivingSnapshotUUIDs::contains).orElse(false))
|| (blob.startsWith(UPLOADED_DATA_BLOB_PREFIX) && updatedSnapshots.findNameFile(canonicalName(blob)) == null)
|| FsBlobContainer.isTempBlobName(blob)
)
Expand Down Expand Up @@ -3509,6 +3500,13 @@ private static void failStoreIfCorrupted(Store store, Exception e) {
}
}

/**
 * Extracts the snapshot UUID from a shallow-snapshot blob name of the form
 * {@code SHALLOW_SNAPSHOT_PREFIX + <uuid> + ".dat"}.
 *
 * @param blobName name of the blob to inspect; must not be {@code null}
 * @return the text between the prefix and the {@code .dat} suffix, or an empty
 *         {@link Optional} when the name lacks the prefix, lacks the suffix, or is
 *         too short to contain both
 */
private static Optional<String> extractShallowSnapshotUUID(String blobName) {
    final String suffix = ".dat";
    final int uuidStart = SHALLOW_SNAPSHOT_PREFIX.length();
    final int uuidEnd = blobName.length() - suffix.length();
    // Guard the substring() bounds up front: a name such as the bare prefix would otherwise
    // give uuidEnd < uuidStart and throw StringIndexOutOfBoundsException.
    if (uuidEnd < uuidStart) {
        return Optional.empty();
    }
    // Require the suffix as well as the prefix so that a blob which merely shares the
    // prefix is not mistaken for a shallow snapshot and parsed into a truncated UUID.
    if (blobName.startsWith(SHALLOW_SNAPSHOT_PREFIX) && blobName.endsWith(suffix)) {
        return Optional.of(blobName.substring(uuidStart, uuidEnd));
    }
    return Optional.empty();
}

/**
* The result of removing a snapshot from a shard folder in the repository.
*/
Expand Down

0 comments on commit ba22437

Please sign in to comment.