Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Repository Cleanup Endpoint #43900

Merged
merged 105 commits into from
Aug 21, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
105 commits
Select commit Hold shift + click to select a range
add8cbb
Repo cleanup endpoint start
original-brownbear Jun 25, 2019
b850f48
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jun 25, 2019
7abc873
50%
original-brownbear Jun 26, 2019
7b698f4
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jun 26, 2019
80624cd
just ack for now
original-brownbear Jun 26, 2019
8d22cd5
just ack for now
original-brownbear Jun 26, 2019
6e14c27
just ack for now
original-brownbear Jun 26, 2019
059eca9
bck
original-brownbear Jun 26, 2019
fd6f8df
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jun 26, 2019
bfd7e63
bck
original-brownbear Jun 26, 2019
4d1ed1f
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jun 26, 2019
0b5b0c0
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jun 27, 2019
a828306
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jun 27, 2019
d2952d4
bck
original-brownbear Jun 27, 2019
d913ad4
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jun 28, 2019
c54a075
Fix functionality
original-brownbear Jun 28, 2019
71446c5
add rest action
original-brownbear Jun 28, 2019
48da5d0
fix compilation
original-brownbear Jun 28, 2019
6f2e702
fix compilation
original-brownbear Jun 28, 2019
5a0c826
nicer formatting
original-brownbear Jun 28, 2019
79bcc6b
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jun 28, 2019
87785e1
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jun 28, 2019
2bbd0f0
bck
original-brownbear Jun 28, 2019
29b419d
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jun 29, 2019
2340375
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jun 30, 2019
b2ebb52
merge fixes
original-brownbear Jun 30, 2019
ba1ad03
add some documentation
original-brownbear Jun 30, 2019
266471f
nicer cleanup logic
original-brownbear Jun 30, 2019
28a4b69
bck
original-brownbear Jul 1, 2019
81ba190
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jul 1, 2019
76b3fae
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jul 2, 2019
a0493df
result response
original-brownbear Jul 2, 2019
019b58f
compiles
original-brownbear Jul 2, 2019
35021f1
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jul 2, 2019
7f41860
fix hdfs repo tests
original-brownbear Jul 2, 2019
8dbd066
add cleanup rest test
original-brownbear Jul 2, 2019
766c0be
fix rest test
original-brownbear Jul 2, 2019
2e2df56
fix rest test
original-brownbear Jul 2, 2019
65a5320
shorter logic
original-brownbear Jul 2, 2019
7e14c60
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jul 2, 2019
8e68683
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jul 2, 2019
3cb590e
HLRC integration
original-brownbear Jul 2, 2019
5ad0353
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jul 2, 2019
0e17678
fix empty repo handling
original-brownbear Jul 2, 2019
2f87260
Fix HLRC integration
original-brownbear Jul 2, 2019
63682c4
cleaner
original-brownbear Jul 2, 2019
115a9bb
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jul 3, 2019
0a5d85e
ensure no concurrent modifications
original-brownbear Jul 3, 2019
21cfebc
better docs
original-brownbear Jul 3, 2019
1eb2ddc
fix formatting
original-brownbear Jul 3, 2019
951fc9d
fix formatting
original-brownbear Jul 3, 2019
a8363ab
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jul 3, 2019
a10a4b3
add javadoc
original-brownbear Jul 3, 2019
dbb21af
remove todo that was addressed
original-brownbear Jul 3, 2019
e68439d
simplify
original-brownbear Jul 3, 2019
6b3c0a9
revert needless chagne
original-brownbear Jul 3, 2019
c64f97f
remove resolved todo
original-brownbear Jul 3, 2019
7a05d27
ensure cleanup only runs on 8+
original-brownbear Jul 3, 2019
aaecfed
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jul 3, 2019
d8f7467
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jul 3, 2019
1f372df
skip cleanup check in bwc
original-brownbear Jul 3, 2019
1e5a0c7
skip cleanup check in bwc
original-brownbear Jul 3, 2019
68f5dd6
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jul 3, 2019
3ee6559
deal with null spots
original-brownbear Jul 3, 2019
785e33a
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jul 4, 2019
9759b9d
revert needless changes
original-brownbear Jul 5, 2019
d6ec556
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jul 16, 2019
1bf001a
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jul 16, 2019
9a4e412
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Jul 23, 2019
58a047d
bck
original-brownbear Jul 23, 2019
f5db718
Merge remote-tracking branch 'elastic/master' into cleaup-repo-ep-v2
original-brownbear Jul 24, 2019
18b3b65
Merge remote-tracking branch 'elastic/master' into cleaup-repo-ep-v2
original-brownbear Jul 24, 2019
2a45ba5
Merge remote-tracking branch 'elastic/master' into cleaup-repo-ep-v2
original-brownbear Aug 5, 2019
d4ae1c0
resolve conflict
original-brownbear Aug 5, 2019
461c732
shorter
original-brownbear Aug 5, 2019
10d158f
nicer
original-brownbear Aug 5, 2019
cb6c776
Merge remote-tracking branch 'elastic/master' into cleaup-repo-ep-v2
original-brownbear Aug 5, 2019
2e2a52f
nicer
original-brownbear Aug 5, 2019
776b751
docs + correct handling read-only repo
original-brownbear Aug 5, 2019
e9ae50a
More efficient, don't increment index unless there is something to do
original-brownbear Aug 5, 2019
93d395c
nicer formatting
original-brownbear Aug 5, 2019
590295b
add comment
original-brownbear Aug 5, 2019
f97036f
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Aug 5, 2019
af7bc59
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Aug 6, 2019
c288432
add some logging
original-brownbear Aug 6, 2019
e8a826d
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Aug 6, 2019
d40580f
CR: resolve some comments
original-brownbear Aug 6, 2019
c0ca7cd
CR: renamings
original-brownbear Aug 6, 2019
bc687ac
CR: renamings
original-brownbear Aug 6, 2019
3a2f3cc
CR: fix indent
original-brownbear Aug 6, 2019
aef1735
CR: order things reasonably
original-brownbear Aug 6, 2019
6fc9fd9
CR: nicer snapshot task name
original-brownbear Aug 6, 2019
c9e3e29
CR: nicer naming cleanup CS update tasks
original-brownbear Aug 6, 2019
d777610
simplify away some casting
original-brownbear Aug 6, 2019
dece663
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Aug 6, 2019
091fb70
CR: rename response fields
original-brownbear Aug 6, 2019
c8d513a
CR: return a delete result from container deletes
original-brownbear Aug 6, 2019
3f51c20
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Aug 8, 2019
eb45c17
nicer apis
original-brownbear Aug 8, 2019
10e02d7
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Aug 9, 2019
25fb827
CR: add note on deletes cleaning up repo as well
original-brownbear Aug 9, 2019
6040781
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Aug 9, 2019
3af846c
add missing empty line
original-brownbear Aug 9, 2019
f8238cf
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Aug 11, 2019
6678156
Merge remote-tracking branch 'elastic/master' into cleanup-repo-ep
original-brownbear Aug 13, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
package org.elasticsearch.client;

import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.admin.cluster.repositories.cleanup.CleanupRepositoryRequest;
import org.elasticsearch.action.admin.cluster.repositories.cleanup.CleanupRepositoryResponse;
import org.elasticsearch.action.admin.cluster.repositories.delete.DeleteRepositoryRequest;
import org.elasticsearch.action.admin.cluster.repositories.get.GetRepositoriesRequest;
import org.elasticsearch.action.admin.cluster.repositories.get.GetRepositoriesResponse;
Expand Down Expand Up @@ -170,6 +172,35 @@ public void verifyRepositoryAsync(VerifyRepositoryRequest verifyRepositoryReques
VerifyRepositoryResponse::fromXContent, listener, emptySet());
}

/**
 * Synchronously runs a cleanup of the given snapshot repository, deleting any stale data found in it.
 * See <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-snapshots.html"> Snapshot and Restore
 * API on elastic.co</a>
 *
 * @param cleanupRepositoryRequest the request
 * @param options the request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
 * @return the response, reporting the number of blobs and bytes removed by the cleanup
 * @throws IOException in case there is a problem sending the request or parsing back the response
 */
public CleanupRepositoryResponse cleanupRepository(CleanupRepositoryRequest cleanupRepositoryRequest,
                                                   RequestOptions options) throws IOException {
    return restHighLevelClient.performRequestAndParseEntity(
        cleanupRepositoryRequest,
        SnapshotRequestConverters::cleanupRepository,
        options,
        CleanupRepositoryResponse::fromXContent,
        emptySet());
}

/**
 * Asynchronously runs a cleanup of the given snapshot repository, deleting any stale data found in it.
 * See <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-snapshots.html"> Snapshot and Restore
 * API on elastic.co</a>
 *
 * @param cleanupRepositoryRequest the request
 * @param options the request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
 * @param listener the listener to be notified upon request completion
 */
public void cleanupRepositoryAsync(CleanupRepositoryRequest cleanupRepositoryRequest,
                                   RequestOptions options,
                                   ActionListener<CleanupRepositoryResponse> listener) {
    restHighLevelClient.performRequestAsyncAndParseEntity(
        cleanupRepositoryRequest,
        SnapshotRequestConverters::cleanupRepository,
        options,
        CleanupRepositoryResponse::fromXContent,
        listener,
        emptySet());
}

/**
* Creates a snapshot.
* <p>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpPut;
import org.elasticsearch.action.admin.cluster.repositories.cleanup.CleanupRepositoryRequest;
import org.elasticsearch.action.admin.cluster.repositories.delete.DeleteRepositoryRequest;
import org.elasticsearch.action.admin.cluster.repositories.get.GetRepositoriesRequest;
import org.elasticsearch.action.admin.cluster.repositories.put.PutRepositoryRequest;
Expand Down Expand Up @@ -94,6 +95,20 @@ static Request verifyRepository(VerifyRepositoryRequest verifyRepositoryRequest)
return request;
}

/**
 * Builds the low-level {@link Request} for the repository cleanup endpoint:
 * {@code POST /_snapshot/{repository}/_cleanup}, carrying the request's master and ack timeouts.
 */
static Request cleanupRepository(CleanupRepositoryRequest cleanupRepositoryRequest) {
    final String url = new RequestConverters.EndpointBuilder()
        .addPathPartAsIs("_snapshot")
        .addPathPart(cleanupRepositoryRequest.name())
        .addPathPartAsIs("_cleanup")
        .build();

    final RequestConverters.Params params = new RequestConverters.Params();
    params.withMasterTimeout(cleanupRepositoryRequest.masterNodeTimeout());
    params.withTimeout(cleanupRepositoryRequest.timeout());

    final Request request = new Request(HttpPost.METHOD_NAME, url);
    request.addParameters(params.asMap());
    return request;
}

static Request createSnapshot(CreateSnapshotRequest createSnapshotRequest) throws IOException {
String endpoint = new RequestConverters.EndpointBuilder().addPathPart("_snapshot")
.addPathPart(createSnapshotRequest.repository())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
package org.elasticsearch.client;

import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.admin.cluster.repositories.cleanup.CleanupRepositoryRequest;
import org.elasticsearch.action.admin.cluster.repositories.cleanup.CleanupRepositoryResponse;
import org.elasticsearch.action.admin.cluster.repositories.delete.DeleteRepositoryRequest;
import org.elasticsearch.action.admin.cluster.repositories.get.GetRepositoriesRequest;
import org.elasticsearch.action.admin.cluster.repositories.get.GetRepositoriesResponse;
Expand Down Expand Up @@ -133,6 +135,17 @@ public void testVerifyRepository() throws IOException {
assertThat(response.getNodes().size(), equalTo(1));
}

public void testCleanupRepository() throws IOException {
    // A freshly created, empty repository contains no stale data, so cleanup must report zero deletions.
    final String repository = "test";
    AcknowledgedResponse putRepositoryResponse = createTestRepository(repository, FsRepository.TYPE, "{\"location\": \".\"}");
    assertTrue(putRepositoryResponse.isAcknowledged());

    CleanupRepositoryRequest cleanupRequest = new CleanupRepositoryRequest(repository);
    CleanupRepositoryResponse cleanupResponse = execute(cleanupRequest,
        highLevelClient().snapshot()::cleanupRepository, highLevelClient().snapshot()::cleanupRepositoryAsync);
    assertThat(cleanupResponse.result().bytes(), equalTo(0L));
    assertThat(cleanupResponse.result().blobs(), equalTo(0L));
}

public void testCreateSnapshot() throws IOException {
String repository = "test_repository";
assertTrue(createTestRepository(repository, FsRepository.TYPE, "{\"location\": \".\"}").isAcknowledged());
Expand Down Expand Up @@ -317,4 +330,4 @@ private static Map<String, Object> randomUserMetadata() {
}
return metadata;
}
}
}
36 changes: 36 additions & 0 deletions docs/reference/modules/snapshots.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,42 @@ POST /_snapshot/my_unverified_backup/_verify

It returns a list of nodes where repository was successfully verified or an error message if verification process failed.

[float]
===== Repository Cleanup
Repositories can over time accumulate data that is not referenced by any existing snapshot. This is a result of the data safety guarantees
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mention that the cleanup functionality is also automatically run on snapshot deletion?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm I'm not sure. It would make sense now, but I was gonna add the logic to clean up all the individual shard folders in a follow-up which we can't run on delete (it's just super slow), not sure if it's worth explaining the difference here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most folks shouldn't bother about this API, given that all relevant clean-up will happen on snapshot deletion. I do not want to raise the impression that you will have to run this API to sensibly operate an ES cluster.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair point, I added a note on that now in
25fb827

the snapshot functionality provides in failure scenarios during snapshot creation and the decentralized nature of the snapshot creation
process. This unreferenced data in no way negatively impacts the performance or safety of a snapshot repository but leads to higher
than necessary storage use. In order to clean up this unreferenced data, users can call the cleanup endpoint for a repository, which will
trigger a complete accounting of the repository's contents and subsequent deletion of all unreferenced data that was found.

[source,js]
-----------------------------------
POST /_snapshot/my_repository/_cleanup
-----------------------------------
// CONSOLE
// TEST[continued]

The response to a cleanup request looks as follows:

[source,js]
--------------------------------------------------
{
"results": {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is it called "results" and not "result"? I would expect to have an array if the field is named "result"

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think "results" is more natural language wise here in English (I guess because we're listing multiple non-binary values but I tbh. I don't have a good explanation as to why that is :D).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is the result of the clean up operation, so I think we could just drop the results/result field, maybe also the RepositoryCleanupResult object, and report

{
    "deleted_bytes": 20,
    "deleted_blobs": 5
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mainly did this to stay a little consistent in style with other snapshot APIs. We never really return a flat object, we always wrap under a descriptive key don't we? (e.g. getting a list of snapshots in a repo, creating a snapshot, ...)

Aesthetically I always kind of like an object like that but I don't have a good argument for it otherwise :) I suppose it makes it a little easier to extend if we ever want to in the future, but that isn't so relevant for this endpoint I guess.

"deleted_bytes": 20,
"deleted_blobs": 5
}
}
--------------------------------------------------
// TESTRESPONSE

Depending on the concrete repository implementation, the numbers shown for bytes freed as well as the number of blobs removed will either
be an approximation or an exact result. Any non-zero value for the number of blobs removed implies that unreferenced blobs were found and
subsequently cleaned up.

Please note that most of the cleanup operations executed by this endpoint are automatically executed when deleting any snapshot from a
repository. If you regularly delete snapshots, you will in most cases not get any or only minor space savings from using this functionality
and should lower your frequency of invoking it accordingly.

[float]
[[snapshots-take-snapshot]]
=== Snapshot
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.elasticsearch.common.blobstore.BlobContainer;
import org.elasticsearch.common.blobstore.BlobMetaData;
import org.elasticsearch.common.blobstore.BlobPath;
import org.elasticsearch.common.blobstore.DeleteResult;
import org.elasticsearch.common.blobstore.support.AbstractBlobContainer;

import java.io.BufferedInputStream;
Expand Down Expand Up @@ -97,7 +98,7 @@ public void deleteBlob(String blobName) throws IOException {
}

@Override
public void delete() {
public DeleteResult delete() {
throw new UnsupportedOperationException("URL repository is read only");
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.elasticsearch.common.blobstore.BlobContainer;
import org.elasticsearch.common.blobstore.BlobMetaData;
import org.elasticsearch.common.blobstore.BlobPath;
import org.elasticsearch.common.blobstore.DeleteResult;
import org.elasticsearch.common.blobstore.support.AbstractBlobContainer;
import org.elasticsearch.threadpool.ThreadPool;

Expand Down Expand Up @@ -126,9 +127,9 @@ public void deleteBlob(String blobName) throws IOException {
}

@Override
public void delete() throws IOException {
public DeleteResult delete() throws IOException {
try {
blobStore.deleteBlobDirectory(keyPath, threadPool.executor(AzureRepositoryPlugin.REPOSITORY_THREAD_POOL_NAME));
return blobStore.deleteBlobDirectory(keyPath, threadPool.executor(AzureRepositoryPlugin.REPOSITORY_THREAD_POOL_NAME));
} catch (URISyntaxException | StorageException e) {
throw new IOException(e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@

import com.microsoft.azure.storage.LocationMode;
import com.microsoft.azure.storage.StorageException;

import org.elasticsearch.cluster.metadata.RepositoryMetaData;
import org.elasticsearch.common.blobstore.BlobContainer;
import org.elasticsearch.common.blobstore.BlobMetaData;
import org.elasticsearch.common.blobstore.BlobPath;
import org.elasticsearch.common.blobstore.BlobStore;
import org.elasticsearch.common.blobstore.DeleteResult;
import org.elasticsearch.repositories.azure.AzureRepository.Repository;
import org.elasticsearch.threadpool.ThreadPool;

Expand Down Expand Up @@ -92,8 +92,9 @@ public void deleteBlob(String blob) throws URISyntaxException, StorageException
service.deleteBlob(clientName, container, blob);
}

public void deleteBlobDirectory(String path, Executor executor) throws URISyntaxException, StorageException, IOException {
service.deleteBlobDirectory(clientName, container, path, executor);
public DeleteResult deleteBlobDirectory(String path, Executor executor)
throws URISyntaxException, StorageException, IOException {
return service.deleteBlobDirectory(clientName, container, path, executor);
}

public InputStream getInputStream(String blob) throws URISyntaxException, StorageException, IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.elasticsearch.action.support.PlainActionFuture;
import org.elasticsearch.common.blobstore.BlobMetaData;
import org.elasticsearch.common.blobstore.BlobPath;
import org.elasticsearch.common.blobstore.DeleteResult;
import org.elasticsearch.common.blobstore.support.PlainBlobMetaData;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.settings.Settings;
Expand Down Expand Up @@ -72,7 +73,7 @@
import static java.util.Collections.emptyMap;

public class AzureStorageService {

private static final Logger logger = LogManager.getLogger(AzureStorageService.class);

public static final ByteSizeValue MIN_CHUNK_SIZE = new ByteSizeValue(1, ByteSizeUnit.BYTES);
Expand Down Expand Up @@ -192,13 +193,15 @@ public void deleteBlob(String account, String container, String blob) throws URI
});
}

void deleteBlobDirectory(String account, String container, String path, Executor executor)
DeleteResult deleteBlobDirectory(String account, String container, String path, Executor executor)
throws URISyntaxException, StorageException, IOException {
final Tuple<CloudBlobClient, Supplier<OperationContext>> client = client(account);
final CloudBlobContainer blobContainer = client.v1().getContainerReference(container);
final Collection<Exception> exceptions = Collections.synchronizedList(new ArrayList<>());
final AtomicLong outstanding = new AtomicLong(1L);
final PlainActionFuture<Void> result = PlainActionFuture.newFuture();
final AtomicLong blobsDeleted = new AtomicLong();
final AtomicLong bytesDeleted = new AtomicLong();
SocketAccess.doPrivilegedVoidException(() -> {
for (final ListBlobItem blobItem : blobContainer.listBlobs(path, true)) {
// uri.getPath is of the form /container/keyPath.* and we want to strip off the /container/
Expand All @@ -208,7 +211,17 @@ void deleteBlobDirectory(String account, String container, String path, Executor
executor.execute(new AbstractRunnable() {
@Override
protected void doRun() throws Exception {
final long len;
if (blobItem instanceof CloudBlob) {
len = ((CloudBlob) blobItem).getProperties().getLength();
} else {
len = -1L;
}
deleteBlob(account, container, blobPath);
blobsDeleted.incrementAndGet();
if (len >= 0) {
bytesDeleted.addAndGet(len);
}
}

@Override
Expand All @@ -234,6 +247,7 @@ public void onAfter() {
exceptions.forEach(ex::addSuppressed);
throw ex;
}
return new DeleteResult(blobsDeleted.get(), bytesDeleted.get());
}

public InputStream getInputStream(String account, String container, String blob)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.elasticsearch.common.blobstore.BlobContainer;
import org.elasticsearch.common.blobstore.BlobMetaData;
import org.elasticsearch.common.blobstore.BlobPath;
import org.elasticsearch.common.blobstore.DeleteResult;
import org.elasticsearch.common.blobstore.support.AbstractBlobContainer;

import java.io.IOException;
Expand Down Expand Up @@ -77,8 +78,8 @@ public void deleteBlob(String blobName) throws IOException {
}

@Override
public void delete() throws IOException {
blobStore.deleteDirectory(path().buildAsString());
public DeleteResult delete() throws IOException {
return blobStore.deleteDirectory(path().buildAsString());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.elasticsearch.common.blobstore.BlobPath;
import org.elasticsearch.common.blobstore.BlobStore;
import org.elasticsearch.common.blobstore.BlobStoreException;
import org.elasticsearch.common.blobstore.DeleteResult;
import org.elasticsearch.common.blobstore.support.PlainBlobMetaData;
import org.elasticsearch.common.collect.MapBuilder;
import org.elasticsearch.core.internal.io.Streams;
Expand All @@ -55,6 +56,7 @@
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;

Expand Down Expand Up @@ -300,15 +302,24 @@ void deleteBlob(String blobName) throws IOException {
*
* @param pathStr Name of path to delete
*/
void deleteDirectory(String pathStr) throws IOException {
SocketAccess.doPrivilegedVoidIOException(() -> {
DeleteResult deleteDirectory(String pathStr) throws IOException {
return SocketAccess.doPrivilegedIOException(() -> {
DeleteResult deleteResult = DeleteResult.ZERO;
Page<Blob> page = client().get(bucketName).list(BlobListOption.prefix(pathStr));
do {
final Collection<String> blobsToDelete = new ArrayList<>();
page.getValues().forEach(b -> blobsToDelete.add(b.getName()));
final AtomicLong blobsDeleted = new AtomicLong(0L);
final AtomicLong bytesDeleted = new AtomicLong(0L);
page.getValues().forEach(b -> {
blobsToDelete.add(b.getName());
blobsDeleted.incrementAndGet();
bytesDeleted.addAndGet(b.getSize());
});
deleteBlobsIgnoringIfNotExists(blobsToDelete);
deleteResult = deleteResult.add(blobsDeleted.get(), bytesDeleted.get());
page = page.getNextPage();
} while (page != null);
return deleteResult;
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.elasticsearch.common.blobstore.BlobContainer;
import org.elasticsearch.common.blobstore.BlobMetaData;
import org.elasticsearch.common.blobstore.BlobPath;
import org.elasticsearch.common.blobstore.DeleteResult;
import org.elasticsearch.common.blobstore.fs.FsBlobContainer;
import org.elasticsearch.common.blobstore.support.AbstractBlobContainer;
import org.elasticsearch.common.blobstore.support.PlainBlobMetaData;
Expand Down Expand Up @@ -69,9 +70,13 @@ public void deleteBlob(String blobName) throws IOException {
}
}

// TODO: See if we can get precise result reporting.
private static final DeleteResult DELETE_RESULT = new DeleteResult(1L, 0L);

@Override
public void delete() throws IOException {
public DeleteResult delete() throws IOException {
store.execute(fileContext -> fileContext.delete(path, true));
return DELETE_RESULT;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.elasticsearch.repositories.hdfs;

import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
import org.elasticsearch.action.admin.cluster.repositories.cleanup.CleanupRepositoryResponse;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.bootstrap.JavaVersion;
import org.elasticsearch.common.settings.MockSecureSettings;
Expand All @@ -30,6 +31,7 @@
import java.util.Collection;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;

@ThreadLeakFilters(filters = HdfsClientThreadLeakFilter.class)
public class HdfsRepositoryTests extends AbstractThirdPartyRepositoryTestCase {
Expand Down Expand Up @@ -58,4 +60,14 @@ protected void createRepository(String repoName) {
).get();
assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true));
}

// The HDFS repository does not report precise cleanup statistics, so only verify
// whether any blobs were removed at all rather than comparing exact counts.
@Override
protected void assertCleanupResponse(CleanupRepositoryResponse response, long bytes, long blobs) {
    assertThat(response.result().blobs(), blobs > 0 ? greaterThan(0L) : equalTo(0L));
}
}
Loading