Implement delete_objs for S3 #396

Closed · wants to merge 2 commits

18 changes: 18 additions & 0 deletions rust/src/storage/mod.rs
@@ -252,6 +252,14 @@ pub enum StorageError {
        #[from]
        source: rusoto_core::RusotoError<rusoto_s3::DeleteObjectError>,
    },
    /// Error returned when a batch delete request fails.
    #[cfg(any(feature = "s3", feature = "s3-rustls"))]
    #[error("Failed to delete S3 objects: {source}")]
    S3BatchDelete {
        /// The underlying Rusoto S3 error.
        #[from]
        source: rusoto_core::RusotoError<rusoto_s3::DeleteObjectsError>,
    },
    /// Error representing a failure when copying an S3 object
    #[cfg(any(feature = "s3", feature = "s3-rustls"))]
    #[error("Failed to copy S3 object: {source}")]
@@ -430,6 +438,16 @@ pub trait StorageBackend: Send + Sync + Debug {

    /// Deletes object by `path`.
    async fn delete_obj(&self, path: &str) -> Result<(), StorageError>;

    /// Deletes objects by `paths`.
    async fn delete_objs(&self, paths: &[&str]) -> Result<(), StorageError> {
        // Default implementation: delete each object sequentially.
        for path in paths {
            self.delete_obj(path).await?;
        }

        Ok(())
    }
}
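
Any backend that does not override `delete_objs` inherits this sequential fallback, one `delete_obj` call per path. A usage sketch (the helper name and paths are hypothetical, the crate's `StorageBackend` and `StorageError` types are assumed in scope, and the trait is assumed object-safe via `#[async_trait]`, as the `async fn` signatures suggest):

```rust
// Hypothetical caller; works with any StorageBackend implementation.
async fn clean_up_temp_files(backend: &dyn StorageBackend) -> Result<(), StorageError> {
    let paths = [
        "s3://my-bucket/delta-table/tmp/part-0000.parquet",
        "s3://my-bucket/delta-table/tmp/part-0001.parquet",
    ];
    // Backends without a specialized implementation delete the
    // objects one by one via the default method above.
    backend.delete_objs(&paths).await
}
```
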

/// Dynamically construct a Storage backend trait object based on scheme for provided URI
53 changes: 51 additions & 2 deletions rust/src/storage/s3/mod.rs
@@ -10,8 +10,8 @@ use log::debug;
use rusoto_core::{HttpClient, Region, RusotoError};
use rusoto_credential::AutoRefreshingProvider;
use rusoto_s3::{
    CopyObjectRequest, Delete, DeleteObjectRequest, DeleteObjectsRequest, GetObjectRequest,
    HeadObjectRequest, ListObjectsV2Request, ObjectIdentifier, PutObjectRequest, S3Client, S3,
};
use rusoto_sts::{StsAssumeRoleSessionCredentialsProvider, StsClient, WebIdentityProvider};
use serde::{Deserialize, Serialize};
@@ -426,6 +426,55 @@ impl StorageBackend for S3StorageBackend {

        Ok(())
    }

    async fn delete_objs(&self, paths: &[&str]) -> Result<(), StorageError> {
        debug!("delete s3 objects: {:?}...", paths);
        if paths.is_empty() {
            return Ok(());
        }

        // The S3 DeleteObjects API accepts at most 1,000 keys per request,
        // so split the paths into chunks of that size.
        let chunks = paths.chunks(1000);

        let bucket = parse_uri(paths[0])?.into_s3object()?.bucket;

        // All paths must reference the same bucket.
        paths.iter().skip(1).try_for_each(|path| {
            let other_bucket = parse_uri(path)?.into_s3object()?.bucket;
            if other_bucket != bucket {
                Err(StorageError::Generic(
                    format!("All buckets of the paths in `S3StorageBackend::delete_objs` should be the same. Expected '{}', got '{}'", bucket, other_bucket),
                ))
            } else {
                Ok(())
            }
        })?;

        for chunk in chunks {
            let objects = chunk
                .iter()
                .map(|path| {
                    Ok(ObjectIdentifier {
                        key: parse_uri(path)?.into_s3object()?.key.to_string(),
                        ..Default::default()
                    })
                })
                .collect::<Result<Vec<_>, StorageError>>()?;
            let delete = Delete {
                objects,
                ..Default::default()
            };
            let req = DeleteObjectsRequest {
                bucket: bucket.to_string(),
                delete,
                ..Default::default()
            };

            self.client.delete_objects(req).await?;
        }

        Ok(())
    }
}
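
A usage sketch under stated assumptions (the bucket and keys are hypothetical, and `S3StorageBackend::new()` returning a `Result` is assumed from the rest of this file): all paths in a single call must share one bucket, and the chunking above splits anything beyond 1,000 keys into multiple DeleteObjects requests.

```rust
// Hypothetical example: batch-delete two data files from one bucket.
async fn delete_stale_files() -> Result<(), StorageError> {
    let backend = S3StorageBackend::new()?;
    let paths = [
        "s3://my-bucket/delta-table/part-0000-abc.snappy.parquet",
        "s3://my-bucket/delta-table/part-0001-def.snappy.parquet",
    ];
    // Both keys share `my-bucket`, so the same-bucket check passes and
    // a single DeleteObjects request is issued.
    backend.delete_objs(&paths).await
}
```

Compared with the trait's per-object default, this cuts the request count by up to a factor of 1,000, which matters for vacuum-style operations that remove many files at once.
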

/// A lock that has been successfully acquired