Skip to content

Commit

Permalink
Use S3 batch deletes for Delta Lake vacuum procedure
Browse files: browse the repository at this point in the history
  • Loading branch information
ebyhr committed Nov 19, 2022
1 parent 916ece2 commit 2332ed7
Showing 1 changed file with 14 additions and 2 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -46,6 +46,7 @@
import java.io.IOException;
import java.lang.invoke.MethodHandle;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Set;
Expand All @@ -68,6 +69,7 @@ public class VacuumProcedure
implements Provider<Procedure>
{
private static final Logger log = Logger.get(VacuumProcedure.class);
private static final int DELETE_BATCH_SIZE = 1000;

private static final MethodHandle VACUUM;

Expand Down Expand Up @@ -209,6 +211,7 @@ private void doVacuum(
long retainedUnknownFiles = 0;
long removedFiles = 0;

List<String> filesToDelete = new ArrayList<>();
FileIterator listing = fileSystem.listFiles(tableLocation.toString());
while (listing.hasNext()) {
FileEntry entry = listing.next();
Expand Down Expand Up @@ -254,8 +257,17 @@ private void doVacuum(
path,
modificationTime,
modificationInstant);
fileSystem.deleteFile(path);
removedFiles++;
filesToDelete.add(path);
if (filesToDelete.size() == DELETE_BATCH_SIZE) {
fileSystem.deleteFiles(filesToDelete);
removedFiles += filesToDelete.size();
filesToDelete.clear();
}
}

if (!filesToDelete.isEmpty()) {
fileSystem.deleteFiles(filesToDelete);
removedFiles += filesToDelete.size();
}

log.info(
Expand Down

0 comments on commit 2332ed7

Please sign in to comment.