From 230be161176bd6f1251077af7674f80d38ff1e25 Mon Sep 17 00:00:00 2001 From: arostovtsev Date: Thu, 10 Dec 2020 08:33:21 -0800 Subject: [PATCH] Do not interleave readdir() calls with deletion of directory entries. On macOS and some other non-Linux OSes, on some filesystems, readdir(dir) may return NULL (with zero errno) after an entry in dir has been deleted. We thus need to readdir() all directory entries before starting to delete them. A benchmark (deleting 10 copies of the Linux kernel source) seems to show that the new approach is approximately as fast as the previous one (slightly faster on Linux, slightly slower on Mac), and in any case, is noticeably faster than the system's /bin/rm. Bazel's previous unix_jni DeleteTreesBelow implementation: 6.987 s (Linux/ext4); 89.44 s (Mac/APFS) New Bazel unix_jni DeleteTreesBelow implementation: 6.971 s (Linux/ext4); 90.46 s (Mac/APFS) `rm -rf`: 7.323 s (Linux/ext4); 99.09 s (Mac/APFS) RELNOTES: None. PiperOrigin-RevId: 346790610 --- src/main/native/unix_jni.cc | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/src/main/native/unix_jni.cc b/src/main/native/unix_jni.cc index 6020ecaa4b9d8a..5518a96017c5c7 100644 --- a/src/main/native/unix_jni.cc +++ b/src/main/native/unix_jni.cc @@ -907,6 +907,13 @@ static int DeleteTreesBelow(JNIEnv* env, std::vector* dir_path, } dir_path->push_back(entry); + // On macOS and some other non-Linux OSes, on some filesystems, readdir(dir) + // may return NULL after an entry in dir is deleted even if not all files have + // been read yet - see https://support.apple.com/kb/TA21420; we thus read all + // the names of dir's entries before deleting. We don't want to simply use + // fts(3) because we want to be able to chmod at any point in the directory + // hierarchy to retry a filesystem operation after hitting an EACCES. + std::vector dir_files, dir_subdirs; for (;;) { errno = 0; struct dirent* de = readdir(dir); @@ -927,15 +934,31 @@ static int DeleteTreesBelow(JNIEnv* env, std::vector* dir_path, break; } if (is_dir) { - if (DeleteTreesBelow(env, dir_path, dirfd(dir), de->d_name) == -1) { + dir_subdirs.push_back(de->d_name); + } else { + dir_files.push_back(de->d_name); + } + } + if (env->ExceptionOccurred() == NULL) { + for (const auto &file : dir_files) { + if (ForceDelete(env, *dir_path, dirfd(dir), file.c_str(), false) == -1) { CHECK(env->ExceptionOccurred() != NULL); break; } } - - if (ForceDelete(env, *dir_path, dirfd(dir), de->d_name, is_dir) == -1) { - CHECK(env->ExceptionOccurred() != NULL); - break; + // DeleteTreesBelow is recursive; don't hold on to file names unnecessarily. + dir_files.clear(); + } + if (env->ExceptionOccurred() == NULL) { + for (const auto &subdir : dir_subdirs) { + if (DeleteTreesBelow(env, dir_path, dirfd(dir), subdir.c_str()) == -1) { + CHECK(env->ExceptionOccurred() != NULL); + break; + } + if (ForceDelete(env, *dir_path, dirfd(dir), subdir.c_str(), true) == -1) { + CHECK(env->ExceptionOccurred() != NULL); + break; + } } } if (closedir(dir) == -1) {