From e2d3577f8fe6adc597d8660fbab9ac5eb7f5d695 Mon Sep 17 00:00:00 2001 From: Tom Deseyn Date: Tue, 28 Sep 2021 11:26:42 +0200 Subject: [PATCH] FileSystem.Unix: improve CopyFile. Like the upcoming version of GNU coreutils 'cp' prefer a copy-on-write clone. This shares the physical storage between files, which means no data needs to copied. CoW-clones are supported by a number of Linux file systems, like Btrfs, XFS, and overlayfs. Eliminate a 'stat' call that is always performed for checking if the target is a directory by only performing the check when the 'open' syscall reports an error. Eliminate a 'stat' call for retrieving the file size of the source by passing through the length that was retrieved when checking the opened file is not a directory. Create the destination with file permissions that match the source. We still need to fchmod due to umask being applied to the open mode. When performing a manual copy, limit the allocated buffer for small files. And, avoid the last 'read' call by checking when we've copied the expected nr of bytes. --- .../Unix/System.Native/Interop.CopyFile.cs | 2 +- .../Native/Unix/System.Native/pal_io.c | 91 +++++++++++-------- .../Native/Unix/System.Native/pal_io.h | 2 +- .../Win32/SafeHandles/SafeFileHandle.Unix.cs | 67 ++++++++++---- .../src/System/IO/FileSystem.Unix.cs | 27 +++--- 5 files changed, 118 insertions(+), 71 deletions(-) diff --git a/src/libraries/Common/src/Interop/Unix/System.Native/Interop.CopyFile.cs b/src/libraries/Common/src/Interop/Unix/System.Native/Interop.CopyFile.cs index 89217982ea51b..3bdbcf8c4ff80 100644 --- a/src/libraries/Common/src/Interop/Unix/System.Native/Interop.CopyFile.cs +++ b/src/libraries/Common/src/Interop/Unix/System.Native/Interop.CopyFile.cs @@ -10,6 +10,6 @@ internal static partial class Interop internal static partial class Sys { [GeneratedDllImport(Libraries.SystemNative, EntryPoint = "SystemNative_CopyFile", SetLastError = true)] - internal static partial int CopyFile(SafeFileHandle source, SafeFileHandle destination); + internal static partial int CopyFile(SafeFileHandle source, SafeFileHandle destination, long sourceLength); } } diff --git a/src/libraries/Native/Unix/System.Native/pal_io.c b/src/libraries/Native/Unix/System.Native/pal_io.c index 49aaeca6c2078..f4c60f2308011 100644 --- a/src/libraries/Native/Unix/System.Native/pal_io.c +++ b/src/libraries/Native/Unix/System.Native/pal_io.c @@ -61,6 +61,12 @@ extern int getpeereid(int, uid_t *__restrict__, gid_t *__restrict__); #endif #endif +#ifdef __linux__ +#ifndef FICLONE +#define FICLONE _IOW(0x94, 9, int) +#endif +#endif + #if HAVE_STAT64 #define stat_ stat64 #define fstat_ fstat64 @@ -1085,11 +1091,16 @@ int32_t SystemNative_Write(intptr_t fd, const void* buffer, int32_t bufferSize) #if !HAVE_FCOPYFILE // Read all data from inFd and write it to outFd -static int32_t CopyFile_ReadWrite(int inFd, int outFd) +static int32_t CopyFile_ReadWrite(int inFd, int outFd, int64_t fileLength) { - // Allocate a buffer - const int BufferLength = 80 * 1024 * sizeof(char); - char* buffer = (char*)malloc(BufferLength); + // Allocate a buffer. + size_t bufferLength = 80 * 1024 * sizeof(char); + if (fileLength > 0 && fileLength < (int64_t)bufferLength) + { + // Limit buffer to file length if it is smaller. + bufferLength = (size_t)fileLength; + } + char* buffer = (char*)malloc(bufferLength); if (buffer == NULL) { return -1; @@ -1100,7 +1111,7 @@ static int32_t CopyFile_ReadWrite(int inFd, int outFd) { // Read up to what will fit in our buffer. We're done if we get back 0 bytes. ssize_t bytesRead; - while ((bytesRead = read(inFd, buffer, BufferLength)) < 0 && errno == EINTR); + while ((bytesRead = read(inFd, buffer, bufferLength)) < 0 && errno == EINTR); if (bytesRead == -1) { int tmp = errno; @@ -1130,6 +1141,14 @@ static int32_t CopyFile_ReadWrite(int inFd, int outFd) assert(bytesWritten >= 0); bytesRead -= bytesWritten; offset += bytesWritten; + if (fileLength > 0) + { + fileLength -= bytesWritten; + if (fileLength == 0) + { + break; + } + } } } @@ -1138,7 +1157,7 @@ static int32_t CopyFile_ReadWrite(int inFd, int outFd) } #endif // !HAVE_FCOPYFILE -int32_t SystemNative_CopyFile(intptr_t sourceFd, intptr_t destinationFd) +int32_t SystemNative_CopyFile(intptr_t sourceFd, intptr_t destinationFd, int64_t sourceLength) { int inFd = ToFileDescriptor(sourceFd); int outFd = ToFileDescriptor(destinationFd); @@ -1151,28 +1170,29 @@ int32_t SystemNative_CopyFile(intptr_t sourceFd, intptr_t destinationFd) #else // Get the stats on the source file. int ret; - struct stat_ sourceStat; bool copied = false; -#if HAVE_SENDFILE_4 - // If sendfile is available (Linux), try to use it, as the whole copy - // can be performed in the kernel, without lots of unnecessary copying. - while ((ret = fstat_(inFd, &sourceStat)) < 0 && errno == EINTR); - if (ret != 0) +#ifdef FICLONE + // Try copying data using a copy-on-write clone. This shares storage between the files. + while ((ret = ioctl(outFd, FICLONE, inFd)) < 0 && errno == EINTR); + copied = ret == 0; + if (!copied && errno != EOPNOTSUPP // not supported + && errno != EXDEV // not same file system + && errno != ETXTBSY) // swapfiles can't be reflinked { return -1; } - - // On 32-bit, if you use 64-bit offsets, the last argument of `sendfile' will be a - // `size_t' a 32-bit integer while the `st_size' field of the stat structure will be off64_t. - // So `size' will have to be `uint64_t'. In all other cases, it will be `size_t'. - uint64_t size = (uint64_t)sourceStat.st_size; - if (size != 0) +#endif +#if HAVE_SENDFILE_4 + // Try copying the data using sendfile. + // Certain files (e.g. procfs) may return a size of 0 even though reading from then will + // produce data. We avoid using sendfile with the queried size if the size is reported as 0. + if (!copied && sourceLength != 0) { // Note that per man page for large files, you have to iterate until the // whole file is copied (Linux has a limit of 0x7ffff000 bytes copied). - while (size > 0) + do { - ssize_t sent = sendfile(outFd, inFd, NULL, (size >= SSIZE_MAX ? SSIZE_MAX : (size_t)size)); + ssize_t sent = sendfile(outFd, inFd, NULL, (sourceLength >= SSIZE_MAX ? SSIZE_MAX : (size_t)sourceLength)); if (sent < 0) { if (errno != EINVAL && errno != ENOSYS) @@ -1186,34 +1206,24 @@ int32_t SystemNative_CopyFile(intptr_t sourceFd, intptr_t destinationFd) } else { - assert((size_t)sent <= size); - size -= (size_t)sent; + assert(sent <= sourceLength); + sourceLength -= sent; } - } + } while (sourceLength > 0); - if (size == 0) - { - copied = true; - } + copied = true; } - // sendfile couldn't be used; fall back to a manual copy below. This could happen - // if we're on an old kernel, for example, where sendfile could only be used - // with sockets and not regular files. Additionally, certain files (e.g. procfs) - // may return a size of 0 even though reading from then will produce data. As such, - // we avoid using sendfile with the queried size if the size is reported as 0. #endif // HAVE_SENDFILE_4 - // Manually read all data from the source and write it to the destination. - if (!copied && CopyFile_ReadWrite(inFd, outFd) != 0) + // Perform a manual copy. + if (!copied && CopyFile_ReadWrite(inFd, outFd, sourceLength) != 0) { return -1; } - // Now that the data from the file has been copied, copy over metadata - // from the source file. First copy the file times. - // If futimes nor futimes are available on this platform, file times will - // not be copied over. + // Copy file times. + struct stat_ sourceStat; while ((ret = fstat_(inFd, &sourceStat)) < 0 && errno == EINTR); if (ret == 0) { @@ -1242,7 +1252,10 @@ int32_t SystemNative_CopyFile(intptr_t sourceFd, intptr_t destinationFd) { return -1; } - // Then copy permissions. + + // Copy permissions. + // Even though managed code created the file with permissions matching those of the source file, + // we need to copy permissions because the open permissions may be filtered by 'umask'. while ((ret = fchmod(outFd, sourceStat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO))) < 0 && errno == EINTR); if (ret != 0 && errno != EPERM) // See EPERM comment above { diff --git a/src/libraries/Native/Unix/System.Native/pal_io.h b/src/libraries/Native/Unix/System.Native/pal_io.h index 0e5d4cae2feb8..726b097aff133 100644 --- a/src/libraries/Native/Unix/System.Native/pal_io.h +++ b/src/libraries/Native/Unix/System.Native/pal_io.h @@ -675,7 +675,7 @@ PALEXPORT int32_t SystemNative_Write(intptr_t fd, const void* buffer, int32_t bu * * Returns 0 on success; otherwise, returns -1 and sets errno. */ -PALEXPORT int32_t SystemNative_CopyFile(intptr_t sourceFd, intptr_t destinationFd); +PALEXPORT int32_t SystemNative_CopyFile(intptr_t sourceFd, intptr_t destinationFd, int64_t sourceLength); /** * Initializes a new inotify instance and returns a file diff --git a/src/libraries/System.Private.CoreLib/src/Microsoft/Win32/SafeHandles/SafeFileHandle.Unix.cs b/src/libraries/System.Private.CoreLib/src/Microsoft/Win32/SafeHandles/SafeFileHandle.Unix.cs index 13847032fbec7..447370fff80cd 100644 --- a/src/libraries/System.Private.CoreLib/src/Microsoft/Win32/SafeHandles/SafeFileHandle.Unix.cs +++ b/src/libraries/System.Private.CoreLib/src/Microsoft/Win32/SafeHandles/SafeFileHandle.Unix.cs @@ -57,12 +57,8 @@ internal bool SupportsRandomAccess internal void EnsureThreadPoolBindingInitialized() { /* nop */ } - /// Opens the specified file with the requested flags and mode. - /// The path to the file. - /// The flags with which to open the file. - /// The mode for opening the file. - /// A SafeFileHandle for the opened file. - private static SafeFileHandle Open(string path, Interop.Sys.OpenFlags flags, int mode) + private static SafeFileHandle Open(string path, Interop.Sys.OpenFlags flags, int mode, + Func? createOpenException) { Debug.Assert(path != null); SafeFileHandle handle = Interop.Sys.Open(path, flags, mode); @@ -73,6 +69,11 @@ private static SafeFileHandle Open(string path, Interop.Sys.OpenFlags flags, int Interop.ErrorInfo error = Interop.Sys.GetLastErrorInfo(); handle.Dispose(); + if (createOpenException?.Invoke(error, flags, path) is { } ex) + { + throw ex; + } + // If we fail to open the file due to a path not existing, we need to know whether to blame // the file itself or its directory. If we're creating the file, then we blame the directory, // otherwise we blame the file. @@ -155,30 +156,52 @@ public override bool IsInvalid } } - internal static SafeFileHandle Open(string fullPath, FileMode mode, FileAccess access, FileShare share, FileOptions options, long preallocationSize) - { - // Translate the arguments into arguments for an open call. - Interop.Sys.OpenFlags openFlags = PreOpenConfigurationFromOptions(mode, access, share, options); - - // If the file gets created a new, we'll select the permissions for it. Most Unix utilities by default use 666 (read and - // write for all), so we do the same (even though this doesn't match Windows, where by default it's possible to write out - // a file and then execute it). No matter what we choose, it'll be subject to the umask applied by the system, such that the - // actual permissions will typically be less than what we select here. - const Interop.Sys.Permissions OpenPermissions = + // If the file gets created a new, we'll select the permissions for it. Most Unix utilities by default use 666 (read and + // write for all), so we do the same (even though this doesn't match Windows, where by default it's possible to write out + // a file and then execute it). No matter what we choose, it'll be subject to the umask applied by the system, such that the + // actual permissions will typically be less than what we select here. + private const Interop.Sys.Permissions DefaultOpenPermissions = Interop.Sys.Permissions.S_IRUSR | Interop.Sys.Permissions.S_IWUSR | Interop.Sys.Permissions.S_IRGRP | Interop.Sys.Permissions.S_IWGRP | Interop.Sys.Permissions.S_IROTH | Interop.Sys.Permissions.S_IWOTH; + // Specialized Open that returns the file length and permissions of the opened file. + // This information is retrieved from the 'stat' syscall that must be performed to ensure the path is not a directory. + internal static SafeFileHandle OpenReadOnly(string fullPath, FileOptions options, out long fileLength, out Interop.Sys.Permissions filePermissions) + { + SafeFileHandle handle = Open(fullPath, FileMode.Open, FileAccess.Read, FileShare.Read, options, preallocationSize: 0, DefaultOpenPermissions, out fileLength, out filePermissions, null); + Debug.Assert(fileLength >= 0); + return handle; + } + + internal static SafeFileHandle Open(string fullPath, FileMode mode, FileAccess access, FileShare share, FileOptions options, long preallocationSize, + Interop.Sys.Permissions openPermissions = DefaultOpenPermissions, + Func? createOpenException = null) + { + long fileLength; + Interop.Sys.Permissions filePermissions; + return Open(fullPath, mode, access, share, options, preallocationSize, openPermissions, out fileLength, out filePermissions, null); + } + + private static SafeFileHandle Open(string fullPath, FileMode mode, FileAccess access, FileShare share, FileOptions options, long preallocationSize, + Interop.Sys.Permissions openPermissions, + out long fileLength, + out Interop.Sys.Permissions filePermissions, + Func? createOpenException = null) + { + // Translate the arguments into arguments for an open call. + Interop.Sys.OpenFlags openFlags = PreOpenConfigurationFromOptions(mode, access, share, options); + SafeFileHandle? safeFileHandle = null; try { while (true) { - safeFileHandle = Open(fullPath, openFlags, (int)OpenPermissions); + safeFileHandle = Open(fullPath, openFlags, (int)openPermissions, createOpenException); // When Init return false, the path has changed to another file entry, and // we need to re-open the path to reflect that. - if (safeFileHandle.Init(fullPath, mode, access, share, options, preallocationSize)) + if (safeFileHandle.Init(fullPath, mode, access, share, options, preallocationSize, out fileLength, out filePermissions)) { return safeFileHandle; } @@ -275,10 +298,13 @@ private static Interop.Sys.OpenFlags PreOpenConfigurationFromOptions(FileMode mo return flags; } - private bool Init(string path, FileMode mode, FileAccess access, FileShare share, FileOptions options, long preallocationSize) + private bool Init(string path, FileMode mode, FileAccess access, FileShare share, FileOptions options, long preallocationSize, + out long fileLength, out Interop.Sys.Permissions filePermissions) { Interop.Sys.FileStatus status = default; bool statusHasValue = false; + fileLength = -1; + filePermissions = 0; // Make sure our handle is not a directory. // We can omit the check when write access is requested. open will have failed with EISDIR. @@ -300,6 +326,9 @@ private bool Init(string path, FileMode mode, FileAccess access, FileShare share _canSeek = NullableBool.True; Debug.Assert(Interop.Sys.LSeek(this, 0, Interop.Sys.SeekWhence.SEEK_CUR) >= 0); } + + fileLength = status.Size; + filePermissions = (Interop.Sys.Permissions)(status.Mode & (int)Interop.Sys.Permissions.Mask); } IsAsync = (options & FileOptions.Asynchronous) != 0; diff --git a/src/libraries/System.Private.CoreLib/src/System/IO/FileSystem.Unix.cs b/src/libraries/System.Private.CoreLib/src/System/IO/FileSystem.Unix.cs index fc4b2fec8dde1..3ea4850739e6f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/IO/FileSystem.Unix.cs +++ b/src/libraries/System.Private.CoreLib/src/System/IO/FileSystem.Unix.cs @@ -11,25 +11,30 @@ namespace System.IO /// Provides an implementation of FileSystem for Unix systems. internal static partial class FileSystem { - internal const int DefaultBufferSize = 4096; - // On Linux, the maximum number of symbolic links that are followed while resolving a pathname is 40. // See: https://man7.org/linux/man-pages/man7/path_resolution.7.html private const int MaxFollowedLinks = 40; public static void CopyFile(string sourceFullPath, string destFullPath, bool overwrite) { - // If the destination path points to a directory, we throw to match Windows behaviour - if (DirectoryExists(destFullPath)) - { - throw new IOException(SR.Format(SR.Arg_FileIsDirectory_Name, destFullPath)); - } + long fileLength; + Interop.Sys.Permissions filePermissions; + using SafeFileHandle src = SafeFileHandle.OpenReadOnly(sourceFullPath, FileOptions.None, out fileLength, out filePermissions); + using SafeFileHandle dst = SafeFileHandle.Open(destFullPath, overwrite ? FileMode.Create : FileMode.CreateNew, + FileAccess.ReadWrite, FileShare.None, FileOptions.None, preallocationSize: 0, + openPermissions: filePermissions, CreateOpenException); + + Interop.CheckIo(Interop.Sys.CopyFile(src, dst, fileLength)); - // Copy the contents of the file from the source to the destination, creating the destination in the process - using (SafeFileHandle src = File.OpenHandle(sourceFullPath, FileMode.Open, FileAccess.Read, FileShare.Read, FileOptions.None)) - using (SafeFileHandle dst = File.OpenHandle(destFullPath, overwrite ? FileMode.Create : FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None, FileOptions.None)) + static Exception? CreateOpenException(Interop.ErrorInfo error, Interop.Sys.OpenFlags flags, string path) { - Interop.CheckIo(Interop.Sys.CopyFile(src, dst)); + // If the destination path points to a directory, we throw to match Windows behaviour. + if (error.Error == Interop.Error.EEXIST && DirectoryExists(path)) + { + return new IOException(SR.Format(SR.Arg_FileIsDirectory_Name, path)); + } + + return null; // Let SafeFileHandle create the exception for this error. } }