Skip to content

Commit

Permalink
FileSystem.Unix: improve CopyFile. (#59695)
Browse files Browse the repository at this point in the history
* FileSystem.Unix: improve CopyFile.

Like the upcoming version of GNU coreutils 'cp', prefer a copy-on-write clone.
This shares the physical storage between files, which means no data needs to be copied.
CoW-clones are supported by a number of Linux file systems, like Btrfs, XFS, and overlayfs.

Eliminate a 'stat' call that is always performed for checking if the target is a directory
by only performing the check when the 'open' syscall reports an error.

Eliminate a 'stat' call for retrieving the file size of the source by passing through
the length that was retrieved when checking the opened file is not a directory.

Create the destination with file permissions that match the source.
We still need to fchmod due to umask being applied to the open mode.

When performing a manual copy, limit the allocated buffer for small files.
Also, avoid the last 'read' call by checking when we've copied the expected number of bytes.

* Don't FICLONE for zero sourceLength

* PR feedback

* When using sendfile, don't loop when source file gets truncated.

* Fall through when FICLONE fails.

* Don't stop CopyFile_ReadWrite until read returns zero.

* Revert all changes to CopyFile_ReadWrite

* Move comment a few lines up.

* Fix unused error.
  • Loading branch information
tmds committed Nov 15, 2021
1 parent a888829 commit 9b83294
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 67 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ internal static partial class Interop
internal static partial class Sys
{
[GeneratedDllImport(Libraries.SystemNative, EntryPoint = "SystemNative_CopyFile", SetLastError = true)]
internal static partial int CopyFile(SafeFileHandle source, SafeFileHandle destination);
internal static partial int CopyFile(SafeFileHandle source, SafeFileHandle destination, long sourceLength);
}
}
77 changes: 42 additions & 35 deletions src/libraries/Native/Unix/System.Native/pal_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@ extern int getpeereid(int, uid_t *__restrict__, gid_t *__restrict__);
#endif
#endif

// FICLONE is missing from the headers of RHEL7, the distribution on which the
// portable build is produced. Supply the definition ourselves so that the
// macro is available in every Linux build.
#if defined(__linux__) && !defined(FICLONE)
#define FICLONE _IOW(0x94, 9, int)
#endif

#if HAVE_STAT64
#define stat_ stat64
#define fstat_ fstat64
Expand Down Expand Up @@ -1138,8 +1146,10 @@ static int32_t CopyFile_ReadWrite(int inFd, int outFd)
}
#endif // !HAVE_FCOPYFILE

int32_t SystemNative_CopyFile(intptr_t sourceFd, intptr_t destinationFd)
int32_t SystemNative_CopyFile(intptr_t sourceFd, intptr_t destinationFd, int64_t sourceLength)
{
(void)sourceLength; // unused on some platforms.

int inFd = ToFileDescriptor(sourceFd);
int outFd = ToFileDescriptor(destinationFd);

Expand All @@ -1151,28 +1161,27 @@ int32_t SystemNative_CopyFile(intptr_t sourceFd, intptr_t destinationFd)
#else
// Get the stats on the source file.
int ret;
struct stat_ sourceStat;
bool copied = false;
#if HAVE_SENDFILE_4
// If sendfile is available (Linux), try to use it, as the whole copy
// can be performed in the kernel, without lots of unnecessary copying.
while ((ret = fstat_(inFd, &sourceStat)) < 0 && errno == EINTR);
if (ret != 0)

// Certain files (e.g. procfs) may return a size of 0 even though reading them will
// produce data. We use plain read/write for those.
#ifdef FICLONE
// Try copying data using a copy-on-write clone. This shares storage between the files.
if (sourceLength != 0)
{
return -1;
while ((ret = ioctl(outFd, FICLONE, inFd)) < 0 && errno == EINTR);
copied = ret == 0;
}

// On 32-bit, if you use 64-bit offsets, the last argument of `sendfile' will be a
// `size_t' a 32-bit integer while the `st_size' field of the stat structure will be off64_t.
// So `size' will have to be `uint64_t'. In all other cases, it will be `size_t'.
uint64_t size = (uint64_t)sourceStat.st_size;
if (size != 0)
#endif
#if HAVE_SENDFILE_4
// Try copying the data using sendfile.
if (!copied && sourceLength != 0)
{
// Note that per man page for large files, you have to iterate until the
// whole file is copied (Linux has a limit of 0x7ffff000 bytes copied).
while (size > 0)
do
{
ssize_t sent = sendfile(outFd, inFd, NULL, (size >= SSIZE_MAX ? SSIZE_MAX : (size_t)size));
ssize_t sent = sendfile(outFd, inFd, NULL, (sourceLength >= SSIZE_MAX ? SSIZE_MAX : (size_t)sourceLength));
if (sent < 0)
{
if (errno != EINVAL && errno != ENOSYS)
Expand All @@ -1184,36 +1193,31 @@ int32_t SystemNative_CopyFile(intptr_t sourceFd, intptr_t destinationFd)
break;
}
}
else if (sent == 0)
{
// The file was truncated (or maybe some other condition occurred).
// Perform the remaining copying using read/write.
break;
}
else
{
assert((size_t)sent <= size);
size -= (size_t)sent;
assert(sent <= sourceLength);
sourceLength -= sent;
}
}
} while (sourceLength > 0);

if (size == 0)
{
copied = true;
}
copied = sourceLength == 0;
}

// sendfile couldn't be used; fall back to a manual copy below. This could happen
// if we're on an old kernel, for example, where sendfile could only be used
// with sockets and not regular files. Additionally, certain files (e.g. procfs)
// may return a size of 0 even though reading from them will produce data. As such,
// we avoid using sendfile with the queried size if the size is reported as 0.
#endif // HAVE_SENDFILE_4

// Manually read all data from the source and write it to the destination.
// Perform a manual copy.
if (!copied && CopyFile_ReadWrite(inFd, outFd) != 0)
{
return -1;
}

// Now that the data from the file has been copied, copy over metadata
// from the source file. First copy the file times.
// If futimes nor futimes are available on this platform, file times will
// not be copied over.
// Copy file times.
struct stat_ sourceStat;
while ((ret = fstat_(inFd, &sourceStat)) < 0 && errno == EINTR);
if (ret == 0)
{
Expand Down Expand Up @@ -1242,7 +1246,10 @@ int32_t SystemNative_CopyFile(intptr_t sourceFd, intptr_t destinationFd)
{
return -1;
}
// Then copy permissions.

// Copy permissions.
// Even though managed code created the file with permissions matching those of the source file,
// we need to copy permissions because the open permissions may be filtered by 'umask'.
while ((ret = fchmod(outFd, sourceStat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO))) < 0 && errno == EINTR);
if (ret != 0 && errno != EPERM) // See EPERM comment above
{
Expand Down
2 changes: 1 addition & 1 deletion src/libraries/Native/Unix/System.Native/pal_io.h
Original file line number Diff line number Diff line change
Expand Up @@ -675,7 +675,7 @@ PALEXPORT int32_t SystemNative_Write(intptr_t fd, const void* buffer, int32_t bu
*
* Returns 0 on success; otherwise, returns -1 and sets errno.
*/
PALEXPORT int32_t SystemNative_CopyFile(intptr_t sourceFd, intptr_t destinationFd);
PALEXPORT int32_t SystemNative_CopyFile(intptr_t sourceFd, intptr_t destinationFd, int64_t sourceLength);

/**
* Initializes a new inotify instance and returns a file
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,8 @@ internal bool SupportsRandomAccess

internal void EnsureThreadPoolBindingInitialized() { /* nop */ }

/// <summary>Opens the specified file with the requested flags and mode.</summary>
/// <param name="path">The path to the file.</param>
/// <param name="flags">The flags with which to open the file.</param>
/// <param name="mode">The mode for opening the file.</param>
/// <returns>A SafeFileHandle for the opened file.</returns>
private static SafeFileHandle Open(string path, Interop.Sys.OpenFlags flags, int mode)
private static SafeFileHandle Open(string path, Interop.Sys.OpenFlags flags, int mode,
Func<Interop.ErrorInfo, Interop.Sys.OpenFlags, string, Exception?>? createOpenException)
{
Debug.Assert(path != null);
SafeFileHandle handle = Interop.Sys.Open(path, flags, mode);
Expand All @@ -73,6 +69,11 @@ private static SafeFileHandle Open(string path, Interop.Sys.OpenFlags flags, int
Interop.ErrorInfo error = Interop.Sys.GetLastErrorInfo();
handle.Dispose();

if (createOpenException?.Invoke(error, flags, path) is Exception ex)
{
throw ex;
}

// If we fail to open the file due to a path not existing, we need to know whether to blame
// the file itself or its directory. If we're creating the file, then we blame the directory,
// otherwise we blame the file.
Expand Down Expand Up @@ -155,30 +156,52 @@ public override bool IsInvalid
}
}

internal static SafeFileHandle Open(string fullPath, FileMode mode, FileAccess access, FileShare share, FileOptions options, long preallocationSize)
{
// Translate the arguments into arguments for an open call.
Interop.Sys.OpenFlags openFlags = PreOpenConfigurationFromOptions(mode, access, share, options);

// If the file gets created anew, we'll select the permissions for it. Most Unix utilities by default use 666 (read and
// write for all), so we do the same (even though this doesn't match Windows, where by default it's possible to write out
// a file and then execute it). No matter what we choose, it'll be subject to the umask applied by the system, such that the
// actual permissions will typically be less than what we select here.
const Interop.Sys.Permissions OpenPermissions =
// If the file gets created anew, we'll select the permissions for it. Most Unix utilities by default use 666 (read and
// write for all), so we do the same (even though this doesn't match Windows, where by default it's possible to write out
// a file and then execute it). No matter what we choose, it'll be subject to the umask applied by the system, such that the
// actual permissions will typically be less than what we select here.
private const Interop.Sys.Permissions DefaultOpenPermissions =
Interop.Sys.Permissions.S_IRUSR | Interop.Sys.Permissions.S_IWUSR |
Interop.Sys.Permissions.S_IRGRP | Interop.Sys.Permissions.S_IWGRP |
Interop.Sys.Permissions.S_IROTH | Interop.Sys.Permissions.S_IWOTH;

// Opens an existing file for reading and additionally reports its length and permission bits.
// Both values are taken from the 'stat' syscall that has to be made anyway to ensure the path is not a directory.
internal static SafeFileHandle OpenReadOnly(string fullPath, FileOptions options, out long fileLength, out Interop.Sys.Permissions filePermissions)
{
    SafeFileHandle readOnlyHandle = Open(
        fullPath,
        FileMode.Open,
        FileAccess.Read,
        FileShare.Read,
        options,
        preallocationSize: 0,
        openPermissions: DefaultOpenPermissions,
        out fileLength,
        out filePermissions,
        createOpenException: null);

    // Init seeds the length with -1; a non-negative value confirms it was replaced by the size from 'stat'.
    Debug.Assert(fileLength >= 0);
    return readOnlyHandle;
}

/// <summary>Opens the file at <paramref name="fullPath"/> and returns a handle to it.</summary>
/// <param name="fullPath">The path of the file to open.</param>
/// <param name="mode">Whether to open an existing file or create a new one.</param>
/// <param name="access">The requested read/write access.</param>
/// <param name="share">The requested sharing mode.</param>
/// <param name="options">Additional file options.</param>
/// <param name="preallocationSize">The preallocation size that is forwarded when initializing the handle.</param>
/// <param name="openPermissions">The permissions passed as the mode of the 'open' call.</param>
/// <param name="createOpenException">
/// Optional callback invoked when the 'open' call fails. When it returns a non-null exception, that exception
/// is thrown; when it returns null, the default exception for the error is created instead.
/// </param>
/// <returns>A <see cref="SafeFileHandle"/> for the opened file.</returns>
internal static SafeFileHandle Open(string fullPath, FileMode mode, FileAccess access, FileShare share, FileOptions options, long preallocationSize,
    Interop.Sys.Permissions openPermissions = DefaultOpenPermissions,
    Func<Interop.ErrorInfo, Interop.Sys.OpenFlags, string, Exception?>? createOpenException = null)
{
    // The length and permissions reported by the core overload are of no interest to callers of this overload.
    // The callback must be forwarded: passing null here would silently drop the caller's custom error mapping.
    return Open(fullPath, mode, access, share, options, preallocationSize, openPermissions, out _, out _, createOpenException);
}

private static SafeFileHandle Open(string fullPath, FileMode mode, FileAccess access, FileShare share, FileOptions options, long preallocationSize,
Interop.Sys.Permissions openPermissions,
out long fileLength,
out Interop.Sys.Permissions filePermissions,
Func<Interop.ErrorInfo, Interop.Sys.OpenFlags, string, Exception?>? createOpenException = null)
{
// Translate the arguments into arguments for an open call.
Interop.Sys.OpenFlags openFlags = PreOpenConfigurationFromOptions(mode, access, share, options);

SafeFileHandle? safeFileHandle = null;
try
{
while (true)
{
safeFileHandle = Open(fullPath, openFlags, (int)OpenPermissions);
safeFileHandle = Open(fullPath, openFlags, (int)openPermissions, createOpenException);

// When Init returns false, the path has changed to another file entry, and
// we need to re-open the path to reflect that.
if (safeFileHandle.Init(fullPath, mode, access, share, options, preallocationSize))
if (safeFileHandle.Init(fullPath, mode, access, share, options, preallocationSize, out fileLength, out filePermissions))
{
return safeFileHandle;
}
Expand Down Expand Up @@ -275,10 +298,13 @@ private static Interop.Sys.OpenFlags PreOpenConfigurationFromOptions(FileMode mo
return flags;
}

private bool Init(string path, FileMode mode, FileAccess access, FileShare share, FileOptions options, long preallocationSize)
private bool Init(string path, FileMode mode, FileAccess access, FileShare share, FileOptions options, long preallocationSize,
out long fileLength, out Interop.Sys.Permissions filePermissions)
{
Interop.Sys.FileStatus status = default;
bool statusHasValue = false;
fileLength = -1;
filePermissions = 0;

// Make sure our handle is not a directory.
// We can omit the check when write access is requested. open will have failed with EISDIR.
Expand All @@ -300,6 +326,9 @@ private bool Init(string path, FileMode mode, FileAccess access, FileShare share
_canSeek = NullableBool.True;
Debug.Assert(Interop.Sys.LSeek(this, 0, Interop.Sys.SeekWhence.SEEK_CUR) >= 0);
}

fileLength = status.Size;
filePermissions = (Interop.Sys.Permissions)(status.Mode & (int)Interop.Sys.Permissions.Mask);
}

IsAsync = (options & FileOptions.Asynchronous) != 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,30 @@ namespace System.IO
/// <summary>Provides an implementation of FileSystem for Unix systems.</summary>
internal static partial class FileSystem
{
internal const int DefaultBufferSize = 4096;

// On Linux, the maximum number of symbolic links that are followed while resolving a pathname is 40.
// See: https://man7.org/linux/man-pages/man7/path_resolution.7.html
private const int MaxFollowedLinks = 40;

public static void CopyFile(string sourceFullPath, string destFullPath, bool overwrite)
{
// If the destination path points to a directory, we throw to match Windows behaviour
if (DirectoryExists(destFullPath))
{
throw new IOException(SR.Format(SR.Arg_FileIsDirectory_Name, destFullPath));
}
long fileLength;
Interop.Sys.Permissions filePermissions;
using SafeFileHandle src = SafeFileHandle.OpenReadOnly(sourceFullPath, FileOptions.None, out fileLength, out filePermissions);
using SafeFileHandle dst = SafeFileHandle.Open(destFullPath, overwrite ? FileMode.Create : FileMode.CreateNew,
FileAccess.ReadWrite, FileShare.None, FileOptions.None, preallocationSize: 0, openPermissions: filePermissions,
(Interop.ErrorInfo error, Interop.Sys.OpenFlags flags, string path) => CreateOpenException(error, flags, path));

Interop.CheckIo(Interop.Sys.CopyFile(src, dst, fileLength));

// Copy the contents of the file from the source to the destination, creating the destination in the process
using (SafeFileHandle src = File.OpenHandle(sourceFullPath, FileMode.Open, FileAccess.Read, FileShare.Read, FileOptions.None))
using (SafeFileHandle dst = File.OpenHandle(destFullPath, overwrite ? FileMode.Create : FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None, FileOptions.None))
static Exception? CreateOpenException(Interop.ErrorInfo error, Interop.Sys.OpenFlags flags, string path)
{
Interop.CheckIo(Interop.Sys.CopyFile(src, dst));
// If the destination path points to a directory, we throw to match Windows behaviour.
if (error.Error == Interop.Error.EEXIST && DirectoryExists(path))
{
return new IOException(SR.Format(SR.Arg_FileIsDirectory_Name, path));
}

return null; // Let SafeFileHandle create the exception for this error.
}
}

Expand Down

0 comments on commit 9b83294

Please sign in to comment.