gh-81340: Use copy_file_range in shutil.copyfile copy functions (GH-93152)
This allows the underlying file system an opportunity to optimise or avoid the actual copy.
This commit is contained in:
parent
a29a9c0f38
commit
a33dcb9e43
@ -512,7 +512,9 @@ the use of userspace buffers in Python as in "``outfd.write(infd.read())``".
|
|||||||
|
|
||||||
On macOS `fcopyfile`_ is used to copy the file content (not metadata).
|
On macOS `fcopyfile`_ is used to copy the file content (not metadata).
|
||||||
|
|
||||||
On Linux and Solaris :func:`os.sendfile` is used.
|
On Linux :func:`os.copy_file_range` or :func:`os.sendfile` is used.
|
||||||
|
|
||||||
|
On Solaris :func:`os.sendfile` is used.
|
||||||
|
|
||||||
On Windows :func:`shutil.copyfile` uses a bigger default buffer size (1 MiB
|
On Windows :func:`shutil.copyfile` uses a bigger default buffer size (1 MiB
|
||||||
instead of 64 KiB) and a :func:`memoryview`-based variant of
|
instead of 64 KiB) and a :func:`memoryview`-based variant of
|
||||||
@ -527,6 +529,10 @@ file then shutil will silently fallback on using less efficient
|
|||||||
.. versionchanged:: 3.14
|
.. versionchanged:: 3.14
|
||||||
Solaris now uses :func:`os.sendfile`.
|
Solaris now uses :func:`os.sendfile`.
|
||||||
|
|
||||||
|
.. versionchanged:: next
|
||||||
|
Copy-on-write or server-side copy may be used internally via
|
||||||
|
:func:`os.copy_file_range` on supported Linux filesystems.
|
||||||
|
|
||||||
.. _shutil-copytree-example:
|
.. _shutil-copytree-example:
|
||||||
|
|
||||||
copytree example
|
copytree example
|
||||||
|
@ -49,6 +49,7 @@ COPY_BUFSIZE = 1024 * 1024 if _WINDOWS else 256 * 1024
|
|||||||
# https://bugs.python.org/issue43743#msg393429
|
# https://bugs.python.org/issue43743#msg393429
|
||||||
_USE_CP_SENDFILE = (hasattr(os, "sendfile")
|
_USE_CP_SENDFILE = (hasattr(os, "sendfile")
|
||||||
and sys.platform.startswith(("linux", "android", "sunos")))
|
and sys.platform.startswith(("linux", "android", "sunos")))
|
||||||
|
_USE_CP_COPY_FILE_RANGE = hasattr(os, "copy_file_range")
|
||||||
_HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile") # macOS
|
_HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile") # macOS
|
||||||
|
|
||||||
# CMD defaults in Windows 10
|
# CMD defaults in Windows 10
|
||||||
@ -107,6 +108,66 @@ def _fastcopy_fcopyfile(fsrc, fdst, flags):
|
|||||||
else:
|
else:
|
||||||
raise err from None
|
raise err from None
|
||||||
|
|
||||||
|
def _determine_linux_fastcopy_blocksize(infd):
    """Determine blocksize for fastcopying on Linux.

    Hopefully the whole file will be copied in a single call.
    The copying itself should be performed in a loop 'till EOF is
    reached (0 return) so a blocksize smaller or bigger than the actual
    file size should not make any difference, also in case the file
    content changes while being copied.

    *infd* is the file descriptor of the source file.

    Return the block size in bytes: the size reported by os.fstat() but
    never less than 8 MiB, or 128 MiB if the size cannot be determined.
    On 32-bit builds the result is additionally capped at 1 GiB.
    """
    try:
        blocksize = max(os.fstat(infd).st_size, 2 ** 23)  # min 8 MiB
    except OSError:
        # Size unknown: fall back to a fixed chunk and rely on the
        # caller's copy loop to reach EOF.
        blocksize = 2 ** 27  # 128 MiB
    # On 32-bit architectures truncate to 1 GiB to avoid OverflowError,
    # see gh-82500.
    if sys.maxsize < 2 ** 32:
        blocksize = min(blocksize, 2 ** 30)
    return blocksize
|
||||||
|
|
||||||
|
def _fastcopy_copy_file_range(fsrc, fdst):
    """Copy data from one regular mmap-like fd to another by using
    a high-performance copy_file_range(2) syscall that gives filesystems
    an opportunity to implement the use of reflinks or server-side copy.

    This should work on Linux >= 4.5 only.

    *fsrc* and *fdst* are open file objects; both must provide fileno()
    and a ``name`` attribute (the latter is used to annotate errors).

    Raise _GiveupOnFastCopy if a file descriptor cannot be obtained, if
    the first call fails before anything was written, or if the first
    call reports that zero bytes were copied; the caller is then expected
    to fall back to another copy strategy.  Any other OSError is
    re-raised with ``filename`` and ``filename2`` set.
    """
    try:
        infd = fsrc.fileno()
        outfd = fdst.fileno()
    except Exception as err:
        raise _GiveupOnFastCopy(err)  # not a regular file

    blocksize = _determine_linux_fastcopy_blocksize(infd)
    # Number of bytes copied so far; also the destination offset of the
    # next chunk.  The source is read from its current file position.
    offset = 0
    while True:
        try:
            n_copied = os.copy_file_range(infd, outfd, blocksize,
                                          offset_dst=offset)
        except OSError as err:
            # ...in order to have a more informative exception.
            err.filename = fsrc.name
            err.filename2 = fdst.name

            if err.errno == errno.ENOSPC:  # filesystem is full
                raise err from None

            # Give up on first call and if no data was copied.
            if offset == 0 and os.lseek(outfd, 0, os.SEEK_CUR) == 0:
                raise _GiveupOnFastCopy(err)

            raise err
        else:
            if n_copied == 0:
                # If no bytes have been copied yet, copy_file_range
                # might silently fail.
                # https://lore.kernel.org/linux-fsdevel/20210126233840.GG4626@dread.disaster.area/T/#m05753578c7f7882f6e9ffe01f981bc223edef2b0
                if offset == 0:
                    raise _GiveupOnFastCopy()
                # Zero bytes after some progress means EOF was reached.
                break
            offset += n_copied
|
||||||
|
|
||||||
def _fastcopy_sendfile(fsrc, fdst):
|
def _fastcopy_sendfile(fsrc, fdst):
|
||||||
"""Copy data from one regular mmap-like fd to another by using
|
"""Copy data from one regular mmap-like fd to another by using
|
||||||
high-performance sendfile(2) syscall.
|
high-performance sendfile(2) syscall.
|
||||||
@ -128,20 +189,7 @@ def _fastcopy_sendfile(fsrc, fdst):
|
|||||||
except Exception as err:
|
except Exception as err:
|
||||||
raise _GiveupOnFastCopy(err) # not a regular file
|
raise _GiveupOnFastCopy(err) # not a regular file
|
||||||
|
|
||||||
# Hopefully the whole file will be copied in a single call.
|
blocksize = _determine_linux_fastcopy_blocksize(infd)
|
||||||
# sendfile() is called in a loop 'till EOF is reached (0 return)
|
|
||||||
# so a bufsize smaller or bigger than the actual file size
|
|
||||||
# should not make any difference, also in case the file content
|
|
||||||
# changes while being copied.
|
|
||||||
try:
|
|
||||||
blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8MiB
|
|
||||||
except OSError:
|
|
||||||
blocksize = 2 ** 27 # 128MiB
|
|
||||||
# On 32-bit architectures truncate to 1GiB to avoid OverflowError,
|
|
||||||
# see bpo-38319.
|
|
||||||
if sys.maxsize < 2 ** 32:
|
|
||||||
blocksize = min(blocksize, 2 ** 30)
|
|
||||||
|
|
||||||
offset = 0
|
offset = 0
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
@ -266,12 +314,20 @@ def copyfile(src, dst, *, follow_symlinks=True):
|
|||||||
except _GiveupOnFastCopy:
|
except _GiveupOnFastCopy:
|
||||||
pass
|
pass
|
||||||
# Linux / Android / Solaris
|
# Linux / Android / Solaris
|
||||||
elif _USE_CP_SENDFILE:
|
elif _USE_CP_SENDFILE or _USE_CP_COPY_FILE_RANGE:
|
||||||
try:
|
# reflink may be implicit in copy_file_range.
|
||||||
_fastcopy_sendfile(fsrc, fdst)
|
if _USE_CP_COPY_FILE_RANGE:
|
||||||
return dst
|
try:
|
||||||
except _GiveupOnFastCopy:
|
_fastcopy_copy_file_range(fsrc, fdst)
|
||||||
pass
|
return dst
|
||||||
|
except _GiveupOnFastCopy:
|
||||||
|
pass
|
||||||
|
if _USE_CP_SENDFILE:
|
||||||
|
try:
|
||||||
|
_fastcopy_sendfile(fsrc, fdst)
|
||||||
|
return dst
|
||||||
|
except _GiveupOnFastCopy:
|
||||||
|
pass
|
||||||
# Windows, see:
|
# Windows, see:
|
||||||
# https://github.com/python/cpython/pull/7160#discussion_r195405230
|
# https://github.com/python/cpython/pull/7160#discussion_r195405230
|
||||||
elif _WINDOWS and file_size > 0:
|
elif _WINDOWS and file_size > 0:
|
||||||
|
@ -3239,12 +3239,8 @@ class _ZeroCopyFileTest(object):
|
|||||||
self.assertRaises(OSError, self.zerocopy_fun, src, dst)
|
self.assertRaises(OSError, self.zerocopy_fun, src, dst)
|
||||||
|
|
||||||
|
|
||||||
@unittest.skipIf(not SUPPORTS_SENDFILE, 'os.sendfile() not supported')
|
class _ZeroCopyFileLinuxTest(_ZeroCopyFileTest):
|
||||||
class TestZeroCopySendfile(_ZeroCopyFileTest, unittest.TestCase):
|
BLOCKSIZE_INDEX = None
|
||||||
PATCHPOINT = "os.sendfile"
|
|
||||||
|
|
||||||
def zerocopy_fun(self, fsrc, fdst):
|
|
||||||
return shutil._fastcopy_sendfile(fsrc, fdst)
|
|
||||||
|
|
||||||
def test_non_regular_file_src(self):
|
def test_non_regular_file_src(self):
|
||||||
with io.BytesIO(self.FILEDATA) as src:
|
with io.BytesIO(self.FILEDATA) as src:
|
||||||
@ -3265,65 +3261,65 @@ class TestZeroCopySendfile(_ZeroCopyFileTest, unittest.TestCase):
|
|||||||
self.assertEqual(dst.read(), self.FILEDATA)
|
self.assertEqual(dst.read(), self.FILEDATA)
|
||||||
|
|
||||||
def test_exception_on_second_call(self):
|
def test_exception_on_second_call(self):
|
||||||
def sendfile(*args, **kwargs):
|
def syscall(*args, **kwargs):
|
||||||
if not flag:
|
if not flag:
|
||||||
flag.append(None)
|
flag.append(None)
|
||||||
return orig_sendfile(*args, **kwargs)
|
return orig_syscall(*args, **kwargs)
|
||||||
else:
|
else:
|
||||||
raise OSError(errno.EBADF, "yo")
|
raise OSError(errno.EBADF, "yo")
|
||||||
|
|
||||||
flag = []
|
flag = []
|
||||||
orig_sendfile = os.sendfile
|
orig_syscall = eval(self.PATCHPOINT)
|
||||||
with unittest.mock.patch('os.sendfile', create=True,
|
with unittest.mock.patch(self.PATCHPOINT, create=True,
|
||||||
side_effect=sendfile):
|
side_effect=syscall):
|
||||||
with self.get_files() as (src, dst):
|
with self.get_files() as (src, dst):
|
||||||
with self.assertRaises(OSError) as cm:
|
with self.assertRaises(OSError) as cm:
|
||||||
shutil._fastcopy_sendfile(src, dst)
|
self.zerocopy_fun(src, dst)
|
||||||
assert flag
|
assert flag
|
||||||
self.assertEqual(cm.exception.errno, errno.EBADF)
|
self.assertEqual(cm.exception.errno, errno.EBADF)
|
||||||
|
|
||||||
def test_cant_get_size(self):
|
def test_cant_get_size(self):
|
||||||
# Emulate a case where src file size cannot be determined.
|
# Emulate a case where src file size cannot be determined.
|
||||||
# Internally bufsize will be set to a small value and
|
# Internally bufsize will be set to a small value and
|
||||||
# sendfile() will be called repeatedly.
|
# a system call will be called repeatedly.
|
||||||
with unittest.mock.patch('os.fstat', side_effect=OSError) as m:
|
with unittest.mock.patch('os.fstat', side_effect=OSError) as m:
|
||||||
with self.get_files() as (src, dst):
|
with self.get_files() as (src, dst):
|
||||||
shutil._fastcopy_sendfile(src, dst)
|
self.zerocopy_fun(src, dst)
|
||||||
assert m.called
|
assert m.called
|
||||||
self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
|
self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
|
||||||
|
|
||||||
def test_small_chunks(self):
|
def test_small_chunks(self):
|
||||||
# Force internal file size detection to be smaller than the
|
# Force internal file size detection to be smaller than the
|
||||||
# actual file size. We want to force sendfile() to be called
|
# actual file size. We want to force a system call to be called
|
||||||
# multiple times, also in order to emulate a src fd which gets
|
# multiple times, also in order to emulate a src fd which gets
|
||||||
# bigger while it is being copied.
|
# bigger while it is being copied.
|
||||||
mock = unittest.mock.Mock()
|
mock = unittest.mock.Mock()
|
||||||
mock.st_size = 65536 + 1
|
mock.st_size = 65536 + 1
|
||||||
with unittest.mock.patch('os.fstat', return_value=mock) as m:
|
with unittest.mock.patch('os.fstat', return_value=mock) as m:
|
||||||
with self.get_files() as (src, dst):
|
with self.get_files() as (src, dst):
|
||||||
shutil._fastcopy_sendfile(src, dst)
|
self.zerocopy_fun(src, dst)
|
||||||
assert m.called
|
assert m.called
|
||||||
self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
|
self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
|
||||||
|
|
||||||
def test_big_chunk(self):
|
def test_big_chunk(self):
|
||||||
# Force internal file size detection to be +100MB bigger than
|
# Force internal file size detection to be +100MB bigger than
|
||||||
# the actual file size. Make sure sendfile() does not rely on
|
# the actual file size. Make sure a system call does not rely on
|
||||||
# file size value except for (maybe) a better throughput /
|
# file size value except for (maybe) a better throughput /
|
||||||
# performance.
|
# performance.
|
||||||
mock = unittest.mock.Mock()
|
mock = unittest.mock.Mock()
|
||||||
mock.st_size = self.FILESIZE + (100 * 1024 * 1024)
|
mock.st_size = self.FILESIZE + (100 * 1024 * 1024)
|
||||||
with unittest.mock.patch('os.fstat', return_value=mock) as m:
|
with unittest.mock.patch('os.fstat', return_value=mock) as m:
|
||||||
with self.get_files() as (src, dst):
|
with self.get_files() as (src, dst):
|
||||||
shutil._fastcopy_sendfile(src, dst)
|
self.zerocopy_fun(src, dst)
|
||||||
assert m.called
|
assert m.called
|
||||||
self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
|
self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
|
||||||
|
|
||||||
def test_blocksize_arg(self):
|
def test_blocksize_arg(self):
|
||||||
with unittest.mock.patch('os.sendfile',
|
with unittest.mock.patch(self.PATCHPOINT,
|
||||||
side_effect=ZeroDivisionError) as m:
|
side_effect=ZeroDivisionError) as m:
|
||||||
self.assertRaises(ZeroDivisionError,
|
self.assertRaises(ZeroDivisionError,
|
||||||
shutil.copyfile, TESTFN, TESTFN2)
|
shutil.copyfile, TESTFN, TESTFN2)
|
||||||
blocksize = m.call_args[0][3]
|
blocksize = m.call_args[0][self.BLOCKSIZE_INDEX]
|
||||||
# Make sure file size and the block size arg passed to
|
# Make sure file size and the block size arg passed to
|
||||||
# sendfile() are the same.
|
# the patched system call are the same.
|
||||||
self.assertEqual(blocksize, os.path.getsize(TESTFN))
|
self.assertEqual(blocksize, os.path.getsize(TESTFN))
|
||||||
@ -3333,9 +3329,19 @@ class TestZeroCopySendfile(_ZeroCopyFileTest, unittest.TestCase):
|
|||||||
self.addCleanup(os_helper.unlink, TESTFN2 + '3')
|
self.addCleanup(os_helper.unlink, TESTFN2 + '3')
|
||||||
self.assertRaises(ZeroDivisionError,
|
self.assertRaises(ZeroDivisionError,
|
||||||
shutil.copyfile, TESTFN2, TESTFN2 + '3')
|
shutil.copyfile, TESTFN2, TESTFN2 + '3')
|
||||||
blocksize = m.call_args[0][3]
|
blocksize = m.call_args[0][self.BLOCKSIZE_INDEX]
|
||||||
self.assertEqual(blocksize, 2 ** 23)
|
self.assertEqual(blocksize, 2 ** 23)
|
||||||
|
|
||||||
|
|
||||||
|
@unittest.skipIf(not SUPPORTS_SENDFILE, 'os.sendfile() not supported')
|
||||||
|
@unittest.mock.patch.object(shutil, "_USE_CP_COPY_FILE_RANGE", False)
|
||||||
|
class TestZeroCopySendfile(_ZeroCopyFileLinuxTest, unittest.TestCase):
|
||||||
|
PATCHPOINT = "os.sendfile"
|
||||||
|
BLOCKSIZE_INDEX = 3
|
||||||
|
|
||||||
|
def zerocopy_fun(self, fsrc, fdst):
|
||||||
|
return shutil._fastcopy_sendfile(fsrc, fdst)
|
||||||
|
|
||||||
def test_file2file_not_supported(self):
|
def test_file2file_not_supported(self):
|
||||||
# Emulate a case where sendfile() only support file->socket
|
# Emulate a case where sendfile() only support file->socket
|
||||||
# fds. In such a case copyfile() is supposed to skip the
|
# fds. In such a case copyfile() is supposed to skip the
|
||||||
@ -3358,6 +3364,29 @@ class TestZeroCopySendfile(_ZeroCopyFileTest, unittest.TestCase):
|
|||||||
shutil._USE_CP_SENDFILE = True
|
shutil._USE_CP_SENDFILE = True
|
||||||
|
|
||||||
|
|
||||||
|
# Runs the shared Linux zero-copy test-suite against the
# os.copy_file_range()-based fast path of shutil.
@unittest.skipUnless(shutil._USE_CP_COPY_FILE_RANGE,
                     "os.copy_file_range() not supported")
class TestZeroCopyCopyFileRange(_ZeroCopyFileLinuxTest, unittest.TestCase):
    # Dotted name of the syscall wrapper that the shared tests patch.
    PATCHPOINT = "os.copy_file_range"
    # Positional index of the byte-count argument in
    # os.copy_file_range(src, dst, count, ...).
    BLOCKSIZE_INDEX = 2

    def zerocopy_fun(self, fsrc, fdst):
        return shutil._fastcopy_copy_file_range(fsrc, fdst)

    def test_empty_file(self):
        srcname = f"{TESTFN}src"
        dstname = f"{TESTFN}dst"
        # Pass the callable and its argument directly, like the other
        # cleanups in this file, instead of wrapping them in a lambda.
        self.addCleanup(os_helper.unlink, srcname)
        self.addCleanup(os_helper.unlink, dstname)
        # Create an empty source file.
        with open(srcname, "wb"):
            pass

        with open(srcname, "rb") as src, open(dstname, "wb") as dst:
            # _fastcopy_copy_file_range gives up copying empty files due
            # to a bug in older Linux.
            with self.assertRaises(shutil._GiveupOnFastCopy):
                self.zerocopy_fun(src, dst)
|
||||||
|
|
||||||
|
|
||||||
@unittest.skipIf(not MACOS, 'macOS only')
|
@unittest.skipIf(not MACOS, 'macOS only')
|
||||||
class TestZeroCopyMACOS(_ZeroCopyFileTest, unittest.TestCase):
|
class TestZeroCopyMACOS(_ZeroCopyFileTest, unittest.TestCase):
|
||||||
PATCHPOINT = "posix._fcopyfile"
|
PATCHPOINT = "posix._fcopyfile"
|
||||||
|
@ -1972,6 +1972,7 @@ Johannes Vogel
|
|||||||
Michael Vogt
|
Michael Vogt
|
||||||
Radu Voicilas
|
Radu Voicilas
|
||||||
Alex Volkov
|
Alex Volkov
|
||||||
|
Illia Volochii
|
||||||
Ruben Vorderman
|
Ruben Vorderman
|
||||||
Guido Vranken
|
Guido Vranken
|
||||||
Martijn Vries
|
Martijn Vries
|
||||||
|
@ -0,0 +1,5 @@
|
|||||||
|
Use :func:`os.copy_file_range` in :func:`shutil.copy`, :func:`shutil.copy2`,
|
||||||
|
and :func:`shutil.copyfile` functions by default. An underlying Linux system
|
||||||
|
call gives filesystems an opportunity to implement the use of copy-on-write
|
||||||
|
(in the case of btrfs and XFS) or server-side copy (in the case of NFS).
|
||||||
|
Patch by Illia Volochii.
|
Loading…
x
Reference in New Issue
Block a user