gh-90385: Add pathlib.Path.walk()
method (GH-92517)
Automerge-Triggered-By: GH:brettcannon
This commit is contained in:
parent
e4d3a96a11
commit
c1e929858a
@ -946,6 +946,101 @@ call fails (for example because the path doesn't exist).
|
|||||||
to the directory after creating the iterator, whether a path object for
|
to the directory after creating the iterator, whether a path object for
|
||||||
that file be included is unspecified.
|
that file be included is unspecified.
|
||||||
|
|
||||||
|
.. method:: Path.walk(top_down=True, on_error=None, follow_symlinks=False)
|
||||||
|
|
||||||
|
Generate the file names in a directory tree by walking the tree
|
||||||
|
either top-down or bottom-up.
|
||||||
|
|
||||||
|
For each directory in the directory tree rooted at *self* (including
|
||||||
|
*self* but excluding '.' and '..'), the method yields a 3-tuple of
|
||||||
|
``(dirpath, dirnames, filenames)``.
|
||||||
|
|
||||||
|
*dirpath* is a :class:`Path` to the directory currently being walked,
|
||||||
|
*dirnames* is a list of strings for the names of subdirectories in *dirpath*
|
||||||
|
(excluding ``'.'`` and ``'..'``), and *filenames* is a list of strings for
|
||||||
|
the names of the non-directory files in *dirpath*. To get a full path
|
||||||
|
(which begins with *self*) to a file or directory in *dirpath*, do
|
||||||
|
``dirpath / name``. Whether or not the lists are sorted is file
|
||||||
|
system-dependent.
|
||||||
|
|
||||||
|
If the optional argument *top_down* is true (which is the default), the triple for a
|
||||||
|
directory is generated before the triples for any of its subdirectories
|
||||||
|
(directories are walked top-down). If *top_down* is false, the triple
|
||||||
|
for a directory is generated after the triples for all of its subdirectories
|
||||||
|
(directories are walked bottom-up). No matter the value of *top_down*, the
|
||||||
|
list of subdirectories is retrieved before the triples for the directory and
|
||||||
|
its subdirectories are walked.
|
||||||
|
|
||||||
|
When *top_down* is true, the caller can modify the *dirnames* list in-place
|
||||||
|
(for example, using :keyword:`del` or slice assignment), and :meth:`Path.walk`
|
||||||
|
will only recurse into the subdirectories whose names remain in *dirnames*.
|
||||||
|
This can be used to prune the search, or to impose a specific order of visiting,
|
||||||
|
or even to inform :meth:`Path.walk` about directories the caller creates or
|
||||||
|
renames before it resumes :meth:`Path.walk` again. Modifying *dirnames* when
|
||||||
|
*top_down* is false has no effect on the behavior of :meth:`Path.walk` since the
|
||||||
|
directories in *dirnames* have already been generated by the time *dirnames*
|
||||||
|
is yielded to the caller.
|
||||||
|
|
||||||
|
By default, errors from :func:`os.scandir` are ignored. If the optional
|
||||||
|
argument *on_error* is specified, it should be a callable; it will be
|
||||||
|
called with one argument, an :exc:`OSError` instance. The callable can handle the
|
||||||
|
error to continue the walk or re-raise it to stop the walk. Note that the
|
||||||
|
filename is available as the ``filename`` attribute of the exception object.
|
||||||
|
|
||||||
|
By default, :meth:`Path.walk` does not follow symbolic links, and instead adds them
|
||||||
|
to the *filenames* list. Set *follow_symlinks* to true to resolve symlinks
|
||||||
|
and place them in *dirnames* and *filenames* as appropriate for their targets, and
|
||||||
|
consequently visit directories pointed to by symlinks (where supported).
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
Be aware that setting *follow_symlinks* to true can lead to infinite
|
||||||
|
recursion if a link points to a parent directory of itself. :meth:`Path.walk`
|
||||||
|
does not keep track of the directories it has already visited.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
:meth:`Path.walk` assumes the directories it walks are not modified during
|
||||||
|
execution. For example, if a directory from *dirnames* has been replaced
|
||||||
|
with a symlink and *follow_symlinks* is false, :meth:`Path.walk` will
|
||||||
|
still try to descend into it. To prevent such behavior, remove directories
|
||||||
|
from *dirnames* as appropriate.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
Unlike :func:`os.walk`, :meth:`Path.walk` lists symlinks to directories in
|
||||||
|
*filenames* if *follow_symlinks* is false.
|
||||||
|
|
||||||
|
This example displays the number of bytes used by all files in each directory,
|
||||||
|
while ignoring ``__pycache__`` directories::
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
for root, dirs, files in Path("cpython/Lib/concurrent").walk(on_error=print):
|
||||||
|
print(
|
||||||
|
root,
|
||||||
|
"consumes",
|
||||||
|
sum((root / file).stat().st_size for file in files),
|
||||||
|
"bytes in",
|
||||||
|
len(files),
|
||||||
|
"non-directory files"
|
||||||
|
)
|
||||||
|
if '__pycache__' in dirs:
|
||||||
|
dirs.remove('__pycache__')
|
||||||
|
|
||||||
|
This next example is a simple implementation of :func:`shutil.rmtree`.
|
||||||
|
Walking the tree bottom-up is essential as :meth:`Path.rmdir` doesn't allow
|
||||||
|
deleting a directory before it is empty::
|
||||||
|
|
||||||
|
# Delete everything reachable from the directory "top".
|
||||||
|
# CAUTION: This is dangerous! For example, if top == Path('/'),
|
||||||
|
# it could delete all of your files.
|
||||||
|
for root, dirs, files in top.walk(top_down=False):
|
||||||
|
for name in files:
|
||||||
|
(root / name).unlink()
|
||||||
|
for name in dirs:
|
||||||
|
(root / name).rmdir()
|
||||||
|
|
||||||
|
.. versionadded:: 3.12
|
||||||
|
|
||||||
.. method:: Path.lchmod(mode)
|
.. method:: Path.lchmod(mode)
|
||||||
|
|
||||||
Like :meth:`Path.chmod` but, if the path points to a symbolic link, the
|
Like :meth:`Path.chmod` but, if the path points to a symbolic link, the
|
||||||
@ -1285,6 +1380,7 @@ Below is a table mapping various :mod:`os` functions to their corresponding
|
|||||||
:func:`os.path.expanduser` :meth:`Path.expanduser` and
|
:func:`os.path.expanduser` :meth:`Path.expanduser` and
|
||||||
:meth:`Path.home`
|
:meth:`Path.home`
|
||||||
:func:`os.listdir` :meth:`Path.iterdir`
|
:func:`os.listdir` :meth:`Path.iterdir`
|
||||||
|
:func:`os.walk` :meth:`Path.walk`
|
||||||
:func:`os.path.isdir` :meth:`Path.is_dir`
|
:func:`os.path.isdir` :meth:`Path.is_dir`
|
||||||
:func:`os.path.isfile` :meth:`Path.is_file`
|
:func:`os.path.isfile` :meth:`Path.is_file`
|
||||||
:func:`os.path.islink` :meth:`Path.is_symlink`
|
:func:`os.path.islink` :meth:`Path.is_symlink`
|
||||||
|
@ -1321,6 +1321,49 @@ class Path(PurePath):
|
|||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
def walk(self, top_down=True, on_error=None, follow_symlinks=False):
    """Walk the directory tree rooted at this path, similar to os.walk().

    Returns the iterator produced by ``self._walk()``, which yields
    ``(dirpath, dirnames, filenames)`` tuples.  *top_down*, *on_error* and
    *follow_symlinks* are forwarded to it unchanged.
    """
    # This wrapper is a plain function rather than a generator, so the
    # audit event is raised when walk() is called, not on first iteration.
    sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks)
    walker = self._walk(top_down, on_error, follow_symlinks)
    return walker
|
||||||
|
|
||||||
|
def _walk(self, top_down, on_error, follow_symlinks):
    """Yield ``(dirpath, dirnames, filenames)`` for each directory under self.

    *top_down* selects whether a directory's tuple is yielded before
    (true) or after (false) the tuples of its subdirectories.  *on_error*,
    when not None, is called with the OSError raised while opening a
    directory for scanning; the walk then continues with the remaining
    directories.  *follow_symlinks* is passed to ``entry.is_dir()`` and so
    decides whether a symlink to a directory lands in *dirnames* or
    *filenames*.

    The traversal uses an explicit stack instead of recursion, so the
    depth of the tree is not bounded by the interpreter recursion limit
    and yielded tuples are not relayed through one generator per level.
    """
    # Stack entries are either a path still to be scanned or, for
    # bottom-up walks, a finished tuple waiting for its subtree to be done.
    pending = [self]

    while pending:
        path = pending.pop()
        if isinstance(path, tuple):
            # Deferred bottom-up result: every subdirectory pushed after
            # it has been processed by now.
            yield path
            continue

        # We may not have read permission for path, in which case we can't
        # get a list of the files the directory contains.  os.walk
        # always suppressed the exception then, rather than blow up for a
        # minor reason when (say) a thousand readable directories are still
        # left to visit.  That logic is copied here.
        try:
            scandir_it = path._scandir()
        except OSError as error:
            if on_error is not None:
                on_error(error)
            continue

        dirnames = []
        filenames = []
        # The scandir iterator is closed before anything is yielded, so no
        # directory handle stays open while the caller holds the generator.
        with scandir_it:
            for entry in scandir_it:
                try:
                    is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
                except OSError:
                    # Carried over from os.path.isdir().
                    is_dir = False

                if is_dir:
                    dirnames.append(entry.name)
                else:
                    filenames.append(entry.name)

        if top_down:
            # The caller may prune or reorder dirnames in place before
            # resuming; the children are only pushed afterwards.
            yield path, dirnames, filenames
        else:
            pending.append((path, dirnames, filenames))

        # Push in reverse so that the first name is popped (visited) first.
        pending.extend(
            [path._make_child_relpath(name) for name in reversed(dirnames)]
        )
|
||||||
|
|
||||||
|
|
||||||
class PosixPath(Path, PurePosixPath):
|
class PosixPath(Path, PurePosixPath):
|
||||||
"""Path subclass for non-Windows systems.
|
"""Path subclass for non-Windows systems.
|
||||||
|
@ -572,7 +572,7 @@ def fs_is_case_insensitive(directory):
|
|||||||
|
|
||||||
|
|
||||||
class FakePath:
|
class FakePath:
|
||||||
"""Simple implementing of the path protocol.
|
"""Simple implementation of the path protocol.
|
||||||
"""
|
"""
|
||||||
def __init__(self, path):
|
def __init__(self, path):
|
||||||
self.path = path
|
self.path = path
|
||||||
|
@ -2478,6 +2478,203 @@ class _BasePathTest(object):
|
|||||||
def test_complex_symlinks_relative_dot_dot(self):
|
def test_complex_symlinks_relative_dot_dot(self):
|
||||||
self._check_complex_symlinks(os.path.join('dirA', '..'))
|
self._check_complex_symlinks(os.path.join('dirA', '..'))
|
||||||
|
|
||||||
|
class WalkTests(unittest.TestCase):
    # Tests for pathlib.Path.walk().  Comments are used instead of
    # docstrings so unittest's verbose output keeps showing method names.

    def setUp(self):
        self.addCleanup(os_helper.rmtree, os_helper.TESTFN)

        # Build:
        #     TESTFN/
        #       TEST1/              a file kid and two directory kids
        #         tmp1
        #         SUB1/             a file kid and a directory kid
        #           tmp2
        #           SUB11/          no kids
        #         SUB2/             a file kid and a dirsymlink kid
        #           tmp3
        #           SUB21/          not readable
        #             tmp5
        #           link/           a symlink to TEST2
        #           broken_link
        #           broken_link2
        #           broken_link3
        #       TEST2/
        #         tmp4              a lone file
        self.walk_path = pathlib.Path(os_helper.TESTFN, "TEST1")
        self.sub1_path = self.walk_path / "SUB1"
        self.sub11_path = self.sub1_path / "SUB11"
        self.sub2_path = self.walk_path / "SUB2"
        sub21_path = self.sub2_path / "SUB21"
        tmp1_path = self.walk_path / "tmp1"
        tmp2_path = self.sub1_path / "tmp2"
        tmp3_path = self.sub2_path / "tmp3"
        # NOTE: the file is created on disk as SUB21/tmp3, so broken_link3
        # (which targets SUB21/tmp5) is a dangling link.
        tmp5_path = sub21_path / "tmp3"
        self.link_path = self.sub2_path / "link"
        t2_path = pathlib.Path(os_helper.TESTFN, "TEST2")
        tmp4_path = pathlib.Path(os_helper.TESTFN, "TEST2", "tmp4")
        broken_link_path = self.sub2_path / "broken_link"
        broken_link2_path = self.sub2_path / "broken_link2"
        broken_link3_path = self.sub2_path / "broken_link3"

        os.makedirs(self.sub11_path)
        os.makedirs(self.sub2_path)
        os.makedirs(sub21_path)
        os.makedirs(t2_path)

        for path in tmp1_path, tmp2_path, tmp3_path, tmp4_path, tmp5_path:
            with open(path, "x", encoding='utf-8') as f:
                f.write(f"I'm {path} and proud of it.  Blame test_pathlib.\n")

        if os_helper.can_symlink():
            os.symlink(os.path.abspath(t2_path), self.link_path)
            os.symlink('broken', broken_link_path, True)
            os.symlink(pathlib.Path('tmp3', 'broken'), broken_link2_path, True)
            os.symlink(pathlib.Path('SUB21', 'tmp5'), broken_link3_path, True)
            self.sub2_tree = (self.sub2_path, ["SUB21"],
                              ["broken_link", "broken_link2", "broken_link3",
                               "link", "tmp3"])
        else:
            self.sub2_tree = (self.sub2_path, ["SUB21"], ["tmp3"])

        if not is_emscripten:
            # Emscripten fails with inaccessible directories.
            os.chmod(sub21_path, 0)
        try:
            os.listdir(sub21_path)
        except PermissionError:
            # SUB21 really is unreadable; restore access so cleanup works.
            self.addCleanup(os.chmod, sub21_path, stat.S_IRWXU)
        else:
            # Permissions are not enforced here, so drop SUB21 from both the
            # file system and the expected tree.
            os.chmod(sub21_path, stat.S_IRWXU)
            os.unlink(tmp5_path)
            os.rmdir(sub21_path)
            del self.sub2_tree[1][:1]

    def test_walk_topdown(self):
        walked = list(self.walk_path.walk())

        self.assertEqual(len(walked), 4)
        # We can't know which order SUB1 and SUB2 will appear in.
        # Not flipped:  TESTFN, SUB1, SUB11, SUB2
        #     flipped:  TESTFN, SUB2, SUB1, SUB11
        flipped = walked[0][1][0] != "SUB1"
        walked[0][1].sort()
        walked[3 - 2 * flipped][-1].sort()
        walked[3 - 2 * flipped][1].sort()
        self.assertEqual(walked[0], (self.walk_path, ["SUB1", "SUB2"], ["tmp1"]))
        self.assertEqual(walked[1 + flipped], (self.sub1_path, ["SUB11"], ["tmp2"]))
        self.assertEqual(walked[2 + flipped], (self.sub11_path, [], []))
        self.assertEqual(walked[3 - 2 * flipped], self.sub2_tree)

    def test_walk_prune(self, walk_path=None):
        if walk_path is None:
            walk_path = self.walk_path
        # Prune the search.
        walked = []
        for root, dirs, files in walk_path.walk():
            walked.append((root, dirs, files))
            if 'SUB1' in dirs:
                # Note that this also mutates the dirs we appended to walked!
                dirs.remove('SUB1')

        self.assertEqual(len(walked), 2)
        self.assertEqual(walked[0], (self.walk_path, ["SUB2"], ["tmp1"]))

        walked[1][-1].sort()
        walked[1][1].sort()
        self.assertEqual(walked[1], self.sub2_tree)

    def test_file_like_path(self):
        self.test_walk_prune(FakePath(self.walk_path).__fspath__())

    def test_walk_bottom_up(self):
        walked = list(self.walk_path.walk(top_down=False))

        self.assertEqual(len(walked), 4, walked)
        # We can't know which order SUB1 and SUB2 will appear in.
        # Not flipped:  SUB11, SUB1, SUB2, TESTFN
        #     flipped:  SUB2, SUB11, SUB1, TESTFN
        flipped = walked[3][1][0] != "SUB1"
        walked[3][1].sort()
        walked[2 - 2 * flipped][-1].sort()
        walked[2 - 2 * flipped][1].sort()
        self.assertEqual(walked[3],
                         (self.walk_path, ["SUB1", "SUB2"], ["tmp1"]))
        self.assertEqual(walked[flipped],
                         (self.sub11_path, [], []))
        self.assertEqual(walked[flipped + 1],
                         (self.sub1_path, ["SUB11"], ["tmp2"]))
        self.assertEqual(walked[2 - 2 * flipped],
                         self.sub2_tree)

    @os_helper.skip_unless_symlink
    def test_walk_follow_symlinks(self):
        walk_it = self.walk_path.walk(follow_symlinks=True)
        for root, dirs, files in walk_it:
            if root == self.link_path:
                self.assertEqual(dirs, [])
                self.assertEqual(files, ["tmp4"])
                break
        else:
            self.fail("Didn't follow symlink with follow_symlinks=True")

    # setUp() only creates "link" when symlinks are available, so this test
    # has to be skipped otherwise instead of failing on the first assertion.
    @os_helper.skip_unless_symlink
    def test_walk_symlink_location(self):
        # Tests whether symlinks end up in filenames or dirnames depending
        # on the `follow_symlinks` argument.
        walk_it = self.walk_path.walk(follow_symlinks=False)
        for root, dirs, files in walk_it:
            if root == self.sub2_path:
                self.assertIn("link", files)
                break
        else:
            self.fail("symlink not found")

        walk_it = self.walk_path.walk(follow_symlinks=True)
        for root, dirs, files in walk_it:
            if root == self.sub2_path:
                self.assertIn("link", dirs)
                break
        else:
            # Without this the second half passes vacuously when SUB2 is
            # never visited.
            self.fail("symlink not found")

    def test_walk_bad_dir(self):
        errors = []
        walk_it = self.walk_path.walk(on_error=errors.append)
        root, dirs, files = next(walk_it)
        self.assertEqual(errors, [])
        dir1 = 'SUB1'
        path1 = root / dir1
        path1new = (root / dir1).with_suffix(".new")
        # Rename SUB1 after it was listed so that scanning it must fail.
        path1.rename(path1new)
        try:
            roots = [r for r, _, _ in walk_it]
            self.assertTrue(errors)
            self.assertNotIn(path1, roots)
            self.assertNotIn(path1new, roots)
            for dir2 in dirs:
                if dir2 != dir1:
                    self.assertIn(root / dir2, roots)
        finally:
            path1new.rename(path1)

    def test_walk_many_open_files(self):
        depth = 30
        base = pathlib.Path(os_helper.TESTFN, 'deep')
        path = pathlib.Path(base, *(['d']*depth))
        path.mkdir(parents=True)

        # Advance 100 independent walkers in lockstep, bottom-up first.
        iters = [base.walk(top_down=False) for _ in range(100)]
        for i in range(depth + 1):
            expected = (path, ['d'] if i else [], [])
            for it in iters:
                self.assertEqual(next(it), expected)
            path = path.parent

        iters = [base.walk(top_down=True) for _ in range(100)]
        path = base
        for i in range(depth + 1):
            expected = (path, ['d'] if i < depth else [], [])
            for it in iters:
                self.assertEqual(next(it), expected)
            path = path / 'd'
|
||||||
|
|
||||||
|
|
||||||
class PathTest(_BasePathTest, unittest.TestCase):
|
class PathTest(_BasePathTest, unittest.TestCase):
|
||||||
cls = pathlib.Path
|
cls = pathlib.Path
|
||||||
|
@ -0,0 +1 @@
|
|||||||
|
Add :meth:`pathlib.Path.walk` as an alternative to :func:`os.walk`.
|
Loading…
x
Reference in New Issue
Block a user