gh-74598: add fnmatch.filterfalse for excluding names matching a pattern (#121185)

This commit is contained in:
Bénédikt Tran 2025-04-08 12:11:25 +02:00 committed by GitHub
parent ee3657209b
commit 3eda146035
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 102 additions and 40 deletions

View File

@ -90,6 +90,16 @@ functions: :func:`fnmatch`, :func:`fnmatchcase`, :func:`.filter`.
but implemented more efficiently. but implemented more efficiently.
.. function:: filterfalse(names, pat)
Construct a list from those elements of the :term:`iterable` of filename
strings *names* that do not match the pattern string *pat*.
It is the same as ``[n for n in names if not fnmatch(n, pat)]``,
but implemented more efficiently.
.. versionadded:: next
.. function:: translate(pat) .. function:: translate(pat)
Return the shell-style pattern *pat* converted to a regular expression for Return the shell-style pattern *pat* converted to a regular expression for

View File

@ -677,6 +677,13 @@ errno
(Contributed by James Roy in :gh:`126585`.) (Contributed by James Roy in :gh:`126585`.)
fnmatch
-------
* Added :func:`fnmatch.filterfalse` for excluding names matching a pattern.
(Contributed by Bénédikt Tran in :gh:`74598`.)
fractions fractions
--------- ---------

View File

@ -9,12 +9,15 @@ expression. They cache the compiled regular expressions for speed.
The function translate(PATTERN) returns a regular expression The function translate(PATTERN) returns a regular expression
corresponding to PATTERN. (It does not compile it.) corresponding to PATTERN. (It does not compile it.)
""" """
import functools
import itertools
import os import os
import posixpath import posixpath
import re import re
import functools
__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"] __all__ = ["filter", "filterfalse", "fnmatch", "fnmatchcase", "translate"]
def fnmatch(name, pat): def fnmatch(name, pat):
"""Test whether FILENAME matches PATTERN. """Test whether FILENAME matches PATTERN.
@ -35,6 +38,7 @@ def fnmatch(name, pat):
pat = os.path.normcase(pat) pat = os.path.normcase(pat)
return fnmatchcase(name, pat) return fnmatchcase(name, pat)
@functools.lru_cache(maxsize=32768, typed=True) @functools.lru_cache(maxsize=32768, typed=True)
def _compile_pattern(pat): def _compile_pattern(pat):
if isinstance(pat, bytes): if isinstance(pat, bytes):
@ -45,6 +49,7 @@ def _compile_pattern(pat):
res = translate(pat) res = translate(pat)
return re.compile(res).match return re.compile(res).match
def filter(names, pat): def filter(names, pat):
"""Construct a list from those elements of the iterable NAMES that match PAT.""" """Construct a list from those elements of the iterable NAMES that match PAT."""
result = [] result = []
@ -61,6 +66,22 @@ def filter(names, pat):
result.append(name) result.append(name)
return result return result
def filterfalse(names, pat):
    """Return a list of the items of the iterable NAMES that PAT rejects.

    PAT is passed through os.path.normcase() before being compiled; each
    name is normalized the same way before it is tested, but the names
    placed in the returned list are the original, un-normalized ones.
    """
    matcher = _compile_pattern(os.path.normcase(pat))
    if os.path is not posixpath:
        # Normalize every candidate before testing it against the pattern.
        return [
            entry for entry in names
            if matcher(os.path.normcase(entry)) is None
        ]
    # normcase on posix is a no-op, so skip it and let itertools do the
    # whole loop.
    return list(itertools.filterfalse(matcher, names))
def fnmatchcase(name, pat): def fnmatchcase(name, pat):
"""Test whether FILENAME matches PATTERN, including case. """Test whether FILENAME matches PATTERN, including case.
@ -80,9 +101,11 @@ def translate(pat):
parts, star_indices = _translate(pat, '*', '.') parts, star_indices = _translate(pat, '*', '.')
return _join_translated_parts(parts, star_indices) return _join_translated_parts(parts, star_indices)
_re_setops_sub = re.compile(r'([&~|])').sub _re_setops_sub = re.compile(r'([&~|])').sub
_re_escape = functools.lru_cache(maxsize=512)(re.escape) _re_escape = functools.lru_cache(maxsize=512)(re.escape)
def _translate(pat, star, question_mark): def _translate(pat, star, question_mark):
res = [] res = []
add = res.append add = res.append

View File

@ -1,11 +1,15 @@
"""Test cases for the fnmatch module.""" """Test cases for the fnmatch module."""
import unittest
import os import os
import string import string
import unittest
import warnings import warnings
from fnmatch import fnmatch, fnmatchcase, translate, filter, filterfalse
IGNORECASE = os.path.normcase('P') == os.path.normcase('p')
NORMSEP = os.path.normcase('\\') == os.path.normcase('/')
from fnmatch import fnmatch, fnmatchcase, translate, filter
class FnmatchTestCase(unittest.TestCase): class FnmatchTestCase(unittest.TestCase):
@ -77,23 +81,20 @@ class FnmatchTestCase(unittest.TestCase):
self.check_match(b'foo\nbar', b'foo*') self.check_match(b'foo\nbar', b'foo*')
def test_case(self): def test_case(self):
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
check = self.check_match check = self.check_match
check('abc', 'abc') check('abc', 'abc')
check('AbC', 'abc', ignorecase) check('AbC', 'abc', IGNORECASE)
check('abc', 'AbC', ignorecase) check('abc', 'AbC', IGNORECASE)
check('AbC', 'AbC') check('AbC', 'AbC')
def test_sep(self): def test_sep(self):
normsep = os.path.normcase('\\') == os.path.normcase('/')
check = self.check_match check = self.check_match
check('usr/bin', 'usr/bin') check('usr/bin', 'usr/bin')
check('usr\\bin', 'usr/bin', normsep) check('usr\\bin', 'usr/bin', NORMSEP)
check('usr/bin', 'usr\\bin', normsep) check('usr/bin', 'usr\\bin', NORMSEP)
check('usr\\bin', 'usr\\bin') check('usr\\bin', 'usr\\bin')
def test_char_set(self): def test_char_set(self):
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
check = self.check_match check = self.check_match
tescases = string.ascii_lowercase + string.digits + string.punctuation tescases = string.ascii_lowercase + string.digits + string.punctuation
for c in tescases: for c in tescases:
@ -101,11 +102,11 @@ class FnmatchTestCase(unittest.TestCase):
check(c, '[!az]', c not in 'az') check(c, '[!az]', c not in 'az')
# Case insensitive. # Case insensitive.
for c in tescases: for c in tescases:
check(c, '[AZ]', (c in 'az') and ignorecase) check(c, '[AZ]', (c in 'az') and IGNORECASE)
check(c, '[!AZ]', (c not in 'az') or not ignorecase) check(c, '[!AZ]', (c not in 'az') or not IGNORECASE)
for c in string.ascii_uppercase: for c in string.ascii_uppercase:
check(c, '[az]', (c in 'AZ') and ignorecase) check(c, '[az]', (c in 'AZ') and IGNORECASE)
check(c, '[!az]', (c not in 'AZ') or not ignorecase) check(c, '[!az]', (c not in 'AZ') or not IGNORECASE)
# Repeated same character. # Repeated same character.
for c in tescases: for c in tescases:
check(c, '[aa]', c == 'a') check(c, '[aa]', c == 'a')
@ -120,8 +121,6 @@ class FnmatchTestCase(unittest.TestCase):
check('[!]', '[!]') check('[!]', '[!]')
def test_range(self): def test_range(self):
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
normsep = os.path.normcase('\\') == os.path.normcase('/')
check = self.check_match check = self.check_match
tescases = string.ascii_lowercase + string.digits + string.punctuation tescases = string.ascii_lowercase + string.digits + string.punctuation
for c in tescases: for c in tescases:
@ -131,11 +130,11 @@ class FnmatchTestCase(unittest.TestCase):
check(c, '[!b-dx-z]', c not in 'bcdxyz') check(c, '[!b-dx-z]', c not in 'bcdxyz')
# Case insensitive. # Case insensitive.
for c in tescases: for c in tescases:
check(c, '[B-D]', (c in 'bcd') and ignorecase) check(c, '[B-D]', (c in 'bcd') and IGNORECASE)
check(c, '[!B-D]', (c not in 'bcd') or not ignorecase) check(c, '[!B-D]', (c not in 'bcd') or not IGNORECASE)
for c in string.ascii_uppercase: for c in string.ascii_uppercase:
check(c, '[b-d]', (c in 'BCD') and ignorecase) check(c, '[b-d]', (c in 'BCD') and IGNORECASE)
check(c, '[!b-d]', (c not in 'BCD') or not ignorecase) check(c, '[!b-d]', (c not in 'BCD') or not IGNORECASE)
# Upper bound == lower bound. # Upper bound == lower bound.
for c in tescases: for c in tescases:
check(c, '[b-b]', c == 'b') check(c, '[b-b]', c == 'b')
@ -144,7 +143,7 @@ class FnmatchTestCase(unittest.TestCase):
check(c, '[!-#]', c not in '-#') check(c, '[!-#]', c not in '-#')
check(c, '[!--.]', c not in '-.') check(c, '[!--.]', c not in '-.')
check(c, '[^-`]', c in '^_`') check(c, '[^-`]', c in '^_`')
if not (normsep and c == '/'): if not (NORMSEP and c == '/'):
check(c, '[[-^]', c in r'[\]^') check(c, '[[-^]', c in r'[\]^')
check(c, r'[\-^]', c in r'\]^') check(c, r'[\-^]', c in r'\]^')
check(c, '[b-]', c in '-b') check(c, '[b-]', c in '-b')
@ -160,47 +159,45 @@ class FnmatchTestCase(unittest.TestCase):
check(c, '[d-bx-z]', c in 'xyz') check(c, '[d-bx-z]', c in 'xyz')
check(c, '[!d-bx-z]', c not in 'xyz') check(c, '[!d-bx-z]', c not in 'xyz')
check(c, '[d-b^-`]', c in '^_`') check(c, '[d-b^-`]', c in '^_`')
if not (normsep and c == '/'): if not (NORMSEP and c == '/'):
check(c, '[d-b[-^]', c in r'[\]^') check(c, '[d-b[-^]', c in r'[\]^')
def test_sep_in_char_set(self): def test_sep_in_char_set(self):
normsep = os.path.normcase('\\') == os.path.normcase('/')
check = self.check_match check = self.check_match
check('/', r'[/]') check('/', r'[/]')
check('\\', r'[\]') check('\\', r'[\]')
check('/', r'[\]', normsep) check('/', r'[\]', NORMSEP)
check('\\', r'[/]', normsep) check('\\', r'[/]', NORMSEP)
check('[/]', r'[/]', False) check('[/]', r'[/]', False)
check(r'[\\]', r'[/]', False) check(r'[\\]', r'[/]', False)
check('\\', r'[\t]') check('\\', r'[\t]')
check('/', r'[\t]', normsep) check('/', r'[\t]', NORMSEP)
check('t', r'[\t]') check('t', r'[\t]')
check('\t', r'[\t]', False) check('\t', r'[\t]', False)
def test_sep_in_range(self): def test_sep_in_range(self):
normsep = os.path.normcase('\\') == os.path.normcase('/')
check = self.check_match check = self.check_match
check('a/b', 'a[.-0]b', not normsep) check('a/b', 'a[.-0]b', not NORMSEP)
check('a\\b', 'a[.-0]b', False) check('a\\b', 'a[.-0]b', False)
check('a\\b', 'a[Z-^]b', not normsep) check('a\\b', 'a[Z-^]b', not NORMSEP)
check('a/b', 'a[Z-^]b', False) check('a/b', 'a[Z-^]b', False)
check('a/b', 'a[/-0]b', not normsep) check('a/b', 'a[/-0]b', not NORMSEP)
check(r'a\b', 'a[/-0]b', False) check(r'a\b', 'a[/-0]b', False)
check('a[/-0]b', 'a[/-0]b', False) check('a[/-0]b', 'a[/-0]b', False)
check(r'a[\-0]b', 'a[/-0]b', False) check(r'a[\-0]b', 'a[/-0]b', False)
check('a/b', 'a[.-/]b') check('a/b', 'a[.-/]b')
check(r'a\b', 'a[.-/]b', normsep) check(r'a\b', 'a[.-/]b', NORMSEP)
check('a[.-/]b', 'a[.-/]b', False) check('a[.-/]b', 'a[.-/]b', False)
check(r'a[.-\]b', 'a[.-/]b', False) check(r'a[.-\]b', 'a[.-/]b', False)
check(r'a\b', r'a[\-^]b') check(r'a\b', r'a[\-^]b')
check('a/b', r'a[\-^]b', normsep) check('a/b', r'a[\-^]b', NORMSEP)
check(r'a[\-^]b', r'a[\-^]b', False) check(r'a[\-^]b', r'a[\-^]b', False)
check('a[/-^]b', r'a[\-^]b', False) check('a[/-^]b', r'a[\-^]b', False)
check(r'a\b', r'a[Z-\]b', not normsep) check(r'a\b', r'a[Z-\]b', not NORMSEP)
check('a/b', r'a[Z-\]b', False) check('a/b', r'a[Z-\]b', False)
check(r'a[Z-\]b', r'a[Z-\]b', False) check(r'a[Z-\]b', r'a[Z-\]b', False)
check('a[Z-/]b', r'a[Z-\]b', False) check('a[Z-/]b', r'a[Z-\]b', False)
@ -332,18 +329,41 @@ class FilterTestCase(unittest.TestCase):
self.assertRaises(TypeError, filter, [b'test'], '*') self.assertRaises(TypeError, filter, [b'test'], '*')
def test_case(self): def test_case(self):
ignorecase = os.path.normcase('P') == os.path.normcase('p')
self.assertEqual(filter(['Test.py', 'Test.rb', 'Test.PL'], '*.p*'), self.assertEqual(filter(['Test.py', 'Test.rb', 'Test.PL'], '*.p*'),
['Test.py', 'Test.PL'] if ignorecase else ['Test.py']) ['Test.py', 'Test.PL'] if IGNORECASE else ['Test.py'])
self.assertEqual(filter(['Test.py', 'Test.rb', 'Test.PL'], '*.P*'), self.assertEqual(filter(['Test.py', 'Test.rb', 'Test.PL'], '*.P*'),
['Test.py', 'Test.PL'] if ignorecase else ['Test.PL']) ['Test.py', 'Test.PL'] if IGNORECASE else ['Test.PL'])
def test_sep(self): def test_sep(self):
normsep = os.path.normcase('\\') == os.path.normcase('/')
self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr/*'), self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr/*'),
['usr/bin', 'usr\\lib'] if normsep else ['usr/bin']) ['usr/bin', 'usr\\lib'] if NORMSEP else ['usr/bin'])
self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr\\*'), self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr\\*'),
['usr/bin', 'usr\\lib'] if normsep else ['usr\\lib']) ['usr/bin', 'usr\\lib'] if NORMSEP else ['usr\\lib'])
class FilterFalseTestCase(unittest.TestCase):
    # Exercises filterfalse(): the names that do NOT match the pattern
    # must be returned, in their original order.

    def test_filterfalse(self):
        # Same expectations for str input and for bytes input.
        kept = filterfalse(['Python', 'Ruby', 'Perl', 'Tcl'], 'P*')
        self.assertListEqual(kept, ['Ruby', 'Tcl'])
        kept = filterfalse([b'Python', b'Ruby', b'Perl', b'Tcl'], b'P*')
        self.assertListEqual(kept, [b'Ruby', b'Tcl'])

    def test_mix_bytes_str(self):
        # A str name with a bytes pattern (and the reverse) is rejected.
        with self.assertRaises(TypeError):
            filterfalse(['test'], b'*')
        with self.assertRaises(TypeError):
            filterfalse([b'test'], '*')

    def test_case(self):
        # The expected survivors depend on whether normcase folds case.
        if IGNORECASE:
            expected = ['Test.rb']
        else:
            expected = ['Test.rb', 'Test.PL']
        self.assertEqual(
            filterfalse(['Test.py', 'Test.rb', 'Test.PL'], '*.p*'),
            expected)

        if IGNORECASE:
            expected = ['Test.rb']
        else:
            expected = ['Test.py', 'Test.rb']
        self.assertEqual(
            filterfalse(['Test.py', 'Test.rb', 'Test.PL'], '*.P*'),
            expected)

    def test_sep(self):
        # The expected survivors depend on whether normcase unifies
        # the two path separators.
        if NORMSEP:
            expected = ['usr']
        else:
            expected = ['usr', 'usr\\lib']
        self.assertEqual(
            filterfalse(['usr/bin', 'usr', 'usr\\lib'], 'usr/*'),
            expected)

        if NORMSEP:
            expected = ['usr']
        else:
            expected = ['usr/bin', 'usr']
        self.assertEqual(
            filterfalse(['usr/bin', 'usr', 'usr\\lib'], 'usr\\*'),
            expected)
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -0,0 +1,2 @@
Add :func:`fnmatch.filterfalse` for excluding names matching a pattern.
Patch by Bénédikt Tran.