gh-74598: add fnmatch.filterfalse for excluding names matching a pattern (#121185)

2025-04-08 12:11:25 +02:00 · 2025-04-08 12:11:25 +02:00 · 3eda146035
commit 3eda146035
parent ee3657209b
5 changed files with 102 additions and 40 deletions
--- a/Doc/library/fnmatch.rst
+++ b/Doc/library/fnmatch.rst
@ -90,6 +90,16 @@ functions: :func:`fnmatch`, :func:`fnmatchcase`, :func:`.filter`.
   but implemented more efficiently.


+.. function:: filterfalse(names, pat)
+
+   Construct a list from those elements of the :term:`iterable` of filename
+   strings *names* that do not match the pattern string *pat*.
+   It is the same as ``[n for n in names if not fnmatch(n, pat)]``,
+   but implemented more efficiently.
+
+   .. versionadded:: next
+
+
 .. function:: translate(pat)

   Return the shell-style pattern *pat* converted to a regular expression for
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@ -677,6 +677,13 @@ errno
  (Contributed by James Roy in :gh:`126585`.)


+fnmatch
+-------
+
+* Added :func:`fnmatch.filterfalse` for excluding names matching a pattern.
+  (Contributed by Bénédikt Tran in :gh:`74598`.)
+
+
 fractions
 ---------

--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@ -9,12 +9,15 @@ expression.  They cache the compiled regular expressions for speed.
 The function translate(PATTERN) returns a regular expression
 corresponding to PATTERN.  (It does not compile it.)
 """
+
+import functools
+import itertools
 import os
 import posixpath
 import re
-import functools

-__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
+__all__ = ["filter", "filterfalse", "fnmatch", "fnmatchcase", "translate"]
+

 def fnmatch(name, pat):
    """Test whether FILENAME matches PATTERN.
@ -35,6 +38,7 @@ def fnmatch(name, pat):
    pat = os.path.normcase(pat)
    return fnmatchcase(name, pat)

+
@functools.lru_cache(maxsize=32768, typed=True)
 def _compile_pattern(pat):
    if isinstance(pat, bytes):
@ -45,6 +49,7 @@ def _compile_pattern(pat):
        res = translate(pat)
    return re.compile(res).match

+
 def filter(names, pat):
    """Construct a list from those elements of the iterable NAMES that match PAT."""
    result = []
@ -61,6 +66,22 @@ def filter(names, pat):
                result.append(name)
    return result

+
+def filterfalse(names, pat):
+    """Construct a list from those elements of the iterable NAMES that do not match PAT."""
+    pat = os.path.normcase(pat)  # normalize the pattern once, before iterating
+    match = _compile_pattern(pat)  # cached bound ``match`` of the compiled regex
+    if os.path is posixpath:
+        # normcase on posix is NOP. Optimize it away from the loop.
+        return list(itertools.filterfalse(match, names))
+
+    result = []
+    for name in names:
+        if match(os.path.normcase(name)) is None:  # None: no match, so keep it
+            result.append(name)  # append the original, un-normalized name
+    return result
+
+
 def fnmatchcase(name, pat):
    """Test whether FILENAME matches PATTERN, including case.

@ -80,9 +101,11 @@ def translate(pat):
    parts, star_indices = _translate(pat, '*', '.')
    return _join_translated_parts(parts, star_indices)

+
 _re_setops_sub = re.compile(r'([&~|])').sub
 _re_escape = functools.lru_cache(maxsize=512)(re.escape)

+
 def _translate(pat, star, question_mark):
    res = []
    add = res.append
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@ -1,11 +1,15 @@
 """Test cases for the fnmatch module."""

-import unittest
 import os
 import string
+import unittest
 import warnings
+from fnmatch import fnmatch, fnmatchcase, translate, filter, filterfalse
+
+
+IGNORECASE = os.path.normcase('P') == os.path.normcase('p')
+NORMSEP = os.path.normcase('\\') == os.path.normcase('/')

-from fnmatch import fnmatch, fnmatchcase, translate, filter

 class FnmatchTestCase(unittest.TestCase):

@ -77,23 +81,20 @@ class FnmatchTestCase(unittest.TestCase):
        self.check_match(b'foo\nbar', b'foo*')

    def test_case(self):
-        ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
        check = self.check_match
        check('abc', 'abc')
-        check('AbC', 'abc', ignorecase)
-        check('abc', 'AbC', ignorecase)
+        check('AbC', 'abc', IGNORECASE)
+        check('abc', 'AbC', IGNORECASE)
        check('AbC', 'AbC')

    def test_sep(self):
-        normsep = os.path.normcase('\\') == os.path.normcase('/')
        check = self.check_match
        check('usr/bin', 'usr/bin')
-        check('usr\\bin', 'usr/bin', normsep)
-        check('usr/bin', 'usr\\bin', normsep)
+        check('usr\\bin', 'usr/bin', NORMSEP)
+        check('usr/bin', 'usr\\bin', NORMSEP)
        check('usr\\bin', 'usr\\bin')

    def test_char_set(self):
-        ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
        check = self.check_match
        tescases = string.ascii_lowercase + string.digits + string.punctuation
        for c in tescases:
@ -101,11 +102,11 @@ class FnmatchTestCase(unittest.TestCase):
            check(c, '[!az]', c not in 'az')
        # Case insensitive.
        for c in tescases:
-            check(c, '[AZ]', (c in 'az') and ignorecase)
-            check(c, '[!AZ]', (c not in 'az') or not ignorecase)
+            check(c, '[AZ]', (c in 'az') and IGNORECASE)
+            check(c, '[!AZ]', (c not in 'az') or not IGNORECASE)
        for c in string.ascii_uppercase:
-            check(c, '[az]', (c in 'AZ') and ignorecase)
-            check(c, '[!az]', (c not in 'AZ') or not ignorecase)
+            check(c, '[az]', (c in 'AZ') and IGNORECASE)
+            check(c, '[!az]', (c not in 'AZ') or not IGNORECASE)
        # Repeated same character.
        for c in tescases:
            check(c, '[aa]', c == 'a')
@ -120,8 +121,6 @@ class FnmatchTestCase(unittest.TestCase):
        check('[!]', '[!]')

    def test_range(self):
-        ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
-        normsep = os.path.normcase('\\') == os.path.normcase('/')
        check = self.check_match
        tescases = string.ascii_lowercase + string.digits + string.punctuation
        for c in tescases:
@ -131,11 +130,11 @@ class FnmatchTestCase(unittest.TestCase):
            check(c, '[!b-dx-z]', c not in 'bcdxyz')
        # Case insensitive.
        for c in tescases:
-            check(c, '[B-D]', (c in 'bcd') and ignorecase)
-            check(c, '[!B-D]', (c not in 'bcd') or not ignorecase)
+            check(c, '[B-D]', (c in 'bcd') and IGNORECASE)
+            check(c, '[!B-D]', (c not in 'bcd') or not IGNORECASE)
        for c in string.ascii_uppercase:
-            check(c, '[b-d]', (c in 'BCD') and ignorecase)
-            check(c, '[!b-d]', (c not in 'BCD') or not ignorecase)
+            check(c, '[b-d]', (c in 'BCD') and IGNORECASE)
+            check(c, '[!b-d]', (c not in 'BCD') or not IGNORECASE)
        # Upper bound == lower bound.
        for c in tescases:
            check(c, '[b-b]', c == 'b')
@ -144,7 +143,7 @@ class FnmatchTestCase(unittest.TestCase):
            check(c, '[!-#]', c not in '-#')
            check(c, '[!--.]', c not in '-.')
            check(c, '[^-`]', c in '^_`')
-            if not (normsep and c == '/'):
+            if not (NORMSEP and c == '/'):
                check(c, '[[-^]', c in r'[\]^')
                check(c, r'[\-^]', c in r'\]^')
            check(c, '[b-]', c in '-b')
@ -160,47 +159,45 @@ class FnmatchTestCase(unittest.TestCase):
            check(c, '[d-bx-z]', c in 'xyz')
            check(c, '[!d-bx-z]', c not in 'xyz')
            check(c, '[d-b^-`]', c in '^_`')
-            if not (normsep and c == '/'):
+            if not (NORMSEP and c == '/'):
                check(c, '[d-b[-^]', c in r'[\]^')

    def test_sep_in_char_set(self):
-        normsep = os.path.normcase('\\') == os.path.normcase('/')
        check = self.check_match
        check('/', r'[/]')
        check('\\', r'[\]')
-        check('/', r'[\]', normsep)
-        check('\\', r'[/]', normsep)
+        check('/', r'[\]', NORMSEP)
+        check('\\', r'[/]', NORMSEP)
        check('[/]', r'[/]', False)
        check(r'[\\]', r'[/]', False)
        check('\\', r'[\t]')
-        check('/', r'[\t]', normsep)
+        check('/', r'[\t]', NORMSEP)
        check('t', r'[\t]')
        check('\t', r'[\t]', False)

    def test_sep_in_range(self):
-        normsep = os.path.normcase('\\') == os.path.normcase('/')
        check = self.check_match
-        check('a/b', 'a[.-0]b', not normsep)
+        check('a/b', 'a[.-0]b', not NORMSEP)
        check('a\\b', 'a[.-0]b', False)
-        check('a\\b', 'a[Z-^]b', not normsep)
+        check('a\\b', 'a[Z-^]b', not NORMSEP)
        check('a/b', 'a[Z-^]b', False)

-        check('a/b', 'a[/-0]b', not normsep)
+        check('a/b', 'a[/-0]b', not NORMSEP)
        check(r'a\b', 'a[/-0]b', False)
        check('a[/-0]b', 'a[/-0]b', False)
        check(r'a[\-0]b', 'a[/-0]b', False)

        check('a/b', 'a[.-/]b')
-        check(r'a\b', 'a[.-/]b', normsep)
+        check(r'a\b', 'a[.-/]b', NORMSEP)
        check('a[.-/]b', 'a[.-/]b', False)
        check(r'a[.-\]b', 'a[.-/]b', False)

        check(r'a\b', r'a[\-^]b')
-        check('a/b', r'a[\-^]b', normsep)
+        check('a/b', r'a[\-^]b', NORMSEP)
        check(r'a[\-^]b', r'a[\-^]b', False)
        check('a[/-^]b', r'a[\-^]b', False)

-        check(r'a\b', r'a[Z-\]b', not normsep)
+        check(r'a\b', r'a[Z-\]b', not NORMSEP)
        check('a/b', r'a[Z-\]b', False)
        check(r'a[Z-\]b', r'a[Z-\]b', False)
        check('a[Z-/]b', r'a[Z-\]b', False)
@ -332,18 +329,41 @@ class FilterTestCase(unittest.TestCase):
        self.assertRaises(TypeError, filter, [b'test'], '*')

    def test_case(self):
-        ignorecase = os.path.normcase('P') == os.path.normcase('p')
        self.assertEqual(filter(['Test.py', 'Test.rb', 'Test.PL'], '*.p*'),
-                         ['Test.py', 'Test.PL'] if ignorecase else ['Test.py'])
+                         ['Test.py', 'Test.PL'] if IGNORECASE else ['Test.py'])
        self.assertEqual(filter(['Test.py', 'Test.rb', 'Test.PL'], '*.P*'),
-                         ['Test.py', 'Test.PL'] if ignorecase else ['Test.PL'])
+                         ['Test.py', 'Test.PL'] if IGNORECASE else ['Test.PL'])

    def test_sep(self):
-        normsep = os.path.normcase('\\') == os.path.normcase('/')
        self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr/*'),
-                         ['usr/bin', 'usr\\lib'] if normsep else ['usr/bin'])
+                         ['usr/bin', 'usr\\lib'] if NORMSEP else ['usr/bin'])
        self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr\\*'),
-                         ['usr/bin', 'usr\\lib'] if normsep else ['usr\\lib'])
+                         ['usr/bin', 'usr\\lib'] if NORMSEP else ['usr\\lib'])
+
+
+class FilterFalseTestCase(unittest.TestCase):  # tests for fnmatch.filterfalse()
+
+    def test_filterfalse(self):  # str and bytes inputs: names matching 'P*' are dropped
+        actual = filterfalse(['Python', 'Ruby', 'Perl', 'Tcl'], 'P*')
+        self.assertListEqual(actual, ['Ruby', 'Tcl'])
+        actual = filterfalse([b'Python', b'Ruby', b'Perl', b'Tcl'], b'P*')
+        self.assertListEqual(actual, [b'Ruby', b'Tcl'])
+
+    def test_mix_bytes_str(self):  # mixing str names with a bytes pattern (or vice versa) raises
+        self.assertRaises(TypeError, filterfalse, ['test'], b'*')
+        self.assertRaises(TypeError, filterfalse, [b'test'], '*')
+
+    def test_case(self):  # IGNORECASE is True when os.path.normcase folds 'P' and 'p' together
+        self.assertEqual(filterfalse(['Test.py', 'Test.rb', 'Test.PL'], '*.p*'),
+                         ['Test.rb'] if IGNORECASE else ['Test.rb', 'Test.PL'])
+        self.assertEqual(filterfalse(['Test.py', 'Test.rb', 'Test.PL'], '*.P*'),
+                         ['Test.rb'] if IGNORECASE else ['Test.py', 'Test.rb',])
+
+    def test_sep(self):  # NORMSEP is True when os.path.normcase maps '\\' and '/' to the same value
+        self.assertEqual(filterfalse(['usr/bin', 'usr', 'usr\\lib'], 'usr/*'),
+                         ['usr'] if NORMSEP else ['usr', 'usr\\lib'])
+        self.assertEqual(filterfalse(['usr/bin', 'usr', 'usr\\lib'], 'usr\\*'),
+                         ['usr'] if NORMSEP else ['usr/bin', 'usr'])


 if __name__ == "__main__":
--- a/Misc/NEWS.d/next/Library/2024-06-30-17-00-00.gh-issue-74598.1gVy_8.rst
+++ b/Misc/NEWS.d/next/Library/2024-06-30-17-00-00.gh-issue-74598.1gVy_8.rst
@ -0,0 +1,2 @@
+Add :func:`fnmatch.filterfalse` for excluding names matching a pattern.
+Patch by Bénédikt Tran.