Issue #19387: explain and test the sre overlap table
This commit is contained in:
parent
e38b0544c4
commit
79aa68dfc1
@ -353,6 +353,27 @@ def _simple(av):
|
|||||||
lo, hi = av[2].getwidth()
|
lo, hi = av[2].getwidth()
|
||||||
return lo == hi == 1 and av[2][0][0] != SUBPATTERN
|
return lo == hi == 1 and av[2][0][0] != SUBPATTERN
|
||||||
|
|
||||||
|
def _generate_overlap_table(prefix):
|
||||||
|
"""
|
||||||
|
Generate an overlap table for the following prefix.
|
||||||
|
An overlap table is a table of the same size as the prefix which
|
||||||
|
informs about the potential self-overlap for each index in the prefix:
|
||||||
|
- if overlap[i] == 0, prefix[i:] can't overlap prefix[0:...]
|
||||||
|
- if overlap[i] == k with 0 < k <= i, prefix[i-k+1:i+1] overlaps with
|
||||||
|
prefix[0:k]
|
||||||
|
"""
|
||||||
|
table = [0] * len(prefix)
|
||||||
|
for i in range(1, len(prefix)):
|
||||||
|
idx = table[i - 1]
|
||||||
|
while prefix[i] != prefix[idx]:
|
||||||
|
if idx == 0:
|
||||||
|
table[i] = 0
|
||||||
|
break
|
||||||
|
idx = table[idx - 1]
|
||||||
|
else:
|
||||||
|
table[i] = idx + 1
|
||||||
|
return table
|
||||||
|
|
||||||
def _compile_info(code, pattern, flags):
|
def _compile_info(code, pattern, flags):
|
||||||
# internal: compile an info block. in the current version,
|
# internal: compile an info block. in the current version,
|
||||||
# this contains min/max pattern width, and an optional literal
|
# this contains min/max pattern width, and an optional literal
|
||||||
@ -449,12 +470,7 @@ def _compile_info(code, pattern, flags):
|
|||||||
emit(prefix_skip) # skip
|
emit(prefix_skip) # skip
|
||||||
code.extend(prefix)
|
code.extend(prefix)
|
||||||
# generate overlap table
|
# generate overlap table
|
||||||
table = [-1] + ([0]*len(prefix))
|
code.extend(_generate_overlap_table(prefix))
|
||||||
for i in range(len(prefix)):
|
|
||||||
table[i+1] = table[i]+1
|
|
||||||
while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
|
|
||||||
table[i+1] = table[table[i+1]-1]+1
|
|
||||||
code.extend(table[1:]) # don't store first entry
|
|
||||||
elif charset:
|
elif charset:
|
||||||
_compile_charset(charset, flags, code)
|
_compile_charset(charset, flags, code)
|
||||||
code[skip] = len(code) - skip
|
code[skip] = len(code) - skip
|
||||||
|
@ -3,10 +3,12 @@ from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
|
|||||||
import io
|
import io
|
||||||
import re
|
import re
|
||||||
from re import Scanner
|
from re import Scanner
|
||||||
|
import sre_compile
|
||||||
import sre_constants
|
import sre_constants
|
||||||
import sys
|
import sys
|
||||||
import string
|
import string
|
||||||
import traceback
|
import traceback
|
||||||
|
import unittest
|
||||||
from weakref import proxy
|
from weakref import proxy
|
||||||
|
|
||||||
# Misc tests from Tim Peters' re.doc
|
# Misc tests from Tim Peters' re.doc
|
||||||
@ -15,8 +17,6 @@ from weakref import proxy
|
|||||||
# what you're doing. Some of these tests were carefully modeled to
|
# what you're doing. Some of these tests were carefully modeled to
|
||||||
# cover most of the code.
|
# cover most of the code.
|
||||||
|
|
||||||
import unittest
|
|
||||||
|
|
||||||
class S(str):
|
class S(str):
|
||||||
def __getitem__(self, index):
|
def __getitem__(self, index):
|
||||||
return S(super().__getitem__(index))
|
return S(super().__getitem__(index))
|
||||||
@ -1140,6 +1140,22 @@ class ReTests(unittest.TestCase):
|
|||||||
self.assertEqual(m.group(1), "")
|
self.assertEqual(m.group(1), "")
|
||||||
self.assertEqual(m.group(2), "y")
|
self.assertEqual(m.group(2), "y")
|
||||||
|
|
||||||
|
|
||||||
|
class ImplementationTest(unittest.TestCase):
|
||||||
|
"""
|
||||||
|
Test implementation details of the re module.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def test_overlap_table(self):
|
||||||
|
f = sre_compile._generate_overlap_table
|
||||||
|
self.assertEqual(f(""), [])
|
||||||
|
self.assertEqual(f("a"), [0])
|
||||||
|
self.assertEqual(f("abcd"), [0, 0, 0, 0])
|
||||||
|
self.assertEqual(f("aaaa"), [0, 1, 2, 3])
|
||||||
|
self.assertEqual(f("ababba"), [0, 0, 1, 2, 0, 1])
|
||||||
|
self.assertEqual(f("abcabdac"), [0, 0, 0, 1, 2, 0, 1, 0])
|
||||||
|
|
||||||
|
|
||||||
def run_re_tests():
|
def run_re_tests():
|
||||||
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
|
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
|
||||||
if verbose:
|
if verbose:
|
||||||
@ -1269,7 +1285,7 @@ def run_re_tests():
|
|||||||
|
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
run_unittest(ReTests)
|
run_unittest(__name__)
|
||||||
run_re_tests()
|
run_re_tests()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
Loading…
x
Reference in New Issue
Block a user