7189363: Regex Pattern compilation buggy for special sequences

Fixed the incorrect implementation in expr(...)

Reviewed-by: psandoz, alanb
This commit is contained in:
Xueming Shen 2012-08-09 10:15:26 -07:00
parent 12ab6cc327
commit ff93896a39
2 changed files with 34 additions and 4 deletions

View File

@ -1983,6 +1983,7 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
private Node expr(Node end) {
Node prev = null;
Node firstTail = null;
Branch branch = null;
Node branchConn = null;
for (;;) {
@ -2006,8 +2007,8 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
// the "tail.next" of each atom goes to branchConn
nodeTail.next = branchConn;
}
if (prev instanceof Branch) {
((Branch)prev).add(node);
if (prev == branch) {
branch.add(node);
} else {
if (prev == end) {
prev = null;
@ -2016,7 +2017,7 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
// when put the "prev" into the branch as the first atom.
firstTail.next = branchConn;
}
prev = new Branch(prev, node, branchConn);
prev = branch = new Branch(prev, node, branchConn);
}
}
if (peek() != '|') {

View File

@ -33,7 +33,7 @@
* 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
* 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
* 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
* 7067045 7014640
* 7067045 7014640 7189363
*/
import java.util.regex.*;
@ -143,6 +143,7 @@ public class RegExTest {
unicodeClassesTest();
horizontalAndVerticalWSTest();
linebreakTest();
branchTest();
if (failure) {
throw new
RuntimeException("RegExTest failed, 1st failure: " +
@ -3918,4 +3919,32 @@ public class RegExTest {
report("linebreakTest");
}
// #7189363
/*
 * Checks alternations whose first branch begins with a quantified group
 * and whose second branch is a plain literal. Every pattern is compiled and
 * applied to an input equal to its second branch; both find() and matches()
 * are expected to succeed. Evaluation stops at the first failing check, and
 * any failure bumps failCount exactly once before the result is reported.
 */
private static void branchTest() throws Exception {
    // Second branch is the single character "d".
    String[] singleCharBranch = {
        "(a)?bc|d",  "(a)+bc|d",  "(a)*bc|d",     // greedy
        "(a)??bc|d", "(a)+?bc|d", "(a)*?bc|d",    // reluctant
        "(a)?+bc|d", "(a)++bc|d", "(a)*+bc|d"     // possessive
    };
    // Second branch is the two-character literal "de".
    String[] twoCharBranch = {
        "(a)?bc|de", "(a)??bc|de"                 // others
    };

    boolean passed = true;

    // The "passed &&" guard in each loop keeps the short-circuit behaviour:
    // once one check fails, no further pattern is compiled or matched.
    for (int i = 0; passed && i < singleCharBranch.length; i++) {
        passed = Pattern.compile(singleCharBranch[i]).matcher("d").find();
    }
    for (int i = 0; passed && i < singleCharBranch.length; i++) {
        passed = Pattern.compile(singleCharBranch[i]).matcher("d").matches();
    }
    for (int i = 0; passed && i < twoCharBranch.length; i++) {
        passed = Pattern.compile(twoCharBranch[i]).matcher("de").find();
    }
    for (int i = 0; passed && i < twoCharBranch.length; i++) {
        passed = Pattern.compile(twoCharBranch[i]).matcher("de").matches();
    }

    if (!passed) {
        failCount++;
    }
    report("branchTest");
}
}