Skip to content

Commit

Permalink
Fix stack overflow in RegExp for long string (#12462)
Browse files Browse the repository at this point in the history
  • Loading branch information
slow-J authored Aug 17, 2023
1 parent 368dbff commit fb81833
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 11 deletions.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,8 @@ Bug Fixes

* GITHUB#9660: Throw an ArithmeticException when the offset overflows in a ByteBlockPool. (Stefan Vodita)

* GITHUB#11537: Fix stack overflow in RegExp for long strings by reducing recursion. (Jakub Slowinski)

* GITHUB#12388: JoinUtil queries were ignoring boosts. (Alan Woodward)

* GITHUB#12413: Fix HNSW graph search bug that potentially leaked unapproved docs (Ben Trent).
Expand Down
41 changes: 30 additions & 11 deletions lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.BooleanSupplier;
import java.util.function.Supplier;

/**
* Regular Expression extension to <code>Automaton</code>.
Expand Down Expand Up @@ -1067,22 +1069,39 @@ private boolean check(int flag) {
}

/**
 * Parses a union expression: one intersection-level expression, optionally followed by further
 * ones each introduced by {@code '|'}.
 *
 * <p>NOTE(review): this listing is a diff rendered without +/- markers, so the body below shows
 * two alternative implementations back to back. Confirm against the real file which lines are
 * live before relying on either.
 */
final RegExp parseUnionExp() throws IllegalArgumentException {
// Recursive form (appears to be the lines this commit removes): the method calls itself once
// for every '|' it consumes.
RegExp e = parseInterExp();
if (match('|')) e = makeUnion(flags, e, parseUnionExp());
return e;
// Iterative form (appears to be the line this commit adds): the operands are gathered and
// combined inside iterativeParseExp instead of through self-recursion.
return iterativeParseExp(this::parseInterExp, () -> match('|'), RegExp::makeUnion);
}

/**
 * Parses an intersection expression: one concatenation-level expression, optionally followed by
 * further ones each introduced by {@code '&'}. The {@code '&'} operator is only recognised when
 * {@code check(INTERSECTION)} is true.
 *
 * <p>NOTE(review): this listing is a diff rendered without +/- markers, so the body below shows
 * two alternative implementations back to back. Confirm against the real file which lines are
 * live before relying on either.
 */
final RegExp parseInterExp() throws IllegalArgumentException {
// Recursive form (appears to be the lines this commit removes): the method calls itself once
// for every '&' it consumes.
RegExp e = parseConcatExp();
if (check(INTERSECTION) && match('&')) e = makeIntersection(flags, e, parseInterExp());
return e;
// Iterative form (appears to be the lines this commit adds): same operand parser, same
// continuation test, combined through iterativeParseExp.
return iterativeParseExp(
this::parseConcatExp, () -> check(INTERSECTION) && match('&'), RegExp::makeIntersection);
}

/**
 * Parses a concatenation: one repeat-level expression followed by as many further ones as the
 * continuation test allows.
 *
 * <p>The continuation test holds while {@code more()} is true, {@code peek(")|")} is false, and
 * either {@code check(INTERSECTION)} is false or {@code peek("&")} is false. Presumably
 * {@code peek} reports whether the next character is one of those in its argument, so the
 * sequence ends at a closing parenthesis or a binary operator; verify against {@code peek}.
 *
 * <p>NOTE(review): this listing is a diff rendered without +/- markers, so the body below shows
 * two alternative implementations back to back. Confirm against the real file which lines are
 * live before relying on either.
 */
final RegExp parseConcatExp() throws IllegalArgumentException {
// Recursive form (appears to be the lines this commit removes): the method calls itself once
// for every additional operand in the sequence.
RegExp e = parseRepeatExp();
if (more() && !peek(")|") && (!check(INTERSECTION) || !peek("&")))
e = makeConcatenation(flags, e, parseConcatExp());
return e;
// Iterative form (appears to be the lines this commit adds): the same continuation test is
// passed as a lambda to iterativeParseExp.
return iterativeParseExp(
this::parseRepeatExp,
() -> (more() && !peek(")|") && (!check(INTERSECTION) || !peek("&"))),
RegExp::makeConcatenation);
}

/**
 * Functional interface for a method that builds a {@code RegExp} from an {@code int} and two
 * {@code RegExp} operands, i.e. one with the signature {@code RegExp(int, RegExp, RegExp)}.
 *
 * <p>It lets {@code RegExp::makeUnion}, {@code RegExp::makeIntersection} and {@code
 * RegExp::makeConcatenation} be passed to {@code iterativeParseExp} as method references;
 * {@code java.util.function} has no built-in three-argument function type.
 */
@FunctionalInterface
private interface MakeRegexGroup {
// int1 receives the parser's flags at the only call site visible here (iterativeParseExp);
// exp1 is the expression accumulated so far and exp2 the newly parsed operand.
RegExp get(int int1, RegExp exp1, RegExp exp2);
}

/**
 * Parses a run of operands and folds them, left to right, into a single expression without
 * recursing once per operand.
 *
 * <p>One operand is always read first. After that, every time {@code stop} returns {@code true}
 * another operand is read and combined with the result accumulated so far; the loop ends the
 * first time {@code stop} returns {@code false}. Note that, despite its name, a {@code true}
 * result from {@code stop} means "keep going".
 *
 * @param gather parses and returns the next operand
 * @param stop continuation test, evaluated before each additional operand is read
 * @param associativeReduce combines the accumulated expression (second argument) with the newly
 *     parsed operand (third argument); the parser's {@code flags} are passed as first argument
 * @return the single operand if {@code stop} is false immediately, otherwise the combined
 *     expression
 * @throws IllegalArgumentException declared for parity with the other parse methods; presumably
 *     propagated from {@code gather} on malformed input
 */
final RegExp iterativeParseExp(
    Supplier<RegExp> gather, BooleanSupplier stop, MakeRegexGroup associativeReduce)
    throws IllegalArgumentException {
  RegExp combined = gather.get();
  for (boolean keepGoing = stop.getAsBoolean(); keepGoing; keepGoing = stop.getAsBoolean()) {
    // Read the operand before combining so the call order matches: stop, gather, reduce.
    RegExp operand = gather.get();
    combined = associativeReduce.get(flags, combined, operand);
  }
  return combined;
}

final RegExp parseRepeatExp() throws IllegalArgumentException {
Expand Down Expand Up @@ -1216,7 +1235,7 @@ else if (match('"')) {
if (i == 0 || i == s.length() - 1 || i != s.lastIndexOf('-'))
throw new NumberFormatException();
String smin = s.substring(0, i);
String smax = s.substring(i + 1, s.length());
String smax = s.substring(i + 1);
int imin = Integer.parseInt(smin);
int imax = Integer.parseInt(smax);
int digits;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,4 +245,8 @@ protected String checkRandomExpression(String docValue) {
}
return regexPattern;
}

/**
 * Regression test: constructing a {@code RegExp} from a pattern with 50000 {@code '|'}
 * alternations must complete; the test passes as long as the constructor returns normally.
 */
public void testRegExpNoStackOverflow() {
  String longUnionPattern = "(a)|".repeat(50000) + "(a)";
  new RegExp(longUnionPattern);
}
}

0 comments on commit fb81833

Please sign in to comment.