Skip to content

Commit

Permalink
Fix bugs in SQLScriptScanner with big String literals and PostgreSQ…
Browse files Browse the repository at this point in the history
…L identifiers (as introduced by #7646) (#7818)

Co-authored-by: Eddú Meléndez Gonzales <eddu.melendez@gmail.com>
  • Loading branch information
inponomarev and eddumelendez committed Nov 17, 2023
1 parent d80ce60 commit b59888a
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,7 @@ class ScriptScanner {

// One or more consecutive whitespace characters.
private final Pattern whitespace = Pattern.compile("\\s+");

// NOTE(review): this diff view has lost its +/- markers, and `identifier` is declared twice in
// this hunk. This declaration (no '$' in the character class) appears to be the pre-change
// version - confirm against the commit.
private final Pattern identifier = Pattern.compile("[a-z][a-z0-9_]*", Pattern.CASE_INSENSITIVE);

// A single-quoted literal: a quote, then any run of backslash-quote pairs or non-quote
// characters, then a closing quote.
// NOTE(review): appears to be removed by this commit; the scanner's next() shown further down
// calls matchesQuotedString('\'') instead - confirm.
private final Pattern singleQuotedString = Pattern.compile("'(\\\\'|[^'])*'");

// The same shape as singleQuotedString, but delimited by double quotes.
// NOTE(review): appears to be removed by this commit in favour of matchesQuotedString('"') - confirm.
private final Pattern ansiQuotedString = Pattern.compile("\"(\\\\\"|[^\"])*\"");
// An identifier: an ASCII letter followed by any number of letters, digits, '_' or '$',
// matched case-insensitively.
private final Pattern identifier = Pattern.compile("[a-z][a-z0-9_$]*", Pattern.CASE_INSENSITIVE);

// A dollar-quote delimiter: '$', zero or more word characters (the tag), then '$'.
private final Pattern dollarQuotedStringDelimiter = Pattern.compile("\\$\\w*\\$");

Expand All @@ -54,7 +50,8 @@ private boolean matches(String substring) {

private boolean matches(Pattern regexp) {
Matcher m = regexp.matcher(script);
if (m.find(offset) && m.start() == offset) {
m.region(offset, script.length());
if (m.lookingAt()) {
currentMatch = m.group();
offset = m.end();
return true;
Expand Down Expand Up @@ -99,6 +96,26 @@ private boolean matchesMultilineComment() {
return false;
}

/**
 * Tries to consume a quoted literal that starts at the current {@code offset}.
 *
 * <p>The literal must open with {@code quote} and ends at the first later occurrence of
 * {@code quote} that is not preceded by an active backslash escape. A backslash escapes
 * exactly one following character, whatever that character is. On success
 * {@code currentMatch} is set to the literal including both quotes and {@code offset} is
 * moved just past the closing quote. If no closing quote is found, nothing is modified.
 *
 * @param quote the quote character that opens and closes the literal
 * @return {@code true} if a complete quoted literal was consumed, {@code false} otherwise
 */
private boolean matchesQuotedString(final char quote) {
    if (script.charAt(offset) != quote) {
        return false;
    }

    boolean skipNext = false;
    int pos = offset + 1;
    while (pos < script.length()) {
        final char current = script.charAt(pos);
        if (skipNext) {
            // The previous character was a backslash: this one is taken literally.
            skipNext = false;
        } else if (current == '\\') {
            skipNext = true;
        } else if (current == quote) {
            final int end = pos + 1;
            currentMatch = script.substring(offset, end);
            offset = end;
            return true;
        }
        pos++;
    }

    // Reached the end of the script without an unescaped closing quote.
    return false;
}

private boolean matchesDollarQuotedString() {
//Matches $<tag>$ .... $<tag>$
if (matches(dollarQuotedStringDelimiter)) {
Expand All @@ -124,7 +141,7 @@ Lexem next() {
return Lexem.SEPARATOR;
} else if (matchesSingleLineComment() || matchesMultilineComment()) {
return Lexem.COMMENT;
} else if (matches(singleQuotedString) || matches(ansiQuotedString) || matchesDollarQuotedString()) {
} else if (matchesQuotedString('\'') || matchesQuotedString('"') || matchesDollarQuotedString()) {
return Lexem.QUOTED_STRING;
} else if (matches(identifier)) {
return Lexem.IDENTIFIER;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package org.testcontainers.ext;

import org.apache.commons.lang3.StringUtils;
import org.junit.Test;

import java.util.regex.Pattern;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Tests for {@link ScriptScanner}: a very long quoted literal, identifiers containing dollar
 * signs next to a dollar-quoted string, and quoted literals containing backslash-escaped quotes.
 */
public class ScriptScannerTest {

    /** A double-quoted literal with a 10000-character body is scanned as one QUOTED_STRING. */
    @Test
    public void testHugeStringLiteral() {
        final String literalBody = StringUtils.repeat('~', 10000);
        final ScriptScanner lexer = scanner("/* a comment */ \"" + literalBody + "\";");

        expectNext(lexer, ScriptScanner.Lexem.COMMENT);
        expectNext(lexer, ScriptScanner.Lexem.WHITESPACE);
        expectNext(lexer, ScriptScanner.Lexem.QUOTED_STRING);
        assertThat(lexer.getCurrentMatch()).matches(Pattern.compile("\"~+\""));
    }

    /**
     * An identifier containing '$' is a single IDENTIFIER, a $a$...$a$ span is a QUOTED_STRING,
     * and the '$' that directly follows the closing delimiter is reported as OTHER.
     */
    @Test
    public void testPgIdentifierWithDollarSigns() {
        final String sql =
            "this$is$a$valid$postgreSQL$identifier " +
            "$a$While this is a quoted string$a$$ --just followed by a dollar sign";
        final ScriptScanner lexer = scanner(sql);

        expectNext(lexer, ScriptScanner.Lexem.IDENTIFIER);
        expectNext(lexer, ScriptScanner.Lexem.WHITESPACE);
        expectNext(lexer, ScriptScanner.Lexem.QUOTED_STRING);
        expectNext(lexer, ScriptScanner.Lexem.OTHER);
    }

    /** Backslash-escaped quotes do not terminate single- or double-quoted literals. */
    @Test
    public void testQuotedLiterals() {
        final ScriptScanner lexer = scanner("'this \\'is a literal' \"this \\\" is a literal\"");

        expectNext(lexer, ScriptScanner.Lexem.QUOTED_STRING);
        assertThat(lexer.getCurrentMatch()).isEqualTo("'this \\'is a literal'");

        expectNext(lexer, ScriptScanner.Lexem.WHITESPACE);

        expectNext(lexer, ScriptScanner.Lexem.QUOTED_STRING);
        assertThat(lexer.getCurrentMatch()).isEqualTo("\"this \\\" is a literal\"");
    }

    /** Advances the scanner by one lexem and asserts that it has the expected type. */
    private static void expectNext(ScriptScanner lexer, ScriptScanner.Lexem expected) {
        assertThat(lexer.next()).isEqualTo(expected);
    }

    /** Builds a scanner over {@code script} using the default separator and comment delimiters. */
    private static ScriptScanner scanner(String script) {
        return new ScriptScanner(
            "dummy",
            script,
            ScriptUtils.DEFAULT_STATEMENT_SEPARATOR,
            ScriptUtils.DEFAULT_COMMENT_PREFIX,
            ScriptUtils.DEFAULT_BLOCK_COMMENT_START_DELIMITER,
            ScriptUtils.DEFAULT_BLOCK_COMMENT_END_DELIMITER
        );
    }
}

0 comments on commit b59888a

Please sign in to comment.