Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bugs in SQLScriptScanner with big String literals and PostgreSQL identifiers (as introduced by #7646) #7818

Merged
merged 5 commits into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,7 @@ class ScriptScanner {

private final Pattern whitespace = Pattern.compile("\\s+");

private final Pattern identifier = Pattern.compile("[a-z][a-z0-9_]*", Pattern.CASE_INSENSITIVE);

private final Pattern singleQuotedString = Pattern.compile("'(\\\\'|[^'])*'");

private final Pattern ansiQuotedString = Pattern.compile("\"(\\\\\"|[^\"])*\"");
private final Pattern identifier = Pattern.compile("[a-z][a-z0-9_$]*", Pattern.CASE_INSENSITIVE);

private final Pattern dollarQuotedStringDelimiter = Pattern.compile("\\$\\w*\\$");

Expand All @@ -54,7 +50,8 @@ private boolean matches(String substring) {

private boolean matches(Pattern regexp) {
Matcher m = regexp.matcher(script);
if (m.find(offset) && m.start() == offset) {
m.region(offset, script.length());
if (m.lookingAt()) {
currentMatch = m.group();
offset = m.end();
return true;
Expand Down Expand Up @@ -99,6 +96,27 @@ private boolean matchesMultilineComment() {
return false;
}


eddumelendez marked this conversation as resolved.
Show resolved Hide resolved
/**
 * Attempts to consume a literal delimited by {@code quote} that starts at the current offset.
 * <p>
 * A backslash escapes the character that follows it, so an escaped quote does not end the
 * literal. On success {@code currentMatch} holds the literal including both delimiters and
 * {@code offset} points just past the closing quote. When the character at the current offset
 * is not {@code quote}, or no closing quote is found before the end of the script, nothing is
 * consumed.
 *
 * @param quote the delimiter character that opens and closes the literal
 * @return {@code true} if a complete quoted literal was consumed, {@code false} otherwise
 */
private boolean matchesQuotedString(final char quote) {
    if (script.charAt(offset) != quote) {
        return false;
    }
    final int length = script.length();
    int pos = offset + 1;
    while (pos < length) {
        final char current = script.charAt(pos);
        if (current == '\\') {
            // a backslash swallows the next character, whatever it is: step over the pair
            pos += 2;
            continue;
        }
        if (current == quote) {
            final int end = pos + 1;
            currentMatch = script.substring(offset, end);
            offset = end;
            return true;
        }
        pos++;
    }
    // ran off the end of the script without seeing the closing quote
    return false;
}

private boolean matchesDollarQuotedString() {
//Matches $<tag>$ .... $<tag>$
if (matches(dollarQuotedStringDelimiter)) {
Expand All @@ -124,7 +142,7 @@ Lexem next() {
return Lexem.SEPARATOR;
} else if (matchesSingleLineComment() || matchesMultilineComment()) {
return Lexem.COMMENT;
} else if (matches(singleQuotedString) || matches(ansiQuotedString) || matchesDollarQuotedString()) {
} else if (matchesQuotedString('\'') || matchesQuotedString('"') || matchesDollarQuotedString()) {
return Lexem.QUOTED_STRING;
} else if (matches(identifier)) {
return Lexem.IDENTIFIER;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package org.testcontainers.ext;

import org.apache.commons.lang3.StringUtils;
import org.junit.Test;

import java.util.regex.Pattern;

import static org.assertj.core.api.Assertions.assertThat;

public class ScriptScannerTest {
@Test
eddumelendez marked this conversation as resolved.
Show resolved Hide resolved
// Checks that a very long double-quoted literal is scanned as a single QUOTED_STRING lexem.
// NOTE(review): presumably a regression test for the "big String literals" bug named in the
// pull-request title — confirm against the linked issue.
public void testHugeStringLiteral() {
// Script layout: a block comment, one space, then a literal of 10000 '~' characters and ';'.
String script = "/* a comment */ \"" + StringUtils.repeat('~', 10000) + "\";";
eddumelendez marked this conversation as resolved.
Show resolved Hide resolved
eddumelendez marked this conversation as resolved.
Show resolved Hide resolved
ScriptScanner scanner = scanner(script);
// Lexems are expected in source order: comment, whitespace, quoted literal.
assertThat(scanner.next()).isEqualTo(ScriptScanner.Lexem.COMMENT);
assertThat(scanner.next()).isEqualTo(ScriptScanner.Lexem.WHITESPACE);
assertThat(scanner.next()).isEqualTo(ScriptScanner.Lexem.QUOTED_STRING);
// The match must be the whole literal, both double quotes included.
assertThat(scanner.getCurrentMatch()).matches(Pattern.compile("\"~+\""));
}

@Test
public void testPgIdentifierWithDollarSigns() {
    // An identifier containing '$' characters, then a $a$-tagged dollar-quoted string that is
    // immediately followed by a stray '$' and a trailing single-line comment.
    final String script = "this$is$a$valid$postgreSQL$identifier " +
        "$a$While this is a quoted string$a$$ --just followed by a dollar sign";
    final ScriptScanner scanner = scanner(script);

    final ScriptScanner.Lexem[] expectedLexems = {
        ScriptScanner.Lexem.IDENTIFIER,
        ScriptScanner.Lexem.WHITESPACE,
        ScriptScanner.Lexem.QUOTED_STRING,
        ScriptScanner.Lexem.OTHER,
    };
    for (final ScriptScanner.Lexem expected : expectedLexems) {
        assertThat(scanner.next()).isEqualTo(expected);
    }
}

@Test
public void testQuotedLiterals() {
    // Both literals contain a backslash-escaped copy of their own delimiter.
    final String expectedSingleQuoted = "'this \\'is a literal'";
    final String expectedDoubleQuoted = "\"this \\\" is a literal\"";
    final ScriptScanner scanner = scanner("'this \\'is a literal' \"this \\\" is a literal\"");

    // First lexem: the single-quoted literal, escaped quote included.
    assertThat(scanner.next()).isEqualTo(ScriptScanner.Lexem.QUOTED_STRING);
    assertThat(scanner.getCurrentMatch()).isEqualTo(expectedSingleQuoted);

    // The separating space.
    assertThat(scanner.next()).isEqualTo(ScriptScanner.Lexem.WHITESPACE);

    // Last lexem: the double-quoted literal, escaped quote included.
    assertThat(scanner.next()).isEqualTo(ScriptScanner.Lexem.QUOTED_STRING);
    assertThat(scanner.getCurrentMatch()).isEqualTo(expectedDoubleQuoted);
}

/**
 * Builds a {@link ScriptScanner} over {@code script} using the default statement separator
 * and comment delimiters from {@link ScriptUtils}.
 *
 * @param script the script text to scan
 * @return a scanner over {@code script}
 */
private static ScriptScanner scanner(String script) {
    final String resource = "dummy";
    return new ScriptScanner(
        resource,
        script,
        ScriptUtils.DEFAULT_STATEMENT_SEPARATOR,
        ScriptUtils.DEFAULT_COMMENT_PREFIX,
        ScriptUtils.DEFAULT_BLOCK_COMMENT_START_DELIMITER,
        ScriptUtils.DEFAULT_BLOCK_COMMENT_END_DELIMITER
    );
}
}
Loading