diff --git a/modules/database-commons/src/main/java/org/testcontainers/ext/ScriptUtils.java b/modules/database-commons/src/main/java/org/testcontainers/ext/ScriptUtils.java
index 73c49c0947c..0f917a1561f 100644
--- a/modules/database-commons/src/main/java/org/testcontainers/ext/ScriptUtils.java
+++ b/modules/database-commons/src/main/java/org/testcontainers/ext/ScriptUtils.java
@@ -118,8 +118,12 @@ public static void splitSqlScript(String resource, String script, String separat
         checkArgument(StringUtils.isNotEmpty(blockCommentEndDelimiter), "blockCommentEndDelimiter must not be null or empty");
 
         StringBuilder sb = new StringBuilder();
-        boolean inLiteral = false;
         boolean inEscape = false;
+        boolean inLineComment = false;
+        boolean inBlockComment = false;
+        Character currentLiteralDelimiter = null;
+
+        int compoundStatementDepth = 0;
         char[] content = script.toCharArray();
         for (int i = 0; i < script.length(); i++) {
             char c = content[i];
@@ -134,10 +138,40 @@ public static void splitSqlScript(String resource, String script, String separat
                 sb.append(c);
                 continue;
             }
-            if (c == '\'') {
-                inLiteral = !inLiteral;
+            // Determine whether we're entering/leaving a string literal
+            if (!inBlockComment && !inLineComment && (c == '\'' || c == '"' || c == '`')) {
+                if (currentLiteralDelimiter == null) { // ignore delimiters within an existing string literal
+                    currentLiteralDelimiter = c;
+                } else if (currentLiteralDelimiter == c) { // find end of string literal
+                    currentLiteralDelimiter = null;
+                }
+            }
+            final boolean inLiteral = currentLiteralDelimiter != null;
+
+            if (!inLiteral && containsSubstringAtOffset(script, commentPrefix, i)) {
+                inLineComment = true;
+            }
+            if (inLineComment && c == '\n') {
+                inLineComment = false;
+            }
+            if (!inLiteral && containsSubstringAtOffset(script, blockCommentStartDelimiter, i)) {
+                inBlockComment = true;
             }
-            if (!inLiteral) {
+            if (!inLiteral && inBlockComment && containsSubstringAtOffset(script, blockCommentEndDelimiter, i)) {
+                inBlockComment = false;
+            }
+            final boolean inComment = inLineComment || inBlockComment;
+
+            // BEGIN/END only count as whole words, so identifiers that merely contain "END" (e.g. friends) don't change the depth
+            if (!inLiteral && !inComment && containsKeywordAtOffset(script, "BEGIN", i)) {
+                compoundStatementDepth++;
+            }
+            if (!inLiteral && !inComment && containsKeywordAtOffset(script, "END", i)) {
+                compoundStatementDepth--;
+            }
+            final boolean inCompoundStatement = compoundStatementDepth != 0;
+
+            if (!inLiteral && !inCompoundStatement) {
                 if (script.startsWith(separator, i)) {
                     // we've reached the end of the current statement
                     if (sb.length() > 0) {
@@ -189,6 +223,37 @@ else if (c == ' ' || c == '\n' || c == '\t') {
         }
     }
 
+    /**
+     * Tests, ignoring case, whether {@code substring} occurs in {@code string} starting exactly at {@code offset}.
+     * <p>
+     * Compares the region in place: the caller invokes this for every character of the script, so lower-casing
+     * whole strings here would copy the script on each call and make the match depend on the default locale.
+     */
+    private static boolean containsSubstringAtOffset(String string, String substring, int offset) {
+        return string.regionMatches(true, offset, substring, 0, substring.length());
+    }
+
+    /**
+     * Tests, ignoring case, whether {@code keyword} occurs in {@code string} at {@code offset} as a whole word,
+     * i.e. it is neither directly preceded nor directly followed by an identifier character.
+     */
+    private static boolean containsKeywordAtOffset(String string, String keyword, int offset) {
+        if (!containsSubstringAtOffset(string, keyword, offset)) {
+            return false;
+        }
+        final int end = offset + keyword.length();
+        final boolean startsWord = offset == 0 || !isIdentifierChar(string.charAt(offset - 1));
+        final boolean endsWord = end == string.length() || !isIdentifierChar(string.charAt(end));
+        return startsWord && endsWord;
+    }
+
+    /**
+     * Tells whether {@code c} can be part of an unquoted identifier or keyword (letter, digit or underscore).
+     */
+    private static boolean isIdentifierChar(char c) {
+        return Character.isLetterOrDigit(c) || c == '_';
+    }
+
     private static void checkArgument(boolean expression, String errorMessage) {
         if (!expression) {
             throw new IllegalArgumentException(errorMessage);
diff --git a/modules/database-commons/src/test/java/org/testcontainers/ext/ScriptUtilsTest.java b/modules/database-commons/src/test/java/org/testcontainers/ext/ScriptUtilsTest.java
new file mode 100644
index 00000000000..7a50026cd93
--- /dev/null
+++ b/modules/database-commons/src/test/java/org/testcontainers/ext/ScriptUtilsTest.java
@@ -0,0 +1,31 @@
+package org.testcontainers.ext;
+
+import com.google.common.base.Charsets;
+import com.google.common.io.Resources;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+
+public class ScriptUtilsTest {
+
+    /*
+     * Test ScriptUtils script splitting with some ugly/hard-to-split cases
+     */
+    @Test
+    public void testSplit() throws IOException {
+        final String script = Resources.toString(Resources.getResource("splittable.sql"), Charsets.UTF_8);
+
+        final List<String> statements = new ArrayList<>();
+        ScriptUtils.splitSqlScript("resourcename", script, ";", "--", "/*", "*/", statements);
+
+        assertEquals(7, statements.size());
+        assertEquals("SELECT \"a /* string literal containing comment characters like -- here\"", statements.get(2));
+        assertEquals("SELECT \"a 'quoting' \\\"scenario ` involving BEGIN keyword\\\" here\"", statements.get(3));
+        assertEquals("SELECT * from `bar`", statements.get(4));
+        assertEquals("INSERT INTO bar (foo) VALUES ('hello world')", statements.get(6));
+    }
+}
diff --git a/modules/database-commons/src/test/resources/splittable.sql b/modules/database-commons/src/test/resources/splittable.sql
new file mode 100644
index 00000000000..addede19dd7
--- /dev/null
+++ b/modules/database-commons/src/test/resources/splittable.sql
@@ -0,0 +1,45 @@
+CREATE TABLE bar (
+  foo VARCHAR(255)
+);
+
+DROP PROCEDURE IF EXISTS -- ;
+    count_foo;
+
+SELECT "a /* string literal containing comment characters like -- here";
+SELECT "a 'quoting' \"scenario ` involving BEGIN keyword\" here";
+SELECT * from `bar`;
+
+-- What about a line comment containing imbalanced string delimiters? "
+
+CREATE PROCEDURE count_foo()
+  BEGIN
+
+    BEGIN
+      SELECT *
+      FROM bar;
+      SELECT 1
+      FROM dual;
+    END;
+
+    BEGIN
+      select * from bar;
+    END;
+
+    -- we can do comments
+
+    /* including block
+       comments
+     */
+
+    /* what if BEGIN appears inside a comment? */
+
+    select "or what if BEGIN appears inside a literal?";
+
+  END /*; */;
+
+/* or a block comment
+    containing imbalanced string delimiters?
+    ' "
+ */
+
+INSERT INTO bar (foo) /* ; */ VALUES ('hello world');
diff --git a/modules/jdbc-test/src/test/resources/somepath/init_mysql.sql b/modules/jdbc-test/src/test/resources/somepath/init_mysql.sql
index 2b00ee968b0..addede19dd7 100644
--- a/modules/jdbc-test/src/test/resources/somepath/init_mysql.sql
+++ b/modules/jdbc-test/src/test/resources/somepath/init_mysql.sql
@@ -2,4 +2,44 @@ CREATE TABLE bar (
   foo VARCHAR(255)
 );
 
-INSERT INTO bar (foo) VALUES ('hello world');
\ No newline at end of file
+DROP PROCEDURE IF EXISTS -- ;
+    count_foo;
+
+SELECT "a /* string literal containing comment characters like -- here";
+SELECT "a 'quoting' \"scenario ` involving BEGIN keyword\" here";
+SELECT * from `bar`;
+
+-- What about a line comment containing imbalanced string delimiters? "
+
+CREATE PROCEDURE count_foo()
+  BEGIN
+
+    BEGIN
+      SELECT *
+      FROM bar;
+      SELECT 1
+      FROM dual;
+    END;
+
+    BEGIN
+      select * from bar;
+    END;
+
+    -- we can do comments
+
+    /* including block
+       comments
+     */
+
+    /* what if BEGIN appears inside a comment? */
+
+    select "or what if BEGIN appears inside a literal?";
+
+  END /*; */;
+
+/* or a block comment
+    containing imbalanced string delimiters?
+    ' "
+ */
+
+INSERT INTO bar (foo) /* ; */ VALUES ('hello world');