diff --git a/Version Control.accda.src/modules/clsConcat.cls b/Version Control.accda.src/modules/clsConcat.cls index 0d5dd671..482a287f 100644 --- a/Version Control.accda.src/modules/clsConcat.cls +++ b/Version Control.accda.src/modules/clsConcat.cls @@ -219,6 +219,25 @@ Public Function MidStr(lngStart As Long, Optional lngLength As Long = -1) As Str End Function +'--------------------------------------------------------------------------------------- +' Procedure : RTrim +' Author : Adam Waller +' Date : 8/14/2023 +' Purpose : Trim trailing whitespace from content +'--------------------------------------------------------------------------------------- +' +Public Function RTrim(Optional strTrimChars As String = " ") + Do + If Me.Length < Len(strTrimChars) Then Exit Do + If Me.RightStr(Len(strTrimChars)) = strTrimChars Then + Me.Remove Len(strTrimChars) + Else + Exit Do + End If + Loop +End Function + + '--------------------------------------------------------------------------------------- ' Procedure : Right ' Author : Adam Waller diff --git a/Version Control.accda.src/modules/clsSqlFormatter.cls b/Version Control.accda.src/modules/clsSqlFormatter.cls new file mode 100644 index 00000000..13ea59a7 --- /dev/null +++ b/Version Control.accda.src/modules/clsSqlFormatter.cls @@ -0,0 +1,1010 @@ +VERSION 1.0 CLASS +BEGIN + MultiUse = -1 'True +END +Attribute VB_Name = "clsSqlFormatter" +Attribute VB_GlobalNameSpace = False +Attribute VB_Creatable = False +Attribute VB_PredeclaredId = False +Attribute VB_Exposed = False +'--------------------------------------------------------------------------------------- +' Module : clsSqlFormatter +' Author : Adam Waller +' Date : 8/11/2023 +' Purpose : Provide a "simple" implementation of SQL formatting for consistent output +' : in source code files. This is largely based on the doctrine/sql-formatter +' : project on GitHub. For my purposes I didn't need a huge multi-dialect +' : parser with lots of output options, but something that I could inlcude +' : as a single module in a project to parse SQL statements from Access +' : queries, as well as basic support for other common dialects like MSSQL +' : and MySQL. +'--------------------------------------------------------------------------------------- +Option Compare Database +Option Explicit + + +' Lists Updated 8/11/2023 from doctrine/sql-formatter +Private Const cstrReserved As String = _ + "|ACCESSIBLE|ACTION|AFTER|AGAINST|AGGREGATE|ALGORITHM|ALL|ALTER|ANALYSE|ANALYZE|AS|ASC|AUTOCOMMIT|AUTO_INCREMENT|BACKUP|BEGIN|BETWEEN|BINLOG|BOTH|CASCADE|CASE|CHANGE|CHANGED|CHARACTER SET|CHARSET|CHECK|CHECKSUM|COLLATE|COLLATION|COLUMN|COLUMNS|COMMENT|COMMIT|COMMITTED|COMPRESSED|CONCURRENT|CONSTRAINT|CONTAINS|CONVERT|CREATE|CROSS|CURRENT ROW|CURRENT_TIMESTAMP|DATABASE|DATABASES|DAY|DAY_HOUR|DAY_MINUTE|DAY_SECOND|DEFAULT|DEFINER|DELAYED|DELETE|DESC|DESCRIBE|DETERMINISTIC|DISTINCT|DISTINCTROW|DIV|DO|DUMPFILE|DUPLICATE|DYNAMIC|ELSE|ENCLOSED|END|ENGINE|ENGINE_TYPE|ENGINES|ESCAPE|ESCAPED|EVENTS|EXEC|EXECUTE|EXISTS|EXPLAIN|EXTENDED|FAST|FIELDS|FILE|FILTER|FIRST|FIXED|FLUSH|FOR|FORCE|FOLLOWING|FOREIGN|FULL|FULLTEXT|FUNCTION|GLOBAL|GRANT|GRANTS|GROUP|GROUPS|HEAP|HIGH_PRIORITY|HOSTS|HOUR|HOUR_MINUTE|HOUR_SECOND|IDENTIFIED|IF|IFNULL|IGNORE|IN|INDEX|INDEXES|INFILE|INSERT|INSERT_ID|INSERT_METHOD|INTERVAL|INTO|INVOKER|IS|ISOLATION|" & _ + "KEY|KEYS|KILL|LAST_INSERT_ID|LEADING|LEVEL|LIKE|LINEAR|LINES|LOAD|LOCAL|LOCK|LOCKS|LOGS|LOW_PRIORITY|MARIA|MASTER|MASTER_CONNECT_RETRY|MASTER_HOST|MASTER_LOG_FILE|MATCH|MAX_CONNECTIONS_PER_HOUR|MAX_QUERIES_PER_HOUR|MAX_ROWS|MAX_UPDATES_PER_HOUR|MAX_USER_CONNECTIONS|MEDIUM|MERGE|MINUTE|MINUTE_SECOND|MIN_ROWS|MODE|MONTH|MRG_MYISAM|MYISAM|NAMES|NATURAL|NO OTHERS|NOT|NOW()|NULL|OFFSET|ON|OPEN|OPTIMIZE|OPTION|OPTIONALLY|ON UPDATE|ON DELETE|OUTFILE|OVER|PACK_KEYS|PAGE|PARTIAL|PARTITION|PARTITIONS|PASSWORD|PRECEDING|PRIMARY|PRIVILEGES|PROCEDURE|PROCESS|PROCESSLIST|PURGE|QUICK|RANGE|RAID0|RAID_CHUNKS|RAID_CHUNKSIZE|RAID_TYPE|READ|READ_ONLY|READ_WRITE|RECURSIVE|REFERENCES|REGEXP|RELOAD|RENAME|REPAIR|REPEATABLE|REPLACE|REPLICATION|RESET|RESTORE|RESTRICT|RETURN|RETURNS|REVOKE|RLIKE|ROLLBACK|ROW|ROWS|ROW_FORMAT|SECOND|SECURITY|SEPARATOR|SERIALIZABLE|SESSION|SHARE|SHOW|SHUTDOWN|SLAVE|SONAME|SOUNDS|SQL|SQL_AUTO_IS_NULL|SQL_BIG_RESULT|" & _ + "SQL_BIG_SELECTS|SQL_BIG_TABLES|SQL_BUFFER_RESULT|SQL_CALC_FOUND_ROWS|SQL_LOG_BIN|SQL_LOG_OFF|SQL_LOG_UPDATE|SQL_LOW_PRIORITY_UPDATES|SQL_MAX_JOIN_SIZE|SQL_QUOTE_SHOW_CREATE|SQL_SAFE_UPDATES|SQL_SELECT_LIMIT|SQL_SLAVE_SKIP_COUNTER|SQL_SMALL_RESULT|SQL_WARNINGS|SQL_CACHE|SQL_NO_CACHE|START|STARTING|STATUS|STOP|STORAGE|STRAIGHT_JOIN|STRING|STRIPED|SUPER|TABLE|TABLES|TEMPORARY|TERMINATED|THEN|TIES|TO|TRAILING|TRANSACTIONAL|TRUE|TRUNCATE|TYPE|TYPES|UNBOUNDED|UNCOMMITTED|UNIQUE|UNLOCK|UNSIGNED|USAGE|USE|USING|VARIABLES|VIEW|WHEN|WITH|WORK|WRITE|YEAR_MONTH|" +Private Const cstrFunctions As String = _ + "|ABS|ACOS|ADDDATE|ADDTIME|AES_DECRYPT|AES_ENCRYPT|APPROX_COUNT_DISTINCT|AREA|ASBINARY|ASCII|ASIN|ASTEXT|ATAN|ATAN2|AVG|BDMPOLYFROMTEXT|BDMPOLYFROMWKB|BDPOLYFROMTEXT|BDPOLYFROMWKB|BENCHMARK|BIN|BIT_AND|BIT_COUNT|BIT_LENGTH|BIT_OR|BIT_XOR|BOUNDARY|BUFFER|CAST|CEIL|CEILING|CENTROID|CHAR|CHARACTER_LENGTH|CHARSET|CHAR_LENGTH|CHECKSUM_AGG|COALESCE|COERCIBILITY|COLLATION|COMPRESS|CONCAT|CONCAT_WS|CONNECTION_ID|CONTAINS|CONV|CONVERT|CONVERT_TZ|CONVEXHULL|COS|COT|COUNT|COUNT_BIG|CRC32|CROSSES|CUME_DIST|CURDATE|CURRENT_DATE|CURRENT_TIME|CURRENT_TIMESTAMP|CURRENT_USER|CURTIME|DATABASE|DATE|DATEDIFF|DATE_ADD|DATE_DIFF|DATE_FORMAT|DATE_SUB|DAY|DAYNAME|DAYOFMONTH|DAYOFWEEK|DAYOFYEAR|DECODE|DEFAULT|DEGREES|DENSE_RANK|DES_DECRYPT|DES_ENCRYPT|DIFFERENCE|DIMENSION|DISJOINT|DISTANCE|ELT|ENCODE|ENCRYPT|ENDPOINT|ENVELOPE|EQUALS|EXP|EXPORT_SET|EXTERIORRING|EXTRACT|EXTRACTVALUE|FIELD|FIND_IN_SET|FIRST_VALUE|FLOOR|FORMAT|FOUND_ROWS|FROM_DAYS|" & _ + "FROM_UNIXTIME|GEOMCOLLFROMTEXT|GEOMCOLLFROMWKB|GEOMETRYCOLLECTION|GEOMETRYCOLLECTIONFROMTEXT|GEOMETRYCOLLECTIONFROMWKB|GEOMETRYFROMTEXT|GEOMETRYFROMWKB|GEOMETRYN|GEOMETRYTYPE|GEOMFROMTEXT|GEOMFROMWKB|GET_FORMAT|GET_LOCK|GLENGTH|GREATEST|GROUPING|GROUPING_ID|GROUP_CONCAT|GROUP_UNIQUE_USERS|HEX|HOUR|IF|IFNULL|INET_ATON|INET_NTOA|INSERT|INSTR|INTERIORRINGN|INTERSECTION|INTERSECTS|INTERVAL|ISCLOSED|ISEMPTY|ISNULL|ISRING|ISSIMPLE|IS_FREE_LOCK|IS_USED_LOCK|LAG|LAST_DAY|LAST_INSERT_ID|LAST_VALUE|LCASE|LEAD|LEAST|LEFT|LENGTH|LINEFROMTEXT|LINEFROMWKB|LINESTRING|LINESTRINGFROMTEXT|LINESTRINGFROMWKB|LISTAGG|LN|LOAD_FILE|LOCALTIME|LOCALTIMESTAMP|LOCATE|LOG|LOG10|LOG2|LOWER|LPAD|LTRIM|MAKEDATE|MAKETIME|MAKE_SET|MASTER_POS_WAIT|MAX|MBRCONTAINS|MBRDISJOINT|MBREQUAL|MBRINTERSECTS|MBROVERLAPS|MBRTOUCHES|MBRWITHIN|MD5|MICROSECOND|MID|MIN|MINUTE|MLINEFROMTEXT|MLINEFROMWKB|MOD|MONTH|MONTHNAME|MPOINTFROMTEXT|MPOINTFROMWKB|MPOLYFROMTEXT|" & _ + "MPOLYFROMWKB|MULTILINESTRING|MULTILINESTRINGFROMTEXT|MULTILINESTRINGFROMWKB|MULTIPOINT|MULTIPOINTFROMTEXT|MULTIPOINTFROMWKB|MULTIPOLYGON|MULTIPOLYGONFROMTEXT|MULTIPOLYGONFROMWKB|NAME_CONST|NTH_VALUE|NTILE|NULLIF|NUMGEOMETRIES|NUMINTERIORRINGS|NUMPOINTS|OCT|OCTET_LENGTH|OLD_PASSWORD|ORD|OVERLAPS|PASSWORD|PERCENT_RANK|PERCENTILE_CONT|PERCENTILE_DISC|PERIOD_ADD|PERIOD_DIFF|PI|POINT|POINTFROMTEXT|POINTFROMWKB|POINTN|POINTONSURFACE|POLYFROMTEXT|POLYFROMWKB|POLYGON|POLYGONFROMTEXT|POLYGONFROMWKB|POSITION|POW|POWER|QUARTER|QUOTE|RADIANS|RAND|RANK|RELATED|RELEASE_LOCK|REPEAT|REPLACE|REVERSE|RIGHT|ROUND|ROW_COUNT|ROW_NUMBER|RPAD|RTRIM|SCHEMA|SECOND|SEC_TO_TIME|SESSION_USER|SHA|SHA1|SIGN|SIN|SLEEP|SOUNDEX|SPACE|SQRT|SRID|STARTPOINT|STD|STDEV|STDEVP|STDDEV|STDDEV_POP|STDDEV_SAMP|STRING_AGG|STRCMP|STR_TO_DATE|SUBDATE|SUBSTR|SUBSTRING|SUBSTRING_INDEX|SUBTIME|SUM|SYMDIFFERENCE|SYSDATE|SYSTEM_USER|TAN|TIME|TIMEDIFF|TIMESTAMP|TIMESTAMPADD|" & _ + "TIMESTAMPDIFF|TIME_FORMAT|TIME_TO_SEC|TOUCHES|TO_DAYS|TRIM|TRUNCATE|UCASE|UNCOMPRESS|UNCOMPRESSED_LENGTH|UNHEX|UNIQUE_USERS|UNIX_TIMESTAMP|UPDATEXML|UPPER|USER|UTC_DATE|UTC_TIME|UTC_TIMESTAMP|UUID|VAR|VARIANCE|VARP|VAR_POP|VAR_SAMP|VERSION|WEEK|WEEKDAY|WEEKOFYEAR|WITHIN|X|Y|YEAR|YEARWEEK|" +Private Const cstrReservedToplevel As String = "|WITH|SELECT|FROM|WHERE|SET|ORDER BY|GROUP BY|LIMIT|DROP|VALUES|UPDATE|HAVING|ADD|CHANGE|MODIFY|ALTER TABLE|DELETE FROM|UNION ALL|UNION|EXCEPT|INTERSECT|PARTITION BY|ROWS|RANGE|GROUPS|WINDOW|" +Private Const cstrReservedNewline As String = "|LEFT OUTER JOIN|RIGHT OUTER JOIN|LEFT JOIN|RIGHT JOIN|OUTER JOIN|INNER JOIN|JOIN|XOR|OR|AND|EXCLUDE|" +Private Const cstrBoundaries As String = ",;:)(.=<>+-*/!^%|&#," +Private Const cstrRegExSpecial As String = ".\+*?[^]$(){}=!<>|:-#/" + +' Types of lists +Private Enum eListType + eltReserved + eltFunctions + eltReservedToplevel + eltReservedNewline + eltBoundaries + eltRegExSpecial +End Enum + +' Token types +Private Enum eTokenTypes + ttUnknown + ttWhitespace + ttWord + ttQuote + ttBacktickQuote + ttReserved + ttReservedTopLevel + ttReservedNewline + ttBoundary + ttComment + ttBlockComment + ttNumber + ttError + ttVariable +End Enum + +Private m_strIndent As String +Private m_strSql As String +Private m_colTokens As Collection +Private m_lngPos As Long + + +Public Function Test() + + Debug.Print FormatSQL(CurrentDb.QueryDefs("qryTest2").SQL) + + PrintTokens + +End Function + + +'--------------------------------------------------------------------------------------- +' Procedure : Format +' Author : Adam Waller +' Date : 4/1/2020 +' Purpose : This is the main function used outside the class for SQL formatting. +'--------------------------------------------------------------------------------------- +' +Public Function FormatSQL(strSql As String) As String + + Dim lngIndentLevel As Long + Dim blnNewline As Boolean + Dim blnInlineParentheses As Boolean + Dim blnIncreaseSpecialIndent As Boolean + Dim blnIncreateBlockIndent As Boolean + Dim colIndents As Collection + Dim strIndent As String + Dim blnAddedNewline As Boolean + Dim intInlineCount As Integer + Dim blnInlineIndented As Boolean + Dim blnClauseLimit As Boolean + Dim lngToken As Long + Dim lngToken2 As Long + Dim strNextValue As String + Dim intNextType As eTokenTypes + Dim cReturn As clsConcat + Dim intTokenType As eTokenTypes + Dim strTokenValue As String + Dim lngLength As Long + Dim varItem As Variant + + ' Tokenize the string + Tokenize strSql + + ' Set up collection to hold types of indents + Set colIndents = New Collection + Set cReturn = New clsConcat + 'strTab = Space(2) ' Set indent style + + ' Build formatted output from tokens + For lngToken = 1 To m_colTokens.Count + + intTokenType = m_colTokens(lngToken)(0) + strTokenValue = m_colTokens(lngToken)(1) + + ' If we are increasing the special indent level + If blnIncreaseSpecialIndent Then + IncreaseIndent colIndents, lngIndentLevel, "special" + blnIncreaseSpecialIndent = False + End If + + ' If we are increasing the block indent level + If blnIncreateBlockIndent Then + IncreaseIndent colIndents, lngIndentLevel, "block" + blnIncreateBlockIndent = False + End If + + ' If we need a new line before the token + If blnNewline Then + cReturn.RTrim + cReturn.Add vbCrLf, StrRepeat(vbTab, lngIndentLevel) + blnNewline = False + blnAddedNewline = True + Else + blnAddedNewline = False + End If + + ' Display comments directly where they appear in the source + If IsTokenType(intTokenType, ttComment, ttBlockComment) Then + If intTokenType = ttBlockComment Then + ' Indent multiline block comment to current indent level + strIndent = StrRepeat(vbTab, lngIndentLevel) + cReturn.RTrim " " & vbTab + cReturn.Add vbCrLf, strIndent + cReturn.Add Replace(strTokenValue, vbCrLf, vbCrLf & strIndent) + Else + cReturn.Add strTokenValue + End If + blnNewline = True + GoTo NextToken + End If + + ' If inside parentheses + If blnInlineParentheses Then + ' Check for end of inline parentheses + If strTokenValue = ")" Then + cReturn.RTrim + + If blnInlineIndented Then + DecreaseIndent colIndents, lngIndentLevel + cReturn.Add vbCrLf, StrRepeat(vbTab, lngIndentLevel) + End If + + blnInlineParentheses = False + cReturn.Add ") " + GoTo NextToken + End If + + ' Break to new line if we reach 30 characters + If strTokenValue = "," Then + If intInlineCount >= 30 Then + intInlineCount = 0 + blnNewline = True + End If + End If + + ' Keep count of characters within parentheses + intInlineCount = intInlineCount + Len(strTokenValue) + End If + + ' Opening parentheses increase the block indent level and start a new line + If strTokenValue = "(" Then + ' First check if this should be an inline parentheses block + ' Examples are "NOW()", "COUNT(*)", "int(10)", key(`somecolumn`), DECIMAL(7,2) + ' Allow up to 3 non-whitespace tokens inside inline parentheses + lngLength = 0 + ' Begin secondary loop to look at the next tokens without changing + ' primary token iteration. + For lngToken2 = lngToken To lngToken + 250 + ' Check for end of tokens + If lngToken2 > lngToken Then Exit For + + ' Get type and value of next token + intNextType = m_colTokens(lngToken2)(0) + strNextValue = m_colTokens(lngToken2)(1) + + ' Reached closing parentheses, able to inline it + If strNextValue = ")" Then + blnInlineParentheses = True + intInlineCount = 0 + blnInlineIndented = False + Exit For + End If + + ' Reached an invalid token for inline parentheses + If strNextValue = ";" Or strNextValue = "(" Then + Exit For + End If + + ' Reached an invalid token type for inline parentheses + Select Case intTokenType + Case ttReservedTopLevel, _ + ttReservedNewline, _ + ttComment, _ + ttBlockComment + Exit For + End Select + + ' Add to total length + lngLength = lngLength + (Len(strNextValue)) + + ' Look at next token ahead of current position + Next lngToken2 + + If blnInlineParentheses And (lngLength > 30) Then + blnIncreateBlockIndent = True + blnInlineIndented = True + blnNewline = True + End If + + ' Take out the preceding space unless there was whitespace there in the original query + If m_colTokens.Count > 0 Then + If m_colTokens(lngToken - 1)(0) = ttWhitespace Then + cReturn.RTrim + End If + End If + + If Not blnInlineParentheses Then + blnIncreateBlockIndent = True + ' Add a newline after the parentheses + blnNewline = True + End If + + ElseIf strTokenValue = ")" Then + ' Closing parentheses decrease the block indent level + ' Remove whitespace before the closing parentheses + cReturn.RTrim + DecreaseIndent colIndents, lngIndentLevel + + ' Reduce indents down to the block level + Do While colIndents(1) = "special" + DecreaseIndent colIndents, lngIndentLevel + If colIndents.Count = 0 Then Exit Do + Loop + + ' Add a newline before the closing parentheses (if not already added) + If Not blnAddedNewline Then + cReturn.Add vbCrLf, StrRepeat(vbTab, lngIndentLevel) + End If + + ElseIf intTokenType = ttReservedTopLevel Then + ' Top level reserved words start a new line and increase the special indent level + blnIncreaseSpecialIndent = True + + ' If the last indent type was 'special', decrease the special indent for this round + If colIndents.Count > 0 Then + If colIndents(1) = "special" Then + DecreaseIndent colIndents, lngIndentLevel + End If + End If + + ' Add a newline after the top level reserved word + blnNewline = True + ' Add a newline before the top level reserved word (if not already added) + If Not blnAddedNewline Then + cReturn.RTrim + cReturn.Add vbCrLf, StrRepeat(vbTab, lngIndentLevel) + Else + ' If we already added a newline, redo the indentation since it may be different now + cReturn.RTrim vbTab + cReturn.Add StrRepeat(vbTab, lngIndentLevel) + End If + + ' Purge any extra whitespace + PurgeExtraWhitespace strTokenValue + + ' If SQL 'LIMIT' clause, start variable to reset newline + If (UCase(strTokenValue) = "LIMIT") And Not blnInlineParentheses Then + blnClauseLimit = True + End If + + ElseIf blnClauseLimit And (strTokenValue <> ",") _ + And (Not IsTokenType(intTokenType, ttNumber, ttWhitespace)) Then + ' Checks if we are out of the limit clause + blnClauseLimit = False + + ElseIf (strTokenValue = ",") And Not blnInlineParentheses Then + ' Commas start a new line (unless within inline parentheses or SQL 'LIMIT' clause) + ' If the previous TOKEN_VALUE is 'LIMIT', resets new line + If blnClauseLimit Then + blnNewline = False + blnClauseLimit = False + Else + ' All other cases of commas + blnNewline = True + End If + + ElseIf intTokenType = ttReservedNewline Then + ' Newline reserved words start a new line + ' Add a newline before the reserved word (if not already added) + If Not blnAddedNewline Then + cReturn.RTrim + cReturn.Add vbCrLf, StrRepeat(vbTab, lngIndentLevel) + End If + PurgeExtraWhitespace strTokenValue + + ElseIf intTokenType = ttBoundary Then + ' Multiple boundary characters in a row should not have + ' spaces between them (not including parentheses) + lngToken2 = GetPreviousTokenID(lngToken, ttWhitespace) + If m_colTokens(lngToken2)(0) = ttBoundary Then + ' If previous non-whitespace character was a boundary, then trim + ' any whitespace going back to the boundary. + If m_colTokens(GetPreviousTokenID(lngToken))(0) = ttWhitespace Then + cReturn.RTrim + End If + End If + End If + + ' If the token shouldn't have a space before it + If strTokenValue = "." _ + Or strTokenValue = "," _ + Or strTokenValue = ";" Then + ' Trim any whitespace + cReturn.RTrim + End If + + ' Add current token value, followed by a space + cReturn.Add strTokenValue, " " + + ' If the token shouldn't have a space after it + If strTokenValue = "(" Or strTokenValue = "." Then + cReturn.RTrim + End If + + ' If this is the "-" of a negative number, it shouldn't have a space after it + If strTokenValue <> "-" Then + GoTo NextToken + End If + + ' Check next token for number + lngToken2 = GetNextTokenID(lngToken, ttWhitespace) + If lngToken2 = 0 Then GoTo NextToken + If m_colTokens(lngToken2)(0) <> ttNumber Then + GoTo NextToken + End If + + ' Check previous token + lngToken2 = GetPreviousTokenID(lngToken, ttWhitespace) + If lngToken2 = 0 Then + GoTo NextToken + Else + intTokenType = m_colTokens(lngToken2)(0) + If IsTokenType(intTokenType, _ + ttQuote, _ + ttBacktickQuote, _ + ttWord, _ + ttNumber) Then + GoTo NextToken + End If + End If + + ' Trim whitespace after dash + cReturn.RTrim + +NextToken: + Next lngToken + + ' If there are unmatched parentheses + For Each varItem In colIndents + If CStr(varItem) = "block" Then + cReturn.RTrim + cReturn.Add vbCrLf, "WARNING: unclosed parentheses or section" + Exit For + End If + Next varItem + + ' Final formatting of tab indents when returning formatted string + FormatSQL = Replace(cReturn.GetStr, vbTab, "__") + +End Function + + +'--------------------------------------------------------------------------------------- +' Procedure : Class_Initialize +' Author : Adam Waller +' Date : 4/1/2020 +' Purpose : Set any default options +'--------------------------------------------------------------------------------------- +' +Private Sub Class_Initialize() + + ' Set default constants + m_strIndent = Space(2) + +' ' Set up tokenizer RegEx objects +' Set oRegEx(erWhitespace) = NewRegEx("^(\s+)") +' Set oRegEx(erNumber) = NewRegEx("^((-\s*)?[0-9]+(\.[0-9]+)?|0x[0-9a-fA-F]+|0b[01]+)\b") +' Set oRegEx(erOperator) = NewRegEx("^(!=|<>|==|<=|>=|!<|!>|\|\||::|->>|->|~~\*|~~|!~~\*|!~~|~\*|!~\*|!~|.)") +' Set oRegEx(erBlockComment) = NewRegEx("^(\/\*[^]*?(?:\*\/|$))") +' Set oRegEx(erLineComment) = NewRegEx("^((?:" & cstrLineCommentTypes & ").*?(?:\n|$))") +' Set oRegEx(erReservedTopLevel) = NewRegEx("^(" & cstrReservedToplevelWords & ")\b") +' Set oRegEx(erReservedNewLine) = NewRegEx("^(" & cstrReservedNewlineWords & ")\b") +' Set oRegEx(erReservedPlain) = NewRegEx("^(" & cstrReservedWords & ")\b") +' Set oRegEx(erWord) = NewRegEx("^([\w" & Join(Split(cstrSpecialWordChars, "|"), "") & "]+)") +' Set oRegEx(erString) = NewRegEx(CreateStringPattern) +' Set oRegEx(erOpenParen) = NewRegEx() +' Set oRegEx(erCloseParen) = NewRegEx() +' Set oRegEx(erIndexedPlaceholder) = NewRegEx() +' Set oRegEx(erIndentNamedPlaceholder) = NewRegEx() +' Set oRegEx(erStringNamedPlaceholder) = NewRegEx() + + +End Sub + + +'--------------------------------------------------------------------------------------- +' Procedure : PrintTokens +' Author : Adam Waller +' Date : 8/14/2023 +' Purpose : Print the current sequence of tokens (type and value) for debugging +'--------------------------------------------------------------------------------------- +' +Public Sub PrintTokens() + + Dim varToken As Variant + + If m_colTokens Is Nothing Then + Debug.Print "No tokens found" + Exit Sub + End If + + ' Print header + Debug.Print "---------------------------------------------------" + Debug.Print "TOKEN NAME TOKEN VALUE" + Debug.Print "---------------------------------------------------" + + ' Loop through Tokens + For Each varToken In m_colTokens + Select Case varToken(0) + Case ttWhitespace: Debug.Print "Whitespace: " & varToken(1) + Case ttWord: Debug.Print "Word: " & varToken(1) + Case ttQuote: Debug.Print "Quote: " & varToken(1) + Case ttBacktickQuote: Debug.Print "Backtick Quote: " & varToken(1) + Case ttReserved: Debug.Print "Reserved: " & varToken(1) + Case ttReservedTopLevel: Debug.Print "Reserved Top Level: " & varToken(1) + Case ttReservedNewline: Debug.Print "Reserved Newline: " & varToken(1) + Case ttBoundary: Debug.Print "Boundary: " & varToken(1) + Case ttComment: Debug.Print "Comment: " & varToken(1) + Case ttBlockComment: Debug.Print "Block Comment: " & varToken(1) + Case ttNumber: Debug.Print "Number: " & varToken(1) + Case ttError: Debug.Print "Error: " & varToken(1) + Case ttVariable: Debug.Print "Variable: " & varToken(1) + Case Else: Debug.Print "Unknown type " & varToken(1) + End Select + Next varToken + Debug.Print "---------------------------------------------------" + +End Sub + + +'--------------------------------------------------------------------------------------- +' Procedure : Tokenize +' Author : Adam Waller +' Date : 8/12/2023 +' Purpose : Take a SQL string and break it down into tokens. +' : Each token is an array with type(0) and value(1). +'--------------------------------------------------------------------------------------- +' +Private Sub Tokenize(strSql As String) + + Const cstrBreakAfter As String = "z4" + + Dim strMatch As String + Dim strWord As String + + ' Reset collection of token items + Set m_colTokens = New Collection + m_strSql = strSql + m_lngPos = 1 + + + ' Loop through SQL, converting string into tokens + Do While m_lngPos < Len(strSql) + + ' Debugging breakpoint + If NextChar(cstrBreakAfter) Then Stop + + ' Sequentially move through various possible types of tokens, + ' adding the token when we find a match. + + ' Whitespace + If HasMatches("^\s+", strMatch) Then + AddToken ttWhitespace, strMatch + + ' Single line comment + ElseIf NextChar("#") Or NextChar("--") Then + AddToken ttComment, GetTill(vbCrLf, vbLf) + + ' Block comment + ElseIf NextChar("/*") Then + AddToken ttBlockComment, GetTill("*/") + + ' Backtick quote + ElseIf NextChar("`") Then + AddToken ttBacktickQuote, GetQuotedString + + ' Quoted string + ElseIf NextChar("""") Or NextChar("\'") Or NextChar("[") Then + AddToken ttQuote, GetQuotedString + + ' User defined variable + ElseIf (NextChar("@") Or NextChar(":")) And (RemainingChars > 1) Then + + ' Check for quoted variable name + If PeekChar(1, """") Or PeekChar(1, "`") Or PeekChar(1, "\'") Then + AddToken ttVariable, GetRange & GetQuotedString(1) + Else + ' Non-quoted variable name + If HasMatches("^(" & GetRange & "[a-zA-Z0-9\._\$]+)", strMatch) Then + AddToken ttVariable, strMatch + Else + ' Failed to parse name + 'TODO: log error + End If + End If + + ' Number (decimal, bindary, or hex) + ElseIf HasMatches("^([0-9]+(\.[0-9]+)?|0x[0-9a-fA-F]+|0b[01]+)($|\s|""\'`|" & RegExBoundaries & ")", strMatch) Then + AddToken ttNumber, strMatch + + ' Boundary character (punctuation and symbols) + ElseIf HasMatches("^(" & RegExBoundaries & ")", strMatch) Then + AddToken ttBoundary, strMatch + + ' A reserved word cannot be preceded by a "." + ' This makes it so in "mytable.from", "from" is not considered a reserved word + ElseIf PeekChar(-1, ".") Then + + ' Likely an object name + If HasMatches("^(.*?)($|\s|[""\'`]|" & RegExBoundaries & ")", strMatch) Then + AddToken ttWord, strMatch + Else + Stop + End If + + ' Check for reserved words or functions + Else + + ' Extract next word so we can compare it to some lists + If HasMatches("^(.*?)($|\s|" & RegExBoundaries & ")", strMatch) Then + + ' Convert to upper case + strWord = UCase(strMatch) + + ' Top level reserved word + If InList(strWord, eltReservedToplevel) Then + AddToken ttReservedTopLevel, strMatch + + ' Newline reserved word + ElseIf InList(strWord, eltReservedNewline) Then + AddToken ttReservedNewline, strMatch + + ' Other reserved word + ElseIf InList(strWord, eltReserved) Then + AddToken ttReserved, strMatch + + ' A function must be followed by "(" + ' This makes it so "count(" is considered a function, but "count" alone is not + ElseIf HasMatches("^((" & cstrFunctions & ")[(]|\s|[)])", strMatch) Then + ' Add the function, but not the opening parenthesis + AddToken ttReserved, GetRange(, Len(strMatch) - 1) + + ' Non-reserved word + ElseIf HasMatches("^(.*?)($|\s|[""\'`]|" & RegExBoundaries & ")", strMatch) Then + AddToken ttWord, strMatch + + Else + ' TODO: check for errors + Stop + + End If + End If + End If + + DoEvents + + ' Move to next token + Loop + +End Sub + + +'--------------------------------------------------------------------------------------- +' Procedure : GetMatches +' Author : Adam Waller +' Date : 8/12/2023 +' Purpose : Returns true if we match on the RegEx expression, and sets objMatches to +' : the resulting match. +'--------------------------------------------------------------------------------------- +' +Private Function HasMatches(strRegEx As String, ByRef strMatches As String) As Boolean + Dim objMatches As VBScript_RegExp_55.MatchCollection + With New VBScript_RegExp_55.RegExp + .Pattern = strRegEx + .IgnoreCase = False + '.Global = True + Set objMatches = .Execute(Mid$(m_strSql, m_lngPos)) + If objMatches.Count = 0 Then + strMatches = vbNullString + Else + If objMatches(0).SubMatches.Count > 0 Then + ' Use first submatch, if found + If Len(objMatches(0).SubMatches(0)) Then + strMatches = objMatches(0).SubMatches(0) + Else + ' Use primary match + strMatches = objMatches(0) + End If + Else + ' Fall back to primary match + strMatches = objMatches(0) + End If + HasMatches = True + End If + End With +End Function + + +'--------------------------------------------------------------------------------------- +' Procedure : AddToken +' Author : Adam Waller +' Date : 8/12/2023 +' Purpose : Add a token to the current collection and moves the position marker +'--------------------------------------------------------------------------------------- +' +Private Sub AddToken(intType As eTokenTypes, strValue As String) + m_colTokens.Add Array(intType, strValue) + m_lngPos = m_lngPos + Len(strValue) +End Sub + + +'--------------------------------------------------------------------------------------- +' Procedure : NextChar +' Author : Adam Waller +' Date : 8/12/2023 +' Purpose : Return next one or more characters from current position. +'--------------------------------------------------------------------------------------- +' +Private Function NextChar(strMatch As String, Optional intCompareMode As VbCompareMethod = vbTextCompare) As Boolean + If (m_lngPos + 1) > Len(m_strSql) Then Exit Function + NextChar = (StrComp(Mid(m_strSql, m_lngPos, Len(strMatch)), strMatch, intCompareMode) = 0) +End Function + + +'--------------------------------------------------------------------------------------- +' Procedure : PeekChar +' Author : Adam Waller +' Date : 8/12/2023 +' Purpose : Peek ahead at another character other than the next one. +'--------------------------------------------------------------------------------------- +' +Private Function PeekChar(lngOffset As Long, strMatch As String, Optional intCompareMode As VbCompareMethod = vbTextCompare) As Boolean + If (m_lngPos + lngOffset) < 1 Then Exit Function + If (m_lngPos + lngOffset) > Len(m_strSql) Then Exit Function + PeekChar = (StrComp(Mid(m_strSql, m_lngPos + lngOffset, Len(strMatch)), strMatch, intCompareMode) = 0) +End Function + + +'--------------------------------------------------------------------------------------- +' Procedure : GetTill +' Author : Adam Waller +' Date : 8/12/2023 +' Purpose : Return the string until we find the stop characters, or the remaining +' : string if the stop characters were not found. +'--------------------------------------------------------------------------------------- +' +Private Function GetTill(strStopAt As String, Optional strAltStopAt As String, Optional blnIncludeStopChars As Boolean = False) As String + + Dim lngPos As Long + Dim lngLen As Long + Dim strStop As String + + ' See if the searched string exists + lngPos = InStr(m_lngPos, m_strSql, strStopAt) + + ' Try again using alternate stop + If lngPos = 0 Then + lngPos = InStr(m_lngPos, m_strSql, strAltStopAt) + strStop = strAltStopAt + Else + strStop = strStopAt + End If + + ' Make sure we found a match + If lngPos = 0 Then + ' No match. Return the remaining string + GetTill = Mid(m_strSql, m_lngPos) + Else + lngLen = lngPos - m_lngPos + ' Add in the length of the matched stop characters, if requested + If blnIncludeStopChars Then lngLen = lngLen + Len(strStop) + ' Return the string + GetTill = Mid(m_strSql, m_lngPos, lngLen) + End If + +End Function + + +'--------------------------------------------------------------------------------------- +' Procedure : GetRange +' Author : Adam Waller +' Date : 8/12/2023 +' Purpose : Get a range of characters (kind of like a relative mid() function) +'--------------------------------------------------------------------------------------- +' +Private Function GetRange(Optional lngStartOffset As Long, Optional lngLength As Long = 1) As String + GetRange = Mid(m_strSql, m_lngPos + lngStartOffset, lngLength) +End Function + + +'--------------------------------------------------------------------------------------- +' Procedure : GetQuotedString +' Author : Adam Waller +' Date : 8/12/2023 +' Purpose : Return a quoted string (Applies four possible rules) +'--------------------------------------------------------------------------------------- +' +Private Function GetQuotedString(Optional lngStartOffset As Long = 0) As String + + Dim strExp As String + Dim objMatches As VBScript_RegExp_55.MatchCollection + + With New clsConcat + ' Build out RegEx expression + .Add "^(" + + ' (1) backtick quoted string using `` to escape + .Add "((`[^`]*($|`))+)|" + + ' (2) square bracket quoted string (SQL Server) using ]] to escape + .Add "((\[[^\]]*($|\]))(\][^\]]*($|\]))*)|" + + ' (3) double quoted string using "" or \" to escape + .Add "((""[^""\\\\]*(?:\\\\.[^""\\\\]*)*(""|$))+)|" + + ' (4) single quoted string using '' or \' to escape + .Add "((\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*(\'|$))+)" ' sx', + + .Add ")" + strExp = .GetStr + End With + + ' Apply RegEx + With New VBScript_RegExp_55.RegExp + '.Global = True + '.Multiline = True + .Pattern = strExp + Set objMatches = .Execute(Mid(m_strSql, m_lngPos + lngStartOffset)) + If objMatches.Count > 0 Then GetQuotedString = objMatches(0) + End With + +End Function + + +'--------------------------------------------------------------------------------------- +' Procedure : RemainingChars +' Author : Adam Waller +' Date : 8/12/2023 +' Purpose : Return remaining number of characters from current position +'--------------------------------------------------------------------------------------- +' +Private Function RemainingChars() As Long + RemainingChars = Len(m_strSql) - m_lngPos +End Function + + +'--------------------------------------------------------------------------------------- +' Procedure : StrRepeat +' Author : Adam Waller +' Date : 8/14/2023 +' Purpose : Repeat a string a specified number of times. (High performance version) +'--------------------------------------------------------------------------------------- +' +Private Function StrRepeat(strText As String, lngTimes As Long) As String + + Dim lngCnt As Long + Dim lngStart As Long + Dim strReturn As String + + ' Build full length string to avoid slow string contatenation + strReturn = Space(Len(strText) * lngTimes) + + ' Apply repeated text to return string + For lngCnt = 1 To lngTimes + ' Calculate start position + lngStart = 1 + ((lngCnt * Len(strText)) - Len(strText)) + Mid$(strReturn, lngStart, Len(strText)) = strText + Next lngCnt + + ' Return repeated string + StrRepeat = strReturn + +End Function + + +'--------------------------------------------------------------------------------------- +' Procedure : InList +' Author : Adam Waller +' Date : 8/11/2023 +' Purpose : Returns true if the token was found in the list +'--------------------------------------------------------------------------------------- +' +Private Function InList(strToken As String, intList As eListType) As Boolean + Select Case intList + Case eltBoundaries: InList = (InStr(1, cstrBoundaries, strToken) > 0) + Case eltRegExSpecial: InList = (InStr(1, cstrRegExSpecial, strToken) > 0) + Case eltFunctions: InList = (InStr(1, cstrFunctions, "|" & strToken & "|") > 0) + Case eltReserved: InList = (InStr(1, cstrReserved, "|" & strToken & "|") > 0) + Case eltReservedNewline: InList = (InStr(1, cstrReservedNewline, "|" & strToken & "|") > 0) + Case eltReservedToplevel: InList = (InStr(1, cstrReservedToplevel, "|" & strToken & "|") > 0) + End Select +End Function + + +'--------------------------------------------------------------------------------------- +' Procedure : IsTokenType +' Author : Adam Waller +' Date : 8/15/2023 +' Purpose : Returns true if the token type matches any of the specified types. +'--------------------------------------------------------------------------------------- +' +Private Function IsTokenType(intToken As eTokenTypes, ParamArray intMatchTypes() As Variant) As Boolean + Dim intMatch As Integer + For intMatch = 0 To UBound(intMatchTypes) + If intToken = intMatchTypes(intMatch) Then + IsTokenType = True + Exit For + End If + Next intMatch +End Function + + +'--------------------------------------------------------------------------------------- +' Procedure : IncreaseIndent +' Author : Adam Waller +' Date : 8/15/2023 +' Purpose : Increase the indent level +'--------------------------------------------------------------------------------------- +' +Private Sub IncreaseIndent(ByRef colIndents As Collection, ByRef lngIndentLevel As Long, strIndentType As String) + If colIndents Is Nothing Then Exit Sub + If colIndents.Count = 0 Then + colIndents.Add strIndentType + Else + colIndents.Add strIndentType, , 1 + End If + lngIndentLevel = lngIndentLevel + 1 +End Sub + + +'--------------------------------------------------------------------------------------- +' Procedure : DecreaseIndent +' Author : Adam Waller +' Date : 8/15/2023 +' Purpose : Remove the first element of the indents collection, shifting the remaining +' : elements down. (Similar to the PHP array_shift function) +' : Also reduce the indent level by one. +'--------------------------------------------------------------------------------------- +' +Private Sub DecreaseIndent(ByRef colIndents As Collection, ByRef lngIndentLevel As Long) + If colIndents Is Nothing Then Exit Sub + If colIndents.Count = 0 Then Exit Sub + colIndents.Remove 1 + lngIndentLevel = lngIndentLevel - 1 +End Sub + + +'--------------------------------------------------------------------------------------- +' Procedure : PurgeExtraWhitespace +' Author : Adam Waller +' Date : 8/15/2023 +' Purpose : Replace any extra whitespace within the string with a single space. +'--------------------------------------------------------------------------------------- +' +Private Sub PurgeExtraWhitespace(ByRef strValue As String) + + Dim strResult As String + + ' Check for any space, tab, or vbcrlf + If (InStr(1, strValue, " ") > 0) _ + Or (InStr(1, strValue, vbTab) > 0) _ + Or (InStr(1, strValue, vbCrLf) > 0) Then + + With New VBScript_RegExp_55.RegExp + .Multiline = True + '.Global = True + .Pattern = "\s+" + strResult = .Replace(strValue, " ") + If Len(strResult) Then + If strResult <> strValue Then strValue = strResult + End If + End With + End If + +End Sub + + +'--------------------------------------------------------------------------------------- +' Procedure : GetPreviousToken +' Author : Adam Waller +' Date : 8/15/2023 +' Purpose : Return the previous token ID, optionally skipping any excluded type. +'--------------------------------------------------------------------------------------- +' +Private Function GetPreviousTokenID(lngCurrentToken As Long, Optional intExceptType As eTokenTypes) As Long + Dim intToken As Integer + For intToken = lngCurrentToken - 1 To 1 Step -1 + If intToken = 0 Then + ' Could not find a matching token, or no previous token + GetPreviousTokenID = 0 + End If + If m_colTokens(intToken)(0) <> intExceptType Then + GetPreviousTokenID = intToken + Exit For + End If + Next intToken +End Function + + +'--------------------------------------------------------------------------------------- +' Procedure : GetNextTokenID +' Author : Adam Waller +' Date : 8/15/2023 +' Purpose : Return the next token ID, optionally skipping any excluded type. +'--------------------------------------------------------------------------------------- +' +Private Function GetNextTokenID(lngCurrentToken As Long, Optional intExceptType As eTokenTypes) As Long + Dim intToken As Integer + For intToken = lngCurrentToken + 1 To m_colTokens.Count + If m_colTokens(intToken)(0) <> intExceptType Then + GetNextTokenID = intToken + Exit For + End If + Next intToken +End Function + + +'--------------------------------------------------------------------------------------- +' Procedure : RegExBoundaries +' Author : Adam Waller +' Date : 8/12/2023 +' Purpose : Return a string of properly escaped regex boundary characters. +'--------------------------------------------------------------------------------------- +' +Private Function RegExBoundaries() As String + + Dim lngPos As Long + Dim strChar As String + + Static strBoundaries As String + + ' Check for cached list + If strBoundaries = vbNullString Then + + ' Build out + With New clsConcat + .Add "(" + .AppendOnAdd = "|" + For lngPos = 1 To Len(cstrBoundaries) + strChar = Mid(cstrBoundaries, lngPos, 1) + If InList(strChar, eltRegExSpecial) Then + ' Escape this character + .Add "\", strChar + Else + .Add strChar + End If + Next lngPos + ' Trim final delimiter and cache result + .Remove 1 + strBoundaries = .GetStr & ")" + End With + End If + + ' Return list of boundaries + RegExBoundaries = strBoundaries + +End Function