Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More dialect checking, fixes, inheritance cleanup #2942

Merged
merged 19 commits into from
Mar 30, 2022
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion src/sqlfluff/core/dialects/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,33 @@ def replace(self, **kwargs: DialectElementType):
for n in kwargs:
if n not in self._library: # pragma: no cover
raise ValueError(f"{n!r} is not already registered in {self!r}")
self._library[n] = kwargs[n]

# To replace a segment, the replacement must either be a
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code was in the segment() decorator I removed in the previous PR. That code is still useful, so I moved it to the still-used replace() function.

# subclass of the original, *or* it must have the same
# public methods and/or fields as it.
cls = kwargs[n]
if (
isinstance(self._library[n], type)
and isinstance(cls, type)
and not issubclass(cls, self._library[n])
):
if self._library[n].type != cls.type:
raise ValueError( # pragma: no cover
f"Cannot replace {n!r} because 'type' property does not "
f"match: {cls.type} != {self._library[n].type}"
)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These lines are actually new. I had wondered if the type property was always consistent when replacing a parent segment, and I found several cases where it wasn't. This was presumably a mistake/accident. Several, but not all, of these mismatches were in the Exasol dialect.

Note that when using segment inheritance (now the recommended practice when replacing segments), the type property can be inherited, and such mismatches are thus avoided.

base_dir = set(dir(self._library[n]))
cls_dir = set(dir(cls))
missing = set(
n for n in base_dir.difference(cls_dir) if not n.startswith("_")
)
if missing:
raise ValueError( # pragma: no cover
f"Cannot replace {n!r} because it's not a subclass and "
f"is missing these from base: {', '.join(missing)}"
)

self._library[n] = cls

def add_update_segments(self, module_dct):
"""Scans module dictionary, adding or replacing segment definitions."""
Expand Down
60 changes: 18 additions & 42 deletions src/sqlfluff/dialects/dialect_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,14 +321,9 @@ class SelectStatementSegment(ansi.SelectStatementSegment):
)


class UnorderedSelectStatementSegment(BaseSegment):
class UnorderedSelectStatementSegment(ansi.UnorderedSelectStatementSegment):
"""Enhance unordered `SELECT` statement to include QUALIFY."""

type = "select_statement"
match_grammar = ansi_dialect.get_segment(
"UnorderedSelectStatementSegment"
).match_grammar.copy()
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With inheritance, there is no need to copy the grammar if it is not being modified.


parse_grammar = ansi.UnorderedSelectStatementSegment.parse_grammar.copy(
insert=[Ref("QualifyClauseSegment", optional=True)],
before=Ref("OverlapsClauseSegment", optional=True),
Expand All @@ -343,22 +338,20 @@ class StatementSegment(ansi.StatementSegment):
)


class SelectClauseModifierSegment(BaseSegment):
class SelectClauseModifierSegment(ansi.SelectClauseModifierSegment):
"""Things that come after SELECT but before the columns."""

type = "select_clause_modifier"
match_grammar = Sequence(
match_grammar = Sequence( # type: ignore
# https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax
Sequence("AS", OneOf("STRUCT", "VALUE"), optional=True),
OneOf("DISTINCT", "ALL", optional=True),
)


# BigQuery allows functions in INTERVAL
class IntervalExpressionSegment(BaseSegment):
class IntervalExpressionSegment(ansi.IntervalExpressionSegment):
"""An interval with a function as value segment."""

type = "interval_expression"
match_grammar = Sequence(
"INTERVAL",
Ref("ExpressionSegment"),
Expand Down Expand Up @@ -419,7 +412,7 @@ class IntervalExpressionSegment(BaseSegment):
)


class FunctionSegment(BaseSegment):
class FunctionSegment(ansi.FunctionSegment):
"""A scalar or aggregate function.

Maybe in the future we should distinguish between
Expand All @@ -428,7 +421,6 @@ class FunctionSegment(BaseSegment):
for our purposes.
"""

type = "function"
match_grammar = OneOf(
Sequence(
# Treat functions which take date parts separately
Expand Down Expand Up @@ -492,10 +484,9 @@ class FunctionSegment(BaseSegment):
)


class FunctionDefinitionGrammar(BaseSegment):
class FunctionDefinitionGrammar(ansi.FunctionDefinitionGrammar):
"""This is the body of a `CREATE FUNCTION AS` statement."""

type = "function_definition"
match_grammar = Sequence(
AnyNumberOf(
Sequence(
Expand Down Expand Up @@ -536,13 +527,10 @@ class FunctionDefinitionGrammar(BaseSegment):
)


class WildcardExpressionSegment(BaseSegment):
class WildcardExpressionSegment(ansi.WildcardExpressionSegment):
"""An extension of the star expression for Bigquery."""

type = "wildcard_expression"
match_grammar = ansi_dialect.get_segment(
"WildcardExpressionSegment"
).match_grammar.copy(
match_grammar = ansi.WildcardExpressionSegment.match_grammar.copy(
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR removes the dialects' use of get_segment() in favor of direct attribute access, which is cleaner.

insert=[
# Optional EXCEPT or REPLACE clause
# https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_replace
Expand Down Expand Up @@ -581,14 +569,13 @@ class ReplaceClauseSegment(BaseSegment):
)


class DatatypeSegment(BaseSegment):
class DatatypeSegment(ansi.DatatypeSegment):
"""A data type segment.

In particular here, this enabled the support for
the STRUCT datatypes.
"""

type = "data_type"
match_grammar = OneOf( # Parameter type
Ref("DatatypeIdentifierSegment"), # Simple type
Sequence("ANY", "TYPE"), # SQL UDFs can specify this "type"
Expand Down Expand Up @@ -618,12 +605,11 @@ class DatatypeSegment(BaseSegment):
)


class FunctionParameterListGrammar(BaseSegment):
class FunctionParameterListGrammar(ansi.FunctionParameterListGrammar):
"""The parameters for a function ie. `(string, number)`."""

# Function parameter list. Note that the only difference from the ANSI
# grammar is that BigQuery provides overrides bracket_pairs_set.
type = "function_parameter_list"
match_grammar = Bracketed(
Delimited(
Ref("FunctionParameterGrammar"),
Expand All @@ -634,14 +620,13 @@ class FunctionParameterListGrammar(BaseSegment):
)


class TypelessStructSegment(BaseSegment):
class TypelessStructSegment(ansi.TypelessStructSegment):
"""Expression to construct a STRUCT with implicit types.

https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typeless_struct_syntax
"""

type = "typeless_struct"
match_grammar = Sequence(
match_grammar = Sequence( # type: ignore
"STRUCT",
Bracketed(
Delimited(
Expand Down Expand Up @@ -850,13 +835,10 @@ def iter_raw_references(self):
yield self.ObjectReferencePart("".join(parts), segments)


class TableExpressionSegment(BaseSegment):
class TableExpressionSegment(ansi.TableExpressionSegment):
"""Main table expression e.g. within a FROM clause, with hyphen support."""

type = "table_expression"
match_grammar = ansi_dialect.get_segment(
"TableExpressionSegment"
).match_grammar.copy(
match_grammar = ansi.TableExpressionSegment.match_grammar.copy(
insert=[
Ref("HyphenatedObjectReferenceSegment"),
]
Expand Down Expand Up @@ -973,10 +955,9 @@ class OptionsSegment(BaseSegment):
)


class CreateTableStatementSegment(BaseSegment):
class CreateTableStatementSegment(ansi.CreateTableStatementSegment):
"""A `CREATE TABLE` statement."""

type = "create_table_statement"
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_table_statement
match_grammar = Sequence(
"CREATE",
Expand Down Expand Up @@ -1010,14 +991,12 @@ class CreateTableStatementSegment(BaseSegment):
)


class CreateViewStatementSegment(BaseSegment):
class CreateViewStatementSegment(ansi.CreateViewStatementSegment):
"""A `CREATE VIEW` statement.

https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#view_option_list
"""

type = "create_view_statement"

match_grammar = Sequence(
"CREATE",
Ref("OrReplaceGrammar", optional=True),
Expand Down Expand Up @@ -1142,14 +1121,12 @@ class FromUnpivotExpressionSegment(BaseSegment):
)


class InsertStatementSegment(BaseSegment):
class InsertStatementSegment(ansi.InsertStatementSegment):
"""A `INSERT` statement.

N.B. not a complete implementation.
"""

type = "insert_statement"
match_grammar = StartsWith("INSERT")
parse_grammar = Sequence(
"INSERT",
Ref.keyword("INTO", optional=True),
Expand All @@ -1159,13 +1136,12 @@ class InsertStatementSegment(BaseSegment):
)


class SamplingExpressionSegment(BaseSegment):
class SamplingExpressionSegment(ansi.SamplingExpressionSegment):
"""A sampling expression.

https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#tablesample_operator
"""

type = "sample_expression"
match_grammar = Sequence(
"TABLESAMPLE", "SYSTEM", Bracketed(Ref("NumericLiteralSegment"), "PERCENT")
)
7 changes: 3 additions & 4 deletions src/sqlfluff/dialects/dialect_exasol.py
Original file line number Diff line number Diff line change
Expand Up @@ -1181,10 +1181,9 @@ class ColumnDefinitionSegment(BaseSegment):
)


class ColumnConstraintSegment(BaseSegment):
class ColumnConstraintSegment(ansi.ColumnConstraintSegment):
"""A column option; each CREATE TABLE column can have 0 or more."""

type = "column_option"
match_grammar = Sequence(
OneOf(
Sequence(
Expand Down Expand Up @@ -3047,10 +3046,10 @@ class PreferringPlusPriorTermSegment(BaseSegment):
)


class MLTableExpressionSegment(BaseSegment):
class MLTableExpressionSegment(ansi.MLTableExpressionSegment):
"""Not supported."""

match_grammar = Nothing()
match_grammar = Nothing() # type: ignore


############################
Expand Down
12 changes: 4 additions & 8 deletions src/sqlfluff/dialects/dialect_mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -1134,17 +1134,14 @@ class SelectClauseSegment(ansi.SelectClauseSegment):
)


class SelectStatementSegment(BaseSegment):
class SelectStatementSegment(ansi.SelectStatementSegment):
"""A `SELECT` statement.

https://dev.mysql.com/doc/refman/5.7/en/select.html
"""

type = "select_statement"
match_grammar = ansi_dialect.get_segment(
"SelectStatementSegment"
).match_grammar.copy()
match_grammar.terminator = match_grammar.terminator.copy( # type: ignore
match_grammar = ansi.SelectStatementSegment.match_grammar.copy()
match_grammar.terminator = match_grammar.terminator.copy(
insert=[Ref("UpsertClauseListSegment")]
)

Expand Down Expand Up @@ -1530,13 +1527,12 @@ class CursorFetchSegment(BaseSegment):
)


class DropIndexStatementSegment(BaseSegment):
class DropIndexStatementSegment(ansi.DropIndexStatementSegment):
"""A `DROP INDEX` statement.

https://dev.mysql.com/doc/refman/8.0/en/drop-index.html
"""

type = "drop_statement"
# DROP INDEX <Index name> ON <table_name>
# [ALGORITHM [=] {DEFAULT | INPLACE | COPY} | LOCK [=] {DEFAULT | NONE | SHARED |
# EXCLUSIVE}]
Expand Down
4 changes: 1 addition & 3 deletions src/sqlfluff/dialects/dialect_oracle.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,7 @@ class IndexTypeReferenceSegment(BaseSegment):

type = "indextype_reference"

match_grammar = ansi_dialect.get_segment(
"ObjectReferenceSegment"
).match_grammar.copy()
match_grammar = ansi.ObjectReferenceSegment.match_grammar.copy()


# Adding Oracle specific statements.
Expand Down
Loading