sqlfluff · barrywhart · Mar 30, 2022 · Mar 29, 2022 · Mar 29, 2022 · Mar 29, 2022
diff --git a/src/sqlfluff/core/dialects/base.py b/src/sqlfluff/core/dialects/base.py
@@ -152,7 +152,33 @@ def replace(self, **kwargs: DialectElementType):
         for n in kwargs:
             if n not in self._library:  # pragma: no cover
                 raise ValueError(f"{n!r} is not already registered in {self!r}")
-            self._library[n] = kwargs[n]
+
+            # To replace a segment class, the replacement must either be a
+            # subclass of the original, *or* it must declare the same 'type'
+            # and expose every public attribute (method or field) of the original.
+            cls = kwargs[n]
+            base = self._library[n]
+            if (
+                isinstance(base, type)
+                and isinstance(cls, type)
+                and not issubclass(cls, base)
+            ):
+                if base.type != cls.type:
+                    raise ValueError(  # pragma: no cover
+                        f"Cannot replace {n!r} because 'type' property does not "
+                        f"match: {cls.type} != {base.type}"
+                    )
+                # Public names of the original that the replacement lacks,
+                # sorted so that the error message below is deterministic.
+                public = {a for a in dir(base) if not a.startswith("_")}
+                missing = sorted(public.difference(dir(cls)))
+                if missing:
+                    raise ValueError(  # pragma: no cover
+                        f"Cannot replace {n!r} because it's not a subclass and "
+                        f"is missing these from base: {', '.join(missing)}"
+                    )
+
+            self._library[n] = cls
 
     def add_update_segments(self, module_dct):
         """Scans module dictionary, adding or replacing segment definitions."""

diff --git a/src/sqlfluff/dialects/dialect_bigquery.py b/src/sqlfluff/dialects/dialect_bigquery.py
@@ -321,14 +321,9 @@ class SelectStatementSegment(ansi.SelectStatementSegment):
     )
 
 
-class UnorderedSelectStatementSegment(BaseSegment):
+class UnorderedSelectStatementSegment(ansi.UnorderedSelectStatementSegment):
     """Enhance unordered `SELECT` statement to include QUALIFY."""
 
-    type = "select_statement"
-    match_grammar = ansi_dialect.get_segment(
-        "UnorderedSelectStatementSegment"
-    ).match_grammar.copy()
-
     parse_grammar = ansi.UnorderedSelectStatementSegment.parse_grammar.copy(
         insert=[Ref("QualifyClauseSegment", optional=True)],
         before=Ref("OverlapsClauseSegment", optional=True),
@@ -343,22 +338,20 @@ class StatementSegment(ansi.StatementSegment):
     )
 
 
-class SelectClauseModifierSegment(BaseSegment):
+class SelectClauseModifierSegment(ansi.SelectClauseModifierSegment):
     """Things that come after SELECT but before the columns."""
 
-    type = "select_clause_modifier"
-    match_grammar = Sequence(
+    match_grammar = Sequence(  # type: ignore
         # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax
         Sequence("AS", OneOf("STRUCT", "VALUE"), optional=True),
         OneOf("DISTINCT", "ALL", optional=True),
     )
 
 
 # BigQuery allows functions in INTERVAL
-class IntervalExpressionSegment(BaseSegment):
+class IntervalExpressionSegment(ansi.IntervalExpressionSegment):
     """An interval with a function as value segment."""
 
-    type = "interval_expression"
     match_grammar = Sequence(
         "INTERVAL",
         Ref("ExpressionSegment"),
@@ -419,7 +412,7 @@ class IntervalExpressionSegment(BaseSegment):
 )
 
 
-class FunctionSegment(BaseSegment):
+class FunctionSegment(ansi.FunctionSegment):
     """A scalar or aggregate function.
 
     Maybe in the future we should distinguish between
@@ -428,7 +421,6 @@ class FunctionSegment(BaseSegment):
     for our purposes.
     """
 
-    type = "function"
     match_grammar = OneOf(
         Sequence(
             # Treat functions which take date parts separately
@@ -492,10 +484,9 @@ class FunctionSegment(BaseSegment):
     )
 
 
-class FunctionDefinitionGrammar(BaseSegment):
+class FunctionDefinitionGrammar(ansi.FunctionDefinitionGrammar):
     """This is the body of a `CREATE FUNCTION AS` statement."""
 
-    type = "function_definition"
     match_grammar = Sequence(
         AnyNumberOf(
             Sequence(
@@ -536,13 +527,10 @@ class FunctionDefinitionGrammar(BaseSegment):
     )
 
 
-class WildcardExpressionSegment(BaseSegment):
+class WildcardExpressionSegment(ansi.WildcardExpressionSegment):
     """An extension of the star expression for Bigquery."""
 
-    type = "wildcard_expression"
-    match_grammar = ansi_dialect.get_segment(
-        "WildcardExpressionSegment"
-    ).match_grammar.copy(
+    match_grammar = ansi.WildcardExpressionSegment.match_grammar.copy(
         insert=[
             # Optional EXCEPT or REPLACE clause
             # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_replace
@@ -581,14 +569,13 @@ class ReplaceClauseSegment(BaseSegment):
     )
 
 
-class DatatypeSegment(BaseSegment):
+class DatatypeSegment(ansi.DatatypeSegment):
     """A data type segment.
 
     In particular here, this enabled the support for
     the STRUCT datatypes.
     """
 
-    type = "data_type"
     match_grammar = OneOf(  # Parameter type
         Ref("DatatypeIdentifierSegment"),  # Simple type
         Sequence("ANY", "TYPE"),  # SQL UDFs can specify this "type"
@@ -618,12 +605,11 @@ class DatatypeSegment(BaseSegment):
     )
 
 
-class FunctionParameterListGrammar(BaseSegment):
+class FunctionParameterListGrammar(ansi.FunctionParameterListGrammar):
     """The parameters for a function ie. `(string, number)`."""
 
     # Function parameter list. Note that the only difference from the ANSI
     # grammar is that BigQuery provides overrides bracket_pairs_set.
-    type = "function_parameter_list"
     match_grammar = Bracketed(
         Delimited(
             Ref("FunctionParameterGrammar"),
@@ -634,14 +620,13 @@ class FunctionParameterListGrammar(BaseSegment):
     )
 
 
-class TypelessStructSegment(BaseSegment):
+class TypelessStructSegment(ansi.TypelessStructSegment):
     """Expression to construct a STRUCT with implicit types.
 
     https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typeless_struct_syntax
     """
 
-    type = "typeless_struct"
-    match_grammar = Sequence(
+    match_grammar = Sequence(  # type: ignore
         "STRUCT",
         Bracketed(
             Delimited(
@@ -850,13 +835,10 @@ def iter_raw_references(self):
                 yield self.ObjectReferencePart("".join(parts), segments)
 
 
-class TableExpressionSegment(BaseSegment):
+class TableExpressionSegment(ansi.TableExpressionSegment):
     """Main table expression e.g. within a FROM clause, with hyphen support."""
 
-    type = "table_expression"
-    match_grammar = ansi_dialect.get_segment(
-        "TableExpressionSegment"
-    ).match_grammar.copy(
+    match_grammar = ansi.TableExpressionSegment.match_grammar.copy(
         insert=[
             Ref("HyphenatedObjectReferenceSegment"),
         ]
@@ -973,10 +955,9 @@ class OptionsSegment(BaseSegment):
     )
 
 
-class CreateTableStatementSegment(BaseSegment):
+class CreateTableStatementSegment(ansi.CreateTableStatementSegment):
     """A `CREATE TABLE` statement."""
 
-    type = "create_table_statement"
     # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_table_statement
     match_grammar = Sequence(
         "CREATE",
@@ -1010,14 +991,12 @@ class CreateTableStatementSegment(BaseSegment):
     )
 
 
-class CreateViewStatementSegment(BaseSegment):
+class CreateViewStatementSegment(ansi.CreateViewStatementSegment):
     """A `CREATE VIEW` statement.
 
     https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#view_option_list
     """
 
-    type = "create_view_statement"
-
     match_grammar = Sequence(
         "CREATE",
         Ref("OrReplaceGrammar", optional=True),
@@ -1142,14 +1121,12 @@ class FromUnpivotExpressionSegment(BaseSegment):
     )
 
 
-class InsertStatementSegment(BaseSegment):
+class InsertStatementSegment(ansi.InsertStatementSegment):
     """A `INSERT` statement.
 
     N.B. not a complete implementation.
     """
 
-    type = "insert_statement"
-    match_grammar = StartsWith("INSERT")
     parse_grammar = Sequence(
         "INSERT",
         Ref.keyword("INTO", optional=True),
@@ -1159,13 +1136,12 @@ class InsertStatementSegment(BaseSegment):
     )
 
 
-class SamplingExpressionSegment(BaseSegment):
+class SamplingExpressionSegment(ansi.SamplingExpressionSegment):
     """A sampling expression.
 
     https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#tablesample_operator
     """
 
-    type = "sample_expression"
     match_grammar = Sequence(
         "TABLESAMPLE", "SYSTEM", Bracketed(Ref("NumericLiteralSegment"), "PERCENT")
     )
diff --git a/src/sqlfluff/dialects/dialect_exasol.py b/src/sqlfluff/dialects/dialect_exasol.py
@@ -1181,10 +1181,9 @@ class ColumnDefinitionSegment(BaseSegment):
     )
 
 
-class ColumnConstraintSegment(BaseSegment):
+class ColumnConstraintSegment(ansi.ColumnConstraintSegment):
     """A column option; each CREATE TABLE column can have 0 or more."""
 
-    type = "column_option"
     match_grammar = Sequence(
         OneOf(
             Sequence(
@@ -3047,10 +3046,10 @@ class PreferringPlusPriorTermSegment(BaseSegment):
     )
 
 
-class MLTableExpressionSegment(BaseSegment):
+class MLTableExpressionSegment(ansi.MLTableExpressionSegment):
     """Not supported."""
 
-    match_grammar = Nothing()
+    match_grammar = Nothing()  # type: ignore
 
 
 ############################

diff --git a/src/sqlfluff/dialects/dialect_mysql.py b/src/sqlfluff/dialects/dialect_mysql.py
@@ -1134,17 +1134,14 @@ class SelectClauseSegment(ansi.SelectClauseSegment):
     )
 
 
-class SelectStatementSegment(BaseSegment):
+class SelectStatementSegment(ansi.SelectStatementSegment):
     """A `SELECT` statement.
 
     https://dev.mysql.com/doc/refman/5.7/en/select.html
     """
 
-    type = "select_statement"
-    match_grammar = ansi_dialect.get_segment(
-        "SelectStatementSegment"
-    ).match_grammar.copy()
-    match_grammar.terminator = match_grammar.terminator.copy(  # type: ignore
+    match_grammar = ansi.SelectStatementSegment.match_grammar.copy()
+    match_grammar.terminator = match_grammar.terminator.copy(
         insert=[Ref("UpsertClauseListSegment")]
     )
 
@@ -1530,13 +1527,12 @@ class CursorFetchSegment(BaseSegment):
     )
 
 
-class DropIndexStatementSegment(BaseSegment):
+class DropIndexStatementSegment(ansi.DropIndexStatementSegment):
     """A `DROP INDEX` statement.
 
     https://dev.mysql.com/doc/refman/8.0/en/drop-index.html
     """
 
-    type = "drop_statement"
     # DROP INDEX <Index name> ON <table_name>
     # [ALGORITHM [=] {DEFAULT | INPLACE | COPY} | LOCK [=] {DEFAULT | NONE | SHARED |
     # EXCLUSIVE}]

diff --git a/src/sqlfluff/dialects/dialect_oracle.py b/src/sqlfluff/dialects/dialect_oracle.py
@@ -78,9 +78,7 @@ class IndexTypeReferenceSegment(BaseSegment):
 
     type = "indextype_reference"
 
-    match_grammar = ansi_dialect.get_segment(
-        "ObjectReferenceSegment"
-    ).match_grammar.copy()
+    match_grammar = ansi.ObjectReferenceSegment.match_grammar.copy()
 
 
 # Adding Oracle specific statements.