-
-
Notifications
You must be signed in to change notification settings - Fork 755
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Update individual rules to take advantage of core rule processing changes #3041
Changes from all commits
283649e
3aa5abd
ea80c56
d5fe681
1488c5b
de85ca7
d138701
abcca8a
78e42e8
7e37250
b426845
4056496
72e3bcd
a723852
5bc2b3d
12141a9
2b11c08
3d11b6c
d3f342e
6111998
b9f9e74
765408e
2408082
bf7b98f
c70059d
8fc64c7
2b2cc1f
d636e10
168022c
7c32cd4
56102c8
dc37633
cedbbe1
52eeebc
9b7deb9
c4f5f8b
23780ad
d93f4f2
571b209
93efb1e
5f11322
a220053
579ac9b
bfef962
66a0507
e0d3af6
a51ff1b
e28b033
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -379,6 +379,23 @@ def siblings_post(self) -> Tuple[BaseSegment, ...]: | |
else: | ||
return tuple() | ||
|
||
@cached_property | ||
def final_segment(self) -> BaseSegment: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. New property |
||
"""Returns rightmost & lowest descendant. | ||
|
||
Similar in spirit to BaseRule.is_final_segment(), but: | ||
- Much faster | ||
- Does not allow filtering out meta segments | ||
""" | ||
last_segment: BaseSegment = ( | ||
self.parent_stack[0] if self.parent_stack else self.segment | ||
) | ||
while True: | ||
try: | ||
last_segment = last_segment.segments[-1] | ||
except IndexError: | ||
return last_segment | ||
|
||
@property | ||
def functional(self): | ||
"""Returns a Surrogates object that simplifies writing rules.""" | ||
|
@@ -503,12 +520,28 @@ class BaseRule: | |
# Lint loop / crawl behavior. When appropriate, rules can (and should) | ||
# override these values to make linting faster. | ||
recurse_into = True | ||
needs_raw_stack = True | ||
# "needs_raw_stack" defaults to False because rules run faster that way, and | ||
# most rules don't need it. Rules that use it are usually those that look | ||
# at the surroundings of a segment, e.g. "is there whitespace preceding this | ||
# segment?" In the long run, it would be good to review rules that use | ||
# raw_stack to try and eliminate its use. These rules will often be good | ||
# candidates for one of the following: | ||
# - Rewriting to use "RuleContext.raw_segment_pre", which is similar to | ||
# "raw_stack", but it's only the ONE raw segment prior to the current | ||
# one. | ||
# - Rewriting to use "BaseRule.recurse_into = False" and traversing the | ||
# parse tree directly. | ||
# - Using "RuleContext.memory" to implement custom, lighter weight tracking | ||
# of just the MINIMUM required state across calls to _eval(). Reason: | ||
# "raw_stack" becomes very large for large files (thousands or more | ||
# segments!). In practice, most rules only need to look at a few adjacent | ||
# segments, e.g. others on the same line or in the same statement. | ||
needs_raw_stack = False | ||
# Rules can override this to specify "post". "Post" rules are those that are | ||
# not expected to trigger any downstream rules, e.g. capitalization fixes. | ||
# They run on two occasions: | ||
# - On the first loop of the main phase | ||
# - In a second linter loop after the main rules run | ||
# - On the first pass of the main phase | ||
# - In a second linter pass after the main phase | ||
lint_phase = "main" | ||
|
||
def __init__(self, code, description, **kwargs): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -193,6 +193,7 @@ class Rule_L003(BaseRule): | |
|
||
targets_templated = True | ||
_works_on_unparsable = False | ||
needs_raw_stack = True | ||
_adjust_anchors = True | ||
_ignore_types: List[str] = ["script_content"] | ||
config_keywords = ["tab_space_size", "indent_unit"] | ||
|
@@ -413,7 +414,7 @@ def _eval(self, context: RuleContext) -> Optional[LintResult]: | |
# First non-whitespace element is our trigger | ||
memory.trigger = segment | ||
|
||
is_last = self.is_final_segment(context) | ||
is_last = context.segment is context.final_segment | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Legend There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How can we avoid rule writers using the wrong call in future? These seem very similarly named and therefore not crazy to think they are interchangeable. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe rename the function? The property ( |
||
if not segment.is_type("newline") and not is_last: | ||
# Process on line ends or file end | ||
return LintResult(memory=memory) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -77,8 +77,8 @@ def _eval(self, context: RuleContext) -> LintResult: | |
) | ||
# Only attempt a fix at the start of a newline for now | ||
and ( | ||
len(context.raw_stack) == 0 | ||
or context.raw_stack[-1].is_type("newline") | ||
context.raw_segment_pre is None | ||
or context.raw_segment_pre.is_type("newline") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Switching from |
||
) | ||
): | ||
fixes = [ | ||
|
@@ -90,7 +90,8 @@ def _eval(self, context: RuleContext) -> LintResult: | |
) | ||
] | ||
elif not ( | ||
len(context.raw_stack) == 0 or context.raw_stack[-1].is_type("newline") | ||
context.raw_segment_pre is None | ||
or context.raw_segment_pre.is_type("newline") | ||
): | ||
# give a helpful message if the wrong indent has been found and is not | ||
# at the start of a newline | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
"""Implementation of Rule L005.""" | ||
from typing import Optional | ||
|
||
from sqlfluff.core.parser import RawSegment | ||
from sqlfluff.core.rules.base import BaseRule, LintResult, LintFix, RuleContext | ||
from sqlfluff.core.rules.doc_decorators import document_fix_compatible | ||
|
||
|
@@ -38,19 +39,15 @@ class Rule_L005(BaseRule): | |
""" | ||
|
||
def _eval(self, context: RuleContext) -> Optional[LintResult]: | ||
"""Commas should not have whitespace directly before them. | ||
|
||
We need at least one segment behind us for this to work. | ||
|
||
""" | ||
if len(context.raw_stack) >= 1: | ||
cm1 = context.raw_stack[-1] | ||
if ( | ||
context.segment.is_type("comma") | ||
and cm1.is_type("whitespace") | ||
and cm1.pos_marker.line_pos > 1 | ||
): | ||
anchor = cm1 | ||
return LintResult(anchor=anchor, fixes=[LintFix.delete(cm1)]) | ||
# Otherwise fine | ||
"""Commas should not have whitespace directly before them.""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The primary change to this file is switching from |
||
anchor: Optional[RawSegment] = context.raw_segment_pre | ||
if ( | ||
# We need at least one previous segment for this to work. | ||
anchor is not None | ||
and context.segment.is_type("comma") | ||
and anchor.is_type("whitespace") | ||
and anchor.pos_marker.line_pos > 1 | ||
): | ||
return LintResult(anchor=anchor, fixes=[LintFix.delete(anchor)]) | ||
# Otherwise fine. | ||
return None |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,35 @@ | ||
"""Implementation of Rule L009.""" | ||
from typing import Optional | ||
|
||
from sqlfluff.core.parser import NewlineSegment | ||
from typing import List, Optional, Tuple | ||
|
||
from sqlfluff.core.parser import BaseSegment, NewlineSegment | ||
from sqlfluff.core.rules.base import BaseRule, LintResult, LintFix, RuleContext | ||
from sqlfluff.core.rules.doc_decorators import document_fix_compatible | ||
from sqlfluff.core.rules.functional import Segments, sp, tsp | ||
|
||
|
||
def get_trailing_newlines(segment: BaseSegment) -> List[BaseSegment]: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Reworked this rule so it is only called once per parse tree, with the root segment. |
||
"""Returns list of trailing newlines in the tree.""" | ||
result = [] | ||
for seg in segment.recursive_crawl_all(reverse=True): | ||
if seg.is_type("newline"): | ||
result.append(seg) | ||
if not seg.is_whitespace and not seg.is_type("dedent"): | ||
break | ||
return result | ||
|
||
|
||
def get_last_segment(segment: Segments) -> Tuple[List[BaseSegment], Segments]: | ||
"""Returns rightmost & lowest descendant and its "parent stack".""" | ||
parent_stack: List[BaseSegment] = [] | ||
while True: | ||
children = segment.children() | ||
if children: | ||
parent_stack.append(segment[0]) | ||
segment = children.last() | ||
else: | ||
return parent_stack, segment | ||
Comment on lines
+21
to
+30
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why doesn't it use the new There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because it also needs the parent stack, which is not provided by |
||
|
||
|
||
@document_fix_compatible | ||
class Rule_L009(BaseRule): | ||
"""Files must end with a single trailing newline. | ||
|
@@ -82,6 +104,9 @@ class Rule_L009(BaseRule): | |
""" | ||
|
||
targets_templated = True | ||
# TRICKY: Tells linter to only call _eval() ONCE, with the root segment | ||
recurse_into = False | ||
lint_phase = "post" | ||
|
||
def _eval(self, context: RuleContext) -> Optional[LintResult]: | ||
"""Files must end with a single trailing newline. | ||
|
@@ -91,21 +116,9 @@ def _eval(self, context: RuleContext) -> Optional[LintResult]: | |
|
||
""" | ||
# We only care about the final segment of the parse tree. | ||
if not self.is_final_segment(context, filter_meta=False): | ||
return None | ||
parent_stack, segment = get_last_segment(context.functional.segment) | ||
|
||
# Include current segment for complete stack and reverse. | ||
parent_stack: Segments = context.functional.parent_stack | ||
complete_stack: Segments = ( | ||
context.functional.raw_stack + context.functional.segment | ||
) | ||
reversed_complete_stack = complete_stack.reversed() | ||
|
||
# Find the trailing newline segments. | ||
trailing_newlines = reversed_complete_stack.select( | ||
select_if=sp.is_type("newline"), | ||
loop_while=sp.or_(sp.is_whitespace(), sp.is_type("dedent")), | ||
) | ||
trailing_newlines = Segments(*get_trailing_newlines(context.segment)) | ||
trailing_literal_newlines = trailing_newlines | ||
if context.templated_file: | ||
trailing_literal_newlines = trailing_newlines.select( | ||
|
@@ -116,12 +129,12 @@ def _eval(self, context: RuleContext) -> Optional[LintResult]: | |
if not trailing_literal_newlines: | ||
# We make an edit to create this segment after the child of the FileSegment. | ||
if len(parent_stack) == 1: | ||
fix_anchor_segment = context.segment | ||
fix_anchor_segment = segment[0] | ||
else: | ||
fix_anchor_segment = parent_stack[1] | ||
|
||
return LintResult( | ||
anchor=context.segment, | ||
anchor=segment[0], | ||
fixes=[ | ||
LintFix.create_after( | ||
fix_anchor_segment, | ||
|
@@ -132,7 +145,7 @@ def _eval(self, context: RuleContext) -> Optional[LintResult]: | |
elif len(trailing_literal_newlines) > 1: | ||
# Delete extra newlines. | ||
return LintResult( | ||
anchor=context.segment, | ||
anchor=segment[0], | ||
fixes=[LintFix.delete(d) for d in trailing_literal_newlines[1:]], | ||
) | ||
else: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,9 +52,6 @@ def _eval(self, context: RuleContext) -> Optional[LintResult]: | |
|
||
We look for the alias segment, and then evaluate its parent and whether | ||
it contains an AS keyword. This is the _eval function for both L011 and L012. | ||
|
||
The use of `raw_stack` is just for working out how much whitespace to add. | ||
|
||
""" | ||
# Config type hints | ||
self.aliasing: str | ||
|
@@ -68,14 +65,14 @@ def _eval(self, context: RuleContext) -> Optional[LintResult]: | |
|
||
# Remove the AS as we're using implicit aliasing | ||
fixes.append(LintFix.delete(context.segment.segments[0])) | ||
anchor = context.raw_stack[-1] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Switching from |
||
anchor = context.raw_segment_pre | ||
|
||
# Remove whitespace before (if exists) or after (if not) | ||
if ( | ||
len(context.raw_stack) > 0 | ||
and context.raw_stack[-1].type == "whitespace" | ||
context.raw_segment_pre is not None | ||
and context.raw_segment_pre.type == "whitespace" | ||
): | ||
fixes.append(LintFix.delete(context.raw_stack[-1])) | ||
fixes.append(LintFix.delete(context.raw_segment_pre)) | ||
elif ( | ||
len(context.segment.segments) > 0 | ||
and context.segment.segments[1].type == "whitespace" | ||
|
@@ -90,7 +87,8 @@ def _eval(self, context: RuleContext) -> Optional[LintResult]: | |
insert_buff: List[Union[WhitespaceSegment, KeywordSegment]] = [] | ||
|
||
# Add initial whitespace if we need to... | ||
if context.raw_stack[-1].name not in ["whitespace", "newline"]: | ||
assert context.raw_segment_pre | ||
if context.raw_segment_pre.name not in ["whitespace", "newline"]: | ||
insert_buff.append(WhitespaceSegment()) | ||
|
||
# Add an AS (Uppercase for now, but could be corrected later) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this right? I thought the default for this was `false` and you had to explicitly set it to `true`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's what I thought, too, but I checked. I think maybe I (we) were confusing this with the `--fix-even-unparsable` thing added recently. We've got so many settings. 🤯
IIUC, the setting documented here controls whether the rule is called for unparsable segments, and `--fix-even-unparsable` controls whether any fixes would be applied. (Note that you can lint without fixing!)