Skip to content

Commit

Permalink
Feature/f string checks (#118)
Browse files Browse the repository at this point in the history
Adds support for nested f-strings introduced by PEP701 & Python 3.12
  • Loading branch information
arnimarj authored Jan 13, 2024
1 parent 96032c1 commit 927816e
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 79 deletions.
221 changes: 142 additions & 79 deletions flake8_quotes/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import optparse
import sys
import tokenize
import warnings

Expand All @@ -18,6 +19,9 @@
from flake8_quotes.docstring_detection import get_docstring_tokens


_IS_PEP701 = sys.version_info[:2] >= (3, 12)


class QuoteChecker(object):
name = __name__
version = __version__
Expand Down Expand Up @@ -128,6 +132,10 @@ def add_options(cls, parser):
cls._register_opt(parser, '--no-avoid-escape', dest='avoid_escape', default=None, action='store_false',
parse_from_config=False,
help='Disable avoiding escaping same quotes in inline strings')
cls._register_opt(parser, '--check-inside-f-strings',
dest='check_inside_f_strings', default=False, action='store_true',
parse_from_config=True,
help='Check strings inside f-strings, when PEP701 is active (Python 3.12+)')

@classmethod
def parse_options(cls, options):
Expand Down Expand Up @@ -166,6 +174,12 @@ def parse_options(cls, options):
else:
cls.config.update({'avoid_escape': True})

# If check inside f-strings specified, add to config
if hasattr(options, 'check_inside_f_strings') and options.check_inside_f_strings is not None:
cls.config.update({'check_inside_f_strings': options.check_inside_f_strings})
else:
cls.config.update({'check_inside_f_strings': False})

def get_file_contents(self):
if self.filename in ('stdin', '-', None):
return stdin_get_value().splitlines(True)
Expand Down Expand Up @@ -194,98 +208,147 @@ def get_noqa_lines(self, file_contents):
def get_quotes_errors(self, file_contents):
    """Yield flake8-style error dicts for quote violations in *file_contents*.

    Tokenizes the file contents and dispatches each string token to
    ``self._check_string``.  On Python 3.12+ (PEP 701) f-strings are no
    longer single ``STRING`` tokens but ``FSTRING_START`` /
    ``FSTRING_MIDDLE`` / ``FSTRING_END`` streams, so we track f-string
    nesting and either reassemble each top-level f-string into one string
    (pre-PEP 701 semantics) or, when ``check_inside_f_strings`` is enabled,
    check every nested string/f-string token individually.

    :param file_contents: list of source lines (with line endings)
    """
    tokens = [Token(t) for t in tokenize.generate_tokens(lambda L=iter(file_contents): next(L))]
    docstring_tokens = get_docstring_tokens(tokens)

    # when PEP701 is enabled, we track when the token stream
    # is passing over an f-string

    # the start of the current top-level f-string (row, col)
    fstring_start = None

    # > 0 when we are inside an f-string token stream; since
    # f-strings can be arbitrarily nested, we need a counter
    fstring_nesting = 0

    # the token.string part of all tokens inside the current f-string
    fstring_buffer = []

    for token in tokens:
        is_docstring = token in docstring_tokens

        # non PEP701: f-strings arrive as plain STRING tokens, so we only
        # need to check STRING tokens
        if not _IS_PEP701:
            if token.type == tokenize.STRING:
                yield from self._check_string(token.string, token.start, is_docstring)

            continue

        # otherwise, we track all tokens belonging to the current f-string
        if token.type == tokenize.FSTRING_START:
            if fstring_nesting == 0:
                fstring_start = token.start

            fstring_nesting += 1
            fstring_buffer.append(token.string)
        elif token.type == tokenize.FSTRING_END:
            fstring_nesting -= 1
            fstring_buffer.append(token.string)
        elif fstring_nesting > 0:
            fstring_buffer.append(token.string)

        # if we have reached the end of a top-level f-string, we check
        # it as if it was a single string (pre PEP701 semantics) when
        # check_inside_f_strings is false
        if token.type == tokenize.FSTRING_END and fstring_nesting == 0:
            token_string = ''.join(fstring_buffer)
            fstring_buffer[:] = []

            if not self.config['check_inside_f_strings']:
                yield from self._check_string(token_string, fstring_start, is_docstring)
            continue

        # otherwise, we check nested strings and f-strings; we don't
        # check FSTRING_END since it must be legal if the matching
        # tokenize.FSTRING_START succeeded
        if token.type in (tokenize.STRING, tokenize.FSTRING_START):
            if fstring_nesting > 0:
                if self.config['check_inside_f_strings']:
                    yield from self._check_string(token.string, token.start, is_docstring)
            else:
                yield from self._check_string(token.string, token.start, is_docstring)

def _check_string(self, token_string, token_start, is_docstring):
# Remove any prefixes in strings like `u` from `u"foo"`
# DEV: `last_quote_char` is 1 character, even for multiline strings
# `"foo"` -> `"foo"`
# `b"foo"` -> `"foo"`
# `br"foo"` -> `"foo"`
# `b"""foo"""` -> `"""foo"""`
last_quote_char = token_string[-1]
first_quote_index = token_string.index(last_quote_char)
prefix = token_string[:first_quote_index].lower()
unprefixed_string = token_string[first_quote_index:]

# Determine if our string is multiline-based
# "foo"[0] * 3 = " * 3 = """
# "foo"[0:3] = "fo
# """foo"""[0:3] = """
is_multiline_string = unprefixed_string[0] * 3 == unprefixed_string[0:3]
start_row, start_col = token_start

# If our string is a docstring
# DEV: Docstring quotes must come before multiline quotes as it can as a multiline quote
if is_docstring:
if self.config['good_docstring'] in unprefixed_string:
return

yield {
'message': 'Q002 ' + self.config['docstring_error_message'],
'line': start_row,
'col': start_col,
}
# Otherwise if our string is multiline
elif is_multiline_string:
# If our string is or containing a known good string, then ignore it
# (""")foo""" -> good (continue)
# '''foo(""")''' -> good (continue)
# (''')foo''' -> possibly bad
if self.config['good_multiline'] in unprefixed_string:
return

# If our string ends with a known good ending, then ignore it
# '''foo("''') -> good (continue)
# Opposite, """foo"""", would break our parser (cannot handle """" ending)
if unprefixed_string.endswith(self.config['good_multiline_ending']):
return

# Output our error
yield {
'message': 'Q001 ' + self.config['multiline_error_message'],
'line': start_row,
'col': start_col,
}
# Otherwise (string is inline quote)
else:
# 'This is a string' -> Good
# 'This is a "string"' -> Good
# 'This is a \"string\"' -> Good
# 'This is a \'string\'' -> Bad (Q003) Escaped inner quotes
# '"This" is a \'string\'' -> Good Changing outer quotes would not avoid escaping
# "This is a string" -> Bad (Q000)
# "This is a 'string'" -> Good Avoids escaped inner quotes
# "This is a \"string\"" -> Bad (Q000)
# "\"This\" is a 'string'" -> Good

string_contents = unprefixed_string[1:-1]
# If string preferred type, check for escapes
if last_quote_char == self.config['good_single']:
if not self.config['avoid_escape'] or 'r' in prefix:
return
if (self.config['good_single'] in string_contents and
not self.config['bad_single'] in string_contents):
yield {
'message': 'Q000 ' + self.config['single_error_message'],
'message': 'Q003 Change outer quotes to avoid escaping inner quotes',
'line': start_row,
'col': start_col,
}
return

# If not preferred type, only allow use to avoid escapes.
if self.config['good_single'] not in string_contents:
yield {
'message': 'Q000 ' + self.config['single_error_message'],
'line': start_row,
'col': start_col,
}


class Token:
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def read(*filenames, **kwargs):
version=about['__version__'],
install_requires=[
'flake8',
'setuptools',
],
url='http://github.com/zheller/flake8-quotes/',
long_description=LONG_DESCRIPTION,
Expand Down Expand Up @@ -61,6 +62,7 @@ def read(*filenames, **kwargs):
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: 3.12',
'Topic :: Software Development :: Libraries :: Python Modules',
'Topic :: Software Development :: Quality Assurance',
]
Expand Down
15 changes: 15 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# run like so:
# 1. install pyenv
# 2. pyenv install -s 3.7.17 3.8.18 3.9.18 3.10.13 3.11.6 3.12.0
# 3. pyenv local 3.7.17 3.8.18 3.9.18 3.10.13 3.11.6 3.12.0
# 4. pip install tox
# 5. tox

[tox]
envlist = py37,py38,py39,py310,py311,py312
isolated_build = True

[testenv]
extras = flake8,setuptools
commands = ./test.sh
allowlist_externals = ./test.sh

0 comments on commit 927816e

Please sign in to comment.