Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RemoveUnusedImports] Support string type annotations #353

Merged
merged 7 commits into from
Jul 31, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 65 additions & 9 deletions libcst/codemod/commands/remove_unused_imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,18 @@
# LICENSE file in the root directory of this source tree.
#

from libcst import Import, ImportFrom
from libcst.codemod import VisitorBasedCodemodCommand
from libcst.codemod.visitors import RemoveImportsVisitor
from typing import Set, Tuple, Union

from libcst import Import, ImportFrom, ImportStar, Module
from libcst.codemod import CodemodContext, VisitorBasedCodemodCommand
from libcst.codemod.visitors import GatherCommentsVisitor, RemoveImportsVisitor
from libcst.helpers import get_absolute_module_for_import
from libcst.metadata import PositionProvider, ProviderT


DEFAULT_SUPPRESS_COMMENT_REGEX = (
r".*\W(noqa|lint-ignore: ?unused-import|lint-ignore: ?F401)(\W.*)?$"
)
Comment on lines +16 to +18
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, don't forget about this one as well, I think it's better than the current one:

DEFAULT_SUPPRESS_COMMENT_REGEX = (
    r".*\W(noqa[^:]|noqa: ?F401|lint-ignore: ?unused-import|lint-ignore: ?F401)(\W.*)?$"
)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't match # noqa though, which is the most common case

Copy link
Contributor

@Kronuz Kronuz Jul 30, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right!

What do you think of:

r".*\W(noqa|lint-ignore)(: ?(F401|unused-import)|(?= )|$)(\W.*)?$"



class RemoveUnusedImportsCommand(VisitorBasedCodemodCommand):
Expand All @@ -17,21 +26,68 @@ class RemoveUnusedImportsCommand(VisitorBasedCodemodCommand):
to track cross-references between them. If a symbol is imported in a file
but otherwise unused in it, that import will be removed even if it is being
referenced from another file.

It currently doesn't keep track of string type annotations, so an import
for `MyType` used only in `def f() -> "MyType"` will be removed.
"""

DESCRIPTION: str = (
"Remove all imports that are not used in a file. "
"Note: only considers the file in isolation. "
"Note: does not account for usages in string type annotations. "
)

METADATA_DEPENDENCIES: Tuple[ProviderT] = (
PositionProvider,
*GatherCommentsVisitor.METADATA_DEPENDENCIES,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should only need to list the direct metadata dependency of RemoveUnusedImportsCommand here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jimmylai, so the line *GatherCommentsVisitor.METADATA_DEPENDENCIES, should be removed, right?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes.

)

def __init__(self, context: CodemodContext) -> None:
super().__init__(context)
self._ignored_lines: Set[int] = set()

def visit_Module(self, node: Module) -> bool:
comment_visitor = GatherCommentsVisitor(
self.context, DEFAULT_SUPPRESS_COMMENT_REGEX
)
node.visit(comment_visitor)
self._ignored_lines = set(comment_visitor.comments.keys())
return True

def visit_Import(self, node: Import) -> bool:
RemoveImportsVisitor.remove_unused_import_by_node(self.context, node)
self._handle_import(node)
return False

def visit_ImportFrom(self, node: ImportFrom) -> bool:
RemoveImportsVisitor.remove_unused_import_by_node(self.context, node)
self._handle_import(node)
return False

def _handle_import(self, node: Union[Import, ImportFrom]) -> None:
node_start = self.get_metadata(PositionProvider, node).start.line
if node_start in self._ignored_lines:
return

names = node.names
if isinstance(names, ImportStar):
return

for alias in names:
position = self.get_metadata(PositionProvider, alias)
lines = set(range(position.start.line, position.end.line + 1))
if lines.isdisjoint(self._ignored_lines):
if isinstance(node, Import):
RemoveImportsVisitor.remove_unused_import(
self.context,
module=alias.evaluated_name,
asname=alias.evaluated_alias,
)
else:
module_name = get_absolute_module_for_import(
self.context.full_module_name, node
)
if module_name is None:
raise ValueError(
f"Couldn't get absolute module name for {alias.evaluated_name}"
)
RemoveImportsVisitor.remove_unused_import(
self.context,
module=module_name,
obj=alias.evaluated_name,
asname=alias.evaluated_alias,
)
30 changes: 30 additions & 0 deletions libcst/codemod/commands/tests/test_remove_unused_imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,33 @@ def test_no_formatting_if_no_unused_imports(self) -> None:
a(b, 'look at these ugly quotes')
"""
self.assertCodemod(before, before)

def test_suppression_on_first_line_of_multiline_import_refers_to_whole_block(
self,
) -> None:
before = """
from a import ( # lint-ignore: unused-import
b,
c,
)
"""
self.assertCodemod(before, before)

def test_suppression(self) -> None:
before = """
# noqa
import a, b
import c
from x import (
y,
z, # noqa
)
"""
after = """
# noqa
import a, b
from x import (
z, # noqa
)
"""
self.assertCodemod(before, after)
2 changes: 1 addition & 1 deletion libcst/codemod/tests/codemod_formatter_error_input.py.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#
# pyre-strict

import subprocess # noqa: F401
import subprocess
from contextlib import AsyncExitStack


Expand Down
12 changes: 10 additions & 2 deletions libcst/codemod/visitors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,23 @@
#
from libcst.codemod.visitors._add_imports import AddImportsVisitor
from libcst.codemod.visitors._apply_type_annotations import ApplyTypeAnnotationsVisitor
from libcst.codemod.visitors._gather_comments import GatherCommentsVisitor
from libcst.codemod.visitors._gather_exports import GatherExportsVisitor
from libcst.codemod.visitors._gather_imports import GatherImportsVisitor
from libcst.codemod.visitors._gather_string_annotation_names import (
GatherNamesFromStringAnnotationsVisitor,
)
from libcst.codemod.visitors._gather_unused_imports import GatherUnusedImportsVisitor
from libcst.codemod.visitors._remove_imports import RemoveImportsVisitor


__all__ = [
"AddImportsVisitor",
"GatherImportsVisitor",
"GatherExportsVisitor",
"ApplyTypeAnnotationsVisitor",
"GatherCommentsVisitor",
"GatherExportsVisitor",
"GatherImportsVisitor",
"GatherNamesFromStringAnnotationsVisitor",
"GatherUnusedImportsVisitor",
"RemoveImportsVisitor",
]
37 changes: 37 additions & 0 deletions libcst/codemod/visitors/_gather_comments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import re
from typing import Dict, Pattern, Union

import libcst as cst
import libcst.matchers as m
from libcst.codemod._context import CodemodContext
from libcst.codemod._visitor import ContextAwareVisitor
from libcst.metadata import PositionProvider


class GatherCommentsVisitor(ContextAwareVisitor):
METADATA_DEPENDENCIES = (PositionProvider,)

def __init__(self, context: CodemodContext, comment_regex: str) -> None:
super().__init__(context)

self.comments: Dict[int, cst.Comment] = {}

self._comment_matcher: Pattern[str] = re.compile(comment_regex)

@m.visit(m.EmptyLine(comment=m.DoesNotMatch(None)))
@m.visit(m.TrailingWhitespace(comment=m.DoesNotMatch(None)))
def visit_comment(self, node: Union[cst.EmptyLine, cst.TrailingWhitespace]) -> None:
comment = node.comment
assert comment is not None # hello, type checker
if not self._comment_matcher.match(comment.value):
return
line = self.get_metadata(PositionProvider, comment).start.line
if isinstance(node, cst.EmptyLine):
# Standalone comments refer to the next line
line += 1
self.comments[line] = comment
66 changes: 66 additions & 0 deletions libcst/codemod/visitors/_gather_string_annotation_names.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from typing import Set, Union, cast

import libcst as cst
import libcst.matchers as m
from libcst.codemod._context import CodemodContext
from libcst.codemod._visitor import ContextAwareVisitor
from libcst.metadata import QualifiedNameProvider


FUNCS_CONSIDERED_AS_STRING_ANNOTATIONS = {"typing.TypeVar"}
ANNOTATION_MATCHER: m.BaseMatcherNode = m.Annotation() | m.Call(
metadata=m.MatchMetadataIfTrue(
QualifiedNameProvider,
lambda qualnames: any(
qn.name in FUNCS_CONSIDERED_AS_STRING_ANNOTATIONS for qn in qualnames
),
)
)


class GatherNamesFromStringAnnotationsVisitor(ContextAwareVisitor):
METADATA_DEPENDENCIES = (QualifiedNameProvider,)

def __init__(self, context: CodemodContext) -> None:
super().__init__(context)

self.names: Set[str] = set()

@m.call_if_inside(ANNOTATION_MATCHER)
@m.visit(m.ConcatenatedString())
def handle_any_string(
self, node: Union[cst.SimpleString, cst.ConcatenatedString]
) -> None:
value = node.evaluated_value
if value is None:
return
mod = cst.parse_module(value)
extracted_nodes = m.extractall(
mod,
m.Name(value=m.SaveMatchedNode(m.DoNotCare(), "name"))
| m.SaveMatchedNode(m.Attribute(), "attribute"),
)
zsol marked this conversation as resolved.
Show resolved Hide resolved
# This captures a bit more than necessary. For attributes, we capture the inner
# Name twice.
names = {
cast(str, values["name"]) for values in extracted_nodes if "name" in values
} | {
name
for values in extracted_nodes
if "attribute" in values
for name, _ in cst.metadata.scope_provider._gen_dotted_names(
cast(cst.Attribute, values["attribute"])
)
}
self.names.update(names)

@m.call_if_inside(ANNOTATION_MATCHER)
@m.call_if_not_inside(m.ConcatenatedString())
@m.visit(m.SimpleString())
def handle_simple_string(self, node: cst.SimpleString) -> None:
self.handle_any_string(node)
97 changes: 97 additions & 0 deletions libcst/codemod/visitors/_gather_unused_imports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#

from typing import Iterable, Set, Tuple, Union

import libcst as cst
import libcst.matchers as m
from libcst.codemod._context import CodemodContext
from libcst.codemod._visitor import ContextAwareVisitor
from libcst.codemod.visitors._gather_exports import GatherExportsVisitor
from libcst.codemod.visitors._gather_string_annotation_names import (
GatherNamesFromStringAnnotationsVisitor,
)
from libcst.metadata import ProviderT, ScopeProvider
from libcst.metadata.scope_provider import _gen_dotted_names


class GatherUnusedImportsVisitor(ContextAwareVisitor):

SUPPRESS_COMMENT_REGEX_CONTEXT_KEY = f"GatherUnusedImportsVisitor.suppress_regex"
METADATA_DEPENDENCIES: Tuple[ProviderT] = (
*GatherNamesFromStringAnnotationsVisitor.METADATA_DEPENDENCIES,
ScopeProvider,
)

def __init__(self, context: CodemodContext) -> None:
super().__init__(context)

self._string_annotation_names: Set[str] = set()
self._exported_names: Set[str] = set()
self.unused_imports: Set[
Tuple[cst.ImportAlias, Union[cst.Import, cst.ImportFrom]]
] = set()

def visit_Module(self, node: cst.Module) -> bool:
export_collector = GatherExportsVisitor(self.context)
node.visit(export_collector)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jimmylai, just wondering... won't calling node.visit(export_collector) and node.visit(annotation_visitor) here traverses the tree multiple times? wouldn't this be inefficient?

I was thinking maybe using multiple inheritance instead of this, so we make it that it only traverses the tree once.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes it will traverse the tree multiple times. I don't think that's a big deal for a codemod like this. Running the codemod on a bunch of large files is still not visibly slower than before. I don't have benchmarks for you though :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we use this in a linter, which will run over and over, while the user types, we want this as fast as possible, otherwise linting times will hit the experience. Although I’m not sure how expensive traversing the tree is, processing time would be exponential, potentially noticeable on big files. What do you think @jimmylai ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Performance is a concern for lint but not for codemod. In lint, we try to avoid creating a visitor and calling node.visit(visitor).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The @m.visitused in this codemod is not supported in the lint framework since we worried about the the pattern can introduce some expensive checks. So we only use m.matches() in lint.

self._exported_names = export_collector.explicit_exported_objects
annotation_visitor = GatherNamesFromStringAnnotationsVisitor(self.context)
node.visit(annotation_visitor)
self._string_annotation_names = annotation_visitor.names
return True

@m.visit(
m.Import()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How could we add other exceptions to this reusable class, like if we wanted to ignore import __strict__, for example?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's best to apply filtering on the results of this visitor (just like how you pointed out that the suppression filtering belongs outside of this class). So I would filter for special module names after running GatherUnusedImportsVisitor, and keep the exceptions here to a minimum (i.e. the ones implied by Python itself)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I agree :)

| m.ImportFrom(
module=m.DoesNotMatch(m.Name("__future__")),
names=m.DoesNotMatch(m.ImportStar()),
)
)
def handle_import(self, node: Union[cst.Import, cst.ImportFrom]) -> None:
names = node.names
assert not isinstance(names, cst.ImportStar) # hello, type checker

for alias in names:
self.unused_imports.add((alias, node))

def leave_Module(self, original_node: cst.Module) -> None:
self.unused_imports = self.filter_unused_imports(self.unused_imports)

def filter_unused_imports(
self,
candidates: Iterable[Tuple[cst.ImportAlias, Union[cst.Import, cst.ImportFrom]]],
) -> Set[Tuple[cst.ImportAlias, Union[cst.Import, cst.ImportFrom]]]:
unused_imports = set()
for (alias, parent) in candidates:
scope = self.get_metadata(ScopeProvider, parent)
if scope is None:
continue
if not self.is_in_use(scope, alias):
unused_imports.add((alias, parent))
return unused_imports

def is_in_use(self, scope: cst.metadata.Scope, alias: cst.ImportAlias) -> bool:
asname = alias.asname
names = _gen_dotted_names(
cst.ensure_type(asname.name, cst.Name) if asname is not None else alias.name
)

for name_or_alias, _ in names:
if (
name_or_alias in self._exported_names
or name_or_alias in self._string_annotation_names
):
return True

for assignment in scope[name_or_alias]:
if (
isinstance(assignment, cst.metadata.Assignment)
and isinstance(assignment.node, (cst.ImportFrom, cst.Import))
and len(assignment.references) > 0
):
return True
return False
Loading