[RemoveUnusedImports] Support string type annotations (#353)

* [RemoveUnusedImports] Support string type annotations. This PR adds support for detecting imports that are used only inside string type annotations, and for keeping imports whose removal is suppressed by a comment. It breaks up the existing visitor into multiple smaller, single-purpose visitors, and composes them together.
Instagram · Jul 31, 2020 · 6a5e739 · 6a5e739
1 parent f8fdc00
commit 6a5e739
Show file tree

Hide file tree

Showing 12 changed files with 640 additions and 61 deletions.
diff --git a/docs/source/codemods.rst b/docs/source/codemods.rst
@@ -146,12 +146,18 @@ LibCST additionally includes a library of transforms to reduce the need for boil
 inside codemods. As of now, the list includes the following helpers.
 
 .. autoclass:: libcst.codemod.visitors.GatherImportsVisitor
-  :exclude-members: visit_Import, visit_ImportFrom
+  :no-undoc-members:
 .. autoclass:: libcst.codemod.visitors.GatherExportsVisitor
-  :exclude-members: visit_AnnAssign, leave_AnnAssign, visit_Assign, leave_Assign, visit_List, leave_List, visit_Tuple, leave_Tuple, visit_Set, leave_Set, visit_Element
+  :no-undoc-members:
 .. autoclass:: libcst.codemod.visitors.AddImportsVisitor
-  :exclude-members: CONTEXT_KEY, visit_Module, leave_ImportFrom, leave_Module
+  :no-undoc-members:
 .. autoclass:: libcst.codemod.visitors.RemoveImportsVisitor
-  :exclude-members: CONTEXT_KEY, METADATA_DEPENDENCIES, visit_Module, leave_ImportFrom, leave_Import
+  :no-undoc-members:
 .. autoclass:: libcst.codemod.visitors.ApplyTypeAnnotationsVisitor
-  :exclude-members: CONTEXT_KEY, transform_module_impl, visit_ClassDef, visit_Comment, visit_FunctionDef, leave_Assign, leave_ClassDef, leave_FunctionDef, leave_ImportFrom, leave_Module
+  :no-undoc-members:
+.. autoclass:: libcst.codemod.visitors.GatherUnusedImportsVisitor
+  :no-undoc-members:
+.. autoclass:: libcst.codemod.visitors.GatherCommentsVisitor
+  :no-undoc-members:
+.. autoclass:: libcst.codemod.visitors.GatherNamesFromStringAnnotationsVisitor
+  :no-undoc-members:
diff --git a/libcst/codemod/commands/remove_unused_imports.py b/libcst/codemod/commands/remove_unused_imports.py
@@ -4,9 +4,18 @@
 # LICENSE file in the root directory of this source tree.
 #
 
-from libcst import Import, ImportFrom
-from libcst.codemod import VisitorBasedCodemodCommand
-from libcst.codemod.visitors import RemoveImportsVisitor
+from typing import Set, Tuple, Union
+
+from libcst import Import, ImportFrom, ImportStar, Module
+from libcst.codemod import CodemodContext, VisitorBasedCodemodCommand
+from libcst.codemod.visitors import GatherCommentsVisitor, RemoveImportsVisitor
+from libcst.helpers import get_absolute_module_for_import
+from libcst.metadata import PositionProvider, ProviderT
+
+
+# Pattern for comments that suppress unused-import removal. It accepts a
+# comment containing ``noqa``, ``lint-ignore: unused-import`` or
+# ``lint-ignore: F401`` (the space after the colon is optional), where the
+# keyword is preceded by a non-word character and followed by either a
+# non-word character or the end of the comment.
+DEFAULT_SUPPRESS_COMMENT_REGEX = (
+    r".*\W(noqa|lint-ignore: ?unused-import|lint-ignore: ?F401)(\W.*)?$"
+)
 
 
 class RemoveUnusedImportsCommand(VisitorBasedCodemodCommand):
@@ -17,21 +26,65 @@ class RemoveUnusedImportsCommand(VisitorBasedCodemodCommand):
     to track cross-references between them. If a symbol is imported in a file
     but otherwise unused in it, that import will be removed even if it is being
     referenced from another file.
-
-    It currently doesn't keep track of string type annotations, so an import
-    for `MyType` used only in `def f() -> "MyType"` will be removed.
     """
 
     DESCRIPTION: str = (
         "Remove all imports that are not used in a file. "
         "Note: only considers the file in isolation. "
-        "Note: does not account for usages in string type annotations. "
     )
 
+    METADATA_DEPENDENCIES: Tuple[ProviderT] = (PositionProvider,)
+
+    def __init__(self, context: CodemodContext) -> None:
+        super().__init__(context)
+        # Line numbers affected by a suppression comment; populated in
+        # visit_Module and consulted in _handle_import.
+        self._ignored_lines: Set[int] = set()
+
+    def visit_Module(self, node: Module) -> bool:
+        """
+        Record which lines of the module are covered by a suppression comment.
+        """
+        gatherer = GatherCommentsVisitor(self.context, DEFAULT_SUPPRESS_COMMENT_REGEX)
+        node.visit(gatherer)
+        # Iterating the mapping yields its keys, i.e. the affected line numbers.
+        self._ignored_lines = set(gatherer.comments)
+        return True
+
     def visit_Import(self, node: Import) -> bool:
-        RemoveImportsVisitor.remove_unused_import_by_node(self.context, node)
+        self._handle_import(node)
         return False
 
     def visit_ImportFrom(self, node: ImportFrom) -> bool:
-        RemoveImportsVisitor.remove_unused_import_by_node(self.context, node)
+        self._handle_import(node)
         return False
+
+    def _handle_import(self, node: Union[Import, ImportFrom]) -> None:
+        """
+        Pass every imported name of ``node`` that is not on a suppressed line
+        to ``RemoveImportsVisitor.remove_unused_import``.
+
+        Raises ``ValueError`` when the absolute module name of a ``from``
+        import cannot be determined.
+        """
+        # A suppression that targets the statement's first line covers all of it.
+        first_line = self.get_metadata(PositionProvider, node).start.line
+        if first_line in self._ignored_lines:
+            return
+
+        imported = node.names
+        if isinstance(imported, ImportStar):
+            # Star imports are left untouched.
+            return
+
+        for alias in imported:
+            span = self.get_metadata(PositionProvider, alias)
+            alias_lines = range(span.start.line, span.end.line + 1)
+            if any(line in self._ignored_lines for line in alias_lines):
+                # One of the lines this alias occupies is suppressed.
+                continue
+
+            if isinstance(node, Import):
+                RemoveImportsVisitor.remove_unused_import(
+                    self.context,
+                    module=alias.evaluated_name,
+                    asname=alias.evaluated_alias,
+                )
+                continue
+
+            module_name = get_absolute_module_for_import(
+                self.context.full_module_name, node
+            )
+            if module_name is None:
+                raise ValueError(
+                    f"Couldn't get absolute module name for {alias.evaluated_name}"
+                )
+            RemoveImportsVisitor.remove_unused_import(
+                self.context,
+                module=module_name,
+                obj=alias.evaluated_name,
+                asname=alias.evaluated_alias,
+            )
diff --git a/libcst/codemod/commands/tests/test_remove_unused_imports.py b/libcst/codemod/commands/tests/test_remove_unused_imports.py
@@ -90,3 +90,33 @@ def test_no_formatting_if_no_unused_imports(self) -> None:
             a(b, 'look at these ugly quotes')
         """
         self.assertCodemod(before, before)
+
+    def test_suppression_on_first_line_of_multiline_import_refers_to_whole_block(
+        self,
+    ) -> None:
+        # A suppression comment on the opening line of a parenthesized import
+        # must keep every name in that statement, even though none is used.
+        before = """
+            from a import (  # lint-ignore: unused-import
+                b,
+                c,
+            )
+        """
+        self.assertCodemod(before, before)
+
+    def test_suppression(self) -> None:
+        # Expectations encoded below:
+        # - the standalone ``# noqa`` protects the statement on the next line
+        #   (``import a, b``);
+        # - ``import c`` has no suppression and is removed;
+        # - inside the parenthesized import, only ``z`` carries an inline
+        #   ``# noqa``, so ``y`` is removed and ``z`` is kept.
+        before = """
+            # noqa
+            import a, b
+            import c
+            from x import (
+                y,
+                z,  # noqa
+            )
+        """
+        after = """
+            # noqa
+            import a, b
+            from x import (
+                z,  # noqa
+            )
+        """
+        self.assertCodemod(before, after)
diff --git a/libcst/codemod/tests/codemod_formatter_error_input.py.txt b/libcst/codemod/tests/codemod_formatter_error_input.py.txt
@@ -5,7 +5,7 @@
 #
 # pyre-strict
 
-import subprocess  # noqa: F401
+import subprocess
 from contextlib import AsyncExitStack
 
 

diff --git a/libcst/codemod/visitors/__init__.py b/libcst/codemod/visitors/__init__.py
@@ -5,15 +5,23 @@
 #
 from libcst.codemod.visitors._add_imports import AddImportsVisitor
 from libcst.codemod.visitors._apply_type_annotations import ApplyTypeAnnotationsVisitor
+from libcst.codemod.visitors._gather_comments import GatherCommentsVisitor
 from libcst.codemod.visitors._gather_exports import GatherExportsVisitor
 from libcst.codemod.visitors._gather_imports import GatherImportsVisitor
+from libcst.codemod.visitors._gather_string_annotation_names import (
+    GatherNamesFromStringAnnotationsVisitor,
+)
+from libcst.codemod.visitors._gather_unused_imports import GatherUnusedImportsVisitor
 from libcst.codemod.visitors._remove_imports import RemoveImportsVisitor
 
 
 __all__ = [
     "AddImportsVisitor",
-    "GatherImportsVisitor",
-    "GatherExportsVisitor",
     "ApplyTypeAnnotationsVisitor",
+    "GatherCommentsVisitor",
+    "GatherExportsVisitor",
+    "GatherImportsVisitor",
+    "GatherNamesFromStringAnnotationsVisitor",
+    "GatherUnusedImportsVisitor",
     "RemoveImportsVisitor",
 ]
diff --git a/libcst/codemod/visitors/_gather_comments.py b/libcst/codemod/visitors/_gather_comments.py
@@ -0,0 +1,51 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import re
+from typing import Dict, Pattern, Union
+
+import libcst as cst
+import libcst.matchers as m
+from libcst.codemod._context import CodemodContext
+from libcst.codemod._visitor import ContextAwareVisitor
+from libcst.metadata import PositionProvider
+
+
+class GatherCommentsVisitor(ContextAwareVisitor):
+    """
+    Gathers every comment whose text matches a given regular expression,
+    keyed by the line number it applies to. A typical use is finding
+    special-purpose comments such as ``noqa``-style lint suppressions.
+
+    A comment that stands on a line of its own is attributed to the line
+    right after it; a trailing (inline) comment is attributed to its own line.
+
+    Once a CST has been visited, the matches are available through the
+    ``comments`` attribute.
+    """
+
+    METADATA_DEPENDENCIES = (PositionProvider,)
+
+    def __init__(self, context: CodemodContext, comment_regex: str) -> None:
+        super().__init__(context)
+
+        self._comment_matcher: Pattern[str] = re.compile(comment_regex)
+
+        #: Mapping from line number to the matching comment node for that line.
+        self.comments: Dict[int, cst.Comment] = {}
+
+    @m.visit(m.EmptyLine(comment=m.DoesNotMatch(None)))
+    @m.visit(m.TrailingWhitespace(comment=m.DoesNotMatch(None)))
+    def visit_comment(self, node: Union[cst.EmptyLine, cst.TrailingWhitespace]) -> None:
+        comment = node.comment
+        assert comment is not None  # ensured by the matchers; narrows the type
+        if self._comment_matcher.match(comment.value) is None:
+            return
+        own_line = self.get_metadata(PositionProvider, comment).start.line
+        # An EmptyLine comment is standalone and targets the following line.
+        offset = 1 if isinstance(node, cst.EmptyLine) else 0
+        self.comments[own_line + offset] = comment
diff --git a/libcst/codemod/visitors/_gather_string_annotation_names.py b/libcst/codemod/visitors/_gather_string_annotation_names.py
@@ -0,0 +1,81 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Set, Union, cast
+
+import libcst as cst
+import libcst.matchers as m
+from libcst.codemod._context import CodemodContext
+from libcst.codemod._visitor import ContextAwareVisitor
+from libcst.metadata import MetadataWrapper, QualifiedNameProvider
+
+
+# Qualified names of callables whose string arguments are treated like
+# string type annotations.
+FUNCS_CONSIDERED_AS_STRING_ANNOTATIONS = {"typing.TypeVar"}
+# Matches either an ``Annotation`` node, or a ``Call`` node for which at least
+# one of the qualified names reported by ``QualifiedNameProvider`` is listed
+# in ``FUNCS_CONSIDERED_AS_STRING_ANNOTATIONS``.
+ANNOTATION_MATCHER: m.BaseMatcherNode = m.Annotation() | m.Call(
+    metadata=m.MatchMetadataIfTrue(
+        QualifiedNameProvider,
+        lambda qualnames: any(
+            qn.name in FUNCS_CONSIDERED_AS_STRING_ANNOTATIONS for qn in qualnames
+        ),
+    )
+)
+
+
+class GatherNamesFromStringAnnotationsVisitor(ContextAwareVisitor):
+    """
+    Collects all names from string literals used for typing purposes.
+    This includes annotations like ``foo: "SomeType"``, and parameters to
+    special functions related to typing (currently only ``typing.TypeVar``).
+
+    String literals whose contents cannot be parsed as Python code are skipped.
+
+    After visiting, a set of all found names will be available on the ``names``
+    attribute of this visitor.
+    """
+
+    METADATA_DEPENDENCIES = (QualifiedNameProvider,)
+
+    def __init__(self, context: CodemodContext) -> None:
+        super().__init__(context)
+
+        #: The set of names collected from string literals.
+        self.names: Set[str] = set()
+
+    @m.call_if_inside(ANNOTATION_MATCHER)
+    @m.visit(m.ConcatenatedString())
+    def handle_any_string(
+        self, node: Union[cst.SimpleString, cst.ConcatenatedString]
+    ) -> None:
+        """
+        Parse the evaluated contents of ``node`` and record every name found in it.
+        """
+        value = node.evaluated_value
+        if value is None:
+            return
+        try:
+            mod = cst.parse_module(value)
+        except cst.ParserSyntaxError:
+            # Not every string inside an annotation is code, e.g. the argument
+            # in ``Literal["not python"]``; skip it instead of aborting the run.
+            return
+        extracted_nodes = m.extractall(
+            mod,
+            # Plain names are captured directly, except those that are part of
+            # an Attribute; Attribute nodes are captured as a whole instead.
+            m.Name(
+                value=m.SaveMatchedNode(m.DoNotCare(), "name"),
+                metadata=m.MatchMetadataIfTrue(
+                    cst.metadata.ParentNodeProvider,
+                    lambda parent: not isinstance(parent, cst.Attribute),
+                ),
+            )
+            | m.SaveMatchedNode(m.Attribute(), "attribute"),
+            metadata_resolver=MetadataWrapper(mod, unsafe_skip_copy=True),
+        )
+        names = {
+            cast(str, values["name"]) for values in extracted_nodes if "name" in values
+        } | {
+            # Each captured Attribute contributes the dotted names generated
+            # for it by the scope provider helper.
+            name
+            for values in extracted_nodes
+            if "attribute" in values
+            for name, _ in cst.metadata.scope_provider._gen_dotted_names(
+                cast(cst.Attribute, values["attribute"])
+            )
+        }
+        self.names.update(names)
+
+    @m.call_if_inside(ANNOTATION_MATCHER)
+    @m.call_if_not_inside(m.ConcatenatedString())
+    @m.visit(m.SimpleString())
+    def handle_simple_string(self, node: cst.SimpleString) -> None:
+        """
+        Handle a ``SimpleString`` that is not nested in a ``ConcatenatedString``;
+        concatenated strings are visited as a whole by ``handle_any_string``.
+        """
+        self.handle_any_string(node)