google · lizawang · Aug 18, 2022 · Aug 30, 2022 · Aug 30, 2022 · Aug 30, 2022
diff --git a/CHANGELOG b/CHANGELOG
@@ -2,6 +2,10 @@
 # All notable changes to this project will be documented in this file.
 # This project adheres to [Semantic Versioning](http://semver.org/).
 
+## [0.41.1] 2022-08-30
+### Added
+- Add 4 new knobs to align assignment operators and dictionary colons. They are align_assignment, align_argument_assignment, align_dict_colon and new_alignment_after_commentline.
+
 ## [0.40.0] UNRELEASED
 ### Added
 - Add a new Python parser to generate logical lines.

diff --git a/CONTRIBUTORS b/CONTRIBUTORS
@@ -15,3 +15,4 @@ Sam Clegg <sbc@google.com>
 Łukasz Langa <ambv@fb.com>
 Oleg Butuzov <butuzov@made.ua>
 Mauricio Herrera Cuadra <mauricio@arareko.net>
+Xiao Wang <lizawang87@gmail.com>
diff --git a/README.rst b/README.rst
@@ -390,6 +390,61 @@ Options::
 Knobs
 =====
 
+``ALIGN_ASSIGNMENT``
+    Align assignment or augmented assignment operators.
+    A blank line, a comment on its own line, or a multiline object
+    (e.g. a dictionary, a list, a function call) in between
+    starts a new alignment block. Lines in the same block have the same
+    indentation level.
+
+    .. code-block:: python
+
+        a   = 1
+        abc = 2
+        if condition == None:
+            var       += ''
+            var_long  -= 4
+        b  = 3
+        bc = 4
+
+``ALIGN_ARGUMENT_ASSIGNMENT``
+    Align assignment operators in the argument list if they are all split on newlines.
+    An argument without an assignment in between starts a new alignment block calculation,
+    and so does, for example, a comment line.
+    Multiline objects in between will also initiate a new alignment block.
+
+    .. code-block:: python
+
+        rglist = test(
+            var_first  = 0,
+            var_second = '',
+            var_dict   = {
+                "key_1" : '',
+                "key_2" : 2,
+                "key_3" : True,
+            },
+            var_third     = 1,
+            var_very_long = None )
+
+``ALIGN_DICT_COLON``
+    Align the colons in the dictionary if all entries in the dictionary are split on newlines
+    or 'EACH_DICT_ENTRY_ON_SEPARATE_LINE' is set to True.
+    A comment line or multi-line object in between will start a new alignment block.
+
+    .. code-block:: python
+
+        fields =
+            {
+                "field" : "ediid",
+                "type"  : "text",
+                # key: value
+                "required" : True,
+            }
+
+``NEW_ALIGNMENT_AFTER_COMMENTLINE``
+    Make it optional to start a new alignment block for assignment
+    alignment and colon alignment after a comment line.
+
 ``ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT``
     Align closing bracket with visual indentation.
 

diff --git a/yapf/pytree/subtype_assigner.py b/yapf/pytree/subtype_assigner.py
@@ -240,6 +240,7 @@ def Visit_argument(self, node):  # pylint: disable=invalid-name
     # argument ::=
     #     test [comp_for] | test '=' test
     self._ProcessArgLists(node)
+    #TODO add a subtype to each argument?
 
   def Visit_arglist(self, node):  # pylint: disable=invalid-name
     # arglist ::=
@@ -309,6 +310,10 @@ def Visit_typedargslist(self, node):  # pylint: disable=invalid-name
         tname = True
         _SetArgListSubtype(child, subtypes.TYPED_NAME,
                            subtypes.TYPED_NAME_ARG_LIST)
+        # NOTE: Every element of the typed-name argument
+        # should carry this arg-list subtype.
+        _AppendSubtypeRec(child, subtypes.TYPED_NAME_ARG_LIST)
+
       elif child.type == grammar_token.COMMA:
         tname = False
       elif child.type == grammar_token.EQUAL and tname:

diff --git a/yapf/yapflib/format_decision_state.py b/yapf/yapflib/format_decision_state.py
@@ -978,6 +978,7 @@ def _GetNewlineColumn(self):
         not self.param_list_stack[-1].SplitBeforeClosingBracket(
             top_of_stack.indent) and top_of_stack.indent
         == ((self.line.depth + 1) * style.Get('INDENT_WIDTH'))):
+      # NOTE: comment inside argument list is not excluded in subtype assigner
       if (subtypes.PARAMETER_START in current.subtypes or
           (previous.is_comment and
            subtypes.PARAMETER_START in previous.subtypes)):

diff --git a/yapf/yapflib/format_token.py b/yapf/yapflib/format_token.py
@@ -322,3 +322,48 @@ def is_pytype_comment(self):
   def is_copybara_comment(self):
     return self.is_comment and re.match(
         r'#.*\bcopybara:\s*(strip|insert|replace)', self.value)
+
+  @property
+  def is_argassign(self):
+    return (subtypes.DEFAULT_OR_NAMED_ASSIGN in self.subtypes or
+            subtypes.VARARGS_LIST in self.subtypes)
+
+  @property
+  def is_argname(self):
+    # it's the argument part before argument assignment operator,
+    # including tnames and data type
+    # not the assign operator,
+    # not the value after the assign operator
+
+    # argument without assignment is also included
+    # the token is arg part before '=' but not after '='
+    if self.is_argname_start:
+      return True
+
+    # exclude comment inside argument list
+    if not self.is_comment:
+      # the token is any element in typed arglist
+      if subtypes.TYPED_NAME_ARG_LIST in self.subtypes:
+        return True
+
+    return False
+
+  @property
+  def is_argname_start(self):
+    # return true if it's the start of every argument entry
+    previous_subtypes = {0}
+    if self.previous_token:
+      previous_subtypes = self.previous_token.subtypes
+
+    return ((not self.is_comment and
+             subtypes.DEFAULT_OR_NAMED_ASSIGN not in self.subtypes and
+             subtypes.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST in self.subtypes and
+             subtypes.DEFAULT_OR_NAMED_ASSIGN not in previous_subtypes and
+             (not subtypes.PARAMETER_STOP in self.subtypes or
+              subtypes.PARAMETER_START in self.subtypes))
+            or  # if there is comment, the arg after it is the argname start
+            (not self.is_comment and self.previous_token and
+             self.previous_token.is_comment and
+             (subtypes.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST in previous_subtypes or
+              subtypes.TYPED_NAME_ARG_LIST in self.subtypes or
+              subtypes.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST in self.subtypes)))
diff --git a/yapf/yapflib/reformatter.py b/yapf/yapflib/reformatter.py
@@ -22,6 +22,7 @@
 from __future__ import unicode_literals
 
 import collections
+from distutils.errors import LinkError
 import heapq
 import re
 
@@ -102,6 +103,9 @@ def Reformat(llines, verify=False, lines=None):
     final_lines.append(lline)
     prev_line = lline
 
+  if style.Get('ALIGN_ARGUMENT_ASSIGNMENT'):
+    _AlignArgAssign(final_lines)
+
   _AlignTrailingComments(final_lines)
   return _FormatFinalLines(final_lines, verify)
 
@@ -394,6 +398,207 @@ def _AlignTrailingComments(final_lines):
       final_lines_index += 1
 
 
+def _AlignArgAssign(final_lines):
+  """Align the assign operators in an argument list to the same column.
+
+  Every enabled logical line that contains at least one argument-assign token
+  is processed once. For each real '(' whose next token starts on a new line,
+  the name lengths of the arguments sitting at the first argument's column are
+  collected into alignment blocks. The `value` of each matching assign token
+  is then left-padded with spaces so the operators of one block line up.
+
+  A new alignment block is started by an argument without an assignment, by a
+  nested bracket whose entries start on a new line, and (when the
+  NEW_ALIGNMENT_AFTER_COMMENTLINE knob is set) by a comment on its own line.
+
+  Arguments:
+    final_lines: list of logical lines. Each line is read through its
+      `disable` and `tokens` attributes; assign tokens are modified in place
+      (`value`, and in some cases `whitespace_prefix`).
+
+  Returns:
+    None.
+  """
+  # NOTE(review): the string below is a bare expression statement, not part of
+  # the docstring; it has no runtime effect.
+  """NOTE One argument list of one function is on one logical line!
+     But funtion calls/argument lists can be in argument list.
+  """
+  final_lines_index = 0
+  while final_lines_index < len(final_lines):
+    line = final_lines[final_lines_index]
+    # lines with formatting disabled are left untouched
+    if line.disable:
+      final_lines_index += 1
+      continue
+
+    assert line.tokens
+    # becomes True once this line has been processed, so that the index is
+    # advanced exactly once per line
+    process_content = False
+
+    # only lines holding at least one argument-assign token are processed;
+    # the first such token triggers the processing of the whole line
+    for tok in line.tokens:
+      if tok.is_argassign:
+
+        this_line = line
+        line_tokens = this_line.tokens
+
+        # look at every opening parenthesis of the line, nested ones included
+        for open_index in range(len(line_tokens)):
+          line_tok = line_tokens[open_index]
+
+          # only a real '(' whose content starts on a new line is a candidate
+          # NOTE(review): assumes a '(' token always has a next_token - confirm
+          if (line_tok.value == '(' and not line_tok.is_pseudo and
+              line_tok.next_token.formatted_whitespace_prefix.startswith('\n')):
+            index = open_index
+            # skip the comments in the beginning
+            index += 1
+            line_tok = line_tokens[index]
+            # advance to the first argument start, stopping at the last token
+            while not line_tok.is_argname_start and index < len(
+                line_tokens) - 1:
+              index += 1
+              line_tok = line_tokens[index]
+
+            # check if the argstart is on newline
+            if line_tok.is_argname_start and line_tok.formatted_whitespace_prefix.startswith(
+                '\n'):
+              first_arg_index = index
+              # indentation of the first argument; only arguments found at
+              # this column take part in the alignment of this '('
+              first_arg_column = len(
+                  line_tok.formatted_whitespace_prefix.lstrip('\n'))
+
+              closing = False
+              # one list of name lengths per alignment block
+              all_arg_name_lengths = []
+              # name lengths of the block currently being collected
+              arg_name_lengths = []
+              # text of the current argument name, prefixes included
+              name_content = ''
+              arg_column = first_arg_column
+
+              # start with the first argument
+              # that has nextline prefix
+              while not closing:
+                # if there is a comment in between, save, reset and continue to calculate new alignment
+                if (style.Get('NEW_ALIGNMENT_AFTER_COMMENTLINE') and
+                    arg_name_lengths and line_tok.is_comment and
+                    line_tok.formatted_whitespace_prefix.startswith('\n')):
+                  all_arg_name_lengths.append(arg_name_lengths)
+                  arg_name_lengths = []
+                  # NOTE(review): no bounds check on index here - assumes a
+                  # comment is never the last token of the line; confirm
+                  index += 1
+                  line_tok = line_tokens[index]
+                  continue
+
+                prefix = line_tok.formatted_whitespace_prefix
+                newline_index = prefix.rfind('\n')
+
+                # the token starts on a new line
+                if newline_index != -1:
+                  if line_tok.is_argname_start:
+                    # a new argument begins: restart the name and take its
+                    # column from the indentation after the last newline
+                    name_content = ''
+                    prefix = prefix[newline_index + 1:]
+                    arg_column = len(prefix)
+                  # if a typed arg name is so long
+                  # that there are newlines inside
+                  # only calculate the last line arg_name that has the assignment
+                  elif line_tok.is_argname:
+                    name_content = ''
+                    prefix = prefix[newline_index + 1:]
+                # if any argument not on newline
+                elif line_tok.is_argname_start:
+                  name_content = ''
+                  arg_column = line_tok.column
+                  # in case they are formatted into one line in final_line
+                  # but are put in separated lines in original codes
+                  # NOTE(review): this stores the prefix string, not a number,
+                  # so the `arg_column == first_arg_column` checks below are
+                  # False for this argument - confirm this is the intent
+                  if arg_column == first_arg_column:
+                    arg_column = line_tok.formatted_whitespace_prefix
+                # on the same argument level
+                if (line_tok.is_argname_start and arg_name_lengths and
+                    arg_column == first_arg_column):
+                  # look ahead past the name tokens of this argument
+                  # NOTE(review): assumes next_token is never None while
+                  # walking the name tokens - confirm
+                  argname_end = line_tok
+                  while argname_end.is_argname:
+                    argname_end = argname_end.next_token
+                  # argument without assignment in between
+                  if not argname_end.is_argassign:
+                    all_arg_name_lengths.append(arg_name_lengths)
+                    arg_name_lengths = []
+                    index += 1
+                    line_tok = line_tokens[index]
+                    continue
+
+                # at the assign operator the accumulated name is complete, so
+                # record its length; otherwise keep accumulating the name
+                if line_tok.is_argassign and arg_column == first_arg_column:
+                  arg_name_lengths.append(len(name_content))
+                elif line_tok.is_argname and arg_column == first_arg_column:
+                  name_content += '{}{}'.format(prefix, line_tok.value)
+                  # add up all token values before the arg assign operator
+
+                index += 1
+                if index < len(line_tokens):
+                  line_tok = line_tokens[index]
+                # when the matching closing bracket is never found
+                # due to edge cases where the closing bracket
+                # is not indented or dedented
+                else:
+                  all_arg_name_lengths.append(arg_name_lengths)
+                  break
+
+                # if there is a new object(list/tuple/dict) with its entries on newlines,
+                # save, reset and continue to calculate new alignment
+                if (line_tok.value in ['(', '[', '{'] and
+                    line_tok.next_token and
+                    line_tok.next_token.formatted_whitespace_prefix.startswith(
+                        '\n')):
+                  if arg_name_lengths:
+                    all_arg_name_lengths.append(arg_name_lengths)
+                  arg_name_lengths = []
+                  index += 1
+                  line_tok = line_tokens[index]
+                  continue
+
+                # a real ')' whose column lies left of the first argument
+                # is taken as the end of this argument list
+                if line_tok.value == ')' and not line_tok.is_pseudo:
+                  if line_tok.formatted_whitespace_prefix.startswith('\n'):
+                    close_column = len(
+                        line_tok.formatted_whitespace_prefix.lstrip('\n'))
+                  else:
+                    close_column = line_tok.column
+                  if close_column < first_arg_column:
+                    if arg_name_lengths:
+                      all_arg_name_lengths.append(arg_name_lengths)
+                    closing = True
+
+              # update the alignment once one full arg list is processed
+              if all_arg_name_lengths:
+                # if argument list with only the first argument on newline
+                # (this `continue` moves on to the next open_index)
+                if len(all_arg_name_lengths) == 1 and len(
+                    all_arg_name_lengths[0]) == 1:
+                  continue
+                max_name_length = 0
+                all_arg_name_lengths_index = 0
+                arg_name_lengths = all_arg_name_lengths[
+                    all_arg_name_lengths_index]
+                # longest name of the block plus 2, so that the padding
+                # computed below is at least one space
+                max_name_length = max(arg_name_lengths or [0]) + 2
+                arg_lengths_index = 0
+                # second pass over the same token range: pad each assign token
+                for token in line_tokens[first_arg_index:index]:
+                  if token.is_argassign:
+                    # walk back to the first token of this argument's name
+                    name_token = token.previous_token
+                    while name_token.is_argname and not name_token.is_argname_start:
+                      name_token = name_token.previous_token
+                    name_column = len(
+                        name_token.formatted_whitespace_prefix.lstrip('\n'))
+                    # only arguments at the first argument's column are aligned
+                    if name_column == first_arg_column:
+                      if all_arg_name_lengths_index < len(all_arg_name_lengths):
+                        # the current block is used up: move to the next one
+                        # NOTE(review): assumes a next block exists whenever
+                        # the current one is used up - confirm
+                        if arg_lengths_index == len(arg_name_lengths):
+                          all_arg_name_lengths_index += 1
+                          arg_name_lengths = all_arg_name_lengths[
+                              all_arg_name_lengths_index]
+                          max_name_length = max(arg_name_lengths or [0]) + 2
+                          arg_lengths_index = 0
+
+                        if arg_lengths_index < len(arg_name_lengths):
+
+                          assert arg_name_lengths[
+                              arg_lengths_index] < max_name_length
+
+                          # the longest name gets one space, shorter names
+                          # get one extra space per missing character
+                          padded_spaces = ' ' * (
+                              max_name_length -
+                              arg_name_lengths[arg_lengths_index] - 1)
+                          arg_lengths_index += 1
+
+                          assign_content = '{}{}'.format(
+                              padded_spaces, token.value.strip())
+                          existing_whitespace_prefix = \
+                                token.formatted_whitespace_prefix.lstrip('\n')
+
+                          # in case the existing spaces are larger than padded spaces
+                          if (len(padded_spaces) == 1 or
+                              len(padded_spaces) > 1 and
+                              len(existing_whitespace_prefix)
+                              > len(padded_spaces)):
+                            token.whitespace_prefix = ''
+                          # otherwise drop from the padding what the existing
+                          # whitespace prefix already provides
+                          elif assign_content.startswith(
+                              existing_whitespace_prefix):
+                            assign_content = assign_content[
+                                len(existing_whitespace_prefix):]
+
+                          token.value = assign_content
+
+        # the whole line was handled at its first assign token: move on
+        final_lines_index += 1
+        process_content = True
+        break
+
+    # no assign token on this line: just advance
+    if not process_content:
+      final_lines_index += 1
+
+
 def _FormatFinalLines(final_lines, verify):
   """Compose the final output from the finalized lines."""
   formatted_code = []