Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add migrate_to_toml_config.py script to automatically update INI config files to TOML #9054

Merged
merged 4 commits into from
Feb 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions build-support/bin/ci.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ def get_listed_targets(filename: str) -> TargetSet:
f"--tag={'-' if test_type == TestType.unit else '+'}integration",
"filter",
"--type=python_tests",
"build-support::",
"src/python::",
"tests/python::",
"contrib::",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def create_parser() -> argparse.ArgumentParser:
description='Modernize BUILD files to no longer use globs, rglobs, and zglobs.',
)
parser.add_argument(
'folders', type=Path, nargs='*',
'folders', type=Path, nargs='+',
help="Folders to recursively search for `BUILD` files",
)
parser.add_argument(
Expand Down
127 changes: 127 additions & 0 deletions build-support/migration-support/migrate_to_toml_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
#!/usr/bin/env python3
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

"""A script to automatically convert an INI Pants config file to TOML. There will still likely be
some issues remaining which require manual fixes, but this script will automate most of the tedium.

Run `python3 migrate_to_toml_config.py --help`.
"""

import argparse
import logging
import re
Eric-Arellano marked this conversation as resolved.
Show resolved Hide resolved
from pathlib import Path
from typing import Dict, List


def main() -> None:
args = create_parser().parse_args()
updates: Dict[Path, List[str]] = {}
for config in args.files:
if config.suffix not in [".ini", ".cfg"]:
logging.warning(f"This script may only be run on INI files. Skipping {config}.")
continue
new_path = Path(config.parent, f"{config.stem}.toml")
if new_path.exists():
logging.warning(f"{new_path} already exists. Skipping conversion of {config}.")
continue
new_config_content = generate_new_config(config)
updates[new_path] = new_config_content
for new_path, new_content in updates.items():
joined_new_content = '\n'.join(new_content) + "\n"
if args.preview:
print(f"Would create {new_path} with the following content:\n\n{joined_new_content}")
else:
logging.info(
f"Created {new_path}. There are likely some remaining issues that need manual "
"attention. Please copy the file into https://www.toml-lint.com or open with your editor "
"to fix any remaining issues."
)
new_path.write_text(joined_new_content)


def create_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(
description='Convert INI config files to TOML config files.',
)
parser.add_argument(
'files', type=Path, nargs='*',
help="Config files to convert.",
)
parser.add_argument(
'-p', '--preview', action='store_true',
help="Output to stdout rather than creating the new TOML config file(s).",
)
return parser


def update_primitive_value(original: str) -> str:
if original in ["true", "True"]:
return "true"
if original in ["false", "False"]:
return "false"

try:
return str(int(original))
except ValueError:
pass

try:
return str(float(original))
except ValueError:
pass

return f'"{original}"'


def generate_new_config(config: Path) -> List[str]:
original_text = config.read_text()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You were entirely correct that I would love the regex, and I am aware that this is an 80% script, but it seems like using a python ini config parser wouldn't be a huge amount of effort, because we'll then be working with parsed objects instead of raw strings. Is there a reason we decided to go the regex route here over an ini parser? I would love to toss up an example ini -> toml parser or pair on it if that seems like the right way to go.

original_text_lines = original_text.splitlines()
updated_text_lines = original_text_lines.copy()

for i, line in enumerate(original_text_lines):
option_regex = r"(?P<option>[a-zA-Z0-9_]+)"
before_value_regex = rf"\s*{option_regex}\s*[:=]\s*"
valid_value_characters = r"a-zA-Z0-9_.@!%\=\>\<\-\(\)\/"
value_regex = rf"(?P<value>[{valid_value_characters}]+)"
parsed_line = re.match(
rf"{before_value_regex}{value_regex}\s*$", line,
)

if parsed_line:
option, value = parsed_line.groups()
updated_text_lines[i] = f"{option} = {update_primitive_value(value)}"
continue

# Check if it's a one-line list value
list_value_regex = rf"(?P<list>[\+\-]?\[[{valid_value_characters},\s\'\"]*\])"
parsed_list_line = re.match(rf"{before_value_regex}{list_value_regex}\s*$", line)
if parsed_list_line:
option, value = parsed_list_line.groups()
if value.startswith("+"):
updated_line = f"{option}.add = {value[1:]}"
elif value.startswith("-"):
updated_line = f"{option}.remove = {value[1:]}"
else:
updated_line = f"{option} = {value}"
updated_text_lines[i] = updated_line
continue

# Check if it's a one-line dict value
dict_value_regex = rf"(?P<dict>{{[{valid_value_characters},:\s\'\"]*}})"
parsed_dict_line = re.match(rf"{before_value_regex}{dict_value_regex}\s*$", line)
if parsed_dict_line:
option, value = parsed_dict_line.groups()
updated_text_lines[i] = f'{option} = """{value}"""'
continue

Copy link
Contributor

@blorente blorente Feb 5, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe I'll go on a limb here and suggest some plaintext code:
This is supposed to go after the last if and before the return

Suggested change
print(f'We couldn't automatically convert line {i}: {line}. Please refer to https://github.com/pantsbuild/pants/pull/9054 for a description of what might be the case.', file=sys.stderr)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the code snippet!

The one challenge with this snippet is that it will trigger on every non-option line, such as every single one of these lines, except for the very last one:

# Comment not attached to any option
[cache.java]
args: [
     'foo',
     'bar'
  ]


next_option: foo

I think that's more output than we would want. We could, of course, teach the script how to handle those different things like what a section header looks like, but then we're getting to the complexity I wanted to avoid.

--

Favor: can you please pull down this PR and run build-support/migration-support/migrate_to_toml_config.py pants.ini pants.travis-ci.ini pants.remote.ini, then open up the new TOML files in your editor? (Or, do this on Twitter's config files).

I suspect you'll find that there aren't many remaining issues and that those errors are fairly intuitive to fix, thanks to the editor warnings. This is why I'm pushing back on going from 80-100%; after trying out the script, it doesn't seem worth it to make it even better.

@benjyw's point is that we don't even need to provide a script in the first place - it's not very difficult to convert from INI to TOML, only a little tedious. We're providing this script as a courtesy to users, and I don't think we need to have it be perfect for them to be happy.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still think I'd prefer to be overly verbose, but I agree that nobody is going to die for figuring out 20 manual changes. Particularly, because "users" here most likely means the same people that are seeing these changes happen, not actual end-users.

So not going to block on that, thanks for putting the effort to consider this point :)

return updated_text_lines


if __name__ == '__main__':
logging.basicConfig(format="[%(levelname)s]: %(message)s", level=logging.INFO)
try:
main()
except KeyboardInterrupt:
pass
228 changes: 228 additions & 0 deletions build-support/migration-support/migrate_to_toml_config_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from pathlib import Path
from textwrap import dedent

from migrate_to_toml_config import generate_new_config

from pants.util.contextutil import temporary_dir


def assert_rewrite(*, original: str, expected: str) -> None:
with temporary_dir() as tmpdir:
build = Path(tmpdir, "pants.ini")
build.write_text(original)
result = generate_new_config(build)
assert result == expected.splitlines()


def test_fully_automatable_config() -> None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for writing great tests for this script and acknowledging that everything we ship to users should be tested!!! This makes me happy.

"""We should be able to safely convert all of this config without any issues."""
assert_rewrite(
original=dedent(
"""\
[GLOBAL]
bool_opt1: false
bool_opt2: True
int_opt: 10
float_opt: 5.0

[str_values]
normal: .isort.cfg
version: isort>=4.8
path: /usr/bin/test.txt
fromfile: @build-support/example.txt
interpolation: %(foo)s/example
"""
),
expected=dedent(
"""\
[GLOBAL]
bool_opt1 = false
bool_opt2 = true
int_opt = 10
float_opt = 5.0

[str_values]
normal = ".isort.cfg"
version = "isort>=4.8"
path = "/usr/bin/test.txt"
fromfile = "@build-support/example.txt"
interpolation = "%(foo)s/example"
"""
)
)


def test_different_key_value_symbols() -> None:
"""You can use both `:` or `=` in INI for key-value pairs."""
assert_rewrite(
original=dedent(
"""\
[GLOBAL]
o1: a
o2: a
o3 : a
o4 : a
o5 :a
o6 :a
o7= a
o8= a
o9 = a
o10 = a
o11 =a
o12 =a
"""
),
expected=dedent(
"""\
[GLOBAL]
o1 = "a"
o2 = "a"
o3 = "a"
o4 = "a"
o5 = "a"
o6 = "a"
o7 = "a"
o8 = "a"
o9 = "a"
o10 = "a"
o11 = "a"
o12 = "a"
"""
)
)


def test_comments() -> None:
"""We don't mess with comments."""
assert_rewrite(
original=dedent(
"""\
[GLOBAL]
bool_opt1: False # Good riddance!
bool_opt2: True
int_opt: 10 ; semicolons matter too
# commented_out: 10
; commented_out: 10

# Comments on new lines should be preserved
; Semicolon comments should also be preserved
[isort] # comments on section headers shouldn't matter because we don't convert sections
config: .isort.cfg
"""
),
expected=dedent(
"""\
[GLOBAL]
bool_opt1: False # Good riddance!
bool_opt2 = true
int_opt: 10 ; semicolons matter too
# commented_out: 10
; commented_out: 10

# Comments on new lines should be preserved
; Semicolon comments should also be preserved
[isort] # comments on section headers shouldn't matter because we don't convert sections
config = ".isort.cfg"
"""
)
)


def test_list_options() -> None:
"""We can safely update one-line lists.

The list members will already be correctly quoted for us. All that we need to update is the
option->key symbol and simple `+` adds and `-` removes.
"""
assert_rewrite(
original=dedent(
"""\
[GLOBAL]
l1: []
l2: [0, 1]
l3: ["a", "b"]
l4: ['a', 'b']
l5: [ "a", 'b' ]
l6: +["a", "b"]
l7: -["x", "y"]
l8: +["a", "b"],-["x", "y"]
l9: [[0], [1]]
l10: [0, 1] # comment
l11: [0, 1] ; comment
"""
),
expected=dedent(
"""\
[GLOBAL]
l1 = []
l2 = [0, 1]
l3 = ["a", "b"]
l4 = ['a', 'b']
l5 = [ "a", 'b' ]
l6.add = ["a", "b"]
l7.remove = ["x", "y"]
l8: +["a", "b"],-["x", "y"]
l9: [[0], [1]]
l10: [0, 1] # comment
l11: [0, 1] ; comment
"""
)
)


def test_dict_options() -> None:
"""We can safely preserve one-line dict values, which only need to be wrapped in quotes to work
properly.
"""
assert_rewrite(
original=dedent(
"""\
[GLOBAL]
d1: {}
d2: {"a": 0}
d3: {'a': 0}
d4: { "a": 0, "b: 0" }
d5: {"a": {"nested": 0}}
d6: {"a": 0} # comment
d7: {"a": 0} ; comment
"""
),
expected=dedent(
"""\
[GLOBAL]
d1 = \"""{}\"""
d2 = \"""{"a": 0}\"""
d3 = \"""{'a': 0}\"""
d4 = \"""{ "a": 0, "b: 0" }\"""
d5: {"a": {"nested": 0}}
d6: {"a": 0} # comment
d7: {"a": 0} ; comment
"""
)
)


def test_multiline_options_ignored() -> None:
"""Don't mess with multiline options, which are too difficult to get right."""
original = dedent(
"""\
[GLOBAL]
multiline_string: in a galaxy far,
far, away...

l1: [
'foo',
]

l2: ['foo',
'bar']

d: {
'a': 0,
}
"""
)
assert_rewrite(original=original, expected=original)