Skip to content

Commit

Permalink
Add first version of testdata generator from Markdown
Browse files Browse the repository at this point in the history
  • Loading branch information
etalian committed Aug 9, 2022
1 parent fbd707a commit 954549f
Show file tree
Hide file tree
Showing 5 changed files with 198 additions and 0 deletions.
5 changes: 5 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ pip_install(
requirements = "//github_tools:requirements.txt",
)

# Pip packages listed in docs/tests/requirements.txt, made available as the
# external repository `py_deps_doctests`.
pip_install(
name = "py_deps_doctests",
requirements = "//docs:tests/requirements.txt",
)

###############################################################################
# C++ rules
###############################################################################
Expand Down
189 changes: 189 additions & 0 deletions docs/tests/md_to_testdata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
#!/usr/bin/env python3

"""Generate Explorer test cases from Markdown code snippets"""

__copyright__ = """
Part of the Carbon Language project, under the Apache License v2.0 with LLVM
Exceptions. See /LICENSE for license information.
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""

import argparse
import glob
from html.parser import HTMLParser
import os
from pathlib import Path
from markdown import Markdown, Extension, markdownFromFile
from markdown.extensions.fenced_code import FencedCodeExtension
from markdown.postprocessors import Postprocessor
import re
from typing import Any, Dict, List, Tuple

# Text written verbatim at the top of every generated test file: the RUN lines
# that pipe explorer output into FileCheck, plus the AUTOUPDATE line.
TEST_HEADER = """
// RUN: %{explorer} %s 2>&1 | \\
// RUN: %{FileCheck} --match-full-lines --allow-unused-prefixes=false %s
// RUN: %{explorer} --parser_debug --trace_file=- %s 2>&1 | \\
// RUN: %{FileCheck} --match-full-lines --allow-unused-prefixes %s
// AUTOUPDATE: %{explorer} %s
"""
# Predefined snippets, keyed by the short name used in test commands. Each
# value is a list of source lines.
NAMED_SNIPPETS = {
"m": ["fn Main() -> i32 { return 0; }"],
"mo": ["fn Main() -> i32 {"],
"rc": ["return 0; }"],
"r": ["return 0;"],
"c": ["}"],
}
# Matches the leading `test` keyword (followed by whitespace) that marks a
# comment as a list of test commands.
RE_TEST = re.compile(r"\s*test\s+")
# Matches a single test command, optionally preceded by whitespace. Exactly one
# group family is set per match:
#   out_buf                        a bare `_`
#   out_code                       a backtick-quoted code string
#   out_name                       a bare snippet name
#   del_line [+ del_lines]         `-N` or `-N+M`
#   ins_line + ins_code/ins_name   `+N` followed by quoted code or a name
#   dot_code / dot_name / dot_none `.` followed by quoted code, a name, or nothing
#   cpy_name                       `=` followed by a name
RE_TEST_COMMAND = re.compile(r"""\s*(?P<command>
(?P<out_buf>[_])
|`(?P<out_code>[^`]+)`
|(?P<out_name>[\w_][\w\d_]*)
|[-](?P<del_line>[\d]+)([+](?P<del_lines>[\d]+))?
|[+](?P<ins_line>[\d]+)(`(?P<ins_code>[^`]+)`|(?P<ins_name>[\w_][\w\d_]*))
|[.]`(?P<dot_code>[^`]+)`
|[.](?P<dot_name>[\w_][\w\d_]*)
|(?P<dot_none>[.])
|[=](?P<cpy_name>[\w_][\w\d_]*)
)""", re.VERBOSE)
# Matches a literal `...` placeholder inside a code snippet.
RE_DOTS = re.compile(r"[.]{3}")

def parse_test(text : str) -> List[Dict]:
    """Parse the text of a test comment into a list of commands.

    Returns None when `text` does not start with the `test` keyword.
    Otherwise returns one dict per consecutive command recognised by
    RE_TEST_COMMAND (the match's named groups). Scanning stops at the first
    position where no command matches; whatever follows is ignored.
    """
    header = RE_TEST.match(text)
    if not header:
        return None

    commands = []
    cursor = header.end()
    while True:
        found = RE_TEST_COMMAND.match(text, cursor)
        if not found:
            break
        commands.append(found.groupdict())
        # Resume scanning right after the command that was just consumed.
        cursor = found.end()
    return commands

class DocsSnippetParser(HTMLParser):
    """Generate test files from annotated code snippets in rendered HTML.

    An HTML comment whose text starts with `test` carries a list of commands
    (see `parse_test`). When such a comment starts at most two lines above a
    `<code>` element that has no `lang-*` class or has class `lang-carbon`,
    the element's text becomes the working buffer and the commands are applied
    to it in order:

    * `_`, a quoted code string, or a snippet name append the buffer, the
      given code, or the named snippet to the test output;
    * `-N` deletes buffer line N, `-N+M` deletes M lines starting at line N;
    * `+N` followed by quoted code or a snippet name inserts lines before
      buffer line N;
    * `.` followed by quoted code, a snippet name, or nothing replaces every
      `...` in the buffer;
    * `=name` stores a copy of the buffer as snippet `name`.

    When the next end tag is seen and any output lines were produced, they are
    written to `test_<index>.carbon` inside `outdir`.
    """

    def __init__(self, outdir: Path) -> None:
        """Create a parser that writes its generated tests into `outdir`."""
        super().__init__()
        self.outdir = outdir
        self.comm_lineno = 0
        self.code_lineno = 0
        self.code_text = None
        self.code_lines = None
        # Work on a private copy: `=name` commands add entries, and those must
        # not leak into the module-level table.
        self.snippets = {
            name: list(lines) for name, lines in NAMED_SNIPPETS.items()
        }
        self.test_text = None
        self.test_cmds = None
        self.test_lines = None
        self.test_index = 0

    def handle_comment(self, data: str) -> None:
        """Remember the latest comment and the line it started on."""
        self.test_text = data
        self.comm_lineno = self.getpos()[0]

    #TODO: keep track of line numbers in original document for error reporting
    def handle_starttag(self, tag: str, attrs: List[Tuple[str, str]]) -> None:
        """Record the line of a `<code>` tag that may hold a Carbon snippet."""
        # A valueless attribute is reported with value None; skip those
        # instead of calling startswith on None.
        langs = [
            v for k, v in attrs
            if k == "class" and v is not None and v.startswith("lang-")
        ]
        if tag == "code" and (not langs or "lang-carbon" in langs):
            self.code_lineno = self.getpos()[0]

    def handle_data(self, data: str) -> None:
        """Apply the pending test commands to the text of a code element."""
        #TODO: there is no error handling
        if self.code_lineno <= 0 or not self.test_text:
            return
        # The comment must start no more than two lines above the code tag.
        if self.comm_lineno < self.code_lineno - 2:
            return
        test_cmds = parse_test(self.test_text)
        if not test_cmds:
            return

        self.code_text = data
        self.code_lines = data.splitlines()
        self.test_cmds = test_cmds
        self.test_lines = []
        for cmd in test_cmds:
            self._apply_command(cmd)

    def _apply_command(self, cmd: Dict) -> None:
        """Apply a single parsed command to the buffer or the test output."""
        if cmd.get("out_buf"):
            self.test_lines.extend(self.code_lines)
        elif cmd.get("out_code"):
            self.test_lines.extend(cmd["out_code"].splitlines())
        elif cmd.get("out_name"):
            self.test_lines.extend(self.snippets[cmd["out_name"]])
        elif cmd.get("del_lines"):
            first = int(cmd["del_line"]) - 1
            count = int(cmd["del_lines"])
            # Remove exactly `count` lines, so `-N+1` behaves like `-N`.
            del self.code_lines[first:first + count]
        elif cmd.get("del_line"):
            del self.code_lines[int(cmd["del_line"]) - 1]
        elif cmd.get("ins_code"):
            self._insert_lines(
                int(cmd["ins_line"]) - 1, cmd["ins_code"].splitlines()
            )
        elif cmd.get("ins_name"):
            self._insert_lines(
                int(cmd["ins_line"]) - 1, self.snippets[cmd["ins_name"]]
            )
        elif cmd.get("dot_code"):
            self._replace_dots(cmd["dot_code"])
        elif cmd.get("dot_name"):
            # Snippets are lists of lines; join them into one replacement.
            self._replace_dots("\n".join(self.snippets[cmd["dot_name"]]))
        elif cmd.get("dot_none"):
            self._replace_dots("")
        elif cmd.get("cpy_name"):
            # Store a copy so later edits of the buffer leave the snippet intact.
            self.snippets[cmd["cpy_name"]] = list(self.code_lines)

    def _insert_lines(self, index: int, lines: List[str]) -> None:
        """Insert `lines` into the buffer in front of position `index`."""
        self.code_lines = (
            self.code_lines[:index] + list(lines) + self.code_lines[index:]
        )

    def _replace_dots(self, replacement: str) -> None:
        """Replace every `...` in the current buffer with `replacement`."""
        # Start from the current lines so earlier deletes/inserts are kept.
        current = "\n".join(self.code_lines)
        # A callable replacement is inserted literally, so backslashes in the
        # replacement code are not treated as regex template escapes.
        self.code_text = RE_DOTS.sub(lambda _match: replacement, current)
        self.code_lines = self.code_text.splitlines()

    def handle_endtag(self, tag: str) -> None:
        """Write the pending test, if any, and reset the per-snippet state."""
        if self.test_lines:
            testpath = os.path.join(self.outdir, f"test_{self.test_index}.carbon")
            with open(testpath, "w", encoding="utf-8") as testfile:
                testfile.write(TEST_HEADER)
                testfile.write("// CHECK: result: 0") #TODO: make configurable
                testfile.write("\n")
                testfile.write("package ExplorerTest api;\n")
                testfile.write("\n")
                testfile.writelines(line + "\n" for line in self.test_lines)
                testfile.write("\n")
            self.test_index += 1
        # Always clear the state so a stale comment or code position can never
        # be paired with unrelated text later in the document.
        self.code_lineno = 0
        self.code_text = None
        self.code_lines = None
        self.test_text = None
        self.test_cmds = None
        self.test_lines = None

class DocsSnippetProcessor(Postprocessor):
    """Markdown postprocessor that extracts tests from the rendered document."""

    def __init__(self, md, outdir) -> None:
        """Keep the Markdown instance and the directory tests are written to."""
        self.outdir = outdir
        self.md = md

    def run(self, text: str) -> str:
        """Feed `text` to a fresh DocsSnippetParser and return it unchanged."""
        DocsSnippetParser(self.outdir).feed(text)
        return text

class DocsSnippetToTest(Extension):
    """Markdown extension that installs the snippet-to-test postprocessor."""

    def __init__(self, path, **kwargs: Any) -> None:
        """Store `path`, the directory handed to the postprocessor."""
        super().__init__(**kwargs)
        self.path = path

    def extendMarkdown(self, md: Markdown) -> None:
        """Register a DocsSnippetProcessor and this extension on `md`."""
        snippet_processor = DocsSnippetProcessor(md, self.path)
        md.postprocessors.register(snippet_processor, "md_to_lit", 0)
        md.registerExtension(self)

def main() -> None:
    """Command-line entry point.

    With `--debug`, the given command string is parsed with `parse_test`, the
    non-empty groups of every matched command are printed, and the function
    returns without touching any files.

    Otherwise the output directory is created if needed, the `*.carbon` files
    already in it are removed, and the input Markdown is rendered with the
    snippet extension enabled so that the test files are generated as a side
    effect. The rendered HTML itself is discarded.
    """
    arg_parser = argparse.ArgumentParser(description=__doc__)
    arg_parser.add_argument(
        "--debug", help="Test generator commands to debug."
    )
    arg_parser.add_argument(
        "--input", help="Markdown file to parse."
    )
    arg_parser.add_argument(
        "--output", help="Output directory. NOTE: contents will be cleared!"
    )
    args = arg_parser.parse_args()

    if args.debug:
        # parse_test returns None when the string is not a test command list.
        for match in parse_test(args.debug) or []:
            print({k: v for (k, v) in match.items() if v})
        # Debug mode never generates files; returning ends the script normally
        # without relying on the site-provided exit() helper.
        return

    if not args.output:
        # Fail with a usage message instead of a TypeError from Path(None).
        arg_parser.error("--output is required unless --debug is given")

    outpath = Path(args.output).resolve()
    os.makedirs(outpath, exist_ok=True)
    # Clean the resolved directory; escape it so glob metacharacters in the
    # directory name are taken literally.
    stale_pattern = os.path.join(glob.escape(str(outpath)), "*.carbon")
    for stale_file in glob.glob(stale_pattern):
        os.remove(stale_file)

    md_extensions = [
        FencedCodeExtension(lang_prefix="lang-"),
        DocsSnippetToTest(outpath),
    ]
    markdownFromFile(
        input=args.input, extensions=md_extensions, output=os.devnull
    )

if __name__ == "__main__":
main()
1 change: 1 addition & 0 deletions docs/tests/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
markdown
1 change: 1 addition & 0 deletions explorer/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ cc_binary(
":main",
"@llvm-project//llvm:Support",
],
visibility = ["//docs:__pkg__"] # for doctests
)

py_binary(
Expand Down
2 changes: 2 additions & 0 deletions explorer/testdata/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,5 @@ filegroup(
srcs = glob(["**/*.carbon"]),
visibility = ["//visibility:public"],
)

exports_files(["lit.cfg.py"]) # for doctests

0 comments on commit 954549f

Please sign in to comment.