From 8de59c1bb9fdcea69ff6e6357972ef1b75b71721 Mon Sep 17 00:00:00 2001
From: Jacob Bower <1978924+jbower-fb@users.noreply.github.com>
Date: Fri, 3 Mar 2023 20:59:21 -0800
Subject: [PATCH] gh-102021 : Allow multiple input files for interpreter loop generator (#102022)

The input files no longer use `-i`.
---
 Makefile.pre.in                         |   4 +-
 Python/generated_cases.c.h              |   3 +-
 Python/opcode_metadata.h                |   5 +-
 Tools/cases_generator/generate_cases.py | 115 ++++++++++++++++++------
 Tools/cases_generator/lexer.py          |   2 +-
 Tools/cases_generator/parser.py         |  17 ++--
 6 files changed, 105 insertions(+), 41 deletions(-)

diff --git a/Makefile.pre.in b/Makefile.pre.in
index b12a1bc060af90..1a1853bf3d7871 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -1485,9 +1485,9 @@ regen-cases:
 	PYTHONPATH=$(srcdir)/Tools/cases_generator \
 	$(PYTHON_FOR_REGEN) \
 	$(srcdir)/Tools/cases_generator/generate_cases.py \
-		-i $(srcdir)/Python/bytecodes.c \
 		-o $(srcdir)/Python/generated_cases.c.h.new \
-		-m $(srcdir)/Python/opcode_metadata.h.new
+		-m $(srcdir)/Python/opcode_metadata.h.new \
+		$(srcdir)/Python/bytecodes.c
 	$(UPDATE_FILE) $(srcdir)/Python/generated_cases.c.h $(srcdir)/Python/generated_cases.c.h.new
 	$(UPDATE_FILE) $(srcdir)/Python/opcode_metadata.h $(srcdir)/Python/opcode_metadata.h.new

diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index f59f7c17451c17..82e18505b0d430 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -1,5 +1,6 @@
 // This file is generated by Tools/cases_generator/generate_cases.py
-// from Python/bytecodes.c
+// from:
+// Python/bytecodes.c
 // Do not edit!

     TARGET(NOP) {
diff --git a/Python/opcode_metadata.h b/Python/opcode_metadata.h
index f27906a3e349eb..67cb0088c3b789 100644
--- a/Python/opcode_metadata.h
+++ b/Python/opcode_metadata.h
@@ -1,5 +1,6 @@
-// This file is generated by Tools/cases_generator/generate_cases.py --metadata
-// from Python/bytecodes.c
+// This file is generated by Tools/cases_generator/generate_cases.py
+// from:
+// Python/bytecodes.c
 // Do not edit!

 #ifndef NEED_OPCODE_TABLES
diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py
index b760172974c8a5..25cf75e4c1c490 100644
--- a/Tools/cases_generator/generate_cases.py
+++ b/Tools/cases_generator/generate_cases.py
@@ -37,15 +37,15 @@
     description="Generate the code for the interpreter switch.",
     formatter_class=argparse.ArgumentDefaultsHelpFormatter,
 )
-arg_parser.add_argument(
-    "-i", "--input", type=str, help="Instruction definitions", default=DEFAULT_INPUT
-)
 arg_parser.add_argument(
     "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
 )
 arg_parser.add_argument(
     "-m", "--metadata", type=str, help="Generated metadata", default=DEFAULT_METADATA_OUTPUT
 )
+arg_parser.add_argument(
+    "input", nargs=argparse.REMAINDER, help="Instruction definition file(s)"
+)


 def effect_size(effect: StackEffect) -> tuple[int, str]:
@@ -485,6 +485,11 @@ class MacroInstruction(SuperOrMacroInstruction):
     parts: list[Component | parser.CacheEffect]


+@dataclasses.dataclass
+class OverriddenInstructionPlaceHolder:
+    name: str
+
+
 AnyInstruction = Instruction | SuperInstruction | MacroInstruction
 INSTR_FMT_PREFIX = "INSTR_FMT_"

@@ -492,32 +497,33 @@ class MacroInstruction(SuperOrMacroInstruction):
 class Analyzer:
     """Parse input, analyze it, and write to output."""

-    filename: str
+    input_filenames: list[str]
     output_filename: str
     metadata_filename: str
-    src: str
     errors: int = 0

-    def __init__(self, filename: str, output_filename: str, metadata_filename: str):
+    def __init__(self, input_filenames: list[str], output_filename: str, metadata_filename: str):
         """Read the input file."""
-        self.filename = filename
+        self.input_filenames = input_filenames
         self.output_filename = output_filename
         self.metadata_filename = metadata_filename
-        with open(filename) as f:
-            self.src = f.read()

     def error(self, msg: str, node: parser.Node) -> None:
         lineno = 0
+        filename = ""
         if context := node.context:
+            filename = context.owner.filename
             # Use line number of first non-comment in the node
             for token in context.owner.tokens[context.begin : context.end]:
                 lineno = token.line
                 if token.kind != "COMMENT":
                     break
-        print(f"{self.filename}:{lineno}: {msg}", file=sys.stderr)
+        print(f"{filename}:{lineno}: {msg}", file=sys.stderr)
         self.errors += 1

-    everything: list[parser.InstDef | parser.Super | parser.Macro]
+    everything: list[
+        parser.InstDef | parser.Super | parser.Macro | OverriddenInstructionPlaceHolder
+    ]
     instrs: dict[str, Instruction]  # Includes ops
     supers: dict[str, parser.Super]
     super_instrs: dict[str, SuperInstruction]
@@ -531,7 +537,31 @@ def parse(self) -> None:
         We only want the parser to see the stuff between
         the begin and end markers.
""" - psr = parser.Parser(self.src, filename=self.filename) + + self.everything = [] + self.instrs = {} + self.supers = {} + self.macros = {} + self.families = {} + + instrs_idx: dict[str, int] = dict() + + for filename in self.input_filenames: + self.parse_file(filename, instrs_idx) + + files = " + ".join(self.input_filenames) + print( + f"Read {len(self.instrs)} instructions/ops, " + f"{len(self.supers)} supers, {len(self.macros)} macros, " + f"and {len(self.families)} families from {files}", + file=sys.stderr, + ) + + def parse_file(self, filename: str, instrs_idx: dict[str, int]) -> None: + with open(filename) as file: + src = file.read() + + psr = parser.Parser(src, filename=filename) # Skip until begin marker while tkn := psr.next(raw=True): @@ -551,16 +581,27 @@ def parse(self) -> None: # Parse from start psr.setpos(start) - self.everything = [] - self.instrs = {} - self.supers = {} - self.macros = {} - self.families = {} thing: parser.InstDef | parser.Super | parser.Macro | parser.Family | None + thing_first_token = psr.peek() while thing := psr.definition(): match thing: case parser.InstDef(name=name): + if name in self.instrs: + if not thing.override: + raise psr.make_syntax_error( + f"Duplicate definition of '{name}' @ {thing.context} " + f"previous definition @ {self.instrs[name].inst.context}", + thing_first_token, + ) + self.everything[instrs_idx[name]] = OverriddenInstructionPlaceHolder(name=name) + if name not in self.instrs and thing.override: + raise psr.make_syntax_error( + f"Definition of '{name}' @ {thing.context} is supposed to be " + "an override but no previous definition exists.", + thing_first_token, + ) self.instrs[name] = Instruction(thing) + instrs_idx[name] = len(self.everything) self.everything.append(thing) case parser.Super(name): self.supers[name] = thing @@ -573,14 +614,7 @@ def parse(self) -> None: case _: typing.assert_never(thing) if not psr.eof(): - raise psr.make_syntax_error("Extra stuff at the end") - - print( - f"Read {len(self.instrs)} instructions/ops, " - f"{len(self.supers)} supers, {len(self.macros)} macros, " - f"and {len(self.families)} families from {self.filename}", - file=sys.stderr, - ) + raise psr.make_syntax_error(f"Extra stuff at the end of {filename}") def analyze(self) -> None: """Analyze the inputs. 
@@ -879,6 +913,8 @@ def write_stack_effect_functions(self) -> None:
         popped_data: list[tuple[AnyInstruction, str]] = []
         pushed_data: list[tuple[AnyInstruction, str]] = []
         for thing in self.everything:
+            if isinstance(thing, OverriddenInstructionPlaceHolder):
+                continue
             instr, popped, pushed = self.get_stack_effect_info(thing)
             if instr is not None:
                 popped_data.append((instr, popped))
@@ -907,6 +943,13 @@ def write_function(
         write_function("pushed", pushed_data)
         self.out.emit("")

+    def from_source_files(self) -> str:
+        paths = "\n// ".join(
+            os.path.relpath(filename, ROOT).replace(os.path.sep, posixpath.sep)
+            for filename in self.input_filenames
+        )
+        return f"// from:\n// {paths}\n"
+
     def write_metadata(self) -> None:
         """Write instruction metadata to output file."""

@@ -914,6 +957,8 @@ def write_metadata(self) -> None:
         all_formats: set[str] = set()
         for thing in self.everything:
             match thing:
+                case OverriddenInstructionPlaceHolder():
+                    continue
                 case parser.InstDef():
                     format = self.instrs[thing.name].instr_fmt
                 case parser.Super():
@@ -928,8 +973,8 @@ def write_metadata(self) -> None:

         with open(self.metadata_filename, "w") as f:
             # Write provenance header
-            f.write(f"// This file is generated by {THIS} --metadata\n")
-            f.write(f"// from {os.path.relpath(self.filename, ROOT).replace(os.path.sep, posixpath.sep)}\n")
+            f.write(f"// This file is generated by {THIS}\n")
+            f.write(self.from_source_files())
             f.write(f"// Do not edit!\n")

             # Create formatter; the rest of the code uses this
@@ -959,6 +1004,8 @@ def write_metadata(self) -> None:
             # Write metadata for each instruction
             for thing in self.everything:
                 match thing:
+                    case OverriddenInstructionPlaceHolder():
+                        continue
                     case parser.InstDef():
                         if thing.kind != "op":
                             self.write_metadata_for_inst(self.instrs[thing.name])
@@ -1008,7 +1055,7 @@ def write_instructions(self) -> None:
         with open(self.output_filename, "w") as f:
             # Write provenance header
             f.write(f"// This file is generated by {THIS}\n")
-            f.write(f"// from {os.path.relpath(self.filename, ROOT).replace(os.path.sep, posixpath.sep)}\n")
+            f.write(self.from_source_files())
             f.write(f"// Do not edit!\n")

             # Create formatter; the rest of the code uses this
@@ -1020,6 +1067,8 @@ def write_instructions(self) -> None:
             n_macros = 0
             for thing in self.everything:
                 match thing:
+                    case OverriddenInstructionPlaceHolder():
+                        self.write_overridden_instr_place_holder(thing)
                     case parser.InstDef():
                         if thing.kind != "op":
                             n_instrs += 1
@@ -1039,9 +1088,17 @@ def write_instructions(self) -> None:
             file=sys.stderr,
         )

+    def write_overridden_instr_place_holder(self,
+            place_holder: OverriddenInstructionPlaceHolder) -> None:
+        self.out.emit("")
+        self.out.emit(
+            f"// TARGET({place_holder.name}) overridden by later definition")
+
     def write_instr(self, instr: Instruction) -> None:
         name = instr.name
         self.out.emit("")
+        if instr.inst.override:
+            self.out.emit("// Override")
         with self.out.block(f"TARGET({name})"):
             if instr.predicted:
                 self.out.emit(f"PREDICTED({name});")
@@ -1190,6 +1247,8 @@ def variable_used(node: parser.Node, name: str) -> bool:
 def main():
     """Parse command line, parse input, analyze, write output."""
     args = arg_parser.parse_args()  # Prints message and sys.exit(2) on error
+    if len(args.input) == 0:
+        args.input.append(DEFAULT_INPUT)
     a = Analyzer(args.input, args.output, args.metadata)  # Raises OSError if input unreadable
     a.parse()  # Raises SyntaxError on failure
     a.analyze()  # Prints messages and sets a.errors on failure
diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py
index 39b6a212a67b1c..1c70d1c4089e4e 100644
--- a/Tools/cases_generator/lexer.py
+++ b/Tools/cases_generator/lexer.py
@@ -119,7 +119,7 @@ def choice(*opts):
 kwds = (
     'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST',
     'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE', 'ELSE', 'ENUM', 'EXTERN',
-    'FLOAT', 'FOR', 'GOTO', 'IF', 'INLINE', 'INT', 'LONG',
+    'FLOAT', 'FOR', 'GOTO', 'IF', 'INLINE', 'INT', 'LONG', 'OVERRIDE',
     'REGISTER', 'OFFSETOF',
     'RESTRICT', 'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT',
     'SWITCH', 'TYPEDEF', 'UNION', 'UNSIGNED', 'VOID',
diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py
index c7c8d8af6b7318..7bf45a350bc84b 100644
--- a/Tools/cases_generator/parser.py
+++ b/Tools/cases_generator/parser.py
@@ -33,7 +33,7 @@ class Context(NamedTuple):
     owner: PLexer

     def __repr__(self):
-        return f"<{self.begin}-{self.end}>"
+        return f"<{self.owner.filename}: {self.begin}-{self.end}>"


 @dataclass
@@ -99,6 +99,7 @@ class OpName(Node):

 @dataclass
 class InstHeader(Node):
+    override: bool
     register: bool
     kind: Literal["inst", "op", "legacy"]  # Legacy means no (inputs -- outputs)
     name: str
@@ -108,6 +109,7 @@ class InstHeader(Node):

 @dataclass
 class InstDef(Node):
+    override: bool
     register: bool
     kind: Literal["inst", "op", "legacy"]
     name: str
@@ -152,17 +154,18 @@ def inst_def(self) -> InstDef | None:
         if hdr := self.inst_header():
             if block := self.block():
                 return InstDef(
-                    hdr.register, hdr.kind, hdr.name, hdr.inputs, hdr.outputs, block
+                    hdr.override, hdr.register, hdr.kind, hdr.name, hdr.inputs, hdr.outputs, block
                 )
             raise self.make_syntax_error("Expected block")
         return None

     @contextual
     def inst_header(self) -> InstHeader | None:
-        # inst(NAME)
-        # | [register] inst(NAME, (inputs -- outputs))
-        # | [register] op(NAME, (inputs -- outputs))
+        # [override] inst(NAME)
+        # | [override] [register] inst(NAME, (inputs -- outputs))
+        # | [override] [register] op(NAME, (inputs -- outputs))
         # TODO: Make INST a keyword in the lexer.
+        override = bool(self.expect(lx.OVERRIDE))
         register = bool(self.expect(lx.REGISTER))
         if (tkn := self.expect(lx.IDENTIFIER)) and (kind := tkn.text) in ("inst", "op"):
             if self.expect(lx.LPAREN) and (tkn := self.expect(lx.IDENTIFIER)):
@@ -171,10 +174,10 @@ def inst_header(self) -> InstHeader | None:
                 inp, outp = self.io_effect()
                 if self.expect(lx.RPAREN):
                     if (tkn := self.peek()) and tkn.kind == lx.LBRACE:
-                        return InstHeader(register, kind, name, inp, outp)
+                        return InstHeader(override, register, kind, name, inp, outp)
             elif self.expect(lx.RPAREN) and kind == "inst":
                 # No legacy stack effect if kind is "op".
-                return InstHeader(register, "legacy", name, [], [])
+                return InstHeader(override, register, "legacy", name, [], [])
         return None

     def io_effect(self) -> tuple[list[InputEffect], list[OutputEffect]]:
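
Usage note: with this patch the generator is invoked along the lines of
`generate_cases.py -o <output> -m <metadata> FILE [FILE ...]` (as in the
Makefile.pre.in hunk above), and main() falls back to DEFAULT_INPUT when no
input files are given. The sketch below shows what a hypothetical second
input file could contain; the instruction chosen and its body are
illustrative only, assuming the same definition already exists in
Python/bytecodes.c:

    // Hypothetical extra input file, between the begin/end markers the
    // generator scans for. The new `override` keyword tells the parser that
    // this intentionally replaces the earlier definition instead of being
    // reported as a duplicate.
    override inst(NOP, (--)) {
    }

Without `override`, a repeated instruction name is rejected as a duplicate
definition; with `override` but no earlier definition, parsing also fails
(see the checks added to the per-file definition loop above).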