From 242ee8ac18f48ddfb7997dab4fb62016e115823a Mon Sep 17 00:00:00 2001 From: Andy C Date: Sun, 24 Nov 2024 16:32:05 -0500 Subject: [PATCH] [ASDL] Debug code for hnode_t blowup, and doc_t blowup It's roughly a 10x blowup, and it's linear, not quadratic or worse Also add some tests for tuple return values. --- asdl/format.py | 89 +++++++++++++++++++++++++++- display/pretty-benchmark.sh | 10 +++- display/pretty.asdl | 2 +- mycpp/cppgen_pass.py | 8 +-- mycpp/examples/tuple_return_value.py | 29 +++++++++ 5 files changed, 127 insertions(+), 11 deletions(-) diff --git a/asdl/format.py b/asdl/format.py index 392f95f475..f337da291a 100644 --- a/asdl/format.py +++ b/asdl/format.py @@ -1,13 +1,14 @@ """ format.py -- Pretty print an ASDL data structure. """ -from _devbuild.gen.hnode_asdl import hnode_t +from _devbuild.gen.hnode_asdl import hnode, hnode_e, hnode_t +from _devbuild.gen.pretty_asdl import doc, doc_e, doc_t, MeasuredDoc from display import pp_hnode from display import pretty from mycpp import mylib -from mycpp.mylib import log +from mycpp.mylib import log, tagswitch -from typing import Any, Optional +from typing import Any, Optional, cast _ = log @@ -21,10 +22,89 @@ def PrettyPrint(obj, f=None): HNodePrettyPrint(tree, f) +def _HNodeCount(h): + # type: (hnode_t) -> int + """ + Return the size of the tree + """ + UP_h = h + with tagswitch(h) as case: + if case(hnode_e.AlreadySeen): + return 1 + + elif case(hnode_e.Leaf): + return 1 + + elif case(hnode_e.Array): + h = cast(hnode.Array, UP_h) + n = 0 + for child in h.children: + n += _HNodeCount(child) + return n + + elif case(hnode_e.Record): + h = cast(hnode.Record, UP_h) + n = 0 + for field in h.fields: + n += _HNodeCount(field.val) + + if h.unnamed_fields is not None: + for child in h.unnamed_fields: + n += _HNodeCount(child) + return n + + else: + raise AssertionError() + + +def _DocCount(d): + # type: (doc_t) -> int + """ + Return the size of the tree + """ + UP_d = d + with tagswitch(d) as case: + if 
case(doc_e.Break): + return 1 + + elif case(doc_e.Text): + return 1 + + elif case(doc_e.Indent): + d = cast(doc.Indent, UP_d) + return _DocCount(d.mdoc.doc) + + elif case(doc_e.Group): + d = cast(MeasuredDoc, UP_d) + return _DocCount(d.doc) + + elif case(doc_e.Flat): + d = cast(doc.Flat, UP_d) + return _DocCount(d.mdoc.doc) + + elif case(doc_e.IfFlat): + d = cast(doc.IfFlat, UP_d) + return _DocCount(d.flat_mdoc.doc) + _DocCount(d.nonflat_mdoc.doc) + + elif case(doc_e.Concat): + d = cast(doc.Concat, UP_d) + n = 0 + for mdoc in d.mdocs: + n += _DocCount(mdoc.doc) + return n + + else: + raise AssertionError() + + def _HNodePrettyPrint(perf_stats, node, f, max_width=80): # type: (bool, hnode_t, mylib.Writer, int) -> None + mylib.MaybeCollect() if perf_stats: + log('___ HNODE COUNT %d', _HNodeCount(node)) + log('') + log('___ GC: after hnode_t conversion') mylib.PrintGcStats() log('') @@ -37,6 +117,9 @@ def _HNodePrettyPrint(perf_stats, node, f, max_width=80): mylib.MaybeCollect() if perf_stats: + log('___ DOC COUNT %d', _DocCount(doc)) + log('') + log('___ GC: after doc_t conversion') mylib.PrintGcStats() log('') diff --git a/display/pretty-benchmark.sh b/display/pretty-benchmark.sh index 0e6539ff3d..637b15d9ca 100755 --- a/display/pretty-benchmark.sh +++ b/display/pretty-benchmark.sh @@ -55,16 +55,19 @@ compare() { } gc-stats() { - local osh=_bin/cxx-opt/osh + #local osh=_bin/cxx-opt/osh + local osh=_bin/cxx-asan/osh ninja $osh # We should be doing some big GCs here export _OILS_GC_VERBOSE=1 + # 41 KB file + #for file in benchmarks/testdata/functions; do # 615K file - for file in benchmarks/testdata/configure; do + #for file in benchmarks/testdata/configure; do # 1.7 MB file - #for file in benchmarks/testdata/configure-coreutils; do + for file in benchmarks/testdata/configure-coreutils; do local fmt=__perf echo "___ parsing and pretty printing $file" @@ -73,6 +76,7 @@ gc-stats() { # even after adding GC # - max RSS is 878 MB, on configure + # - on the other hand, 
the output is just 30 MB (30,370,809 bytes) # - max RSS is 2,386 MB, on configure-coreutils /usr/bin/time --format '*** elapsed %e, max RSS %M' -- \ $osh --ast-format $fmt --tool syntax-tree $file | wc --bytes diff --git a/display/pretty.asdl b/display/pretty.asdl index 6b7f7650fd..8ef8af8405 100644 --- a/display/pretty.asdl +++ b/display/pretty.asdl @@ -19,10 +19,10 @@ module pretty Break(str string) | Text(str string) | Indent(int indent, MeasuredDoc mdoc) - | Concat(List[MeasuredDoc] mdocs) | Group %MeasuredDoc | Flat(MeasuredDoc mdoc) | IfFlat(MeasuredDoc flat_mdoc, MeasuredDoc nonflat_mdoc) + | Concat(List[MeasuredDoc] mdocs) # Optimizing allocations # - Concat can be a List subtype diff --git a/mycpp/cppgen_pass.py b/mycpp/cppgen_pass.py index 523899c4d5..d85e9d4c42 100644 --- a/mycpp/cppgen_pass.py +++ b/mycpp/cppgen_pass.py @@ -401,13 +401,13 @@ def GetCType(t, param=False, local=False): def GetCReturnType(t) -> Tuple[str, bool, Optional[str]]: """ - Returns a C string, whether the tuple-by-value optimization was applied, and - the C type of an extra output param if the function is a generator. - """ + Returns a C string, whether the tuple-by-value optimization was applied, + and the C type of an extra output param if the function is a generator. 
+ """ c_ret_type = GetCType(t) - # Optimization: Return tupels BY VALUE + # Optimization: Return tuples BY VALUE if isinstance(t, TupleType): assert c_ret_type.endswith('*') return c_ret_type[:-1], True, None diff --git a/mycpp/examples/tuple_return_value.py b/mycpp/examples/tuple_return_value.py index 4f10b5bd7c..41d588a349 100755 --- a/mycpp/examples/tuple_return_value.py +++ b/mycpp/examples/tuple_return_value.py @@ -21,12 +21,41 @@ def f(x): return i, s[1:] +def g(t): + # type: (Tuple[int, int]) -> Tuple[int, str] + #x = t[0] + t[1] + t0, t1 = t + return t0 + t1, 'zzz' + + +def identity(t): + # type: (Tuple[int, int]) -> Tuple[int, int] + + # doesn't work + #return t + + # This works + a, b = t + return a, b + + def run_tests(): # type: () -> None i, s = f(0) log("i = %d", i) log("s = %s", s) + log('') + + i, s = g((3, 4)) + log("i = %d", i) + log("s = %s", s) + log('') + + a, b = identity((8, 9)) + log("a = %d", a) + log("b = %d", b) + log('') items = [] # type: List[Tuple[int, str]] items.append((43, 'bar'))