Added scripts/code_size.py, for more in-depth code-size reporting
Inspired by Linux's Bloat-O-Meter, code_size.py wraps nm to provide
function-level code size, and supports detailed comparison between
different builds.

One difference from Bloat-O-Meter is that code_size.py invokes littlefs's
build system, much like test.py does, creating a duplicate build in the
"sizes" directory. This makes it easy to monitor the size of a
cross-compiled build while simultaneously testing on the host machine.
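
For day-to-day use, the simplest workflow is probably the new make target
(old.csv below is just a placeholder name; -o and -d are the script's flags
for saving results and diffing against a saved CSV):

    make code_size SFLAGS="-o old.csv"   # record a baseline
    # ... hack on the sources ...
    make code_size SFLAGS="-d old.csv"   # per-function size diff vs the baseline

Invoking ./scripts/code_size.py directly with the same flags works as well;
the script's header comment shows a few fancier loops built on top of this.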
geky committed Dec 20, 2020
1 parent 1a59954 commit d804c2d
Showing 2 changed files with 334 additions and 0 deletions.
6 changes: 6 additions & 0 deletions Makefile
@@ -6,6 +6,7 @@ endif
CC ?= gcc
AR ?= ar
SIZE ?= size
NM ?= nm

SRC += $(wildcard *.c bd/*.c)
OBJ := $(SRC:.c=.o)
@@ -29,6 +30,7 @@ override CFLAGS += -Wextra -Wshadow -Wjump-misses-init -Wundef

ifdef VERBOSE
override TFLAGS += -v
override SFLAGS += -v
endif


@@ -39,6 +41,9 @@ asm: $(ASM)
size: $(OBJ)
$(SIZE) -t $^

code_size:
./scripts/code_size.py $(SFLAGS)

test:
./scripts/test.py $(TFLAGS)
.SECONDEXPANSION:
@@ -65,3 +70,4 @@ clean:
rm -f $(DEP)
rm -f $(ASM)
rm -f tests/*.toml.*
rm -f sizes/*
328 changes: 328 additions & 0 deletions scripts/code_size.py
@@ -0,0 +1,328 @@
#!/usr/bin/env python3
#
# This script finds the code size at the function level, with/without
# static functions, and has some conveniences for comparing different
# versions. It's basically one big wrapper around nm, and may or may
# not have been written out of jealousy of Linux's Bloat-O-Meter.
#
# Here's a useful bash script to use while developing:
# ./scripts/code_size.py -qo old.csv
# while true ; do ./scripts/code_size.py -d old.csv ; inotifywait -rqe modify * ; done
#
# Or even better, to automatically update results on commit:
# ./scripts/code_size.py -qo commit.csv
# while true ; do ./scripts/code_size.py -d commit.csv -o current.csv ; git diff --exit-code --quiet && cp current.csv commit.csv ; inotifywait -rqe modify * ; done
#
# Or my personal favorite:
# ./scripts/code_size.py -qo master.csv && cp master.csv commit.csv
# while true ; do ( ./scripts/code_size.py -i commit.csv -d master.csv -s ; ./scripts/code_size.py -i current.csv -d master.csv -s ; ./scripts/code_size.py -d master.csv -o current.csv -s ) | awk 'BEGIN {printf "%-16s %7s %7s %7s\n","","old","new","diff"} (NR==2 && $1="commit") || (NR==4 && $1="prev") || (NR==6 && $1="current") {printf "%-16s %7s %7s %7s %s\n",$1,$2,$3,$5,$6}' ; git diff --exit-code --quiet && cp current.csv commit.csv ; inotifywait -rqe modify * ; done
#

import os
import sys
import itertools as it
import subprocess as sp
import shlex
import re
import csv
import collections as co

SIZEDIR = 'sizes'
RULES = """
define FLATTEN
%(sizedir)s/%(build)s.$(subst /,.,$(target)): $(target)
( echo "#line 1 \\"$$<\\"" ; %(cat)s $$< ) > $$@
%(sizedir)s/%(build)s.$(subst /,.,$(target:.c=.size)): \\
%(sizedir)s/%(build)s.$(subst /,.,$(target:.c=.o))
$(NM) --size-sort $$^ | sed 's/^/$(subst /,\\/,$(target:.c=.o)):/' > $$@
endef
$(foreach target,$(SRC),$(eval $(FLATTEN)))
-include %(sizedir)s/*.d
.SECONDARY:
%%.size: $(foreach t,$(subst /,.,$(SRC:.c=.size)),%%.$t)
cat $^ > $@
"""
CATS = {
'code': 'cat',
'code_inlined': 'sed \'s/^static\( inline\)\?//\'',
}

def build(**args):
# mkdir -p sizedir
os.makedirs(args['sizedir'], exist_ok=True)

if args.get('inlined', False):
builds = ['code', 'code_inlined']
else:
builds = ['code']

# write makefiles for the different types of builds
makefiles = []
targets = []
for build in builds:
path = args['sizedir'] + '/' + build
with open(path + '.mk', 'w') as mk:
mk.write(RULES.replace(4*' ', '\t') % dict(
sizedir=args['sizedir'],
build=build,
cat=CATS[build]))
mk.write('\n')

# pass on defines
for d in args['D']:
mk.write('%s: override CFLAGS += -D%s\n' % (
path+'.size', d))

makefiles.append(path + '.mk')
targets.append(path + '.size')

# build in parallel
cmd = (['make', '-f', 'Makefile'] +
list(it.chain.from_iterable(['-f', m] for m in makefiles)) +
[target for target in targets])
if args.get('verbose', False):
print(' '.join(shlex.quote(c) for c in cmd))
proc = sp.Popen(cmd,
stdout=sp.DEVNULL if not args.get('verbose', False) else None)
proc.wait()
if proc.returncode != 0:
sys.exit(-1)

# find results
build_results = co.defaultdict(lambda: 0)
# notes
# - filters type
# - discards internal/debug functions (leading __)
pattern = re.compile(
'^(?P<file>[^:]+)' +
':(?P<size>[0-9a-fA-F]+)' +
' (?P<type>[%s])' % re.escape(args['type']) +
' (?!__)(?P<name>.+?)$')
for build in builds:
path = args['sizedir'] + '/' + build
with open(path + '.size') as size:
for line in size:
match = pattern.match(line)
if match:
file = match.group('file')
# discard .8449 suffixes created by optimizer
name = re.sub('\.[0-9]+', '', match.group('name'))
size = int(match.group('size'), 16)
build_results[(build, file, name)] += size

    results = []
    seen = set()
    for (build, file, name), size in build_results.items():
        if build == 'code':
            results.append((file, name, size, False))
            seen.add((file, name))
        # only report inlined sizes for symbols the normal build dropped
        elif (build == 'code_inlined' and
                (file, name) not in seen):
            results.append((file, name, size, True))

return results

def main(**args):
# find results
if not args.get('input', None):
results = build(**args)
else:
with open(args['input']) as f:
r = csv.DictReader(f)
results = [
( result['file'],
result['name'],
int(result['size']),
bool(int(result.get('inlined', 0))))
for result in r
if (not bool(int(result.get('inlined', 0))) or
args.get('inlined', False))]

total = 0
for _, _, size, inlined in results:
if not inlined:
total += size

# find previous results?
if args.get('diff', None):
with open(args['diff']) as f:
r = csv.DictReader(f)
prev_results = [
( result['file'],
result['name'],
int(result['size']),
bool(int(result.get('inlined', 0))))
for result in r
if (not bool(int(result.get('inlined', 0))) or
args.get('inlined', False))]

prev_total = 0
for _, _, size, inlined in prev_results:
if not inlined:
prev_total += size

# write results to CSV
if args.get('output', None):
results.sort(key=lambda x: (-x[2], x))
with open(args['output'], 'w') as f:
w = csv.writer(f)
if args.get('inlined', False):
w.writerow(['file', 'name', 'size', 'inlined'])
for file, name, size, inlined in results:
w.writerow((file, name, size, int(inlined)))
else:
w.writerow(['file', 'name', 'size'])
for file, name, size, inlined in results:
w.writerow((file, name, size))

# print results
def dedup_functions(results):
functions = co.defaultdict(lambda: (0, True))
for _, name, size, inlined in results:
if not inlined:
functions[name] = (functions[name][0] + size, False)
for _, name, size, inlined in results:
if inlined and functions[name][1]:
functions[name] = (functions[name][0] + size, True)
return functions

def dedup_files(results):
files = co.defaultdict(lambda: 0)
for file, _, size, inlined in results:
if not inlined:
files[file] += size
return files

def diff_sizes(olds, news):
diff = co.defaultdict(lambda: (None, None, None))
for name, new in news.items():
diff[name] = (None, new, new)
for name, old in olds.items():
new = diff[name][1] or 0
diff[name] = (old, new, new-old)
return diff

def print_header(name=''):
if not args.get('diff', False):
print('%-40s %7s' % (name, 'size'))
else:
print('%-40s %7s %7s %7s' % (name, 'old', 'new', 'diff'))

def print_functions():
functions = dedup_functions(results)
functions = {
name+' (inlined)' if inlined else name: size
for name, (size, inlined) in functions.items()}

if not args.get('diff', None):
print_header('function')
for name, size in sorted(functions.items(),
key=lambda x: (-x[1], x)):
print("%-40s %7d" % (name, size))
else:
prev_functions = dedup_functions(prev_results)
prev_functions = {
name+' (inlined)' if inlined else name: size
for name, (size, inlined) in prev_functions.items()}
diff = diff_sizes(functions, prev_functions)
print_header('function (%d added, %d removed)' % (
sum(1 for old, _, _ in diff.values() if not old),
sum(1 for _, new, _ in diff.values() if not new)))
for name, (old, new, diff) in sorted(diff.items(),
key=lambda x: (-(x[1][2] or 0), x)):
if diff or args.get('all', False):
print("%-40s %7s %7s %+7d%s" % (
name, old or "-", new or "-", diff,
' (%+.2f%%)' % (100*((new-old)/old))
if old and new else
''))

def print_files():
files = dedup_files(results)

if not args.get('diff', None):
print_header('file')
for file, size in sorted(files.items(),
key=lambda x: (-x[1], x)):
print("%-40s %7d" % (file, size))
else:
prev_files = dedup_files(prev_results)
diff = diff_sizes(files, prev_files)
print_header('file (%d added, %d removed)' % (
sum(1 for old, _, _ in diff.values() if not old),
sum(1 for _, new, _ in diff.values() if not new)))
for name, (old, new, diff) in sorted(diff.items(),
key=lambda x: (-(x[1][2] or 0), x)):
if diff or args.get('all', False):
print("%-40s %7s %7s %+7d%s" % (
name, old or "-", new or "-", diff,
' (%+.2f%%)' % (100*((new-old)/old))
if old and new else
''))

def print_totals():
if not args.get('diff', None):
print("%-40s %7d" % ('TOTALS', total))
else:
print("%-40s %7s %7s %+7d%s" % (
'TOTALS', prev_total, total, total-prev_total,
' (%+.2f%%)' % (100*((total-prev_total)/total))
if prev_total and total else
''))

def print_status():
if not args.get('diff', None):
print(total)
else:
print("%d (%+.2f%%)" % (total, 100*((total-prev_total)/total)))

if args.get('quiet', False):
pass
elif args.get('status', False):
print_status()
elif args.get('summary', False):
print_header()
print_totals()
elif args.get('files', False):
print_files()
print_totals()
else:
print_functions()
print_totals()

if __name__ == "__main__":
import argparse
import sys
parser = argparse.ArgumentParser(
description="Find code size at the function level.")
parser.add_argument('sizedir', nargs='?', default=SIZEDIR,
help="Directory to store intermediary results. Defaults "
"to \"%s\"." % SIZEDIR)
parser.add_argument('-D', action='append', default=[],
help="Specify compile-time define.")
parser.add_argument('-v', '--verbose', action='store_true',
help="Output commands that run behind the scenes.")
parser.add_argument('-i', '--input',
help="Don't compile and find code sizes, instead use this CSV file.")
parser.add_argument('-o', '--output',
help="Specify CSV file to store results.")
parser.add_argument('-d', '--diff',
help="Specify CSV file to diff code size against.")
parser.add_argument('-a', '--all', action='store_true',
help="Show all functions, not just the ones that changed.")
    parser.add_argument('--inlined', action='store_true',
        help="Run a second compilation to find the sizes of functions normally "
            "removed by optimizations. These are marked \"(inlined)\" in the "
            "output and are not included in the total.")
parser.add_argument('--files', action='store_true',
help="Show file-level code sizes. Note this does not include padding! "
"So sizes may differ from other tools.")
parser.add_argument('-s', '--summary', action='store_true',
help="Only show the total code size.")
parser.add_argument('-S', '--status', action='store_true',
help="Show minimum info useful for a single-line status.")
parser.add_argument('-q', '--quiet', action='store_true',
help="Don't show anything, useful with -o.")
parser.add_argument('--type', default='tTrRdDbB',
help="Type of symbols to report, this uses the same single-character "
"type-names emitted by nm. Defaults to %(default)r.")
sys.exit(main(**vars(parser.parse_args())))
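
As background on where the numbers come from: the generated make rules build a
copy of each source file under the "sizes" directory, run nm --size-sort on the
resulting object, and prefix each symbol line with the original object name,
which is what the regex in build() then parses. Roughly (the object name and
the symbol line below are illustrative, not taken from a real build):

    nm --size-sort sizes/code.lfs.o | sed 's/^/lfs.o:/' > sizes/code.lfs.size
    # each line of the .size file then looks something like
    #   lfs.o:000002a4 T lfs_dir_commit
    # i.e. "<object>:<hex size> <symbol type> <name>", which the script sums
    # per (file, name) pair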
