Reimplemented coverage.py, using only gcov and with line+branch coverage

This also adds coverage support to the new test framework, which, thanks
to its reduced scope, no longer needs aggregation and can be much
simpler. Really all we need to do is pass --coverage to GCC; the
instrumented test runner then builds its .gcda files during testing in a
multi-process-safe manner.
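
For illustration, a rough sketch of the mechanism (the compile line and
file names are hypothetical, not taken from this build):

    import subprocess

    # --coverage makes GCC emit .gcno files at compile time and link in
    # libgcov; on exit, each process merges its counters into the matching
    # .gcda file under a lock, so parallel test processes don't clobber
    # each other
    subprocess.run(['gcc', '--coverage', '-o', 'runner', 'runner.c'],
        check=True)
    for _ in range(4):
        subprocess.run(['./runner'], check=True)  # counters accumulate across runs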

The addition of branch coverage leverages information that was already
available in both lcov and gcov; the previous script just never parsed it.

This was made easier by the addition of --json-format to gcov in GCC 9.0;
however, the lax backwards compatibility of gcov's intermediate output
formats is a bit concerning. Hopefully --json-format sticks around for a
while.
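
For reference, a minimal sketch of the gcov invocation the new collect()
builds, trimmed to the fields the script actually reads (the .gcda path is
illustrative):

    import json
    import subprocess as sp

    # -b adds branch counts, -t writes to stdout instead of .gcov files
    proc = sp.Popen(['gcov', '-b', '-t', '--json-format', 'lfs.t.a.gcda'],
        stdout=sp.PIPE,
        universal_newlines=True)
    data = json.load(proc.stdout)
    proc.wait()

    for file in data['files']:
        for line in file['lines']:
            hits = line['count']           # execution count for this line
            branches = line['branches']    # one entry per branch edge
            taken = sum(1 for b in branches if b['count'] > 0)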
Christopher Haster 2022-05-15 23:03:58 -05:00
parent 2b11f2b426
commit 4a7e94fb15
4 changed files with 300 additions and 225 deletions

Makefile

@@ -31,6 +31,7 @@ OBJ := $(SRC:%.c=$(BUILDDIR)%.o)
DEP := $(SRC:%.c=$(BUILDDIR)%.d)
ASM := $(SRC:%.c=$(BUILDDIR)%.s)
CGI := $(SRC:%.c=$(BUILDDIR)%.ci)
TAGCDA := $(SRC:%.c=$(BUILDDIR)%.t.a.gcda)
TESTS ?= $(wildcard tests/*.toml)
TEST_SRC ?= $(SRC) \
@@ -40,6 +41,8 @@ TEST_TSRC := $(TESTS:%.toml=$(BUILDDIR)%.t.c) $(TEST_SRC:%.c=$(BUILDDIR)%.t.c)
TEST_TASRC := $(TEST_TSRC:%.t.c=%.t.a.c)
TEST_TAOBJ := $(TEST_TASRC:%.t.a.c=%.t.a.o)
TEST_TADEP := $(TEST_TASRC:%.t.a.c=%.t.a.d)
TEST_TAGCNO := $(TEST_TASRC:%.t.a.c=%.t.a.gcno)
TEST_TAGCDA := $(TEST_TASRC:%.t.a.c=%.t.a.gcda)
ifdef DEBUG
override CFLAGS += -O0
@@ -106,15 +109,17 @@ size: $(OBJ)
tags:
$(CTAGS) --totals --c-types=+p $(shell find -H -name '*.h') $(SRC)
.PHONY: test_runner
test_runner: $(BUILDDIR)runners/test_runner
.PHONY: test-runner
test-runner: override CFLAGS+=--coverage
test-runner: $(BUILDDIR)runners/test_runner
.PHONY: test
test: test_runner
test: test-runner
rm -f $(TEST_TAGCDA)
./scripts/test.py --runner=$(BUILDDIR)runners/test_runner $(TESTFLAGS)
.PHONY: test_list
test_list: test_runner
.PHONY: test-list
test-list: test-runner
./scripts/test.py --runner=$(BUILDDIR)runners/test_runner $(TESTFLAGS) -l
.PHONY: code
@@ -134,8 +139,8 @@ structs: $(OBJ)
./scripts/structs.py $^ -S $(STRUCTSFLAGS)
.PHONY: coverage
coverage:
./scripts/coverage.py $(BUILDDIR)tests/*.toml.info -s $(COVERAGEFLAGS)
coverage: $(TAGCDA)
./scripts/coverage.py $^ -s $(COVERAGEFLAGS)
.PHONY: summary
summary: $(BUILDDIR)lfs.csv
@@ -194,10 +199,12 @@ clean:
rm -f $(BUILDDIR)lfs.csv
rm -f $(BUILDDIR)runners/test_runner
rm -f $(OBJ)
rm -f $(CGI)
rm -f $(DEP)
rm -f $(ASM)
rm -f $(CGI)
rm -f $(TEST_TSRC)
rm -f $(TEST_TASRC)
rm -f $(TEST_TAOBJ)
rm -f $(TEST_TADEP)
rm -f $(TEST_TAGCNO)
rm -f $(TEST_TAGCDA)
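
Taken together, a hedged Python sketch of the flow these Makefile targets
encode (paths are illustrative, not the build's exact layout):

    import glob
    import os
    import subprocess

    # reset counters so coverage reflects only this run, mirroring the
    # rm -f $(TEST_TAGCDA) step in the test target
    for gcda in glob.glob('**/*.t.a.gcda', recursive=True):
        os.remove(gcda)

    # each test process merges its counters into the shared .gcda files
    # on exit, which is what makes parallel testing safe here
    subprocess.run(['./scripts/test.py', '--runner=./runners/test_runner'],
        check=True)

    # report over whatever .gcda files the test runs produced
    subprocess.run(['./scripts/coverage.py',
        *glob.glob('**/*.t.a.gcda', recursive=True), '-s'],
        check=True)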

scripts/coverage.py

@@ -1,16 +1,25 @@
#!/usr/bin/env python3
#
# Parse and report coverage info from .info files generated by lcov
# Script to find test coverage. Basically just a big wrapper around gcov with
# some extra conveniences for comparing builds. Heavily inspired by Linux's
# Bloat-O-Meter.
#
import os
import glob
import csv
import re
import collections as co
import bisect as b
import csv
import glob
import itertools as it
import json
import os
import re
import shlex
import subprocess as sp
# TODO use explode_asserts to avoid counting assert branches?
# TODO use dwarf=info to find functions for inline functions?
INFO_PATHS = ['tests/*.toml.info']
GCDA_PATHS = ['*.gcda']
def openio(path, mode='r'):
if path == '-':
@@ -21,114 +30,214 @@ def openio(path, mode='r'):
else:
return open(path, mode)
def collect(paths, **args):
file = None
funcs = []
lines = co.defaultdict(lambda: 0)
pattern = re.compile(
'^(?P<file>SF:/?(?P<file_name>.*))$'
'|^(?P<func>FN:(?P<func_lineno>[0-9]*),(?P<func_name>.*))$'
'|^(?P<line>DA:(?P<line_lineno>[0-9]*),(?P<line_hits>[0-9]*))$')
for path in paths:
with open(path) as f:
for line in f:
m = pattern.match(line)
if m and m.group('file'):
file = m.group('file_name')
elif m and file and m.group('func'):
funcs.append((file, int(m.group('func_lineno')),
m.group('func_name')))
elif m and file and m.group('line'):
lines[(file, int(m.group('line_lineno')))] += (
int(m.group('line_hits')))
class CoverageResult(co.namedtuple('CoverageResult',
'line_hits,line_count,branch_hits,branch_count')):
__slots__ = ()
def __new__(cls, line_hits=0, line_count=0, branch_hits=0, branch_count=0):
return super().__new__(cls,
int(line_hits),
int(line_count),
int(branch_hits),
int(branch_count))
# map line numbers to functions
funcs.sort()
def func_from_lineno(file, lineno):
i = b.bisect(funcs, (file, lineno))
if i and funcs[i-1][0] == file:
return funcs[i-1][2]
def __add__(self, other):
return self.__class__(
self.line_hits + other.line_hits,
self.line_count + other.line_count,
self.branch_hits + other.branch_hits,
self.branch_count + other.branch_count)
def __sub__(self, other):
return CoverageDiff(other, self)
def key(self, **args):
line_ratio = (self.line_hits/self.line_count
if self.line_count else -1)
branch_ratio = (self.branch_hits/self.branch_count
if self.branch_count else -1)
if args.get('line_sort'):
return (-line_ratio, -branch_ratio)
elif args.get('reverse_line_sort'):
return (+line_ratio, +branch_ratio)
elif args.get('branch_sort'):
return (-branch_ratio, -line_ratio)
elif args.get('reverse_branch_sort'):
return (+branch_ratio, +line_ratio)
else:
return None
# reduce to function info
reduced_funcs = co.defaultdict(lambda: (0, 0))
for (file, line_lineno), line_hits in lines.items():
func = func_from_lineno(file, line_lineno)
if not func:
continue
hits, count = reduced_funcs[(file, func)]
reduced_funcs[(file, func)] = (hits + (line_hits > 0), count + 1)
_header = '%19s %19s' % ('hits/line', 'hits/branch')
def __str__(self):
return '%11s %7s %11s %7s' % (
'%d/%d' % (self.line_hits, self.line_count)
if self.line_count else '-',
'%.1f%%' % (100*self.line_hits/self.line_count)
if self.line_count else '-',
'%d/%d' % (self.branch_hits, self.branch_count)
if self.branch_count else '-',
'%.1f%%' % (100*self.branch_hits/self.branch_count)
if self.branch_count else '-')
results = []
for (file, func), (hits, count) in reduced_funcs.items():
# discard internal/testing functions (test_* injected with
# internal testing)
if not args.get('everything'):
if func.startswith('__') or func.startswith('test_'):
class CoverageDiff(co.namedtuple('CoverageDiff', 'old,new')):
__slots__ = ()
def line_hits_diff(self):
return self.new.line_hits - self.old.line_hits
def line_count_diff(self):
return self.new.line_count - self.old.line_count
def line_ratio(self):
return ((self.new.line_hits/self.new.line_count
if self.new.line_count else 1.0)
- (self.old.line_hits / self.old.line_count
if self.old.line_count else 1.0))
def branch_hits_diff(self):
return self.new.branch_hits - self.old.branch_hits
def branch_count_diff(self):
return self.new.branch_count - self.old.branch_count
def branch_ratio(self):
return ((self.new.branch_hits/self.new.branch_count
if self.new.branch_count else 1.0)
- (self.old.branch_hits / self.old.branch_count
if self.old.branch_count else 1.0))
def key(self, **args):
new_key = self.new.key(**args)
line_ratio = self.line_ratio()
branch_ratio = self.branch_ratio()
if new_key is not None:
return new_key
else:
return (-line_ratio, -branch_ratio)
def __bool__(self):
return bool(self.line_ratio() or self.branch_ratio())
_header = '%23s %23s %23s' % ('old', 'new', 'diff')
def __str__(self):
line_ratio = self.line_ratio()
branch_ratio = self.branch_ratio()
return '%11s %11s %11s %11s %11s %11s%-10s%s' % (
'%d/%d' % (self.old.line_hits, self.old.line_count)
if self.old.line_count else '-',
'%d/%d' % (self.old.branch_hits, self.old.branch_count)
if self.old.branch_count else '-',
'%d/%d' % (self.new.line_hits, self.new.line_count)
if self.new.line_count else '-',
'%d/%d' % (self.new.branch_hits, self.new.branch_count)
if self.new.branch_count else '-',
'%+d/%+d' % (self.line_hits_diff(), self.line_count_diff()),
'%+d/%+d' % (self.branch_hits_diff(), self.branch_count_diff()),
' (%+.1f%%)' % (100*line_ratio) if line_ratio else '',
' (%+.1f%%)' % (100*branch_ratio) if branch_ratio else '')
def collect(paths, **args):
results = {}
for path in paths:
# map to source file
src_path = re.sub('\.t\.a\.gcda$', '.c', path)
# TODO test this
if args.get('build_dir'):
src_path = re.sub('%s/*' % re.escape(args['build_dir']), '',
src_path)
# get coverage info through gcov's json output
# note, gcov-tool may contain extra args
cmd = args['gcov_tool'] + ['-b', '-t', '--json-format', path]
if args.get('verbose'):
print(' '.join(shlex.quote(c) for c in cmd))
proc = sp.Popen(cmd,
stdout=sp.PIPE,
stderr=sp.PIPE if not args.get('verbose') else None,
universal_newlines=True,
errors='replace')
data = json.load(proc.stdout)
proc.wait()
if proc.returncode != 0:
if not args.get('verbose'):
for line in proc.stderr:
sys.stdout.write(line)
sys.exit(-1)
# collect line/branch coverage
for file in data['files']:
if file['file'] != src_path:
continue
# discard .8449 suffixes created by optimizer
func = re.sub('\.[0-9]+', '', func)
results.append((file, func, hits, count))
return results
for line in file['lines']:
func = line.get('function_name', '(inlined)')
# discard internal functions (this includes injected test cases)
if not args.get('everything'):
if func.startswith('__'):
continue
results[(src_path, func, line['line_number'])] = (
line['count'],
CoverageResult(
line_hits=1 if line['count'] > 0 else 0,
line_count=1,
branch_hits=sum(
1 if branch['count'] > 0 else 0
for branch in line['branches']),
branch_count=len(line['branches'])))
# merge into functions, since this is what other scripts use
func_results = co.defaultdict(lambda: CoverageResult())
for (file, func, _), (_, result) in results.items():
func_results[(file, func)] += result
return func_results, results
def main(**args):
# find coverage
if not args.get('use'):
# find *.info files
# find coverage
if not args.get('use', None):
# find .gcda files
paths = []
for path in args['info_paths']:
for path in args['gcda_paths']:
if os.path.isdir(path):
path = path + '/*.gcov'
path = path + '/*.gcda'
for path in glob.glob(path):
paths.append(path)
if not paths:
print('no .info files found in %r?' % args['info_paths'])
print('no .gcda files found in %r?' % args['gcda_paths'])
sys.exit(-1)
results = collect(paths, **args)
# TODO consistent behavior between this and stack.py for deps?
results, line_results = collect(paths, **args)
else:
with openio(args['use']) as f:
r = csv.DictReader(f)
results = [
( result['file'],
result['name'],
int(result['coverage_hits']),
int(result['coverage_count']))
results = {
(result['file'], result['name']): CoverageResult(**{
k: v for k, v in result.items()
if k in CoverageResult._fields})
for result in r
if result.get('coverage_hits') not in {None, ''}
if result.get('coverage_count') not in {None, ''}]
total_hits, total_count = 0, 0
for _, _, hits, count in results:
total_hits += hits
total_count += count
if all(result.get(f) not in {None, ''}
for f in CoverageResult._fields)}
# find previous results?
if args.get('diff'):
try:
with openio(args['diff']) as f:
r = csv.DictReader(f)
prev_results = [
( result['file'],
result['name'],
int(result['coverage_hits']),
int(result['coverage_count']))
prev_results = {
(result['file'], result['name']): CoverageResult(**{
k: v for k, v in result.items()
if k in CoverageResult._fields})
for result in r
if result.get('coverage_hits') not in {None, ''}
if result.get('coverage_count') not in {None, ''}]
if all(result.get(f) not in {None, ''}
for f in CoverageResult._fields)}
except FileNotFoundError:
prev_results = []
prev_total_hits, prev_total_count = 0, 0
for _, _, hits, count in prev_results:
prev_total_hits += hits
prev_total_count += count
# write results to CSV
if args.get('output'):
merged_results = co.defaultdict(lambda: {})
@@ -142,163 +251,113 @@ def main(**args):
for result in r:
file = result.pop('file', '')
func = result.pop('name', '')
result.pop('coverage_hits', None)
result.pop('coverage_count', None)
for f in CoverageResult._fields:
result.pop(f, None)
merged_results[(file, func)] = result
other_fields = result.keys()
except FileNotFoundError:
pass
for file, func, hits, count in results:
merged_results[(file, func)]['coverage_hits'] = hits
merged_results[(file, func)]['coverage_count'] = count
for (file, func), result in results.items():
for f in CoverageResult._fields:
merged_results[(file, func)][f] = getattr(result, f)
with openio(args['output'], 'w') as f:
w = csv.DictWriter(f, ['file', 'name', *other_fields, 'coverage_hits', 'coverage_count'])
w = csv.DictWriter(f, ['file', 'name',
*other_fields, *CoverageResult._fields])
w.writeheader()
for (file, func), result in sorted(merged_results.items()):
w.writerow({'file': file, 'name': func, **result})
# print results
def dedup_entries(results, by='name'):
entries = co.defaultdict(lambda: (0, 0))
for file, func, hits, count in results:
entry = (file if by == 'file' else func)
entry_hits, entry_count = entries[entry]
entries[entry] = (entry_hits + hits, entry_count + count)
return entries
def diff_entries(olds, news):
diff = co.defaultdict(lambda: (0, 0, 0, 0, 0, 0, 0))
for name, (new_hits, new_count) in news.items():
diff[name] = (
0, 0,
new_hits, new_count,
new_hits, new_count,
(new_hits/new_count if new_count else 1.0) - 1.0)
for name, (old_hits, old_count) in olds.items():
_, _, new_hits, new_count, _, _, _ = diff[name]
diff[name] = (
old_hits, old_count,
new_hits, new_count,
new_hits-old_hits, new_count-old_count,
((new_hits/new_count if new_count else 1.0)
- (old_hits/old_count if old_count else 1.0)))
return diff
def sorted_entries(entries):
if args.get('coverage_sort'):
return sorted(entries, key=lambda x: (-(x[1][0]/x[1][1] if x[1][1] else -1), x))
elif args.get('reverse_coverage_sort'):
return sorted(entries, key=lambda x: (+(x[1][0]/x[1][1] if x[1][1] else -1), x))
def print_header(by):
if by == 'total':
entry = lambda k: 'TOTAL'
elif by == 'file':
entry = lambda k: k[0]
else:
return sorted(entries)
def sorted_diff_entries(entries):
if args.get('coverage_sort'):
return sorted(entries, key=lambda x: (-(x[1][2]/x[1][3] if x[1][3] else -1), x))
elif args.get('reverse_coverage_sort'):
return sorted(entries, key=lambda x: (+(x[1][2]/x[1][3] if x[1][3] else -1), x))
else:
return sorted(entries, key=lambda x: (-x[1][6], x))
def print_header(by=''):
if not args.get('diff'):
print('%-36s %19s' % (by, 'hits/line'))
else:
print('%-36s %19s %19s %11s' % (by, 'old', 'new', 'diff'))
def print_entry(name, hits, count):
print("%-36s %11s %7s" % (name,
'%d/%d' % (hits, count)
if count else '-',
'%.1f%%' % (100*hits/count)
if count else '-'))
def print_diff_entry(name,
old_hits, old_count,
new_hits, new_count,
diff_hits, diff_count,
ratio):
print("%-36s %11s %7s %11s %7s %11s%s" % (name,
'%d/%d' % (old_hits, old_count)
if old_count else '-',
'%.1f%%' % (100*old_hits/old_count)
if old_count else '-',
'%d/%d' % (new_hits, new_count)
if new_count else '-',
'%.1f%%' % (100*new_hits/new_count)
if new_count else '-',
'%+d/%+d' % (diff_hits, diff_count),
' (%+.1f%%)' % (100*ratio) if ratio else ''))
def print_entries(by='name'):
entries = dedup_entries(results, by=by)
entry = lambda k: k[1]
if not args.get('diff'):
print_header(by=by)
for name, (hits, count) in sorted_entries(entries.items()):
print_entry(name, hits, count)
print('%-36s %s' % (by, CoverageResult._header))
else:
prev_entries = dedup_entries(prev_results, by=by)
diff = diff_entries(prev_entries, entries)
print_header(by='%s (%d added, %d removed)' % (by,
sum(1 for _, old, _, _, _, _, _ in diff.values() if not old),
sum(1 for _, _, _, new, _, _, _ in diff.values() if not new)))
for name, (
old_hits, old_count,
new_hits, new_count,
diff_hits, diff_count, ratio) in sorted_diff_entries(
diff.items()):
if ratio or args.get('all'):
print_diff_entry(name,
old_hits, old_count,
new_hits, new_count,
diff_hits, diff_count,
ratio)
old = {entry(k) for k in results.keys()}
new = {entry(k) for k in prev_results.keys()}
print('%-36s %s' % (
'%s (%d added, %d removed)' % (by,
sum(1 for k in new if k not in old),
sum(1 for k in old if k not in new))
if by else '',
CoverageDiff._header))
def print_totals():
if not args.get('diff'):
print_entry('TOTAL', total_hits, total_count)
def print_entries(by):
if by == 'total':
entry = lambda k: 'TOTAL'
elif by == 'file':
entry = lambda k: k[0]
else:
ratio = ((total_hits/total_count
if total_count else 1.0)
- (prev_total_hits/prev_total_count
if prev_total_count else 1.0))
print_diff_entry('TOTAL',
prev_total_hits, prev_total_count,
total_hits, total_count,
total_hits-prev_total_hits, total_count-prev_total_count,
ratio)
entry = lambda k: k[1]
entries = co.defaultdict(lambda: CoverageResult())
for k, result in results.items():
entries[entry(k)] += result
if not args.get('diff'):
for name, result in sorted(entries.items(),
key=lambda p: (p[1].key(**args), p)):
print('%-36s %s' % (name, result))
else:
prev_entries = co.defaultdict(lambda: CoverageResult())
for k, result in prev_results.items():
prev_entries[entry(k)] += result
diff_entries = {name: entries[name] - prev_entries[name]
for name in (entries.keys() | prev_entries.keys())}
for name, diff in sorted(diff_entries.items(),
key=lambda p: (p[1].key(**args), p)):
if diff or args.get('all'):
print('%-36s %s' % (name, diff))
if args.get('quiet'):
pass
elif args.get('summary'):
print_header()
print_totals()
print_header('')
print_entries('total')
elif args.get('files'):
print_entries(by='file')
print_totals()
print_header('file')
print_entries('file')
print_entries('total')
else:
print_entries(by='name')
print_totals()
print_header('function')
print_entries('function')
print_entries('total')
# catch lack of coverage
if args.get('error_on_lines') and any(
r.line_hits < r.line_count for r in results.values()):
sys.exit(2)
elif args.get('error_on_branches') and any(
r.branch_hits < r.branch_count for r in results.values()):
sys.exit(3)
if __name__ == "__main__":
import argparse
import sys
parser = argparse.ArgumentParser(
description="Parse and report coverage info from .info files \
generated by lcov")
parser.add_argument('info_paths', nargs='*', default=INFO_PATHS,
help="Description of where to find *.info files. May be a directory \
or list of paths. *.info files will be merged to show the total \
coverage. Defaults to %r." % INFO_PATHS)
description="Find coverage info after running tests.")
parser.add_argument('gcda_paths', nargs='*', default=GCDA_PATHS,
help="Description of where to find *.gcda files. May be a directory \
or a list of paths. Defaults to %r." % GCDA_PATHS)
parser.add_argument('-v', '--verbose', action='store_true',
help="Output commands that run behind the scenes.")
parser.add_argument('-q', '--quiet', action='store_true',
help="Don't show anything, useful with -o.")
parser.add_argument('-o', '--output',
help="Specify CSV file to store results.")
parser.add_argument('-u', '--use',
help="Don't do any work, instead use this CSV file.")
help="Don't compile and find code sizes, instead use this CSV file.")
parser.add_argument('-d', '--diff',
help="Specify CSV file to diff code size against.")
parser.add_argument('-m', '--merge',
@@ -307,16 +366,25 @@ if __name__ == "__main__":
help="Show all functions, not just the ones that changed.")
parser.add_argument('-A', '--everything', action='store_true',
help="Include builtin and libc specific symbols.")
parser.add_argument('-s', '--coverage-sort', action='store_true',
help="Sort by coverage.")
parser.add_argument('-S', '--reverse-coverage-sort', action='store_true',
help="Sort by coverage, but backwards.")
parser.add_argument('-s', '--line-sort', action='store_true',
help="Sort by line coverage.")
parser.add_argument('-S', '--reverse-line-sort', action='store_true',
help="Sort by line coverage, but backwards.")
parser.add_argument('--branch-sort', action='store_true',
help="Sort by branch coverage.")
parser.add_argument('--reverse-branch-sort', action='store_true',
help="Sort by branch coverage, but backwards.")
parser.add_argument('-F', '--files', action='store_true',
help="Show file-level coverage.")
parser.add_argument('-Y', '--summary', action='store_true',
help="Only show the total coverage.")
parser.add_argument('-q', '--quiet', action='store_true',
help="Don't show anything, useful with -o.")
parser.add_argument('-e', '--error-on-lines', action='store_true',
help="Error if any lines are not covered.")
parser.add_argument('-E', '--error-on-branches', action='store_true',
help="Error if any branches are not covered.")
parser.add_argument('--gcov-tool', default=['gcov'],
type=lambda x: x.split(),
help="Path to the gcov tool to use.")
parser.add_argument('--build-dir',
help="Specify the relative build directory. Used to map object files \
to the correct source files.")
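
As a quick usage sketch of the new result types (the numbers and the name
here are invented for illustration):

    # merging sums hits and counts; subtraction pairs old/new into a diff
    old = CoverageResult(line_hits=90, line_count=100,
        branch_hits=40, branch_count=60)
    new = CoverageResult(line_hits=95, line_count=100,
        branch_hits=50, branch_count=60)

    total = old + new           # CoverageResult(185, 200, 90, 120)
    diff = new - old            # __sub__ builds CoverageDiff(old, new)
    print(diff.line_ratio())    # ~ +0.05 change in line coverage
    print(bool(diff))           # True, so -d/--diff would print this entry
    print('%-36s %s' % ('lfs_mount', diff))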

scripts/stack.py

@@ -414,14 +414,14 @@ if __name__ == "__main__":
help="Show all functions, not just the ones that changed.")
parser.add_argument('-A', '--everything', action='store_true',
help="Include builtin and libc specific symbols.")
parser.add_argument('-s', '--limit-sort', action='store_true',
help="Sort by stack limit.")
parser.add_argument('-S', '--reverse-limit-sort', action='store_true',
help="Sort by stack limit, but backwards.")
parser.add_argument('--frame-sort', action='store_true',
help="Sort by stack frame size.")
parser.add_argument('--reverse-frame-sort', action='store_true',
help="Sort by stack frame size, but backwards.")
parser.add_argument('-s', '--limit-sort', action='store_true',
help="Sort by stack limit.")
parser.add_argument('-S', '--reverse-limit-sort', action='store_true',
help="Sort by stack limit, but backwards.")
parser.add_argument('-L', '--depth', default=0, type=lambda x: int(x, 0),
nargs='?', const=float('inf'),
help="Depth of dependencies to show.")

scripts/test.py

@@ -985,11 +985,11 @@ if __name__ == "__main__":
test_parser.add_argument('--gdb-main', action='store_true',
help="Drop into gdb on test failure but stop at the beginning \
of main.")
test_parser.add_argument('--exec', default=[], type=lambda e: e.split(),
help="Run under another executable.")
test_parser.add_argument('--valgrind', action='store_true',
help="Run under Valgrind to find memory errors. Implicitly sets \
--isolate.")
test_parser.add_argument('--exec', default=[], type=lambda e: e.split(),
help="Run under another executable.")
# compilation flags
comp_parser = parser.add_argument_group('compilation options')
comp_parser.add_argument('-c', '--compile', action='store_true',
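
One detail easy to miss: --exec's type hook splits the wrapper string into
argv, so a value like "valgrind -q" prepends cleanly to the runner command.
A toy illustration (the runner path is illustrative):

    exec_type = lambda e: e.split()
    wrapper = exec_type('valgrind -q')         # ['valgrind', '-q']
    cmd = wrapper + ['./runners/test_runner']  # run the runner under valgrind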