Files
littlefs/scripts/stack.py
Christopher Haster 56d888933f scripts: Reworked stack.py to use dwarf, dropped -fcallgraph-info=su
There were a lot of small challenges (see previous commits), but this
commit reworks stack.py to rely only on dwarf-info and symbols to build
stack + callgraph info.

Not only does this remove an annoying dependency on a GCC-specific flag,
but it also should give us more correct stack measurements by only
penalizing calls for the stack usage at the call site. This should
better account for things like shrinkwrapping, which make the
-fcallgraph-info=su results look worse than they actually are.

To make this work required jumping through a couple hoops:

1. Map symbols -> dwarf entries by address (DW_AT_low_pc).

   We use symbols here to make sure function names line up with other
   scripts.

   Note that there can be multiple dwarf entries with the same name due
   to optimization passes. Apparently the optimized name is not included
   because that would be too useful.

2. Find each function's frame info.

   This is stored in the .debug_frame section (objdump --dwarf=frames),
   and requires _yet another state machine_ to parse, but gives us the
   stack frame info for each function at the instruction level, so
   that's nice.

3. Find call sites (DW_TAG_call_site).

   The hierarchical nesting of DW_TAG_lexical_blocks gets a bit annoying
   here, but ultimately we can find all DW_TAG_call_sites by looking at
   the DW_TAG_subprogram's children tags.

4. Map call sites to frame info.

   This gets funky.

   Finding the target function is simple enough, DW_AT_call_origin
   contains its dwarf offset (but why is this the _origin_?). But we
   don't actually know what address the call originated from.

   Fortunately we do know the return address, DW_AT_call_return_pc?

   The instruction before DW_AT_call_return_pc should be the call
   instruction. Subtracting 1 will awkwardly put us in the middle of the
   instruction, but it should at least map to the correct stack frame?
   And without ISA-specific info it's the best we can do.

It's messy, but this should be all the info we need.

---

To build confidence in the new script, I included the --no-shrinkwrap
flag, which reverts to penalizing each call site for the function's
worst-case stack frame. This makes it easy to compare against the
-fcallgraph-info=su approach:

  with -fcallgraph-info=su:          2624
  with --dwarf=info --no-shrinkwrap: 2624

I was hoping that accounting for shrinkwrap-like optimizations would
reveal a lower stack cost, but for better or worse it seems that
worst-case stack usage is unchanged:

  with --dwarf=info --no-shrinkwrap: 2624
  with --dwarf=info:                 2624

Still, it's good to know that our stack measurement is correct.
2024-12-16 18:01:46 -06:00

1549 lines
51 KiB
Python
Executable File

#!/usr/bin/env python3
#
# Script to find stack usage at the function level. Will detect recursion and
# report as infinite stack usage.
#
# Example:
# ./scripts/stack.py lfs.ci lfs_util.ci -Slimit
#
# Copyright (c) 2022, The littlefs authors.
# SPDX-License-Identifier: BSD-3-Clause
#
# prevent local imports
__import__('sys').path.pop(0)
import collections as co
import csv
import itertools as it
import functools as ft
import math as mt
import os
import re
import subprocess as sp
OBJDUMP_PATH = ['objdump']
# integer fields
class RInt(co.namedtuple('RInt', 'x')):
    """Integer result field that may also be +-infinity.

    Wraps a single value ``x`` and provides the formatting helpers
    (``table``, ``diff``, ``ratio``) and the arithmetic used when
    rendering and merging results.

    The constructor accepts another RInt (returned as is), an int, an
    infinite float, or a string. Strings are parsed with ``int(x, 0)``,
    so 0x/0o/0b prefixes work, and ``∞``/``inf`` with an optional sign
    is accepted as infinity. Any other string re-raises ValueError.
    """
    __slots__ = ()

    def __new__(cls, x=0):
        if isinstance(x, RInt):
            return x
        if isinstance(x, str):
            try:
                x = int(x, 0)
            except ValueError:
                # also accept +-∞ and +-inf
                if re.match(r'^\s*\+?\s*(?:∞|inf)\s*$', x):
                    x = mt.inf
                elif re.match(r'^\s*-\s*(?:∞|inf)\s*$', x):
                    x = -mt.inf
                else:
                    raise
        # anything that is neither an int nor an infinity is coerced
        if not (isinstance(x, int) or mt.isinf(x)):
            x = int(x)
        return super().__new__(cls, x)

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.x)

    def __str__(self):
        # render infinities with the same symbols __new__ parses, so
        # that str() output can be fed back into RInt()
        if self.x == mt.inf:
            return '∞'
        elif self.x == -mt.inf:
            return '-∞'
        else:
            return str(self.x)

    def __bool__(self):
        return bool(self.x)

    def __int__(self):
        assert not mt.isinf(self.x)
        return self.x

    def __float__(self):
        return float(self.x)

    # placeholder shown in tables when a field is missing
    none = '%7s' % '-'

    def table(self):
        """Return the value right-aligned in a 7-character column."""
        return '%7s' % (self,)

    def diff(self, other):
        """Return the signed difference self-other as a 7-wide string.

        Either side may be None/falsy, in which case it counts as 0.
        """
        new = self.x if self else 0
        old = other.x if other else 0
        diff = new - old
        if diff == +mt.inf:
            return '%7s' % '+∞'
        elif diff == -mt.inf:
            return '%7s' % '-∞'
        else:
            return '%+7d' % diff

    def ratio(self, other):
        """Return the relative change (new-old)/old as a float.

        Either side may be None/falsy, in which case it counts as 0.
        Infinite or zero operands are mapped to 0.0 or +-inf rather
        than raising.
        """
        new = self.x if self else 0
        old = other.x if other else 0
        if mt.isinf(new) and mt.isinf(old):
            return 0.0
        elif mt.isinf(new):
            return +mt.inf
        elif mt.isinf(old):
            return -mt.inf
        elif not old and not new:
            return 0.0
        elif not old:
            return +mt.inf
        else:
            return (new-old) / old

    def __pos__(self):
        return self.__class__(+self.x)

    def __neg__(self):
        return self.__class__(-self.x)

    def __abs__(self):
        return self.__class__(abs(self.x))

    def __add__(self, other):
        return self.__class__(self.x + other.x)

    def __sub__(self, other):
        return self.__class__(self.x - other.x)

    def __mul__(self, other):
        return self.__class__(self.x * other.x)

    def __truediv__(self, other):
        # division by zero saturates to +-∞ instead of raising
        if not other:
            if self >= self.__class__(0):
                return self.__class__(+mt.inf)
            else:
                return self.__class__(-mt.inf)
        # note this is floor division, results stay integers
        return self.__class__(self.x // other.x)

    def __mod__(self, other):
        return self.__class__(self.x % other.x)
# size results
class StackResult(co.namedtuple('StackResult', [
        'file', 'function',
        'frame', 'limit',
        'children', 'notes'])):
    """One row of stack results for a function.

    ``frame`` and ``limit`` are stored as RInt. ``children`` and
    ``notes`` are lists and default to fresh empty lists.

    The underscore class attributes describe the row to the generic
    fold/table helpers: which fields identify a row (``_by``), which
    are numeric (``_fields``), the default sort order (``_sort``), the
    field types (``_types``), and the attribute names holding children
    and notes.
    """
    _by = ['file', 'function']
    _fields = ['frame', 'limit']
    _sort = ['limit', 'frame']
    _types = {'frame': RInt, 'limit': RInt}
    _children = 'children'
    _notes = 'notes'

    __slots__ = ()

    def __new__(cls, file='', function='', frame=0, limit=0,
            children=None, notes=None):
        # never share list instances between results
        if children is None:
            children = []
        if notes is None:
            notes = []
        return super().__new__(cls,
                file, function,
                RInt(frame), RInt(limit),
                children, notes)

    def __add__(self, other):
        # merging keeps the left-hand identity, sums the frames, takes
        # the worse limit, and concatenates children and notes
        merged_frame = self.frame + other.frame
        merged_limit = max(self.limit, other.limit)
        return StackResult(
                file=self.file,
                function=self.function,
                frame=merged_frame,
                limit=merged_limit,
                children=self.children + other.children,
                notes=self.notes + other.notes)
def openio(path, mode='r', buffering=-1):
    """Open a path, treating '-' as stdin/stdout.

    For '-' a duplicate of the stdin file descriptor (when 'r' is in
    mode) or of the stdout file descriptor (otherwise) is opened, so
    closing the returned file does not close the process's own
    stdin/stdout. Any other path is passed straight to open().
    """
    # allow '-' for stdin/stdout
    if path == '-':
        # sys is not among the module-level imports at the top of this
        # file, so import it here to keep this helper self-contained
        import sys
        if 'r' in mode:
            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
        else:
            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
    else:
        return open(path, mode, buffering)
class Sym(co.namedtuple('Sym', [
        'name', 'global_', 'section', 'addr', 'size'])):
    """A symbol: its name, global flag, section, address and size."""
    __slots__ = ()

    def __new__(cls, name, global_, section, addr, size):
        return super().__new__(cls, name, global_, section, addr, size)

    def __repr__(self):
        # addresses and sizes read better in hex
        parts = (
                type(self).__name__,
                self.name,
                self.global_,
                self.section,
                self.addr,
                self.size)
        return '%s(%r, %r, %r, 0x%x, 0x%x)' % parts
class SymInfo:
def __init__(self, syms):
self.syms = syms
def get(self, k, d=None):
# allow lookup by both symbol and address
if isinstance(k, str):
# organize by symbol, note multiple symbols can share a name
if not hasattr(self, '_by_sym'):
by_sym = {}
for sym in self.syms:
if sym.name not in by_sym:
by_sym[sym.name] = []
if sym not in by_sym[sym.name]:
by_sym[sym.name].append(sym)
self._by_sym = by_sym
return self._by_sym.get(k, d)
else:
import bisect
# organize by address
if not hasattr(self, '_by_addr'):
# sort and keep largest/first when duplicates
syms = self.syms.copy()
syms.sort(key=lambda x: (x.addr, -x.size))
by_addr = []
for sym in syms:
if (len(by_addr) == 0
or by_addr[-1].addr != sym.addr):
by_addr.append(sym)
self._by_addr = by_addr
# find sym by range
i = bisect.bisect(self._by_addr, k,
key=lambda x: x.addr)
# check that we're actually in this sym's size
if i > 0 and k < self._by_addr[i-1].addr+self._by_addr[i-1].size:
return self._by_addr[i-1]
else:
return d
def __getitem__(self, k):
v = self.get(k)
if v is None:
raise KeyError(k)
return v
def __contains__(self, k):
return self.get(k) is not None
def __len__(self):
return len(self.syms)
def __iter__(self):
return iter(self.syms)
def globals(self):
return SymInfo([sym for sym in self.syms
if sym.global_])
def section(self, section):
return SymInfo([sym for sym in self.syms
# note we accept prefixes
if s.startswith(section)])
def collect_syms(obj_path, global_=False, sections=None, *,
        objdump_path=OBJDUMP_PATH,
        **args):
    """Collect symbols from an object file via ``objdump --syms``.

    obj_path is the object file to inspect. If global_ is true, only
    global symbols are kept. If sections is given, only symbols whose
    section starts with one of these prefixes are kept. objdump_path is
    the command (as a list, may carry extra args) used to run objdump.
    If ``verbose`` is set in args, the command line is printed.

    Zero-sized symbols are always skipped. Returns a SymInfo. Raises
    subprocess.CalledProcessError if objdump exits with non-zero status.
    """
    symbol_pattern = re.compile(
            r'^(?P<addr>[0-9a-fA-F]+)'
                r' (?P<scope>.).*'
                r'\s+(?P<section>[^\s]+)'
                r'\s+(?P<size>[0-9a-fA-F]+)'
                r'\s+(?P<name>[^\s]+)\s*$')

    # find symbol addresses and sizes
    syms = []
    cmd = objdump_path + ['--syms', obj_path]
    if args.get('verbose'):
        # shlex is not among the module-level imports at the top of
        # this file, so import it where it is needed
        import shlex
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
            stdout=sp.PIPE,
            universal_newlines=True,
            errors='replace',
            close_fds=False)
    for line in proc.stdout:
        m = symbol_pattern.match(line)
        if m:
            name = m.group('name')
            scope = m.group('scope')
            section = m.group('section')
            addr = int(m.group('addr'), 16)
            size = int(m.group('size'), 16)
            # skip non-globals?
            # l   => local
            # g   => global
            # u   => unique global
            # ' ' => neither
            # !   => local + global
            global__ = scope not in 'l '
            if global_ and not global__:
                continue
            # filter by section? note we accept prefixes
            if (sections is not None
                    and not any(section.startswith(prefix)
                        for prefix in sections)):
                continue
            # skip zero sized symbols
            if not size:
                continue
            # note multiple symbols can share a name
            syms.append(Sym(name, global__, section, addr, size))
    proc.wait()
    if proc.returncode != 0:
        raise sp.CalledProcessError(proc.returncode, proc.args)

    return SymInfo(syms)
def collect_dwarf_files(obj_path, *,
        objdump_path=OBJDUMP_PATH,
        **args):
    """Collect the dwarf file table via ``objdump --dwarf=rawline``.

    obj_path is the object file to inspect. objdump_path is the command
    (as a list, may carry extra args) used to run objdump. If
    ``verbose`` is set in args, the command line is printed.

    Returns an OrderedDict mapping file number to path. Paths under the
    current working directory are made relative, all others absolute.
    Raises subprocess.CalledProcessError if objdump exits with non-zero
    status.
    """
    line_pattern = re.compile(
            r'^\s*(?P<no>[0-9]+)'
                r'(?:\s+(?P<dir>[0-9]+))?'
                r'.*\s+(?P<path>[^\s]+)\s*$')

    # find source paths
    dirs = co.OrderedDict()
    files = co.OrderedDict()
    # note objdump-path may contain extra args
    cmd = objdump_path + ['--dwarf=rawline', obj_path]
    if args.get('verbose'):
        # shlex is not among the module-level imports at the top of
        # this file, so import it where it is needed
        import shlex
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
            stdout=sp.PIPE,
            universal_newlines=True,
            errors='replace',
            close_fds=False)
    for line in proc.stdout:
        # note that files contain references to dirs, which we
        # dereference as soon as we see them as each file table
        # follows a dir table
        m = line_pattern.match(line)
        if m:
            if not m.group('dir'):
                # found a directory entry
                dirs[int(m.group('no'))] = m.group('path')
            else:
                # found a file entry
                dir_no = int(m.group('dir'))
                if dir_no in dirs:
                    files[int(m.group('no'))] = os.path.join(
                            dirs[dir_no],
                            m.group('path'))
                else:
                    files[int(m.group('no'))] = m.group('path')
    proc.wait()
    if proc.returncode != 0:
        raise sp.CalledProcessError(proc.returncode, proc.args)

    # simplify paths
    cwd = os.getcwd()
    files_ = co.OrderedDict()
    for no, file in files.items():
        if os.path.commonpath([cwd, os.path.abspath(file)]) == cwd:
            files_[no] = os.path.relpath(file)
        else:
            files_[no] = os.path.abspath(file)
    files = files_

    return files
# each dwarf entry can have attrs and children entries
class DwarfEntry:
    """A single dwarf entry: a tag plus its attributes and children.

    ``level`` is the nesting level, ``off`` the entry's offset, ``tag``
    the DW_TAG_* name, ``ats`` a dict of attribute name to raw string
    value, and ``children`` the list of nested entries.
    """
    def __init__(self, level, off, tag, ats={}, children=[]):
        self.level = level
        self.off = off
        self.tag = tag
        # empty/missing containers are replaced with fresh ones, so the
        # shared default arguments are never stored or mutated
        self.ats = ats if ats else {}
        self.children = children if children else []

    def get(self, k, d=None):
        return self.ats.get(k, d)

    def __getitem__(self, k):
        return self.ats[k]

    def __contains__(self, k):
        return k in self.ats

    def __repr__(self):
        parts = (
                self.__class__.__name__,
                self.level,
                self.off,
                self.tag,
                self.ats)
        return '%s(%d, 0x%x, %r, %r)' % parts

    @ft.cached_property
    def name(self):
        """The entry's name with struct/union/enum prefix, or None."""
        if 'DW_AT_name' not in self:
            return None
        # the name is whatever follows the last colon of the raw value
        bare = self['DW_AT_name'].split(':')[-1].strip()
        # prefix with struct/union/enum
        prefix = {
                'DW_TAG_structure_type': 'struct ',
                'DW_TAG_union_type': 'union ',
                'DW_TAG_enumeration_type': 'enum ',
            }.get(self.tag, '')
        return prefix + bare

    @ft.cached_property
    def addr(self):
        """Start address of a subprogram (DW_AT_low_pc), else None."""
        is_func = (self.tag == 'DW_TAG_subprogram')
        if not (is_func and 'DW_AT_low_pc' in self):
            return None
        return int(self['DW_AT_low_pc'], 0)

    @ft.cached_property
    def size(self):
        """Size of a subprogram (DW_AT_high_pc), else None."""
        is_func = (self.tag == 'DW_TAG_subprogram')
        if not (is_func and 'DW_AT_high_pc' in self):
            return None
        # this looks wrong, but high_pc does store the size,
        # for whatever reason
        return int(self['DW_AT_high_pc'], 0)

    def info(self, tags=None):
        """Return all descendants as a DwarfInfo keyed by offset.

        Descendants are visited depth-first, parents before their
        children. If tags is given only entries with one of these tags
        are kept, though filtered-out entries are still descended into.
        """
        # recursively flatten children
        def walk(parent):
            for sub in parent.children:
                # filter if requested
                wanted = (tags is None or sub.tag in tags)
                if wanted:
                    yield sub
                yield from walk(sub)

        flat = co.OrderedDict()
        for sub in walk(self):
            flat[sub.off] = sub
        return DwarfInfo(flat)
# a collection of dwarf entries
class DwarfInfo:
def __init__(self, entries):
self.entries = entries
def get(self, k, d=None):
# allow lookup by offset, symbol, or dwarf name
if not isinstance(k, str) and not hasattr(k, 'addr'):
return self.entries.get(k, d)
elif hasattr(k, 'addr'):
import bisect
# organize by address
if not hasattr(self, '_by_addr'):
# sort and keep largest/first when duplicates
entries = [entry
for entry in self.entries.values()
if entry.addr is not None
and entry.size is not None]
entries.sort(key=lambda x: (x.addr, -x.size))
by_addr = []
for entry in entries:
if (len(by_addr) == 0
or by_addr[-1].addr != entry.addr):
by_addr.append(entry)
self._by_addr = by_addr
# find entry by range
i = bisect.bisect(self._by_addr, k.addr,
key=lambda x: x.addr)
# check that we're actually in this entry's size
if (i > 0
and k.addr
< self._by_addr[i-1].addr
+ self._by_addr[i-1].size):
return self._by_addr[i-1]
else:
# fallback to lookup by name
return self.get(k.name, d)
else:
# organize entries by name
if not hasattr(self, '_by_name'):
self._by_name = {}
for entry in self.entries.values():
if entry.name is not None:
self._by_name[entry.name] = entry
# exact match? do a quick lookup
if k in self._by_name:
return self._by_name[k]
# find the best matching dwarf entry with a simple
# heuristic
#
# this can be different from the actual symbol because
# of optimization passes
else:
def key(entry):
i = entry.name.find(k)
if i == -1:
return None
return (i, len(entry.name)-(i+len(k)), entry.name)
return min(
filter(key, self._by_name.values()),
key=key,
default=d)
def __getitem__(self, k):
v = self.get(k)
if v is None:
raise KeyError(k)
return v
def __contains__(self, k):
return self.get(k) is not None
def __len__(self):
return len(self.entries)
def __iter__(self):
return iter(self.entries.values())
def collect_dwarf_info(obj_path, tags=None, *,
        objdump_path=OBJDUMP_PATH,
        **args):
    """Collect dwarf entries via ``objdump --dwarf=info``.

    obj_path is the object file to inspect. If tags is given, only
    entries with one of these tags are kept in the returned collection.
    Filtered-out entries are still attached as children of their
    parents. objdump_path is the command (as a list, may carry extra
    args) used to run objdump. If ``verbose`` is set in args, the
    command line is printed.

    Entries with a DW_AT_abstract_origin inherit any attributes they
    are missing from the referenced entry.

    Returns a DwarfInfo keyed by offset. Raises
    subprocess.CalledProcessError if objdump exits with non-zero status.
    """
    info_pattern = re.compile(
            r'^\s*<(?P<level>[^>]*)>'
                r'\s*<(?P<off>[^>]*)>'
                r'.*\(\s*(?P<tag>[^)]*?)\s*\)\s*$'
            r'|' r'^\s*<(?P<off_>[^>]*)>'
                r'\s*(?P<at>[^>:]*?)'
                r'\s*:(?P<v>.*)\s*$')

    # collect dwarf entries
    info = co.OrderedDict()
    entry = None
    # most recent entry seen at each nesting level, used to find parents
    levels = {}
    # note objdump-path may contain extra args
    cmd = objdump_path + ['--dwarf=info', obj_path]
    if args.get('verbose'):
        # shlex is not among the module-level imports at the top of
        # this file, so import it where it is needed
        import shlex
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
            stdout=sp.PIPE,
            universal_newlines=True,
            errors='replace',
            close_fds=False)
    for line in proc.stdout:
        # state machine here to find dwarf entries
        m = info_pattern.match(line)
        if m:
            if m.group('tag'):
                entry = DwarfEntry(
                        level=int(m.group('level'), 0),
                        off=int(m.group('off'), 16),
                        tag=m.group('tag').strip(),
                    )
                # keep track of unfiltered entries
                if tags is None or entry.tag in tags:
                    info[entry.off] = entry
                # store entry in parent
                levels[entry.level] = entry
                if entry.level-1 in levels:
                    levels[entry.level-1].children.append(entry)
            elif m.group('at'):
                # attribute lines belong to the most recent entry
                if entry:
                    entry.ats[m.group('at').strip()] = (
                            m.group('v').strip())
    proc.wait()
    if proc.returncode != 0:
        raise sp.CalledProcessError(proc.returncode, proc.args)

    # resolve abstract origins
    for entry in info.values():
        if 'DW_AT_abstract_origin' in entry:
            off = int(entry['DW_AT_abstract_origin'].strip('<>'), 0)
            origin = info[off]
            assert 'DW_AT_abstract_origin' not in origin, (
                    "Recursive abstract origin?")
            for k, v in origin.ats.items():
                if k not in entry.ats:
                    entry.ats[k] = v

    return DwarfInfo(info)
class Frame(co.namedtuple('Frame', ['addr', 'frame'])):
    """A stack frame size (``frame``) taking effect at ``addr``."""
    __slots__ = ()

    def __new__(cls, addr, frame):
        return super().__new__(cls, addr, frame)

    def __repr__(self):
        # addresses read better in hex
        return '%s(0x%x, %d)' % (
                self.__class__.__name__,
                self.addr,
                self.frame)
class FrameInfo:
def __init__(self, frames):
self.frames = frames
def get(self, k, d=None):
import bisect
# organize by address
if not hasattr(self, '_by_addr'):
# sort and keep largest when duplicates
frames = self.frames.copy()
frames.sort(key=lambda x: (x.addr, -x.frame))
by_addr = []
for frame in frames:
if (len(by_addr) == 0
or by_addr[-1].addr != frame.addr):
by_addr.append(frame)
self._by_addr = by_addr
# allow lookup by addr or range of addrs
if not isinstance(k, slice):
# find frame by addr
i = bisect.bisect(self._by_addr, k,
key=lambda x: x.addr)
if i > 0:
return self._by_addr[i-1]
else:
return d
else:
# find frame by range
if k.start is None:
start = 0
else:
start = max(
bisect.bisect(self._by_addr, k.start,
key=lambda x: x.addr) - 1,
0)
if k.stop is None:
stop = len(self._by_addr)
else:
stop = bisect.bisect(self._by_addr, k.stop,
key=lambda x: x.addr)
return FrameInfo(self._by_addr[start:stop])
def __getitem__(self, k):
v = self.get(k)
if v is None:
raise KeyError(k)
return v
def __contains__(self, k):
return self.get(k) is not None
def __len__(self):
return len(self.frames)
def __iter__(self):
return iter(self.frames)
def collect_dwarf_frames(obj_path, tags=None, *,
        objdump_path=OBJDUMP_PATH,
        **args):
    """Collect stack frame info via ``objdump --dwarf=frames``.

    obj_path is the object file to inspect. tags is accepted but not
    used by this function. objdump_path is the command (as a list, may
    carry extra args) used to run objdump. If ``verbose`` is set in
    args, the command line is printed.

    Returns a FrameInfo with one Frame per point where the CFA offset
    is set or restored. Raises subprocess.CalledProcessError if objdump
    exits with non-zero status. An unrecognized DW_CFA_* op trips an
    assert.
    """
    frame_pattern = re.compile(
            r'^\s*(?P<cie_off>[0-9a-fA-F]+)'
                r'\s+(?P<cie_size>[0-9a-fA-F]+)'
                r'\s+(?P<cie_id>[0-9a-fA-F]+)'
                r'\s+CIE\s*$'
            r'|' r'^\s*(?P<fde_off>[0-9a-fA-F]+)'
                r'\s+(?P<fde_size>[0-9a-fA-F]+)'
                r'\s+(?P<fde_id>[0-9a-fA-F]+)'
                r'\s+FDE'
                r'\s+cie=(?P<fde_cie>[0-9a-fA-F]+)'
                r'\s+pc=(?P<fde_pc_lo>[0-9a-fA-F]+)'
                r'\.\.(?P<fde_pc_hi>[0-9a-fA-F]+)\s*$'
            r'|' r'^\s*(?P<op>DW_CFA_[^\s:]*)\s*:?'
                r'\s*(?P<change>.*?)\s*$')

    # collect frame info
    #
    # Frame info is encoded in a state machine stored in fde/cie
    # entries. fde entries can share cie entries, otherwise they are
    # mostly the same.
    #
    cies = co.OrderedDict()
    fdes = co.OrderedDict()
    entry = None
    # note objdump-path may contain extra args
    cmd = objdump_path + ['--dwarf=frames', obj_path]
    if args.get('verbose'):
        # shlex is not among the module-level imports at the top of
        # this file, so import it where it is needed
        import shlex
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
            stdout=sp.PIPE,
            universal_newlines=True,
            errors='replace',
            close_fds=False)
    for line in proc.stdout:
        # state machine here to find fde/cie entries
        m = frame_pattern.match(line)
        if m:
            # start cie?
            if m.group('cie_off'):
                entry = {
                        'type': 'cie',
                        'off': int(m.group('cie_off'), 16),
                        'ops': []}
                cies[entry['off']] = entry
            # start fde?
            elif m.group('fde_off'):
                entry = {
                        'type': 'fde',
                        'off': int(m.group('fde_off'), 16),
                        'cie': int(m.group('fde_cie'), 16),
                        'pc': (
                            int(m.group('fde_pc_lo'), 16),
                            int(m.group('fde_pc_hi'), 16)),
                        'ops': []}
                fdes[entry['off']] = entry
            # found op?
            elif m.group('op'):
                entry['ops'].append((m.group('op'), m.group('change')))
            else:
                assert False
    proc.wait()
    if proc.returncode != 0:
        raise sp.CalledProcessError(proc.returncode, proc.args)

    # execute the state machine
    frames = []
    for _, fde in fdes.items():
        cie = cies[fde['cie']]
        cfa_loc = fde['pc'][0]
        cfa_stack = []
        # the cie's ops run first, then the fde's own ops
        for op, change in it.chain(cie['ops'], fde['ops']):
            # advance location
            if op in {
                    'DW_CFA_advance_loc',
                    'DW_CFA_advance_loc1',
                    'DW_CFA_advance_loc2',
                    'DW_CFA_advance_loc4'}:
                cfa_loc = int(change.split('to')[-1], 16)
            # change cfa offset
            elif op in {
                    'DW_CFA_def_cfa',
                    'DW_CFA_def_cfa_offset'}:
                cfa_off = int(change.split('ofs')[-1], 0)
                frames.append(Frame(cfa_loc, cfa_off))
            # push state, because of course we need a stack
            elif op == 'DW_CFA_remember_state':
                cfa_stack.append(cfa_off)
            # pop state
            elif op == 'DW_CFA_restore_state':
                cfa_off = cfa_stack.pop()
                # the restored offset takes effect from the current
                # location, without recording it any address after the
                # restore would keep mapping to the stale offset
                frames.append(Frame(cfa_loc, cfa_off))
            # ignore these
            elif op in {
                    'DW_CFA_nop',
                    'DW_CFA_offset',
                    'DW_CFA_restore'}:
                pass
            else:
                assert False, "Unknown frame op? %r" % op

    return FrameInfo(frames)
def collect(obj_paths, *,
        sources=None,
        everything=False,
        **args):
    """Measure per-function stack frames and limits for object files.

    For each object file this collects .text symbols, dwarf source
    paths, dwarf subprogram entries and dwarf frame info, then links
    callers to callees through DW_TAG_call_site entries.

    obj_paths is an iterable of object file paths. If sources is given,
    only functions whose file matches one of these paths (compared as
    absolute paths) are kept. Otherwise, unless everything is set, only
    files under the current working directory are kept. Unless
    everything is set, symbols starting with '__' are also dropped.
    args is forwarded to the collect_* helpers, and 'no_shrinkwrap' is
    also read here.

    Returns a list of StackResult, one per kept function.
    """
    funcs = []
    # global symbol name -> func, shared across all object files
    globals = co.OrderedDict()
    for obj_path in obj_paths:
        # find relevant symbols
        syms = collect_syms(obj_path,
                sections=['.text'],
                **args)

        # find source paths
        files = collect_dwarf_files(obj_path, **args)

        # find dwarf info, we only care about functions
        info = collect_dwarf_info(obj_path,
                tags={'DW_TAG_subprogram'},
                **args)

        # find frame info
        frames = collect_dwarf_frames(obj_path, **args)

        # find the max stack frame for each function
        #
        # locals maps dwarf offset -> func, offsets are only meaningful
        # within this object file
        locals = co.OrderedDict()
        for sym in syms:
            # discard internal functions
            if not everything and sym.name.startswith('__'):
                continue

            # find best matching dwarf entry, this may have a slightly
            # different name due to optimizations
            entry = info.get(sym)

            # if we have no file guess from obj path
            if entry is not None and 'DW_AT_decl_file' in entry:
                file = files.get(int(entry['DW_AT_decl_file']), '?')
            else:
                file = re.sub('(\.o)?$', '.c', obj_path, 1)

            # ignore filtered sources
            if sources is not None:
                if not any(os.path.abspath(file) == os.path.abspath(s)
                        for s in sources):
                    continue
            else:
                # default to only cwd
                if not everything and not os.path.commonpath([
                        os.getcwd(),
                        os.path.abspath(file)]) == os.getcwd():
                    continue

            # find the stack frames for each function
            frames_ = frames[sym.addr:sym.addr+sym.size]

            # calls is filled in below as a list of (addr, callee)
            func = {'file': file,
                    'sym': sym,
                    'entry': entry,
                    'frames': frames_,
                    'calls': []}
            funcs.append(func)

            # keep track of locals/globals
            if sym.global_:
                globals[sym.name] = func
            if entry is not None:
                locals[entry.off] = func

        # link local function calls via dwarf entries
        for caller in locals.values():
            if not caller['entry']:
                continue

            for call in caller['entry'].info(
                    tags={'DW_TAG_call_site'}):
                if ('DW_AT_call_return_pc' not in call
                        or 'DW_AT_call_origin' not in call):
                    continue

                # note DW_AT_call_return_pc refers to the address
                # _after_ the call
                #
                # we change this to the last byte in the call
                # instruction, which is a bit weird, but should at least
                # map to the right stack frame
                addr = int(call['DW_AT_call_return_pc'], 0) - 1
                off = int(call['DW_AT_call_origin'].strip('<>'), 0)

                # callee in locals?
                if off in locals:
                    callee = locals[off]
                else:
                    # if not, just keep track of the symbol and try to link
                    # during the global pass
                    callee = info[off]
                    if callee.name is None:
                        continue
                    callee = callee.name

                caller['calls'].append((addr, callee))

    # link global function calls via symbol
    #
    # callees still recorded by name are resolved against globals, names
    # that can't be resolved are dropped
    for caller in funcs:
        calls_ = []
        for addr, callee in caller['calls']:
            if isinstance(callee, str):
                if callee in globals:
                    calls_.append((addr, globals[callee]))
            else:
                calls_.append((addr, callee))
        caller['calls'] = calls_

    # recursive+cached limit finder
    #
    # returns (frame, limit), where frame is the function's largest
    # stack frame and limit is the largest frame+callee limit over all
    # of its calls
    def limitof(func, seen=set()):
        # found a cycle? stop here
        if id(func) in seen:
            return 0, 0
        # cached?
        if not hasattr(limitof, 'cache'):
            limitof.cache = {}
        if id(func) in limitof.cache:
            return limitof.cache[id(func)]

        # find max stack frame
        frame = max((frame.frame for frame in func['frames']), default=0)

        # find stack limit recursively
        limit = frame
        for addr, callee in func['calls']:
            if args.get('no_shrinkwrap'):
                # penalize every call with the function's max frame
                frame_ = frame
            else:
                # use stack frame at call site
                frame_ = func['frames'][addr].frame
            _, limit_ = limitof(callee, seen | {id(func)})
            limit = max(limit, frame_ + limit_)

        limitof.cache[id(func)] = frame, limit
        return frame, limit

    # recursive+cached children finder
    #
    # returns (children, notes), where children holds one StackResult
    # per call
    def childrenof(func, seen=set()):
        # found a cycle? stop here
        if id(func) in seen:
            return [], ['cycle detected']
        # cached?
        if not hasattr(childrenof, 'cache'):
            childrenof.cache = {}
        if id(func) in childrenof.cache:
            return childrenof.cache[id(func)]

        # find children recursively
        children = []
        for addr, callee in func['calls']:
            file_ = callee['file']
            name_ = callee['sym'].name
            frame_, limit_ = limitof(callee, seen | {id(func)})
            children_, notes_ = childrenof(callee, seen | {id(func)})
            children.append(StackResult(file_, name_, frame_, limit_,
                    children=children_,
                    notes=notes_))

        childrenof.cache[id(func)] = children, []
        return children, []

    # build results
    results = []
    for func in funcs:
        file = func['file']
        name = func['sym'].name
        frame, limit = limitof(func)
        children, notes = childrenof(func)
        results.append(StackResult(file, name, frame, limit,
                children=children,
                notes=notes))

    return results
def fold(Result, results, by=None, defines=()):
    """Merge results that share the same values for the ``by`` fields.

    Result is the result class, which provides ``_by`` and ``_fields``
    and whose instances support ``+``. results is the iterable of
    results to merge. by lists the field names to group by and defaults
    to ``Result._by``; an empty list merges everything into a single
    result. defines is an iterable of (field, allowed-values) pairs,
    and only results whose field value is in allowed-values are kept.

    Returns a list of merged results in first-seen group order. Prints
    an error and exits if a field in by/defines is not in
    ``Result._by`` or ``Result._fields``.
    """
    if by is None:
        by = Result._by

    for k in it.chain(by or [], (k for k, _ in defines)):
        if k not in Result._by and k not in Result._fields:
            # sys is not among the module-level imports at the top of
            # this file, so import it where it is needed
            import sys
            print("error: could not find field %r?" % k,
                    file=sys.stderr)
            sys.exit(-1)

    # filter by matching defines
    if defines:
        results = [r for r in results
                if all(getattr(r, k) in vs for k, vs in defines)]

    # organize results into conflicts
    folding = co.OrderedDict()
    for r in results:
        name = tuple(getattr(r, k) for k in by)
        if name not in folding:
            folding[name] = []
        folding[name].append(r)

    # merge conflicts
    folded = []
    for name, rs in folding.items():
        folded.append(sum(rs[1:], start=rs[0]))

    return folded
def table(Result, results, diff_results=None, *,
        by=None,
        fields=None,
        sort=None,
        diff=None,
        percent=None,
        all=False,
        compare=None,
        summary=False,
        depth=1,
        hot=None,
        detect_cycles=True,
        **_):
    """Print results as a text table, optionally diffed against others.

    Result is the result class, providing ``_by``, ``_fields``,
    ``_types``, ``_sort`` and optionally ``_children``/``_notes``.
    results are the results to show. If diff_results is given, rows are
    compared against these and unchanged rows are hidden unless ``all``
    is set.

    by and fields select the name fields and value fields to show, and
    default to ``Result._by`` and ``Result._fields``. sort is a list of
    (field, reverse) pairs applied as stable sorts. diff and percent
    select the diff and percent row layouts. compare is a sequence of
    name-field values identifying a row that all other rows are
    compared against. summary, if set and compare is not, prints only
    the TOTAL row. depth is how many levels of children to print below
    each row. hot lists fields used to reduce children to the single
    hottest path. detect_cycles marks and stops descending into rows
    already seen on the current path.
    """
    # the all parameter shadows the builtin, so rebind it; go through
    # the builtins module since __builtins__ is only a module in
    # __main__ and is a plain dict when this file is imported
    import builtins
    all_, all = all, builtins.all

    if by is None:
        by = Result._by
    if fields is None:
        fields = Result._fields
    types = Result._types

    # fold again
    results = fold(Result, results, by=by)
    if diff_results is not None:
        diff_results = fold(Result, diff_results, by=by)

    # reduce children to hot paths? only used by some scripts
    if hot:
        # subclass to reintroduce __dict__
        Result_ = Result
        class HotResult(Result_):
            _i = '_hot_i'
            _children = '_hot_children'
            _notes = '_hot_notes'

            def __new__(cls, r, i=None, children=None, notes=None):
                self = HotResult._make(r)
                self._hot_i = i
                self._hot_children = children if children is not None else []
                self._hot_notes = notes if notes is not None else []
                if hasattr(Result_, '_notes'):
                    self._hot_notes.extend(getattr(r, r._notes))
                return self

            def __add__(self, other):
                return HotResult(
                        Result_.__add__(self, other),
                        self._hot_i if other._hot_i is None
                            else other._hot_i if self._hot_i is None
                            else min(self._hot_i, other._hot_i),
                        self._hot_children + other._hot_children,
                        self._hot_notes + other._hot_notes)

        results_ = []
        for r in results:
            hot_ = []
            def recurse(results_, depth_, seen=set()):
                nonlocal hot_
                if not results_:
                    return

                # find the hottest result
                r = max(results_,
                        key=lambda r: tuple(
                            tuple((getattr(r, k),)
                                    if getattr(r, k, None) is not None
                                    else ()
                                for k in (
                                    [k] if k else [
                                        k for k in Result._sort
                                            if k in fields])
                                if k in fields)
                            for k in it.chain(hot, [None])))
                hot_.append(HotResult(r, i=len(hot_)))

                # found a cycle?
                if (detect_cycles
                        and tuple(getattr(r, k) for k in Result._by) in seen):
                    hot_[-1]._hot_notes.append('cycle detected')
                    return

                # recurse?
                if depth_ > 1:
                    recurse(getattr(r, Result._children),
                            depth_-1,
                            seen | {tuple(getattr(r, k) for k in Result._by)})

            recurse(getattr(r, Result._children), depth-1)
            results_.append(HotResult(r, children=hot_))

        Result = HotResult
        results = results_

    # organize by name
    table = {
            ','.join(str(getattr(r, k) or '') for k in by): r
            for r in results}
    diff_table = {
            ','.join(str(getattr(r, k) or '') for k in by): r
            for r in diff_results or []}
    names = [name
            for name in table.keys() | diff_table.keys()
            if diff_results is None
                or all_
                or any(
                    types[k].ratio(
                            getattr(table.get(name), k, None),
                            getattr(diff_table.get(name), k, None))
                        for k in fields)]

    # find compare entry if there is one
    if compare:
        compare_result = table.get(','.join(str(k) for k in compare))

    # sort again, now with diff info, note that python's sort is stable
    names.sort()
    if compare:
        names.sort(
                key=lambda n: (
                    table.get(n) == compare_result,
                    tuple(
                        types[k].ratio(
                                getattr(table.get(n), k, None),
                                getattr(compare_result, k, None))
                            for k in fields)),
                reverse=True)
    if diff or percent:
        names.sort(
                key=lambda n: tuple(
                    types[k].ratio(
                            getattr(table.get(n), k, None),
                            getattr(diff_table.get(n), k, None))
                        for k in fields),
                reverse=True)
    if sort:
        for k, reverse in reversed(sort):
            names.sort(
                    key=lambda n: tuple(
                        (getattr(table[n], k),)
                                if getattr(table.get(n), k, None) is not None
                                else ()
                            for k in (
                                [k] if k else [
                                    k for k in Result._sort
                                        if k in fields])),
                    reverse=reverse ^ (not k or k in Result._fields))

    # build up our lines
    #
    # each line is a list of cells, a cell is either a plain string or
    # a (string, notes) tuple
    lines = []

    # header
    header = ['%s%s' % (
                ','.join(by),
                ' (%d added, %d removed)' % (
                        sum(1 for n in table if n not in diff_table),
                        sum(1 for n in diff_table if n not in table))
                    if diff else '')
            if not summary else '']
    if not diff:
        for k in fields:
            header.append(k)
    else:
        for k in fields:
            header.append('o'+k)
        for k in fields:
            header.append('n'+k)
        for k in fields:
            header.append('d'+k)
    lines.append(header)

    # entry helper
    def table_entry(name, r, diff_r=None):
        entry = [name]
        # normal entry?
        if ((compare is None or r == compare_result)
                and not percent
                and not diff):
            for k in fields:
                entry.append(
                        (getattr(r, k).table(),
                                getattr(getattr(r, k), 'notes', lambda: [])())
                            if getattr(r, k, None) is not None
                            else types[k].none)
        # compare entry?
        elif not percent and not diff:
            for k in fields:
                entry.append(
                        (getattr(r, k).table()
                                if getattr(r, k, None) is not None
                                else types[k].none,
                            (lambda t: ['+∞%'] if t == +mt.inf
                                    else ['-∞%'] if t == -mt.inf
                                    else ['%+.1f%%' % (100*t)])(
                                types[k].ratio(
                                    getattr(r, k, None),
                                    getattr(compare_result, k, None)))))
        # percent entry?
        elif not diff:
            for k in fields:
                entry.append(
                        (getattr(r, k).table()
                                if getattr(r, k, None) is not None
                                else types[k].none,
                            (lambda t: ['+∞%'] if t == +mt.inf
                                    else ['-∞%'] if t == -mt.inf
                                    else ['%+.1f%%' % (100*t)])(
                                types[k].ratio(
                                    getattr(r, k, None),
                                    getattr(diff_r, k, None)))))
        # diff entry?
        else:
            for k in fields:
                entry.append(getattr(diff_r, k).table()
                        if getattr(diff_r, k, None) is not None
                        else types[k].none)
            for k in fields:
                entry.append(getattr(r, k).table()
                        if getattr(r, k, None) is not None
                        else types[k].none)
            for k in fields:
                entry.append(
                        (types[k].diff(
                                getattr(r, k, None),
                                getattr(diff_r, k, None)),
                            (lambda t: ['+∞%'] if t == +mt.inf
                                    else ['-∞%'] if t == -mt.inf
                                    else ['%+.1f%%' % (100*t)] if t
                                    else [])(
                                types[k].ratio(
                                    getattr(r, k, None),
                                    getattr(diff_r, k, None)))))
        # append any notes
        #
        # r is None for removed rows in a diff and for the TOTAL of an
        # empty table, and the last cell is a plain string when a field
        # is missing, so handle both instead of assuming a tuple
        if hasattr(Result, '_notes') and r is not None:
            notes = list(getattr(r, Result._notes))
            if isinstance(entry[-1], tuple):
                entry[-1] = (entry[-1][0], entry[-1][1] + notes)
            else:
                entry[-1] = (entry[-1], notes)
        return entry

    # recursive entry helper, only used by some scripts
    #
    # prefixes holds, in order, the branch marker for non-last and last
    # children, then the continuation prefix passed down to the
    # children of non-last and last children
    def recurse(results_, depth_, seen=set(),
            prefixes=('', '', '', '')):
        # build the children table at each layer
        results_ = fold(Result, results_, by=by)
        table_ = {
                ','.join(str(getattr(r, k) or '') for k in by): r
                for r in results_}
        names_ = list(table_.keys())

        # sort the children layer
        names_.sort()
        if hasattr(Result, '_i'):
            names_.sort(key=lambda n: getattr(table_[n], Result._i))
        if sort:
            for k, reverse in reversed(sort):
                names_.sort(
                        key=lambda n: tuple(
                            (getattr(table_[n], k),)
                                    if getattr(table_.get(n), k, None)
                                        is not None
                                    else ()
                                for k in (
                                    [k] if k else [
                                        k for k in Result._sort
                                            if k in fields])),
                        reverse=reverse ^ (not k or k in Result._fields))

        for i, name in enumerate(names_):
            r = table_[name]
            is_last = (i == len(names_)-1)

            line = table_entry(name, r)
            line = [x if isinstance(x, tuple) else (x, []) for x in line]
            # add prefixes
            line[0] = (prefixes[0+is_last] + line[0][0], line[0][1])
            # add cycle detection
            if detect_cycles and name in seen:
                line[-1] = (line[-1][0], line[-1][1] + ['cycle detected'])
            lines.append(line)

            # found a cycle?
            if detect_cycles and name in seen:
                continue

            # recurse?
            #
            # continuation prefixes are as wide as the branch markers
            # so deeper levels line up under their parent's name
            if depth_ > 1:
                recurse(getattr(r, Result._children),
                        depth_-1,
                        seen | {name},
                        (prefixes[2+is_last] + "|-> ",
                         prefixes[2+is_last] + "'-> ",
                         prefixes[2+is_last] + "|   ",
                         prefixes[2+is_last] + "    "))

    # entries
    if (not summary) or compare:
        for name in names:
            r = table.get(name)
            if diff_results is None:
                diff_r = None
            else:
                diff_r = diff_table.get(name)
            lines.append(table_entry(name, r, diff_r))

            # recursive entries
            if name in table and depth > 1:
                recurse(getattr(table[name], Result._children),
                        depth-1,
                        {name},
                        ("|-> ",
                         "'-> ",
                         "|   ",
                         "    "))

    # total, unless we're comparing
    if not (compare and not percent and not diff):
        r = next(iter(fold(Result, results, by=[])), None)
        if diff_results is None:
            diff_r = None
        else:
            diff_r = next(iter(fold(Result, diff_results, by=[])), None)
        lines.append(table_entry('TOTAL', r, diff_r))

    # homogenize
    lines = [
            [x if isinstance(x, tuple) else (x, []) for x in line]
            for line in lines]

    # find the best widths, note that column 0 contains the names and is
    # handled a bit differently
    widths = co.defaultdict(lambda: 7, {0: 7})
    nwidths = co.defaultdict(lambda: 0)
    for line in lines:
        for i, x in enumerate(line):
            widths[i] = max(widths[i], ((len(x[0])+1+4-1)//4)*4-1)
            if i != len(line)-1:
                nwidths[i] = max(nwidths[i], 1+sum(2+len(n) for n in x[1]))

    # print our table
    for line in lines:
        print('%-*s %s' % (
                widths[0], line[0][0],
                ' '.join('%*s%-*s' % (
                        widths[i], x[0],
                        nwidths[i], ' (%s)' % ', '.join(x[1]) if x[1] else '')
                    for i, x in enumerate(line[1:], 1))))
def _read_stack_results(path, defines, strict=False):
    """Load StackResults from a CSV file.

    A row is skipped when it fails any of the `defines` filters, or when
    none of the StackResult._fields columns holds a non-blank value.

    path    -- CSV path, opened with openio.
    defines -- iterable of (key, allowed_values) pairs; a row is kept only
               if every key is present and its value is in allowed_values.
    strict  -- when true, a TypeError raised while constructing a
               StackResult propagates to the caller; when false the
               offending row is silently dropped.

    Returns the list of StackResults in file order.
    """
    loaded = []
    with openio(path) as f:
        # restval='' makes short rows report '' for their missing columns
        for row in csv.DictReader(f, restval=''):
            # filter by matching defines
            if not all(k in row and row[k] in vs for k, vs in defines):
                continue
            # ignore rows that carry none of our result fields
            if not any(k in row and row[k].strip()
                    for k in StackResult._fields):
                continue
            try:
                # only forward the columns that are present and non-blank
                loaded.append(StackResult(
                    **{k: row[k] for k in StackResult._by
                        if k in row and row[k].strip()},
                    **{k: row[k] for k in StackResult._fields
                        if k in row and row[k].strip()}))
            except TypeError:
                if strict:
                    raise
    return loaded


def main(obj_paths,
        by=None,
        fields=None,
        defines=[],
        sort=None,
        **args):
    """Collect (or load), fold, sort, export and print stack results.

    obj_paths -- object files handed to collect, unless args['use'] is set.
    by        -- fields to group by; None falls back to StackResult._by for
                 CSV output and ['function'] for the printed table.
    fields    -- fields to emit; None falls back to StackResult._fields for
                 CSV output.
    defines   -- (key, allowed_values) filters applied to CSV rows and
                 passed on to fold. Never mutated here.
    sort      -- list of (field, reverse) pairs; a falsy field means
                 "sort by StackResult._sort".
    **args    -- remaining options; the ones read here are 'depth', 'hot',
                 'use', 'output', 'diff', 'percent', 'quiet' and
                 'error_on_recursion'. All of them are also forwarded to
                 collect and table.

    Returns None. Calls sys.exit(2) when args['error_on_recursion'] is set
    and any result has an infinite limit.
    """
    # the only `import sys` visible near this function sits under the
    # __main__ guard, so import it locally to keep main usable when this
    # file is imported as a module
    import sys

    # figure out depth: unset means 1 (or unlimited with 'hot'), and an
    # explicit 0 also means unlimited
    depth = args.get('depth')
    if depth is None:
        args['depth'] = mt.inf if args.get('hot') else 1
    elif depth == 0:
        args['depth'] = mt.inf

    # find sizes
    if not args.get('use', None):
        results = collect(obj_paths, **args)
    else:
        # best-effort load: rows that raise TypeError are dropped
        results = _read_stack_results(args['use'], defines)

    # fold
    results = fold(StackResult, results, by=by, defines=defines)

    # sort, note that python's sort is stable, so applying the requested
    # keys from last to first gives the first key the highest priority
    results.sort()
    if sort:
        for k, reverse in reversed(sort):
            keys = [k] if k else StackResult._sort
            results.sort(
                # None values become empty tuples so they stay comparable
                key=lambda r, keys=keys: tuple(
                    (getattr(r, k_),) if getattr(r, k_) is not None else ()
                    for k_ in keys),
                reverse=reverse ^ (not k or k in StackResult._fields))

    # write results to CSV
    if args.get('output'):
        with openio(args['output'], 'w') as f:
            by_ = by if by is not None else StackResult._by
            fields_ = fields if fields is not None else StackResult._fields
            writer = csv.DictWriter(f, by_ + list(fields_))
            writer.writeheader()
            for r in results:
                writer.writerow(
                    {k: getattr(r, k) for k in by_}
                    | {k: getattr(r, k) for k in fields_})

    # find previous results?
    diff_results = None
    diff_path = args.get('diff') or args.get('percent')
    if diff_path:
        diff_results = []
        try:
            # unlike the 'use' path, a TypeError on a row propagates here
            diff_results = _read_stack_results(
                diff_path, defines, strict=True)
        except FileNotFoundError:
            # a missing file to diff against is treated as no results
            pass

        # fold
        diff_results = fold(StackResult, diff_results,
            by=by, defines=defines)

    # print table
    if not args.get('quiet'):
        table(StackResult, results, diff_results,
            by=by if by is not None else ['function'],
            fields=fields,
            sort=sort,
            **args)

    # error on recursion
    if args.get('error_on_recursion') and any(
            mt.isinf(float(r.limit)) for r in results):
        sys.exit(2)
if __name__ == "__main__":
    # command-line entry point: declare the options, parse them, and hand
    # every option that was actually set to main()
    import argparse
    import sys
    parser = argparse.ArgumentParser(
        description="Find stack usage at the function level.",
        allow_abbrev=False)
    parser.add_argument(
        'obj_paths',
        nargs='*',
        help="Input *.o files.")
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help="Output commands that run behind the scenes.")
    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help="Don't show anything, useful with -o.")
    parser.add_argument(
        '-o', '--output',
        help="Specify CSV file to store results.")
    parser.add_argument(
        '-u', '--use',
        help="Don't parse anything, use this CSV file.")
    parser.add_argument(
        '-d', '--diff',
        help="Specify CSV file to diff against.")
    parser.add_argument(
        '-p', '--percent',
        help="Specify CSV file to diff against, but only show percentage "
            "change, not a full diff.")
    parser.add_argument(
        '-a', '--all',
        action='store_true',
        help="Show all, not just the ones that changed.")
    parser.add_argument(
        '-c', '--compare',
        # comma-separated pattern -> tuple of stripped components
        type=lambda x: tuple(v.strip() for v in x.split(',')),
        help="Compare results to the row matching this by pattern.")
    parser.add_argument(
        '-Y', '--summary',
        action='store_true',
        help="Only show the total.")
    parser.add_argument(
        '-b', '--by',
        action='append',
        choices=StackResult._by,
        help="Group by this field.")
    parser.add_argument(
        '-f', '--field',
        dest='fields',
        action='append',
        choices=StackResult._fields,
        help="Show this field.")
    parser.add_argument(
        '-D', '--define',
        dest='defines',
        action='append',
        # "key=v1,v2" -> (key, {v1, v2}), whitespace stripped; an argument
        # without '=' makes the inner lambda raise TypeError, which
        # argparse reports as an invalid value
        type=lambda x: (
            lambda k, vs: (
                k.strip(),
                {v.strip() for v in vs.split(',')})
            )(*x.split('=', 1)),
        help="Only include results where this field is this value.")

    class AppendSort(argparse.Action):
        # shared by -s and -S: both append (field, reverse) pairs to the
        # single namespace.sort list so their relative order is preserved
        def __call__(self, parser, namespace, value, option):
            if namespace.sort is None:
                namespace.sort = []
            # argparse passes the option string as typed, so the long
            # spelling must be recognized too, otherwise --reverse-sort
            # would be recorded as a forward sort
            namespace.sort.append(
                (value, option in ('-S', '--reverse-sort')))

    parser.add_argument(
        '-s', '--sort',
        nargs='?',
        action=AppendSort,
        help="Sort by this field.")
    parser.add_argument(
        '-S', '--reverse-sort',
        nargs='?',
        action=AppendSort,
        help="Sort by this field, but backwards.")
    parser.add_argument(
        '-F', '--source',
        dest='sources',
        action='append',
        help="Only consider definitions in this file. Defaults to "
            "anything in the current directory.")
    parser.add_argument(
        '--everything',
        action='store_true',
        help="Include builtin and libc specific symbols.")
    parser.add_argument(
        '--no-shrinkwrap',
        action='store_true',
        help="Ignore the effects of shrinkwrap optimizations (assume one "
            "big frame per function).")
    parser.add_argument(
        '-z', '--depth',
        nargs='?',
        # base 0 lets int() accept 0x/0o/0b prefixed values as well
        type=lambda x: int(x, 0),
        # a bare -z yields 0, which main() turns into unlimited depth
        const=0,
        help="Depth of function calls to show. 0 shows all calls unless "
            "we find a cycle. Defaults to 0.")
    parser.add_argument(
        '-t', '--hot',
        nargs='?',
        action='append',
        help="Show only the hot path for each function call.")
    parser.add_argument(
        '-e', '--error-on-recursion',
        action='store_true',
        help="Error if any functions are recursive.")
    parser.add_argument(
        '--objdump-path',
        # whitespace-split so the value may carry extra flags
        type=lambda x: x.split(),
        default=OBJDUMP_PATH,
        help="Path to the objdump executable, may include flags. "
            "Defaults to %r." % OBJDUMP_PATH)
    # drop unset (None) options so main()'s own defaults apply
    sys.exit(main(**{k: v
        for k, v in vars(parser.parse_intermixed_args()).items()
        if v is not None}))