scripts: Fixed O(n^2) slicing in Rbyd.fetch

Do you see the O(n^2) behavior in this loop?

  j = 0
  while j < len(data):
      word, d = fromleb(data[j:])
      j += d

The slice, data[j:], creates a O(n) copy every iteration of the loop.

A bit tricky. Or at least I found it tricky to notice. Maybe because
array indexing being cheap is baked into my brain...

Long story short, this repeated slicing resulted in O(n^2) behavior in
Rbyd.fetch and probably some other functions. Even though we don't care
_too_ much about performance in these scripts, having Rbyd.fetch run in
O(n^2) isn't great.

Tweaking all from* functions to take an optional index solves this, at
least on paper.

---

In practice I didn't actually find any measurable performance gain. I
guess array slicing in Python is optimized enough that the constant
factor takes over?

(Maybe it's being helped by us limiting Rbyd.fetch to block_size in most
scripts? I haven't tested NAND block sizes yet...)

Still, it's good to at least know this isn't a bottleneck.
This commit is contained in:
Christopher Haster
2025-04-14 14:27:44 -05:00
parent 8b11cea3f2
commit 0cea8b96fb
9 changed files with 317 additions and 293 deletions

View File

@ -200,61 +200,64 @@ def popc(x):
def parity(x):
return popc(x) & 1
def fromle32(data):
return struct.unpack('<I', data[0:4].ljust(4, b'\0'))[0]
def fromle32(data, j=0):
return struct.unpack('<I', data[j:j+4].ljust(4, b'\0'))[0]
def fromleb128(data):
def fromleb128(data, j=0):
word = 0
for i, b in enumerate(data):
word |= ((b & 0x7f) << 7*i)
d = 0
while j+d < len(data):
b = data[j+d]
word |= (b & 0x7f) << 7*d
word &= 0xffffffff
if not b & 0x80:
return word, i+1
return word, d+1
d += 1
return word, len(data)
def fromtag(data):
data = data.ljust(4, b'\0')
tag = struct.unpack('>H', data[:2])[0]
weight, d = fromleb128(data[2:])
size, d_ = fromleb128(data[2+d:])
return tag>>15, tag&0x7fff, weight, size, 2+d+d_
def fromtag(data, j=0):
d = 0
tag = struct.unpack('>H', data[j:j+2].ljust(2, b'\0'))[0]; d += 2
weight, d_ = fromleb128(data, j+d); d += d_
size, d_ = fromleb128(data, j+d); d += d_
return tag>>15, tag&0x7fff, weight, size, d
def frommdir(data):
def frombranch(data, j=0):
d = 0
block, d_ = fromleb128(data, j+d); d += d_
trunk, d_ = fromleb128(data, j+d); d += d_
cksum = fromle32(data, j+d); d += 4
return block, trunk, cksum, d
def frombtree(data, j=0):
d = 0
w, d_ = fromleb128(data, j+d); d += d_
block, trunk, cksum, d_ = frombranch(data, j+d); d += d_
return w, block, trunk, cksum, d
def frommdir(data, j=0):
blocks = []
d = 0
while d < len(data):
block, d_ = fromleb128(data[d:])
while j+d < len(data):
block, d_ = fromleb128(data, j+d)
blocks.append(block)
d += d_
return blocks
return tuple(blocks), d
def fromshrub(data):
def fromshrub(data, j=0):
d = 0
weight, d_ = fromleb128(data[d:]); d += d_
trunk, d_ = fromleb128(data[d:]); d += d_
return weight, trunk
weight, d_ = fromleb128(data, j+d); d += d_
trunk, d_ = fromleb128(data, j+d); d += d_
return weight, trunk, d
def frombranch(data):
def frombptr(data, j=0):
d = 0
block, d_ = fromleb128(data[d:]); d += d_
trunk, d_ = fromleb128(data[d:]); d += d_
cksum = fromle32(data[d:]); d += 4
return block, trunk, cksum
def frombtree(data):
d = 0
w, d_ = fromleb128(data[d:]); d += d_
block, trunk, cksum = frombranch(data[d:])
return w, block, trunk, cksum
def frombptr(data):
d = 0
size, d_ = fromleb128(data[d:]); d += d_
block, d_ = fromleb128(data[d:]); d += d_
off, d_ = fromleb128(data[d:]); d += d_
cksize, d_ = fromleb128(data[d:]); d += d_
cksum = fromle32(data[d:]); d += 4
return size, block, off, cksize, cksum
size, d_ = fromleb128(data, j+d); d += d_
block, d_ = fromleb128(data, j+d); d += d_
off, d_ = fromleb128(data, j+d); d += d_
cksize, d_ = fromleb128(data, j+d); d += d_
cksum = fromle32(data, j+d); d += 4
return size, block, off, cksize, cksum, d
def xxd(data, width=16):
for i in range(0, len(data), width):
@ -626,7 +629,7 @@ class Rbyd:
@classmethod
def _fetch(cls, data, block, trunk=None):
# fetch the rbyd
rev = fromle32(data[0:4])
rev = fromle32(data, 0)
cksum = 0
cksum_ = crc32c(data[0:4])
cksum__ = cksum_
@ -644,7 +647,7 @@ class Rbyd:
gcksumdelta_ = None
while j_ < len(data) and (not trunk or eoff <= trunk):
# read next tag
v, tag, w, size, d = fromtag(data[j_:])
v, tag, w, size, d = fromtag(data, j_)
if v != parity(cksum__):
break
cksum__ ^= 0x00000080 if v else 0
@ -667,7 +670,7 @@ class Rbyd:
# found a cksum?
else:
# check cksum
cksum___ = fromle32(data[j_:j_+4])
cksum___ = fromle32(data, j_)
if cksum__ != cksum___:
break
# commit what we have
@ -806,7 +809,7 @@ class Rbyd:
# descend down tree
j = self.trunk
while True:
_, alt, w, jump, d = fromtag(self.data[j:])
_, alt, w, jump, d = fromtag(self.data, j)
# found an alt?
if alt & TAG_ALT:
@ -822,7 +825,7 @@ class Rbyd:
if path:
# figure out which color
if alt & TAG_R:
_, nalt, _, _, _ = fromtag(self.data[j+jump+d:])
_, nalt, _, _, _ = fromtag(self.data, j+jump+d)
if nalt & TAG_R:
color = 'y'
else:
@ -845,7 +848,7 @@ class Rbyd:
if path:
# figure out which color
if alt & TAG_R:
_, nalt, _, _, _ = fromtag(self.data[j:])
_, nalt, _, _, _ = fromtag(self.data, j)
if nalt & TAG_R:
color = 'y'
else:
@ -1106,7 +1109,7 @@ class Btree:
# descend down branch?
if branch_ is not None and (
not depth or depth_ < depth):
block, trunk, cksum = frombranch(branch_.data)
block, trunk, cksum, _ = frombranch(branch_.data)
rbyd = Rbyd.fetchck(self.bd, block, trunk, name_.weight,
cksum)
@ -1337,7 +1340,7 @@ class Btree:
# found another branch
if branch_ is not None and (
not depth or depth_ < depth):
block, trunk, cksum = frombranch(branch_.data)
block, trunk, cksum, _ = frombranch(branch_.data)
rbyd = Rbyd.fetchck(self.bd, block, trunk, name_.weight,
cksum)
@ -1731,7 +1734,7 @@ class Mtree:
rattr_ = mroot.lookup(-1, TAG_MROOT, 0x3)
if rattr_ is None:
break
blocks_ = frommdir(rattr_.data)
blocks_, _ = frommdir(rattr_.data)
mroot = Mdir.fetch(bd, -1, blocks_)
mrootchain.append(mroot)
@ -1740,7 +1743,7 @@ class Mtree:
if not depth or len(mrootchain) < depth:
rattr_ = mroot.lookup(-1, TAG_MTREE, 0x3)
if rattr_ is not None:
w_, block_, trunk_, cksum_ = frombtree(rattr_.data)
w_, block_, trunk_, cksum_, _ = frombtree(rattr_.data)
mtree = Btree.fetchck(bd, block_, trunk_, w_, cksum_)
return cls(bd, mrootchain, mtree,
@ -1820,7 +1823,7 @@ class Mtree:
return (bid_, rbyd_, rid_), path_
else:
return (bid_, rbyd_, rid_)
blocks_ = frommdir(rattr_.data)
blocks_, _ = frommdir(rattr_.data)
mdir = Mdir.fetch(self.bd, mid, blocks_)
if path:
return mdir, path_
@ -2194,7 +2197,7 @@ class Mtree:
return (bid_, rbyd_, rid_), path_
else:
return (bid_, rbyd_, rid_)
blocks_ = frommdir(rattr_.data)
blocks_, _ = frommdir(rattr_.data)
mdir = Mdir.fetch(self.bd, self.mid(bid_), blocks_)
if path:
return mdir, path_
@ -2458,8 +2461,8 @@ class Config:
def __init__(self, mroot, tag, rattr):
super().__init__(mroot, tag, rattr)
d = 0
self.major, d_ = fromleb128(self.data[d:]); d += d_
self.minor, d_ = fromleb128(self.data[d:]); d += d_
self.major, d_ = fromleb128(self.data, d); d += d_
self.minor, d_ = fromleb128(self.data, d); d += d_
@property
def tuple(self):
@ -2498,8 +2501,8 @@ class Config:
def __init__(self, mroot, tag, rattr):
super().__init__(mroot, tag, rattr)
d = 0
block_size, d_ = fromleb128(self.data[d:]); d += d_
block_count, d_ = fromleb128(self.data[d:]); d += d_
block_size, d_ = fromleb128(self.data, d); d += d_
block_count, d_ = fromleb128(self.data, d); d += d_
# these are offset by 1 to avoid overflow issues
self.block_size = block_size + 1
self.block_count = block_count + 1
@ -2703,11 +2706,11 @@ class Gstate:
def __init__(self, mtree, tag, gdeltas):
super().__init__(mtree, tag, gdeltas)
d = 0
count, d_ = fromleb128(self.data[d:]); d += d_
count, d_ = fromleb128(self.data, d); d += d_
rms = []
if count <= 2:
for _ in range(count):
mid, d_ = fromleb128(self.data[d:]); d += d_
mid, d_ = fromleb128(self.data, d); d += d_
rms.append(mtree.mid(mid))
self.count = count
self.rms = rms
@ -3242,11 +3245,11 @@ class Lfs:
self.bshrub = None
if (self.struct is not None
and (self.struct.tag & ~0x3) == TAG_BSHRUB):
weight, trunk = fromshrub(self.struct.data)
weight, trunk, _ = fromshrub(self.struct.data)
self.bshrub = Btree.fetchshrub(lfs.bd, mdir.rbyd, trunk)
elif (self.struct is not None
and (self.struct.tag & ~0x3) == TAG_BTREE):
weight, block, trunk, cksum = frombtree(self.struct.data)
weight, block, trunk, cksum, _ = frombtree(self.struct.data)
self.bshrub = Btree.fetchck(
lfs.bd, block, trunk, weight, cksum)
@ -3366,7 +3369,7 @@ class Lfs:
return bid-(rattr.weight-1), rattr
# block pointer?
elif (rattr.tag & ~0x1003) == TAG_BLOCK:
size, block, off, cksize, cksum = frombptr(rattr.data)
size, block, off, cksize, cksum, _ = frombptr(rattr.data)
bptr = Bptr.fetchck(self.lfs.bd, rattr,
block, off, size, cksize, cksum)
if path: