Fixed incorrect cache reuse when seeking from end-of-block

In v2.5, we introduced an optimization to avoid rereading data when
seeking inside the file cache. Unfortunately this used a slightly
wrong condition to check if the cache was "live", which meant seeks from
end-of-blocks could end up with invalid caches and wrong data. Not
great.

The problem is the nuance of when a file's cache is "live":

1. The file is marked as LFS_F_READING or LFS_F_WRITING.

   But we can't reuse the cache when writing, so we only care about
   LFS_F_READING.

2. file->off != lfs->cfg->block_size (end-of-block).

   This is an optimization to avoid eagerly reading blocks we may not
   actually care about.

We weren't checking for the end-of-block case, which meant if you seeked
_from_ the end of a block to a seemingly valid location in the file
cache, you could end up with an invalid cache.

Note that end-of-block may not be powers-of-two due to CTZ skip-list
pointers.

---

The fix is to check for the end-of-block case in lfs_file_seek. Note
this now matches the need-new-block logic in lfs_file_flushedread.

This logic change may also make lfs_file_seek call lfs_file_flush more
often, but only in cases where lfs_file_flush is a noop.

I've also extended the test_seek tests to cover a few more boundary-read
cases and prevent a regression in the future.

Found by wjl and lrodorigo
This commit is contained in:
Christopher Haster 2024-12-19 01:32:50 -06:00
parent 630a0d87c2
commit 366100b140
2 changed files with 150 additions and 8 deletions

9
lfs.c
View File

@ -3725,13 +3725,8 @@ static lfs_soff_t lfs_file_seek_(lfs_t *lfs, lfs_file_t *file,
// if we're only reading and our new offset is still in the file's cache
// we can avoid flushing and needing to reread the data
if (
#ifndef LFS_READONLY
!(file->flags & LFS_F_WRITING)
#else
true
#endif
) {
if ((file->flags & LFS_F_READING)
&& file->off != lfs->cfg->block_size) {
int oindex = lfs_ctz_index(lfs, &(lfs_off_t){file->pos});
lfs_off_t noff = npos;
int nindex = lfs_ctz_index(lfs, &noff);

View File

@ -137,6 +137,130 @@ code = '''
lfs_unmount(&lfs) => 0;
'''
# boundary seek and reads
[cases.test_seek_boundary_read]
defines.COUNT = 132
code = '''
lfs_t lfs;
lfs_format(&lfs, cfg) => 0;
lfs_mount(&lfs, cfg) => 0;
lfs_file_t file;
lfs_file_open(&lfs, &file, "kitty",
LFS_O_WRONLY | LFS_O_CREAT | LFS_O_APPEND) => 0;
size_t size = strlen("kittycatcat");
uint8_t buffer[1024];
memcpy(buffer, "kittycatcat", size);
for (int j = 0; j < COUNT; j++) {
lfs_file_write(&lfs, &file, buffer, size);
}
lfs_file_close(&lfs, &file) => 0;
lfs_unmount(&lfs) => 0;
lfs_mount(&lfs, cfg) => 0;
lfs_file_open(&lfs, &file, "kitty", LFS_O_RDONLY) => 0;
size = strlen("kittycatcat");
const lfs_soff_t offsets[] = {
512,
1024-4,
512+1,
1024-4+1,
512-1,
1024-4-1,
512-strlen("kittycatcat"),
1024-4-strlen("kittycatcat"),
512-strlen("kittycatcat")+1,
1024-4-strlen("kittycatcat")+1,
512-strlen("kittycatcat")-1,
1024-4-strlen("kittycatcat")-1,
strlen("kittycatcat")*(COUNT-2)-1,
};
for (unsigned i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++) {
lfs_soff_t off = offsets[i];
// read @ offset
lfs_file_seek(&lfs, &file, off, LFS_SEEK_SET) => off;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer,
&"kittycatcatkittycatcat"[off % strlen("kittycatcat")],
size) => 0;
// read after
lfs_file_seek(&lfs, &file, off+strlen("kittycatcat")+1, LFS_SEEK_SET)
=> off+strlen("kittycatcat")+1;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer,
&"kittycatcatkittycatcat"[(off+1) % strlen("kittycatcat")],
size) => 0;
// read before
lfs_file_seek(&lfs, &file, off-strlen("kittycatcat")-1, LFS_SEEK_SET)
=> off-strlen("kittycatcat")-1;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer,
&"kittycatcatkittycatcat"[(off-1) % strlen("kittycatcat")],
size) => 0;
// read @ 0
lfs_file_seek(&lfs, &file, 0, LFS_SEEK_SET) => 0;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer, "kittycatcat", size) => 0;
// read @ offset
lfs_file_seek(&lfs, &file, off, LFS_SEEK_SET) => off;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer,
&"kittycatcatkittycatcat"[off % strlen("kittycatcat")],
size) => 0;
// read after
lfs_file_seek(&lfs, &file, off+strlen("kittycatcat")+1, LFS_SEEK_SET)
=> off+strlen("kittycatcat")+1;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer,
&"kittycatcatkittycatcat"[(off+1) % strlen("kittycatcat")],
size) => 0;
// read before
lfs_file_seek(&lfs, &file, off-strlen("kittycatcat")-1, LFS_SEEK_SET)
=> off-strlen("kittycatcat")-1;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer,
&"kittycatcatkittycatcat"[(off-1) % strlen("kittycatcat")],
size) => 0;
// sync
lfs_file_sync(&lfs, &file) => 0;
// read @ 0
lfs_file_seek(&lfs, &file, 0, LFS_SEEK_SET) => 0;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer, "kittycatcat", size) => 0;
// read @ offset
lfs_file_seek(&lfs, &file, off, LFS_SEEK_SET) => off;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer,
&"kittycatcatkittycatcat"[off % strlen("kittycatcat")],
size) => 0;
// read after
lfs_file_seek(&lfs, &file, off+strlen("kittycatcat")+1, LFS_SEEK_SET)
=> off+strlen("kittycatcat")+1;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer,
&"kittycatcatkittycatcat"[(off+1) % strlen("kittycatcat")],
size) => 0;
// read before
lfs_file_seek(&lfs, &file, off-strlen("kittycatcat")-1, LFS_SEEK_SET)
=> off-strlen("kittycatcat")-1;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer,
&"kittycatcatkittycatcat"[(off-1) % strlen("kittycatcat")],
size) => 0;
}
lfs_file_close(&lfs, &file) => 0;
lfs_unmount(&lfs) => 0;
'''
# boundary seek and writes
[cases.test_seek_boundary_write]
defines.COUNT = 132
@ -160,31 +284,54 @@ code = '''
lfs_file_open(&lfs, &file, "kitty", LFS_O_RDWR) => 0;
size = strlen("hedgehoghog");
const lfs_soff_t offsets[] = {512, 1020, 513, 1021, 511, 1019, 1441};
const lfs_soff_t offsets[] = {
512,
1024-4,
512+1,
1024-4+1,
512-1,
1024-4-1,
512-strlen("kittycatcat"),
1024-4-strlen("kittycatcat"),
512-strlen("kittycatcat")+1,
1024-4-strlen("kittycatcat")+1,
512-strlen("kittycatcat")-1,
1024-4-strlen("kittycatcat")-1,
strlen("kittycatcat")*(COUNT-2)-1,
};
for (unsigned i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++) {
lfs_soff_t off = offsets[i];
// write @ offset
memcpy(buffer, "hedgehoghog", size);
lfs_file_seek(&lfs, &file, off, LFS_SEEK_SET) => off;
lfs_file_write(&lfs, &file, buffer, size) => size;
// read @ offset
lfs_file_seek(&lfs, &file, off, LFS_SEEK_SET) => off;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer, "hedgehoghog", size) => 0;
// read @ 0
lfs_file_seek(&lfs, &file, 0, LFS_SEEK_SET) => 0;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer, "kittycatcat", size) => 0;
// read @ offset
lfs_file_seek(&lfs, &file, off, LFS_SEEK_SET) => off;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer, "hedgehoghog", size) => 0;
lfs_file_sync(&lfs, &file) => 0;
// read @ 0
lfs_file_seek(&lfs, &file, 0, LFS_SEEK_SET) => 0;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer, "kittycatcat", size) => 0;
// read @ offset
lfs_file_seek(&lfs, &file, off, LFS_SEEK_SET) => off;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer, "hedgehoghog", size) => 0;