Add separate tu_hwfifo_*() functions for hardware FIFO access with data from/to a buffer or software FIFO. Remove the duplicated packet write/read code for fsdev

This commit is contained in:
hathach
2025-12-31 17:45:20 +07:00
parent f20ad05d71
commit 9b5c7761cc
6 changed files with 53 additions and 148 deletions

View File

@ -128,14 +128,15 @@ enum {
STRIDE_REMAIN_MASK = sizeof(stride_item_t) - 1u
};
// Copy to fifo from fixed address buffer (usually a rx register) with TU_FIFO_FIXED_ADDR_RW32 mode
static void ff_push_stride(uint8_t *ff_buf, const volatile stride_item_t *src, uint16_t len) {
// Reading full available 16/32-bit src and write to fifo
void tu_hwfifo_read(const volatile void *hwfifo, uint8_t *dest, uint16_t len) {
const volatile stride_item_t *src = (const volatile stride_item_t *)hwfifo;
// Reading full available 16/32-bit hwfifo and write to fifo
uint16_t n_items = len >> (CFG_TUSB_FIFO_ACCESS_DATA_STRIDE >> 1); // len / data_stride;
while (n_items--) {
const stride_item_t tmp = *src;
stride_unaligned_write(ff_buf, tmp);
ff_buf += sizeof(stride_item_t);
stride_unaligned_write(dest, tmp);
dest += sizeof(stride_item_t);
#if CFG_TUSB_FIFO_ACCESS_ADDR_STRIDE
src = (const volatile stride_item_t *)((uintptr_t)src + CFG_TUSB_FIFO_ACCESS_ADDR_STRIDE);
@ -146,17 +147,19 @@ static void ff_push_stride(uint8_t *ff_buf, const volatile stride_item_t *src, u
const uint8_t bytes_rem = len & STRIDE_REMAIN_MASK;
if (bytes_rem) {
const stride_item_t tmp = *src;
memcpy(ff_buf, &tmp, bytes_rem);
memcpy(dest, &tmp, bytes_rem);
}
}
// Copy from fifo to fixed address buffer (usually a tx register) with TU_FIFO_FIXED_ADDR_RW32 mode
static void ff_pull_stride(volatile stride_item_t *dest, const uint8_t *ff_buf, uint16_t len) {
void tu_hwfifo_write(volatile void *hwfifo, const uint8_t *src, uint16_t len) {
volatile stride_item_t *dest = (volatile stride_item_t *)hwfifo;
// Write full available 16/32 bit words to dest
uint16_t n_items = len >> (CFG_TUSB_FIFO_ACCESS_DATA_STRIDE >> 1); // len / data_stride;
while (n_items--) {
*dest = stride_unaligned_read(ff_buf);
ff_buf += sizeof(stride_item_t);
*dest = stride_unaligned_read(src);
src += sizeof(stride_item_t);
#if CFG_TUSB_FIFO_ACCESS_ADDR_STRIDE
dest = (volatile stride_item_t *)((uintptr_t)dest + CFG_TUSB_FIFO_ACCESS_ADDR_STRIDE);
@ -167,7 +170,7 @@ static void ff_pull_stride(volatile stride_item_t *dest, const uint8_t *ff_buf,
const uint8_t bytes_rem = len & STRIDE_REMAIN_MASK;
if (bytes_rem) {
stride_item_t tmp = 0u;
memcpy(&tmp, ff_buf, bytes_rem);
memcpy(&tmp, src, bytes_rem);
*dest = tmp;
}
}
@ -182,23 +185,23 @@ static void ff_push_n(const tu_fifo_t *f, const void *app_buf, uint16_t n, uint1
#if CFG_TUD_EDPT_DEDICATED_HWFIFO
if (stride_mode) {
const volatile stride_item_t *stride_src = (const volatile stride_item_t *)app_buf;
const volatile stride_item_t *hwfifo = (const volatile stride_item_t *)app_buf;
if (n <= lin_bytes) {
// Linear only case
ff_push_stride(ff_buf, stride_src, n);
tu_hwfifo_read(hwfifo, ff_buf, n);
} else {
// Wrap around case
// Write full words to linear part of buffer
uint16_t lin_nitems_bytes = lin_bytes & ~STRIDE_REMAIN_MASK;
ff_push_stride(ff_buf, stride_src, lin_nitems_bytes);
tu_hwfifo_read(hwfifo, ff_buf, lin_nitems_bytes);
ff_buf += lin_nitems_bytes;
// There could be odd 1 byte (16bit) or 1-3 bytes (32bit) before the wrap-around boundary
const uint8_t rem = lin_bytes & STRIDE_REMAIN_MASK;
if (rem > 0) {
const uint8_t remrem = (uint8_t)tu_min16(wrap_bytes, sizeof(stride_item_t) - rem);
const stride_item_t tmp = *stride_src;
const stride_item_t tmp = *hwfifo;
tu_scatter_write32(tmp, ff_buf, rem, f->buffer, remrem);
wrap_bytes -= remrem;
@ -209,7 +212,7 @@ static void ff_push_n(const tu_fifo_t *f, const void *app_buf, uint16_t n, uint1
// Write data wrapped part
if (wrap_bytes > 0) {
ff_push_stride(ff_buf, stride_src, wrap_bytes);
tu_hwfifo_read(hwfifo, ff_buf, wrap_bytes);
}
}
} else
@ -236,17 +239,17 @@ static void ff_pull_n(const tu_fifo_t *f, void *app_buf, uint16_t n, uint16_t rd
#if CFG_TUD_EDPT_DEDICATED_HWFIFO
if (stride_mode) {
volatile stride_item_t *stride_dst = (volatile stride_item_t *)app_buf;
volatile stride_item_t *hwfifo = (volatile stride_item_t *)app_buf;
if (n <= lin_bytes) {
// Linear only case
ff_pull_stride(stride_dst, ff_buf, n);
tu_hwfifo_write(hwfifo, ff_buf, n);
} else {
// Wrap around case
// Read full words from linear part
uint16_t lin_nitems_bytes = lin_bytes & ~STRIDE_REMAIN_MASK;
ff_pull_stride(stride_dst, ff_buf, lin_nitems_bytes);
tu_hwfifo_write(hwfifo, ff_buf, lin_nitems_bytes);
ff_buf += lin_nitems_bytes;
// There could be odd 1 byte (16bit) or 1-3 bytes (32bit) before the wrap-around boundary
@ -255,7 +258,7 @@ static void ff_pull_n(const tu_fifo_t *f, void *app_buf, uint16_t n, uint16_t rd
const uint8_t remrem = (uint8_t)tu_min16(wrap_bytes, sizeof(stride_item_t) - rem);
const stride_item_t scatter = (stride_item_t)tu_scatter_read32(ff_buf, rem, f->buffer, remrem);
*stride_dst = scatter;
*hwfifo = scatter;
wrap_bytes -= remrem;
ff_buf = f->buffer + remrem; // wrap around
@ -265,7 +268,7 @@ static void ff_pull_n(const tu_fifo_t *f, void *app_buf, uint16_t n, uint16_t rd
// Read data wrapped part
if (wrap_bytes > 0) {
ff_pull_stride(stride_dst, ff_buf, wrap_bytes);
tu_hwfifo_write(hwfifo, ff_buf, wrap_bytes);
}
}
} else

View File

@ -204,9 +204,6 @@ bool tu_fifo_read(tu_fifo_t *f, void *buffer);
// Thin wrapper: forwards to tu_fifo_read_n_access_mode() with the access-mode flag cleared
// and hands back its result unchanged.
TU_ATTR_ALWAYS_INLINE static inline uint16_t tu_fifo_read_n(tu_fifo_t *f, void *buffer, uint16_t n) {
  const uint16_t result = tu_fifo_read_n_access_mode(f, buffer, n, false);
  return result;
}
// Thin wrapper: forwards to tu_fifo_read_n_access_mode() with the access-mode flag set
// and hands back its result unchanged.
TU_ATTR_ALWAYS_INLINE static inline uint16_t tu_fifo_read_to_hwfifo(tu_fifo_t *f, void *buffer, uint16_t n) {
  const uint16_t result = tu_fifo_read_n_access_mode(f, buffer, n, true);
  return result;
}
// discard first n items from fifo i.e advance read pointer by n with mutex
// return number of discarded items
@ -220,10 +217,29 @@ bool tu_fifo_write(tu_fifo_t *f, const void *data);
// Thin wrapper: forwards to tu_fifo_write_n_access_mode() with the access-mode flag cleared
// and hands back its result unchanged.
TU_ATTR_ALWAYS_INLINE static inline uint16_t tu_fifo_write_n(tu_fifo_t *f, const void *data, uint16_t n) {
  const uint16_t result = tu_fifo_write_n_access_mode(f, data, n, false);
  return result;
}
TU_ATTR_ALWAYS_INLINE static inline uint16_t tu_fifo_write_from_hwfifo(tu_fifo_t *f, const void *data, uint16_t n) {
return tu_fifo_write_n_access_mode(f, data, n, true);
//--------------------------------------------------------------------+
// Hardware FIFO API
// Special hardware FIFO/buffer that holds USB data. It usually requires a specific access method, which can be
// configured with CFG_TUSB_FIFO_ACCESS_DATA_STRIDE (data width) and CFG_TUSB_FIFO_ACCESS_ADDR_STRIDE (address increment)
// Note: these usually have the opposite direction (read/write) to/from our software FIFO (tu_fifo_t)
//--------------------------------------------------------------------+
#if CFG_TUD_EDPT_DEDICATED_HWFIFO
// Write to the hardware FIFO with data taken from the software fifo: this is a read on the tu_fifo_t side,
// so it forwards to tu_fifo_read_n_access_mode() with the access-mode flag set and returns its result.
TU_ATTR_ALWAYS_INLINE static inline uint16_t tu_hwfifo_write_from_fifo(tu_fifo_t *f, void *hwfifo, uint16_t n) {
  const uint16_t result = tu_fifo_read_n_access_mode(f, hwfifo, n, true);
  return result;
}
// Read from the hardware FIFO into the software fifo: this is a write on the tu_fifo_t side,
// so it forwards to tu_fifo_write_n_access_mode() with the access-mode flag set and returns its result.
TU_ATTR_ALWAYS_INLINE static inline uint16_t tu_hwfifo_read_to_fifo(tu_fifo_t *f, const void *hwfifo, uint16_t n) {
  const uint16_t result = tu_fifo_write_n_access_mode(f, hwfifo, n, true);
  return result;
}
// read from hwfifo to buffer
// Whole stride items are read from hwfifo and stored to dest first. If len is not a multiple of the item size,
// one more item is read and only its first remaining bytes (in memory order) are copied to dest.
void tu_hwfifo_read(const volatile void *hwfifo, uint8_t *dest, uint16_t len);
// write to hwfifo from buffer
// Whole stride items are taken from src and written to hwfifo first. If len is not a multiple of the item size,
// the remaining bytes are placed in a zero-initialized item which is then written as one final item.
void tu_hwfifo_write(volatile void *hwfifo, const uint8_t *src, uint16_t len);
#endif
//--------------------------------------------------------------------+
// Internal Helper Local
// work on local copies of read/write indices in order to only access them once for re-entrancy

View File

@ -321,13 +321,12 @@ static void handle_ctr_rx(uint32_t ep_id) {
}
const uint16_t rx_count = btable_get_count(ep_id, buf_id);
uint16_t pma_addr = (uint16_t) btable_get_addr(ep_id, buf_id);
fsdev_pma_buf_t *pma_buf = PMA_BUF_AT(pma_addr);
if (xfer->ff) {
// fsdev_read_packet_memory_ff(xfer->ff, pma_addr, rx_count);
fsdev_pma_buf_t *pma_buf = PMA_BUF_AT(pma_addr);
tu_fifo_write_from_hwfifo(xfer->ff, (void *)pma_buf, rx_count);
tu_hwfifo_read_to_fifo(xfer->ff, (void *)pma_buf, rx_count);
} else {
fsdev_read_packet_memory(xfer->buffer + xfer->queued_len, pma_addr, rx_count);
tu_hwfifo_read(pma_buf, xfer->buffer + xfer->queued_len, rx_count);
}
xfer->queued_len += rx_count;
@ -719,14 +718,13 @@ static void dcd_transmit_packet(xfer_ctl_t *xfer, uint16_t ep_ix) {
} else {
buf_id = BTABLE_BUF_TX;
}
uint16_t addr_ptr = (uint16_t) btable_get_addr(ep_ix, buf_id);
uint16_t addr_ptr = (uint16_t)btable_get_addr(ep_ix, buf_id);
fsdev_pma_buf_t *pma_buf = PMA_BUF_AT(addr_ptr);
if (xfer->ff) {
// fsdev_write_packet_memory_ff(xfer->ff, addr_ptr, len);
fsdev_pma_buf_t *pma_buf = PMA_BUF_AT(addr_ptr);
tu_fifo_read_to_hwfifo(xfer->ff, (void *)(uintptr_t)pma_buf, len);
tu_hwfifo_write_from_fifo(xfer->ff, (void *)(uintptr_t)pma_buf, len);
} else {
fsdev_write_packet_memory(addr_ptr, &(xfer->buffer[xfer->queued_len]), len);
tu_hwfifo_write(pma_buf, &(xfer->buffer[xfer->queued_len]), len);
}
xfer->queued_len += len;

View File

@ -136,112 +136,6 @@ bool fsdev_read_packet_memory(void *__restrict dst, uint16_t src, uint16_t nbyte
return true;
}
// Write to PMA from FIFO
//
// Copies up to wNBytes from the readable region of ff into packet memory starting at dst, then advances
// the FIFO read pointer by the number of bytes actually taken (linear part + wrapped part).
// Always returns true; the results of the underlying fsdev_write_packet_memory() calls are not checked.
bool fsdev_write_packet_memory_ff(tu_fifo_t *ff, uint16_t dst, uint16_t wNBytes) {
  if (wNBytes == 0) {
    return true;
  }

  // Since we copy from a ring buffer FIFO, a wrap might occur making it necessary to conduct two copies
  tu_fifo_buffer_info_t info;
  tu_fifo_get_read_info(ff, &info);

  uint16_t cnt_lin = tu_min16(wNBytes, info.linear.len);
  uint16_t cnt_wrap = tu_min16((uint16_t)(wNBytes - cnt_lin), info.wrapped.len);
  // Captured before cnt_wrap is consumed below: this is how far the read pointer moves
  uint16_t const cnt_total = cnt_lin + cnt_wrap;

  // We read from the FIFO and write into the PMA. If the linear part is not a multiple of the bus width and
  // there is a wrapped part, the trailing linear bytes are combined with the leading wrapped bytes so that
  // the PMA is always written in whole bus-width units.
  uint16_t lin_even = cnt_lin & ~(FSDEV_BUS_SIZE - 1);
  uint16_t lin_odd = cnt_lin & (FSDEV_BUS_SIZE - 1);
  uint8_t const *src8 = (uint8_t const *) info.linear.ptr;

  // write even linear part
  fsdev_write_packet_memory(dst, src8, lin_even);
  dst += lin_even;
  src8 += lin_even;

  if (lin_odd == 0) {
    src8 = (uint8_t const *) info.wrapped.ptr;
  } else {
    // Combine last linear bytes + first wrapped bytes to form fsdev bus width data.
    // The shift is done on an unsigned 32-bit value: a plain uint8_t is promoted to (signed) int, and
    // shifting a byte >= 0x80 left by 24 would overflow it (undefined behavior) if the bus type is 32-bit.
    fsdev_bus_t temp = 0;
    uint16_t i;
    for (i = 0; i < lin_odd; i++) {
      temp |= (fsdev_bus_t) ((uint32_t) (*src8++) << (i * 8u));
    }

    src8 = (uint8_t const *) info.wrapped.ptr;
    for (; i < FSDEV_BUS_SIZE && cnt_wrap > 0; i++, cnt_wrap--) {
      temp |= (fsdev_bus_t) ((uint32_t) (*src8++) << (i * 8u));
    }

    fsdev_write_packet_memory(dst, &temp, FSDEV_BUS_SIZE);
    dst += FSDEV_BUS_SIZE;
  }

  // write the rest of the wrapped part
  fsdev_write_packet_memory(dst, src8, cnt_wrap);

  tu_fifo_advance_read_pointer(ff, cnt_total);
  return true;
}
// Read from PMA to FIFO
//
// Copies up to wNBytes from packet memory starting at src into the writable region of ff, then advances
// the FIFO write pointer by the number of bytes stored (linear part + wrapped part). Always returns true.
bool fsdev_read_packet_memory_ff(tu_fifo_t *ff, uint16_t src, uint16_t wNBytes) {
  if (wNBytes == 0) {
    return true;
  }

  // The destination is a ring buffer, so the writable space may be split into a linear and a wrapped region
  tu_fifo_buffer_info_t wr_info;
  tu_fifo_get_write_info(ff, &wr_info); // we want to write into the FIFO

  uint16_t lin_len = tu_min16(wNBytes, wr_info.linear.len);
  uint16_t wrap_len = tu_min16(wNBytes - lin_len, wr_info.wrapped.len);
  // Captured before wrap_len is consumed below: this is how far the write pointer moves
  uint16_t total_len = lin_len + wrap_len;

  // We read from the PMA and write into the FIFO. If the linear part is not a multiple of the bus width,
  // one bus-width unit is read and split between the end of the linear part and the start of the wrapped part,
  // so that the PMA is always read in whole bus-width units.
  uint16_t lin_aligned = lin_len & ~(FSDEV_BUS_SIZE - 1);
  uint16_t lin_tail = lin_len & (FSDEV_BUS_SIZE - 1);

  // bus-width aligned portion of the linear region
  uint8_t *out = (uint8_t *) wr_info.linear.ptr;
  fsdev_read_packet_memory(out, src, lin_aligned);
  out += lin_aligned;
  src += lin_aligned;

  if (lin_tail != 0) {
    // Fetch one bus-width unit and hand its bytes out lowest byte first
    fsdev_bus_t word;
    fsdev_read_packet_memory(&word, src, FSDEV_BUS_SIZE);
    src += FSDEV_BUS_SIZE;

    uint16_t idx = 0;
    // first the bytes that still fit in the linear region
    while (idx < lin_tail) {
      *out++ = (uint8_t) (word & 0xfful);
      word >>= 8;
      idx++;
    }

    // then the bytes that spill over into the wrapped region
    out = (uint8_t *) wr_info.wrapped.ptr;
    while (idx < FSDEV_BUS_SIZE && wrap_len > 0) {
      *out++ = (uint8_t) (word & 0xfful);
      word >>= 8;
      idx++;
      wrap_len--;
    }
  } else {
    out = (uint8_t *) wr_info.wrapped.ptr;
  }

  // read the rest of the wrapped part
  fsdev_read_packet_memory(out, src, wrap_len);

  tu_fifo_advance_write_pointer(ff, total_len);
  return true;
}
//--------------------------------------------------------------------+
// BTable Helper
//--------------------------------------------------------------------+

View File

@ -337,12 +337,6 @@ bool fsdev_write_packet_memory(uint16_t dst, const void *__restrict src, uint16_
// - Uses unaligned for RAM (since M0 cannot access unaligned address)
bool fsdev_read_packet_memory(void *__restrict dst, uint16_t src, uint16_t nbytes);
// Write to PMA from FIFO
bool fsdev_write_packet_memory_ff(tu_fifo_t *ff, uint16_t dst, uint16_t wNBytes);
// Read from PMA to FIFO
bool fsdev_read_packet_memory_ff(tu_fifo_t *ff, uint16_t src, uint16_t wNBytes);
#ifdef __cplusplus
}
#endif

View File

@ -367,7 +367,7 @@ static uint16_t epin_write_tx_fifo(dwc2_regs_t *dwc2, uint8_t epnum) {
// Push packet to Tx-FIFO
if (xfer->ff) {
volatile uint32_t* tx_fifo = dwc2->fifo[epnum];
tu_fifo_read_to_hwfifo(xfer->ff, (void *)(uintptr_t)tx_fifo, xact_bytes);
tu_hwfifo_write_from_fifo(xfer->ff, (void *)(uintptr_t)tx_fifo, xact_bytes);
total_bytes_written += xact_bytes;
} else {
dfifo_write_packet(dwc2, epnum, xfer->buffer, xact_bytes);
@ -889,7 +889,7 @@ static void handle_rxflvl_irq(uint8_t rhport) {
if (byte_count != 0) {
// Read packet off RxFIFO
if (xfer->ff != NULL) {
tu_fifo_write_from_hwfifo(xfer->ff, (const void *)(uintptr_t)rx_fifo, byte_count);
tu_hwfifo_read_to_fifo(xfer->ff, (const void *)(uintptr_t)rx_fifo, byte_count);
} else {
dfifo_read_packet(dwc2, xfer->buffer, byte_count);
xfer->buffer += byte_count;