mirror of
https://github.com/joeycastillo/second-movement.git
synced 2026-02-12 04:05:57 +00:00
190 lines
6.3 KiB
C
190 lines
6.3 KiB
C
/*
|
|
* USB mass storage class driver that mimics littlefs to FAT12 file system.
|
|
*
|
|
* Copyright 2024, Hiroyuki OYAMA. All rights reserved.
|
|
* SPDX-License-Identifier: BSD-3-Clause
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without modification,
|
|
* are permitted provided that the following conditions are met:
|
|
*
|
|
* - Redistributions of source code must retain the above copyright notice, this
|
|
* list of conditions and the following disclaimer.
|
|
* - Redistributions in binary form must reproduce the above copyright notice, this
|
|
* list of conditions and the following disclaimer in the documentation and/or
|
|
* other materials provided with the distribution.
|
|
* - Neither the name of the copyright holder nor the names of its contributors may
|
|
* be used to endorse or promote products derived from this software without
|
|
* specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
|
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
|
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "unicode.h"
|
|
|
|
|
|
/*
 * Count the characters (code points) in a NUL-terminated UTF-8 string.
 *
 * Only the lead byte of each sequence is inspected; continuation bytes are
 * skipped without being validated. A multi-byte sequence that is cut short
 * by the end of the string is still counted as one character.
 *
 * Returns the number of characters, or (size_t)-1 if a byte that cannot
 * start a UTF-8 sequence is found where a lead byte is expected.
 */
size_t strlen_utf8(const char *src) {
    const size_t total_bytes = strlen(src);
    size_t chars = 0;
    size_t pos = 0;

    while (pos < total_bytes) {
        const uint8_t lead = (uint8_t)src[pos];
        size_t seq_len;

        // Classify the lead byte by its high bits to get the sequence length.
        if (lead < 0x80) {
            seq_len = 1;            // 0xxxxxxx
        } else if (lead >= 0xC0 && lead <= 0xDF) {
            seq_len = 2;            // 110xxxxx
        } else if (lead >= 0xE0 && lead <= 0xEF) {
            seq_len = 3;            // 1110xxxx
        } else if (lead >= 0xF0 && lead <= 0xF7) {
            seq_len = 4;            // 11110xxx
        } else {
            return (size_t)-1;      // stray continuation byte or 0xF8..0xFF
        }

        pos += seq_len;
        chars++;
    }

    return chars;
}
|
|
|
|
/*
 * Widen a single-byte string to 16-bit code units, one unit per input byte.
 *
 * dist       destination buffer of 16-bit code units
 * dist_size  capacity of dist, in code units
 * src        input bytes; conversion stops at the first '\0'
 * src_size   maximum number of input bytes to read
 *
 * Returns the number of code units written, not counting the terminator.
 * Output is truncated when dist is full; a terminating 0 is stored only if
 * there is room left for it.
 */
size_t ascii_to_utf16le(uint16_t *dist, size_t dist_size, const char *src, size_t src_size) {
    size_t written = 0;

    // Input and output advance in lockstep, so one index serves both.
    while (written < src_size && written < dist_size && src[written] != '\0') {
        // Go through unsigned char: where plain char is signed, a byte
        // >= 0x80 would otherwise sign-extend and be stored as 0xFFxx.
        dist[written] = (uint16_t)(unsigned char)src[written];
        written++;
    }

    if (written < dist_size) {
        dist[written] = 0;
    }
    return written;
}
|
|
|
|
/*
 * Convert UTF-8 to UTF-16 code units and return the length of the result.
 *
 * dist       destination buffer of 16-bit code units
 * dist_size  capacity of dist, in code units
 * src        UTF-8 input; exactly src_size bytes are considered, and a
 *            '\0' byte inside that range is converted like any other
 * src_size   number of input bytes
 *
 * Returns the number of code units written, not counting the terminator.
 * Conversion stops quietly once dist is full. A terminating 0 is stored
 * only if there is room left for it.
 *
 * Returns (size_t)-1 when the input has an invalid lead byte, a missing or
 * invalid continuation byte, a code point above U+10FFFF, or when a
 * surrogate pair is needed but fewer than two code units of space remain.
 */
size_t utf8_to_utf16le(uint16_t* dist, size_t dist_size, const char *src, size_t src_size) {
    size_t dist_pos = 0;
    size_t src_pos = 0;

    while (src_pos < src_size && dist_pos < dist_size) {
        uint32_t codepoint = 0;
        size_t extra_bytes = 0;
        uint8_t byte = (uint8_t)src[src_pos];

        // The lead byte gives the sequence length and the top payload bits.
        if ((byte & 0x80) == 0) {           // 1-byte UTF-8
            codepoint = byte;
        } else if ((byte & 0xE0) == 0xC0) { // 2-byte UTF-8
            codepoint = byte & 0x1F;
            extra_bytes = 1;
        } else if ((byte & 0xF0) == 0xE0) { // 3-byte UTF-8
            codepoint = byte & 0x0F;
            extra_bytes = 2;
        } else if ((byte & 0xF8) == 0xF0) { // 4-byte UTF-8
            codepoint = byte & 0x07;
            extra_bytes = 3;
        } else {
            return (size_t)-1;              // Invalid UTF-8 lead byte
        }

        // Fold in six payload bits from each continuation byte.
        for (size_t j = 0; j < extra_bytes; ++j) {
            src_pos++;
            if (src_pos >= src_size) {
                return (size_t)-1;          // Incomplete UTF-8 sequence
            }

            byte = (uint8_t)src[src_pos];
            if ((byte & 0xC0) != 0x80) {
                return (size_t)-1;          // Invalid UTF-8 continuation byte
            }

            codepoint = (codepoint << 6) | (byte & 0x3F);
        }

        if (codepoint <= 0xFFFF) {
            // Basic Multilingual Plane: the loop condition already
            // guarantees one free slot.
            dist[dist_pos++] = (uint16_t)codepoint;
        } else if (codepoint > 0x10FFFF) {
            // A 4-byte lead can encode up to 0x1FFFFF, which has no
            // UTF-16 representation.
            return (size_t)-1;
        } else {
            // Supplementary planes need a high + low surrogate pair.
            if (dist_size - dist_pos < 2) {
                return (size_t)-1;          // Not enough space for surrogates
            }
            codepoint -= 0x10000;
            // The two post-increments are the only advance needed; adding
            // a further 2 here would skip two unwritten slots.
            dist[dist_pos++] = (uint16_t)(0xD800 | ((codepoint >> 10) & 0x3FF));
            dist[dist_pos++] = (uint16_t)(0xDC00 | (codepoint & 0x3FF));
        }

        src_pos++;
    }

    if (dist_pos < dist_size) {
        dist[dist_pos] = 0;                 // Null-terminate
    }

    return dist_pos;
}
|
|
|
|
/*
 * Convert UTF-16 code units to UTF-8.
 *
 * dist         destination byte buffer
 * buffer_size  capacity of dist, in bytes
 * src          UTF-16 code units
 * len          number of code units available in src
 *
 * Conversion stops at the first 0xFFFF code unit (presumably the padding
 * used in FAT long-file-name entries; confirm against the caller), at the
 * end of src, or as soon as the next character would not fit completely
 * in dist. A 0x0000 unit is not treated as a terminator; it is written as
 * a '\0' byte and conversion continues.
 *
 * A high surrogate immediately followed by a low surrogate is combined
 * into one supplementary-plane code point and written as a 4-byte
 * sequence. A surrogate without a valid partner is written as a 3-byte
 * sequence, as any other 16-bit value would be.
 *
 * Returns the number of bytes written, not counting the terminator. A
 * terminating '\0' is stored only if there is room left for it.
 */
size_t utf16le_to_utf8(char *dist, size_t buffer_size, const uint16_t *src, size_t len) {
    size_t dist_len = 0;

    for (size_t i = 0; i < len; ++i) {
        uint32_t codepoint = src[i];
        if (codepoint == 0xFFFF) {
            break;
        }

        // Merge a well-formed surrogate pair into a single code point.
        if (codepoint >= 0xD800 && codepoint <= 0xDBFF && i + 1 < len &&
            src[i + 1] >= 0xDC00 && src[i + 1] <= 0xDFFF) {
            codepoint = 0x10000 + ((codepoint - 0xD800) << 10) + (uint32_t)(src[i + 1] - 0xDC00);
            ++i; // the low surrogate has been consumed as well
        }

        size_t needed;
        if (codepoint <= 0x7F) {
            needed = 1;
        } else if (codepoint <= 0x7FF) {
            needed = 2;
        } else if (codepoint <= 0xFFFF) {
            needed = 3;
        } else {
            needed = 4;
        }

        // Never emit a partial sequence: stop if the whole character
        // does not fit.
        if (dist_len + needed > buffer_size) {
            break;
        }

        switch (needed) {
        case 1:
            dist[dist_len++] = (char)codepoint;
            break;
        case 2:
            dist[dist_len++] = (char)(0xC0 | (codepoint >> 6));
            dist[dist_len++] = (char)(0x80 | (codepoint & 0x3F));
            break;
        case 3:
            dist[dist_len++] = (char)(0xE0 | (codepoint >> 12));
            dist[dist_len++] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
            dist[dist_len++] = (char)(0x80 | (codepoint & 0x3F));
            break;
        default:
            dist[dist_len++] = (char)(0xF0 | (codepoint >> 18));
            dist[dist_len++] = (char)(0x80 | ((codepoint >> 12) & 0x3F));
            dist[dist_len++] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
            dist[dist_len++] = (char)(0x80 | (codepoint & 0x3F));
            break;
        }
    }

    if (dist_len < buffer_size) {
        dist[dist_len] = '\0';
    }
    return dist_len;
}
|