/*
 * USB mass storage class driver that mimics littlefs to FAT12 file system.
 *
 * Copyright 2024, Hiroyuki OYAMA. All rights reserved.
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * - Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright notice, this
 *   list of conditions and the following disclaimer in the documentation and/or
 *   other materials provided with the distribution.
 * - Neither the name of the copyright holder nor the names of its contributors may
 *   be used to endorse or promote products derived from this software without
 *   specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "unicode.h"

#include <stddef.h>
#include <stdint.h>
#include <string.h>

// Count the characters in the NUL-terminated UTF-8 string `src`.
//
// Only the lead byte of each sequence is examined: the continuation bytes it
// announces are skipped without being validated, and a sequence cut short by
// the end of the string still counts as one character.
//
// Returns the number of characters, or (size_t)-1 when a byte that cannot
// start a UTF-8 sequence is found in a lead position.
size_t strlen_utf8(const char *src) {
    size_t count = 0;
    size_t src_size = strlen(src);

    for (size_t i = 0; i < src_size; ++i) {
        uint8_t lead = (uint8_t)src[i];

        if ((lead & 0x80) == 0) {
            // 1-byte sequence: nothing to skip
        } else if ((lead & 0xE0) == 0xC0) {
            i += 1;  // 2-byte sequence: skip one continuation byte
        } else if ((lead & 0xF0) == 0xE0) {
            i += 2;  // 3-byte sequence: skip two continuation bytes
        } else if ((lead & 0xF8) == 0xF0) {
            i += 3;  // 4-byte sequence: skip three continuation bytes
        } else {
            return (size_t)-1;  // Invalid UTF-8 lead byte
        }
        count++;
    }
    return count;
}

// Widen a single-byte string into 16-bit code units, one unit per byte.
//
// Copying stops at the first NUL in `src`, after `src_size` bytes, or when
// `dist` holds `dist_size` units, whichever comes first. A terminating 0 is
// appended only if there is still room for it.
//
// Returns the number of code units written, excluding the terminator.
size_t ascii_to_utf16le(uint16_t *dist, size_t dist_size, const char *src, size_t src_size) {
    size_t utf16le_pos = 0;

    for (size_t i = 0; i < src_size && src[i] != '\0'; ++i) {
        if (utf16le_pos >= dist_size) {
            break;
        }
        // Convert via unsigned char: where plain char is signed, a byte
        // >= 0x80 would otherwise be sign-extended into 0xFFxx.
        dist[utf16le_pos++] = (uint16_t)(unsigned char)src[i];
    }

    if (utf16le_pos < dist_size) {
        dist[utf16le_pos] = 0;
    }
    return utf16le_pos;
}

// Convert `src_size` bytes of UTF-8 into UTF-16 code units.
//
// Conversion stops once all of `src` is consumed or `dist` holds `dist_size`
// units. NUL bytes inside `src_size` are converted like any other character;
// they do not end the input. A terminating 0 is appended only if there is
// still room for it.
//
// Returns the number of code units written (excluding the terminator), or
// (size_t)-1 on an invalid lead byte, an invalid or missing continuation
// byte, a code point above U+10FFFF, or when a surrogate pair does not fit
// in the remaining output space.
size_t utf8_to_utf16le(uint16_t *dist, size_t dist_size, const char *src, size_t src_size) {
    size_t dist_pos = 0;
    size_t src_pos = 0;

    while (src_pos < src_size && dist_pos < dist_size) {
        uint32_t codepoint = 0;
        size_t extra_bytes = 0;
        uint8_t byte = (uint8_t)src[src_pos];

        // The lead byte gives the sequence length and the top payload bits
        if ((byte & 0x80) == 0) {
            codepoint = byte;
        } else if ((byte & 0xE0) == 0xC0) {
            codepoint = byte & 0x1F;
            extra_bytes = 1;
        } else if ((byte & 0xF0) == 0xE0) {
            codepoint = byte & 0x0F;
            extra_bytes = 2;
        } else if ((byte & 0xF8) == 0xF0) {
            codepoint = byte & 0x07;
            extra_bytes = 3;
        } else {
            return (size_t)-1;  // Invalid UTF-8 lead byte
        }

        // Fold in six payload bits from each continuation byte
        for (size_t j = 0; j < extra_bytes; ++j) {
            src_pos++;
            if (src_pos >= src_size) {
                return (size_t)-1;  // Incomplete UTF-8 sequence
            }
            byte = (uint8_t)src[src_pos];
            if ((byte & 0xC0) != 0x80) {
                return (size_t)-1;  // Invalid UTF-8 continuation byte
            }
            codepoint = (codepoint << 6) | (byte & 0x3F);
        }

        // A 4-byte lead can encode up to 0x1FFFFF, but UTF-16 cannot
        // represent anything above U+10FFFF.
        if (codepoint > 0x10FFFF) {
            return (size_t)-1;
        }

        if (codepoint <= 0xFFFF) {
            // Basic Multilingual Plane: one code unit (room is guaranteed by
            // the loop condition)
            dist[dist_pos++] = (uint16_t)codepoint;
        } else {
            // Supplementary planes: high + low surrogate
            if (dist_size - dist_pos < 2) {
                return (size_t)-1;  // Not enough space for surrogates
            }
            codepoint -= 0x10000;
            dist[dist_pos++] = (uint16_t)(0xD800 | ((codepoint >> 10) & 0x3FF));
            dist[dist_pos++] = (uint16_t)(0xDC00 | (codepoint & 0x3FF));
        }
        src_pos++;
    }

    if (dist_pos < dist_size) {
        dist[dist_pos] = 0;  // Null-terminate
    }
    return dist_pos;
}

// Convert up to `len` UTF-16 code units from `src` into UTF-8 bytes in `dist`.
//
// Conversion stops at the first 0xFFFF unit (presumably the padding value of
// FAT long-file-name entries; confirm against the caller), when `len` units
// are consumed, or when the next character would not fit entirely in
// `buffer_size` bytes. A valid high/low surrogate pair becomes one 4-byte
// sequence; an unpaired surrogate is emitted as a 3-byte sequence. A
// terminating '\0' is appended only if there is still room for it.
//
// Returns the number of bytes written, excluding the terminator.
size_t utf16le_to_utf8(char *dist, size_t buffer_size, const uint16_t *src, size_t len) {
    size_t dist_len = 0;

    for (size_t i = 0; i < len; ++i) {
        uint32_t codepoint = src[i];
        if (codepoint == 0xFFFF) {
            break;
        }

        // Merge a high surrogate with the low surrogate that follows it
        if (codepoint >= 0xD800 && codepoint <= 0xDBFF && i + 1 < len
            && src[i + 1] >= 0xDC00 && src[i + 1] <= 0xDFFF) {
            codepoint = 0x10000 + ((codepoint - 0xD800) << 10) + (src[i + 1] - 0xDC00u);
            i++;  // the low surrogate has been consumed
        }

        size_t needed;
        if (codepoint <= 0x7F) {
            needed = 1;
        } else if (codepoint <= 0x7FF) {
            needed = 2;
        } else if (codepoint <= 0xFFFF) {
            needed = 3;
        } else {
            needed = 4;
        }
        if (dist_len + needed > buffer_size) {
            break;  // never emit a partial sequence
        }

        switch (needed) {
        case 1:
            dist[dist_len++] = (char)codepoint;
            break;
        case 2:
            dist[dist_len++] = (char)(0xC0 | (codepoint >> 6));
            dist[dist_len++] = (char)(0x80 | (codepoint & 0x3F));
            break;
        case 3:
            dist[dist_len++] = (char)(0xE0 | (codepoint >> 12));
            dist[dist_len++] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
            dist[dist_len++] = (char)(0x80 | (codepoint & 0x3F));
            break;
        default:
            dist[dist_len++] = (char)(0xF0 | (codepoint >> 18));
            dist[dist_len++] = (char)(0x80 | ((codepoint >> 12) & 0x3F));
            dist[dist_len++] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
            dist[dist_len++] = (char)(0x80 | (codepoint & 0x3F));
            break;
        }
    }

    if (dist_len < buffer_size) {
        dist[dist_len] = '\0';
    }
    return dist_len;
}