Skip to content

关于Tutorial04中将Unicode字符转换为UTF-8存储的lept_encode_utf8的写法 #237

@Meha555

Description

@Meha555

作者在课程文档中提到了对于多字节存储的情况，可以直接使用 lept_encode_utf8 来压入缓冲区。以下是我的写法，可以通过测试。想请教各位这样的写法该怎么改进，使其接近于作者的代码风格？

/*
 * PUTC_S(c, ch, size): append the first `size` bytes of `ch` to context `c`,
 * issuing one lept_context_push() call per byte.
 *
 * Every argument is parenthesized so that expressions such as `buf + 1` or
 * `cond ? 3 : 0` expand with the intended precedence. The loop counter uses a
 * macro-specific name so it cannot shadow a caller variable named `i` that
 * appears in one of the arguments.
 *
 * Note: `c`, `ch` and `size` are evaluated once per iteration, so do not pass
 * expressions with side effects.
 */
#define PUTC_S(c, ch, size)                                                     \
    do {                                                                        \
        int putc_s_i_ = 0;                                                      \
        while (putc_s_i_ < (size)) {                                            \
            *(char*)lept_context_push((c), sizeof(char)) = ((ch)[putc_s_i_++]); \
        }                                                                       \
    } while (0)

/*
 * Encode the Unicode code point `u` as UTF-8 and append the bytes to `c`.
 *
 *   U+0000  .. U+007F    -> 0xxxxxxx
 *   U+0080  .. U+07FF    -> 110xxxxx 10xxxxxx
 *   U+0800  .. U+FFFF    -> 1110xxxx 10xxxxxx 10xxxxxx
 *   U+10000 .. U+10FFFF  -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * `u` must not exceed 0x10FFFF (checked with assert).
 *
 * The encoded bytes are staged in a fixed 4-byte stack buffer, so no heap
 * allocation (and no allocation-failure path) is involved.
 */
static void lept_encode_utf8(lept_context* c, unsigned u) {
    char ch[4]; /* a UTF-8 sequence is at most 4 bytes long */
    int len;

    assert(u <= 0x10FFFF);

    if (u <= 0x7F) {
        /* ASCII: a single byte, pushed directly. */
        PUTC(c, u & 0xFF);
        return;
    }

    if (u <= 0x07FF) {
        ch[0] = (char)(0xC0 | ((u >> 6) & 0x1F));  /* 110xxxxx */
        ch[1] = (char)(0x80 | (u & 0x3F));         /* 10xxxxxx */
        len = 2;
    } else if (u <= 0xFFFF) {
        ch[0] = (char)(0xE0 | ((u >> 12) & 0x0F)); /* 1110xxxx */
        ch[1] = (char)(0x80 | ((u >> 6) & 0x3F));  /* 10xxxxxx */
        ch[2] = (char)(0x80 | (u & 0x3F));         /* 10xxxxxx */
        len = 3;
    } else {
        ch[0] = (char)(0xF0 | ((u >> 18) & 0x07)); /* 11110xxx */
        ch[1] = (char)(0x80 | ((u >> 12) & 0x3F)); /* 10xxxxxx */
        ch[2] = (char)(0x80 | ((u >> 6) & 0x3F));  /* 10xxxxxx */
        ch[3] = (char)(0x80 | (u & 0x3F));         /* 10xxxxxx */
        len = 4;
    }

    /* Push the whole multi-byte sequence in one place. */
    PUTC_S(c, ch, len);
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions