X-Git-Url: https://pd.if.org/git/?p=pdclib;a=blobdiff_plain;f=opt%2Fbasecodecs%2F_PDCLIB_utf8.c;h=6348c79ed852a251cd714ebb05ea1db6077207ce;hp=a88f964077457c04f4d3cdba420b86747b82bc2e;hb=276f7e69f80ac53bfb5da5cc24072dd393485106;hpb=03c5509c96451f0ca62df447ffefce99ac823e89 diff --git a/opt/basecodecs/_PDCLIB_utf8.c b/opt/basecodecs/_PDCLIB_utf8.c index a88f964..6348c79 100644 --- a/opt/basecodecs/_PDCLIB_utf8.c +++ b/opt/basecodecs/_PDCLIB_utf8.c @@ -9,6 +9,7 @@ #include #include #include +#include <_PDCLIB_encoding.h> /* Use of the mbstate: * @@ -16,6 +17,9 @@ * _St32[1] is the character accumulated so far */ +static bool utf8_mbsinit( const mbstate_t *p_s ) +{ return p_s->_StUC[0] == 0; } + enum { DecStart = 0, @@ -44,21 +48,23 @@ end_conversion: \ goto end_conversion; \ } while(0) -#define OUT32(_c) do { \ - (*((*p_outbuf)++)) = (_c); \ - (*p_outsz)--; \ - _PDCLIB_UNDEFINED(accum); \ - state = DecStart; \ +#define OUT32(_c) do { \ + if(p_outbuf) \ + (*((*p_outbuf)++)) = (_c); \ + (*p_outsz)--; \ + _PDCLIB_UNDEFINED(accum); \ + state = DecStart; \ } while(0) + #define CHECK_CONTINUATION \ do { if((c & 0xC0) != 0x80) return false; } while(0) static bool utf8toc32( - char32_t **restrict p_outbuf, - size_t *restrict p_outsz, - const char **restrict p_inbuf, - size_t *restrict p_insz, - mbstate_t *restrict p_s + char32_t *restrict *restrict p_outbuf, + size_t *restrict p_outsz, + const char *restrict *restrict p_inbuf, + size_t *restrict p_insz, + mbstate_t *restrict p_s ) { START_CONVERSION @@ -67,7 +73,7 @@ static bool utf8toc32( char32_t c32; switch(state) { case DecStart: - // 1 byte + // 1 byte if(c <= 0x7F) { OUT32(c); } else if(c <= 0xDF) { @@ -150,7 +156,7 @@ static bool utf8toc32( } (*p_inbuf)++; - (*p_insz)--; + (*p_insz)--; } END_CONVERSION; } @@ -163,29 +169,29 @@ enum { }; static bool c32toutf8( - char **restrict p_outbuf, - size_t *restrict p_outsz, - const char32_t **restrict p_inbuf, - size_t *restrict p_insz, - mbstate_t *restrict p_s + char *restrict *restrict p_outbuf, + size_t *restrict p_outsz, + const char32_t *restrict *restrict p_inbuf, + size_t *restrict p_insz, + mbstate_t *restrict p_s ) { START_CONVERSION while(*p_outsz) { - char *c8 = *p_outbuf; + unsigned char outc = 0; switch(state) { case Enc3R: - *c8 = 0x80 | ((accum >> 12) & 0x3F); + outc = 0x80 | ((accum >> 12) & 0x3F); state = Enc2R; break; case Enc2R: - *c8 = 0x80 | ((accum >> 6) & 0x3F); + outc = 0x80 | ((accum >> 6) & 0x3F); state = Enc1R; break; case Enc1R: - *c8 = 0x80 | (accum & 0x3F); + outc = 0x80 | (accum & 0x3F); state = EncStart; _PDCLIB_UNDEFINED(accum); break; @@ -199,17 +205,17 @@ static bool c32toutf8( (*p_insz)--; if(accum <= 0x7F) { - *c8 = accum; + outc = accum; state = EncStart; _PDCLIB_UNDEFINED(accum); } else if(accum <= 0x7FF) { - *c8 = 0xC0 | (accum >> 6); + outc = 0xC0 | (accum >> 6); state = Enc1R; } else if(accum <= 0xFFFF) { - *c8 = 0xE0 | (accum >> 12); + outc = 0xE0 | (accum >> 12); state = Enc2R; } else if(accum <= 0x10FFFF) { - *c8 = 0xF0 | (accum >> 18); + outc = 0xF0 | (accum >> 18); state = Enc3R; } else { FINISH(false); @@ -217,12 +223,22 @@ static bool c32toutf8( break; } - - (*p_outbuf)++; - (*p_outsz)--; + if(p_outbuf) { + **p_outbuf = outc; + (*p_outbuf)++; + } + (*p_outsz)--; } END_CONVERSION; } + +const struct _PDCLIB_charcodec_t _PDCLIB_utf8_codec = { + .__mbsinit = utf8_mbsinit, + .__mbstoc32s = utf8toc32, + .__c32stombs = c32toutf8, + .__mb_max = 4, +}; + #endif #ifdef TEST @@ -233,16 +249,16 @@ int main( void ) #ifndef REGTEST // Valid conversion & back - static const char* input = "abcde" "\xDF\xBF" "\xEF\xBF\xBF" + static const char* input = "abcde" "\xDF\xBF" "\xEF\xBF\xBF" "\xF4\x8F\xBF\xBF"; char32_t c32out[8]; - char32_t *c32ptr = &c32out[0]; - size_t c32rem = 8; - char *chrptr = (char*) &input[0]; - size_t chrrem = strlen(input); - mbstate_t mbs = { 0 }; + char32_t *c32ptr = &c32out[0]; + size_t c32rem = 8; + const char *chrptr = (char*) &input[0]; + size_t chrrem = strlen(input); + mbstate_t mbs = { 0 }; TESTCASE(utf8toc32(&c32ptr, &c32rem, &chrptr, &chrrem, &mbs)); TESTCASE(c32rem == 0);