X-Git-Url: https://pd.if.org/git/?a=blobdiff_plain;f=opt%2Fbasecodecs%2F_PDCLIB_utf8.c;h=22fa808c963fa33b4a1bfbd71565bfc141c5bc84;hb=4b1f856ea2b21f30b6af8d4cca7129ebc84d3d6f;hp=a88f964077457c04f4d3cdba420b86747b82bc2e;hpb=03c5509c96451f0ca62df447ffefce99ac823e89;p=pdclib diff --git a/opt/basecodecs/_PDCLIB_utf8.c b/opt/basecodecs/_PDCLIB_utf8.c index a88f964..22fa808 100644 --- a/opt/basecodecs/_PDCLIB_utf8.c +++ b/opt/basecodecs/_PDCLIB_utf8.c @@ -9,6 +9,7 @@ #include #include #include +#include <_PDCLIB_encoding.h> /* Use of the mbstate: * @@ -16,6 +17,9 @@ * _St32[1] is the character accumulated so far */ +static bool utf8_mbsinit( const mbstate_t *p_s ) +{ return p_s->_StUC[0] == 0; } + enum { DecStart = 0, @@ -44,21 +48,23 @@ end_conversion: \ goto end_conversion; \ } while(0) -#define OUT32(_c) do { \ - (*((*p_outbuf)++)) = (_c); \ - (*p_outsz)--; \ - _PDCLIB_UNDEFINED(accum); \ - state = DecStart; \ +#define OUT32(_c) do { \ + if(p_outbuf) \ + (*((*p_outbuf)++)) = (_c); \ + (*p_outsz)--; \ + _PDCLIB_UNDEFINED(accum); \ + state = DecStart; \ } while(0) + #define CHECK_CONTINUATION \ do { if((c & 0xC0) != 0x80) return false; } while(0) static bool utf8toc32( - char32_t **restrict p_outbuf, - size_t *restrict p_outsz, - const char **restrict p_inbuf, - size_t *restrict p_insz, - mbstate_t *restrict p_s + char32_t *restrict *restrict p_outbuf, + size_t *restrict p_outsz, + const char *restrict *restrict p_inbuf, + size_t *restrict p_insz, + mbstate_t *restrict p_s ) { START_CONVERSION @@ -163,29 +169,29 @@ enum { }; static bool c32toutf8( - char **restrict p_outbuf, - size_t *restrict p_outsz, - const char32_t **restrict p_inbuf, - size_t *restrict p_insz, - mbstate_t *restrict p_s + char *restrict *restrict p_outbuf, + size_t *restrict p_outsz, + const char32_t *restrict *restrict p_inbuf, + size_t *restrict p_insz, + mbstate_t *restrict p_s ) { START_CONVERSION while(*p_outsz) { - char *c8 = *p_outbuf; + unsigned char outc; switch(state) { case Enc3R: - *c8 = 0x80 | ((accum >> 12) & 0x3F); + outc = 0x80 | ((accum >> 12) & 0x3F); state = Enc2R; break; case Enc2R: - *c8 = 0x80 | ((accum >> 6) & 0x3F); + outc = 0x80 | ((accum >> 6) & 0x3F); state = Enc1R; break; case Enc1R: - *c8 = 0x80 | (accum & 0x3F); + outc = 0x80 | (accum & 0x3F); state = EncStart; _PDCLIB_UNDEFINED(accum); break; @@ -199,17 +205,17 @@ static bool c32toutf8( (*p_insz)--; if(accum <= 0x7F) { - *c8 = accum; + outc = accum; state = EncStart; _PDCLIB_UNDEFINED(accum); } else if(accum <= 0x7FF) { - *c8 = 0xC0 | (accum >> 6); + outc = 0xC0 | (accum >> 6); state = Enc1R; } else if(accum <= 0xFFFF) { - *c8 = 0xE0 | (accum >> 12); + outc = 0xE0 | (accum >> 12); state = Enc2R; } else if(accum <= 0x10FFFF) { - *c8 = 0xF0 | (accum >> 18); + outc = 0xF0 | (accum >> 18); state = Enc3R; } else { FINISH(false); @@ -217,12 +223,22 @@ static bool c32toutf8( break; } - - (*p_outbuf)++; + if(p_outbuf) { + **p_outbuf = outc; + (*p_outbuf)++; + } (*p_outsz)--; } END_CONVERSION; } + +const struct _PDCLIB_charcodec _PDCLIB_utf8_codec = { + .__mbsinit = utf8_mbsinit, + .__mbstoc32s = utf8toc32, + .__c32stombs = c32toutf8, + .__mb_max = 4, +}; + #endif #ifdef TEST