#include <stdint.h>
#include <uchar.h>
#include <assert.h>
+#include <_PDCLIB_encoding.h>
/* Use of the mbstate:
*
* _St32[1] is the character accumulated so far
*/
+static bool utf8_mbsinit( const mbstate_t *p_s ) /* mbsinit hook installed in _PDCLIB_utf8_codec.__mbsinit */
+{ return p_s->_StUC[0] == 0; } /* true exactly when _StUC[0] is zero. NOTE(review): the header comment above names _St32[1]; confirm which mbstate_t member the converters actually use. */
+
enum {
DecStart = 0,
goto end_conversion; \
} while(0)
-#define OUT32(_c) do { \
- (*((*p_outbuf)++)) = (_c); \
- (*p_outsz)--; \
- _PDCLIB_UNDEFINED(accum); \
- state = DecStart; \
+#define OUT32(_c) do { \
+ if(p_outbuf) \
+ (*((*p_outbuf)++)) = (_c); \
+ (*p_outsz)--; \
+ _PDCLIB_UNDEFINED(accum); \
+ state = DecStart; \
} while(0)
+
/* Validate the byte currently held in `c`: unless its top two bits are 10
 * (i.e. (c & 0xC0) == 0x80), return false from the function in which the
 * macro is expanded. Relies on a variable named `c` being in scope at the
 * expansion site. */
#define CHECK_CONTINUATION \
do { if((c & 0xC0) != 0x80) return false; } while(0)
static bool utf8toc32(
- char32_t **restrict p_outbuf,
- size_t *restrict p_outsz,
- const char **restrict p_inbuf,
- size_t *restrict p_insz,
- mbstate_t *restrict p_s
+ char32_t *restrict *restrict p_outbuf,
+ size_t *restrict p_outsz,
+ const char *restrict *restrict p_inbuf,
+ size_t *restrict p_insz,
+ mbstate_t *restrict p_s
)
{
START_CONVERSION
char32_t c32;
switch(state) {
case DecStart:
- // 1 byte
+ // 1 byte
if(c <= 0x7F) {
OUT32(c);
} else if(c <= 0xDF) {
}
(*p_inbuf)++;
- (*p_insz)--;
+ (*p_insz)--;
}
END_CONVERSION;
}
};
static bool c32toutf8(
- char **restrict p_outbuf,
- size_t *restrict p_outsz,
- const char32_t **restrict p_inbuf,
- size_t *restrict p_insz,
- mbstate_t *restrict p_s
+ char *restrict *restrict p_outbuf,
+ size_t *restrict p_outsz,
+ const char32_t *restrict *restrict p_inbuf,
+ size_t *restrict p_insz,
+ mbstate_t *restrict p_s
)
{
START_CONVERSION
while(*p_outsz) {
- char *c8 = *p_outbuf;
+ unsigned char outc = 0;
switch(state) {
case Enc3R:
- *c8 = 0x80 | ((accum >> 12) & 0x3F);
+ outc = 0x80 | ((accum >> 12) & 0x3F);
state = Enc2R;
break;
case Enc2R:
- *c8 = 0x80 | ((accum >> 6) & 0x3F);
+ outc = 0x80 | ((accum >> 6) & 0x3F);
state = Enc1R;
break;
case Enc1R:
- *c8 = 0x80 | (accum & 0x3F);
+ outc = 0x80 | (accum & 0x3F);
state = EncStart;
_PDCLIB_UNDEFINED(accum);
break;
(*p_insz)--;
if(accum <= 0x7F) {
- *c8 = accum;
+ outc = accum;
state = EncStart;
_PDCLIB_UNDEFINED(accum);
} else if(accum <= 0x7FF) {
- *c8 = 0xC0 | (accum >> 6);
+ outc = 0xC0 | (accum >> 6);
state = Enc1R;
} else if(accum <= 0xFFFF) {
- *c8 = 0xE0 | (accum >> 12);
+ outc = 0xE0 | (accum >> 12);
state = Enc2R;
} else if(accum <= 0x10FFFF) {
- *c8 = 0xF0 | (accum >> 18);
+ outc = 0xF0 | (accum >> 18);
state = Enc3R;
} else {
FINISH(false);
break;
}
-
- (*p_outbuf)++;
- (*p_outsz)--;
+ if(p_outbuf) {
+ **p_outbuf = outc;
+ (*p_outbuf)++;
+ }
+ (*p_outsz)--;
}
END_CONVERSION;
}
+
+const struct _PDCLIB_charcodec _PDCLIB_utf8_codec = { /* codec descriptor wiring up the UTF-8 routines defined in this file */
+ .__mbsinit = utf8_mbsinit, /* state predicate: true when _StUC[0] == 0 */
+ .__mbstoc32s = utf8toc32, /* char input -> char32_t output */
+ .__c32stombs = c32toutf8, /* char32_t input -> char output */
+ .__mb_max = 4, /* c32toutf8 emits at most a lead byte plus three continuation bytes (values up to 0x10FFFF) */
+};
+
#endif
#ifdef TEST
#ifndef REGTEST
// Valid conversion & back
- static const char* input = "abcde" "\xDF\xBF" "\xEF\xBF\xBF"
+ static const char* input = "abcde" "\xDF\xBF" "\xEF\xBF\xBF"
"\xF4\x8F\xBF\xBF";
char32_t c32out[8];
- char32_t *c32ptr = &c32out[0];
- size_t c32rem = 8;
- char *chrptr = (char*) &input[0];
- size_t chrrem = strlen(input);
- mbstate_t mbs = { 0 };
+ char32_t *c32ptr = &c32out[0];
+ size_t c32rem = 8;
+ const char *chrptr = (char*) &input[0];
+ size_t chrrem = strlen(input);
+ mbstate_t mbs = { 0 };
TESTCASE(utf8toc32(&c32ptr, &c32rem, &chrptr, &chrrem, &mbs));
TESTCASE(c32rem == 0);