pd.if.org Git - pdclib/blobdiff - opt/basecodecs/_PDCLIB_utf8.c
PDCLib includes with quotes, not <>.
[pdclib] / opt / basecodecs / _PDCLIB_utf8.c
index a88f964077457c04f4d3cdba420b86747b82bc2e..a4c19adb349a735c6715af6c66641eab61de66b3 100644 (file)
@@ -9,6 +9,7 @@
 #include <stdint.h>
 #include <uchar.h>
 #include <assert.h>
+#include "_PDCLIB_encoding.h"
 
 /* Use of the mbstate:
  *
@@ -16,6 +17,9 @@
  * _St32[1] is the character accumulated so far
  */
 
+static bool utf8_mbsinit( const mbstate_t *p_s ) /* mbsinit hook for the UTF-8 codec: inspects the state object *p_s */
+{ return p_s->_StUC[0] == 0; } /* true when _StUC[0] is zero; presumably that slot holds the current conversion state (DecStart is 0) -- confirm against START_CONVERSION */
+
 enum {
     DecStart = 0,
 
@@ -44,21 +48,23 @@ end_conversion:             \
     goto end_conversion;    \
 } while(0)
 
-#define OUT32(_c)  do {         \
-    (*((*p_outbuf)++)) = (_c);  \
-    (*p_outsz)--;               \
-    _PDCLIB_UNDEFINED(accum);   \
-    state = DecStart;           \
+#define OUT32(_c)  do {             /* emit one finished code point _c */ \
+    if(p_outbuf)                    /* p_outbuf may be NULL: skip the store ... */ \
+        (*((*p_outbuf)++)) = (_c);  \
+    (*p_outsz)--;                   /* ... but still consume one output slot */ \
+    _PDCLIB_UNDEFINED(accum);       \
+    state = DecStart;               /* next input byte starts a new sequence */ \
+} while(0)
+
 #define CHECK_CONTINUATION /* require c to have the bit pattern 10xxxxxx */ \
     do { if((c & 0xC0) != 0x80) return false; } while(0) /* on mismatch, returns false from the enclosing function */
 
 static bool utf8toc32(
-    char32_t       **restrict   p_outbuf,
-    size_t          *restrict   p_outsz,
-    const char     **restrict   p_inbuf,
-    size_t          *restrict   p_insz,
-    mbstate_t       *restrict   p_s
+    char32_t       *restrict *restrict   p_outbuf,
+    size_t                   *restrict   p_outsz,
+    const char     *restrict *restrict   p_inbuf,
+    size_t                   *restrict   p_insz,
+    mbstate_t                *restrict   p_s
 )
 {
     START_CONVERSION
@@ -67,7 +73,7 @@ static bool utf8toc32(
         char32_t      c32;
         switch(state) {
         case DecStart:
-            // 1 byte 
+            // 1 byte
             if(c <= 0x7F) {
                 OUT32(c);
             } else if(c <= 0xDF) {
@@ -150,7 +156,7 @@ static bool utf8toc32(
         }
 
         (*p_inbuf)++;
-        (*p_insz)--; 
+        (*p_insz)--;
     }
     END_CONVERSION;
 }
@@ -163,29 +169,29 @@ enum {
 };
 
 static bool c32toutf8(
-    char           **restrict  p_outbuf,
-    size_t          *restrict  p_outsz,
-    const char32_t **restrict  p_inbuf,
-    size_t          *restrict  p_insz,
-    mbstate_t       *restrict  p_s
+    char           *restrict *restrict  p_outbuf,
+    size_t                   *restrict  p_outsz,
+    const char32_t *restrict *restrict  p_inbuf,
+    size_t                   *restrict  p_insz,
+    mbstate_t                *restrict  p_s
 )
 {
     START_CONVERSION
     while(*p_outsz) {
-        char     *c8 =  *p_outbuf;
+        unsigned char outc = 0;
         switch(state) {
         case Enc3R:
-            *c8 = 0x80 | ((accum >> 12) & 0x3F);
+            outc = 0x80 | ((accum >> 12) & 0x3F);
             state = Enc2R;
             break;
 
         case Enc2R:
-            *c8 = 0x80 | ((accum >> 6) & 0x3F);
+            outc = 0x80 | ((accum >> 6) & 0x3F);
             state = Enc1R;
             break;
 
         case Enc1R:
-            *c8 = 0x80 | (accum & 0x3F);
+            outc = 0x80 | (accum & 0x3F);
             state = EncStart;
             _PDCLIB_UNDEFINED(accum);
             break;
@@ -199,17 +205,17 @@ static bool c32toutf8(
             (*p_insz)--;
 
             if(accum <= 0x7F) {
-                *c8 = accum;
+                outc = accum;
                 state = EncStart;
                 _PDCLIB_UNDEFINED(accum);
             } else if(accum <= 0x7FF) {
-                *c8 = 0xC0 | (accum >> 6);
+                outc = 0xC0 | (accum >> 6);
                 state = Enc1R;
             } else if(accum <= 0xFFFF) {
-                *c8 = 0xE0 | (accum >> 12);
+                outc = 0xE0 | (accum >> 12);
                 state = Enc2R;
             } else if(accum <= 0x10FFFF) {
-                *c8 = 0xF0 | (accum >> 18);
+                outc = 0xF0 | (accum >> 18);
                 state = Enc3R;
             } else {
                 FINISH(false);
@@ -217,32 +223,42 @@ static bool c32toutf8(
             break;
         }
 
-
-        (*p_outbuf)++; 
-        (*p_outsz)--;        
+        if(p_outbuf) {
+            **p_outbuf = outc;
+            (*p_outbuf)++;
+        }
+        (*p_outsz)--;
     }
     END_CONVERSION;
 }
+
+const struct _PDCLIB_charcodec_t _PDCLIB_utf8_codec = { /* UTF-8 codec descriptor: exposes this file's static converters through one table */
+    .__mbsinit   = utf8_mbsinit, /* checks whether an mbstate_t has _StUC[0] == 0 */
+    .__mbstoc32s = utf8toc32,    /* UTF-8 bytes -> char32_t */
+    .__c32stombs = c32toutf8,    /* char32_t -> UTF-8 bytes */
+    .__mb_max    = 4,            /* c32toutf8 emits at most a lead byte plus three continuation bytes (Enc3R, Enc2R, Enc1R) */
+};
+
 #endif
 
 #ifdef TEST
-#include <_PDCLIB_test.h>
+#include "_PDCLIB_test.h"
 
 int main( void )
 {
 #ifndef REGTEST
     // Valid conversion & back
 
-    static const char* input = "abcde" "\xDF\xBF" "\xEF\xBF\xBF" 
+    static const char* input = "abcde" "\xDF\xBF" "\xEF\xBF\xBF"
                                "\xF4\x8F\xBF\xBF";
 
     char32_t c32out[8];
 
-    char32_t *c32ptr = &c32out[0];
-    size_t    c32rem = 8;
-    char     *chrptr = (char*) &input[0];
-    size_t    chrrem = strlen(input);
-    mbstate_t mbs = { 0 };
+    char32_t   *c32ptr = &c32out[0];
+    size_t      c32rem = 8;
+    const char *chrptr = (char*) &input[0];
+    size_t      chrrem = strlen(input);
+    mbstate_t   mbs = { 0 };
 
     TESTCASE(utf8toc32(&c32ptr, &c32rem, &chrptr, &chrrem, &mbs));
     TESTCASE(c32rem == 0);