]> pd.if.org Git - pdclib.old/commitdiff
PDCLIB-2 PDCLIB-12: If the internal character conversion routines are called
author Owen Shepherd <owen.shepherd@e43.eu>
Mon, 31 Dec 2012 01:38:06 +0000 (01:38 +0000)
committer Owen Shepherd <owen.shepherd@e43.eu>
Mon, 31 Dec 2012 01:38:06 +0000 (01:38 +0000)
with p_outbuf == NULL, then they will perform the conversion but *not* store
the result. This facilitates the implementation of mbrlen and similar
functions.

Added (optional) UTF-16 variants of the conversion routines. If they are
provided, PDCLib will use them whenever it needs a UTF-16 conversion (e.g. the
uchar.h UTF-16 routines, or wchar_t on win32).

internals/_PDCLIB_encoding.h
opt/basecodecs/_PDCLIB_ascii.c
opt/basecodecs/_PDCLIB_latin1.c
opt/basecodecs/_PDCLIB_utf8.c

index 01e60a1916389b0263827bf5d597400ce5ffebc8..0178b12f70dcf6e338770caf4e1086c8ef7a7d91 100644 (file)
@@ -147,15 +147,21 @@ static inline _PDCLIB_size_t _PDCLIB_c32rtowc(
 #endif
 
 typedef struct {
 #endif
 
 typedef struct {
-    /* Reads at most *_P_insz bytes from *_P_inbuf and writes the result into 
-     * *_P_outbuf, writing at most *_P_outsz characters. Updates *_P_outbuf,
-     * *_P_outsz, *_P_inbuf, *_P_outsz with the resulting state
+    /* Reads at most *_P_insz code units from *_P_inbuf and writes the result 
+     * into *_P_outbuf, writing at most *_P_outsz code units. Updates 
+     * *_P_outbuf, *_P_outsz, *_P_inbuf and *_P_insz with the resulting state.
+     *
+     * If _P_outbuf is NULL, then the input must be processed but no output 
+     * generated. _P_outsz may be processed as normal.
      *
     * Returns true if the conversion completed successfully (i.e. one of
     * *_P_outsz or *_P_insz reached zero and no coding errors were
     * encountered), else returns false.
      */
      *
      * Returns true if the conversion completed successfully (i.e. one of
      * *_P_outsz or *_P_insz reached zero and no coding errors were
      * encountered), else returns false.
      */
-    _PDCLIB_bool (*__mbtoc32)(
+
+    /* UCS-4 variants. Mandatory. */
+
+    _PDCLIB_bool (*__mbstoc32s)(
         _PDCLIB_char32_t       **_PDCLIB_restrict   _P_outbuf,
         _PDCLIB_size_t          *_PDCLIB_restrict   _P_outsz,
         const char             **_PDCLIB_restrict   _P_inbuf,
         _PDCLIB_char32_t       **_PDCLIB_restrict   _P_outbuf,
         _PDCLIB_size_t          *_PDCLIB_restrict   _P_outsz,
         const char             **_PDCLIB_restrict   _P_inbuf,
@@ -163,13 +169,35 @@ typedef struct {
         _PDCLIB_mbstate_t       *_PDCLIB_restrict   _P_ps
     );
 
         _PDCLIB_mbstate_t       *_PDCLIB_restrict   _P_ps
     );
 
-    _PDCLIB_bool (*__c32tomb)(
+    _PDCLIB_bool (*__c32stombs)(
         char                   **_PDCLIB_restrict  _P_outbuf,
         _PDCLIB_size_t          *_PDCLIB_restrict  _P_outsz,
         const _PDCLIB_char32_t **_PDCLIB_restrict  _P_inbuf,
         _PDCLIB_size_t          *_PDCLIB_restrict  _P_insz,
         _PDCLIB_mbstate_t       *_PDCLIB_restrict  _P_ps
     );
         char                   **_PDCLIB_restrict  _P_outbuf,
         _PDCLIB_size_t          *_PDCLIB_restrict  _P_outsz,
         const _PDCLIB_char32_t **_PDCLIB_restrict  _P_inbuf,
         _PDCLIB_size_t          *_PDCLIB_restrict  _P_insz,
         _PDCLIB_mbstate_t       *_PDCLIB_restrict  _P_ps
     );
+
+    /* UTF-16 variants; same as above except optional. 
+     *
+     * If not provided, PDCLib will internally synthesize them on top of the
+     * UCS-4 variants above, albeit at a performance cost.
+     */
+
+    _PDCLIB_bool (*__mbstoc16s)(
+        _PDCLIB_char16_t       **_PDCLIB_restrict   _P_outbuf,
+        _PDCLIB_size_t          *_PDCLIB_restrict   _P_outsz,
+        const char             **_PDCLIB_restrict   _P_inbuf,
+        _PDCLIB_size_t          *_PDCLIB_restrict   _P_insz,
+        _PDCLIB_mbstate_t       *_PDCLIB_restrict   _P_ps
+    );
+
+    _PDCLIB_bool (*__c16stombs)(
+        char                   **_PDCLIB_restrict  _P_outbuf,
+        _PDCLIB_size_t          *_PDCLIB_restrict  _P_outsz,
+        const _PDCLIB_char16_t **_PDCLIB_restrict  _P_inbuf,
+        _PDCLIB_size_t          *_PDCLIB_restrict  _P_insz,
+        _PDCLIB_mbstate_t       *_PDCLIB_restrict  _P_ps
+    );
 } _PDCLIB_charcodec;
 
 #endif
 } _PDCLIB_charcodec;
 
 #endif
index ca70a667af84069938925c81605a836c35883317..a705a7a75b43da5b18e429b681601b0ad7d144dc 100644 (file)
@@ -20,10 +20,13 @@ static bool asciitoc32(
         unsigned char c = **p_inbuf;
         if(c > 127)
             return false;
         unsigned char c = **p_inbuf;
         if(c > 127)
             return false;
-        **p_outbuf = c;
+        
+        if(p_outbuf) {
+            **p_outbuf = c;
+            (*p_outbuf)++; 
+        }
 
         (*p_inbuf)++;
 
         (*p_inbuf)++;
-        (*p_outbuf)++; 
         (*p_insz)--; 
         (*p_outsz)--;
     }
         (*p_insz)--; 
         (*p_outsz)--;
     }
@@ -42,15 +45,24 @@ static bool c32toascii(
         char32_t c = **p_inbuf;
         if(c > 127)
             return false;
         char32_t c = **p_inbuf;
         if(c > 127)
             return false;
-        **p_outbuf = c;
+
+        if(p_outbuf) {
+            **p_outbuf = c;
+            (*p_outbuf)++; 
+        }
 
         (*p_inbuf)++;
 
         (*p_inbuf)++;
-        (*p_outbuf)++; 
         (*p_insz)--; 
         (*p_outsz)--;        
     }
     return true;
 }
         (*p_insz)--; 
         (*p_outsz)--;        
     }
     return true;
 }
+
+_PDCLIB_charcodec _PDCLIB_ascii_codec = {
+    .__mbstoc32s = asciitoc32,
+    .__c32stombs = c32toascii,
+};
+
 #endif
 
 #ifdef TEST
 #endif
 
 #ifdef TEST
index 73844f417b6aa32587cf5f1e03346d6330d0e0e1..f78574f4b947b79adc48fed36608097ef0897bd9 100644 (file)
@@ -18,10 +18,13 @@ static bool latin1toc32(
 {
     while(*p_outsz && *p_insz) {
         unsigned char c = **p_inbuf;
 {
     while(*p_outsz && *p_insz) {
         unsigned char c = **p_inbuf;
-        **p_outbuf = c;
+
+        if(p_outbuf) {
+            **p_outbuf = c;
+            (*p_outbuf)++; 
+        }
 
         (*p_inbuf)++;
 
         (*p_inbuf)++;
-        (*p_outbuf)++; 
         (*p_insz)--; 
         (*p_outsz)--;
     }
         (*p_insz)--; 
         (*p_outsz)--;
     }
@@ -40,15 +43,24 @@ static bool c32tolatin1(
         char32_t c = **p_inbuf;
         if(c > 255)
             return false;
         char32_t c = **p_inbuf;
         if(c > 255)
             return false;
-        **p_outbuf = c;
+
+        if(p_outbuf) {
+            **p_outbuf = c;
+            (*p_outbuf)++;
+        }
 
         (*p_inbuf)++;
 
         (*p_inbuf)++;
-        (*p_outbuf)++; 
         (*p_insz)--; 
         (*p_outsz)--;        
     }
     return true;
 }
         (*p_insz)--; 
         (*p_outsz)--;        
     }
     return true;
 }
+
+_PDCLIB_charcodec _PDCLIB_latin1_codec = {
+    .__mbstoc32s = latin1toc32,
+    .__c32stombs = c32tolatin1,
+};
+
 #endif
 
 #ifdef TEST
 #endif
 
 #ifdef TEST
index a88f964077457c04f4d3cdba420b86747b82bc2e..8183aef254ecd69e2126b06ea9d1d7686f2b3863 100644 (file)
@@ -44,11 +44,12 @@ end_conversion:             \
     goto end_conversion;    \
 } while(0)
 
     goto end_conversion;    \
 } while(0)
 
-#define OUT32(_c)  do {         \
-    (*((*p_outbuf)++)) = (_c);  \
-    (*p_outsz)--;               \
-    _PDCLIB_UNDEFINED(accum);   \
-    state = DecStart;           \
+#define OUT32(_c)  do {             \
+    if(p_outbuf)                    \
+        (*((*p_outbuf)++)) = (_c);  \
+    (*p_outsz)--;                   \
+    _PDCLIB_UNDEFINED(accum);       \
+    state = DecStart;               \
 } while(0)
 #define CHECK_CONTINUATION \
     do { if((c & 0xC0) != 0x80) return false; } while(0)
 } while(0)
 #define CHECK_CONTINUATION \
     do { if((c & 0xC0) != 0x80) return false; } while(0)
@@ -172,20 +173,20 @@ static bool c32toutf8(
 {
     START_CONVERSION
     while(*p_outsz) {
 {
     START_CONVERSION
     while(*p_outsz) {
-        char     *c8 =  *p_outbuf;
+        unsigned char outc;
         switch(state) {
         case Enc3R:
         switch(state) {
         case Enc3R:
-            *c8 = 0x80 | ((accum >> 12) & 0x3F);
+            outc = 0x80 | ((accum >> 12) & 0x3F);
             state = Enc2R;
             break;
 
         case Enc2R:
             state = Enc2R;
             break;
 
         case Enc2R:
-            *c8 = 0x80 | ((accum >> 6) & 0x3F);
+            outc = 0x80 | ((accum >> 6) & 0x3F);
             state = Enc1R;
             break;
 
         case Enc1R:
             state = Enc1R;
             break;
 
         case Enc1R:
-            *c8 = 0x80 | (accum & 0x3F);
+            outc = 0x80 | (accum & 0x3F);
             state = EncStart;
             _PDCLIB_UNDEFINED(accum);
             break;
             state = EncStart;
             _PDCLIB_UNDEFINED(accum);
             break;
@@ -199,17 +200,17 @@ static bool c32toutf8(
             (*p_insz)--;
 
             if(accum <= 0x7F) {
             (*p_insz)--;
 
             if(accum <= 0x7F) {
-                *c8 = accum;
+                outc = accum;
                 state = EncStart;
                 _PDCLIB_UNDEFINED(accum);
             } else if(accum <= 0x7FF) {
                 state = EncStart;
                 _PDCLIB_UNDEFINED(accum);
             } else if(accum <= 0x7FF) {
-                *c8 = 0xC0 | (accum >> 6);
+                outc = 0xC0 | (accum >> 6);
                 state = Enc1R;
             } else if(accum <= 0xFFFF) {
                 state = Enc1R;
             } else if(accum <= 0xFFFF) {
-                *c8 = 0xE0 | (accum >> 12);
+                outc = 0xE0 | (accum >> 12);
                 state = Enc2R;
             } else if(accum <= 0x10FFFF) {
                 state = Enc2R;
             } else if(accum <= 0x10FFFF) {
-                *c8 = 0xF0 | (accum >> 18);
+                outc = 0xF0 | (accum >> 18);
                 state = Enc3R;
             } else {
                 FINISH(false);
                 state = Enc3R;
             } else {
                 FINISH(false);
@@ -217,12 +218,20 @@ static bool c32toutf8(
             break;
         }
 
             break;
         }
 
-
-        (*p_outbuf)++; 
+        if(p_outbuf) {
+            **p_outbuf = outc;
+            (*p_outbuf)++; 
+        }
         (*p_outsz)--;        
     }
     END_CONVERSION;
 }
         (*p_outsz)--;        
     }
     END_CONVERSION;
 }
+
+_PDCLIB_charcodec _PDCLIB_utf8_codec = {
+    .__mbstoc32s = utf8toc32,
+    .__c32stombs = c32toutf8,
+};
+
 #endif
 
 #ifdef TEST
 #endif
 
 #ifdef TEST