]> pd.if.org Git - pdclib/blobdiff - internals/_PDCLIB_encoding.h
Pointer typedef resulted in non-const structure where const was intended.
[pdclib] / internals / _PDCLIB_encoding.h
index 11e2ac5ee27c81e99903d007363dcae4553c5f27..10f1140c3e73527670d08ab20c246a4ce9572d27 100644 (file)
@@ -4,36 +4,27 @@
    Permission is granted to use, modify, and / or redistribute at will.
 */
 
-#ifndef _PDCLIB_ENCODING_H
-#define _PDCLIB_ENCODING_H _PDCLIB_ENCODING_H
-#include "_PDCLIB_int.h"
+#ifndef __PDCLIB_ENCODING_H
+#define __PDCLIB_ENCODING_H __PDCLIB_ENCODING_H
+#include <uchar.h>
 
-#ifndef __cplusplus
-typedef _PDCLIB_int16_t         _PDCLIB_char16_t;
-typedef _PDCLIB_int32_t         _PDCLIB_char32_t;
-#else
-typedef char16_t                _PDCLIB_char16_t;
-typedef char32_t                _PDCLIB_char32_t;
-#endif
-
-#ifdef _PDCLIB_WCHAR_IS_UCS2
 /* Must be called with bufsize >= 1, in != NULL, out != NULL, ps != NULL
  *
- * Converts a wchar to a UCS4 (char32_t) value. Returns
+ * Converts a UTF-16 (char16_t) sequence to a UCS-4 (char32_t) value. Returns
  *   1, 2   : Valid character (converted to UCS-4)
  *   -1     : Encoding error
  *   -2     : Partial character (only lead surrogate in buffer)
  */
-static inline int _PDCLIB_wcrtoc32(
+static inline int _PDCLIB_c16rtoc32(
             _PDCLIB_char32_t    *_PDCLIB_restrict   out, 
-    const   _PDCLIB_wchar_t     *_PDCLIB_restrict   in,
+    const   _PDCLIB_char16_t    *_PDCLIB_restrict   in,
             _PDCLIB_size_t                          bufsize,
             _PDCLIB_mbstate_t   *_PDCLIB_restrict   ps  
 )
 {
     if(ps->_Surrogate) {
         // We already have a lead surrogate
-        if(*in & ~0x3FF != 0xDC00) {
+        if((*in & ~0x3FF) != 0xDC00) {
             // Encoding error
             return -1;
         } else {
@@ -42,11 +33,11 @@ static inline int _PDCLIB_wcrtoc32(
             ps->_Surrogate = 0;
             return 1;
         }
-    } if(*in & ~0x3FF == 0xD800) {
+    } if((*in & ~0x3FF) == 0xD800) {
         // Lead surrogate
         if(bufsize >= 2) {
             // Buffer big enough
-            if(in[1] & ~0x3FF != 0xDC00) {
+            if((in[1] & ~0x3FF) != 0xDC00) {
                 // Encoding error
                 return -1;
             } else {
@@ -65,7 +56,7 @@ static inline int _PDCLIB_wcrtoc32(
     }
 }
 
-static inline _PDCLIB_size_t _PDCLIB_c32rtowc(
+static inline _PDCLIB_size_t _PDCLIB_c32rtoc16(
             _PDCLIB_wchar_t     *_PDCLIB_restrict   out,
     const   _PDCLIB_char32_t    *_PDCLIB_restrict   in,
             _PDCLIB_size_t                          bufsize,
@@ -84,65 +75,134 @@ static inline _PDCLIB_size_t _PDCLIB_c32rtowc(
         return 1;
     } else {
         // Supplementary plane character
-        *out = 0xD800 | (*in & 0x3FF);
+        *out = 0xD800 | (*in >> 10);
         if(bufsize >= 2) {
-            out[1] = 0xDC00 | (*in >> 10);
+            out[1] = 0xDC00 | (*in & 0x3FF);
             return 2;
         } else {
-            ps->_Surrogate = 0xDC00 | (*in >> 10);
+            ps->_Surrogate = 0xDC00 | (*in & 0x3FF);
             return 1;
         }
     }
 }
-#else
-/* Dummy implementation for when wc == c32 */
-static inline _PDCLIB_size_t _PDCLIB_wcrtoc32(
-            _PDCLIB_char32_t    *_PDCLIB_restrict   out, 
-    const   _PDCLIB_wchar_t     *_PDCLIB_restrict   in,
-            _PDCLIB_size_t                          bufsize,
-            _PDCLIB_mbstate_t   *_PDCLIB_restrict   ps  
-)
-{
-    *out = *in;
-    return 1;
-}
-
-static inline _PDCLIB_size_t _PDCLIB_c32rtowc(
-            _PDCLIB_wchar_t     *_PDCLIB_restrict   out,
-    const   _PDCLIB_char32_t    *_PDCLIB_restrict   in,
-            _PDCLIB_size_t                          bufsize,
-            _PDCLIB_mbstate_t   *_PDCLIB_restrict   ps
-)
-{
-    *out = *in;
-    return 1;
-}
-#endif
 
-typedef struct {
-    /* Reads at most *_P_insz bytes from *_P_inbuf and writes the result into 
-     * *_P_outbuf, writing at most *_P_outsz characters. Updates *_P_outbuf,
-     * *_P_outsz, *_P_inbuf, *_P_outsz with the resulting state
+struct _PDCLIB_charcodec_t {
+    /* Reads at most *_P_insz code units from *_P_inbuf and writes the result 
+     * into *_P_outbuf, writing at most *_P_outsz code units. Updates 
+     * *_P_outbuf, *_P_outsz, *_P_inbuf, *_P_insz with the resulting state
+     *
+     * If _P_outbuf is NULL, then the input must be processed but no output 
+     * generated. _P_outsz may be processed as normal.
      *
      * Returns true if the conversion completed successfully (i.e. one of 
      * *_P_outsz or *_P_insz reached zero and no coding errors were 
      * encountered), else return false.
      */
-    _PDCLIB_bool (*__mbtoc32)(
-        _PDCLIB_char32_t       **_PDCLIB_restrict   _P_outbuf,
-        _PDCLIB_size_t          *_PDCLIB_restrict   _P_outsz,
-        const char             **_PDCLIB_restrict   _P_inbuf,
-        _PDCLIB_size_t          *_PDCLIB_restrict   _P_insz,
-        _PDCLIB_mbstate_t       *_PDCLIB_restrict   _P_ps
+
+    /* mbsinit. Mandatory. */
+    _PDCLIB_bool (*__mbsinit)(const _PDCLIB_mbstate_t *_P_ps);
+
+    /* UCS-4 variants. Mandatory. */
+
+    _PDCLIB_bool (*__mbstoc32s)(
+        _PDCLIB_char32_t       *_PDCLIB_restrict *_PDCLIB_restrict   _P_outbuf,
+        _PDCLIB_size_t                           *_PDCLIB_restrict   _P_outsz,
+        const char             *_PDCLIB_restrict *_PDCLIB_restrict   _P_inbuf,
+        _PDCLIB_size_t                           *_PDCLIB_restrict   _P_insz,
+        _PDCLIB_mbstate_t                        *_PDCLIB_restrict   _P_ps
+    );
+
+    _PDCLIB_bool (*__c32stombs)(
+        char                   *_PDCLIB_restrict *_PDCLIB_restrict  _P_outbuf,
+        _PDCLIB_size_t                           *_PDCLIB_restrict  _P_outsz,
+        const _PDCLIB_char32_t *_PDCLIB_restrict *_PDCLIB_restrict  _P_inbuf,
+        _PDCLIB_size_t                           *_PDCLIB_restrict  _P_insz,
+        _PDCLIB_mbstate_t                        *_PDCLIB_restrict  _P_ps
+    );
+
+    /* UTF-16 variants; same as above except optional. 
+     *
+     * If not provided, PDCLib will internally synthesize them on top of the UCS-4
+     * variants above, albeit at a performance cost.
+     */
+
+    _PDCLIB_bool (*__mbstoc16s)(
+        _PDCLIB_char16_t       *_PDCLIB_restrict *_PDCLIB_restrict   _P_outbuf,
+        _PDCLIB_size_t                           *_PDCLIB_restrict   _P_outsz,
+        const char             *_PDCLIB_restrict *_PDCLIB_restrict   _P_inbuf,
+        _PDCLIB_size_t                           *_PDCLIB_restrict   _P_insz,
+        _PDCLIB_mbstate_t                        *_PDCLIB_restrict   _P_ps
     );
 
-    _PDCLIB_bool (*__c32tomb)(
-        char                   **_PDCLIB_restrict  _P_outbuf,
-        _PDCLIB_size_t          *_PDCLIB_restrict  _P_outsz,
-        const _PDCLIB_char32_t **_PDCLIB_restrict  _P_inbuf,
-        _PDCLIB_size_t          *_PDCLIB_restrict  _P_insz,
-        _PDCLIB_mbstate_t       *_PDCLIB_restrict  _P_ps
+    _PDCLIB_bool (*__c16stombs)(
+        char                   *_PDCLIB_restrict *_PDCLIB_restrict  _P_outbuf,
+        _PDCLIB_size_t                           *_PDCLIB_restrict  _P_outsz,
+        const _PDCLIB_char16_t *_PDCLIB_restrict *_PDCLIB_restrict  _P_inbuf,
+        _PDCLIB_size_t                           *_PDCLIB_restrict  _P_insz,
+        _PDCLIB_mbstate_t                        *_PDCLIB_restrict  _P_ps
     );
-} _PDCLIB_charcodec;
+
+    size_t __mb_max;
+};
+
+/* mbstate _PendState values */
+enum {
+    /* Nothing pending; _PendChar ignored */
+    _PendClear = 0, 
+
+    /* Process the character stored in _PendChar before reading the buffer 
+     * passed for the conversion
+     */
+    _PendPrefix = 1,
+};
+
+/* XXX Defining these here is temporary - will move to xlocale in future */
+size_t mbrtoc16_l(
+        char16_t    *_PDCLIB_restrict   pc16,
+        const char  *_PDCLIB_restrict   s, 
+        size_t                          n,
+        mbstate_t   *_PDCLIB_restrict   ps,
+_PDCLIB_locale_t     _PDCLIB_restrict   l);
+
+size_t c16rtomb_l(
+        char        *_PDCLIB_restrict   s, 
+        char16_t                        c16, 
+        mbstate_t   *_PDCLIB_restrict   ps,
+_PDCLIB_locale_t     _PDCLIB_restrict   l);
+
+size_t mbrtoc32_l(
+        char32_t    *_PDCLIB_restrict   pc32,
+        const char  *_PDCLIB_restrict   s, 
+        size_t                          n,
+        mbstate_t   *_PDCLIB_restrict   ps,
+_PDCLIB_locale_t     _PDCLIB_restrict   l);
+
+size_t c32rtomb_l(
+        char        *_PDCLIB_restrict   s, 
+        char32_t                        c32,
+        mbstate_t   *_PDCLIB_restrict   ps,
+_PDCLIB_locale_t     _PDCLIB_restrict   l);
+
+#define _PDCLIB_WCHAR_ENCODING_UTF16 16
+#define _PDCLIB_WCHAR_ENCODING_UCS4  32
+
+#if !defined(_PDCLIB_WCHAR_ENCODING)
+    #define _PDCLIB_WCHAR_ENCODING 0
+#endif
+
+#if _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UTF16
+    #define _PDCLIB_mbrtocwc_l mbrtoc16_l
+    #define _PDCLIB_mbrtocwc   mbrtoc16
+    #define _PDCLIB_cwcrtomb_l c16rtomb_l
+    #define _PDCLIB_cwcrtomb   c16rtomb
+#elif _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UCS4
+    #define _PDCLIB_mbrtocwc_l mbrtoc32_l
+    #define _PDCLIB_mbrtocwc   mbrtoc32
+    #define _PDCLIB_cwcrtomb_l c32rtomb_l
+    #define _PDCLIB_cwcrtomb   c32rtomb
+#else
+    #error _PDCLIB_WCHAR_ENCODING not defined correctly
+    #error Define to one of _PDCLIB_WCHAR_ENCODING_UCS4 or _PDCLIB_WCHAR_ENCODING_UTF16
+#endif
 
 #endif