Permission is granted to use, modify, and / or redistribute at will.
*/
-#ifndef _PDCLIB_ENCODING_H
-#define _PDCLIB_ENCODING_H _PDCLIB_ENCODING_H
-#include "_PDCLIB_int.h"
+#ifndef __PDCLIB_ENCODING_H
+#define __PDCLIB_ENCODING_H __PDCLIB_ENCODING_H
+#include <uchar.h>
-#ifndef __cplusplus
-typedef _PDCLIB_int_least16_t _PDCLIB_char16_t;
-typedef _PDCLIB_int_least32_t _PDCLIB_char32_t;
-#else
-typedef char16_t _PDCLIB_char16_t;
-typedef char32_t _PDCLIB_char32_t;
-#endif
-
-#ifdef _PDCLIB_WCHAR_IS_UCS2
/* Must be called with bufsize >= 1, in != NULL, out != NULL, ps != NULL
*
- * Converts a wchar to a UCS4 (char32_t) value. Returns
+ * Converts a UTF-16 (char16_t) to a UCS4 (char32_t) value. Returns
* 1, 2 : Valid character (converted to UCS-4)
* -1 : Encoding error
* -2 : Partial character (only lead surrogate in buffer)
*/
-static inline int _PDCLIB_wcrtoc32(
+static inline int _PDCLIB_c16rtoc32(
_PDCLIB_char32_t *_PDCLIB_restrict out,
- const _PDCLIB_wchar_t *_PDCLIB_restrict in,
- _PDCLIB__PDCLIB_size_t bufsize,
+ const _PDCLIB_char16_t *_PDCLIB_restrict in,
+ _PDCLIB_size_t bufsize,
_PDCLIB_mbstate_t *_PDCLIB_restrict ps
)
{
if(ps->_Surrogate) {
// We already have a lead surrogate
- if(*in & ~0x3FF != 0xDC00) {
+ if((*in & ~0x3FF) != 0xDC00) {
// Encoding error
return -1;
} else {
ps->_Surrogate = 0;
return 1;
}
- } if(*in & ~0x3FF == 0xD800) {
+ } if((*in & ~0x3FF) == 0xD800) {
// Lead surrogate
if(bufsize >= 2) {
// Buffer big enough
- if(in[1] & ~0x3FF != 0xDC00) {
+ if((in[1] & ~0x3FF) != 0xDC00) {
// Encoding error
return -1;
} else {
}
}
-static inline _PDCLIB_size_t _PDCLIB_c32rtowc(
+static inline _PDCLIB_size_t _PDCLIB_c32rtoc16(
_PDCLIB_wchar_t *_PDCLIB_restrict out,
const _PDCLIB_char32_t *_PDCLIB_restrict in,
- _PDCLIB__PDCLIB_size_t bufsize,
+ _PDCLIB_size_t bufsize,
_PDCLIB_mbstate_t *_PDCLIB_restrict ps
)
{
return 1;
} else {
// Supplementary plane character
- *out = 0xD800 | (*in & 0x3FF);
+ *out = 0xD800 | (*in >> 10);
if(bufsize >= 2) {
- out[1] = 0xDC00 | (*in >> 10);
+ out[1] = 0xDC00 | (*in & 0x3FF);
return 2;
} else {
- ps->_Surrogate = 0xDC00 | (*in >> 10);
+ ps->_Surrogate = 0xDC00 | (*in & 0x3FF);
return 1;
}
}
}
-#else
-/* Dummy implementation for when wc == c32 */
-static inline _PDCLIB_size_t _PDCLIB_wcrtoc32(
- _PDCLIB_char32_t *_PDCLIB_restrict out,
- const _PDCLIB_wchar_t *_PDCLIB_restrict in,
- _PDCLIB__PDCLIB_size_t bufsize,
- _PDCLIB_mbstate_t *_PDCLIB_restrict ps
-)
-{
- *out = *in;
- return 1;
-}
-static inline _PDCLIB_size_t _PDCLIB_c32rtowc(
- _PDCLIB_wchar_t *_PDCLIB_restrict out,
- const _PDCLIB_char32_t *_PDCLIB_restrict in,
- _PDCLIB__PDCLIB_size_t bufsize,
- _PDCLIB_mbstate_t *_PDCLIB_restrict ps
-)
-{
- *out = *in;
- return 1;
-}
-#endif
-
-typedef struct {
- /* Reads at most *_P_insz bytes from *_P_inbuf and writes the result into
- * *_P_outbuf, writing at most *_P_outsz characters. Updates *_P_outbuf,
- * *_P_outsz, *_P_inbuf, *_P_outsz with the resulting state
+struct _PDCLIB_charcodec {
+ /* Reads at most *_P_insz code units from *_P_inbuf and writes the result
+ * into *_P_outbuf, writing at most *_P_outsz code units. Updates
+ * *_P_outbuf, *_P_outsz, *_P_inbuf, *_P_insz with the resulting state
+ *
+ * If _P_outbuf is NULL, then the input must be processed but no output
+ * generated. _P_outsz may be processed as normal.
*
* Returns true if the conversion completed successfully (i.e. one of
* *_P_outsz or *_P_insz reached zero and no coding errors were
* encountered), else returns false.
*/
- _PDCLIB_bool_t (*__mbtoc32)(
- _PDCLIB_char32_t **_PDCLIB_restrict _P_outbuf,
- _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
- const char **_PDCLIB_restrict _P_inbuf,
- _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
- _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
+
+ /* mbsinit. Mandatory. */
+ _PDCLIB_bool (*__mbsinit)(const _PDCLIB_mbstate_t *_P_ps);
+
+ /* UCS-4 variants. Mandatory. */
+
+ _PDCLIB_bool (*__mbstoc32s)(
+ _PDCLIB_char32_t *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
+ _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
+ const char *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
+ _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
+ _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
+ );
+
+ _PDCLIB_bool (*__c32stombs)(
+ char *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
+ _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
+ const _PDCLIB_char32_t *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
+ _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
+ _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
+ );
+
+ /* UTF-16 variants; same as above except optional.
+ *
+ * If not provided, PDCLib will internally synthesize them on top of the
+ * UCS-4 variants above, albeit at a performance cost.
+ */
+
+ _PDCLIB_bool (*__mbstoc16s)(
+ _PDCLIB_char16_t *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
+ _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
+ const char *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
+ _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
+ _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
);
- _PDCLIB_bool_t (*__c32tomb)(
- char **_PDCLIB_restrict _P_outbuf,
- _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
- const _PDCLIB_char32_t **_PDCLIB_restrict _P_inbuf,
- _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
- _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
+ _PDCLIB_bool (*__c16stombs)(
+ char *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
+ _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
+ const _PDCLIB_char16_t *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
+ _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
+ _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
);
-} _PDCLIB_charcodec;
+
+ size_t __mb_max;
+};
+
+/* mbstate _PendState values */
+enum {
+ /* Nothing pending; _PendChar ignored */
+ _PendClear = 0,
+
+ /* Process the character stored in _PendChar before reading the buffer
+ * passed for the conversion
+ */
+ _PendPrefix = 1,
+};
+
+/* XXX Defining these here is temporary - will move to xlocale in future */
+size_t mbrtoc16_l(
+ char16_t *_PDCLIB_restrict pc16,
+ const char *_PDCLIB_restrict s,
+ size_t n,
+ mbstate_t *_PDCLIB_restrict ps,
+_PDCLIB_locale_t _PDCLIB_restrict l);
+
+size_t c16rtomb_l(
+ char *_PDCLIB_restrict s,
+ char16_t c16,
+ mbstate_t *_PDCLIB_restrict ps,
+_PDCLIB_locale_t _PDCLIB_restrict l);
+
+size_t mbrtoc32_l(
+ char32_t *_PDCLIB_restrict pc32,
+ const char *_PDCLIB_restrict s,
+ size_t n,
+ mbstate_t *_PDCLIB_restrict ps,
+_PDCLIB_locale_t _PDCLIB_restrict l);
+
+size_t c32rtomb_l(
+ char *_PDCLIB_restrict s,
+ char32_t c32,
+ mbstate_t *_PDCLIB_restrict ps,
+_PDCLIB_locale_t _PDCLIB_restrict l);
+
+#define _PDCLIB_WCHAR_ENCODING_UTF16 16
+#define _PDCLIB_WCHAR_ENCODING_UCS4 32
+
+#if !defined(_PDCLIB_WCHAR_ENCODING)
+ #define _PDCLIB_WCHAR_ENCODING 0
+#endif
+
+#if _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UTF16
+ #define _PDCLIB_mbrtocwc_l mbrtoc16_l
+ #define _PDCLIB_mbrtocwc mbrtoc16
+ #define _PDCLIB_cwcrtomb_l c16rtomb_l
+ #define _PDCLIB_cwcrtomb c16rtomb
+#elif _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UCS4
+ #define _PDCLIB_mbrtocwc_l mbrtoc32_l
+ #define _PDCLIB_mbrtocwc mbrtoc32
+ #define _PDCLIB_cwcrtomb_l c32rtomb_l
+ #define _PDCLIB_cwcrtomb c32rtomb
+#else
+ #error _PDCLIB_WCHAR_ENCODING not defined correctly
+ #error Define to one of _PDCLIB_WCHAR_ENCODING_UCS4 or _PDCLIB_WCHAR_ENCODING_UTF16
+#endif
#endif