From 1d4e8006a01beb9e40b23fb8b58283fcc46c0757 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Tue, 1 Jan 2013 17:58:45 +0000 Subject: [PATCH] PDCLIB-2 PDCLIB-8: Implement mbrtowc using mbrtoc32/mbrtoc16 depending upon definition of wchar_t --- functions/uchar/mbrtoc32.c | 1 + internals/_PDCLIB_encoding.h | 58 ++++++++++++++++++++++- internals/_PDCLIB_int.h | 27 +++++++---- platform/posix/internals/_PDCLIB_config.h | 3 ++ platform/win32/internals/_PDCLIB_config.h | 3 ++ 5 files changed, 81 insertions(+), 11 deletions(-) diff --git a/functions/uchar/mbrtoc32.c b/functions/uchar/mbrtoc32.c index 05d9184..bb269cd 100644 --- a/functions/uchar/mbrtoc32.c +++ b/functions/uchar/mbrtoc32.c @@ -33,6 +33,7 @@ size_t mbrtoc32_l( // A character was output if(nr == n) { // The output character resulted entirely from stored state + // With UTF-32, this shouldn't be possible? return (size_t) -3; } else if(pc32[-1] == 0) { // Was null character diff --git a/internals/_PDCLIB_encoding.h b/internals/_PDCLIB_encoding.h index 9b31735..4d17123 100644 --- a/internals/_PDCLIB_encoding.h +++ b/internals/_PDCLIB_encoding.h @@ -6,7 +6,7 @@ #ifndef __PDCLIB_ENCODING_H #define __PDCLIB_ENCODING_H __PDCLIB_ENCODING_H -#include "_PDCLIB_int.h" +#include <uchar.h> /* Must be cauued with bufsize >= 1, in != NULL, out != NULL, ps != NULL * @@ -142,4 +142,60 @@ struct _PDCLIB_charcodec { size_t __mb_max; }; +/* mbstate _PendState values */ +enum { + /* Nothing pending; _PendChar ignored */ + _PendClear = 0, + + /* Process the character stored in _PendChar before reading the buffer + * passed for the conversion + */ + _PendPrefix = 1, +}; + +/* XXX Defining these here is temporary - will move to xlocale in future */ +size_t mbrtoc16_l( + char16_t *_PDCLIB_restrict pc16, + const char *_PDCLIB_restrict s, + size_t n, + mbstate_t *_PDCLIB_restrict ps, +_PDCLIB_locale_t _PDCLIB_restrict l); + +size_t c16rtomb_l( + char *_PDCLIB_restrict s, + char16_t c16, + mbstate_t *_PDCLIB_restrict ps, +_PDCLIB_locale_t
_PDCLIB_restrict l); + +size_t mbrtoc32_l( + char32_t *_PDCLIB_restrict pc32, + const char *_PDCLIB_restrict s, + size_t n, + mbstate_t *_PDCLIB_restrict ps, +_PDCLIB_locale_t _PDCLIB_restrict l); + +size_t c32rtomb_l( + char *_PDCLIB_restrict s, + char32_t c32, + mbstate_t *_PDCLIB_restrict ps, +_PDCLIB_locale_t _PDCLIB_restrict l); + +#define _PDCLIB_WCHAR_ENCODING_UTF16 16 +#define _PDCLIB_WCHAR_ENCODING_UCS4 32 + +#if !defined(_PDCLIB_WCHAR_ENCODING) + #define _PDCLIB_WCHAR_ENCODING 0 +#endif + +#if _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UTF16 + #define _PDCLIB_mbrtocwc_l mbrtoc16_l + #define _PDCLIB_cwcrtomb_l c16rtomb_l +#elif _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UCS4 + #define _PDCLIB_mbrtocwc_l mbrtoc32_l + #define _PDCLIB_cwcrtomb_l c32rtomb_l +#else + #error _PDCLIB_WCHAR_ENCODING not defined correctly + #error Define to one of _PDCLIB_WCHAR_ENCODING_UCS4 or _PDCLIB_WCHAR_ENCODING_UTF16 +#endif + #endif diff --git a/internals/_PDCLIB_int.h b/internals/_PDCLIB_int.h index 52ba8b3..9ff9e69 100644 --- a/internals/_PDCLIB_int.h +++ b/internals/_PDCLIB_int.h @@ -348,16 +348,23 @@ typedef struct _PDCLIB_mbstate { char _StC [124]; }; - union { - /* c16/related functions: Surrogate storage - * - * If zero, no surrogate pending. If nonzero, surrogate. - */ - _PDCLIB_uint16_t _Surrogate; - - /* Reserved for potential mbtoutf8/etc functions */ - unsigned char _U8[4]; - }; + /* c16/related functions: Surrogate storage + * + * If zero, no surrogate pending. If nonzero, surrogate. + */ + _PDCLIB_uint16_t _Surrogate; + + /* In cases where the underlying codec is capable of regurgitating a + * character without consuming any extra input (e.g. a surrogate pair in a + * UCS-4 to UTF-16 conversion) then these fields are used to track that + * state. In particular, they are used to buffer/fake the input for mbrtowc + * and similar functions. + * + * See _PDCLIB_encoding.h for values of _PendState and the resultant value + * in _PendChar. 
+ */ + unsigned char _PendState; + char _PendChar; } _PDCLIB_mbstate_t; typedef struct _PDCLIB_charcodec *_PDCLIB_charcodec_t; diff --git a/platform/posix/internals/_PDCLIB_config.h b/platform/posix/internals/_PDCLIB_config.h index 273f2d2..576b347 100644 --- a/platform/posix/internals/_PDCLIB_config.h +++ b/platform/posix/internals/_PDCLIB_config.h @@ -327,6 +327,9 @@ struct _PDCLIB_imaxdiv_t /* Locale method. See _PDCLIB_locale.h */ #define _PDCLIB_LOCALE_METHOD _PDCLIB_LOCALE_METHOD_TSS +/* wchar_t encoding */ +#define _PDCLIB_WCHAR_ENCODING _PDCLIB_WCHAR_ENCODING_UCS4 + /* I/O ---------------------------------------------------------------------- */ /* The default size for file buffers. Must be at least 256. */ diff --git a/platform/win32/internals/_PDCLIB_config.h b/platform/win32/internals/_PDCLIB_config.h index 4e40501..c6da3ef 100644 --- a/platform/win32/internals/_PDCLIB_config.h +++ b/platform/win32/internals/_PDCLIB_config.h @@ -338,6 +338,9 @@ struct _PDCLIB_imaxdiv_t /* Locale method. See _PDCLIB_locale.h */ #define _PDCLIB_LOCALE_METHOD _PDCLIB_LOCALE_METHOD_TSS +/* wchar_t encoding */ +#define _PDCLIB_WCHAR_ENCODING _PDCLIB_WCHAR_ENCODING_UTF16 + /* I/O ---------------------------------------------------------------------- */ /* The default size for file buffers. Must be at least 256. */ -- 2.40.0