/* Encoding support <_PDCLIB_encoding.h> This file is part of the Public Domain C Library (PDCLib). Permission is granted to use, modify, and / or redistribute at will. */ #ifndef __PDCLIB_ENCODING_H #define __PDCLIB_ENCODING_H __PDCLIB_ENCODING_H #include /* Must be cauued with bufsize >= 1, in != NULL, out != NULL, ps != NULL * * Converts a UTF-16 (char16_t) to a UCS4 (char32_t) value. Returns * 1, 2 : Valid character (converted to UCS-4) * -1 : Encoding error * -2 : Partial character (only lead surrogate in buffer) */ static inline int _PDCLIB_c16rtoc32( _PDCLIB_char32_t *_PDCLIB_restrict out, const _PDCLIB_char16_t *_PDCLIB_restrict in, _PDCLIB_size_t bufsize, _PDCLIB_mbstate_t *_PDCLIB_restrict ps ) { if(ps->_Surrogate) { // We already have a lead surrogate if((*in & ~0x3FF) != 0xDC00) { // Encoding error return -1; } else { // Decode and reset state *out = (ps->_Surrogate & 0x3FF) << 10 | (*in & 0x3FF); ps->_Surrogate = 0; return 1; } } if((*in & ~0x3FF) == 0xD800) { // Lead surrogate if(bufsize >= 2) { // Buffer big enough if((in[1] & ~0x3FF) != 0xDC00) { // Encoding error return -1; } else { *out = (in[0] & 0x3FF) << 10 | (in[1] & 0x3FF); return 2; } } else { // Buffer too small - update state ps->_Surrogate = *in; return -2; } } else { // BMP character *out = *in; return 1; } } static inline _PDCLIB_size_t _PDCLIB_c32rtoc16( _PDCLIB_wchar_t *_PDCLIB_restrict out, const _PDCLIB_char32_t *_PDCLIB_restrict in, _PDCLIB_size_t bufsize, _PDCLIB_mbstate_t *_PDCLIB_restrict ps ) { if(ps->_Surrogate) { *out = ps->_Surrogate; ps->_Surrogate = 0; return 0; } if(*in <= 0xFFFF) { // BMP character *out = *in; return 1; } else { // Supplementary plane character *out = 0xD800 | (*in >> 10); if(bufsize >= 2) { out[1] = 0xDC00 | (*in & 0x3FF); return 2; } else { ps->_Surrogate = 0xDC00 | (*in & 0x3FF); return 1; } } } struct _PDCLIB_charcodec { /* Reads at most *_P_insz code units from *_P_inbuf and writes the result * into *_P_outbuf, writing at most *_P_outsz code units. Updates * *_P_outbuf, *_P_outsz, *_P_inbuf, *_P_outsz with the resulting state * * If _P_outbuf is NULL, then the input must be processed but no output * generated. _P_outsz may be processed as normal. * * Returns true if the conversion completed successfully (i.e. one of * _P_outsize or _P_insize reached zero and no coding errors were * encountered), else return false. */ /* mbsinit. Mandatory. */ _PDCLIB_bool (*__mbsinit)(const _PDCLIB_mbstate_t *_P_ps); /* UCS-4 variants. Mandatory. */ _PDCLIB_bool (*__mbstoc32s)( _PDCLIB_char32_t *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf, _PDCLIB_size_t *_PDCLIB_restrict _P_outsz, const char *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf, _PDCLIB_size_t *_PDCLIB_restrict _P_insz, _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps ); _PDCLIB_bool (*__c32stombs)( char *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf, _PDCLIB_size_t *_PDCLIB_restrict _P_outsz, const _PDCLIB_char32_t *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf, _PDCLIB_size_t *_PDCLIB_restrict _P_insz, _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps ); /* UTF-16 variants; same as above except optional. * * If not provided, _PDCLib will internally synthesize on top of the UCS-4 * variants above, albeit at a performance cost. */ _PDCLIB_bool (*__mbstoc16s)( _PDCLIB_char16_t *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf, _PDCLIB_size_t *_PDCLIB_restrict _P_outsz, const char *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf, _PDCLIB_size_t *_PDCLIB_restrict _P_insz, _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps ); _PDCLIB_bool (*__c16stombs)( char *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf, _PDCLIB_size_t *_PDCLIB_restrict _P_outsz, const _PDCLIB_char16_t *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf, _PDCLIB_size_t *_PDCLIB_restrict _P_insz, _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps ); size_t __mb_max; }; /* mbstate _PendState values */ enum { /* Nothing pending; _PendChar ignored */ _PendClear = 0, /* Process the character stored in _PendChar before reading the buffer * passed for the conversion */ _PendPrefix = 1, }; /* XXX Defining these here is temporary - will move to xlocale in future */ size_t mbrtoc16_l( char16_t *_PDCLIB_restrict pc16, const char *_PDCLIB_restrict s, size_t n, mbstate_t *_PDCLIB_restrict ps, _PDCLIB_locale_t _PDCLIB_restrict l); size_t c16rtomb_l( char *_PDCLIB_restrict s, char16_t c16, mbstate_t *_PDCLIB_restrict ps, _PDCLIB_locale_t _PDCLIB_restrict l); size_t mbrtoc32_l( char32_t *_PDCLIB_restrict pc32, const char *_PDCLIB_restrict s, size_t n, mbstate_t *_PDCLIB_restrict ps, _PDCLIB_locale_t _PDCLIB_restrict l); size_t c32rtomb_l( char *_PDCLIB_restrict s, char32_t c32, mbstate_t *_PDCLIB_restrict ps, _PDCLIB_locale_t _PDCLIB_restrict l); #define _PDCLIB_WCHAR_ENCODING_UTF16 16 #define _PDCLIB_WCHAR_ENCODING_UCS4 32 #if !defined(_PDCLIB_WCHAR_ENCODING) #define _PDCLIB_WCHAR_ENCODING 0 #endif #if _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UTF16 #define _PDCLIB_mbrtocwc_l mbrtoc16_l #define _PDCLIB_mbrtocwc mbrtoc16 #define _PDCLIB_cwcrtomb_l c16rtomb_l #define _PDCLIB_cwcrtomb c16rtomb #elif _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UCS4 #define _PDCLIB_mbrtocwc_l mbrtoc32_l #define _PDCLIB_mbrtocwc mbrtoc32 #define _PDCLIB_cwcrtomb_l c32rtomb_l #define _PDCLIB_cwcrtomb c32rtomb #else #error _PDCLIB_WCHAR_ENCODING not defined correctly #error Define to one of _PDCLIB_WCHAR_ENCODING_UCS4 or _PDCLIB_WCHAR_ENCODING_UTF16 #endif #endif