1 /* Encoding support <_PDCLIB_encoding.h>
3 This file is part of the Public Domain C Library (PDCLib).
4 Permission is granted to use, modify, and / or redistribute at will.
7 #ifndef __PDCLIB_ENCODING_H
8 #define __PDCLIB_ENCODING_H __PDCLIB_ENCODING_H
11 /* Must be called with bufsize >= 1, in != NULL, out != NULL, ps != NULL
13 * Converts a UTF-16 (char16_t) to a UCS4 (char32_t) value. Returns
14 * 1, 2 : Valid character (converted to UCS-4)
16 * -2 : Partial character (only lead surrogate in buffer)
// Decodes one UTF-16 code point from `in` into `*out`, using ps->_Surrogate to
// carry a lead surrogate across calls when the buffer ends mid-pair.
// NOTE(review): both decode expressions below combine only the low 10 bits of
// the two surrogates; no 0x10000 supplementary-plane offset is visible on
// those lines. UTF-16 defines cp = 0x10000 + (lead & 0x3FF) << 10 | (trail & 0x3FF)
// -- confirm against the complete source whether the offset is applied.
18 static inline int _PDCLIB_c16rtoc32(
19 _PDCLIB_char32_t *_PDCLIB_restrict out,
20 const _PDCLIB_char16_t *_PDCLIB_restrict in,
21 _PDCLIB_size_t bufsize,
22 _PDCLIB_mbstate_t *_PDCLIB_restrict ps
26 // We already have a lead surrogate
// Masking off the low 10 bits and comparing with 0xDC00 tests whether *in is
// in 0xDC00..0xDFFF (a trail surrogate); anything else cannot complete the pair.
27 if((*in & ~0x3FF) != 0xDC00) {
31 // Decode and reset state
// Lead bits come from the saved state, trail bits from the current unit.
32 *out = (ps->_Surrogate & 0x3FF) << 10 | (*in & 0x3FF);
// No pending surrogate: 0xD800..0xDBFF means *in starts a surrogate pair.
36 } if((*in & ~0x3FF) == 0xD800) {
// Reads the second code unit; presumably guarded by a bufsize >= 2 check on a
// line not shown here -- TODO confirm.
40 if((in[1] & ~0x3FF) != 0xDC00) {
44 *out = (in[0] & 0x3FF) << 10 | (in[1] & 0x3FF);
48 // Buffer too small - update state
// Encodes one UCS-4 value from `*in` as UTF-16 into `out`, using
// ps->_Surrogate to hold a trail surrogate that did not fit in the buffer.
// NOTE(review): `out` is declared _PDCLIB_wchar_t * although the function name
// says c16 and the sibling decoder uses _PDCLIB_char16_t -- confirm the
// intended output type.
59 static inline _PDCLIB_size_t _PDCLIB_c32rtoc16(
60 _PDCLIB_wchar_t *_PDCLIB_restrict out,
61 const _PDCLIB_char32_t *_PDCLIB_restrict in,
62 _PDCLIB_size_t bufsize,
63 _PDCLIB_mbstate_t *_PDCLIB_restrict ps
// Emits the surrogate saved in the conversion state (see the store to
// ps->_Surrogate further down).
67 *out = ps->_Surrogate;
77 // Supplementary plane character
// NOTE(review): *in is read through a const pointer and shifted as-is, without
// subtracting 0x10000 first. For *in == 0x10000 this yields 0xD840, whereas
// UTF-16 defines the lead surrogate as 0xD800 + ((cp - 0x10000) >> 10), i.e.
// 0xD800; for *in >= 0x100000 bit 10 is set and the result lands in the
// trail-surrogate range. Looks like a defect -- confirm and fix in the full source.
78 *out = 0xD800 | (*in >> 10);
// Trail surrogate: low 10 bits are unaffected by the 0x10000 offset.
// Presumably written directly when the buffer has room for two units...
80 out[1] = 0xDC00 | (*in & 0x3FF);
// ...and otherwise stashed in the state for the next call -- the selecting
// condition is on a line not shown here; TODO confirm.
83 ps->_Surrogate = 0xDC00 | (*in & 0x3FF);
/* Set of conversion routines for one multibyte encoding, held as function pointers. */
89 struct _PDCLIB_charcodec {
90 /* Reads at most *_P_insz code units from *_P_inbuf and writes the result
91 * into *_P_outbuf, writing at most *_P_outsz code units. Updates
92 * *_P_outbuf, *_P_outsz, *_P_inbuf, *_P_insz with the resulting state
94 * If _P_outbuf is NULL, then the input must be processed but no output
95 * generated. _P_outsz may be processed as normal.
97 * Returns true if the conversion completed successfully (i.e. one of
98 * _P_outsz or _P_insz reached zero and no coding errors were
99 * encountered), else return false.
102 /* UCS-4 variants. Mandatory. */
// Multibyte (char) input -> UCS-4 (char32_t) output.
104 _PDCLIB_bool (*__mbstoc32s)(
105 _PDCLIB_char32_t *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
106 _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
107 const char *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
108 _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
109 _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
// UCS-4 (char32_t) input -> multibyte (char) output.
112 _PDCLIB_bool (*__c32stombs)(
113 char *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
114 _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
115 const _PDCLIB_char32_t *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
116 _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
117 _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
120 /* UTF-16 variants; same as above except optional.
122 * If not provided, PDCLib will internally synthesize on top of the UCS-4
123 * variants above, albeit at a performance cost.
126 _PDCLIB_bool (*__mbstoc16s)(
127 _PDCLIB_char16_t *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
128 _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
129 const char *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
130 _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
131 _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
134 _PDCLIB_bool (*__c16stombs)(
135 char *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
136 _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
137 const _PDCLIB_char16_t *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
138 _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
139 _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
145 /* mbstate _PendState values */
147 /* Nothing pending; _PendChar ignored */
150 /* Process the character stored in _PendChar before reading the buffer
151 * passed for the conversion
156 /* XXX Defining these here is temporary - will move to xlocale in future */
158 char16_t *_PDCLIB_restrict pc16,
159 const char *_PDCLIB_restrict s,
161 mbstate_t *_PDCLIB_restrict ps,
162 _PDCLIB_locale_t _PDCLIB_restrict l);
165 char *_PDCLIB_restrict s,
167 mbstate_t *_PDCLIB_restrict ps,
168 _PDCLIB_locale_t _PDCLIB_restrict l);
171 char32_t *_PDCLIB_restrict pc32,
172 const char *_PDCLIB_restrict s,
174 mbstate_t *_PDCLIB_restrict ps,
175 _PDCLIB_locale_t _PDCLIB_restrict l);
178 char *_PDCLIB_restrict s,
180 mbstate_t *_PDCLIB_restrict ps,
181 _PDCLIB_locale_t _PDCLIB_restrict l);
183 #define _PDCLIB_WCHAR_ENCODING_UTF16 16
184 #define _PDCLIB_WCHAR_ENCODING_UCS4 32
186 #if !defined(_PDCLIB_WCHAR_ENCODING)
187 #define _PDCLIB_WCHAR_ENCODING 0
190 #if _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UTF16
191 #define _PDCLIB_mbrtocwc_l mbrtoc16_l
192 #define _PDCLIB_mbrtocwc mbrtoc16
193 #define _PDCLIB_cwcrtomb_l c16rtomb_l
194 #define _PDCLIB_cwcrtomb c16rtomb
195 #elif _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UCS4
196 #define _PDCLIB_mbrtocwc_l mbrtoc32_l
197 #define _PDCLIB_mbrtocwc mbrtoc32
198 #define _PDCLIB_cwcrtomb_l c32rtomb_l
199 #define _PDCLIB_cwcrtomb c32rtomb
201 #error _PDCLIB_WCHAR_ENCODING not defined correctly
202 #error Define to one of _PDCLIB_WCHAR_ENCODING_UCS4 or _PDCLIB_WCHAR_ENCODING_UTF16