1 /* Encoding support <_PDCLIB_encoding.h>
3 This file is part of the Public Domain C Library (PDCLib).
4 Permission is granted to use, modify, and / or redistribute at will.
7 #ifndef __PDCLIB_ENCODING_H
8 #define __PDCLIB_ENCODING_H __PDCLIB_ENCODING_H
12 /* Must be called with bufsize >= 1, in != NULL, out != NULL, ps != NULL
14 * Converts a UTF-16 (char16_t) to a UCS4 (char32_t) value. Returns
15 * 1, 2 : Valid character (converted to UCS-4)
17 * -2 : Partial character (only lead surrogate in buffer)
19 static inline int _PDCLIB_c16rtoc32(
20 _PDCLIB_char32_t *_PDCLIB_restrict out,
21 const _PDCLIB_char16_t *_PDCLIB_restrict in,
22 _PDCLIB_size_t bufsize,
23 _PDCLIB_mbstate_t *_PDCLIB_restrict ps
27 // We already have a lead surrogate
// The mask keeps the top six bits of the code unit; a trail surrogate has
// them equal to 0xDC00, so any other value cannot complete the pending pair.
28 if((*in & ~0x3FF) != 0xDC00) {
32 // Decode and reset state
// NOTE(review): this joins the low 10 bits of the saved lead surrogate with
// the low 10 bits of *in, but no "+ 0x10000" is visible in this excerpt.
// UTF-16 pairs encode (code point - 0x10000), so without that offset the
// result stays within 0x00000..0xFFFFF. Confirm against the full file.
33 *out = (ps->_Surrogate & 0x3FF) << 10 | (*in & 0x3FF);
// NOTE(review): "} if" rather than "} else if" - this is only equivalent if
// every path through the branch above returns; those returns are not visible
// here.
37 } if((*in & ~0x3FF) == 0xD800) {
// Reads a second code unit; presumably guarded by a bufsize >= 2 check on a
// line not shown in this excerpt - TODO confirm.
41 if((in[1] & ~0x3FF) != 0xDC00) {
// NOTE(review): same missing "+ 0x10000" concern as the stateful decode
// above.
45 *out = (in[0] & 0x3FF) << 10 | (in[1] & 0x3FF);
49 // Buffer too small - update state
// Writes the UTF-16 form of the UCS-4 value *in to out, using ps->_Surrogate
// to hold a trail surrogate between calls. Only part of the body is visible
// in this excerpt, so the branch conditions and return values are not
// described here.
60 static inline _PDCLIB_size_t _PDCLIB_c32rtoc16(
// NOTE(review): out is _PDCLIB_wchar_t here while the sibling
// _PDCLIB_c16rtoc32 takes _PDCLIB_char16_t - confirm this is intended.
61 _PDCLIB_wchar_t *_PDCLIB_restrict out,
62 const _PDCLIB_char32_t *_PDCLIB_restrict in,
63 _PDCLIB_size_t bufsize,
64 _PDCLIB_mbstate_t *_PDCLIB_restrict ps
// Emits the value saved in ps->_Surrogate (stored as a trail surrogate by the
// assignment at the end of this block).
68 *out = ps->_Surrogate;
78 // Supplementary plane character
// NOTE(review): no "- 0x10000" is visible before the shift. For *in in
// 0x10000..0x10FFFF, (*in >> 10) is 0x40..0x43F, which does not fit the
// 10-bit lead-surrogate payload and can set bit 10 of the result. Confirm
// whether *in is adjusted on a line not shown.
79 *out = 0xD800 | (*in >> 10);
// Trail surrogate written directly to the second output slot.
81 out[1] = 0xDC00 | (*in & 0x3FF);
// Same trail surrogate value, stored in the conversion state instead of being
// written to out[1].
84 ps->_Surrogate = 0xDC00 | (*in & 0x3FF);
// Function-pointer table for one character codec: an mbsinit hook, the
// mandatory multibyte <-> UCS-4 converters and the optional multibyte <->
// UTF-16 converters.
90 struct _PDCLIB_charcodec_t {
91 /* Reads at most *_P_insz code units from *_P_inbuf and writes the result
92 * into *_P_outbuf, writing at most *_P_outsz code units. Updates
93 * *_P_outbuf, *_P_outsz, *_P_inbuf, *_P_insz with the resulting state
95 * If _P_outbuf is NULL, then the input must be processed but no output
96 * generated. _P_outsz may be processed as normal.
98 * Returns true if the conversion completed successfully (i.e. one of
99 * *_P_outsz or *_P_insz reached zero and no coding errors were
100 * encountered), else return false.
103 /* mbsinit. Mandatory. */
104 _PDCLIB_bool (*__mbsinit)(const _PDCLIB_mbstate_t *_P_ps);
106 /* UCS-4 variants. Mandatory. */
108 _PDCLIB_bool (*__mbstoc32s)(
109 _PDCLIB_char32_t *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
110 _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
111 const char *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
112 _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
113 _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
116 _PDCLIB_bool (*__c32stombs)(
117 char *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
118 _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
119 const _PDCLIB_char32_t *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
120 _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
121 _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
124 /* UTF-16 variants; same as above except optional.
126 * If not provided, _PDCLib will internally synthesize on top of the UCS-4
127 * variants above, albeit at a performance cost.
130 _PDCLIB_bool (*__mbstoc16s)(
131 _PDCLIB_char16_t *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
132 _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
133 const char *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
134 _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
135 _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
138 _PDCLIB_bool (*__c16stombs)(
139 char *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
140 _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
141 const _PDCLIB_char16_t *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
142 _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
143 _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
149 /* mbstate _PendState values */
151 /* Nothing pending; _PendChar ignored */
154 /* Process the character stored in _PendChar before reading the buffer
155 * passed for the conversion
160 /* XXX Defining these here is temporary - will move to xlocale in future */
162 char16_t *_PDCLIB_restrict pc16,
163 const char *_PDCLIB_restrict s,
165 mbstate_t *_PDCLIB_restrict ps,
166 _PDCLIB_locale_t _PDCLIB_restrict l);
169 char *_PDCLIB_restrict s,
171 mbstate_t *_PDCLIB_restrict ps,
172 _PDCLIB_locale_t _PDCLIB_restrict l);
175 char32_t *_PDCLIB_restrict pc32,
176 const char *_PDCLIB_restrict s,
178 mbstate_t *_PDCLIB_restrict ps,
179 _PDCLIB_locale_t _PDCLIB_restrict l);
182 char *_PDCLIB_restrict s,
184 mbstate_t *_PDCLIB_restrict ps,
185 _PDCLIB_locale_t _PDCLIB_restrict l);
// Selector values that _PDCLIB_WCHAR_ENCODING is compared against below.
187 #define _PDCLIB_WCHAR_ENCODING_UTF16 16
188 #define _PDCLIB_WCHAR_ENCODING_UCS4 32
// Fall back to 0 when the macro was not set; 0 matches neither selector, so
// neither set of aliases below gets defined for it.
190 #if !defined(_PDCLIB_WCHAR_ENCODING)
191 #define _PDCLIB_WCHAR_ENCODING 0
// UTF-16 selected: the generic "cwc" names alias the char16_t functions.
194 #if _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UTF16
195 #define _PDCLIB_mbrtocwc_l mbrtoc16_l
196 #define _PDCLIB_mbrtocwc mbrtoc16
197 #define _PDCLIB_cwcrtomb_l c16rtomb_l
198 #define _PDCLIB_cwcrtomb c16rtomb
// UCS-4 selected: the generic "cwc" names alias the char32_t functions.
199 #elif _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UCS4
200 #define _PDCLIB_mbrtocwc_l mbrtoc32_l
201 #define _PDCLIB_mbrtocwc mbrtoc32
202 #define _PDCLIB_cwcrtomb_l c32rtomb_l
203 #define _PDCLIB_cwcrtomb c32rtomb
// Any other value is rejected at compile time (presumably under an #else that
// is not visible in this excerpt - TODO confirm).
205 #error _PDCLIB_WCHAR_ENCODING not defined correctly
206 #error Define to one of _PDCLIB_WCHAR_ENCODING_UCS4 or _PDCLIB_WCHAR_ENCODING_UTF16