1 /* Encoding support <_PDCLIB_encoding.h>
3 This file is part of the Public Domain C Library (PDCLib).
4 Permission is granted to use, modify, and / or redistribute at will.
7 #ifndef __PDCLIB_ENCODING_H
8 #define __PDCLIB_ENCODING_H __PDCLIB_ENCODING_H
11 /* Must be called with bufsize >= 1, in != NULL, out != NULL, ps != NULL
13 * Converts a UTF-16 (char16_t) sequence to a UCS-4 (char32_t) value. Returns
14 * 1, 2 : Valid character (converted to UCS-4)
16 * -2 : Partial character (only lead surrogate in buffer)
18 static inline int _PDCLIB_c16rtoc32(
19 _PDCLIB_char32_t *_PDCLIB_restrict out,
20 const _PDCLIB_char16_t *_PDCLIB_restrict in,
21 _PDCLIB_size_t bufsize,
22 _PDCLIB_mbstate_t *_PDCLIB_restrict ps
26 // We already have a lead surrogate
27 if((*in & ~0x3FF) != 0xDC00) {
31 // Decode and reset state
32 *out = (ps->_Surrogate & 0x3FF) << 10 | (*in & 0x3FF);
36 } if((*in & ~0x3FF) == 0xD800) {
40 if((in[1] & ~0x3FF) != 0xDC00) {
44 *out = (in[0] & 0x3FF) << 10 | (in[1] & 0x3FF);
48 // Buffer too small - update state
59 static inline _PDCLIB_size_t _PDCLIB_c32rtoc16(
60 _PDCLIB_wchar_t *_PDCLIB_restrict out,
61 const _PDCLIB_char32_t *_PDCLIB_restrict in,
62 _PDCLIB_size_t bufsize,
63 _PDCLIB_mbstate_t *_PDCLIB_restrict ps
67 *out = ps->_Surrogate;
77 // Supplementary plane character
78 *out = 0xD800 | (*in >> 10);
80 out[1] = 0xDC00 | (*in & 0x3FF);
83 ps->_Surrogate = 0xDC00 | (*in & 0x3FF);
89 struct _PDCLIB_charcodec {
90 /* Reads at most *_P_insz code units from *_P_inbuf and writes the result
91 * into *_P_outbuf, writing at most *_P_outsz code units. Updates
92 * *_P_outbuf, *_P_outsz, *_P_inbuf, *_P_insz with the resulting state
94 * If _P_outbuf is NULL, then the input must be processed but no output
95 * generated. _P_outsz may be processed as normal.
97 * Returns true if the conversion completed successfully (i.e. one of
98 * *_P_outsz or *_P_insz reached zero and no coding errors were
99 * encountered), else returns false.
102 /* mbsinit. Mandatory. */
103 _PDCLIB_bool (*__mbsinit)(const _PDCLIB_mbstate_t *_P_ps);
105 /* UCS-4 variants. Mandatory. */
107 _PDCLIB_bool (*__mbstoc32s)(
108 _PDCLIB_char32_t *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
109 _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
110 const char *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
111 _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
112 _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
115 _PDCLIB_bool (*__c32stombs)(
116 char *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
117 _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
118 const _PDCLIB_char32_t *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
119 _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
120 _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
123 /* UTF-16 variants; same as above except optional.
125 * If not provided, PDCLib will internally synthesize them on top of the UCS-4
126 * variants above, albeit at a performance cost.
129 _PDCLIB_bool (*__mbstoc16s)(
130 _PDCLIB_char16_t *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
131 _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
132 const char *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
133 _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
134 _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
137 _PDCLIB_bool (*__c16stombs)(
138 char *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
139 _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
140 const _PDCLIB_char16_t *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
141 _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
142 _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
148 /* mbstate _PendState values */
150 /* Nothing pending; _PendChar ignored */
153 /* Process the character stored in _PendChar before reading the buffer
154 * passed for the conversion
159 /* XXX Defining these here is temporary - will move to xlocale in future */
161 char16_t *_PDCLIB_restrict pc16,
162 const char *_PDCLIB_restrict s,
164 mbstate_t *_PDCLIB_restrict ps,
165 _PDCLIB_locale_t _PDCLIB_restrict l);
168 char *_PDCLIB_restrict s,
170 mbstate_t *_PDCLIB_restrict ps,
171 _PDCLIB_locale_t _PDCLIB_restrict l);
174 char32_t *_PDCLIB_restrict pc32,
175 const char *_PDCLIB_restrict s,
177 mbstate_t *_PDCLIB_restrict ps,
178 _PDCLIB_locale_t _PDCLIB_restrict l);
181 char *_PDCLIB_restrict s,
183 mbstate_t *_PDCLIB_restrict ps,
184 _PDCLIB_locale_t _PDCLIB_restrict l);
186 #define _PDCLIB_WCHAR_ENCODING_UTF16 16
187 #define _PDCLIB_WCHAR_ENCODING_UCS4 32
189 #if !defined(_PDCLIB_WCHAR_ENCODING)
190 #define _PDCLIB_WCHAR_ENCODING 0
193 #if _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UTF16
194 #define _PDCLIB_mbrtocwc_l mbrtoc16_l
195 #define _PDCLIB_mbrtocwc mbrtoc16
196 #define _PDCLIB_cwcrtomb_l c16rtomb_l
197 #define _PDCLIB_cwcrtomb c16rtomb
198 #elif _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UCS4
199 #define _PDCLIB_mbrtocwc_l mbrtoc32_l
200 #define _PDCLIB_mbrtocwc mbrtoc32
201 #define _PDCLIB_cwcrtomb_l c32rtomb_l
202 #define _PDCLIB_cwcrtomb c32rtomb
204 #error _PDCLIB_WCHAR_ENCODING not defined correctly
205 #error Define to one of _PDCLIB_WCHAR_ENCODING_UCS4 or _PDCLIB_WCHAR_ENCODING_UTF16