1 /* Encoding support <_PDCLIB_encoding.h>
3 This file is part of the Public Domain C Library (PDCLib).
4 Permission is granted to use, modify, and / or redistribute at will.
7 #ifndef _PDCLIB_ENCODING_H
8 #define _PDCLIB_ENCODING_H _PDCLIB_ENCODING_H
9 #include "_PDCLIB_int.h"
12 typedef _PDCLIB_int16_t _PDCLIB_char16_t;
13 typedef _PDCLIB_int32_t _PDCLIB_char32_t;
15 typedef char16_t _PDCLIB_char16_t;
16 typedef char32_t _PDCLIB_char32_t;
19 /* -------------------------------------------------------------------------- */
21 /* -------------------------------------------------------------------------- */
23 typedef struct _PDCLIB_mbstate_t {
25 /* Is this the best way to represent this? Is this big enough? */
26 _PDCLIB_uint64_t _St64[15];
27 _PDCLIB_uint32_t _St32[31];
28 _PDCLIB_uint16_t _St16[62];
29 unsigned char _StUC[124];
30 signed char _StSC[124];
35 /* c16/related functions: Surrogate storage
37 * If zero, no surrogate is pending. If nonzero, it is the pending surrogate.
39 _PDCLIB_uint16_t _Surrogate;
41 /* Reserved for potential mbtoutf8/etc functions */
46 #ifdef _PDCLIB_WCHAR_IS_UCS2
47 /* Must be cauued with bufsize >= 1, in != NULL, out != NULL, ps != NULL
49 * Converts a wchar to a UCS4 (char32_t) value. Returns
50 * 1, 2 : Valid character (converted to UCS-4)
52 * -2 : Partial character (only lead surrogate in buffer)
54 static inline int _PDCLIB_wcrtoc32(
55 _PDCLIB_char32_t *_PDCLIB_restrict out,
56 const _PDCLIB_wchar_t *_PDCLIB_restrict in,
57 _PDCLIB_size_t bufsize,
58 _PDCLIB_mbstate_t *_PDCLIB_restrict ps
62 // We already have a lead surrogate
63 if(*in & ~0x3FF != 0xDC00) {
67 // Decode and reset state
68 *out = (ps->_Surrogate & 0x3FF) << 10 | (*in & 0x3FF);
72 } if(*in & ~0x3FF == 0xD800) {
76 if(in[1] & ~0x3FF != 0xDC00) {
80 *out = (in[0] & 0x3FF) << 10 | (in[1] & 0x3FF);
84 // Buffer too small - update state
95 static inline _PDCLIB_size_t _PDCLIB_c32rtowc(
96 _PDCLIB_wchar_t *_PDCLIB_restrict out,
97 const _PDCLIB_char32_t *_PDCLIB_restrict in,
98 _PDCLIB_size_t bufsize,
99 _PDCLIB_mbstate_t *_PDCLIB_restrict ps
103 *out = ps->_Surrogate;
113 // Supplementary plane character
114 *out = 0xD800 | (*in & 0x3FF);
116 out[1] = 0xDC00 | (*in >> 10);
119 ps->_Surrogate = 0xDC00 | (*in >> 10);
125 /* Dummy implementation for when wc == c32 */
126 static inline _PDCLIB_size_t _PDCLIB_wcrtoc32(
127 _PDCLIB_char32_t *_PDCLIB_restrict out,
128 const _PDCLIB_wchar_t *_PDCLIB_restrict in,
129 _PDCLIB_size_t bufsize,
130 _PDCLIB_mbstate_t *_PDCLIB_restrict ps
137 static inline _PDCLIB_size_t _PDCLIB_c32rtowc(
138 _PDCLIB_wchar_t *_PDCLIB_restrict out,
139 const _PDCLIB_char32_t *_PDCLIB_restrict in,
140 _PDCLIB_size_t bufsize,
141 _PDCLIB_mbstate_t *_PDCLIB_restrict ps
150 /* Reads at most *_P_insz code units from *_P_inbuf and writes the result
151 * into *_P_outbuf, writing at most *_P_outsz code units. Updates
152 * *_P_outbuf, *_P_outsz, *_P_inbuf, *_P_insz with the resulting state
154 * If _P_outbuf is NULL, then the input must be processed but no output
155 * generated. _P_outsz may be processed as normal.
157 * Returns true if the conversion completed successfully (i.e. one of
158 * *_P_outsz or *_P_insz reached zero and no coding errors were
159 * encountered), else returns false.
162 /* UCS-4 variants. Mandatory. */
164 _PDCLIB_bool (*__mbstoc32s)(
165 _PDCLIB_char32_t **_PDCLIB_restrict _P_outbuf,
166 _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
167 const char **_PDCLIB_restrict _P_inbuf,
168 _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
169 _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
172 _PDCLIB_bool (*__c32stombs)(
173 char **_PDCLIB_restrict _P_outbuf,
174 _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
175 const _PDCLIB_char32_t **_PDCLIB_restrict _P_inbuf,
176 _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
177 _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
180 /* UTF-16 variants; same as above except optional.
182 * If not provided, PDCLib will internally synthesize on top of the UCS-4
183 * variants above, albeit at a performance cost.
186 _PDCLIB_bool (*__mbstoc16s)(
187 _PDCLIB_char16_t **_PDCLIB_restrict _P_outbuf,
188 _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
189 const char **_PDCLIB_restrict _P_inbuf,
190 _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
191 _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
194 _PDCLIB_bool (*__c16stombs)(
195 char **_PDCLIB_restrict _P_outbuf,
196 _PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
197 const _PDCLIB_char16_t **_PDCLIB_restrict _P_inbuf,
198 _PDCLIB_size_t *_PDCLIB_restrict _P_insz,
199 _PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps