/*
 * scanner_init: takes a pointer to a struct scanner and returns int.
 * NOTE(review): only the signature line is present in this listing; what is
 * initialised and what the return value means must be confirmed against the
 * full body.
 */
9 int scanner_init(struct scanner *s) {
/*
 * scan_json_file: takes a stdio stream and returns int.
 * NOTE(review): presumably tokenizes JSON read from `input`; the body is not
 * visible in this listing -- confirm the return convention there.
 */
16 int scan_json_file(FILE *input) {
/*
 * skip_ws: loops for as long as the character at s is not the terminating
 * NUL and isspace() classifies it as whitespace.
 * NOTE(review): presumably advances s and returns a pointer to the first
 * non-whitespace character; the loop body and return are not visible here.
 * NOTE(review): *s is a plain char handed to isspace(); where char is signed
 * a negative value is undefined behaviour for <ctype.h> functions, so a cast
 * to unsigned char may be wanted.
 */
20 char *skip_ws(char *s) {
21 while (*s && isspace(*s)) {
31 /* TODO check for illegal json characters */
/*
 * add_unicode: append the code point `unicode` to t->str as UTF-8,
 * post-incrementing t->len once per byte stored.
 *
 *   first branch         1 byte : the value itself
 *   value <= 0x7FF       2 bytes: 0xC0 + top bits, then one continuation byte
 *   value <= 0xFFFF      3 bytes: 0xE0 + top bits, then two continuation bytes
 *   value <= 0x10FFFF    4 bytes: 0xF0 + top bits, then three continuation bytes
 *
 * Every continuation byte is 0x80 plus six payload bits (masked with 0x3F).
 * The condition guarding the one-byte store is not visible in this listing
 * (presumably unicode <= 0x7F), and the final t->error = TOKENERROR store is
 * presumably the fall-through for values above 0x10FFFF.
 *
 * NOTE(review): nothing visible checks t->len against the capacity of
 * t->str before the stores (see the TODO below).
 * NOTE(review): nothing visible excludes the UTF-16 surrogate range
 * 0xD800-0xDFFF, which would be emitted through the three-byte branch.
 */
32 static void add_unicode(struct token *t, unsigned unicode) {
33 /* TODO check string length */
35 t->str[t->len++] = unicode;
36 } else if (unicode <= 0x7FF) {
37 t->str[t->len++] = (unicode >> 6) + 0xC0;
38 t->str[t->len++] = (unicode & 0x3F) + 0x80;
39 } else if (unicode <= 0xFFFF) {
40 t->str[t->len++] = (unicode >> 12) + 0xE0;
41 t->str[t->len++] = ((unicode >> 6) & 0x3F) + 0x80;
42 t->str[t->len++] = (unicode & 0x3F) + 0x80;
43 } else if (unicode <= 0x10FFFF) {
44 t->str[t->len++] = (unicode >> 18) + 0xF0;
45 t->str[t->len++] = ((unicode >> 12) & 0x3F) + 0x80;
46 t->str[t->len++] = ((unicode >> 6) & 0x3F) + 0x80;
47 t->str[t->len++] = (unicode & 0x3F) + 0x80;
49 t->error = TOKENERROR;
/*
 * hexval: file-local helper taking a character code and returning int; the
 * first test is for the decimal digits '0'..'9'.
 * NOTE(review): the result is OR-ed into the low bits of t->unicode by
 * add_str_ch, so it presumably yields the 0-15 value of a hex digit; the
 * letter cases and the result for a non-hex character are not visible here.
 */
53 static int hexval(int ch) {
54 if (ch >= '0' && ch <= '9') {
/*
 * add_str_ch: consume one character `ch` of a string token `t`.
 *
 * Actions visible in this listing:
 *   - append ch unchanged to t->str;
 *   - append one of the control bytes 0x0C, 0x0A, 0x0D, 0x09;
 *   - shift t->unicode left by four bits and OR in hexval(ch), then pass the
 *     accumulated value to add_unicode();
 *   - set t->error = TOKENERROR.
 *
 * NOTE(review): the conditions selecting each action (t->state values and
 * the escape letters) are not visible; the control bytes are presumably the
 * translations of the \f, \n, \r and \t escapes. The return value is not
 * visible either.
 * NOTE(review): none of the visible stores checks t->len against the
 * capacity of t->str.
 */
80 int add_str_ch(int ch, struct token *t) {
89 t->str[t->len++] = ch;
95 t->str[t->len++] = 0xC; /* ASCII form feed */
98 t->str[t->len++] = 0xA; /* ASCII line feed */
101 t->str[t->len++] = 0xD; /* ASCII carriage return */
104 t->str[t->len++] = 0x9; /* ASCII horizontal tab */
110 /* TODO handle unicode */
/* << binds tighter than |, so this is (t->unicode << 4) | hexval(ch) */
119 t->unicode = t->unicode << 4 | hexval(ch);
123 add_unicode(t, t->unicode);
127 t->error = TOKENERROR;
132 case 0: switch (ch) {
141 t->str[t->len++] = ch;
/*
 * digitclass: file-local helper that reduces a character to a class code for
 * the number state machine; the one visible arm folds 'E' to the class 'e'.
 * NOTE(review): start_num() and add_str_num() switch on the classes '-',
 * '+', '.', '0', '1', 'e' and 0, so '1' presumably stands for any digit
 * 1-9 and 0 for "not part of a number" -- the other arms are not visible
 * here, confirm.
 */
149 static int digitclass(int ch) {
158 case 'E': return 'e';
/*
 * start_num: choose the initial number-scanner state from the class of the
 * first character: '-' selects state 1, '0' state 2, '1' state 3, and any
 * other class state 0.
 * NOTE(review): the default arm only resets the state; no store to t->error
 * is visible there (see the TODO on that line). The return value is not
 * visible in this listing.
 */
166 static int start_num(int ch, struct token *t) {
168 class = digitclass(ch);
170 case '-': t->state = 1; break;
171 case '0': t->state = 2; break;
172 case '1': t->state = 3; break;
173 default: t->state = 0;break; /* TODO error */
/*
 * add_str_num: advance the number state machine by one input character.
 * ch is reduced to a class by digitclass(); t->state then moves as follows
 * (taken from the case arms visible in this listing):
 *
 *   1   leading '-'     : '0' -> 2, '1' -> 3, otherwise NUMERROR
 *   2   leading zero    : '.' -> 5, 'e' -> 7, class 0 -> 0, otherwise NUMERROR
 *   3,4 integer digits  : '.' -> 5, 'e' -> 7, '1' -> 4, class 0 -> 0,
 *                         otherwise NUMERROR
 *   5   after '.'       : '1' -> 6, otherwise NUMERROR
 *   6   fraction digits : '1' -> 6, 'e' -> 7, class 0 -> 0, otherwise NUMERROR
 *   7   after e or E    : '+' -> 8, '1' -> 9, otherwise NUMERROR
 *   8,9 exponent        : '1' -> 9, class 0 -> 0, otherwise NUMERROR
 *
 * Some case labels are not visible here (further labels may share the arms
 * above), so the table may be incomplete. A character is also appended to
 * t->str; the condition guarding that store and the return value are not
 * visible.
 *
 * NOTE(review): class 0 presumably means "character that cannot continue a
 * number" and state 0 "number finished" -- confirm against digitclass().
 * NOTE(review): states 8 and 9 share one arm, so class 0 appears to be
 * accepted directly after an exponent sign (input such as "1e+"), which the
 * JSON number grammar does not allow -- confirm and split the arm if so.
 */
180 static int add_str_num(int ch, struct token *t) {
182 class = digitclass(ch);
184 case 1: /* leading hyphen */
186 case '0': t->state = 2; break;
187 case '1': t->state = 3; break;
188 default: t->error = NUMERROR;
191 case 2: /* integral leading zero */
193 case '.': t->state = 5; break;
194 case 'e': t->state = 7; break;
195 case 0: t->state = 0; break;
196 default: t->error = NUMERROR;
199 case 3: /* integral leading 1-9 */
200 case 4: /* integer part digits */
202 case '.': t->state = 5; break;
203 case 'e': t->state = 7; break;
205 case '1': t->state = 4; break;
206 case 0: t->state = 0; break;
207 default: t->error = NUMERROR;
210 case 5: /* integer / fraction separating dot */
213 case '1': t->state = 6; break;
214 default: t->error = NUMERROR;
217 case 6: /* fractional digit */
220 case '1': t->state = 6; break;
221 case 'e': t->state = 7; break;
222 case 0: t->state = 0; break;
223 default: t->error = NUMERROR;
226 case 7: /* exponent e or E */
229 case '+': t->state = 8; break;
231 case '1': t->state = 9; break;
232 default: t->error = NUMERROR;
235 case 8: /* exponent plus or minus */
236 case 9: /* exponent digit */
239 case '1': t->state = 9; break;
240 case 0: t->state = 0; break;
241 default: t->error = NUMERROR;
251 t->str[t->len++] = ch;
/*
 * checklit: match the incoming character against the literal `lit`, using
 * t->len as the index of the next expected character.
 *
 * Visible tests:
 *   - ch == lit[t->len]: ch is the next character of the literal;
 *   - t->len == len && !isalnum(ch): the position equals `len` and ch is not
 *     alphanumeric;
 *   - t->error = TOKENERROR is stored on another path.
 *
 * NOTE(review): `len` is presumably strlen(lit), and the second test
 * presumably marks the literal as complete; neither the definition of `len`
 * nor the return statements are visible here.
 * NOTE(review): two similar comparison sequences appear (original lines
 * 265-268 and 277-280); from this listing it cannot be told whether both are
 * live code -- confirm.
 */
261 int checklit(char *lit, int ch, struct token *t) {
264 // printf("checklit(%s, %c, %d)\n", lit, ch, t->len);
265 if (ch == lit[t->len]) {
268 t->error = TOKENERROR;
277 if (t->len == len && !isalnum(ch)) {
280 } else if (t->len < len && ch == lit[t->len]) {
289 /* returns true if we finish a token */
/*
 * scan_json_ch: feed one character to the tokenizer.
 *
 * When a multi-character token is in progress (PSTRING, PNUMBER, PTRUE,
 * PFALSE, PNULL) the character is forwarded to add_str_ch(), add_str_num()
 * or checklit() with the matching literal, and that call's result is
 * returned. The switch expression is not visible in this listing;
 * presumably it is t->type.
 *
 * Otherwise ch starts a new token:
 *   - the punctuation characters { } [ ] : , set t->type and return the
 *     same token code, so the "true" result is the token code itself;
 *   - '"' starts a PSTRING with t->state = 0;
 *   - 't', 'f' and 'n' start PTRUE, PFALSE and PNULL;
 *   - two arms hand the character to start_num() (their case labels are not
 *     visible, presumably '-' and the digits);
 *   - t->error = TOKENERROR is stored on another path, presumably for any
 *     other character.
 *
 * The `break` statements that follow `return` in the punctuation arms are
 * unreachable.
 */
290 int scan_json_ch(int ch, struct token *t) {
293 case PSTRING: return add_str_ch(ch, t);
294 case PNUMBER: return add_str_num(ch, t);
295 case PTRUE: return checklit("true", ch, t);
296 case PFALSE: return checklit("false", ch, t);
297 case PNULL: return checklit("null", ch, t);
305 case '{': t->type = LBRACE; return LBRACE; break;
306 case '}': t->type = RBRACE; return RBRACE; break;
307 case '[': t->type = LBRACKET; return LBRACKET; break;
308 case ']': t->type = RBRACKET; return RBRACKET; break;
309 case ':': t->type = COLON; return COLON; break;
310 case ',': t->type = COMMA; return COMMA; break;
311 case '"': t->type = PSTRING;
313 t->state = 0; /* not \ or \u */
316 case 't': t->type = PTRUE;
320 case 'f': t->type = PFALSE;
324 case 'n': t->type = PNULL;
332 return start_num(ch, t);
338 return start_num(ch, t);
342 t->error = TOKENERROR;