]> pd.if.org Git - json/blob - scan.c
initial commit
[json] / scan.c
1 #include <stdio.h>
2 #include <ctype.h>
3 #include <string.h>
4
5 #include "scan.h"
6
/*
 * Not referenced in the visible part of this file.  Presumably the
 * "token not finished yet" result, since the scan functions below return
 * a literal 0 in that situation -- TODO confirm against callers.
 */
7 #define NEED_MORE 0
8
9 int scanner_init(struct scanner *s) {
10         if (!s->free) {
11                 s->free = free;
12         } 
13         return 1;
14 }
15
/*
 * Placeholder: the stream is never read.  Always returns 1.
 */
int scan_json_file(FILE *input) {
        (void)input;    /* intentionally unused */

        return 1;
}
19
/*
 * Return a pointer to the first character of s that is not whitespace
 * (as classified by isspace()), or to the terminating NUL when the rest
 * of the string is all whitespace.
 *
 * The character is converted to unsigned char before it is classified:
 * handing a negative plain char to a <ctype.h> function is undefined
 * behaviour, and bytes >= 0x80 are negative where char is signed.
 */
char *skip_ws(char *s) {
        /* isspace(0) is false, so the terminator ends the loop too */
        while (isspace((unsigned char)*s)) {
                s++;
        }
        return s;
}
26
/*
 * Keyword codes.  None of these is referenced in the visible part of
 * this file; the literal scanner below reports LITTRUE / LITFALSE /
 * LITNULL instead.
 */
27 #define KW_TRUE 1
28 #define KW_FALSE 2
29 #define KW_NULL 3
30
31 /* TODO check for illegal json characters */
32 static void add_unicode(struct token *t, unsigned unicode) {
33         /* TODO check string length */
34         if (unicode < 0x80) {
35                 t->str[t->len++] = unicode;
36         } else if (unicode <= 0x7FF) {
37                 t->str[t->len++] = (unicode >> 6) + 0xC0;
38                 t->str[t->len++] = (unicode & 0x3F) + 0x80;
39         } else if (unicode <= 0xFFFF) {
40                 t->str[t->len++] = (unicode >> 12) + 0xE0;
41                 t->str[t->len++] = ((unicode >> 6) & 0x3F) + 0x80;
42                 t->str[t->len++] = (unicode & 0x3F) + 0x80;
43         } else if (unicode <= 0x10FFFF) {
44                 t->str[t->len++] = (unicode >> 18) + 0xF0;
45                 t->str[t->len++] = ((unicode >> 12) & 0x3F) + 0x80;
46                 t->str[t->len++] = ((unicode >> 6) & 0x3F) + 0x80;
47                 t->str[t->len++] = (unicode & 0x3F) + 0x80;
48         } else {
49                 t->error = TOKENERROR;
50         }
51 }
52
/*
 * Numeric value (0..15) of a hexadecimal digit character, either case.
 * Returns -1 when ch is not a hexadecimal digit.
 */
static int hexval(int ch) {
        static const char lower[] = "abcdef";
        static const char upper[] = "ABCDEF";
        int i;

        if (ch >= '0' && ch <= '9') {
                return ch - '0';
        }

        /* compare against each letter in turn; position i stands for 10 + i */
        for (i = 0; i < 6; i++) {
                if (ch == lower[i] || ch == upper[i]) {
                        return 10 + i;
                }
        }

        return -1;
}
79
80 int add_str_ch(int ch, struct token *t) {
81         if (t->state == 2) {
82
83         }
84         switch (t->state) {
85                 case 1: switch (ch) {
86                         case '\\':
87                         case '/':
88                         case '"':
89                                 t->str[t->len++] = ch;
90                                 break;
91                         case 'b':
92                                 t->str[t->len++] = 8;
93                                 break;
94                         case 'f':
95                                 t->str[t->len++] = 0xC;
96                                 break;
97                         case 'n':
98                                 t->str[t->len++] = 0xA;
99                                 break;
100                         case 'r':
101                                 t->str[t->len++] = 0xD;
102                                 break;
103                         case 't':
104                                 t->str[t->len++] = 0x9;
105                                 break;
106                         case 'u':
107                                 t->state = 2;
108                                 t->unicode = 0;
109                                 break;
110                                 /* TODO handle unicode */
111                         }
112                 break;
113
114                 case 2:
115                 case 3:
116                 case 4:
117                 case 5:
118                 if (isxdigit(ch)) {
119                         t->unicode = t->unicode << 4 | hexval(ch);
120
121                         t->state++;
122                         if (t->state == 6) {
123                                 add_unicode(t, t->unicode);
124                                 t->state = 0;
125                         }
126                 } else {
127                         t->error = TOKENERROR;
128                         return 0;
129                 }
130                 break;
131
132                 case 0: switch (ch) {
133                         case '"':
134                                 t->type = STRING;
135                                 return STRING;
136                         case '\\':
137                                 t->state = 1;
138                                 break;
139                         default:
140                                 t->state = 0;
141                                 t->str[t->len++] = ch;
142                                 break;
143                         }
144                 break;
145         }
146         return 0;
147 }
148
/*
 * Classify a character for the number state machine.
 *
 * Returns
 *   '.', '+', '-', 'e', '0'   for the character itself,
 *   'e'                       for 'E',
 *   '1'                       for any digit '1'..'9',
 *   0                         for anything else.
 *
 * Digits are matched with a range test rather than isdigit(), which is
 * undefined for arguments outside the unsigned char range other than
 * EOF.  The C standard guarantees that '0'..'9' are contiguous.
 */
static int digitclass(int ch) {
        switch (ch) {
        case '.':
        case '+':
        case '-':
        case 'e':
        case '0':
                return ch;
        case 'E':
                return 'e';
        default:
                break;
        }

        if (ch >= '1' && ch <= '9') {
                return '1';
        }
        return 0;
}
165
166 static int start_num(int ch, struct token *t) {
167         int class;
168         class = digitclass(ch);
169         switch (class) {
170                 case '-': t->state = 1; break;
171                 case '0': t->state = 2; break;
172                 case '1': t->state = 3; break;
173                 default: t->state = 0;break; /* TODO error */
174         }
175         t->len = 1;
176         t->str[0] = ch;
177         return 0;
178 }
179
180 static int add_str_num(int ch, struct token *t) {
181         int class;
182         class = digitclass(ch);
183         switch (t->state) {
184                 case 1: /* leading hyphen */
185                         switch (class) {
186                                 case '0': t->state = 2; break;
187                                 case '1': t->state = 3; break;
188                                 default: t->error = NUMERROR;
189                         }
190                         break;
191                 case 2: /* integral leading zero */
192                         switch (class) {
193                                 case '.': t->state = 5; break;
194                                 case 'e': t->state = 7; break;
195                                 case 0: t->state = 0; break;
196                                 default: t->error = NUMERROR;
197                         }
198                         break;
199                 case 3: /* integral leading 1-9 */
200                 case 4: /* integer part digits */
201                         switch (class) {
202                                 case '.': t->state = 5; break;
203                                 case 'e': t->state = 7; break;
204                                 case '0':
205                                 case '1': t->state = 4; break;
206                                 case 0: t->state = 0; break;
207                                 default: t->error = NUMERROR;
208                         }
209                         break;
210                 case 5: /* integer / fraction separating dot */
211                         switch (class) {
212                                 case '0':
213                                 case '1': t->state = 6; break;
214                                 default: t->error = NUMERROR;
215                         }
216                         break;
217                 case 6: /* fractional digit */
218                         switch (class) {
219                                 case '0':
220                                 case '1': t->state = 6; break;
221                                 case 'e': t->state = 7; break;
222                                 case 0: t->state = 0; break;
223                                 default: t->error = NUMERROR;
224                         }
225                         break;
226                 case 7: /* exponent e or E */
227                         switch (class) {
228                                 case '-':
229                                 case '+': t->state = 8; break;
230                                 case '0':
231                                 case '1': t->state = 9; break;
232                                 default: t->error = NUMERROR;
233                         }
234                         break;
235                 case 8: /* exponent plus or minus */
236                 case 9: /* exponent digit */
237                         switch (class) {
238                                 case '0':
239                                 case '1': t->state = 9; break;
240                                 case 0: t->state = 0; break;
241                                 default: t->error = NUMERROR;
242                         }
243                         break;
244         }
245
246         if (t->error) {
247                 return 0;
248         }
249
250         if (t->state) {
251                 t->str[t->len++] = ch;
252                 return 0;
253         }
254
255         t->ch = ch;
256         t->type = NUMBER;
257         return NUMBER;
258
259 }
260
261 int checklit(char *lit, int ch, struct token *t) {
262         size_t len;
263         len = strlen(lit);
264 //      printf("checklit(%s, %c, %d)\n", lit, ch, t->len);
265         if (ch == lit[t->len]) {
266                 t->len++;
267         } else {
268                 t->error = TOKENERROR;
269                 return 0;
270         }
271         if (len == t->len) {
272                 return t->maybe;
273         }
274         return 0;
275
276 #if 0
277         if (t->len == len && !isalnum(ch)) {
278                 t->ch = ch;
279                 return t->maybe;
280         } else if (t->len < len && ch == lit[t->len]) {
281                 t->len++;
282                 return 0;
283         }
284         return -t->maybe;
285 #endif
286
287 }
288
289 /* returns true if we finish a token */
290 int scan_json_ch(int ch, struct token *t) {
291         t->ch = 0;
292         switch (t->type) {
293                 case PSTRING: return add_str_ch(ch, t);
294                 case PNUMBER: return add_str_num(ch, t);
295                 case PTRUE: return checklit("true", ch, t);
296                 case PFALSE: return checklit("false", ch, t);
297                 case PNULL: return checklit("null", ch, t);
298         }
299
300         if (isspace(ch)) {
301                 return 0;
302         }
303
304         switch (ch) {
305                 case '{': t->type = LBRACE; return LBRACE; break;
306                 case '}': t->type = RBRACE; return RBRACE; break;
307                 case '[': t->type = LBRACKET; return LBRACKET; break;
308                 case ']': t->type = RBRACKET; return RBRACKET; break;
309                 case ':': t->type = COLON; return COLON; break;
310                 case ',': t->type = COMMA; return COMMA; break;
311                 case '"': t->type = PSTRING;
312                           t->len = 0;
313                           t->state = 0; /* not \ or \u */
314                           t->str[0] = 0;
315                           return 0;
316                 case 't': t->type = PTRUE;
317                           t->maybe = LITTRUE;
318                           t->len = 1;
319                           return 0;
320                 case 'f': t->type = PFALSE;
321                           t->maybe = LITFALSE;
322                           t->len = 1;
323                           return 0;
324                 case 'n': t->type = PNULL;
325                           t->maybe = LITNULL;
326                           t->len = 1;
327                           return 0;
328                 case '.':
329                 case '-':
330                           t->type = PNUMBER;
331                           t->len = 0;
332                           return start_num(ch, t);
333         }
334         
335         if (isdigit(ch)) {
336                 t->type = PNUMBER;
337                 t->len = 0;
338                 return start_num(ch, t);
339         }
340
341         /* error */
342         t->error = TOKENERROR;
343         return 0;
344 }