#include #include #include #include "scan.h" #define NEED_MORE 0 int scanner_init(struct scanner *s) { if (!s->free) { s->free = free; } return 1; } int scan_json_file(FILE *input) { return 1; } char *skip_ws(char *s) { while (*s && isspace(*s)) { s++; } return s; } #define KW_TRUE 1 #define KW_FALSE 2 #define KW_NULL 3 /* TODO check for illegal json characters */ static void add_unicode(struct token *t, unsigned unicode) { /* TODO check string length */ if (unicode < 0x80) { t->str[t->len++] = unicode; } else if (unicode <= 0x7FF) { t->str[t->len++] = (unicode >> 6) + 0xC0; t->str[t->len++] = (unicode & 0x3F) + 0x80; } else if (unicode <= 0xFFFF) { t->str[t->len++] = (unicode >> 12) + 0xE0; t->str[t->len++] = ((unicode >> 6) & 0x3F) + 0x80; t->str[t->len++] = (unicode & 0x3F) + 0x80; } else if (unicode <= 0x10FFFF) { t->str[t->len++] = (unicode >> 18) + 0xF0; t->str[t->len++] = ((unicode >> 12) & 0x3F) + 0x80; t->str[t->len++] = ((unicode >> 6) & 0x3F) + 0x80; t->str[t->len++] = (unicode & 0x3F) + 0x80; } else { t->error = TOKENERROR; } } static int hexval(int ch) { if (ch >= '0' && ch <= '9') { return ch - '0'; } switch (ch) { case 'a': case 'A': return 10; case 'b': case 'B': return 11; case 'c': case 'C': return 12; case 'd': case 'D': return 13; case 'e': case 'E': return 14; case 'f': case 'F': return 15; } return -1; } int add_str_ch(int ch, struct token *t) { if (t->state == 2) { } switch (t->state) { case 1: switch (ch) { case '\\': case '/': case '"': t->str[t->len++] = ch; break; case 'b': t->str[t->len++] = 8; break; case 'f': t->str[t->len++] = 0xC; break; case 'n': t->str[t->len++] = 0xA; break; case 'r': t->str[t->len++] = 0xD; break; case 't': t->str[t->len++] = 0x9; break; case 'u': t->state = 2; t->unicode = 0; break; /* TODO handle unicode */ } break; case 2: case 3: case 4: case 5: if (isxdigit(ch)) { t->unicode = t->unicode << 4 | hexval(ch); t->state++; if (t->state == 6) { add_unicode(t, t->unicode); t->state = 0; } } else { t->error = TOKENERROR; return 0; } break; case 0: switch (ch) { case '"': t->type = STRING; return STRING; case '\\': t->state = 1; break; default: t->state = 0; t->str[t->len++] = ch; break; } break; } return 0; } static int digitclass(int ch) { switch (ch) { case '.': case '0': case '1': case '+': case '-': case 'e': return ch; case 'E': return 'e'; } if (isdigit(ch)) { return '1'; } return 0; } static int start_num(int ch, struct token *t) { int class; class = digitclass(ch); switch (class) { case '-': t->state = 1; break; case '0': t->state = 2; break; case '1': t->state = 3; break; default: t->state = 0;break; /* TODO error */ } t->len = 1; t->str[0] = ch; return 0; } static int add_str_num(int ch, struct token *t) { int class; class = digitclass(ch); switch (t->state) { case 1: /* leading hyphen */ switch (class) { case '0': t->state = 2; break; case '1': t->state = 3; break; default: t->error = NUMERROR; } break; case 2: /* integral leading zero */ switch (class) { case '.': t->state = 5; break; case 'e': t->state = 7; break; case 0: t->state = 0; break; default: t->error = NUMERROR; } break; case 3: /* integral leading 1-9 */ case 4: /* integer part digits */ switch (class) { case '.': t->state = 5; break; case 'e': t->state = 7; break; case '0': case '1': t->state = 4; break; case 0: t->state = 0; break; default: t->error = NUMERROR; } break; case 5: /* integer / fraction separating dot */ switch (class) { case '0': case '1': t->state = 6; break; default: t->error = NUMERROR; } break; case 6: /* fractional digit */ switch (class) { case '0': case '1': t->state = 6; break; case 'e': t->state = 7; break; case 0: t->state = 0; break; default: t->error = NUMERROR; } break; case 7: /* exponent e or E */ switch (class) { case '-': case '+': t->state = 8; break; case '0': case '1': t->state = 9; break; default: t->error = NUMERROR; } break; case 8: /* exponent plus or minus */ case 9: /* exponent digit */ switch (class) { case '0': case '1': t->state = 9; break; case 0: t->state = 0; break; default: t->error = NUMERROR; } break; } if (t->error) { return 0; } if (t->state) { t->str[t->len++] = ch; return 0; } t->ch = ch; t->type = NUMBER; return NUMBER; } int checklit(char *lit, int ch, struct token *t) { size_t len; len = strlen(lit); // printf("checklit(%s, %c, %d)\n", lit, ch, t->len); if (ch == lit[t->len]) { t->len++; } else { t->error = TOKENERROR; return 0; } if (len == t->len) { return t->maybe; } return 0; #if 0 if (t->len == len && !isalnum(ch)) { t->ch = ch; return t->maybe; } else if (t->len < len && ch == lit[t->len]) { t->len++; return 0; } return -t->maybe; #endif } /* returns true if we finish a token */ int scan_json_ch(int ch, struct token *t) { t->ch = 0; switch (t->type) { case PSTRING: return add_str_ch(ch, t); case PNUMBER: return add_str_num(ch, t); case PTRUE: return checklit("true", ch, t); case PFALSE: return checklit("false", ch, t); case PNULL: return checklit("null", ch, t); } if (isspace(ch)) { return 0; } switch (ch) { case '{': t->type = LBRACE; return LBRACE; break; case '}': t->type = RBRACE; return RBRACE; break; case '[': t->type = LBRACKET; return LBRACKET; break; case ']': t->type = RBRACKET; return RBRACKET; break; case ':': t->type = COLON; return COLON; break; case ',': t->type = COMMA; return COMMA; break; case '"': t->type = PSTRING; t->len = 0; t->state = 0; /* not \ or \u */ t->str[0] = 0; return 0; case 't': t->type = PTRUE; t->maybe = LITTRUE; t->len = 1; return 0; case 'f': t->type = PFALSE; t->maybe = LITFALSE; t->len = 1; return 0; case 'n': t->type = PNULL; t->maybe = LITNULL; t->len = 1; return 0; case '.': case '-': t->type = PNUMBER; t->len = 0; return start_num(ch, t); } if (isdigit(ch)) { t->type = PNUMBER; t->len = 0; return start_num(ch, t); } /* error */ t->error = TOKENERROR; return 0; }