]> pd.if.org Git - lice/commitdiff
autocommit for files dated 2014-11-17 20:13:28
authorunknown <>
Mon, 17 Nov 2014 20:13:28 +0000 (20:13 +0000)
committerNathan Wagner <nw@hydaspes.if.org>
Tue, 25 Oct 2016 16:29:31 +0000 (16:29 +0000)
ast.c [new file with mode: 0644]
conv.c [new file with mode: 0644]
decl.c [new file with mode: 0644]
gen.c [new file with mode: 0644]
gen_amd64.c [new file with mode: 0644]
init.c [new file with mode: 0644]
lexer.c [new file with mode: 0644]

diff --git a/ast.c b/ast.c
new file mode 100644 (file)
index 0000000..38204fe
--- /dev/null
+++ b/ast.c
@@ -0,0 +1,804 @@
+#include <stdlib.h>
+#include <string.h>
+#include <setjmp.h>
+
+#include "lice.h"
+#include "ast.h"
+#include "lexer.h"
+#include "conv.h"
+
+/*
+ * Table of shared, pre-built descriptors for the builtin types. The comment
+ * at the end of each row names the C type that row stands for; the final
+ * slot (function) starts out as NULL.
+ *
+ * NOTE(review): the positional initialisers appear to be { type, size, sign }
+ * and the rows are presumably indexed by the AST_DATA_* constants (this file
+ * uses AST_DATA_VOID, AST_DATA_INT and AST_DATA_CHAR), so the row order would
+ * have to match that enumeration -- confirm against the header.
+ */
+data_type_t *ast_data_table[AST_DATA_COUNT] = {
+    &(data_type_t) { TYPE_VOID,    0,                      true },   /* void                */
+    &(data_type_t) { TYPE_BOOL,    ARCH_TYPE_SIZE_INT,     false},   /* _Bool               */
+    &(data_type_t) { TYPE_LONG,    ARCH_TYPE_SIZE_LONG,    true },   /* long                */
+    &(data_type_t) { TYPE_LLONG,   ARCH_TYPE_SIZE_LLONG,   true },   /* long long           */
+    &(data_type_t) { TYPE_INT,     ARCH_TYPE_SIZE_INT,     true },   /* int                 */
+    &(data_type_t) { TYPE_SHORT,   ARCH_TYPE_SIZE_SHORT,   true },   /* short               */
+    &(data_type_t) { TYPE_CHAR,    ARCH_TYPE_SIZE_CHAR,    true },   /* char                */
+    &(data_type_t) { TYPE_FLOAT,   ARCH_TYPE_SIZE_FLOAT,   true },   /* float               */
+    &(data_type_t) { TYPE_DOUBLE,  ARCH_TYPE_SIZE_DOUBLE,  true },   /* double              */
+    &(data_type_t) { TYPE_LDOUBLE, ARCH_TYPE_SIZE_LDOUBLE, true },   /* long double         */
+    &(data_type_t) { TYPE_LONG,    ARCH_TYPE_SIZE_LONG,    false },  /* unsigned long       */
+    &(data_type_t) { TYPE_LLONG,   ARCH_TYPE_SIZE_LLONG,   false },  /* unsigned long long  */
+    NULL                                                             /* function            */
+};
+
+/*
+ * File-level AST state.
+ *
+ * Within this file: ast_variable_local() inserts into ast_localenv and pushes
+ * onto ast_locals (each only when non-NULL), and ast_variable_global()
+ * inserts into ast_globalenv. The remaining objects are only defined here.
+ * The four environment tables start out pointing at SENTINEL_TABLE.
+ */
+data_type_t *ast_data_function = NULL;
+list_t      *ast_locals        = NULL;
+list_t      *ast_gotos         = NULL;
+table_t     *ast_labels        = NULL;
+table_t     *ast_globalenv     = &SENTINEL_TABLE;
+table_t     *ast_localenv      = &SENTINEL_TABLE;
+table_t     *ast_structures    = &SENTINEL_TABLE;
+table_t     *ast_unions        = &SENTINEL_TABLE;
+
+/*
+ * Structurally compare two type descriptors.
+ *
+ * Returns true when both descriptors carry the same type tag and, in
+ * addition:
+ *   - arrays have equal length and matching element types,
+ *   - pointers have matching pointee types,
+ *   - structure types agree on isstruct, have the same number of fields and
+ *     their field types match pairwise in table order.
+ * Every other type tag matches on the tag alone.
+ */
+bool ast_struct_compare(data_type_t *a, data_type_t *b) {
+    list_t          *la;
+    list_t          *lb;
+    list_iterator_t *lait;
+    list_iterator_t *lbit;
+
+    if (a->type != b->type)
+        return false;
+
+    switch (a->type) {
+        case TYPE_ARRAY:
+            if (a->length == b->length)
+                return ast_struct_compare(a->pointer, b->pointer);
+            return false;
+
+        case TYPE_POINTER:
+            return ast_struct_compare(a->pointer, b->pointer);
+
+        case TYPE_STRUCTURE:
+            if (a->isstruct != b->isstruct)
+                return false;
+
+            /*
+             * The list elements are handed straight back to this function as
+             * data_type_t pointers, so they have to be the field types held
+             * as the table's values, not the keys.
+             */
+            la = table_values(a->fields);
+            lb = table_values(b->fields);
+
+            if (list_length(la) != list_length(lb))
+                return false;
+
+            lait = list_iterator(la);
+            lbit = list_iterator(lb);
+
+            while (!list_iterator_end(lait))
+                if (!ast_struct_compare(list_iterator_next(lait), list_iterator_next(lbit)))
+                    return false;
+
+            /* every field matched */
+            return true;
+
+        default:
+            return true;
+    }
+}
+
+/*
+ * Choose the result type of a binary operation on operands of type `type`.
+ * The six comparison operators yield the shared int descriptor; every other
+ * operation yields whatever conv_senority() returns for the operand type.
+ */
+data_type_t *ast_result_type(int operation, data_type_t *type) {
+    const bool comparison =
+        operation == AST_TYPE_LEQUAL ||
+        operation == AST_TYPE_GEQUAL ||
+        operation == AST_TYPE_EQUAL  ||
+        operation == AST_TYPE_NEQUAL ||
+        operation == '<'             ||
+        operation == '>';
+
+    if (comparison)
+        return ast_data_table[AST_DATA_INT];
+
+    return conv_senority(type, type);
+}
+
+/*
+ * Duplicate the node `ast` into storage obtained from memory_allocate() and
+ * return the duplicate. The copy is shallow: pointers inside the node are
+ * copied as-is.
+ */
+ast_t *ast_copy(ast_t *ast) {
+    ast_t *duplicate = memory_allocate(sizeof(ast_t));
+    memcpy(duplicate, ast, sizeof(ast_t));
+    return duplicate;
+}
+
+/*
+ * Build an AST_TYPE_STRUCT node that refers to member `name` of the
+ * expression `structure`; `type` becomes the node's ctype.
+ */
+ast_t *ast_structure_reference(data_type_t *type, ast_t *structure, char *name) {
+    ast_t node = {
+        .type      = AST_TYPE_STRUCT,
+        .ctype     = type,
+        .structure = structure,
+        .field     = name
+    };
+
+    return ast_copy(&node);
+}
+
+/*
+ * Build a unary node of kind `type` with result type `data` applied to
+ * `operand`.
+ */
+ast_t *ast_new_unary(int type, data_type_t *data, ast_t *operand) {
+    ast_t node = {
+        .type          = type,
+        .ctype         = data,
+        .unary.operand = operand
+    };
+
+    return ast_copy(&node);
+}
+
+/*
+ * Build a binary node of kind `type` with result type `ctype`. The operands
+ * are stored on the copied node after it has been created.
+ */
+ast_t *ast_new_binary(data_type_t *ctype, int type, ast_t *left, ast_t *right) {
+    ast_t  seed = {
+        .type  = type,
+        .ctype = ctype
+    };
+    ast_t *node = ast_copy(&seed);
+
+    node->left  = left;
+    node->right = right;
+
+    return node;
+}
+
+/* Build a literal node of type `type` holding the integer `value`. */
+ast_t *ast_new_integer(data_type_t *type, int value) {
+    ast_t node = {
+        .type    = AST_TYPE_LITERAL,
+        .ctype   = type,
+        .integer = value
+    };
+
+    return ast_copy(&node);
+}
+
+/*
+ * Build a literal node of type `type` holding the floating point `value`.
+ * The literal's label starts out as NULL.
+ */
+ast_t *ast_new_floating(data_type_t *type, double value) {
+    ast_t node = {
+        .type           = AST_TYPE_LITERAL,
+        .ctype          = type,
+        .floating.value = value,
+        .floating.label = NULL
+    };
+
+    return ast_copy(&node);
+}
+
+/*
+ * Build a string literal node for `value`. Its type is an array of char
+ * sized strlen(value) + 1, and its label starts out as NULL.
+ */
+ast_t *ast_new_string(char *value) {
+    data_type_t *chararray = ast_array(ast_data_table[AST_DATA_CHAR], strlen(value) + 1);
+    ast_t        node      = {
+        .type         = AST_TYPE_STRING,
+        .ctype        = chararray,
+        .string.data  = value,
+        .string.label = NULL
+    };
+
+    return ast_copy(&node);
+}
+
+/*
+ * Build a local variable node named `name` of type `type`. The new node is
+ * inserted into ast_localenv under `name` and pushed onto ast_locals, each
+ * only when that global is non-NULL.
+ */
+ast_t *ast_variable_local(data_type_t *type, char *name) {
+    ast_t  seed  = {
+        .type          = AST_TYPE_VAR_LOCAL,
+        .ctype         = type,
+        .variable.name = name
+    };
+    ast_t *local = ast_copy(&seed);
+
+    if (ast_localenv != NULL)
+        table_insert(ast_localenv, name, local);
+
+    if (ast_locals != NULL)
+        list_push(ast_locals, local);
+
+    return local;
+}
+
+/*
+ * Build a global variable node named `name` of type `type`; the name is
+ * also used as the variable's label. The node is inserted into
+ * ast_globalenv under `name`.
+ */
+ast_t *ast_variable_global(data_type_t *type, char *name) {
+    ast_t  seed   = {
+        .type           = AST_TYPE_VAR_GLOBAL,
+        .ctype          = type,
+        .variable.name  = name,
+        .variable.label = name
+    };
+    ast_t *global = ast_copy(&seed);
+
+    table_insert(ast_globalenv, name, global);
+
+    return global;
+}
+
+/*
+ * Build a function node. `ret` is stored as the node's ctype; the name,
+ * parameter list, local list and body are stored unchanged.
+ */
+ast_t *ast_function(data_type_t *ret, char *name, list_t *params, ast_t *body, list_t *locals) {
+    ast_t node = {
+        .type            = AST_TYPE_FUNCTION,
+        .ctype           = ret,
+        .function.name   = name,
+        .function.params = params,
+        .function.locals = locals,
+        .function.body   = body
+    };
+
+    return ast_copy(&node);
+}
+
+/*
+ * Build a designator node for `name` that refers to the node `func`. The
+ * designator's own ctype is the shared void descriptor.
+ */
+ast_t *ast_designator(char *name, ast_t *func) {
+    ast_t node = {
+        .type                          = AST_TYPE_DESIGNATOR,
+        .ctype                         = ast_data_table[AST_DATA_VOID],
+        .function.name                 = name,
+        .function.call.functionpointer = func
+    };
+
+    return ast_copy(&node);
+}
+
+/*
+ * Build a call-through-pointer node. The node's type is the return type of
+ * the type that `functionpointer`'s ctype points to, so `functionpointer`
+ * must have a pointer ctype with a non-NULL pointee.
+ */
+ast_t *ast_pointercall(ast_t *functionpointer, list_t *args) {
+    data_type_t *result = functionpointer->ctype->pointer->returntype;
+    ast_t        node   = {
+        .type                          = AST_TYPE_POINTERCALL,
+        .ctype                         = result,
+        .function.call.functionpointer = functionpointer,
+        .function.call.args            = args
+    };
+
+    return ast_copy(&node);
+}
+
+/*
+ * Build a direct call node to the function `name`. `type` is recorded as
+ * the call's type and its returntype becomes the node's ctype.
+ */
+ast_t *ast_call(data_type_t *type, char *name, list_t *arguments) {
+    ast_t node = {
+        .type               = AST_TYPE_CALL,
+        .ctype              = type->returntype,
+        .function.call.args = arguments,
+        .function.call.type = type,
+        .function.name      = name
+    };
+
+    return ast_copy(&node);
+}
+
+/* Build a va_start node over `ap`; its ctype is the shared void descriptor. */
+ast_t *ast_va_start(ast_t *ap) {
+    ast_t node = {
+        .type  = AST_TYPE_VA_START,
+        .ctype = ast_data_table[AST_DATA_VOID],
+        .ap    = ap
+    };
+
+    return ast_copy(&node);
+}
+
+/* Build a va_arg node over `ap` whose ctype is `type`. */
+ast_t *ast_va_arg(data_type_t *type, ast_t *ap) {
+    ast_t node = {
+        .type  = AST_TYPE_VA_ARG,
+        .ctype = type,
+        .ap    = ap
+    };
+
+    return ast_copy(&node);
+}
+
+/*
+ * Build a declaration node for variable `var` with initializer list `init`.
+ * The node itself has no ctype.
+ */
+ast_t *ast_declaration(ast_t *var, list_t *init) {
+    ast_t node = {
+        .type      = AST_TYPE_DECLARATION,
+        .ctype     = NULL,
+        .decl.var  = var,
+        .decl.init = init
+    };
+
+    return ast_copy(&node);
+}
+
+/*
+ * Build an initializer node that records `value`, the type `to` and the
+ * `offset` it applies at.
+ */
+ast_t *ast_initializer(ast_t *value, data_type_t *to, int offset) {
+    ast_t node = {
+        .type        = AST_TYPE_INITIALIZER,
+        .init.value  = value,
+        .init.offset = offset,
+        .init.type   = to
+    };
+
+    return ast_copy(&node);
+}
+
+/*
+ * Build a ternary expression node of type `type`. It reuses the ifstmt
+ * fields: `cond` is the condition, `then` and `last` are the two arms.
+ */
+ast_t *ast_ternary(data_type_t *type, ast_t *cond, ast_t *then, ast_t *last) {
+    ast_t node = {
+        .type        = AST_TYPE_EXPRESSION_TERNARY,
+        .ctype       = type,
+        .ifstmt.cond = cond,
+        .ifstmt.then = then,
+        .ifstmt.last = last
+    };
+
+    return ast_copy(&node);
+}
+
+/*
+ * Shared constructor behind ast_for(), ast_while() and ast_do(): builds a
+ * node of kind `type` with no ctype and the four forstmt fields filled in.
+ */
+static ast_t *ast_for_intermediate(int type, ast_t *init, ast_t *cond, ast_t *step, ast_t *body) {
+    ast_t node = {
+        .type         = type,
+        .ctype        = NULL,
+        .forstmt.init = init,
+        .forstmt.cond = cond,
+        .forstmt.step = step,
+        .forstmt.body = body
+    };
+
+    return ast_copy(&node);
+}
+
+/* Build a switch statement node over `expr` with the given `body`. */
+ast_t *ast_switch(ast_t *expr, ast_t *body) {
+    ast_t node = {
+        .type            = AST_TYPE_STATEMENT_SWITCH,
+        .switchstmt.expr = expr,
+        .switchstmt.body = body
+    };
+
+    return ast_copy(&node);
+}
+
+/* Build a case label node that stores `begin` and `end` as its bounds. */
+ast_t *ast_case(int begin, int end) {
+    ast_t node = {
+        .type    = AST_TYPE_STATEMENT_CASE,
+        .casebeg = begin,
+        .caseend = end
+    };
+
+    return ast_copy(&node);
+}
+
+/* Build a node of kind `type` with every other field zero-initialised. */
+ast_t *ast_make(int type) {
+    ast_t node = {
+        .type = type
+    };
+
+    return ast_copy(&node);
+}
+
+/*
+ * Build an if statement node with no ctype: `cond` is the condition, `then`
+ * the taken branch and `last` the other branch as supplied by the caller.
+ */
+ast_t *ast_if(ast_t *cond, ast_t *then, ast_t *last) {
+    ast_t node = {
+        .type        = AST_TYPE_STATEMENT_IF,
+        .ctype       = NULL,
+        .ifstmt.cond = cond,
+        .ifstmt.then = then,
+        .ifstmt.last = last
+    };
+
+    return ast_copy(&node);
+}
+
+/* Build a for statement node from its init, condition, step and body. */
+ast_t *ast_for(ast_t *init, ast_t *cond, ast_t *step, ast_t *body) {
+    ast_t *loop = ast_for_intermediate(AST_TYPE_STATEMENT_FOR, init, cond, step, body);
+    return loop;
+}
+/* Build a while statement node; the init and step slots are left NULL. */
+ast_t *ast_while(ast_t *cond, ast_t *body) {
+    ast_t *loop = ast_for_intermediate(AST_TYPE_STATEMENT_WHILE, NULL, cond, NULL, body);
+    return loop;
+}
+/* Build a do statement node; the init and step slots are left NULL. */
+ast_t *ast_do(ast_t *cond, ast_t *body) {
+    ast_t *loop = ast_for_intermediate(AST_TYPE_STATEMENT_DO, NULL, cond, NULL, body);
+    return loop;
+}
+
+/* Build a goto statement node targeting `label`; `where` starts out NULL. */
+ast_t *ast_goto(char *label) {
+    ast_t node = {
+        .type           = AST_TYPE_STATEMENT_GOTO,
+        .gotostmt.label = label,
+        .gotostmt.where = NULL
+    };
+
+    return ast_copy(&node);
+}
+
+/* Build a label statement node named `label`; `where` starts out NULL. */
+ast_t *ast_new_label(char *label) {
+    ast_t node = {
+        .type           = AST_TYPE_STATEMENT_LABEL,
+        .gotostmt.label = label,
+        .gotostmt.where = NULL
+    };
+
+    return ast_copy(&node);
+}
+
+/* Build a return statement node carrying the expression `value`. */
+ast_t *ast_return(ast_t *value) {
+    ast_t node = {
+        .type       = AST_TYPE_STATEMENT_RETURN,
+        .returnstmt = value
+    };
+
+    return ast_copy(&node);
+}
+
+/* Build a compound statement node (no ctype) over the list `statements`. */
+ast_t *ast_compound(list_t *statements) {
+    ast_t node = {
+        .type     = AST_TYPE_STATEMENT_COMPOUND,
+        .ctype    = NULL,
+        .compound = statements
+    };
+
+    return ast_copy(&node);
+}
+
+/*
+ * Build a TYPE_STRUCTURE descriptor with the given field table and size.
+ * `isstruct` is stored as given.
+ */
+data_type_t *ast_structure_new(table_t *fields, int size, bool isstruct) {
+    data_type_t structure = {
+        .type     = TYPE_STRUCTURE,
+        .size     = size,
+        .fields   = fields,
+        .isstruct = isstruct
+    };
+
+    return ast_type_copy(&structure);
+}
+
+/*
+ * Produce a fresh label of the form ".L<n>". A function-local static
+ * counter supplies <n>, starting at 0 and increasing by one per call.
+ */
+char *ast_label(void) {
+    static int counter = 0;
+    string_t  *name    = string_create();
+
+    string_catf(name, ".L%d", counter);
+    counter++;
+
+    return string_buffer(name);
+}
+
+/*
+ * Build a node for taking the address of `label`; its ctype is a freshly
+ * built pointer-to-void descriptor.
+ */
+ast_t *ast_label_address(char *label) {
+    data_type_t *voidpointer = ast_pointer(ast_data_table[AST_DATA_VOID]);
+    ast_t        node        = {
+        .type           = AST_TYPE_STATEMENT_LABEL_COMPUTED,
+        .ctype          = voidpointer,
+        .gotostmt.label = label
+    };
+
+    return ast_copy(&node);
+}
+
+/* Build a computed goto node whose target is the value of `expression`. */
+ast_t *ast_goto_computed(ast_t *expression) {
+    ast_t node = {
+        .type          = AST_TYPE_STATEMENT_GOTO_COMPUTED,
+        .unary.operand = expression
+    };
+
+    return ast_copy(&node);
+}
+
+/*
+ * Report whether `type` is one of the integer kinds: _Bool, char, short,
+ * int, long or long long.
+ */
+bool ast_type_isinteger(data_type_t *type) {
+    const int kind = type->type;
+
+    return kind == TYPE_BOOL
+        || kind == TYPE_CHAR
+        || kind == TYPE_SHORT
+        || kind == TYPE_INT
+        || kind == TYPE_LONG
+        || kind == TYPE_LLONG;
+}
+
+/*
+ * Report whether `type` is one of the floating kinds: float, double or
+ * long double.
+ */
+bool ast_type_isfloating(data_type_t *type) {
+    const int kind = type->type;
+
+    return kind == TYPE_FLOAT
+        || kind == TYPE_DOUBLE
+        || kind == TYPE_LDOUBLE;
+}
+
+/* Report whether `type` is an array whose element type is char. */
+bool ast_type_isstring(data_type_t *type) {
+    if (type->type != TYPE_ARRAY)
+        return false;
+
+    /* the element type is only inspected once `type` is known to be an array */
+    return type->pointer->type == TYPE_CHAR;
+}
+
+/*
+ * Duplicate the descriptor `type` into storage obtained from
+ * memory_allocate() and return the duplicate (shallow copy).
+ */
+data_type_t *ast_type_copy(data_type_t *type) {
+    data_type_t *duplicate = memory_allocate(sizeof(data_type_t));
+
+    *duplicate = *type;
+
+    return duplicate;
+}
+
+/*
+ * Return a private copy of `type` when its length is -1, otherwise return
+ * `type` itself. NULL is passed through unchanged.
+ */
+data_type_t *ast_type_copy_incomplete(data_type_t *type) {
+    if (type == NULL)
+        return NULL;
+
+    if (type->length == -1)
+        return ast_type_copy(type);
+
+    return type;
+}
+
+/*
+ * Allocate a descriptor for the builtin kind `type` with signedness `sign`
+ * and fill in its size from the ARCH_TYPE_SIZE_* constants (0 for void).
+ * Any other kind is reported through compile_error("ICE"). Only the type,
+ * sign and size fields are written here.
+ */
+data_type_t *ast_type_create(type_t type, bool sign) {
+    data_type_t *created = memory_allocate(sizeof(data_type_t));
+
+    created->type = type;
+    created->sign = sign;
+
+    switch (type) {
+        case TYPE_VOID:
+            created->size = 0;
+            break;
+        case TYPE_BOOL:
+            created->size = ARCH_TYPE_SIZE_INT;
+            break;
+        case TYPE_CHAR:
+            created->size = ARCH_TYPE_SIZE_CHAR;
+            break;
+        case TYPE_SHORT:
+            created->size = ARCH_TYPE_SIZE_SHORT;
+            break;
+        case TYPE_INT:
+            created->size = ARCH_TYPE_SIZE_INT;
+            break;
+        case TYPE_LONG:
+            created->size = ARCH_TYPE_SIZE_LONG;
+            break;
+        case TYPE_LLONG:
+            created->size = ARCH_TYPE_SIZE_LLONG;
+            break;
+        case TYPE_FLOAT:
+            created->size = ARCH_TYPE_SIZE_FLOAT;
+            break;
+        case TYPE_DOUBLE:
+            created->size = ARCH_TYPE_SIZE_DOUBLE;
+            break;
+        case TYPE_LDOUBLE:
+            created->size = ARCH_TYPE_SIZE_LDOUBLE;
+            break;
+        default:
+            compile_error("ICE");
+    }
+
+    return created;
+}
+
+/* Build a TYPE_CDECL descriptor of size 0. */
+data_type_t *ast_type_stub(void) {
+    data_type_t stub = {
+        .type = TYPE_CDECL,
+        .size = 0
+    };
+
+    return ast_type_copy(&stub);
+}
+
+/* Build a conversion node that converts the expression `ast` to `type`. */
+ast_t *ast_type_convert(data_type_t *type, ast_t *ast) {
+    ast_t node = {
+        .type          = AST_TYPE_CONVERT,
+        .ctype         = type,
+        .unary.operand = ast
+    };
+
+    return ast_copy(&node);
+}
+
+/*
+ * Build a TYPE_FUNCTION descriptor from a return type and a parameter type
+ * list; `dots` is stored in hasdots.
+ */
+data_type_t *ast_prototype(data_type_t *returntype, list_t *paramtypes, bool dots) {
+    data_type_t prototype = {
+        .type       = TYPE_FUNCTION,
+        .returntype = returntype,
+        .parameters = paramtypes,
+        .hasdots    = dots
+    };
+
+    return ast_type_copy(&prototype);
+}
+
+/*
+ * Build an array descriptor of `length` elements of `type`. A negative
+ * length yields a size of -1; otherwise the size is the element size times
+ * the length.
+ */
+data_type_t *ast_array(data_type_t *type, int length) {
+    data_type_t array = {
+        .type    = TYPE_ARRAY,
+        .pointer = type,
+        .size    = (length < 0) ? -1 : type->size * length,
+        .length  = length
+    };
+
+    return ast_type_copy(&array);
+}
+
+/*
+ * Array-to-pointer conversion: an array type becomes a pointer to its
+ * element type, any other type is returned unchanged.
+ */
+data_type_t *ast_array_convert(data_type_t *type) {
+    return (type->type == TYPE_ARRAY)
+                ? ast_pointer(type->pointer)
+                : type;
+}
+
+/*
+ * Turn a designator node into an address-of node over the node it refers
+ * to, typed as a pointer to that node's call type. NULL and non-designator
+ * nodes are returned unchanged.
+ */
+ast_t *ast_designator_convert(ast_t *ast) {
+    ast_t       *target;
+    data_type_t *pointertype;
+
+    if (!ast)
+        return NULL;
+
+    if (ast->type != AST_TYPE_DESIGNATOR)
+        return ast;
+
+    target      = ast->function.call.functionpointer;
+    pointertype = ast_pointer(target->function.call.type);
+
+    return ast_new_unary(AST_TYPE_ADDRESS, pointertype, target);
+}
+
+/* Build a pointer descriptor (size ARCH_TYPE_SIZE_POINTER) pointing at `type`. */
+data_type_t *ast_pointer(data_type_t *type) {
+    data_type_t pointer = {
+        .type    = TYPE_POINTER,
+        .pointer = type,
+        .size    = ARCH_TYPE_SIZE_POINTER
+    };
+
+    return ast_type_copy(&pointer);
+}
+
+/*
+ * Render `type` as text.
+ *
+ * Builtin kinds map to fixed names. Function, pointer, array and structure
+ * types are rendered recursively into a freshly created string. Any other
+ * kind yields NULL.
+ */
+const char *ast_type_string(data_type_t *type) {
+    string_t        *out;
+    list_iterator_t *cursor;
+
+    switch (type->type) {
+        case TYPE_VOID:
+            return "void";
+        case TYPE_BOOL:
+            return "_Bool";
+        case TYPE_INT:
+            return "int";
+        case TYPE_CHAR:
+            return "char";
+        case TYPE_LONG:
+            return "long";
+        case TYPE_LLONG:
+            return "long long";
+        case TYPE_SHORT:
+            return "short";
+        case TYPE_FLOAT:
+            return "float";
+        case TYPE_DOUBLE:
+            return "double";
+        case TYPE_LDOUBLE:
+            return "long double";
+
+        case TYPE_FUNCTION:
+            /* "(param,param,...) -> returntype" */
+            out = string_create();
+            string_cat(out, '(');
+            cursor = list_iterator(type->parameters);
+            while (!list_iterator_end(cursor)) {
+                data_type_t *parameter = list_iterator_next(cursor);
+                string_catf(out, "%s", ast_type_string(parameter));
+                if (!list_iterator_end(cursor))
+                    string_cat(out, ',');
+            }
+            string_catf(out, ") -> %s", ast_type_string(type->returntype));
+            return string_buffer(out);
+
+        case TYPE_POINTER:
+            out = string_create();
+            string_catf(out, "%s*", ast_type_string(type->pointer));
+            return string_buffer(out);
+
+        case TYPE_ARRAY:
+            out = string_create();
+            string_catf(out, "%s[%d]", ast_type_string(type->pointer), type->length);
+            return string_buffer(out);
+
+        case TYPE_STRUCTURE:
+            out = string_create();
+            string_catf(out, "(struct");
+            cursor = list_iterator(table_values(type->fields));
+            while (!list_iterator_end(cursor)) {
+                data_type_t *member = list_iterator_next(cursor);
+                if (member->bitfield.size >= 0) {
+                    /* non-negative bitfield size: also show offset and end */
+                    string_catf(
+                        out,
+                        "(%s:%d:%d)",
+                        ast_type_string(member),
+                        member->bitfield.offset,
+                        member->bitfield.offset + member->bitfield.size
+                    );
+                } else {
+                    string_catf(out, " (%s)", ast_type_string(member));
+                }
+            }
+            string_cat(out, ')');
+            return string_buffer(out);
+
+        default:
+            break;
+    }
+
+    return NULL;
+}
+
+/* Append "(<op> <operand>)" for the unary node `ast` to `string`. */
+static void ast_string_unary(string_t *string, const char *op, ast_t *ast) {
+    char *operand = ast_string(ast->unary.operand);
+
+    string_catf(string, "(%s %s)", op, operand);
+}
+
+/*
+ * Append "(<op> <left> <right>)" for the binary node `ast` to `string`.
+ * Both operands are rendered via ast_string() as arguments of one call, so
+ * the order in which they are rendered is left to the compiler.
+ */
+static void ast_string_binary(string_t *string, const char *op, ast_t *ast) {
+    string_catf(string, "(%s %s %s)", op, ast_string(ast->left), ast_string(ast->right));
+}
+
+/*
+ * Append every initializer in `initlist` to `string`, separated by single
+ * spaces. A NULL list appends nothing.
+ */
+static void ast_string_initialization_declaration(string_t *string, list_t *initlist) {
+    list_iterator_t *cursor;
+
+    if (initlist == NULL)
+        return;
+
+    cursor = list_iterator(initlist);
+    while (!list_iterator_end(cursor)) {
+        ast_t *initializer = list_iterator_next(cursor);
+
+        string_catf(string, "%s", ast_string(initializer));
+
+        /* separator only between elements, never after the last one */
+        if (!list_iterator_end(cursor))
+            string_cat(string, ' ');
+    }
+}
+
+/*
+ * Append a textual rendering of the node `ast` to `string`.
+ *
+ * A NULL node is rendered as "(null)". Node kinds with a dedicated case are
+ * rendered by that case; every remaining kind is treated as a binary
+ * operator and rendered as "(<op> <left> <right>)", or skipped when either
+ * operand is missing.
+ */
+static void ast_string_impl(string_t *string, ast_t *ast) {
+    char *left  = NULL;
+    char *right = NULL;
+
+    if (!ast) {
+        string_catf(string, "(null)");
+        return;
+    }
+
+    switch (ast->type) {
+        case AST_TYPE_LITERAL:
+            /*
+             * NOTE(review): ast->integer is printed with both %d and %ld
+             * below; confirm its declared type matches each specifier.
+             */
+            switch (ast->ctype->type) {
+                case TYPE_INT:
+                case TYPE_SHORT:
+                    string_catf(string, "%d",   ast->integer);
+                    break;
+
+                case TYPE_FLOAT:
+                case TYPE_DOUBLE:
+                    string_catf(string, "%f",   ast->floating.value);
+                    break;
+
+                case TYPE_LONG:
+                    string_catf(string, "%ldL", ast->integer);
+                    break;
+
+                case TYPE_CHAR:
+                    /*
+                     * Newline, backslash and NUL are shown as escape
+                     * sequences so the dump stays on one line and remains
+                     * readable.
+                     */
+                    if (ast->integer == '\n')
+                        string_catf(string, "'\\n'");
+                    else if (ast->integer == '\\')
+                        string_catf(string, "'\\\\'");
+                    else if (ast->integer == '\0')
+                        string_catf(string, "'\\0'");
+                    else
+                        string_catf(string, "'%c'", ast->integer);
+                    break;
+
+                default:
+                    compile_ice("ast_string_impl");
+                    break;
+            }
+            break;
+
+        case AST_TYPE_STRING:
+            string_catf(string, "\"%s\"", string_quote(ast->string.data));
+            break;
+
+        case AST_TYPE_VAR_LOCAL:
+            string_catf(string, "%s", ast->variable.name);
+            if (ast->variable.init) {
+                string_cat(string, '(');
+                ast_string_initialization_declaration(string, ast->variable.init);
+                string_cat(string, ')');
+            }
+            break;
+
+        case AST_TYPE_VAR_GLOBAL:
+            string_catf(string, "%s", ast->variable.name);
+            break;
+
+        case AST_TYPE_CALL:
+        case AST_TYPE_POINTERCALL:
+            /*
+             * A direct call is shown by name. A call through a pointer shows
+             * the callee expression instead; rendering `ast` itself here
+             * would re-enter this same case and never terminate.
+             */
+            string_catf(string, "(%s)%s(", ast_type_string(ast->ctype),
+                (ast->type == AST_TYPE_CALL)
+                    ? ast->function.name
+                    : ast_string(ast->function.call.functionpointer)
+            );
+
+            for (list_iterator_t *it = list_iterator(ast->function.call.args); !list_iterator_end(it); ) {
+                string_catf(string, "%s", ast_string(list_iterator_next(it)));
+                if (!list_iterator_end(it))
+                    string_cat(string, ',');
+            }
+            string_cat(string, ')');
+            break;
+
+        case AST_TYPE_FUNCTION:
+            string_catf(string, "(%s)%s(", ast_type_string(ast->ctype), ast->function.name);
+            for (list_iterator_t *it = list_iterator(ast->function.params); !list_iterator_end(it); ) {
+                ast_t *param = list_iterator_next(it);
+                string_catf(string, "%s %s", ast_type_string(param->ctype), ast_string(param));
+                if (!list_iterator_end(it))
+                    string_cat(string, ',');
+            }
+            string_cat(string, ')');
+            ast_string_impl(string, ast->function.body);
+            break;
+
+        case AST_TYPE_DECLARATION:
+            string_catf(string, "(decl %s %s ",
+                    ast_type_string(ast->decl.var->ctype),
+                    ast->decl.var->variable.name
+            );
+            ast_string_initialization_declaration(string, ast->decl.init);
+            string_cat(string, ')');
+            break;
+
+        case AST_TYPE_INITIALIZER:
+            string_catf(string, "%s@%d", ast_string(ast->init.value), ast->init.offset);
+            break;
+
+        case AST_TYPE_CONVERT:
+            string_catf(string, "(convert %s -> %s)", ast_string(ast->unary.operand), ast_type_string(ast->ctype));
+            break;
+
+        case AST_TYPE_STATEMENT_COMPOUND:
+            string_cat(string, '{');
+            for (list_iterator_t *it = list_iterator(ast->compound); !list_iterator_end(it); ) {
+                ast_string_impl(string, list_iterator_next(it));
+                string_cat(string, ';');
+            }
+            string_cat(string, '}');
+            break;
+
+        case AST_TYPE_STRUCT:
+            ast_string_impl(string, ast->structure);
+            string_cat(string, '.');
+            /* the field name is data, never a format string */
+            string_catf(string, "%s", ast->field);
+            break;
+
+        case AST_TYPE_EXPRESSION_TERNARY:
+            string_catf(string, "(? %s %s %s)",
+                            ast_string(ast->ifstmt.cond),
+                            ast_string(ast->ifstmt.then),
+                            ast_string(ast->ifstmt.last)
+            );
+            break;
+
+        case AST_TYPE_STATEMENT_IF:
+            string_catf(string, "(if %s %s", ast_string(ast->ifstmt.cond), ast_string(ast->ifstmt.then));
+            if (ast->ifstmt.last)
+                string_catf(string, " %s", ast_string(ast->ifstmt.last));
+            string_cat(string, ')');
+            break;
+
+        case AST_TYPE_STATEMENT_FOR:
+            string_catf(string, "(for %s %s %s %s)",
+                ast_string(ast->forstmt.init),
+                ast_string(ast->forstmt.cond),
+                ast_string(ast->forstmt.step),
+                ast_string(ast->forstmt.body)
+            );
+            break;
+
+        case AST_TYPE_STATEMENT_WHILE:
+            string_catf(string, "(while %s %s)",
+                ast_string(ast->forstmt.cond),
+                ast_string(ast->forstmt.body)
+            );
+            break;
+
+        case AST_TYPE_STATEMENT_DO:
+            string_catf(string, "(do %s %s)",
+                ast_string(ast->forstmt.cond),
+                ast_string(ast->forstmt.body)
+            );
+            break;
+
+        case AST_TYPE_STATEMENT_RETURN:
+            string_catf(string, "(return %s)", ast_string(ast->returnstmt));
+            break;
+
+        case AST_TYPE_LRSHIFT:            ast_string_binary(string, ">>",    ast); break;
+        case AST_TYPE_ADDRESS:            ast_string_unary (string, "addr",  ast); break;
+        case AST_TYPE_DEREFERENCE:        ast_string_unary (string, "deref", ast); break;
+
+
+        case LEXER_TOKEN_COMPOUND_LSHIFT: ast_string_binary(string, "<<=",     ast); break;
+        case LEXER_TOKEN_COMPOUND_RSHIFT: ast_string_binary(string, ">>=",     ast); break;
+        case AST_TYPE_POST_INCREMENT:     ast_string_unary (string, "postinc", ast); break;
+        case AST_TYPE_POST_DECREMENT:     ast_string_unary (string, "postdec", ast); break;
+        case AST_TYPE_PRE_INCREMENT:      ast_string_unary (string, "preinc",  ast); break;
+        case AST_TYPE_PRE_DECREMENT:      ast_string_unary (string, "predec",  ast); break;
+        case AST_TYPE_NEGATE:             ast_string_unary (string, "negate",  ast); break;
+        /* NOTE(review): '!' is C's logical not but is labelled "bitnot" -- confirm intent. */
+        case '!':                         ast_string_unary (string, "bitnot",  ast); break;
+        case '&':                         ast_string_binary(string, "bitand",  ast); break;
+        case '|':                         ast_string_binary(string, "bitor",   ast); break;
+        case AST_TYPE_AND:                ast_string_binary(string, "logand",  ast); break;
+        case AST_TYPE_OR:                 ast_string_binary(string, "logor",   ast); break;
+        case AST_TYPE_GEQUAL:             ast_string_binary(string, "gteq",    ast); break;
+        case AST_TYPE_LEQUAL:             ast_string_binary(string, "lteq",    ast); break;
+        case AST_TYPE_NEQUAL:             ast_string_binary(string, "ne",      ast); break;
+        case AST_TYPE_LSHIFT:             ast_string_binary(string, "lshift",  ast); break;
+        case AST_TYPE_RSHIFT:             ast_string_binary(string, "rshift",  ast); break;
+
+        case AST_TYPE_DESIGNATOR:
+            string_catf(string, "(designator %s)", ast_string(ast->function.call.functionpointer));
+            break;
+
+        case AST_TYPE_EXPRESSION_CAST:
+            string_catf(string, "((%s) -> (%s) %s)",
+                ast_type_string(ast->unary.operand->ctype),
+                ast_type_string(ast->ctype),
+                ast_string(ast->unary.operand)
+            );
+            break;
+
+        case AST_TYPE_STATEMENT_LABEL_COMPUTED:
+            string_catf(string, "(labeladdr %s)", ast->gotostmt.label);
+            break;
+
+        default:
+            /* generic binary operator; nothing is printed without both operands */
+            if (!ast->left || !ast->right)
+                break;
+
+            left  = ast_string(ast->left);
+            right = ast_string(ast->right);
+            if (ast->type == LEXER_TOKEN_EQUAL)
+                string_catf(string, "(== %s %s)", left, right);
+            else
+                string_catf(string, "(%c %s %s)", ast->type, left, right);
+            break;
+    }
+}
+
+/*
+ * Render the node `ast` (NULL allowed) into a newly created string and
+ * return that string's buffer.
+ */
+char *ast_string(ast_t *ast) {
+    string_t *rendered = string_create();
+
+    ast_string_impl(rendered, ast);
+
+    return string_buffer(rendered);
+}
diff --git a/conv.c b/conv.c
new file mode 100644 (file)
index 0000000..31412de
--- /dev/null
+++ b/conv.c
@@ -0,0 +1,118 @@
+/*
+ * The complicated C rule set for type conversion. This is a full research
+ * oriented approach, run against the standard, and the tons of trial and
+ * error.
+ *
+ * A little bit about what is involved in type conversion:
+ *  - Arithmetic type rules
+ *  - Implicit conversion
+ *  - Explicit conversion
+ *
+ * 1. Arithmetic type rules:
+ *      The C standard defines a set of rules about arithmetic type
+ *      conversion, known as the conversion rank rules, which
+ *      essentially dictate which sides of an expression need to be
+ *      converted.
+ *
+ *      First rule:
+ *      If the left hand side of an expression isn't an arithmetic type
+ *      or the right hand side of an expression isn't an arithmetic type
+ *      no conversion takes place.
+ *
+ *      Second rule:
+ *      If the conversion rank of the left hand side expression type
+ *      is less than the conversion rank of the right hand side
+ *      expression type, then the left hand side of that expression
+ *      gets converted to the right hand side's type.
+ *
+ *      Third rule:
+ *      If the conversion rank of the left hand side expression type doesn't
+ *      compare equal to that of the right hand side, then the right hand
+ *      side of that expression gets converted to the left hand side's type.
+ *
+ *      Last rule:
+ *      If none of the above applies, then nothing is subjected to conversion,
+ *      and doesn't need to be converted, unless the following:
+ *
+ *          The binary expression in which each operand is associated with happens
+ *          to be of a relational one in which case the type is converted to
+ *          integer type.
+ *
+ *          The expression happens to be of array type, in which case the array
+ *          decays to a pointer to its base type.
+ *
+ *  2. Implicit conversion
+ *      Implicit type conversion takes place in two scenarios: first, when
+ *      conversion ranking is involved (promoted types); second, when the
+ *      operand is the subject of a shift operation, where the larger type
+ *      is always assumed to satisfy the shift operation.
+ *
+ *  3. Explicit conversion
+ *      The type which is assumed in explicit conversion (casting) is
+ *      the type in which the operand is converted to, unless the conversion
+ *      isn't legal (vector -> scalar for instance)
+ */
+#include "ast.h"
+#include "lice.h"
+
+/* Report whether `type` is an arithmetic type (integer or floating). */
+bool conv_capable(data_type_t *type) {
+    if (ast_type_isinteger(type))
+        return true;
+
+    return ast_type_isfloating(type);
+}
+
+/*
+ * Return the conversion rank of the arithmetic type `type`: 0 for _Bool up
+ * to 8 for long double. Any other type is an internal compiler error.
+ */
+int conv_rank(data_type_t *type) {
+    switch (type->type) {
+        case TYPE_BOOL:    return 0;
+        case TYPE_CHAR:    return 1;
+        case TYPE_SHORT:   return 2;
+        case TYPE_INT:     return 3;
+        case TYPE_LONG:    return 4;
+        case TYPE_LLONG:   return 5;
+        case TYPE_FLOAT:   return 6;
+        case TYPE_DOUBLE:  return 7;
+        case TYPE_LDOUBLE: return 8;
+        default:
+            /* not an arithmetic type */
+            break;
+    }
+
+    compile_ice("conv_rank");
+
+    /*
+     * Only reached if compile_ice() returns; guarantees the function never
+     * runs off its end without a value.
+     */
+    return -1;
+}
+
+/*
+ * Return whichever of the two types has the higher conversion rank; `lhs`
+ * wins when the ranks are equal.
+ */
+data_type_t *conv_senority(data_type_t *lhs, data_type_t *rhs) {
+    if (conv_rank(lhs) < conv_rank(rhs))
+        return rhs;
+
+    return lhs;
+}
+
+/*
+ * Build the binary node for `operation`, applying the usual arithmetic
+ * conversions to its operands.
+ *
+ * When either operand is not an arithmetic type no conversion is inserted:
+ * comparisons are typed int, everything else takes the left operand's type
+ * after array-to-pointer conversion. Otherwise the lower ranked operand is
+ * wrapped in a conversion to the other operand's type and the result type
+ * comes from ast_result_type().
+ */
+ast_t *conv_usual(int operation, ast_t *left, ast_t *right) {
+    data_type_t *result;
+    int          lrank;
+    int          rrank;
+
+    if (!conv_capable(left->ctype) || !conv_capable(right->ctype)) {
+        const bool comparison =
+            operation == AST_TYPE_LEQUAL ||
+            operation == AST_TYPE_GEQUAL ||
+            operation == AST_TYPE_EQUAL  ||
+            operation == AST_TYPE_NEQUAL ||
+            operation == '<'             ||
+            operation == '>';
+
+        if (comparison)
+            result = ast_data_table[AST_DATA_INT];
+        else
+            result = ast_array_convert(left->ctype);
+
+        return ast_new_binary(result, operation, left, right);
+    }
+
+    lrank = conv_rank(left->ctype);
+    rrank = conv_rank(right->ctype);
+
+    if (lrank < rrank)
+        left  = ast_type_convert(right->ctype, left);
+    else if (lrank > rrank)
+        right = ast_type_convert(left->ctype, right);
+
+    /* after any conversion above, left->ctype is the higher ranked type */
+    result = ast_result_type(operation, left->ctype);
+
+    return ast_new_binary(result, operation, left, right);
+}
diff --git a/decl.c b/decl.c
new file mode 100644 (file)
index 0000000..450e617
--- /dev/null
+++ b/decl.c
@@ -0,0 +1,339 @@
+/*
+ * Deals with all the complexity in C's declaration specification with
+ * a rather large state machine model. C has a lot of ways to specify
+ * something that happens to be equivalent to other spellings, which are
+ * also used. This state machine monitors the occurrence of certain
+ * identifiers to build a series of on/off states which ultimately
+ * allow us to disambiguate the meaning, while at the same time enforcing
+ * correctness.
+ *
+ * For instance it isn't legal in C to have a typedef of a 'signed' size
+ * specified type, then use that typedef with another size specifier.
+ * More of these rules apply as well, and are documented in the state
+ * machine set logic.
+ *
+ * Once the state machine has completed its work the get function uses
+ * the state of the machine to determine what type to return from the
+ * ast data table for types, or if there needs to be a new type created
+ * to compensate for the declaration. Similarly at this stage the state
+ * can be invalid (if something went terribly wrong) and we can handle it,
+ * or ice.
+ *
+ * The main entry point is decl_spec and it's called from the parser;
+ * if everything passes, the callsite gets a data_type_t of the type
+ * specified.
+ */
+#include <string.h>
+
+#include "parse.h"
+#include "lice.h"
+#include "lexer.h"
+
+/*
+ * Base type keyword recorded by the specifier state machine. The order
+ * must match spec_type_string, which is indexed by these values.
+ */
+typedef enum {
+    SPEC_TYPE_NULL,   /* no type keyword seen yet (zero initialised state) */
+    SPEC_TYPE_VOID,
+    SPEC_TYPE_BOOL,
+    SPEC_TYPE_CHAR,
+    SPEC_TYPE_INT,
+    SPEC_TYPE_FLOAT,
+    SPEC_TYPE_DOUBLE,
+} spec_type_t;
+
+/*
+ * Size specifier recorded by the specifier state machine. The order must
+ * match spec_size_string, which is indexed by these values.
+ */
+typedef enum {
+    SPEC_SIZE_NULL,   /* no size keyword seen yet */
+    SPEC_SIZE_SHORT,
+    SPEC_SIZE_LONG,
+    SPEC_SIZE_LLONG   /* reached when a second 'long' follows the first */
+} spec_size_t;
+
+/*
+ * Sign specifier recorded by the specifier state machine. The order must
+ * match spec_sign_string, which is indexed by these values.
+ */
+typedef enum {
+    SPEC_SIGN_NULL,   /* neither 'signed' nor 'unsigned' seen yet */
+    SPEC_SIGN_SIGNED,
+    SPEC_SIGN_UNSIGNED
+} spec_sign_t;
+
+/* Printable names for spec_type_t, indexed by the enumeration value. */
+static const char *spec_type_string[] = {
+    "null", "void",  "_Bool", "char",
+    "int",  "float", "double"
+};
+
+/* Printable names for spec_size_t, indexed by the enumeration value. */
+static const char *spec_size_string[] = {
+    "null", "short", "long", "long long"
+};
+
+/* Printable names for spec_sign_t, indexed by the enumeration value. */
+static const char *spec_sign_string[] = {
+    "null", "signed", "unsigned"
+};
+
+/* Printable names for decl_var_t, indexed by the enumeration value. */
+static const char *spec_var_string[] = {
+    "null", "type", "size", "sign", "user"
+};
+
+/*
+ * State of the declaration specifier state machine. decl_spec zero
+ * initialises it, so every member starts out as its NULL/false value.
+ */
+typedef struct {
+    storage_t    class;      /* storage class, set once by decl_spec_class */
+    spec_type_t  type;       /* base type keyword */
+    spec_size_t  size;       /* short / long / long long */
+    spec_sign_t  sign;       /* signed / unsigned */
+    data_type_t *user;       /* struct, union, enum, typeof or typedef type */
+    bool         kconst;     /* 'const' was seen */
+    bool         kvolatile;  /* 'volatile' was seen */
+    bool         kinline;    /* 'inline' was seen */
+} decl_spec_t;
+
+/*
+ * Selects which member of decl_spec_t a call to decl_spec_set updates.
+ * The order must match spec_var_string, which is indexed by these values.
+ */
+typedef enum {
+    SPEC_VAR_NULL,
+    SPEC_VAR_TYPE,
+    SPEC_VAR_SIZE,
+    SPEC_VAR_SIGN,
+    SPEC_VAR_USER
+} decl_var_t;
+
+/*
+ * Reports a specifier error for state X while handling SELECT, tagging the
+ * report with the source line of the call site.
+ */
+#define decl_spec_error(X, SELECT) \
+    decl_spec_error_impl((X), (SELECT), __LINE__)
+
+/*
+ * Returns a printable name for a storage class, or "default" when the
+ * value is none of the named storage classes.
+ */
+static const char *debug_storage_string(const storage_t class) {
+    const char *name = "default";
+
+    switch (class) {
+        case STORAGE_AUTO:
+            name = "auto";
+            break;
+        case STORAGE_EXTERN:
+            name = "extern";
+            break;
+        case STORAGE_REGISTER:
+            name = "register";
+            break;
+        case STORAGE_STATIC:
+            name = "static";
+            break;
+        case STORAGE_TYPEDEF:
+            name = "typedef";
+            break;
+    }
+
+    return name;
+}
+
+/*
+ * Raises an internal compiler error that dumps the state of the
+ * declaration specifier state machine.
+ *
+ *  spec   - the specifier state to report
+ *  select - the variable that was being handled when the error was found
+ *  line   - source line of the reporting call site (the decl_spec_error
+ *           macro passes __LINE__)
+ */
+static void decl_spec_error_impl(const decl_spec_t *spec, const decl_var_t select, const size_t line) {
+    const char *type = spec_type_string[spec->type];
+    const char *size = spec_size_string[spec->size];
+    const char *sign = spec_sign_string[spec->sign];
+    const char *var  = spec_var_string[select];
+
+    if (!type) type = "unspecified";
+    if (!size) size = "unspecified";
+    if (!sign) sign = "unspecified";
+    if (!var)  var  = "unspecified";
+
+    /* line is a size_t, so it has to be formatted with %zu and not %d */
+    compile_ice("declaration specifier error %zu\n"
+                "debug info:\n"
+                "   select:   %s\n"
+                "   class:    %s\n"
+                "   type:     %s\n"
+                "   size:     %s\n"
+                "   sign:     %s\n"
+                "   const:    %s\n"
+                "   volatile: %s\n"
+                "   inline:   %s\n",
+                line,
+                var,
+                debug_storage_string(spec->class),
+                type,
+                size,
+                sign,
+                bool_string(spec->kconst),
+                bool_string(spec->kvolatile),
+                bool_string(spec->kinline)
+    );
+}
+
+/*
+ * Records the storage class of the declaration. A declaration may name
+ * only one storage class, so a second one is reported as an error.
+ */
+static void decl_spec_class(decl_spec_t *spec, const storage_t class) {
+    if (spec->class)
+        decl_spec_error(spec, SPEC_VAR_NULL);
+
+    spec->class = class;
+}
+
+/*
+ * Stores one specifier into the state machine and then re-validates the
+ * whole state.
+ *
+ *  spec   - state to update
+ *  select - which member is being set
+ *  value  - for SPEC_VAR_SIGN, SPEC_VAR_SIZE and SPEC_VAR_TYPE a pointer to
+ *           the enumeration value to store; for SPEC_VAR_USER the
+ *           data_type_t pointer itself
+ *
+ * Setting a member that already holds a value, or producing a combination
+ * that C does not allow, is reported through decl_spec_error.
+ */
+static void decl_spec_set(decl_spec_t *spec, const decl_var_t select, void *value) {
+    switch (select) {
+        case SPEC_VAR_SIGN:
+            if (spec->sign != SPEC_SIGN_NULL)
+                decl_spec_error(spec, select);
+            spec->sign = *(spec_sign_t*)value;
+            break;
+        case SPEC_VAR_SIZE:
+            if (spec->size != SPEC_SIZE_NULL)
+                decl_spec_error(spec, select);
+            spec->size = *(spec_size_t*)value;
+            break;
+        case SPEC_VAR_TYPE:
+            if (spec->type != SPEC_TYPE_NULL)
+                decl_spec_error(spec, select);
+            spec->type = *(spec_type_t*)value;
+            break;
+        case SPEC_VAR_USER:
+            if (spec->user != 0)
+                decl_spec_error(spec, select);
+            spec->user = value;
+            break;
+        default:
+            compile_ice("decl_spec_set state machine got null variable reference");
+            break;
+    }
+
+    /*
+     * bool cannot have a sign or a size, it's only legal as its own
+     * entity. Either one on its own is already an error, hence the `||'.
+     */
+    if (spec->type == SPEC_TYPE_BOOL && (spec->size != SPEC_SIZE_NULL || spec->sign != SPEC_SIGN_NULL))
+        decl_spec_error(spec, select);
+
+    switch (spec->size) {
+        case SPEC_SIZE_SHORT:
+            /*
+             * short and short int are the only legal uses of the short
+             * size specifier.
+             */
+            if (spec->type != SPEC_TYPE_NULL && spec->type != SPEC_TYPE_INT)
+                decl_spec_error(spec, select);
+            break;
+
+        case SPEC_SIZE_LONG:
+            /*
+             * long, long int and long double are the only legal uses of
+             * long size specifier.
+             */
+            if (spec->type != SPEC_TYPE_NULL && spec->type != SPEC_TYPE_INT && spec->type != SPEC_TYPE_DOUBLE)
+                decl_spec_error(spec, select);
+            break;
+
+        default:
+            break;
+    }
+
+    /*
+     * signed and unsigned sign specifiers are not legal on void, float and
+     * double types.
+     */
+    if (spec->sign != SPEC_SIGN_NULL) {
+        switch (spec->type) {
+            case SPEC_TYPE_VOID:
+            case SPEC_TYPE_FLOAT:
+            case SPEC_TYPE_DOUBLE:
+                decl_spec_error(spec, select);
+                break;
+            default:
+                break;
+        }
+    }
+
+    /*
+     * user types cannot have additional levels of specification on them,
+     * for instance 'typedef unsigned int foo; signed foo'.
+     */
+    if (spec->user && (spec->type != SPEC_TYPE_NULL ||
+                       spec->size != SPEC_SIZE_NULL ||
+                       spec->sign != SPEC_SIGN_NULL))
+        decl_spec_error(spec, select);
+}
+
+/*
+ * Convenience wrapper for decl_spec_set when the value is an enumeration
+ * constant: the constant is stored in a temporary int whose address is
+ * handed to decl_spec_set.
+ */
+#define decl_spec_seti(SPEC, SELECT, VAR) \
+    decl_spec_set((SPEC), (SELECT), &(int){ VAR })
+
+/*
+ * Turns the finished specifier state into a data type.
+ *
+ * The base type keyword decides first; when it is absent or 'int' the size
+ * specifier picks between short, long and long long, and plain int is used
+ * when nothing else was given (implicit int). Everything except an
+ * explicit 'unsigned' counts as signed.
+ */
+static data_type_t *decl_spec_get(const decl_spec_t *spec) {
+    const bool issigned = spec->sign != SPEC_SIGN_UNSIGNED;
+
+    if (spec->type == SPEC_TYPE_VOID)
+        return ast_data_table[AST_DATA_VOID];
+    if (spec->type == SPEC_TYPE_BOOL)
+        return ast_type_create(TYPE_BOOL, false);
+    if (spec->type == SPEC_TYPE_CHAR)
+        return ast_type_create(TYPE_CHAR, issigned);
+    if (spec->type == SPEC_TYPE_FLOAT)
+        return ast_type_create(TYPE_FLOAT, false);
+    if (spec->type == SPEC_TYPE_DOUBLE) {
+        /* 'long double' is the only sized form of double */
+        if (spec->size == SPEC_SIZE_LONG)
+            return ast_type_create(TYPE_LDOUBLE, false);
+        return ast_type_create(TYPE_DOUBLE, false);
+    }
+
+    if (spec->size == SPEC_SIZE_SHORT)
+        return ast_type_create(TYPE_SHORT, issigned);
+    if (spec->size == SPEC_SIZE_LONG)
+        return ast_type_create(TYPE_LONG, issigned);
+    if (spec->size == SPEC_SIZE_LLONG)
+        return ast_type_create(TYPE_LLONG, issigned);
+
+    /* implicit int */
+    return ast_type_create(TYPE_INT, issigned);
+}
+
+/*
+ * Parses a run of declaration specifiers from the lexer and returns the
+ * type they describe.
+ *
+ * Tokens are consumed until something that is not a specifier shows up;
+ * that token is handed back to the lexer. When class is not NULL the
+ * storage class that was seen (zero if none) is stored through it.
+ * A user type (struct, union, enum, typeof, typedef) is returned as is,
+ * everything else is resolved by decl_spec_get.
+ */
+data_type_t *decl_spec(storage_t *const class) {
+    /* keywords that select a storage class */
+    static const struct {
+        const char *keyword;
+        storage_t   storage;
+    } storages[] = {
+        { "typedef",    STORAGE_TYPEDEF  },
+        { "extern",     STORAGE_EXTERN   },
+        { "static",     STORAGE_STATIC   },
+        { "__static__", STORAGE_STATIC   },
+        { "auto",       STORAGE_AUTO     },
+        { "register",   STORAGE_REGISTER }
+    };
+
+    /* keywords that store a plain type, sign or size value */
+    static const struct {
+        const char *keyword;
+        decl_var_t  select;
+        int         value;
+    } simple[] = {
+        { "void",     SPEC_VAR_TYPE, SPEC_TYPE_VOID     },
+        { "_Bool",    SPEC_VAR_TYPE, SPEC_TYPE_BOOL     },
+        { "char",     SPEC_VAR_TYPE, SPEC_TYPE_CHAR     },
+        { "int",      SPEC_VAR_TYPE, SPEC_TYPE_INT      },
+        { "float",    SPEC_VAR_TYPE, SPEC_TYPE_FLOAT    },
+        { "double",   SPEC_VAR_TYPE, SPEC_TYPE_DOUBLE   },
+        { "signed",   SPEC_VAR_SIGN, SPEC_SIGN_SIGNED   },
+        { "unsigned", SPEC_VAR_SIGN, SPEC_SIGN_UNSIGNED },
+        { "short",    SPEC_VAR_SIZE, SPEC_SIZE_SHORT    }
+    };
+
+    decl_spec_t spec;
+    memset(&spec, 0, sizeof(spec));
+
+    for (;;) {
+        lexer_token_t *token = lexer_next();
+        if (!token)
+            compile_error("type specification with unexpected ending");
+
+        if (token->type != LEXER_TOKEN_IDENTIFIER) {
+            lexer_unget(token);
+            break;
+        }
+
+        const char *word    = token->string;
+        bool        matched = false;
+        size_t      i;
+
+        for (i = 0; i < sizeof(storages) / sizeof(*storages) && !matched; i++) {
+            if (!strcmp(word, storages[i].keyword)) {
+                decl_spec_class(&spec, storages[i].storage);
+                matched = true;
+            }
+        }
+
+        for (i = 0; i < sizeof(simple) / sizeof(*simple) && !matched; i++) {
+            if (!strcmp(word, simple[i].keyword)) {
+                decl_spec_seti(&spec, simple[i].select, simple[i].value);
+                matched = true;
+            }
+        }
+
+        if (matched)
+            continue;
+
+        if (!strcmp(word, "const")) {
+            spec.kconst = true;
+        } else if (!strcmp(word, "volatile")) {
+            spec.kvolatile = true;
+        } else if (!strcmp(word, "inline")) {
+            spec.kinline = true;
+        } else if (!strcmp(word, "struct")) {
+            decl_spec_set(&spec, SPEC_VAR_USER, parse_structure());
+        } else if (!strcmp(word, "union")) {
+            decl_spec_set(&spec, SPEC_VAR_USER, parse_union());
+        } else if (!strcmp(word, "enum")) {
+            decl_spec_set(&spec, SPEC_VAR_USER, parse_enumeration());
+        } else if (!strcmp(word, "long")) {
+            /* the first 'long' goes through validation, a second one upgrades it */
+            if (spec.size == 0)
+                decl_spec_seti(&spec, SPEC_VAR_SIZE, SPEC_SIZE_LONG);
+            else if (spec.size == SPEC_SIZE_LONG)
+                spec.size = SPEC_SIZE_LLONG;
+            else
+                decl_spec_error(&spec, SPEC_VAR_NULL);
+        } else if (!strcmp(word, "typeof") || !strcmp(word, "__typeof__")) {
+            decl_spec_set(&spec, SPEC_VAR_USER, parse_typeof());
+        } else if (parse_typedef_find(token->string) && !spec.user) {
+            decl_spec_set(&spec, SPEC_VAR_USER, parse_typedef_find(token->string));
+        } else {
+            /* not a specifier: give it back and stop */
+            lexer_unget(token);
+            break;
+        }
+    }
+
+    if (class)
+        *class = spec.class;
+
+    return spec.user ? spec.user : decl_spec_get(&spec);
+}
diff --git a/gen.c b/gen.c
new file mode 100644 (file)
index 0000000..0466de3
--- /dev/null
+++ b/gen.c
@@ -0,0 +1,298 @@
+/*
+ * File: gen.c
+ *  Common code generator facilities.
+ */
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "gen.h"
+#include "lice.h"
+
+/*
+ * Jump targets used while generating statements: the labels that break,
+ * continue and the next switch case currently resolve to. The *_backup
+ * variables hold the value that was current before the enclosing
+ * construct replaced it; there is a single backup slot per label.
+ */
+char *gen_label_break           = NULL;
+char *gen_label_continue        = NULL;
+char *gen_label_switch          = NULL;
+char *gen_label_break_backup    = NULL;
+char *gen_label_continue_backup = NULL;
+char *gen_label_switch_backup   = NULL;
+
+/*
+ * Writes one line of assembly to stdout: an optional leading tab, the
+ * formatted text and a terminating newline. The caller's va_list is
+ * copied, so it is left untouched.
+ */
+static void gen_emit_emitter(bool indent, const char *fmt, va_list list) {
+    va_list args;
+
+    if (indent)
+        putchar('\t');
+
+    va_copy(args, list);
+    vprintf(fmt, args);
+    va_end(args);
+
+    putchar('\n');
+}
+
+/* Emits a printf-style formatted line of assembly, indented by one tab. */
+void gen_emit(const char *fmt, ...) {
+    va_list args;
+
+    va_start(args, fmt);
+    gen_emit_emitter(true, fmt, args);
+    va_end(args);
+}
+
+/* Emits a printf-style formatted line of assembly without indentation. */
+void gen_emit_inline(const char *fmt, ...) {
+    va_list args;
+
+    va_start(args, fmt);
+    gen_emit_emitter(false, fmt, args);
+    va_end(args);
+}
+
+/*
+ * Copies the current break and continue labels into their backup slots.
+ * There is only one slot per label, so an earlier backup is overwritten.
+ */
+void gen_jump_backup(void) {
+    gen_label_continue_backup = gen_label_continue;
+    gen_label_break_backup    = gen_label_break;
+}
+
+/*
+ * Backs up the current break/continue labels (single slot each, any
+ * earlier backup is overwritten) and installs lbreak and lcontinue as the
+ * new targets.
+ */
+void gen_jump_save(char *lbreak, char *lcontinue) {
+    gen_label_break_backup    = gen_label_break;
+    gen_label_continue_backup = gen_label_continue;
+
+    gen_label_continue        = lcontinue;
+    gen_label_break           = lbreak;
+}
+
+/*
+ * Reinstalls the break and continue labels held in the backup slots as
+ * the current targets.
+ */
+void gen_jump_restore(void) {
+    gen_label_continue = gen_label_continue_backup;
+    gen_label_break    = gen_label_break_backup;
+}
+
+/*
+ * Emits an unconditional jump to label. A missing label is an internal
+ * compiler error.
+ */
+void gen_jump(const char *label) {
+    if (label == NULL)
+        compile_ice("gen_jump");
+
+    gen_emit("jmp %s", label);
+}
+
+/* Emits the definition of label at the current position in the output. */
+void gen_label(const char *label) {
+    gen_emit("%s:",
+             label);
+}
+
+/*
+ * Some expressions are architecture-independent thanks to generic generation
+ * functions.
+ */
+/*
+ * Generates a switch statement.
+ *
+ * The controlling expression is evaluated first, then control jumps to
+ * the current switch label. After the body the switch label that is
+ * current at that point and the break label are both defined.
+ *
+ * The enclosing switch and break labels are kept in locals for the
+ * duration of the statement. Keeping them in the shared backup globals
+ * instead would overwrite the backup of an enclosing loop or switch, which
+ * would then restore the wrong break target once it finishes.
+ */
+static void gen_statement_switch(ast_t *ast) {
+    char *outer_switch = gen_label_switch;
+    char *outer_break  = gen_label_break;
+
+    gen_expression(ast->switchstmt.expr);
+    gen_label_switch = ast_label();
+    gen_label_break  = ast_label();
+    gen_jump(gen_label_switch);
+    if (ast->switchstmt.body)
+        gen_expression(ast->switchstmt.body);
+    gen_label(gen_label_switch);
+    gen_label(gen_label_break);
+
+    gen_label_switch = outer_switch;
+    gen_label_break  = outer_break;
+}
+
+/*
+ * Generates a do-while loop: the body runs, then the condition is
+ * evaluated and the loop either leaves through `end' or jumps back to
+ * `begin'.
+ *
+ * `continue' has to reach the condition, not the top of the body, so it
+ * gets its own label placed right before the condition is evaluated.
+ *
+ * The enclosing break/continue labels are kept in locals so that any
+ * depth of nesting restores the correct targets; the single shared backup
+ * slot is overwritten by nested constructs.
+ */
+static void gen_statement_do(ast_t *ast) {
+    char *begin = ast_label();
+    char *end   = ast_label();
+    char *cond  = ast_label();
+
+    char *outer_break    = gen_label_break;
+    char *outer_continue = gen_label_continue;
+
+    gen_label_break    = end;
+    gen_label_continue = cond;
+
+    gen_label(begin);
+    gen_expression(ast->forstmt.body);
+    gen_label(cond);
+    gen_expression(ast->forstmt.cond);
+    gen_je(end);
+    gen_jump(begin);
+    gen_label(end);
+
+    gen_label_break    = outer_break;
+    gen_label_continue = outer_continue;
+}
+
+/* Generates every statement of a compound statement, in list order. */
+static void gen_statement_compound(ast_t *ast) {
+    list_iterator_t *it = list_iterator(ast->compound);
+
+    while (!list_iterator_end(it))
+        gen_expression(list_iterator_next(it));
+}
+
+/* Generates a goto as a jump to the label recorded in the node. */
+static void gen_statement_goto(ast_t *ast) {
+    const char *target = ast->gotostmt.where;
+
+    gen_jump(target);
+}
+
+/*
+ * Defines the label recorded in the node; nothing is emitted when the
+ * node carries no label.
+ */
+static void gen_statement_label(ast_t *ast) {
+    if (!ast->gotostmt.where)
+        return;
+
+    gen_label(ast->gotostmt.where);
+}
+
+/*
+ * Generates an if statement or ternary expression: the condition, a
+ * conditional jump over the `then' part and, when present, the `else'
+ * part followed by a joining label.
+ */
+static void gen_statement_cond(ast_t *ast) {
+    char *otherwise;
+
+    gen_expression(ast->ifstmt.cond);
+    otherwise = ast_label();
+    gen_je(otherwise);
+
+    if (ast->ifstmt.then)
+        gen_expression(ast->ifstmt.then);
+
+    if (!ast->ifstmt.last) {
+        /* no else part: the skip label simply follows the then part */
+        gen_label(otherwise);
+        return;
+    }
+
+    char *done = ast_label();
+    gen_jump(done);
+    gen_label(otherwise);
+    gen_expression(ast->ifstmt.last);
+    gen_label(done);
+}
+
+/*
+ * Generates a for loop: optional initialiser, then at `begin' the
+ * optional condition (leaving through `end'), the body, at `step' the
+ * optional step expression and a jump back to `begin'. `break' targets
+ * `end' and `continue' targets `step'.
+ *
+ * The enclosing break/continue labels are kept in locals so that any
+ * depth of nesting restores the correct targets; the single shared backup
+ * slot is overwritten by nested constructs.
+ */
+static void gen_statement_for(ast_t *ast) {
+    if (ast->forstmt.init)
+        gen_expression(ast->forstmt.init);
+
+    char *begin = ast_label();
+    char *step  = ast_label();
+    char *end   = ast_label();
+
+    char *outer_break    = gen_label_break;
+    char *outer_continue = gen_label_continue;
+
+    gen_label_break    = end;
+    gen_label_continue = step;
+
+    gen_label(begin);
+    if (ast->forstmt.cond) {
+        gen_expression(ast->forstmt.cond);
+        gen_je(end);
+    }
+    gen_expression(ast->forstmt.body);
+    gen_label(step);
+    if (ast->forstmt.step)
+        gen_expression(ast->forstmt.step);
+    gen_jump(begin);
+    gen_label(end);
+
+    gen_label_break    = outer_break;
+    gen_label_continue = outer_continue;
+}
+
+/*
+ * Generates a while loop: at `begin' the condition is evaluated and the
+ * loop leaves through `end' when it fails, otherwise the body runs and
+ * control jumps back to `begin'. `break' targets `end' and `continue'
+ * targets `begin'.
+ *
+ * The enclosing break/continue labels are kept in locals so that any
+ * depth of nesting restores the correct targets; the single shared backup
+ * slot is overwritten by nested constructs.
+ */
+static void gen_statement_while(ast_t *ast) {
+    char *begin = ast_label();
+    char *end   = ast_label();
+
+    char *outer_break    = gen_label_break;
+    char *outer_continue = gen_label_continue;
+
+    gen_label_break    = end;
+    gen_label_continue = begin;
+
+    gen_label(begin);
+    gen_expression(ast->forstmt.cond);
+    gen_je(end);
+    gen_expression(ast->forstmt.body);
+    gen_jump(begin);
+    gen_label(end);
+
+    gen_label_break    = outer_break;
+    gen_label_continue = outer_continue;
+}
+
+/*
+ * Generates a return statement. When a value is returned it is evaluated
+ * and passed through gen_boolean_maybe for its type before the return
+ * sequence is emitted.
+ */
+static void gen_statement_return(ast_t *ast) {
+    ast_t *value = ast->returnstmt;
+
+    if (value != NULL) {
+        gen_expression(value);
+        gen_boolean_maybe(value->ctype);
+    }
+
+    gen_return();
+}
+
+/* Generates `break' as a jump to the current break label. */
+static void gen_statement_break(void) {
+    const char *target = gen_label_break;
+
+    gen_jump(target);
+}
+
+/* Generates `continue' as a jump to the current continue label. */
+static void gen_statement_continue(void) {
+    const char *target = gen_label_continue;
+
+    gen_jump(target);
+}
+
+/*
+ * Generates the `default' label of a switch: the pending switch label is
+ * defined here and a fresh one is allocated for whatever follows.
+ */
+static void gen_statement_default(void) {
+    const char *pending = gen_label_switch;
+
+    gen_label(pending);
+    gen_label_switch = ast_label();
+}
+
+/* Generates a comma expression: the left operand first, then the right. */
+static void gen_comma(ast_t *ast) {
+    ast_t *first  = ast->left;
+    ast_t *second = ast->right;
+
+    gen_expression(first);
+    gen_expression(second);
+}
+
+/*
+ * Emits the directives for a global that has no initialiser: a .global
+ * line unless the type is marked static, and an .lcomm reservation of the
+ * type's size under the variable's name.
+ */
+static void gen_data_bss(ast_t *ast) {
+    ast_t *var = ast->decl.var;
+
+    gen_emit(".data");
+    if (!var->ctype->isstatic)
+        gen_emit(".global %s", var->variable.name);
+    gen_emit(".lcomm %s, %d", var->variable.name, var->ctype->size);
+}
+
+/*
+ * Emits a global variable: through gen_data when it has an initialiser,
+ * through gen_data_bss otherwise.
+ */
+static void gen_data_global(ast_t *variable) {
+    if (!variable->decl.init) {
+        gen_data_bss(variable);
+        return;
+    }
+
+    gen_data(variable, 0, 0);
+}
+
+/*
+ * Stores every element of an initialiser list into a local variable.
+ *
+ *  init   - list of initialiser nodes
+ *  offset - offset of the variable, added to each element's own offset
+ *
+ * Literal values that are not bitfields are written with
+ * gen_literal_save; everything else is evaluated and written with
+ * gen_save_local.
+ */
+static void gen_declaration_initialization(list_t *init, int offset) {
+    list_iterator_t *it = list_iterator(init);
+
+    while (!list_iterator_end(it)) {
+        ast_t *node  = list_iterator_next(it);
+        bool   plain = node->init.value->type == AST_TYPE_LITERAL
+                    && node->init.type->bitfield.size <= 0;
+
+        if (!plain) {
+            gen_expression(node->init.value);
+            gen_save_local(node->init.type, node->init.offset + offset);
+            continue;
+        }
+
+        gen_literal_save(node->init.value, node->init.type, node->init.offset + offset);
+    }
+}
+
+/*
+ * Generates the initialisation of a local declaration. The storage of the
+ * variable is zeroed first and the initialiser list is stored afterwards;
+ * a declaration without initialiser emits nothing.
+ */
+static void gen_declaration(ast_t *ast) {
+    if (ast->decl.init) {
+        ast_t *var = ast->decl.var;
+
+        gen_zero(var->variable.off, var->variable.off + var->ctype->size);
+        gen_declaration_initialization(ast->decl.init, var->variable.off);
+    }
+}
+
+/*
+ * Makes sure a local variable that still carries a pending initialiser
+ * list has been initialised: its storage is zeroed, the list is stored
+ * and the list is then dropped from the node so this happens only once.
+ */
+void gen_ensure_lva(ast_t *ast) {
+    if (ast->variable.init != NULL) {
+        const int from = ast->variable.off;
+
+        gen_zero(from, from + ast->ctype->size);
+        gen_declaration_initialization(ast->variable.init, ast->variable.off);
+    }
+
+    ast->variable.init = NULL;
+}
+
+/*
+ * Central dispatcher of the code generator: generates code for one AST
+ * node by handing it to the generator for its node type. A NULL node
+ * generates nothing and any node type not listed here is treated as a
+ * binary operation.
+ */
+void gen_expression(ast_t *ast) {
+    if (ast == NULL)
+        return;
+
+    switch (ast->type) {
+        /* statements */
+        case AST_TYPE_STATEMENT_IF:
+        case AST_TYPE_EXPRESSION_TERNARY:
+            gen_statement_cond(ast);
+            break;
+        case AST_TYPE_STATEMENT_FOR:
+            gen_statement_for(ast);
+            break;
+        case AST_TYPE_STATEMENT_WHILE:
+            gen_statement_while(ast);
+            break;
+        case AST_TYPE_STATEMENT_DO:
+            gen_statement_do(ast);
+            break;
+        case AST_TYPE_STATEMENT_COMPOUND:
+            gen_statement_compound(ast);
+            break;
+        case AST_TYPE_STATEMENT_SWITCH:
+            gen_statement_switch(ast);
+            break;
+        case AST_TYPE_STATEMENT_GOTO:
+            gen_statement_goto(ast);
+            break;
+        case AST_TYPE_STATEMENT_LABEL:
+            gen_statement_label(ast);
+            break;
+        case AST_TYPE_STATEMENT_RETURN:
+            gen_statement_return(ast);
+            break;
+        case AST_TYPE_STATEMENT_BREAK:
+            gen_statement_break();
+            break;
+        case AST_TYPE_STATEMENT_CONTINUE:
+            gen_statement_continue();
+            break;
+        case AST_TYPE_STATEMENT_DEFAULT:
+            gen_statement_default();
+            break;
+        case AST_TYPE_STATEMENT_CASE:
+            gen_case(ast);
+            break;
+        case AST_TYPE_STATEMENT_GOTO_COMPUTED:
+            gen_goto_computed(ast);
+            break;
+        case AST_TYPE_STATEMENT_LABEL_COMPUTED:
+            gen_address_label(ast);
+            break;
+
+        /* calls */
+        case AST_TYPE_CALL:
+        case AST_TYPE_POINTERCALL:
+            gen_function_call(ast);
+            break;
+        case AST_TYPE_VA_START:
+            gen_va_start(ast);
+            break;
+        case AST_TYPE_VA_ARG:
+            gen_va_arg(ast);
+            break;
+
+        /* values and storage */
+        case AST_TYPE_LITERAL:
+            gen_literal(ast);
+            break;
+        case AST_TYPE_STRING:
+            gen_literal_string(ast);
+            break;
+        case AST_TYPE_VAR_LOCAL:
+            gen_variable_local(ast);
+            break;
+        case AST_TYPE_VAR_GLOBAL:
+            gen_variable_global(ast);
+            break;
+        case AST_TYPE_DECLARATION:
+            gen_declaration(ast);
+            break;
+        case AST_TYPE_DEREFERENCE:
+            gen_dereference(ast);
+            break;
+        case AST_TYPE_ADDRESS:
+            gen_address(ast->unary.operand);
+            break;
+        case AST_TYPE_STRUCT:
+            gen_struct(ast);
+            break;
+
+        /* operators */
+        case '!':
+            gen_not(ast);
+            break;
+        case AST_TYPE_NEGATE:
+            gen_negate(ast);
+            break;
+        case AST_TYPE_AND:
+            gen_and(ast);
+            break;
+        case AST_TYPE_OR:
+            gen_or(ast);
+            break;
+        case AST_TYPE_POST_INCREMENT:
+            gen_postfix(ast, "add");
+            break;
+        case AST_TYPE_POST_DECREMENT:
+            gen_postfix(ast, "sub");
+            break;
+        case AST_TYPE_PRE_INCREMENT:
+            gen_prefix(ast, "add");
+            break;
+        case AST_TYPE_PRE_DECREMENT:
+            gen_prefix(ast, "sub");
+            break;
+        case AST_TYPE_EXPRESSION_CAST:
+            gen_cast(ast);
+            break;
+        case AST_TYPE_CONVERT:
+            gen_conversion(ast);
+            break;
+        case '&':
+        case '|':
+            gen_bitandor(ast);
+            break;
+        case '~':
+            gen_bitnot(ast);
+            break;
+        case ',':
+            gen_comma(ast);
+            break;
+        case '=':
+            gen_assign(ast);
+            break;
+
+        default:
+            gen_binary(ast);
+    }
+}
+
+/*
+ * Generates one top level node. gen_function is called for every node;
+ * after that a function gets prologue, body and epilogue, and a
+ * declaration is emitted as global data. Other node types emit nothing
+ * further.
+ */
+void gen_toplevel(ast_t *ast) {
+    gen_function(ast);
+
+    switch (ast->type) {
+        case AST_TYPE_FUNCTION:
+            gen_function_prologue(ast);
+            gen_expression(ast->function.body);
+            gen_function_epilogue();
+            break;
+        case AST_TYPE_DECLARATION:
+            gen_data_global(ast);
+            break;
+        default:
+            break;
+    }
+}
diff --git a/gen_amd64.c b/gen_amd64.c
new file mode 100644 (file)
index 0000000..1a7080a
--- /dev/null
@@ -0,0 +1,1201 @@
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
+
+#include "lice.h"
+#include "gen.h"
+
+/*
+ * Sizing constants. REGISTER_MULT_SIZE is the number of columns of
+ * register_table below.
+ * NOTE(review): REGISTER_AREA_SIZE and REGISTER_MULT_SIZE_XMM are not used
+ * in this part of the file; presumably they describe the register save
+ * area for variadic functions - confirm against their users.
+ */
+#define REGISTER_AREA_SIZE     304
+#define REGISTER_MULT_SIZE_XMM 8
+#define REGISTER_MULT_SIZE     6
+
+/* Register names as strings, without the leading '%' of AT&T syntax. */
+#define SRDI "rdi"
+#define SRSI "rsi"
+#define SRDX "rdx"
+#define SRCX "rcx"
+#define SR8  "r8"
+#define SR9  "r9"
+#define SEDI "edi"
+#define SESI "esi"
+#define SEDX "edx"
+#define SECX "ecx"
+#define SR8D "r8d"
+#define SR9D "r9d"
+#define SDIL "dil"
+#define SSIL "sil"
+#define SDL  "dl"
+#define SCL  "cl"
+#define SR8B "r8b"
+#define SR9B "r9b"
+#define SRAX "rax"
+#define SRBX "rbx"
+#define SR11 "r11"
+
+/*
+ * The same six registers named at three widths: row 0 holds the 64 bit
+ * names, row 1 the 32 bit names and row 2 the 8 bit names.
+ * NOTE(review): the column order matches the integer argument registers
+ * of the System V AMD64 calling convention; presumably the table is used
+ * for argument passing - confirm against its users.
+ */
+static const char *register_table[][REGISTER_MULT_SIZE] = {
+    { SRDI,    SRSI,    SRDX,    SRCX,    SR8,     SR9  },
+    { SEDI,    SESI,    SEDX,    SECX,    SR8D,    SR9D },
+    { SDIL,    SSIL,    SDL,     SCL,     SR8B,    SR9B }
+};
+
+/* Column I of the table at 64 bit (NREG), 32 bit (SREG) and 8 bit (MREG). */
+#define NREG(I) register_table[0][I]
+#define SREG(I) register_table[1][I]
+#define MREG(I) register_table[2][I]
+
+static int stack = 0; /* bytes currently pushed by the gen_push / gen_pop helpers */
+static int gp    = 0; /* NOTE(review): not used in this part of the file; presumably a general purpose register counter - confirm */
+static int fp    = 0; /* NOTE(review): not used in this part of the file; presumably a floating point register counter - confirm */
+
+/* Pushes a 64 bit register and accounts for the 8 bytes in `stack'. */
+static void gen_push(const char *reg) {
+    stack += 8;
+    gen_emit("push %%%s", reg);
+}
+/* Pops into a 64 bit register and releases the 8 bytes from `stack'. */
+static void gen_pop(const char *reg) {
+    stack -= 8;
+    gen_emit("pop %%%s", reg);
+}
+/*
+ * Saves xmm register number r on the stack: 8 bytes are reserved below
+ * the stack pointer and the register is stored there with movsd.
+ */
+static void gen_push_xmm(int r) {
+    stack += 8;
+    gen_emit("sub $8, %%rsp");
+    gen_emit("movsd %%xmm%d, (%%rsp)", r);
+}
+/*
+ * Restores xmm register number r from the top of the stack with movsd
+ * and releases the 8 bytes again.
+ */
+static void gen_pop_xmm(int r) {
+    stack -= 8;
+    gen_emit("movsd (%%rsp), %%xmm%d", r);
+    gen_emit("add $8, %%rsp");
+}
+
+/*
+ * Reinterprets the object VALUE as an object of type TYPE.
+ *
+ * Technically not the safest, but also can't legally be optimized with
+ * strict aliasing optimizations. Volatile keeps the construction of the
+ * literal from being dealt with directly in the optimizer. Plus
+ * aliasing through the use of a union, while it isn't technically legal,
+ * is something all compilers do deal with to some extent. Restrict on
+ * `want' will prevent the compiler from emitting two loads for the same
+ * address, since it is likely already in a register.
+ */
+#define TYPEPUN(TYPE, VALUE) \
+    *(((volatile union { __typeof__(VALUE) *have; TYPE *restrict want; }) { &(VALUE) }).want)
+
+/*
+ * Bounds checked lookup in a table of pointers.
+ *
+ *  table  - the table to read
+ *  index  - entry wanted
+ *  length - number of entries in the table
+ *  func   - name of the calling function, used in the error report
+ *
+ * Returns the entry at index. An index outside the table, or an entry
+ * that is NULL, is an internal compiler error.
+ *
+ * The check is made on the index itself: computing a pointer outside the
+ * table and comparing it afterwards is not defined behaviour.
+ */
+static void *gen_mapping_table(const void **table, size_t index, size_t length, const char *func) {
+    if (index >= length || !table[index])
+        compile_ice("gen_mapping_table from %s (index: %zu, length: %zu)", func, index, length);
+
+    return (void *)table[index];
+}
+
+/* Looks up INDEX in TABLE through gen_mapping_table, naming the calling function for error reports. */
+#define gen_mapping(TABLE, INDEX, LENGTH) \
+    gen_mapping_table((const void **)(TABLE), (INDEX), (LENGTH), __func__)
+
+/*
+ * Returns the name of the 'a' or 'c' register at the width of type
+ * (1, 2, 4 or 8 bytes). r selects the register family: 'a' picks
+ * al/ax/eax/rax, anything else picks cl/cx/ecx/rcx. A size without a
+ * register is reported by gen_mapping.
+ */
+static const char *gen_register_integer(data_type_t *type, char r) {
+    static const char *items[] = {
+        "cl", "cx", 0, "ecx", 0, 0, 0, "rcx",
+        "al", "ax", 0, "eax", 0, 0, 0, "rax"
+    };
+    const int family = (r == 'a') ? 8 : 0;
+
+    return gen_mapping(items, (type->size - 1) + family, sizeof(items)/sizeof(*items));
+}
+
+/*
+ * Returns the instruction that loads a value of type's size (1, 2, 4 or
+ * 8 bytes) into a 64 bit register; the narrower sizes use sign extending
+ * moves. A size without an instruction is reported by gen_mapping.
+ */
+static const char *gen_load_instruction(data_type_t *type) {
+    static const char *items[] = {
+        "movsbq", "movswq", 0,
+        "movslq", 0,        0, 0,
+        "mov"
+    };
+    static const size_t count = sizeof(items)/sizeof(*items);
+
+    return gen_mapping(items, type->size - 1, count);
+}
+
+/*
+ * Extracts a bitfield from the storage unit held in %rax: the value is
+ * shifted right by the field offset and masked to the field width. %rcx
+ * is used for the mask and preserved. Types that are not bitfields
+ * (size <= 0) emit nothing.
+ */
+static void gen_shift_load(data_type_t *type) {
+    if (type->bitfield.size <= 0)
+        return;
+
+    /*
+     * The mask is built in 64 bits: shifting a plain int is undefined for
+     * widths of 31 bits and up, and PRIx64 requires a 64 bit argument.
+     */
+    const uint64_t mask = (type->bitfield.size >= 64)
+                        ? UINT64_MAX
+                        : (((uint64_t)1 << type->bitfield.size) - 1);
+
+    gen_emit("shr $%d, %%rax", type->bitfield.offset);
+    gen_push(SRCX);
+    gen_emit("mov $0x%" PRIx64 ", %%rcx", mask);
+    gen_emit("and %%rcx, %%rax");
+    gen_pop(SRCX);
+}
+
+/*
+ * Prepares %rax for storing a bitfield into the storage unit at address.
+ *
+ * The new value in %rax is masked to the field width and shifted to the
+ * field offset; the current contents of the storage unit are loaded, the
+ * bits of the field are cleared in them and the result is merged into
+ * %rax. The caller stores %rax afterwards. %rcx and %rdi are used as
+ * scratch registers and preserved. Types that are not bitfields
+ * (size <= 0) emit nothing.
+ */
+static void gen_shift_save(data_type_t *type, char *address) {
+    if (type->bitfield.size <= 0)
+        return;
+
+    /*
+     * The mask is built in 64 bits: shifting a plain int is undefined for
+     * widths of 31 bits and up, and PRIx64 requires a 64 bit argument.
+     */
+    const uint64_t mask = (type->bitfield.size >= 64)
+                        ? UINT64_MAX
+                        : (((uint64_t)1 << type->bitfield.size) - 1);
+
+    gen_push(SRCX);
+    gen_push(SRDI);
+
+    gen_emit("mov $0x%" PRIx64 ", %%rdi", mask);
+    gen_emit("and %%rdi, %%rax");
+    gen_emit("shl $%d, %%rax", type->bitfield.offset);
+    gen_emit("mov %s, %%%s", address, gen_register_integer(type, 'c'));
+    gen_emit("mov $0x%" PRIx64 ", %%rdi", ~(mask << type->bitfield.offset));
+    gen_emit("and %%rdi, %%rcx");
+    gen_emit("or %%rcx, %%rax");
+
+    gen_pop(SRDI);
+    gen_pop(SRCX);
+}
+
+/*
+ * Loads a global into %rax. For arrays the address of label (plus
+ * offset) is loaded; for everything else the value is loaded with the
+ * instruction matching the type's size and a bitfield is extracted
+ * afterwards.
+ */
+static void gen_load_global(data_type_t *type, char *label, int offset) {
+    if (type->type != TYPE_ARRAY) {
+        gen_emit("%s %s+%d(%%rip), %%rax", gen_load_instruction(type), label, offset);
+        gen_shift_load(type);
+        return;
+    }
+
+    if (offset == 0)
+        gen_emit("lea %s(%%rip), %%rax", label);
+    else
+        gen_emit("lea %s+%d(%%rip), %%rax", label, offset);
+}
+
+/*
+ * Converts the float or double in %xmm0 to an integer in %eax, with
+ * truncation. Any other source type emits nothing.
+ */
+static void gen_cast_int(data_type_t *type) {
+    switch (type->type) {
+        case TYPE_FLOAT:
+            gen_emit("cvttss2si %%xmm0, %%eax");
+            break;
+        case TYPE_DOUBLE:
+            gen_emit("cvttsd2si %%xmm0, %%eax");
+            break;
+        default:
+            break;
+    }
+}
+
+/*
+ * Converts the current value to 0 or 1 in %eax. Floating values are
+ * compared against zero in %xmm0 (with %xmm1 as preserved scratch),
+ * everything else is compared against zero in %rax.
+ */
+static void gen_cast_bool(data_type_t *type) {
+    if (!ast_type_isfloating(type)) {
+        gen_emit("cmp $0, %%rax");
+        gen_emit("setne %%al");
+    } else {
+        gen_push_xmm(1);
+        gen_emit("xorpd %%xmm1, %%xmm1");
+        gen_emit("ucomisd %%xmm1, %%xmm0");
+        gen_emit("setne %%al");
+        gen_pop_xmm(1);
+    }
+
+    gen_emit("movzb %%al, %%eax");
+}
+
+/*
+ * Loads the value at offset from register base. Arrays yield their
+ * address in %rax, float, double and long double go to %xmm0, and
+ * everything else is loaded into %rax with a bitfield extracted
+ * afterwards.
+ */
+static void gen_load_local(data_type_t *var, const char *base, int offset) {
+    switch (var->type) {
+        case TYPE_ARRAY:
+            gen_emit("lea %d(%%%s), %%rax", offset, base);
+            break;
+        case TYPE_FLOAT:
+            gen_emit("movss %d(%%%s), %%xmm0", offset, base);
+            break;
+        case TYPE_DOUBLE:
+        case TYPE_LDOUBLE:
+            gen_emit("movsd %d(%%%s), %%xmm0", offset, base);
+            break;
+        default:
+            gen_emit("%s %d(%%%s), %%rax", gen_load_instruction(var), offset, base);
+            gen_shift_load(var);
+            break;
+    }
+}
+
+/*
+ * When type is _Bool, reduces the value in %rax to 0 or 1 in %al; other
+ * types emit nothing.
+ */
+void gen_boolean_maybe(data_type_t *type) {
+    if (type->type == TYPE_BOOL) {
+        gen_emit("test %%rax, %%rax");
+        gen_emit("setne %%al");
+    }
+}
+
+/*
+ * Stores the value in the 'a' register into the global `name' at offset.
+ * _Bool values are normalised first and bitfields are merged with the
+ * current contents of the storage unit before the store.
+ */
+static void gen_save_global(char *name, data_type_t *type, int offset) {
+    gen_boolean_maybe(type);
+
+    const char *source = gen_register_integer(type, 'a');
+    string_t   *target = string_create();
+
+    /* rip relative operand, with the offset spelled out only when needed */
+    if (offset == 0)
+        string_catf(target, "%s(%%rip)", name);
+    else
+        string_catf(target, "%s+%d(%%rip)", name, offset);
+
+    gen_shift_save(type, string_buffer(target));
+    gen_emit("mov %%%s, %s", source, string_buffer(target));
+}
+
+/*
+ * Stores the current value at offset from %rbp. float, double and long
+ * double are stored from %xmm0; everything else is stored from the 'a'
+ * register after _Bool normalisation and bitfield merging.
+ */
+void gen_save_local(data_type_t *type, int offset) {
+    if (type->type == TYPE_FLOAT) {
+        gen_emit("movss %%xmm0, %d(%%rbp)", offset);
+        return;
+    }
+
+    if (type->type == TYPE_DOUBLE || type->type == TYPE_LDOUBLE) {
+        gen_emit("movsd %%xmm0, %d(%%rbp)", offset);
+        return;
+    }
+
+    gen_boolean_maybe(type);
+
+    string_t   *target = string_create();
+    const char *source = gen_register_integer(type, 'a');
+
+    /* frame relative operand, with the offset spelled out only when needed */
+    if (offset == 0)
+        string_catf(target, "(%%rbp)");
+    else
+        string_catf(target, "%d(%%rbp)", offset);
+
+    gen_shift_save(type, string_buffer(target));
+    gen_emit("mov %%%s, %s", source, string_buffer(target));
+}
+
+/*
+ * Second half of an assignment through a pointer. The value to store is
+ * expected on top of the stack and the destination address in %rax: the
+ * value is read into %rcx, written to offset(%rax) at the width of type
+ * and finally popped back into %rax.
+ */
+static void gen_assignment_dereference_intermediate(data_type_t *type, int offset) {
+    gen_emit("mov (%%rsp), %%rcx");
+
+    const char *source = gen_register_integer(type, 'c');
+    if (offset == 0)
+        gen_emit("mov %%%s, (%%rax)", source);
+    else
+        gen_emit("mov %%%s, %d(%%rax)", source, offset);
+
+    gen_pop(SRAX);
+}
+
+/*
+ * Leaves the address of an lvalue node in %rax. Local and global
+ * variables, dereferences (the operand's value is the address) and
+ * structure members (address of the structure plus member offset) are
+ * supported; any other node is an internal compiler error.
+ */
+void gen_address(ast_t *ast) {
+    if (ast->type == AST_TYPE_VAR_LOCAL) {
+        gen_emit("lea %d(%%rbp), %%rax", ast->variable.off);
+    } else if (ast->type == AST_TYPE_VAR_GLOBAL) {
+        gen_emit("lea %s(%%rip), %%rax", ast->variable.label);
+    } else if (ast->type == AST_TYPE_DEREFERENCE) {
+        gen_expression(ast->unary.operand);
+    } else if (ast->type == AST_TYPE_STRUCT) {
+        gen_address(ast->structure);
+        gen_emit("add $%d, %%rax", ast->ctype->offset);
+    } else {
+        compile_ice("gen_address (%s)", ast_type_string(ast->ctype));
+    }
+}
+
+/* Loads the address of the label recorded in the node into %rax. */
+void gen_address_label(ast_t *ast) {
+    const char *target = ast->gotostmt.where;
+
+    gen_emit("mov $%s, %%rax", target);
+}
+
+/*
+ * Generates a computed goto: the operand is evaluated and control jumps
+ * to the address left in %rax.
+ */
+void gen_goto_computed(ast_t *ast) {
+    ast_t *target = ast->unary.operand;
+
+    gen_expression(target);
+    gen_emit("jmp *%%rax");
+}
+
+/*
+ * Emits a copy of size bytes from the address in %rcx to the address in
+ * register base, going through %r11.
+ *
+ * As many 8 byte moves as fit are emitted first, then at most one 4 byte
+ * move and finally single bytes, so exactly size bytes are copied. Each
+ * loop only runs while a full chunk still fits; stepping by 8 until the
+ * offset passes size would copy beyond the end of the object whenever
+ * size is not a multiple of 8. The narrower moves name the matching
+ * sub-register of %r11 on both sides, as the operand sizes have to agree.
+ */
+static void gen_structure_copy(int size, const char *base) {
+    int i = 0;
+    for (; i + 8 <= size; i += 8) {
+        gen_emit("movq %d(%%rcx), %%r11", i);
+        gen_emit("movq %%r11, %d(%%%s)", i, base);
+    }
+
+    for (; i + 4 <= size; i += 4) {
+        gen_emit("movl %d(%%rcx), %%r11d", i);
+        gen_emit("movl %%r11d, %d(%%%s)", i, base);
+    }
+
+    for (; i < size; i++) {
+        gen_emit("movb %d(%%rcx), %%r11b", i);
+        gen_emit("movb %%r11b, %d(%%%s)", i, base);
+    }
+}
+
+/*
+ * Generates a structure assignment: the address of right is placed in
+ * %rcx, the address of left in %rax and the bytes of left's type are
+ * copied across. %rcx and %r11 are preserved around the copy.
+ */
+static void gen_structure_assign(ast_t *left, ast_t *right) {
+    gen_push(SRCX);
+    gen_push(SR11);
+
+    /* source address first, it has to be moved out of %rax */
+    gen_address(right);
+    gen_emit("mov %%rax, %%rcx");
+
+    gen_address(left);
+    const int bytes = left->ctype->size;
+    gen_structure_copy(bytes, "rax");
+
+    gen_pop(SR11);
+    gen_pop(SRCX);
+}
+
+/*
+ * Rounds n up to the next multiple of align; a value that already is a
+ * multiple is returned unchanged.
+ */
+static int gen_alignment(int n, int align) {
+    const int over = n % align;
+
+    if (over == 0)
+        return n;
+
+    return n - over + align;
+}
+
+/*
+ * Placeholder for passing a structure by value: it only reports a
+ * compile error naming the structure size.
+ *
+ * NOTE(review): no value is returned although callers add the result to
+ * a running byte count; presumably compile_error() does not return --
+ * confirm against its declaration.
+ */
+static int gen_structure_push(int size) {
+    compile_error("cannot pass structure of size: %d bytes by copy (unimplemented)", size);
+}
+
+/*
+ * Emit stores that zero the rbp-relative byte range [start, end).
+ * Quadword stores are used while 8 bytes fit, then doubleword stores,
+ * then single bytes for whatever is left.
+ */
+void gen_zero(int start, int end) {
+    int at = start;
+
+    while (at <= end - 8) {
+        gen_emit("movq $0, %d(%%rbp)", at);
+        at += 8;
+    }
+
+    while (at <= end - 4) {
+        gen_emit("movl $0, %d(%%rbp)", at);
+        at += 4;
+    }
+
+    while (at < end) {
+        gen_emit("movb $0, %d(%%rbp)", at);
+        at++;
+    }
+}
+
+/*
+ * Assign the value in rax through a dereferenced pointer (`*p = value`).
+ * The value is pushed, the pointer expression is evaluated into rax, and
+ * the shared helper stores the saved value using the pointee type.
+ */
+static void gen_assignment_dereference(ast_t *var) {
+    ast_t *pointer = var->unary.operand;
+
+    gen_push(SRAX);
+    gen_expression(pointer);
+    gen_assignment_dereference_intermediate(pointer->ctype->pointer, 0);
+}
+
+/*
+ * Emit `left op right` where `left` is a pointer and `op` is '+' or '-'.
+ * The right operand is scaled by the size of the pointed-to type before
+ * being added to or subtracted from the pointer; the result is left in
+ * rax. rcx is used as a temporary and is saved and restored.
+ */
+static void gen_pointer_arithmetic(char op, ast_t *left, ast_t *right) {
+    int scale;
+
+    gen_expression(left);
+    gen_push(SRCX);
+    gen_push(SRAX);
+    gen_expression(right);
+
+    /* scaling by one is a no-op, so only multiply for larger elements */
+    scale = left->ctype->pointer->size;
+    if (scale > 1)
+        gen_emit("imul $%d, %%rax", scale);
+
+    gen_emit("mov %%rax, %%rcx");
+    gen_pop(SRAX);
+
+    if (op == '+')
+        gen_emit("add %%rcx, %%rax");
+    else if (op == '-')
+        gen_emit("sub %%rcx, %%rax");
+
+    gen_pop(SRCX);
+}
+
+/*
+ * Store the value in rax into member `field` of `structure`.
+ *
+ * Nested member accesses are unwound first, accumulating each enclosing
+ * member's offset into `offset`; the store is then emitted according to
+ * where the outermost object lives (local, global, or behind a pointer).
+ */
+static void gen_assignment_structure(ast_t *structure, data_type_t *field, int offset) {
+    /* walk outwards through a.b.c style chains */
+    while (structure->type == AST_TYPE_STRUCT) {
+        offset   += structure->ctype->offset;
+        structure = structure->structure;
+    }
+
+    switch (structure->type) {
+        case AST_TYPE_VAR_LOCAL:
+            gen_ensure_lva(structure);
+            gen_save_local(field, structure->variable.off + field->offset + offset);
+            break;
+        case AST_TYPE_VAR_GLOBAL:
+            gen_save_global(structure->variable.name, field, field->offset + offset);
+            break;
+        case AST_TYPE_DEREFERENCE:
+            gen_push(SRAX);
+            gen_expression(structure->unary.operand);
+            gen_assignment_dereference_intermediate(field, field->offset + offset);
+            break;
+        default:
+            compile_ice("gen_assignment_structure");
+            break;
+    }
+}
+
+/*
+ * Load member `field` of `structure` into the result register.
+ *
+ * `offset` accumulates the offsets of enclosing members while recursing
+ * through nested member accesses. The load itself depends on where the
+ * outermost object lives: rbp-relative for locals, by name for globals,
+ * or relative to the evaluated pointer (in rax) for a dereference.
+ * Any other node kind is an internal compiler error.
+ */
+static void gen_load_structure(ast_t *structure, data_type_t *field, int offset) {
+    switch (structure->type) {
+        case AST_TYPE_VAR_LOCAL:
+            gen_ensure_lva(structure);
+            gen_load_local(field, "rbp", structure->variable.off + field->offset + offset);
+            break;
+        case AST_TYPE_VAR_GLOBAL:
+            gen_load_global(field, structure->variable.name, field->offset + offset);
+            break;
+        case AST_TYPE_STRUCT:
+            gen_load_structure(structure->structure, field, structure->ctype->offset + offset);
+            break;
+        case AST_TYPE_DEREFERENCE:
+            gen_expression(structure->unary.operand);
+            gen_load_local(field, SRAX, field->offset + offset);
+            break;
+        default:
+            /* report this function, not the store counterpart */
+            compile_ice("gen_load_structure");
+            break;
+    }
+}
+
+/*
+ * Store the value in rax into the lvalue `var`, dispatching on the kind
+ * of lvalue: dereference, structure member, local or global variable.
+ * Anything else is an internal compiler error.
+ */
+static void gen_store(ast_t *var) {
+    if (var->type == AST_TYPE_DEREFERENCE) {
+        gen_assignment_dereference(var);
+    } else if (var->type == AST_TYPE_STRUCT) {
+        gen_assignment_structure(var->structure, var->ctype, 0);
+    } else if (var->type == AST_TYPE_VAR_LOCAL) {
+        gen_ensure_lva(var);
+        gen_save_local(var->ctype, var->variable.off);
+    } else if (var->type == AST_TYPE_VAR_GLOBAL) {
+        gen_save_global(var->variable.name, var->ctype, 0);
+    } else {
+        compile_ice("gen_assignment");
+    }
+}
+
+/*
+ * Emit a comparison of ast->left with ast->right and leave 0 or 1 in eax.
+ *
+ * `operation` is the setcc mnemonic for a signed integer comparison
+ * (setl, setg, sete, ...). Integer operands are compared with cmp, using
+ * the 64-bit registers for long / long long and the 32-bit ones
+ * otherwise. Floating operands are compared with ucomiss/ucomisd, which
+ * report their result through CF and ZF and clear SF and OF; for those
+ * the signed ordering mnemonics are replaced by their CF/ZF-based
+ * counterparts so that <, >, <= and >= test the flags that were
+ * actually set.
+ */
+static void gen_comparision(char *operation, ast_t *ast) {
+    const char *setcc = operation;
+
+    if (ast_type_isfloating(ast->left->ctype)) {
+        gen_expression(ast->left);
+        gen_push_xmm(0);
+        gen_expression(ast->right);
+        gen_pop_xmm(1);
+        if (ast->left->ctype->type == TYPE_FLOAT)
+            gen_emit("ucomiss %%xmm0, %%xmm1");
+        else
+            gen_emit("ucomisd %%xmm0, %%xmm1");
+
+        /* sete / setne only look at ZF and are already correct */
+        if (!strcmp(operation, "setl"))
+            setcc = "setb";
+        else if (!strcmp(operation, "setg"))
+            setcc = "seta";
+        else if (!strcmp(operation, "setle"))
+            setcc = "setbe";
+        else if (!strcmp(operation, "setge"))
+            setcc = "setae";
+    } else {
+        gen_expression(ast->left);
+        gen_push(SRAX);
+        gen_expression(ast->right);
+        gen_pop(SRCX);
+
+        int type = ast->left->ctype->type;
+        if (type == TYPE_LONG || type == TYPE_LLONG)
+            gen_emit("cmp %%rax, %%rcx");
+        else
+            gen_emit("cmp %%eax, %%ecx");
+    }
+    gen_emit("%s %%al", setcc);
+    gen_emit("movzb %%al, %%eax");
+}
+
+/*
+ * Build the mnemonic for the binary operator node `ast`.
+ *
+ * Floating results get an SSE scalar mnemonic: the stem (adds, subs,
+ * muls, divs) followed by 'd' for double / long double or 's' for
+ * float. An operator with no floating stem is an internal compiler
+ * error; this is checked before the suffix is appended, because the
+ * suffix alone would make the buffer non-empty.
+ *
+ * Integer results get the plain mnemonic. Division and modulo need a
+ * dedicated instruction sequence and are signalled to the caller with
+ * the markers "@/" and "@%".
+ */
+static const char *gen_binary_instruction(ast_t *ast) {
+    string_t *string = string_create();
+    if (ast_type_isfloating(ast->ctype)) {
+        switch (ast->type) {
+            case '+': string_catf(string, "adds"); break;
+            case '-': string_catf(string, "subs"); break;
+            case '*': string_catf(string, "muls"); break;
+            case '/': string_catf(string, "divs"); break;
+        }
+        if (!string_length(string))
+            goto error;
+
+        if (ast->ctype->type == TYPE_DOUBLE || ast->ctype->type == TYPE_LDOUBLE)
+            string_cat(string, 'd');
+        else
+            string_cat(string, 's');
+
+        return string_buffer(string);
+    }
+    /* integer */
+    switch (ast->type) {
+        case '+':              string_catf(string, "add");  break;
+        case '-':              string_catf(string, "sub");  break;
+        case '*':              string_catf(string, "imul"); break;
+        case '^':              string_catf(string, "xor");  break;
+        case AST_TYPE_LSHIFT:  string_catf(string, "sal");  break;
+        case AST_TYPE_RSHIFT:  string_catf(string, "sar");  break;
+        case AST_TYPE_LRSHIFT: string_catf(string, "shr");  break;
+
+        /* need to be handled specially */
+        case '/': return "@/";
+        case '%': return "@%";
+    }
+    return string_buffer(string);
+error:
+    compile_ice("gen_binary_instruction");
+}
+
+/*
+ * Emit an integer binary operation; the result is left in rax.
+ *
+ * The left operand ends up in rax and the right operand in rcx.
+ * Division and modulo (marked with a leading '@' by
+ * gen_binary_instruction) use cqto + idiv on the 64-bit registers; for
+ * modulo the full 64-bit remainder in rdx is copied to rax so that wide
+ * and negative remainders are not truncated. Shifts take their count
+ * from cl and operate on the rax sub-register sized for the left
+ * operand; everything else is a plain two-operand instruction.
+ */
+static void gen_binary_arithmetic_integer(ast_t *ast) {
+    const char *op = gen_binary_instruction(ast);
+    gen_expression(ast->left);
+    gen_push(SRAX);
+    gen_expression(ast->right);
+    gen_emit("mov %%rax, %%rcx");
+    gen_pop(SRAX);
+
+    if (*op == '@') {
+        gen_emit("cqto");
+        gen_emit("idiv %%rcx");
+        if (op[1] == '%')
+            gen_emit("mov %%rdx, %%rax");
+    } else if (ast->type == AST_TYPE_LSHIFT
+        ||     ast->type == AST_TYPE_RSHIFT
+        ||     ast->type == AST_TYPE_LRSHIFT
+    ) {
+        gen_emit("%s %%cl, %%%s", op, gen_register_integer(ast->left->ctype, 'a'));
+    } else {
+        gen_emit("%s %%rcx, %%rax", op);
+    }
+}
+
+/*
+ * Emit a floating-point binary operation; the result is left in xmm0.
+ *
+ * The left operand is evaluated and saved, the right operand is
+ * evaluated and moved to xmm1, and the saved left operand is restored
+ * into xmm0 before the operation. The register move uses the same
+ * width rule as gen_binary_instruction: double and long double are
+ * moved with movsd, float with movss.
+ */
+static void gen_binary_arithmetic_floating(ast_t *ast) {
+    const char *op = gen_binary_instruction(ast);
+    gen_expression(ast->left);
+    gen_push_xmm(0);
+    gen_expression(ast->right);
+    if (ast->ctype->type == TYPE_DOUBLE || ast->ctype->type == TYPE_LDOUBLE)
+        gen_emit("movsd %%xmm0, %%xmm1");
+    else
+        gen_emit("movss %%xmm0, %%xmm1");
+    gen_pop_xmm(0);
+    gen_emit("%s %%xmm1, %%xmm0", op);
+}
+
+/*
+ * Emit the conversion of the current result from type `from` to type
+ * `to`. The first matching rule wins: integer to float/double, float to
+ * double, double to float, anything to _Bool, anything to integer. If
+ * no rule matches nothing is emitted.
+ */
+void gen_load_convert(data_type_t *to, data_type_t *from) {
+    bool from_integer = ast_type_isinteger(from);
+
+    if (from_integer && to->type == TYPE_FLOAT) {
+        gen_emit("cvtsi2ss %%eax, %%xmm0");
+        return;
+    }
+    if (from_integer && to->type == TYPE_DOUBLE) {
+        gen_emit("cvtsi2sd %%eax, %%xmm0");
+        return;
+    }
+    if (from->type == TYPE_FLOAT && to->type == TYPE_DOUBLE) {
+        gen_emit("cvtps2pd %%xmm0, %%xmm0");
+        return;
+    }
+    if (from->type == TYPE_DOUBLE && to->type == TYPE_FLOAT) {
+        gen_emit("cvtpd2ps %%xmm0, %%xmm0");
+        return;
+    }
+    if (to->type == TYPE_BOOL) {
+        gen_cast_bool(from);
+        return;
+    }
+    if (ast_type_isinteger(to))
+        gen_cast_int(from);
+}
+
+/*
+ * Evaluate the operand of a conversion node, then convert the result
+ * from the operand's type to the node's own type.
+ */
+void gen_conversion(ast_t *ast) {
+    gen_expression(ast->unary.operand);
+    gen_load_convert(ast->ctype, ast->unary.operand->ctype);
+}
+
+/*
+ * Emit code for a binary operator node.
+ *
+ * Pointer-typed results are routed to pointer arithmetic, relational
+ * and equality operators to the comparison emitter with the matching
+ * setcc mnemonic, and the remaining operators to the integer or
+ * floating arithmetic emitters according to the result type. A result
+ * type that is neither is an internal compiler error.
+ */
+void gen_binary(ast_t *ast) {
+    if (ast->ctype->type == TYPE_POINTER) {
+        gen_pointer_arithmetic(ast->type, ast->left, ast->right);
+        return;
+    }
+
+    switch (ast->type) {
+        case '<':
+            gen_comparision("setl", ast);
+            return;
+        case '>':
+            gen_comparision("setg", ast);
+            return;
+        case AST_TYPE_EQUAL:
+            gen_comparision("sete", ast);
+            return;
+        case AST_TYPE_GEQUAL:
+            gen_comparision("setge", ast);
+            return;
+        case AST_TYPE_LEQUAL:
+            gen_comparision("setle", ast);
+            return;
+        case AST_TYPE_NEQUAL:
+            gen_comparision("setne", ast);
+            return;
+    }
+
+    if (ast_type_isinteger(ast->ctype)) {
+        gen_binary_arithmetic_integer(ast);
+        return;
+    }
+
+    if (ast_type_isfloating(ast->ctype)) {
+        gen_binary_arithmetic_floating(ast);
+        return;
+    }
+
+    compile_ice("gen_binary");
+}
+
+/*
+ * Store the literal `ast` directly into the stack slot at `offset`
+ * bytes from rbp, using a store sized for `type`.
+ *
+ * 8-byte values (long, long long, pointer, double) are written as two
+ * 32-bit halves, low half first. Floating values are stored as their
+ * bit patterns. Unsupported types are an internal compiler error.
+ */
+void gen_literal_save(ast_t *ast, data_type_t *type, int offset) {
+    uint32_t word = ast->integer;
+    uint64_t wide = ((uint64_t)ast->integer);
+
+    gen_emit("# literal save {");
+    switch (type->type) {
+        case TYPE_BOOL:
+            gen_emit("movb $%d, %d(%%rbp)", !!ast->integer, offset);
+            break;
+        case TYPE_CHAR:
+            gen_emit("movb $%d, %d(%%rbp)", word, offset);
+            break;
+        case TYPE_SHORT:
+            gen_emit("movw $%d, %d(%%rbp)", word, offset);
+            break;
+        case TYPE_INT:
+            gen_emit("movl $%d, %d(%%rbp)", word, offset);
+            break;
+        case TYPE_LONG:
+        case TYPE_LLONG:
+        case TYPE_POINTER:
+            gen_emit("movl $0x%" PRIx64 ", %d(%%rbp)", wide & 0xFFFFFFFF, offset);
+            gen_emit("movl $0x%" PRIx64 ", %d(%%rbp)", wide >> 32, offset + 4);
+            break;
+        case TYPE_FLOAT: {
+            float single = ast->floating.value;
+
+            word = TYPEPUN(uint32_t, single);
+            gen_emit("movl $0x%" PRIx32 ", %d(%%rbp)", word, offset);
+            break;
+        }
+        case TYPE_DOUBLE: {
+            double dbl = ast->floating.value;
+
+            wide = TYPEPUN(uint64_t, dbl);
+            gen_emit("movl $0x%" PRIx64 ", %d(%%rbp)", wide & 0xFFFFFFFF, offset);
+            gen_emit("movl $0x%" PRIx64 ", %d(%%rbp)", wide >> 32, offset + 4);
+            break;
+        }
+
+        default:
+            compile_ice("gen_literal_save");
+    }
+    gen_emit("# }");
+}
+
+/*
+ * Prefix increment/decrement: apply `op` to the operand's value and
+ * store it back, leaving the updated value in rax. Pointers step by the
+ * size of the pointed-to type, everything else by one.
+ */
+void gen_prefix(ast_t *ast, const char *op) {
+    int step = 1;
+
+    gen_expression(ast->unary.operand);
+    if (ast->ctype->type == TYPE_POINTER)
+        step = ast->ctype->pointer->size;
+
+    gen_emit("%s $%d, %%rax", op, step);
+    gen_store(ast->unary.operand);
+}
+
+/*
+ * Postfix increment/decrement: the original value is saved on the
+ * stack, the updated value is stored back into the operand, and the
+ * saved original is restored into rax as the expression result.
+ * Pointers step by the size of the pointed-to type, everything else by
+ * one.
+ */
+void gen_postfix(ast_t *ast, const char *op) {
+    int step = 1;
+
+    gen_expression(ast->unary.operand);
+    gen_push(SRAX);
+
+    if (ast->ctype->type == TYPE_POINTER)
+        step = ast->ctype->pointer->size;
+
+    gen_emit("%s $%d, %%rax", op, step);
+    gen_store(ast->unary.operand);
+    gen_pop(SRAX);
+}
+
+/*
+ * Count the arguments in `args` by class: `fp` receives the number of
+ * floating-point arguments and `gp` the number of all others. Both
+ * counters are reset first.
+ */
+static void gen_register_area_calculate(list_t *args) {
+    list_iterator_t *it;
+
+    gp = 0;
+    fp = 0;
+
+    it = list_iterator(args);
+    while (!list_iterator_end(it)) {
+        ast_t *argument = list_iterator_next(it);
+
+        if (ast_type_isfloating(argument->ctype))
+            fp++;
+        else
+            gp++;
+    }
+}
+
+/*
+ * Jump to `label` when rax is zero.
+ */
+void gen_je(const char *label) {
+    gen_emit("test %%rax, %%rax");
+    gen_emit("je %s", label);
+}
+
+/*
+ * Evaluate the operand of a cast node, then convert the result from the
+ * operand's type to the node's own type.
+ */
+void gen_cast(ast_t *ast) {
+    gen_expression(ast->unary.operand);
+    gen_load_convert(ast->ctype, ast->unary.operand->ctype);
+}
+
+/*
+ * Emit code that loads the literal `ast` into the result register.
+ *
+ * Integer literals are loaded into rax as immediates. Floating
+ * literals are placed in the data section on first use, under a fresh
+ * label cached in ast->floating.label, and loaded into xmm0 from that
+ * label. The stored constant is the value's bit pattern, obtained with
+ * the same TYPEPUN helper the rest of this file uses; a double is
+ * written as one .quad so the output does not depend on the word order
+ * of the machine running the compiler.
+ */
+void gen_literal(ast_t *ast) {
+    switch (ast->ctype->type) {
+        case TYPE_CHAR:
+        case TYPE_BOOL:
+        case TYPE_INT:
+            gen_emit("mov $%d, %%rax", ast->integer);
+            break;
+        case TYPE_LONG:
+        case TYPE_LLONG:
+            /* PRIi64 expects a signed 64-bit argument */
+            gen_emit("mov $%" PRIi64 ", %%rax", (int64_t)ast->integer);
+            break;
+
+        case TYPE_FLOAT:
+            if (!ast->floating.label) {
+                float    fval = ast->floating.value;
+                uint32_t bits = TYPEPUN(uint32_t, fval);
+
+                ast->floating.label = ast_label();
+                gen_emit_inline(".data");
+                gen_label(ast->floating.label);
+                gen_emit(".long 0x%" PRIx32, bits);
+                gen_emit_inline(".text");
+            }
+            gen_emit("movss %s(%%rip), %%xmm0", ast->floating.label);
+            break;
+
+        case TYPE_DOUBLE:
+        case TYPE_LDOUBLE:
+            if (!ast->floating.label) {
+                double   dval = ast->floating.value;
+                uint64_t bits = TYPEPUN(uint64_t, dval);
+
+                ast->floating.label = ast_label();
+                gen_emit_inline(".data");
+                gen_label(ast->floating.label);
+                gen_emit(".quad 0x%" PRIx64, bits);
+                gen_emit_inline(".text");
+            }
+            gen_emit("movsd %s(%%rip), %%xmm0", ast->floating.label);
+            break;
+
+        default:
+            compile_ice("gen_expression (%s)", ast_type_string(ast->ctype));
+    }
+}
+
+/*
+ * Load the address of a string literal into rax. The string itself is
+ * emitted into the data section the first time it is seen, under a
+ * fresh label that is cached in ast->string.label for later uses.
+ */
+void gen_literal_string(ast_t *ast) {
+    if (!ast->string.label) {
+        ast->string.label = ast_label();
+        gen_emit_inline(".data");
+        gen_label(ast->string.label);
+        gen_emit(".string \"%s\"", string_quote(ast->string.data));
+        gen_emit_inline(".text");
+    }
+    gen_emit("lea %s(%%rip), %%rax", ast->string.label);
+}
+
+/*
+ * Load a local variable from its rbp-relative slot.
+ */
+void gen_variable_local(ast_t *ast) {
+    gen_ensure_lva(ast);
+    gen_load_local(ast->ctype, "rbp", ast->variable.off);
+}
+
+/*
+ * Load a global variable through its label, at offset zero.
+ */
+void gen_variable_global(ast_t *ast) {
+    gen_load_global(ast->ctype, ast->variable.label, 0);
+}
+
+/*
+ * Dereference a pointer: evaluate the pointer expression, load the
+ * pointed-to value relative to rax, then convert it from the pointee
+ * type to the node's own type.
+ */
+void gen_dereference(ast_t *ast) {
+    gen_expression(ast->unary.operand);
+    gen_load_local(ast->unary.operand->ctype->pointer, SRAX, 0);
+    gen_load_convert(ast->ctype, ast->unary.operand->ctype->pointer);
+}
+
+/*
+ * Split the call arguments in `a` into three lists: `i` for arguments
+ * passed in integer registers, `f` for arguments passed in xmm
+ * registers, and `r` for the rest. Structures always go to `r`; the
+ * other classes overflow into `r` once REGISTER_MULT_SIZE integer or
+ * REGISTER_MULT_SIZE_XMM floating arguments have been placed.
+ */
+static void gen_function_args_classify(list_t *i, list_t *f, list_t *r, list_t *a) {
+    int integers  = 0;
+    int floatings = 0;
+
+    for (list_iterator_t *it = list_iterator(a); !list_iterator_end(it); ) {
+        ast_t  *argument = list_iterator_next(it);
+        list_t *target;
+
+        if (argument->ctype->type == TYPE_STRUCTURE) {
+            target = r;
+        } else if (ast_type_isfloating(argument->ctype)) {
+            target = (floatings < REGISTER_MULT_SIZE_XMM) ? f : r;
+            floatings++;
+        } else {
+            target = (integers < REGISTER_MULT_SIZE) ? i : r;
+            integers++;
+        }
+
+        list_push(target, argument);
+    }
+}
+
+/*
+ * Save the argument registers a call is about to overwrite: the first
+ * `in` integer argument registers, then xmm1 up to (but not including)
+ * xmm`fl`. xmm0 is not saved.
+ */
+static void gen_function_args_save(int in, int fl) {
+    int reg;
+
+    gen_emit("# function args save {");
+    for (reg = 0; reg < in; reg++)
+        gen_push(NREG(reg));
+    for (reg = 1; reg < fl; reg++)
+        gen_push_xmm(reg);
+    gen_emit("# }");
+}
+/*
+ * Restore the registers saved by gen_function_args_save, in the reverse
+ * order: xmm registers from `fl` - 1 down to xmm1, then the integer
+ * argument registers from `in` - 1 down to 0.
+ */
+static void gen_function_args_restore(int in, int fl) {
+    int reg;
+
+    gen_emit("# function args restore {");
+    for (reg = fl - 1; reg > 0; reg--)
+        gen_pop_xmm(reg);
+    for (reg = in - 1; reg >= 0; reg--)
+        gen_pop(NREG(reg));
+    gen_emit("# }");
+}
+/*
+ * Pop `l` evaluated arguments from the stack into the integer argument
+ * registers, last register first.
+ */
+static void gen_function_args_popi(int l) {
+    int reg = l;
+
+    gen_emit("# function args pop {");
+    while (--reg >= 0)
+        gen_pop(NREG(reg));
+    gen_emit("# }");
+}
+/*
+ * Pop `l` evaluated arguments from the stack into xmm registers, last
+ * register first.
+ */
+static void gen_function_args_popf(int l) {
+    int reg = l;
+
+    gen_emit("# function args pop (xmm registers) {");
+    while (--reg >= 0)
+        gen_pop_xmm(reg);
+    gen_emit("# }");
+}
+
+/*
+ * Evaluate every argument in `args` in list order and push it on the
+ * stack. Returns the number of bytes accounted for: 8 per scalar
+ * argument plus whatever gen_structure_push reports for structures.
+ */
+static int gen_function_args(list_t *args) {
+    int bytes;
+
+    gen_emit("# functiona arguments { ");
+    bytes = 0;
+
+    for (list_iterator_t *it = list_iterator(args); !list_iterator_end(it); ) {
+        ast_t *argument = list_iterator_next(it);
+        bool   floating;
+
+        if (argument->ctype->type == TYPE_STRUCTURE) {
+            gen_address(argument);
+            bytes += gen_structure_push(argument->ctype->size);
+            continue;
+        }
+
+        /* scalars: evaluate, then push from the register holding the result */
+        floating = ast_type_isfloating(argument->ctype);
+        gen_expression(argument);
+        if (floating)
+            gen_push_xmm(0);
+        else
+            gen_push(SRAX);
+        bytes += 8;
+    }
+
+    gen_emit("# } ");
+    return bytes;
+}
+
+/*
+ * Emit an ordinary function call, direct or through a function pointer.
+ *
+ * Sequence: classify the arguments, save the argument registers in use,
+ * pad the stack by 8 bytes if the tracked `stack` depth is not a
+ * multiple of 16, push the overflow arguments in reverse order,
+ * evaluate the register arguments onto the stack and pop them into
+ * their registers, call, then undo the stack adjustments and restore
+ * the saved registers. The tracked `stack` depth must end where it
+ * started, otherwise an internal compiler error is raised.
+ */
+static void gen_function_call_default(ast_t *ast) {
+    int          save = stack;
+    bool         fptr = (ast->type == AST_TYPE_POINTERCALL);
+    data_type_t *type = fptr ? ast->function.call.functionpointer->ctype->pointer
+                             : ast->function.call.type;
+
+    gen_emit("# function call {");
+
+    /* deal with arguments */
+    list_t *in = list_create();
+    list_t *fl = list_create();
+    list_t *re = list_create();
+
+    gen_function_args_classify(in, fl, re, ast->function.call.args);
+    gen_function_args_save(list_length(in), list_length(fl));
+
+    /* pad by 8 bytes when the tracked depth is not a multiple of 16 */
+    bool algn = stack % 16;
+    if (algn) {
+        gen_emit("sub $8, %%rsp");
+        stack += 8;
+    }
+
+    /* overflow arguments are pushed in reverse list order */
+    int rest = gen_function_args(list_reverse(re));
+
+    /* the callee address is evaluated before the register arguments and
+     * parked on the stack until they have been loaded */
+    if (fptr) {
+        gen_expression(ast->function.call.functionpointer);
+        gen_push(SRAX);
+    }
+
+    /* evaluate register arguments onto the stack, then pop them into
+     * their registers; the byte counts returned here are not used */
+    gen_function_args(in);
+    gen_function_args(fl);
+    gen_function_args_popf(list_length(fl));
+    gen_function_args_popi(list_length(in));
+
+    if (fptr)
+        gen_pop(SR11);
+
+    /* for functions declared with "..." eax is set to the number of
+     * floating arguments passed in registers */
+    if (type->hasdots)
+        gen_emit("mov $%d, %%eax", list_length(fl));
+
+    if (fptr)
+        gen_emit("call *%%r11");
+    else
+        gen_emit("call %s", ast->function.name);
+
+    gen_boolean_maybe(ast->ctype);
+
+    /* drop the overflow arguments */
+    if (rest > 0) {
+        gen_emit("add $%d, %%rsp", rest);
+        stack -= rest;
+    }
+
+    /* drop the alignment padding */
+    if (algn) {
+        gen_emit("add $8, %%rsp");
+        stack -= 8;
+    }
+
+    gen_function_args_restore(list_length(in), list_length(fl));
+
+    gen_emit("# }");
+
+    if (stack != save)
+        compile_ice("gen_function_call (stack out of alignment)");
+}
+
+/*
+ * Emit a function call. Everything except __builtin_return_address is
+ * handed to gen_function_call_default.
+ *
+ * For __builtin_return_address the argument (the frame level) is
+ * evaluated into rax; starting from rbp, the saved frame pointer chain
+ * is followed that many times in r11, and the quadword at 8(%r11) is
+ * loaded into rax as the result. r11 is saved and restored.
+ */
+void gen_function_call(ast_t *ast) {
+    bool builtin = ast->function.name
+                && !strcmp(ast->function.name, "__builtin_return_address");
+
+    if (!builtin) {
+        gen_function_call_default(ast);
+        return;
+    }
+
+    gen_push(SR11);
+    gen_expression(list_head(ast->function.call.args));
+
+    char *walk = ast_label();
+    char *done = ast_label();
+
+    gen_emit("mov %%rbp, %%r11");
+    gen_label(walk);
+    /* rax counts the frames still to be skipped */
+    gen_emit("test %%rax, %%rax");
+    gen_emit("jz %s", done);
+    gen_emit("mov (%%r11), %%r11");
+    gen_emit("dec %%rax");
+    gen_jump(walk);
+    gen_label(done);
+    gen_emit("mov 8(%%r11), %%rax");
+    gen_pop(SR11);
+}
+
+/*
+ * Emit one `case` of a switch statement.
+ *
+ * Control falling in from the previous case body jumps over the test.
+ * The pending switch label is placed here, a new pending label is
+ * created for the next case, and eax is compared against the case
+ * value (or the inclusive range casebeg..caseend); on a mismatch
+ * control jumps to the new pending label.
+ */
+void gen_case(ast_t *ast) {
+    char *skip = ast_label();
+
+    gen_jump(skip);
+    gen_label(gen_label_switch);
+    gen_label_switch = ast_label();
+
+    gen_emit("cmp $%d, %%eax", ast->casebeg);
+    if (ast->casebeg != ast->caseend) {
+        /* range case: outside [casebeg, caseend] means no match */
+        gen_emit("jl %s", gen_label_switch);
+        gen_emit("cmp $%d, %%eax", ast->caseend);
+        gen_emit("jg %s", gen_label_switch);
+    } else {
+        gen_emit("jne %s", gen_label_switch);
+    }
+
+    gen_label(skip);
+}
+
+/*
+ * Initialise the va_list whose address is produced by ast->ap.
+ *
+ * Three fields are written relative to that address: gp * 8 at offset
+ * 0, 48 + fp * 16 at offset 4, and the address rbp - REGISTER_AREA_SIZE
+ * at offset 16. gp and fp are the counts computed by
+ * gen_register_area_calculate. rcx is used as a temporary and is saved
+ * and restored.
+ * NOTE(review): the layout looks like the System V AMD64 va_list
+ * (gp_offset, fp_offset, reg_save_area) -- confirm against the ABI.
+ */
+void gen_va_start(ast_t *ast) {
+    gen_expression(ast->ap);
+    gen_push(SRCX);
+    gen_emit("movl $%d, (%%rax)", gp * 8);
+    gen_emit("movl $%d, 4(%%rax)", 48 + fp * 16);
+    gen_emit("lea %d(%%rbp), %%rcx", -REGISTER_AREA_SIZE);
+    gen_emit("mov %%rcx, 16(%%rax)");
+    gen_pop(SRCX);
+}
+
+/*
+ * Fetch the next variadic argument of type ast->ctype from the va_list
+ * whose address is produced by ast->ap.
+ *
+ * The save-area base is read from offset 16 of the va_list. Floating
+ * arguments use the 32-bit offset stored at 4(%rax), advance it by 16
+ * and load the value into xmm0 (narrowed when the requested type is
+ * float). Other arguments use the 32-bit offset stored at (%rax),
+ * advance it by 8 and load the value into rax. Both offsets are
+ * written back with 32-bit stores so that updating one field never
+ * overwrites its neighbour. rcx and rbx are saved and restored.
+ */
+void gen_va_arg(ast_t *ast) {
+    gen_expression(ast->ap);
+    gen_emit("nop");
+    gen_push(SRCX);
+    gen_push("rbx");
+    gen_emit("mov 16(%%rax), %%rcx");
+    if (ast_type_isfloating(ast->ctype)) {
+        gen_emit("mov 4(%%rax), %%ebx");
+        gen_emit("add %%rbx, %%rcx");
+        gen_emit("add $16, %%ebx");
+        gen_emit("mov %%ebx, 4(%%rax)");
+        gen_emit("movsd (%%rcx), %%xmm0");
+        if (ast->ctype->type == TYPE_FLOAT)
+            gen_emit("cvtpd2ps %%xmm0, %%xmm0");
+    } else {
+        gen_emit("mov (%%rax), %%ebx");
+        gen_emit("add %%rbx, %%rcx");
+        gen_emit("add $8, %%ebx");
+        /* 32-bit store: the field at 4(%rax) must stay intact */
+        gen_emit("mov %%ebx, (%%rax)");
+        gen_emit("mov (%%rcx), %%rax");
+    }
+    gen_pop(SRBX);
+    gen_pop(SRCX);
+}
+
+/*
+ * Logical not: evaluate the operand and leave 1 in eax when rax was
+ * zero, 0 otherwise.
+ */
+void gen_not(ast_t *ast) {
+    gen_expression(ast->unary.operand);
+    gen_emit("cmp $0, %%rax");
+    gen_emit("sete %%al");
+    gen_emit("movzb %%al, %%eax");
+}
+
+/*
+ * Short-circuit logical and. The result (0 or 1) is left in rax; the
+ * right operand is only evaluated when the left one is non-zero.
+ */
+void gen_and(ast_t *ast) {
+    char *end = ast_label();
+    gen_expression(ast->left);
+    gen_emit("test %%rax, %%rax");
+    /* mov leaves the flags set by test untouched, so the result can be
+     * preloaded before the conditional jump */
+    gen_emit("mov $0, %%rax");
+    gen_emit("je %s", end);
+    gen_expression(ast->right);
+    gen_emit("test %%rax, %%rax");
+    gen_emit("mov $0, %%rax");
+    gen_emit("je %s", end);
+    gen_emit("mov $1, %%rax");
+    gen_label(end);
+}
+
+/*
+ * Short-circuit logical or. The result (0 or 1) is left in rax; the
+ * right operand is only evaluated when the left one is zero.
+ */
+void gen_or(ast_t *ast) {
+    char *end = ast_label();
+    gen_expression(ast->left);
+    gen_emit("test %%rax, %%rax");
+    /* mov leaves the flags set by test untouched, so the result can be
+     * preloaded before the conditional jump */
+    gen_emit("mov $1, %%rax");
+    gen_emit("jne %s", end);
+    gen_expression(ast->right);
+    gen_emit("test %%rax, %%rax");
+    gen_emit("mov $1, %%rax");
+    gen_emit("jne %s", end);
+    gen_emit("mov $0, %%rax");
+    gen_label(end);
+}
+
+/*
+ * Load the structure member described by `ast` from its parent object.
+ */
+void gen_struct(ast_t *ast) {
+    gen_load_structure(ast->structure, ast->ctype, 0);
+}
+
+/*
+ * Bitwise and / or: '|' nodes emit `or`, every other node type handled
+ * here emits `and`. The left operand travels through the stack into rcx
+ * and is combined with the right operand in rax.
+ */
+void gen_bitandor(ast_t *ast) {
+    const char *mnemonic = (ast->type == '|') ? "or" : "and";
+
+    gen_expression(ast->left);
+    gen_push(SRAX);
+    gen_expression(ast->right);
+    gen_pop(SRCX);
+    gen_emit("%s %%rcx, %%rax", mnemonic);
+}
+
+/*
+ * Bitwise not: evaluate the operand and invert every bit of rax.
+ * NOTE(review): the operand is read through ast->left here, while the
+ * other unary emitters in this file use ast->unary.operand --
+ * presumably the two members alias; confirm against the ast_t layout.
+ */
+void gen_bitnot(ast_t *ast) {
+    gen_expression(ast->left);
+    gen_emit("not %%rax");
+}
+
+/*
+ * Arithmetic negation of the operand.
+ *
+ * Integers are negated in place with `neg`. Floating values are negated
+ * by computing 0 - x: the operand is moved from xmm0 to xmm1, xmm0 is
+ * cleared, and xmm1 is subtracted from it, so the result is left in
+ * xmm0. xmm1 is saved and restored. Double and long double use the
+ * double-precision instructions, float the single-precision ones.
+ *
+ * Note that 0 - x yields +0.0 for x == +0.0 rather than -0.0.
+ */
+void gen_negate(ast_t *ast) {
+    gen_expression(ast->unary.operand);
+    if (ast_type_isfloating(ast->ctype)) {
+        bool wide = ast->ctype->type == TYPE_DOUBLE
+                 || ast->ctype->type == TYPE_LDOUBLE;
+
+        gen_push_xmm(1);
+        gen_emit("%s %%xmm0, %%xmm1", wide ? "movsd" : "movss");
+        gen_emit("xorpd %%xmm0, %%xmm0");
+        gen_emit("%s %%xmm1, %%xmm0", wide ? "subsd" : "subss");
+        gen_pop_xmm(1);
+        return;
+    }
+    gen_emit("neg %%rax");
+}
+
+/*
+ * Emit an assignment. Structures larger than 8 bytes are copied in
+ * memory; everything else is evaluated, converted from the right-hand
+ * type to the type of the assignment, and stored into the left-hand
+ * side.
+ */
+void gen_assign(ast_t *ast) {
+    data_type_t *target = ast->left->ctype;
+
+    if (target->type == TYPE_STRUCTURE && target->size > 8) {
+        gen_structure_assign(ast->left, ast->right);
+        return;
+    }
+
+    gen_expression(ast->right);
+    gen_load_convert(ast->ctype, ast->right->ctype);
+    gen_store(ast->left);
+}
+
+int parse_evaluate(ast_t *ast);
+/*
+ * Emit `size` bytes of zeroed data using the largest directives that
+ * fit: quadwords first, then one doubleword, then single bytes.
+ * Nothing is emitted when `size` is zero or negative.
+ */
+static void gen_data_zero(int size) {
+    int remaining = size;
+
+    while (remaining >= 8) {
+        gen_emit(".quad 0");
+        remaining -= 8;
+    }
+    while (remaining >= 4) {
+        gen_emit(".long 0");
+        remaining -= 4;
+    }
+    while (remaining > 0) {
+        gen_emit(".byte 0");
+        remaining--;
+    }
+}
+
+/*
+ * Emit zero bytes to fill the gap between the current data `offset`
+ * and the offset at which initializer `ast` belongs. An initializer
+ * that lies before the current offset is an internal compiler error.
+ */
+static void gen_data_padding(ast_t *ast, int offset) {
+    int gap = ast->init.offset - offset;
+
+    if (gap < 0)
+        compile_ice("gen_data_padding");
+
+    gen_data_zero(gap);
+}
+
+/*
+ * Emit the data directives for a list of initializers.
+ *
+ * `inits`  - initializer nodes, each with a target offset, type and value
+ * `size`   - bytes of the object still to be emitted
+ * `offset` - current byte position inside the object
+ * `depth`  - data subsection number currently being written
+ *
+ * For each initializer the gap up to its offset is zero-filled, then
+ * the value is emitted:
+ *   - the address of a local with its own initializer list: the
+ *     pointee is emitted under a fresh label in subsection depth + 1,
+ *     and a .quad reference to that label is written here;
+ *   - the address of a global: a .quad reference to its name;
+ *   - a local variable carrying an initializer list: that list is
+ *     emitted inline;
+ *   - a char array: the string is emitted under a fresh label in
+ *     subsection depth + 1 and referenced with .quad;
+ *   - scalars: a directive sized for the initializer type. Floating
+ *     values are converted to the target type first and then written
+ *     as that type's bit pattern.
+ * Whatever is left of `size` afterwards is zero-filled.
+ */
+static void gen_data_intermediate(list_t *inits, int size, int offset, int depth) {
+    list_iterator_t *it = list_iterator(inits);
+    while (!list_iterator_end(it) && 0 < size) {
+        ast_t *node = list_iterator_next(it);
+        ast_t *v    = node->init.value;
+
+        gen_data_padding(node, offset);
+        offset += node->init.type->size;
+        size   -= node->init.type->size;
+
+        if (v->type == AST_TYPE_ADDRESS) {
+            char *label;
+            switch (v->unary.operand->type) {
+                case AST_TYPE_VAR_LOCAL:
+                    label  = ast_label();
+                    gen_emit(".data %d", depth + 1);
+                    gen_label(label);
+                    gen_data_intermediate(v->unary.operand->variable.init, v->unary.operand->ctype->size, 0, depth + 1);
+                    gen_emit(".data %d", depth);
+                    gen_emit(".quad %s", label);
+                    continue;
+
+                case AST_TYPE_VAR_GLOBAL:
+                    gen_emit(".quad %s", v->unary.operand->variable.name);
+                    continue;
+
+                default:
+                    compile_ice("gen_data_intermediate");
+            }
+        }
+
+        if (v->type == AST_TYPE_VAR_LOCAL && v->variable.init) {
+            gen_data_intermediate(v->variable.init, v->ctype->size, 0, depth);
+            continue;
+        }
+
+        if (v->ctype->type == TYPE_ARRAY && v->ctype->pointer->type == TYPE_CHAR) {
+            char *label = ast_label();
+            gen_emit(".data %d", depth + 1);
+            gen_label(label);
+            gen_emit(".string \"%s\"", string_quote(v->string.data));
+            gen_emit(".data %d", depth);
+            gen_emit(".quad %s", label);
+            continue;
+        }
+
+        switch (node->init.type->type) {
+            case TYPE_FLOAT: {
+                /* narrow to float before taking the 32-bit pattern */
+                float single = v->floating.value;
+                gen_emit(".long 0x%"  PRIx32, TYPEPUN(uint32_t, single));
+                break;
+            }
+            case TYPE_DOUBLE: {
+                double dbl = v->floating.value;
+                gen_emit(".quad 0x%"  PRIx64, TYPEPUN(uint64_t, dbl));
+                break;
+            }
+            case TYPE_CHAR:    gen_emit(".byte %d",  parse_evaluate(v)); break;
+            case TYPE_SHORT:   gen_emit(".short %d", parse_evaluate(v)); break;
+            case TYPE_INT:     gen_emit(".long %d",  parse_evaluate(v)); break;
+
+            case TYPE_LONG:
+            case TYPE_LLONG:
+            case TYPE_POINTER:
+                if (v->type == AST_TYPE_VAR_GLOBAL)
+                    gen_emit(".quad %s", v->variable.name);
+                else
+                    /* parse_evaluate returns int, so the conversion is %d */
+                    gen_emit(".quad %d", parse_evaluate(v));
+                break;
+
+            default:
+                compile_ice("gen_data_intermediate (%s)", ast_type_string(node->init.type));
+        }
+    }
+    gen_data_zero(size);
+}
+
+/*
+ * Emit the definition of an initialized variable: select data
+ * subsection `depth`, export the symbol unless its type is marked
+ * static, place the label, and emit the initializer data.
+ */
+void gen_data(ast_t *ast, int offset, int depth) {
+    const char *symbol = ast->decl.var->variable.name;
+
+    gen_emit(".data %d", depth);
+
+    if (!ast->decl.var->ctype->isstatic)
+        gen_emit_inline(".global %s", symbol);
+
+    gen_emit_inline("%s:", symbol);
+    gen_data_intermediate(ast->decl.init, ast->decl.var->ctype->size, offset, depth);
+}
+
+/*
+ * Spill the argument registers of a variadic function into a save area
+ * below the current stack pointer and reserve that area.
+ *
+ * The six integer argument registers are stored at consecutive 8-byte
+ * slots starting REGISTER_AREA_SIZE bytes below rsp. The xmm registers
+ * follow in 16-byte slots; al is used as a countdown of how many of
+ * them to store, and the sequence stops as soon as it reaches zero.
+ * Finally rsp is lowered by REGISTER_AREA_SIZE, which is also the
+ * return value.
+ */
+static int gen_register_area(void) {
+    int top = -REGISTER_AREA_SIZE;
+    gen_emit("mov %%rdi, %d(%%rsp)", top);
+    gen_emit("mov %%rsi, %d(%%rsp)", (top += 8));
+    gen_emit("mov %%rdx, %d(%%rsp)", (top += 8));
+    gen_emit("mov %%rcx, %d(%%rsp)", (top += 8));
+    gen_emit("mov %%r8,  %d(%%rsp)", (top += 8));
+    /* top is deliberately not advanced here: the first xmm slot below
+     * lands at top + 16, directly after the r9 slot */
+    gen_emit("mov %%r9,  %d(%%rsp)", top + 8);
+
+    char *end = ast_label();
+    for (int i = 0; i < 16; i++) {
+        gen_emit("test %%al, %%al");
+        gen_emit("jz %s", end);
+        gen_emit("movsd %%xmm%d, %d(%%rsp)", i, (top += 16));
+        gen_emit("sub $1, %%al");
+    }
+    gen_label(end);
+    gen_emit("sub $%d, %%rsp", REGISTER_AREA_SIZE);
+    return REGISTER_AREA_SIZE;
+}
+
+/*
+ * Move every incoming parameter into its own stack slot and record the
+ * slot's rbp-relative offset in the parameter's variable.off.
+ *
+ * `offset` is the rbp-relative position already in use; each scalar
+ * parameter takes the next 8 bytes below it. Parameters that arrived in
+ * registers are pushed from those registers. Parameters that arrived on
+ * the caller's stack are loaded from `ar * 8` bytes above rbp and then
+ * pushed; `ar` counts those 8-byte slots. Stack-passed _Bool values are
+ * read as a byte and zero-extended; other stack-passed integers are
+ * read as a full quadword so no bits are lost before the push.
+ * NOTE(review): `ar` starts at REGISTER_MULT_SIZE_XMM -
+ * REGISTER_MULT_SIZE, presumably to skip the saved rbp and the return
+ * address -- confirm the two constants.
+ */
+static void gen_function_parameters(list_t *parameters, int offset) {
+    gen_emit("# function parameters { ");
+    int ir = 0;
+    int xr = 0;
+    int ar = REGISTER_MULT_SIZE_XMM - REGISTER_MULT_SIZE;
+
+    for (list_iterator_t *it = list_iterator(parameters); !list_iterator_end(it); ) {
+        ast_t *value = list_iterator_next(it);
+        if (value->ctype->type == TYPE_STRUCTURE) {
+            gen_emit("lea %d(%%rbp), %%rax", ar * 8);
+            int emit = gen_structure_push(value->ctype->size);
+            offset -= emit;
+            ar += emit / 8;
+        } else if (ast_type_isfloating(value->ctype)) {
+            if (xr >= REGISTER_MULT_SIZE_XMM) {
+                gen_emit("mov %d(%%rbp), %%rax", ar++ * 8);
+                gen_push(SRAX);
+            } else {
+                gen_push_xmm(xr++);
+            }
+            offset -= 8;
+        } else {
+            if (ir >= REGISTER_MULT_SIZE) {
+                if (value->ctype->type == TYPE_BOOL) {
+                    gen_emit("mov %d(%%rbp), %%al", ar++ * 8);
+                    gen_emit("movzb %%al, %%eax");
+                } else {
+                    /* whole slot, not just its low byte */
+                    gen_emit("mov %d(%%rbp), %%rax", ar++ * 8);
+                }
+                gen_push(SRAX);
+            } else {
+                if (value->ctype->type == TYPE_BOOL)
+                    gen_emit("movsb %%%s, %%%s", SREG(ir), MREG(ir));
+                gen_push(NREG(ir++));
+            }
+            offset -= 8;
+        }
+        value->variable.off = offset;
+    }
+    gen_emit("# }");
+}
+
+/*
+ * Emit the prologue of function `ast`: the symbol and its label, the
+ * frame setup, the register save area for variadic functions, the
+ * parameter spills, and the stack reservation for locals.
+ *
+ * Each local is given an rbp-relative slot whose size is rounded up to
+ * a multiple of 8; the slot offset is recorded in its variable.off.
+ * The tracked `stack` depth grows by the space reserved for locals.
+ */
+void gen_function_prologue(ast_t *ast) {
+    int              frame  = 0;
+    int              locals = 0;
+    list_iterator_t *it;
+
+    gen_emit("# function prologue {");
+    gen_emit_inline(".text");
+    if (!ast->ctype->isstatic)
+        gen_emit_inline(".global %s", ast->function.name);
+    gen_emit_inline("%s:", ast->function.name);
+    gen_emit("nop");
+    gen_push("rbp");
+    gen_emit("mov %%rsp, %%rbp");
+
+    if (ast->ctype->hasdots) {
+        gen_register_area_calculate(ast->function.params);
+        frame -= gen_register_area();
+    }
+
+    /* every parameter occupies one 8-byte slot below the frame pointer */
+    gen_function_parameters(ast->function.params, frame);
+    frame -= list_length(ast->function.params) * 8;
+
+    it = list_iterator(ast->function.locals);
+    while (!list_iterator_end(it)) {
+        ast_t *local = list_iterator_next(it);
+        int    bytes = gen_alignment(local->ctype->size, 8);
+
+        frame  -= bytes;
+        locals += bytes;
+        local->variable.off = frame;
+    }
+
+    if (locals) {
+        gen_emit("sub $%d, %%rsp", locals);
+        stack += locals;
+    }
+    gen_emit("# }");
+}
+
+/*
+ * Emit the function epilogue. If the tracked `stack` depth is non-zero
+ * a diagnostic comment is written into the assembly output before the
+ * return sequence.
+ */
+void gen_function_epilogue(void) {
+    if (stack != 0)
+        gen_emit("# stack misalignment: %d\n", stack);
+    gen_return();
+}
+
+/*
+ * Emit the return sequence: tear down the frame and return.
+ */
+void gen_return(void) {
+    gen_emit("leave");
+    gen_emit("ret");
+}
+
+/*
+ * Per-function setup: reset the tracked `stack` depth to 8. The `ast`
+ * argument is unused.
+ * NOTE(review): 8 presumably accounts for the return address already on
+ * the stack at function entry -- confirm.
+ */
+void gen_function(ast_t *ast) {
+    (void)ast;
+    stack = 8;
+}
diff --git a/init.c b/init.c
new file mode 100644 (file)
index 0000000..471e43f
--- /dev/null
+++ b/init.c
@@ -0,0 +1,302 @@
+/*
+ * This file implements a small state machine for handling all the forms
+ * of initialization C offers. It's called from the parser just like
+ * declaration specification. It's a sub-portion of the core parser,
+ * separated from all that logic due to the nature of initializer
+ * complexity.
+ */
+#include "parse.h"
+#include "init.h"
+#include "lice.h"
+#include "lexer.h"
+#include "conv.h"
+
+#include <string.h>
+#include <stdlib.h>
+
+static void init_element   (list_t *, data_type_t *, int, bool);
+static void init_structure (list_t *, data_type_t *, int, bool);
+static void init_array     (list_t *, data_type_t *, int, bool);
+static void init_list      (list_t *, data_type_t *, int, bool);
+
+/*
+ * Initializer elements need to be sorted by semantic order instead
+ * of lexical order, since designated initializers are allowed to
+ * overwrite already assigned fields lexically, but the order needs
+ * to stay dependent on semantics. It's also generally more efficient for
+ * initialization to stay sorted.
+ */
+/* qsort comparator: orders two `ast_t *' array slots by ascending init.offset. */
+static int init_sort_predicate(const void *p, const void *q) {
+    const ast_t *lhs = *(const ast_t *const *)p;
+    const ast_t *rhs = *(const ast_t *const *)q;
+
+    if (lhs->init.offset < rhs->init.offset)
+        return -1;
+    if (lhs->init.offset == rhs->init.offset)
+        return 0;
+    return 1;
+}
+
+/* Pairs an initializer with the position it had in the list before sorting. */
+typedef struct {
+    ast_t  *node;
+    size_t  order;
+} init_sort_entry_t;
+
+/*
+ * Orders entries by offset like init_sort_predicate; entries with equal
+ * offsets keep their original relative order.
+ */
+static int init_sort_stable_predicate(const void *p, const void *q) {
+    const init_sort_entry_t *a      = p;
+    const init_sort_entry_t *b      = q;
+    int                      result = init_sort_predicate(&a->node, &b->node);
+
+    if (result != 0)
+        return result;
+    return (a->order > b->order) - (a->order < b->order);
+}
+
+/*
+ * Sort the initializer list in place by ascending offset.
+ *
+ * qsort gives no ordering guarantee for elements that compare equal, yet
+ * a designated initializer that overwrites an earlier one produces two
+ * entries with the same offset whose lexical order must survive. The
+ * original position is therefore used as a tie breaker, which makes the
+ * result stable.
+ */
+static void init_sort(list_t *inits) {
+    size_t length = list_length(inits);
+    size_t index  = 0;
+
+    /* nothing to reorder */
+    if (length < 2)
+        return;
+
+    init_sort_entry_t *temp = memory_allocate(sizeof(*temp) * length);
+    list_iterator_t   *it   = list_iterator(inits);
+
+    while (!list_iterator_end(it)) {
+        temp[index].node  = list_iterator_next(it);
+        temp[index].order = index;
+        index++;
+    }
+
+    qsort(temp, length, sizeof(*temp), &init_sort_stable_predicate);
+
+    list_empty(inits);
+    for (index = 0; index < length; index++)
+        list_push(inits, temp[index].node);
+}
+
+/*
+ * Decide whether an aggregate parsed without its own brace and not
+ * reached through a designator must stop at `token': a `.' or `[' then
+ * belongs to an enclosing initializer, so the token is handed back and
+ * true is returned. In every other case nothing is consumed.
+ */
+static bool init_earlyout(lexer_token_t *token, bool brace, bool designated) {
+    if (brace || designated)
+        return false;
+    if (!lexer_ispunct(token, '.') && !lexer_ispunct(token, '['))
+        return false;
+
+    lexer_unget(token);
+    return true;
+}
+
+/*
+ * Utility routines to determine and skip to braces for initialization
+ * involving aggregates.
+ */
+/* Consume a `{' if it is the next token; reports whether one was consumed. */
+static bool init_skip_brace_maybe(void) {
+    lexer_token_t *next   = lexer_next();
+    bool           opened = lexer_ispunct(next, '{');
+
+    if (!opened)
+        lexer_unget(next);
+    return opened;
+}
+
+/* Consume a `,' if it is the next token; anything else is handed back. */
+static void init_skip_comma_maybe(void) {
+    lexer_token_t *next = lexer_next();
+
+    if (lexer_ispunct(next, ','))
+        return;
+    lexer_unget(next);
+}
+
+/*
+ * Discard everything up to the closing `}' of an initializer whose
+ * elements have all been filled already. Each surplus element is parsed
+ * as an assignment expression, dropped, and reported with a warning.
+ */
+static void init_skip_brace(void) {
+    for (;;) {
+        /*
+         * The lookahead needed here is potentially unbounded; that is
+         * simply how the grammar for initializers works.
+         */
+        lexer_token_t *token = lexer_next();
+        if (lexer_ispunct(token, '}'))
+            return;
+
+        if (!lexer_ispunct(token, '.')) {
+            lexer_unget(token);
+        } else {
+            /* `.name =' designator: drop the name and require the `=' */
+            lexer_next();
+            parse_expect('=');
+        }
+
+        if (!parse_expression_assignment())
+            return;
+
+        compile_warn("excess elements in initializer");
+        init_skip_comma_maybe();
+    }
+}
+
+/*
+ * Structure and array initialization routines:
+ *  deals with standard initialization via aggregate initializer, as well
+ *  as designated initialization, and nested aggregate + designation. In
+ *  the case of array designated initialization, array subscripting is
+ *  handled, whereas in the case of structure designated initialization
+ *  field members are indexed by .fieldname. The GCC style of designated
+ *  initializers isn't supported yet, neither is range initialization.
+ */
+/*
+ * Parse the initializers of one structure or union and append them to
+ * `init'.
+ *
+ *  init       - list receiving the parsed initializer nodes
+ *  type       - aggregate type whose `fields' table drives the parse
+ *  offset     - offset of the aggregate; each field's own offset is added
+ *  designated - true when this aggregate was reached through a designator
+ *
+ * Fields are consumed in the order table_keys() yields them unless a
+ * `.name' designator repositions the cursor.
+ */
+static void init_structure_intermediate(list_t *init, data_type_t *type, int offset, bool designated) {
+    bool             brace = init_skip_brace_maybe();
+    list_iterator_t *it    = list_iterator(table_keys(type->fields));
+
+    for (;;) {
+        lexer_token_t *token = lexer_next();
+        if (lexer_ispunct(token, '}')) {
+            /* without our own brace the `}' closes an enclosing initializer */
+            if (!brace)
+                lexer_unget(token);
+            return;
+        }
+
+        char        *fieldname;
+        data_type_t *fieldtype;
+
+        if (init_earlyout(token, brace, designated))
+            return;
+
+        if (lexer_ispunct(token, '.')) {
+            if (!(token = lexer_next()) || token->type != LEXER_TOKEN_IDENTIFIER)
+                compile_error("invalid designated initializer");
+            fieldname = token->string;
+            if (!(fieldtype = table_find(type->fields, fieldname)))
+                compile_error("field `%s' doesn't exist in designated initializer", fieldname);
+
+            /* restart the cursor and leave it just past the designated field */
+            it = list_iterator(table_keys(type->fields));
+            while (!list_iterator_end(it))
+                if (!strcmp(fieldname, list_iterator_next(it)))
+                    break;
+            designated = true;
+        } else {
+            lexer_unget(token);
+            /* all fields consumed: whatever follows is excess */
+            if (list_iterator_end(it))
+                break;
+
+            fieldname = list_iterator_next(it);
+            fieldtype = table_find(type->fields, fieldname);
+        }
+        init_element(init, fieldtype, offset + fieldtype->offset, designated);
+        init_skip_comma_maybe();
+        designated = false;
+
+        /* types without `isstruct' set stop after a single member */
+        if (!type->isstruct)
+            break;
+    }
+    if (brace)
+        init_skip_brace();
+}
+
+/*
+ * Parse the initializers of one array and append them to `init'.
+ *
+ *  init       - list receiving the parsed initializer nodes
+ *  type       - array type; `type->pointer' is the element type
+ *  offset     - offset of the array; element i lives at offset + size * i
+ *  designated - true when this array was reached through a designator
+ *
+ * An array whose length is negative has its length and size filled in
+ * from the initializer. The length is the highest initialized index plus
+ * one, not the index visited last, so that `[5] = 1, [0] = 2' yields six
+ * elements rather than one.
+ */
+static void init_array_intermediate(list_t *init, data_type_t *type, int offset, bool designated) {
+    bool brace    = init_skip_brace_maybe();
+    bool flexible = (type->length <= 0);
+    int  size     = type->pointer->size;
+    int  count    = 0; /* highest initialized index + 1 */
+    int  i;
+
+    for (i = 0; flexible || i < type->length; i++) {
+        lexer_token_t *token = lexer_next();
+        if (lexer_ispunct(token, '}')) {
+            /* without our own brace the `}' closes an enclosing initializer */
+            if (!brace)
+                lexer_unget(token);
+            goto complete;
+        }
+
+        if (init_earlyout(token, brace, designated))
+            return;
+
+        if (lexer_ispunct(token, '[')) {
+            /* designated array initializer */
+            int index = parse_expression_evaluate();
+            if (index < 0 || (!flexible && type->length <= index))
+                compile_error("out of bounds");
+            i = index;
+            parse_expect(']');
+            designated = true;
+        } else {
+            lexer_unget(token);
+        }
+        init_element(init, type->pointer, offset + size * i, designated);
+        if (count < i + 1)
+            count = i + 1;
+        init_skip_comma_maybe();
+        designated = false;
+    }
+    if (brace)
+        init_skip_brace();
+
+complete:
+    if (type->length < 0) {
+        type->length = count;
+        type->size   = size * count;
+    }
+}
+
+/*
+ * Intermediate stages deal with all the logic, these functions are
+ * just tail calls (hopefully optimized) to the intermediate stages followed
+ * by a sorting of the elements to honor semantic ordering of initialization.
+ */
+/* Parse a structure initializer into `init', then re-sort the whole list by offset. */
+static void init_structure(list_t *init, data_type_t *type, int offset, bool designated) {
+    init_structure_intermediate(init, type, offset, designated);
+    init_sort(init);
+}
+
+/* Parse an array initializer into `init', then re-sort the whole list by offset. */
+static void init_array(list_t *init, data_type_t *type, int offset, bool designated) {
+    init_array_intermediate(init, type, offset, designated);
+    init_sort(init);
+}
+
+/*
+ * The entry points to the initializers, single element initialization
+ * and initializer list initialization will dispatch into the appropriate
+ * initialization parsing routines as defined above.
+ */
+/*
+ * Parse the initializer of a single element of `type' located at `offset'
+ * and append the result to `init'.
+ *
+ * Arrays and structures are forwarded to init_list. A scalar may be
+ * wrapped in braces. Otherwise an assignment expression is parsed,
+ * checked for assignability and recorded.
+ */
+static void init_element(list_t *init, data_type_t *type, int offset, bool designated) {
+    /* the result is deliberately ignored: an `=' here is optional */
+    parse_next('=');
+    if (type->type == TYPE_ARRAY || type->type == TYPE_STRUCTURE)
+        init_list(init, type, offset, designated);
+    else if (parse_next('{')) {
+        init_element(init, type, offset, designated);
+        parse_expect('}');
+    } else {
+        ast_t *expression = parse_expression_assignment();
+        /*
+         * init_skip_brace treats a NULL result from the expression parser
+         * as a normal outcome, so guard against it before dereferencing.
+         */
+        if (!expression) {
+            compile_error("expected expression in initializer");
+            return;
+        }
+        parse_semantic_assignable(type, expression->ctype);
+        list_push(init, ast_initializer(expression, type, offset));
+    }
+}
+
+/*
+ * Expand the string `p' into one char initializer per array element,
+ * starting at `offset'. An array whose length is -1 takes its length and
+ * size from the string, terminator included. Elements past the end of
+ * the string are initialized to zero; characters past the end of the
+ * array are dropped.
+ */
+static void init_string(list_t *init, data_type_t *type, char *p, int offset) {
+    if (type->length == -1)
+        type->length = type->size = strlen(p) + 1;
+
+    for (int i = 0; i < type->length; i++) {
+        /* once the terminator is reached `p' stays put and zeros are emitted */
+        char value = 0;
+        if (*p)
+            value = *p++;
+
+        list_push(init, ast_initializer(
+            ast_new_integer(ast_data_table[AST_DATA_CHAR], value),
+            ast_data_table[AST_DATA_CHAR], offset + i
+        ));
+    }
+}
+
+/*
+ * Parse an initializer list for `type' at `offset' into `init'.
+ *
+ * String types accept a string literal, bare or wrapped in a single pair
+ * of braces. Everything else is dispatched to the array or structure
+ * parser; any other type is parsed as an array of one element.
+ */
+static void init_list(list_t *init, data_type_t *type, int offset, bool designated) {
+    lexer_token_t *token = lexer_next();
+
+    /* the lexer yields NULL at end of input, so never dereference blindly */
+    if (token && ast_type_isstring(type)) {
+        if (token->type == LEXER_TOKEN_STRING) {
+            init_string(init, type, token->string, offset);
+            return;
+        }
+
+        if (lexer_ispunct(token, '{')) {
+            lexer_token_t *peek = lexer_peek();
+            if (peek && peek->type == LEXER_TOKEN_STRING) {
+                token = lexer_next();
+                init_string(init, type, token->string, offset);
+                parse_expect('}');
+                return;
+            }
+        }
+    }
+    lexer_unget(token);
+
+    if (type->type == TYPE_ARRAY)
+        init_array(init, type, offset, designated);
+    else if (type->type == TYPE_STRUCTURE)
+        init_structure(init, type, offset, designated);
+    else
+        init_array(init, ast_array(type, 1), offset, designated);
+}
+
+/*
+ * Actual entry point of the parser, parses an initializer list, while
+ * also dispatching into the appropriate parser routines depending on
+ * certain state like, array/structure, designated or not.
+ */
+/*
+ * Parse the initializer of an object of `type' and return it as a newly
+ * created list of initializer nodes.
+ *
+ * A leading `{' or a string type selects initializer list parsing. Any
+ * other initializer is a single assignment expression, converted to
+ * `type' when it is convertible and its type differs.
+ */
+list_t *init_entry(data_type_t *type) {
+    list_t *list = list_create();
+    if (lexer_ispunct(lexer_peek(), '{') || ast_type_isstring(type)) {
+        init_list(list, type, 0, false);
+        return list;
+    }
+
+    ast_t *init = parse_expression_assignment();
+    /*
+     * init_skip_brace treats a NULL result from the expression parser as
+     * a normal outcome, so guard against it before dereferencing.
+     */
+    if (!init) {
+        compile_error("expected expression in initializer");
+        return list;
+    }
+    if (conv_capable(init->ctype) && init->ctype->type != type->type)
+        init = ast_type_convert(type, init);
+    list_push(list, ast_initializer(init, type, 0));
+
+    return list;
+}
diff --git a/lexer.c b/lexer.c
new file mode 100644 (file)
index 0000000..376cb66
--- /dev/null
+++ b/lexer.c
@@ -0,0 +1,505 @@
+#include <stdlib.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "lexer.h"
+#include "util.h"
+#include "lice.h"
+#include "opt.h"
+
+/* Stack of tokens handed back through lexer_unget; lexer_next pops from it first. */
+static list_t *lexer_buffer = &SENTINEL_LIST;
+
+/* The input currently being lexed. */
+typedef struct {
+    char   *file;   /* name printed by lexer_marker                        */
+    size_t  line;   /* current line, adjusted as newlines are read/ungot   */
+    FILE   *fp;     /* stream the characters are read from                 */
+} lexer_file_t;
+
+/* One pushed back character; a negative value means none is pending. */
+static int          lexer_continuation = -1;
+static lexer_file_t lexer_file;
+
+/*
+ * Point the lexer at standard input, starting at line 1. Marked as a
+ * constructor, so it is set up without an explicit call.
+ */
+__attribute__((constructor)) void lexer_init(void) {
+    lexer_file.file = "(stdin)";
+    lexer_file.line = 1;
+    lexer_file.fp   = stdin;
+}
+
+/*
+ * Push `ch' back onto the input. Only one character is held locally; a
+ * character that is already pending is passed on to the stream with
+ * ungetc. Pushing back a newline also takes back its line increment.
+ */
+static void lexer_file_unget(int ch) {
+    if (lexer_continuation >= 0)
+        ungetc(lexer_continuation, lexer_file.fp);
+    lexer_continuation = ch;
+
+    if (ch == '\n')
+        lexer_file.line--;
+}
+
+/*
+ * Read the next character, taking a pending pushed back character first.
+ * A backslash immediately followed by a newline is a line continuation:
+ * both are dropped and reading carries on. Every newline read bumps the
+ * line counter.
+ */
+static int lexer_file_get(void) {
+    for (;;) {
+        int ch = (lexer_continuation < 0) ? getc(lexer_file.fp) : lexer_continuation;
+        lexer_continuation = -1;
+
+        if (ch != '\\') {
+            if (ch == '\n')
+                lexer_file.line++;
+            return ch;
+        }
+
+        int next = getc(lexer_file.fp);
+        if (next != '\n') {
+            /* an ordinary backslash: keep what followed it for later */
+            lexer_file_unget(next);
+            return '\\';
+        }
+
+        /* spliced line, go around again */
+        lexer_file.line++;
+    }
+}
+
+/*
+ * Return a heap allocated copy of `token'. A failed allocation is
+ * reported instead of being passed on to memcpy; NULL is returned should
+ * the report come back.
+ */
+static lexer_token_t *lexer_token_copy(lexer_token_t *token) {
+    lexer_token_t *copy = malloc(sizeof(*copy));
+    if (!copy) {
+        compile_ice("out of memory");
+        return NULL;
+    }
+    return memcpy(copy, token, sizeof(*copy));
+}
+
+/* Build an identifier token whose text is the buffer of `str'. */
+static lexer_token_t *lexer_identifier(string_t *str) {
+    lexer_token_t token = {
+        .type   = LEXER_TOKEN_IDENTIFIER,
+        .string = string_buffer(str)
+    };
+    return lexer_token_copy(&token);
+}
+/* Build a string literal token whose text is the buffer of `str'. */
+static lexer_token_t *lexer_strtok(string_t *str) {
+    lexer_token_t token = {
+        .type   = LEXER_TOKEN_STRING,
+        .string = string_buffer(str)
+    };
+    return lexer_token_copy(&token);
+}
+/* Build a punctuation token carrying the code `punct'. */
+static lexer_token_t *lexer_punct(int punct) {
+    lexer_token_t token = {
+        .type  = LEXER_TOKEN_PUNCT,
+        .punct = punct
+    };
+    return lexer_token_copy(&token);
+}
+/* Build a number token that keeps the spelling `string' as its text. */
+static lexer_token_t *lexer_number(char *string) {
+    lexer_token_t token = {
+        .type   = LEXER_TOKEN_NUMBER,
+        .string = string
+    };
+    return lexer_token_copy(&token);
+}
+/* Build a character constant token holding `value'. */
+static lexer_token_t *lexer_char(char value) {
+    lexer_token_t token = {
+        .type      = LEXER_TOKEN_CHAR,
+        .character = value
+    };
+    return lexer_token_copy(&token);
+}
+
+/*
+ * Discard input up to the end of the current line. The newline itself is
+ * pushed back, so it is still seen by whoever reads next.
+ */
+static void lexer_skip_comment_line(void) {
+    int c;
+
+    while ((c = lexer_file_get()) != EOF) {
+        if (c != '\n')
+            continue;
+        lexer_file_unget(c);
+        return;
+    }
+}
+
+/*
+ * Discard input up to and including the closing star-slash of a block
+ * comment whose opener has been consumed already. Reaching end of input
+ * first is reported as an error; without that check the loop would never
+ * terminate on an unterminated comment.
+ */
+static void lexer_skip_comment_block(void) {
+    bool star = false; /* was the previous character a '*'? */
+
+    for (;;) {
+        int c = lexer_file_get();
+        if (c == EOF) {
+            compile_error("unterminated block comment");
+            return;
+        }
+        if (star && c == '/')
+            return;
+        star = (c == '*');
+    }
+}
+
+/*
+ * Skip whitespace. The first character that is not whitespace is pushed
+ * back and also returned; EOF is returned when the input runs out.
+ */
+static int lexer_skip(void) {
+    for (;;) {
+        int c = lexer_file_get();
+        if (c == EOF)
+            return EOF;
+        if (!isspace(c) && c != '\n' && c != '\r') {
+            lexer_file_unget(c);
+            return c;
+        }
+    }
+}
+
+/*
+ * Collect a number token starting with `c'. Digits, letters and `.' are
+ * all taken as part of the spelling; the text is not validated here.
+ */
+static lexer_token_t *lexer_read_number(int c) {
+    string_t *spelling = string_create();
+    int       next;
+
+    string_cat(spelling, c);
+    for (next = lexer_file_get(); isdigit(next) || isalpha(next) || next == '.'; next = lexer_file_get())
+        string_cat(spelling, next);
+
+    lexer_file_unget(next);
+    return lexer_number(string_buffer(spelling));
+}
+
+/*
+ * Append the octal digit `c' to the value in `*r'. Returns false and
+ * leaves `*r' untouched when `c' is not an octal digit.
+ */
+static bool lexer_read_character_octal_brace(int c, int *r) {
+    if (c < '0' || c > '7')
+        return false;
+
+    int digit = c - '0';
+    *r = (*r << 3) | digit;
+    return true;
+}
+
+/*
+ * Read an octal escape whose first digit `c' has been consumed already.
+ * Up to two further octal digits are taken; the first character that is
+ * not one is pushed back.
+ */
+static int lexer_read_character_octal(int c) {
+    int value = c - '0';
+
+    for (int extra = 0; extra < 2; extra++) {
+        int next = lexer_file_get();
+        if (!lexer_read_character_octal_brace(next, &value)) {
+            lexer_file_unget(next);
+            break;
+        }
+    }
+    return value;
+}
+
+/*
+ * Check whether `c' may be written as a universal character name.
+ *
+ * The surrogate range D800 through DFFF is never valid. Below 00A0 only
+ * `$', `@' and "`" are allowed. The lower bound of the surrogate range
+ * used to read 0x800, which wrongly rejected everything from U+0800 up
+ * to U+D7FF.
+ */
+static bool lexer_read_character_universal_test(unsigned int c) {
+    if (0xD800 <= c && c <= 0xDFFF)
+        return false;
+    return 0xA0 <= c || c == '$' || c == '@' || c == '`';
+}
+
+/*
+ * Read the `length' hexadecimal digits of a universal character name
+ * (4 for \u, 8 for \U) and return the value they spell. Anything that is
+ * not a hexadecimal digit, or a value that fails
+ * lexer_read_character_universal_test, is reported as an error.
+ */
+static int lexer_read_character_universal(int length) {
+    unsigned int value = 0;
+
+    for (int i = 0; i < length; i++) {
+        int c = lexer_file_get();
+        int digit;
+
+        if (c >= '0' && c <= '9') {
+            digit = c - '0';
+        } else if (c >= 'a' && c <= 'f') {
+            digit = c - 'a' + 10;
+        } else if (c >= 'A' && c <= 'F') {
+            digit = c - 'A' + 10;
+        } else {
+            compile_error("not a valid universal character: %c", c);
+            continue;
+        }
+        value = (value << 4) | digit;
+    }
+
+    if (!lexer_read_character_universal_test(value)) {
+        char kind = (length == 4) ? 'u' : 'U';
+        compile_error(
+            "not a valid universal character: \\%c%0*x",
+            kind,
+            length,
+            value
+        );
+    }
+    return value;
+}
+
+/*
+ * Read the digits of a \x escape and return the value they spell. At
+ * least one hexadecimal digit is required; the first character that is
+ * not one is pushed back.
+ */
+static int lexer_read_character_hexadecimal(void) {
+    int value = 0;
+    int c     = lexer_file_get();
+
+    if (!isxdigit(c))
+        compile_error("malformatted hexadecimal character");
+
+    for (;;) {
+        int digit;
+
+        if (c >= '0' && c <= '9') {
+            digit = c - '0';
+        } else if (c >= 'a' && c <= 'f') {
+            digit = c - 'a' + 10;
+        } else if (c >= 'A' && c <= 'F') {
+            digit = c - 'A' + 10;
+        } else {
+            lexer_file_unget(c);
+            return value;
+        }
+
+        value = (value << 4) | digit;
+        c     = lexer_file_get();
+    }
+}
+
+/*
+ * Translate the escape sequence that follows a backslash which has been
+ * consumed already. Characters without a special meaning stand for
+ * themselves, which also covers the quote, question mark and backslash
+ * escapes.
+ */
+static int lexer_read_character_escaped(void) {
+    int c = lexer_file_get();
+
+    switch (c) {
+        /* control characters */
+        case 'a': return '\a';
+        case 'b': return '\b';
+        case 'f': return '\f';
+        case 'n': return '\n';
+        case 'r': return '\r';
+        case 't': return '\t';
+        case 'v': return '\v';
+        case 'e': return '\033';
+
+        /* numeric forms */
+        case '0' ... '7': return lexer_read_character_octal(c);
+        case 'x':         return lexer_read_character_hexadecimal();
+        case 'u':         return lexer_read_character_universal(4);
+        case 'U':         return lexer_read_character_universal(8);
+
+        case EOF:
+            compile_error("malformatted escape sequence");
+            break;
+
+        default:
+            break;
+    }
+    return c;
+}
+
+/*
+ * Read a character constant whose opening quote has been consumed
+ * already. Exactly one character or escape sequence is accepted before
+ * the closing quote.
+ */
+static lexer_token_t *lexer_read_character(void) {
+    int value = lexer_file_get();
+
+    if (value == '\\')
+        value = lexer_read_character_escaped();
+
+    if (lexer_file_get() != '\'')
+        compile_error("unterminated character");
+
+    return lexer_char((char)value);
+}
+
+/*
+ * Read a string literal whose opening quote has been consumed already.
+ * Escape sequences are translated while the text is collected.
+ */
+static lexer_token_t *lexer_read_string(void) {
+    string_t *text = string_create();
+    int       c;
+
+    while ((c = lexer_file_get()) != '"') {
+        if (c == EOF)
+            compile_error("Expected termination for string literal");
+        if (c == '\\')
+            c = lexer_read_character_escaped();
+        string_cat(text, c);
+    }
+    return lexer_strtok(text);
+}
+
+/*
+ * Collect an identifier starting with `c1'. Letters, digits, `_' and `$'
+ * continue the name; the first other character is pushed back.
+ */
+static lexer_token_t *lexer_read_identifier(int c1) {
+    string_t *name = string_create();
+    int       next;
+
+    string_cat(name, (char)c1);
+    for (next = lexer_file_get(); isalnum(next) || next == '_' || next == '$'; next = lexer_file_get())
+        string_cat(name, next);
+
+    lexer_file_unget(next);
+    return lexer_identifier(name);
+}
+
+/*
+ * Look at the next character: when it is `expect1' it is consumed and
+ * the punctuation `a' results, otherwise it is pushed back and the
+ * punctuation `e' results.
+ */
+static lexer_token_t *lexer_read_reclassify_one(int expect1, int a, int e) {
+    int next = lexer_file_get();
+
+    if (next != expect1) {
+        lexer_file_unget(next);
+        return lexer_punct(e);
+    }
+    return lexer_punct(a);
+}
+/*
+ * Look at the next character: `expect1' selects the punctuation `a' and
+ * `expect2' selects `b', consuming the character in both cases. Any
+ * other character is pushed back and `e' results.
+ */
+static lexer_token_t *lexer_read_reclassify_two(int expect1, int a, int expect2, int b, int e) {
+    int next = lexer_file_get();
+    int kind;
+
+    if (next == expect1) {
+        kind = a;
+    } else if (next == expect2) {
+        kind = b;
+    } else {
+        lexer_file_unget(next);
+        kind = e;
+    }
+    return lexer_punct(kind);
+}
+
+static lexer_token_t *lexer_read_token(void);
+
+/*
+ * Handle a line starting with `#', whose `#' has been consumed already.
+ *
+ * Only `#pragma warning_disable' and `#pragma warning_enable' have an
+ * effect: they switch compile_warning. Every other directive is skipped.
+ * In all cases the rest of the line is discarded and the next real token
+ * is returned.
+ *
+ * Fixes over the previous version:
+ *  - the pragma text loop stops at end of input instead of spinning;
+ *  - the newline ending a pragma is pushed back, so the line following
+ *    the pragma is no longer swallowed by the line skip;
+ *  - on a mismatch only the offending character is pushed back, without
+ *    forming a pointer before the buffer or pushing back a stray NUL.
+ */
+static lexer_token_t *lexer_minicpp(void) {
+    string_t *method = string_create();
+    bool      pragma = true;
+    int       ch     = 0;
+
+    for (const char *p = "pragma"; *p; p++) {
+        if ((ch = lexer_file_get()) != *p) {
+            pragma = false;
+            break;
+        }
+    }
+
+    if (pragma) {
+        /* collect the pragma text with all whitespace removed */
+        for (ch = lexer_file_get(); ch != EOF && ch && ch != '\n'; ch = lexer_file_get()) {
+            if (isspace(ch))
+                continue;
+            string_cat(method, ch);
+        }
+
+        char *buffer = string_buffer(method);
+
+        if (!strcmp(buffer, "warning_disable"))
+            compile_warning = false;
+        if (!strcmp(buffer, "warning_enable"))
+            compile_warning = true;
+    }
+
+    /*
+     * A newline read above ends this directive; hand it back so the skip
+     * below stops right there instead of eating the following line.
+     */
+    if (ch == '\n')
+        lexer_file_unget(ch);
+
+    lexer_skip_comment_line();
+    return lexer_read_token();
+}
+
+/*
+ * Read the next token straight from the input, bypassing the unget
+ * buffer. Whitespace, comments and preprocessor lines are skipped.
+ * Returns NULL at end of input.
+ *
+ * Fixes over the previous version:
+ *  - two dots must be followed by a third one; `..' plus an arbitrary
+ *    character is no longer accepted as an identifier;
+ *  - a `$' with the dollar extension disabled is reported as an
+ *    unexpected character instead of returning NULL, which callers would
+ *    take for end of input.
+ */
+static lexer_token_t *lexer_read_token(void) {
+    int c;
+    int n;
+
+    lexer_skip();
+
+    switch ((c = lexer_file_get())) {
+        case '0' ... '9':  return lexer_read_number(c);
+        case '"':          return lexer_read_string();
+        case '\'':         return lexer_read_character();
+        case 'a' ... 'z':
+        case 'A' ... 'K':
+        case 'M' ... 'Z':
+        case '_':
+            return lexer_read_identifier(c);
+        case '$':
+            if (opt_extension_test(EXTENSION_DOLLAR))
+                return lexer_read_identifier(c);
+            compile_error("Unexpected character: `%c`", c);
+            break;
+
+        /* `L' either prefixes a string or character literal or starts a name */
+        case 'L':
+            switch ((c = lexer_file_get())) {
+                case '"':  return lexer_read_string();
+                case '\'': return lexer_read_character();
+            }
+            lexer_file_unget(c);
+            return lexer_read_identifier('L');
+
+        case '/':
+            switch ((c = lexer_file_get())) {
+                case '/':
+                    lexer_skip_comment_line();
+                    return lexer_read_token();
+                case '*':
+                    lexer_skip_comment_block();
+                    return lexer_read_token();
+            }
+            if (c == '=')
+                return lexer_punct(LEXER_TOKEN_COMPOUND_DIV);
+            lexer_file_unget(c);
+            return lexer_punct('/');
+
+        /* preprocessor lines are ignored for now, apart from a few pragmas */
+        case '#':
+            return lexer_minicpp();
+
+        case '(': case ')':
+        case ',': case ';':
+        case '[': case ']':
+        case '{': case '}':
+        case '?': case ':':
+        case '~':
+            return lexer_punct(c);
+
+        case '+': return lexer_read_reclassify_two('+', LEXER_TOKEN_INCREMENT,    '=', LEXER_TOKEN_COMPOUND_ADD, '+');
+        case '&': return lexer_read_reclassify_two('&', LEXER_TOKEN_AND,          '=', LEXER_TOKEN_COMPOUND_AND, '&');
+        case '|': return lexer_read_reclassify_two('|', LEXER_TOKEN_OR,           '=', LEXER_TOKEN_COMPOUND_OR,  '|');
+        case '*': return lexer_read_reclassify_one('=', LEXER_TOKEN_COMPOUND_MUL, '*');
+        case '%': return lexer_read_reclassify_one('=', LEXER_TOKEN_COMPOUND_MOD, '%');
+        case '=': return lexer_read_reclassify_one('=', LEXER_TOKEN_EQUAL,        '=');
+        case '!': return lexer_read_reclassify_one('=', LEXER_TOKEN_NEQUAL,       '!');
+        case '^': return lexer_read_reclassify_one('=', LEXER_TOKEN_COMPOUND_XOR, '^');
+
+        case '-':
+            switch ((c = lexer_file_get())) {
+                case '-': return lexer_punct(LEXER_TOKEN_DECREMENT);
+                case '>': return lexer_punct(LEXER_TOKEN_ARROW);
+                case '=': return lexer_punct(LEXER_TOKEN_COMPOUND_SUB);
+                default:
+                    break;
+            }
+            lexer_file_unget(c);
+            return lexer_punct('-');
+
+        case '<':
+            if ((c = lexer_file_get()) == '=')
+                return lexer_punct(LEXER_TOKEN_LEQUAL);
+            if (c == '<')
+                return lexer_read_reclassify_one('=', LEXER_TOKEN_COMPOUND_LSHIFT, LEXER_TOKEN_LSHIFT);
+            lexer_file_unget(c);
+            return lexer_punct('<');
+        case '>':
+            if ((c = lexer_file_get()) == '=')
+                return lexer_punct(LEXER_TOKEN_GEQUAL);
+            if (c == '>')
+                return lexer_read_reclassify_one('=', LEXER_TOKEN_COMPOUND_RSHIFT, LEXER_TOKEN_RSHIFT);
+            lexer_file_unget(c);
+            return lexer_punct('>');
+
+        case '.':
+            n = lexer_file_get();
+            if (isdigit(n)) {
+                /* a number written with a leading dot */
+                lexer_file_unget(n);
+                return lexer_read_number(c);
+            }
+            if (n == '.') {
+                /* the ellipsis is handed out as an identifier spelled "..." */
+                int third = lexer_file_get();
+                if (third != '.') {
+                    compile_error("expected `...' for ellipsis");
+                    return NULL;
+                }
+                string_t *str = string_create();
+                string_catf(str, "..%c", third);
+                return lexer_identifier(str);
+            }
+            lexer_file_unget(n);
+            return lexer_punct('.');
+
+        case EOF:
+            return NULL;
+
+        default:
+            compile_error("Unexpected character: `%c`", c);
+    }
+    return NULL;
+}
+
+/* True when `token' exists and is the punctuation `c'. */
+bool lexer_ispunct(lexer_token_t *token, int c) {
+    if (!token)
+        return false;
+    if (token->type != LEXER_TOKEN_PUNCT)
+        return false;
+    return token->punct == c;
+}
+
+/* Hand `token' back so the next lexer_next returns it again; NULL is ignored. */
+void lexer_unget(lexer_token_t *token) {
+    if (token)
+        list_push(lexer_buffer, token);
+}
+
+/*
+ * Return the next token: the one handed back most recently when there is
+ * any, a freshly read one otherwise. NULL signals end of input.
+ */
+lexer_token_t *lexer_next(void) {
+    if (list_length(lexer_buffer) <= 0)
+        return lexer_read_token();
+    return list_pop(lexer_buffer);
+}
+
+/* Return the next token without consuming it. */
+lexer_token_t *lexer_peek(void) {
+    lexer_token_t *upcoming = lexer_next();
+
+    lexer_unget(upcoming);
+    return upcoming;
+}
+
+/*
+ * Render `token' as text for diagnostics. A NULL token renders as
+ * "(null)"; an unknown token type is an internal error.
+ *
+ * Fixes over the previous version:
+ *  - nothing is allocated for a NULL token;
+ *  - number tokens print their spelling; the lexer stores it in `string',
+ *    so formatting `integer' with %d printed a meaningless value;
+ *  - punctuation is printed from `punct' rather than by falling through
+ *    to the character case and reading `character'.
+ * Multi character punctuation other than `==' still renders as the
+ * single character its code converts to.
+ */
+char *lexer_token_string(lexer_token_t *token) {
+    if (!token)
+        return "(null)";
+
+    string_t *string = string_create();
+    switch (token->type) {
+        case LEXER_TOKEN_PUNCT:
+            if (token->punct == LEXER_TOKEN_EQUAL)
+                string_catf(string, "==");
+            else
+                string_cat(string, token->punct);
+            return string_buffer(string);
+        case LEXER_TOKEN_CHAR:
+            string_cat(string, token->character);
+            return string_buffer(string);
+        case LEXER_TOKEN_NUMBER:
+            string_catf(string, "%s", token->string);
+            return string_buffer(string);
+        case LEXER_TOKEN_STRING:
+            string_catf(string, "\"%s\"", token->string);
+            return string_buffer(string);
+        case LEXER_TOKEN_IDENTIFIER:
+            return token->string;
+        default:
+            break;
+    }
+    compile_ice("unexpected token");
+    return NULL;
+}
+
+/* Return the current input position formatted as "file:line". */
+char *lexer_marker(void) {
+    string_t *string = string_create();
+    string_catf(string, "%s:%zu", lexer_file.file, lexer_file.line);
+    return string_buffer(string);
+}