From e8938573442eb3f5f550420ff3a0eb703730c1a3 Mon Sep 17 00:00:00 2001 From: unknown <> Date: Mon, 17 Nov 2014 20:13:28 +0000 Subject: [PATCH] autocommit for files dated 2014-11-17 20:13:28 --- ast.c | 804 ++++++++++++++++++++++++++++++++++ conv.c | 118 +++++ decl.c | 339 +++++++++++++++ gen.c | 298 +++++++++++++ gen_amd64.c | 1201 +++++++++++++++++++++++++++++++++++++++++++++++++++ init.c | 302 +++++++++++++ lexer.c | 505 ++++++++++++++++++++++ 7 files changed, 3567 insertions(+) create mode 100644 ast.c create mode 100644 conv.c create mode 100644 decl.c create mode 100644 gen.c create mode 100644 gen_amd64.c create mode 100644 init.c create mode 100644 lexer.c diff --git a/ast.c b/ast.c new file mode 100644 index 0000000..38204fe --- /dev/null +++ b/ast.c @@ -0,0 +1,804 @@ +#include +#include +#include + +#include "lice.h" +#include "ast.h" +#include "lexer.h" +#include "conv.h" + +data_type_t *ast_data_table[AST_DATA_COUNT] = { + &(data_type_t) { TYPE_VOID, 0, true }, /* void */ + &(data_type_t) { TYPE_BOOL, ARCH_TYPE_SIZE_INT, false}, /* _Bool */ + &(data_type_t) { TYPE_LONG, ARCH_TYPE_SIZE_LONG, true }, /* long */ + &(data_type_t) { TYPE_LLONG, ARCH_TYPE_SIZE_LLONG, true }, /* long long */ + &(data_type_t) { TYPE_INT, ARCH_TYPE_SIZE_INT, true }, /* int */ + &(data_type_t) { TYPE_SHORT, ARCH_TYPE_SIZE_SHORT, true }, /* short */ + &(data_type_t) { TYPE_CHAR, ARCH_TYPE_SIZE_CHAR, true }, /* char */ + &(data_type_t) { TYPE_FLOAT, ARCH_TYPE_SIZE_FLOAT, true }, /* float */ + &(data_type_t) { TYPE_DOUBLE, ARCH_TYPE_SIZE_DOUBLE, true }, /* double */ + &(data_type_t) { TYPE_LDOUBLE, ARCH_TYPE_SIZE_LDOUBLE, true }, /* long double */ + &(data_type_t) { TYPE_LONG, ARCH_TYPE_SIZE_LONG, false }, /* unsigned long */ + &(data_type_t) { TYPE_LLONG, ARCH_TYPE_SIZE_LLONG, false }, /* unsigned long long */ + NULL /* function */ +}; + +data_type_t *ast_data_function = NULL; +list_t *ast_locals = NULL; +list_t *ast_gotos = NULL; +table_t *ast_labels = NULL; +table_t *ast_globalenv 
= &SENTINEL_TABLE; +table_t *ast_localenv = &SENTINEL_TABLE; +table_t *ast_structures = &SENTINEL_TABLE; +table_t *ast_unions = &SENTINEL_TABLE; + +bool ast_struct_compare(data_type_t *a, data_type_t *b) { + list_t *la; + list_t *lb; + list_iterator_t *lait; + list_iterator_t *lbit; + + if (a->type != b->type) + return false; + + switch (a->type) { + case TYPE_ARRAY: + if (a->length == b->length) + return ast_struct_compare(a->pointer, b->pointer); + return false; + + case TYPE_POINTER: + return ast_struct_compare(a->pointer, b->pointer); + + case TYPE_STRUCTURE: + if (a->isstruct != b->isstruct) + return false; + + la = table_keys(a->fields); + lb = table_keys(b->fields); + + if (list_length(la) != list_length(lb)) + return false; + + lait = list_iterator(la); + lbit = list_iterator(lb); + + while (!list_iterator_end(lait)) + if (!ast_struct_compare(list_iterator_next(lait), list_iterator_next(lbit))) + return false; + + default: + return true; + } + return false; +} + +data_type_t *ast_result_type(int operation, data_type_t *type) { + switch (operation) { + case AST_TYPE_LEQUAL: + case AST_TYPE_GEQUAL: + case AST_TYPE_EQUAL: + case AST_TYPE_NEQUAL: + case '<': + case '>': + return ast_data_table[AST_DATA_INT]; + default: + return conv_senority(type, type); + } +} + +ast_t *ast_copy(ast_t *ast) { + ast_t *copy = memory_allocate(sizeof(ast_t)); + *copy = *ast; + return copy; +} + +ast_t *ast_structure_reference(data_type_t *type, ast_t *structure, char *name) { + return ast_copy(&(ast_t) { + .type = AST_TYPE_STRUCT, + .ctype = type, + .structure = structure, + .field = name + }); +} + +ast_t *ast_new_unary(int type, data_type_t *data, ast_t *operand) { + return ast_copy(&(ast_t) { + .type = type, + .ctype = data, + .unary.operand = operand + }); +} + +ast_t *ast_new_binary(data_type_t *ctype, int type, ast_t *left, ast_t *right) { + ast_t *ast = ast_copy(&(ast_t){ + .type = type, + .ctype = ctype + }); + ast->left = left; + ast->right = right; + return ast; +} + 
+/* Integer literal node of type `type` holding `value`. */
+ast_t *ast_new_integer(data_type_t *type, int value) {
+    ast_t node = {
+        .type    = AST_TYPE_LITERAL,
+        .ctype   = type,
+        .integer = value
+    };
+    return ast_copy(&node);
+}
+
+/* Floating literal node of type `type`; its label starts out unset. */
+ast_t *ast_new_floating(data_type_t *type, double value) {
+    ast_t node = {
+        .type           = AST_TYPE_LITERAL,
+        .ctype          = type,
+        .floating.value = value,
+        .floating.label = NULL
+    };
+    return ast_copy(&node);
+}
+
+/*
+ * String literal node.  Its type is an array of char sized to hold
+ * `value` plus the terminating NUL; its label starts out unset.
+ */
+ast_t *ast_new_string(char *value) {
+    data_type_t *type = ast_array(ast_data_table[AST_DATA_CHAR], strlen(value) + 1);
+    ast_t        node = {
+        .type         = AST_TYPE_STRING,
+        .ctype        = type,
+        .string.data  = value,
+        .string.label = NULL
+    };
+    return ast_copy(&node);
+}
+
+/*
+ * Local variable node.  The node is entered into ast_localenv under
+ * `name` and appended to ast_locals, each only when that global is
+ * non-NULL.
+ */
+ast_t *ast_variable_local(data_type_t *type, char *name) {
+    ast_t node = {
+        .type          = AST_TYPE_VAR_LOCAL,
+        .ctype         = type,
+        .variable.name = name
+    };
+    ast_t *variable = ast_copy(&node);
+
+    if (ast_localenv)
+        table_insert(ast_localenv, name, variable);
+    if (ast_locals)
+        list_push(ast_locals, variable);
+
+    return variable;
+}
+
+/*
+ * Global variable node.  `name` doubles as the label, and the node is
+ * entered into ast_globalenv under `name`.
+ */
+ast_t *ast_variable_global(data_type_t *type, char *name) {
+    ast_t node = {
+        .type           = AST_TYPE_VAR_GLOBAL,
+        .ctype          = type,
+        .variable.name  = name,
+        .variable.label = name
+    };
+    ast_t *variable = ast_copy(&node);
+
+    table_insert(ast_globalenv, name, variable);
+    return variable;
+}
+
+/* Function definition node; `ret` is stored as the node's ctype. */
+ast_t *ast_function(data_type_t *ret, char *name, list_t *params, ast_t *body, list_t *locals) {
+    ast_t node = {
+        .type            = AST_TYPE_FUNCTION,
+        .ctype           = ret,
+        .function.name   = name,
+        .function.params = params,
+        .function.locals = locals,
+        .function.body   = body
+    };
+    return ast_copy(&node);
+}
+
+/* Designator node naming `func`; it is typed void. */
+ast_t *ast_designator(char *name, ast_t *func) {
+    ast_t node = {
+        .type                          = AST_TYPE_DESIGNATOR,
+        .ctype                         = ast_data_table[AST_DATA_VOID],
+        .function.name                 = name,
+        .function.call.functionpointer = func
+    };
+    return ast_copy(&node);
+}
+
+/*
+ * Call through a function pointer.  The node's type is the return
+ * type of the function type that `functionpointer` points to.
+ */
+ast_t *ast_pointercall(ast_t *functionpointer, list_t *args) {
+    data_type_t *result = functionpointer->ctype->pointer->returntype;
+    ast_t        node   = {
+        .type                          = AST_TYPE_POINTERCALL,
+        .ctype                         = result,
+        .function.call.functionpointer = functionpointer,
+        .function.call.args            = args
+    };
+    return ast_copy(&node);
+}
+
+/*
+ * Direct call of the function `name` whose function type is `type`;
+ * the node's type is that function type's return type.
+ */
+ast_t *ast_call(data_type_t *type, char *name, list_t *arguments) {
+    ast_t node = {
+        .type               = AST_TYPE_CALL,
+        .ctype              = type->returntype,
+        .function.call.args = arguments,
+        .function.call.type = type,
+        .function.name      = name,
+    };
+    return ast_copy(&node);
+}
+
+/* va_start node over `ap`; it is typed void. */
+ast_t *ast_va_start(ast_t *ap) {
+    ast_t node = {
+        .type  = AST_TYPE_VA_START,
+        .ctype = ast_data_table[AST_DATA_VOID],
+        .ap    = ap
+    };
+    return ast_copy(&node);
+}
+
+/* va_arg node fetching a value of type `type` from `ap`. */
+ast_t *ast_va_arg(data_type_t *type, ast_t *ap) {
+    ast_t node = {
+        .type  = AST_TYPE_VA_ARG,
+        .ctype = type,
+        .ap    = ap
+    };
+    return ast_copy(&node);
+}
+
+/* Declaration of `var` with the initializer list `init`; untyped. */
+ast_t *ast_declaration(ast_t *var, list_t *init) {
+    ast_t node = {
+        .type      = AST_TYPE_DECLARATION,
+        .ctype     = NULL,
+        .decl.var  = var,
+        .decl.init = init,
+    };
+    return ast_copy(&node);
+}
+
+/* Initializer storing `value` as type `to` at `offset`. */
+ast_t *ast_initializer(ast_t *value, data_type_t *to, int offset) {
+    ast_t node = {
+        .type        = AST_TYPE_INITIALIZER,
+        .init.value  = value,
+        .init.offset = offset,
+        .init.type   = to
+    };
+    return ast_copy(&node);
+}
+
+/* Ternary expression `cond ? then : last` of type `type`. */
+ast_t *ast_ternary(data_type_t *type, ast_t *cond, ast_t *then, ast_t *last) {
+    ast_t node = {
+        .type        = AST_TYPE_EXPRESSION_TERNARY,
+        .ctype       = type,
+        .ifstmt.cond = cond,
+        .ifstmt.then = then,
+        .ifstmt.last = last
+    };
+    return ast_copy(&node);
+}
+
+/* Common builder for the loop statements, which share forstmt. */
+static ast_t *ast_for_intermediate(int type, ast_t *init, ast_t *cond, ast_t *step, ast_t *body) {
+    ast_t node = {
+        .type         = type,
+        .ctype        = NULL,
+        .forstmt.init = init,
+        .forstmt.cond = cond,
+        .forstmt.step = step,
+        .forstmt.body = body
+    };
+    return ast_copy(&node);
+}
+
+/* switch statement over `expr` with the given `body`. */
+ast_t *ast_switch(ast_t *expr, ast_t *body) {
+    ast_t node = {
+        .type            = AST_TYPE_STATEMENT_SWITCH,
+        .switchstmt.expr = expr,
+        .switchstmt.body = body
+    };
+    return ast_copy(&node);
+}
+
+/* case label storing `begin` and `end` as casebeg / caseend. */
+ast_t *ast_case(int begin, int end) {
+    ast_t node = {
+        .type    = AST_TYPE_STATEMENT_CASE,
+        .casebeg = begin,
+        .caseend = end
+    };
+    return ast_copy(&node);
+}
+
+/* Bare node of kind `type`; every other member is zeroed. */
+ast_t *ast_make(int type) {
+    ast_t node = {
+        .type = type
+    };
+    return ast_copy(&node);
+}
+
+/* if statement; `last` is the else branch. */
+ast_t *ast_if(ast_t *cond, ast_t *then, ast_t *last) {
+    ast_t node = {
+        .type        = AST_TYPE_STATEMENT_IF,
+        .ctype       = NULL,
+        .ifstmt.cond = cond,
+        .ifstmt.then = then,
+        .ifstmt.last = last
+    };
+    return ast_copy(&node);
+}
+
+/* for statement. */
+ast_t *ast_for(ast_t *init, ast_t *cond, ast_t *step, ast_t *body) {
+    return ast_for_intermediate(AST_TYPE_STATEMENT_FOR, init, cond, step, body);
+}
+
+/* while statement: a loop node without init and step. */
+ast_t *ast_while(ast_t *cond, ast_t *body) {
+    return ast_for_intermediate(AST_TYPE_STATEMENT_WHILE, NULL, cond, NULL, body);
+}
+
+/* do statement: a loop node without init and step. */
+ast_t *ast_do(ast_t *cond, ast_t *body) {
+    return ast_for_intermediate(AST_TYPE_STATEMENT_DO, NULL, cond, NULL, body);
+}
+
+/* goto statement targeting `label`; its destination starts out unset. */
+ast_t *ast_goto(char *label) {
+    ast_t node = {
+        .type           = AST_TYPE_STATEMENT_GOTO,
+        .gotostmt.label = label,
+        .gotostmt.where = NULL
+    };
+    return ast_copy(&node);
+}
+
+/* Label statement named `label`. */
+ast_t *ast_new_label(char *label) {
+    ast_t node = {
+        .type           = AST_TYPE_STATEMENT_LABEL,
+        .gotostmt.label = label,
+        .gotostmt.where = NULL
+    };
+    return ast_copy(&node);
+}
+
+/* return statement carrying `value`. */
+ast_t *ast_return(ast_t *value) {
+    ast_t node = {
+        .type       = AST_TYPE_STATEMENT_RETURN,
+        .returnstmt = value
+    };
+    return ast_copy(&node);
+}
+
+/* Compound statement over the list `statements`. */
+ast_t *ast_compound(list_t *statements) {
+    ast_t node = {
+        .type     = AST_TYPE_STATEMENT_COMPOUND,
+        .ctype    = NULL,
+        .compound = statements
+    };
+    return ast_copy(&node);
+}
+
+/*
+ * New structure type of `size` with the field table `fields`.
+ * `isstruct` is stored on the type as given.
+ */
+data_type_t *ast_structure_new(table_t *fields, int size, bool isstruct) {
+    data_type_t structure = {
+        .type     = TYPE_STRUCTURE,
+        .size     = size,
+        .fields   = fields,
+        .isstruct = isstruct
+    };
+    return ast_type_copy(&structure);
+}
+
+/* Next label of the form ".L<n>"; n counts up from 0 on each call. */
+char *ast_label(void) {
+    static int index = 0;
+
+    string_t *name = string_create();
+    string_catf(name, ".L%d", index++);
+    return string_buffer(name);
+}
+
+/* Node for the address of `label`; it is typed void *. */
+ast_t *ast_label_address(char *label) {
+    ast_t node = {
+        .type           = AST_TYPE_STATEMENT_LABEL_COMPUTED,
+        .ctype          = ast_pointer(ast_data_table[AST_DATA_VOID]),
+        .gotostmt.label = label
+    };
+    return ast_copy(&node);
+}
+
+/* Computed goto jumping to the value of `expression`. */
+ast_t *ast_goto_computed(ast_t *expression) {
+    ast_t node = {
+        .type          = AST_TYPE_STATEMENT_GOTO_COMPUTED,
+        .unary.operand = expression
+    };
+    return ast_copy(&node);
+}
+
+/* True for _Bool, char, short, int, long and long long. */
+bool ast_type_isinteger(data_type_t *type) {
+    return type->type == TYPE_BOOL
+        || type->type == TYPE_CHAR
+        || type->type == TYPE_SHORT
+        || type->type == TYPE_INT
+        || type->type == TYPE_LONG
+        || type->type == TYPE_LLONG;
+}
+
+bool ast_type_isfloating(data_type_t *type) { + switch (type->type) { + case TYPE_FLOAT: + case TYPE_DOUBLE: + case TYPE_LDOUBLE: + return true; + default: + return false; + } +} + +bool ast_type_isstring(data_type_t *type) { + return type->type == TYPE_ARRAY && type->pointer->type == TYPE_CHAR; +} + +data_type_t *ast_type_copy(data_type_t *type) { + return memcpy(memory_allocate(sizeof(data_type_t)), type, sizeof(data_type_t)); +} + +data_type_t *ast_type_copy_incomplete(data_type_t *type) { + if (!type) + return NULL; + return (type->length == -1) + ? ast_type_copy(type) + : type; +} + +data_type_t *ast_type_create(type_t type, bool sign) { + + data_type_t *t = memory_allocate(sizeof(data_type_t)); + + t->type = type; + t->sign = sign; + + switch (type) { + case TYPE_VOID: t->size = 0; break; + case TYPE_BOOL: t->size = ARCH_TYPE_SIZE_INT; break; + case TYPE_CHAR: t->size = ARCH_TYPE_SIZE_CHAR; break; + case TYPE_SHORT: t->size = ARCH_TYPE_SIZE_SHORT; break; + case TYPE_INT: t->size = ARCH_TYPE_SIZE_INT; break; + case TYPE_LONG: t->size = ARCH_TYPE_SIZE_LONG; break; + case TYPE_LLONG: t->size = ARCH_TYPE_SIZE_LLONG; break; + case TYPE_FLOAT: t->size = ARCH_TYPE_SIZE_FLOAT; break; + case TYPE_DOUBLE: t->size = ARCH_TYPE_SIZE_DOUBLE; break; + case TYPE_LDOUBLE: t->size = ARCH_TYPE_SIZE_LDOUBLE; break; + default: + compile_error("ICE"); + } + + return t; +} + +data_type_t *ast_type_stub(void) { + return ast_type_copy(&(data_type_t) { + .type = TYPE_CDECL, + .size = 0 + }); +} + +ast_t *ast_type_convert(data_type_t *type, ast_t *ast) { + return ast_copy(&(ast_t){ + .type = AST_TYPE_CONVERT, + .ctype = type, + .unary.operand = ast + }); +} + +data_type_t *ast_prototype(data_type_t *returntype, list_t *paramtypes, bool dots) { + return ast_type_copy(&(data_type_t){ + .type = TYPE_FUNCTION, + .returntype = returntype, + .parameters = paramtypes, + .hasdots = dots + }); +} + +data_type_t *ast_array(data_type_t *type, int length) { + return ast_type_copy(&(data_type_t){ 
+ .type = TYPE_ARRAY, + .pointer = type, + .size = (length < 0) ? -1 : type->size * length, + .length = length + }); +} + +data_type_t *ast_array_convert(data_type_t *type) { + if (type->type != TYPE_ARRAY) + return type; + return ast_pointer(type->pointer); +} + +ast_t *ast_designator_convert(ast_t *ast) { + if (!ast) + return NULL; + if (ast->type == AST_TYPE_DESIGNATOR) { + return ast_new_unary( + AST_TYPE_ADDRESS, + ast_pointer(ast->function.call.functionpointer->function.call.type), + ast->function.call.functionpointer + ); + } + + return ast; +} + +data_type_t *ast_pointer(data_type_t *type) { + return ast_type_copy(&(data_type_t){ + .type = TYPE_POINTER, + .pointer = type, + .size = ARCH_TYPE_SIZE_POINTER + }); +} + +const char *ast_type_string(data_type_t *type) { + string_t *string; + + switch (type->type) { + case TYPE_VOID: return "void"; + case TYPE_BOOL: return "_Bool"; + case TYPE_INT: return "int"; + case TYPE_CHAR: return "char"; + case TYPE_LONG: return "long"; + case TYPE_LLONG: return "long long"; + case TYPE_SHORT: return "short"; + case TYPE_FLOAT: return "float"; + case TYPE_DOUBLE: return "double"; + case TYPE_LDOUBLE: return "long double"; + + case TYPE_FUNCTION: + string = string_create(); + string_cat(string, '('); + for (list_iterator_t *it = list_iterator(type->parameters); !list_iterator_end(it); ) { + data_type_t *next = list_iterator_next(it); + string_catf(string, "%s", ast_type_string(next)); + if (!list_iterator_end(it)) + string_cat(string, ','); + } + string_catf(string, ") -> %s", ast_type_string(type->returntype)); + return string_buffer(string); + + case TYPE_POINTER: + string = string_create(); + string_catf(string, "%s*", ast_type_string(type->pointer)); + return string_buffer(string); + + case TYPE_ARRAY: + string = string_create(); + string_catf( + string, + "%s[%d]", + ast_type_string(type->pointer), + type->length + ); + return string_buffer(string); + + case TYPE_STRUCTURE: + string = string_create(); + 
string_catf(string, "(struct"); + for (list_iterator_t *it = list_iterator(table_values(type->fields)); !list_iterator_end(it); ) { + data_type_t *ftype = list_iterator_next(it); + if (ftype->bitfield.size < 0) { + string_catf(string, " (%s)", ast_type_string(ftype)); + } else { + string_catf( + string, + "(%s:%d:%d)", + ast_type_string(ftype), + ftype->bitfield.offset, + ftype->bitfield.offset + ftype->bitfield.size + ); + } + } + string_cat(string, ')'); + return string_buffer(string); + + default: + break; + } + return NULL; +} + +static void ast_string_unary(string_t *string, const char *op, ast_t *ast) { + string_catf(string, "(%s %s)", op, ast_string(ast->unary.operand)); +} + +static void ast_string_binary(string_t *string, const char *op, ast_t *ast) { + string_catf(string, "(%s %s %s)", op, ast_string(ast->left), ast_string(ast->right)); +} + +static void ast_string_initialization_declaration(string_t *string, list_t *initlist) { + if (!initlist) + return; + + for (list_iterator_t *it = list_iterator(initlist); !list_iterator_end(it); ) { + ast_t *init = list_iterator_next(it); + string_catf(string, "%s", ast_string(init)); + if (!list_iterator_end(it)) + string_cat(string, ' '); + } +} + +static void ast_string_impl(string_t *string, ast_t *ast) { + char *left = NULL; + char *right = NULL; + + if (!ast) { + string_catf(string, "(null)"); + return; + } + + switch (ast->type) { + case AST_TYPE_LITERAL: + switch (ast->ctype->type) { + case TYPE_INT: + case TYPE_SHORT: + string_catf(string, "%d", ast->integer); + break; + + case TYPE_FLOAT: + case TYPE_DOUBLE: + string_catf(string, "%f", ast->floating.value); + break; + + case TYPE_LONG: + string_catf(string, "%ldL", ast->integer); + break; + + case TYPE_CHAR: + if (ast->integer == '\n') + string_catf(string, "'\n'"); + else if (ast->integer == '\\') + string_catf(string, "'\\\\'"); + else if (ast->integer == '\0') + string_catf(string, "'\\0'"); + else + string_catf(string, "'%c'", ast->integer); + break; + 
+ default: + compile_ice("ast_string_impl"); + break; + } + break; + + case AST_TYPE_STRING: + string_catf(string, "\"%s\"", string_quote(ast->string.data)); + break; + + case AST_TYPE_VAR_LOCAL: + string_catf(string, "%s", ast->variable.name); + if (ast->variable.init) { + string_cat(string, '('); + ast_string_initialization_declaration(string, ast->variable.init); + string_cat(string, ')'); + } + break; + + case AST_TYPE_VAR_GLOBAL: + string_catf(string, "%s", ast->variable.name); + break; + + case AST_TYPE_CALL: + case AST_TYPE_POINTERCALL: + string_catf(string, "(%s)%s(", ast_type_string(ast->ctype), + (ast->type == AST_TYPE_CALL) + ?ast->function.name + : ast_string(ast) + ); + + for (list_iterator_t *it = list_iterator(ast->function.call.args); !list_iterator_end(it); ) { + string_catf(string, "%s", ast_string(list_iterator_next(it))); + if (!list_iterator_end(it)) + string_cat(string, ','); + } + string_cat(string, ')'); + break; + + case AST_TYPE_FUNCTION: + string_catf(string, "(%s)%s(", ast_type_string(ast->ctype), ast->function.name); + for (list_iterator_t *it = list_iterator(ast->function.params); !list_iterator_end(it); ) { + ast_t *param = list_iterator_next(it); + string_catf(string, "%s %s", ast_type_string(param->ctype), ast_string(param)); + if (!list_iterator_end(it)) + string_cat(string, ','); + } + string_cat(string, ')'); + ast_string_impl(string, ast->function.body); + break; + + case AST_TYPE_DECLARATION: + string_catf(string, "(decl %s %s ", + ast_type_string(ast->decl.var->ctype), + ast->decl.var->variable.name + ); + ast_string_initialization_declaration(string, ast->decl.init); + string_cat(string, ')'); + break; + + case AST_TYPE_INITIALIZER: + string_catf(string, "%s@%d", ast_string(ast->init.value), ast->init.offset); + break; + + case AST_TYPE_CONVERT: + string_catf(string, "(convert %s -> %s)", ast_string(ast->unary.operand), ast_type_string(ast->ctype)); + break; + + case AST_TYPE_STATEMENT_COMPOUND: + string_cat(string, '{'); + 
for (list_iterator_t *it = list_iterator(ast->compound); !list_iterator_end(it); ) { + ast_string_impl(string, list_iterator_next(it)); + string_cat(string, ';'); + } + string_cat(string, '}'); + break; + + case AST_TYPE_STRUCT: + ast_string_impl(string, ast->structure); + string_cat(string, '.'); + string_catf(string, ast->field); + break; + + case AST_TYPE_EXPRESSION_TERNARY: + string_catf(string, "(? %s %s %s)", + ast_string(ast->ifstmt.cond), + ast_string(ast->ifstmt.then), + ast_string(ast->ifstmt.last) + ); + break; + + case AST_TYPE_STATEMENT_IF: + string_catf(string, "(if %s %s", ast_string(ast->ifstmt.cond), ast_string(ast->ifstmt.then)); + if (ast->ifstmt.last) + string_catf(string, " %s", ast_string(ast->ifstmt.last)); + string_cat(string, ')'); + break; + + case AST_TYPE_STATEMENT_FOR: + string_catf(string, "(for %s %s %s %s)", + ast_string(ast->forstmt.init), + ast_string(ast->forstmt.cond), + ast_string(ast->forstmt.step), + ast_string(ast->forstmt.body) + ); + break; + + case AST_TYPE_STATEMENT_WHILE: + string_catf(string, "(while %s %s)", + ast_string(ast->forstmt.cond), + ast_string(ast->forstmt.body) + ); + break; + + case AST_TYPE_STATEMENT_DO: + string_catf(string, "(do %s %s)", + ast_string(ast->forstmt.cond), + ast_string(ast->forstmt.body) + ); + break; + + case AST_TYPE_STATEMENT_RETURN: + string_catf(string, "(return %s)", ast_string(ast->returnstmt)); + break; + + case AST_TYPE_LRSHIFT: ast_string_binary(string, ">>", ast); break; + case AST_TYPE_ADDRESS: ast_string_unary (string, "addr", ast); break; + case AST_TYPE_DEREFERENCE: ast_string_unary (string, "deref", ast); break; + + + case LEXER_TOKEN_COMPOUND_LSHIFT: ast_string_binary(string, "<<=", ast); break; + case LEXER_TOKEN_COMPOUND_RSHIFT: ast_string_binary(string, ">>=", ast); break; + case AST_TYPE_POST_INCREMENT: ast_string_unary (string, "postinc", ast); break; + case AST_TYPE_POST_DECREMENT: ast_string_unary (string, "postdec", ast); break; + case AST_TYPE_PRE_INCREMENT: 
ast_string_unary (string, "preinc", ast); break; + case AST_TYPE_PRE_DECREMENT: ast_string_unary (string, "predec", ast); break; + case AST_TYPE_NEGATE: ast_string_unary (string, "negate", ast); break; + case '!': ast_string_unary (string, "bitnot", ast); break; + case '&': ast_string_binary(string, "bitand", ast); break; + case '|': ast_string_binary(string, "bitor", ast); break; + case AST_TYPE_AND: ast_string_binary(string, "logand", ast); break; + case AST_TYPE_OR: ast_string_binary(string, "logor", ast); break; + case AST_TYPE_GEQUAL: ast_string_binary(string, "gteq", ast); break; + case AST_TYPE_LEQUAL: ast_string_binary(string, "lteq", ast); break; + case AST_TYPE_NEQUAL: ast_string_binary(string, "ne", ast); break; + case AST_TYPE_LSHIFT: ast_string_binary(string, "lshift", ast); break; + case AST_TYPE_RSHIFT: ast_string_binary(string, "rshift", ast); break; + + case AST_TYPE_DESIGNATOR: + string_catf(string, "(designator %s)", ast_string(ast->function.call.functionpointer)); + break; + + case AST_TYPE_EXPRESSION_CAST: + string_catf(string, "((%s) -> (%s) %s)", + ast_type_string(ast->unary.operand->ctype), + ast_type_string(ast->ctype), + ast_string(ast->unary.operand) + ); + break; + + case AST_TYPE_STATEMENT_LABEL_COMPUTED: + string_catf(string, "(labeladdr %s)", ast->gotostmt.label); + break; + + default: + if (!ast->left || !ast->right) + break; + + left = ast_string(ast->left); + right = ast_string(ast->right); + if (ast->type == LEXER_TOKEN_EQUAL) + string_catf(string, "(== %s %s)", left, right); + else + string_catf(string, "(%c %s %s)", ast->type, left, right); + } +} + +char *ast_string(ast_t *ast) { + string_t *string = string_create(); + ast_string_impl(string, ast); + return string_buffer(string); +} diff --git a/conv.c b/conv.c new file mode 100644 index 0000000..31412de --- /dev/null +++ b/conv.c @@ -0,0 +1,118 @@ +/* + * The complicated C rule set for type conversion. 
This is a full research
+ * oriented approach, run against the standard, and tons of trial and
+ * error.
+ *
+ * A little bit about what is involved in type conversion:
+ *  - Arithmetic type rules
+ *  - Implicit conversion
+ *  - Explicit conversion
+ *
+ * 1. Arithmetic type rules:
+ *      The C standard defines a set of rules about arithmetic type
+ *      conversion, known as the conversion rank rules, which
+ *      essentially dictate which sides of an expression need to be
+ *      converted.
+ *
+ *      First rule:
+ *      If the left hand side of an expression isn't an arithmetic type
+ *      or the right hand side of an expression isn't an arithmetic type
+ *      no conversion takes place.
+ *
+ *      Second rule:
+ *      If the conversion rank of the left hand side expression type
+ *      is less than the conversion rank of the right hand side
+ *      expression type, then the left hand side of that expression's type
+ *      gets converted to the right hand's type.
+ *
+ *      Third rule:
+ *      If the conversion rank of the left hand expression type doesn't
+ *      compare equal to the right hand's type, then the right hand side of
+ *      that expression's type gets converted to the left hand's type.
+ *
+ *      Last rule:
+ *      If none of the above applies, then nothing is subjected to conversion,
+ *      and doesn't need to be converted, unless one of the following holds:
+ *
+ *          The binary expression with which each operand is associated
+ *          happens to be a relational one, in which case the type is
+ *          converted to integer type.
+ *
+ *          The expression happens to be of array type, in which case the array
+ *          decays to a pointer to its base type.
+ *
+ * 2. Implicit conversion
+ *      Implicit type conversion takes place in two scenarios: first, when
+ *      conversion ranking is involved (promoted types), or second, when
+ *      the operand is the subject of a shift operation, where the larger
+ *      type is always assumed to satisfy the shift operation.
+ *
+ * 3. 
Explicit conversion + * The type which is assumed in explicit conversion (casting) is + * the type in which the operand is converted to, unless the conversion + * isn't legal (vector -> scalar for instance) + */ +#include "ast.h" +#include "lice.h" + +bool conv_capable(data_type_t *type) { + return ast_type_isinteger(type) || ast_type_isfloating(type); +} + +int conv_rank(data_type_t *type) { + if (!conv_capable(type)) + goto error; + + switch (type->type) { + case TYPE_BOOL: return 0; + case TYPE_CHAR: return 1; + case TYPE_SHORT: return 2; + case TYPE_INT: return 3; + case TYPE_LONG: return 4; + case TYPE_LLONG: return 5; + case TYPE_FLOAT: return 6; + case TYPE_DOUBLE: return 7; + case TYPE_LDOUBLE: return 8; + default: + goto error; + } + +error: + compile_ice("conv_rank"); +} + +data_type_t *conv_senority(data_type_t *lhs, data_type_t *rhs) { + return conv_rank(lhs) < conv_rank(rhs) ? rhs : lhs; +} + +ast_t *conv_usual(int operation, ast_t *left, ast_t *right) { + if (!conv_capable(left->ctype) || !conv_capable(right->ctype)) { + data_type_t *result; + switch (operation) { + case AST_TYPE_LEQUAL: + case AST_TYPE_GEQUAL: + case AST_TYPE_EQUAL: + case AST_TYPE_NEQUAL: + case '<': + case '>': + result = ast_data_table[AST_DATA_INT]; + break; + default: + result = ast_array_convert(left->ctype); + break; + } + + return ast_new_binary(result, operation, left, right); + } + + int lrank = conv_rank(left->ctype); + int rrank = conv_rank(right->ctype); + + if (lrank < rrank) + left = ast_type_convert(right->ctype, left); + else if (lrank != rrank) + right = ast_type_convert(left->ctype, right); + + data_type_t *result = ast_result_type(operation, left->ctype); + return ast_new_binary(result, operation, left, right); +} diff --git a/decl.c b/decl.c new file mode 100644 index 0000000..450e617 --- /dev/null +++ b/decl.c @@ -0,0 +1,339 @@ +/* + * Deals with all the complexity in C's declaration specification with + * a rather large state machine model. 
C has a lot of ways to specify
+ * something, that happens to be equivalent to other meanings, which are
+ * also used. This state machine monitors the occurrence of certain
+ * identifiers to build a series of on/off states which ultimately
+ * allows us to disambiguate the meaning, while at the same time enforcing
+ * correctness.
+ *
+ * For instance it isn't legal in C to have a typedef of a 'signed' size
+ * specified type, then use that typedef with another size specifier.
+ * More of these rules apply as well, and are documented in the state
+ * machine set logic.
+ *
+ * Once the state machine has completed its work the get function uses
+ * the state of the machine to determine what type to return from the
+ * ast data table for types, or if there needs to be a new type created
+ * to compensate for the declaration. Similarly at this stage the state
+ * can be invalid (if something went terribly wrong) and we can handle,
+ * or ice.
+ *
+ * The main entry point is decl_spec and it's called from the parser,
+ * if everything passes the callsite gets a data_type_t of the type
+ * specified.
+ */ +#include + +#include "parse.h" +#include "lice.h" +#include "lexer.h" + +typedef enum { + SPEC_TYPE_NULL, + SPEC_TYPE_VOID, + SPEC_TYPE_BOOL, + SPEC_TYPE_CHAR, + SPEC_TYPE_INT, + SPEC_TYPE_FLOAT, + SPEC_TYPE_DOUBLE, +} spec_type_t; + +typedef enum { + SPEC_SIZE_NULL, + SPEC_SIZE_SHORT, + SPEC_SIZE_LONG, + SPEC_SIZE_LLONG +} spec_size_t; + +typedef enum { + SPEC_SIGN_NULL, + SPEC_SIGN_SIGNED, + SPEC_SIGN_UNSIGNED +} spec_sign_t; + +static const char *spec_type_string[] = { + "null", "void", "_Bool", "char", + "int", "float", "double" +}; + +static const char *spec_size_string[] = { + "null", "short", "long", "long long" +}; + +static const char *spec_sign_string[] = { + "null", "signed", "unsigned" +}; + +static const char *spec_var_string[] = { + "null", "type", "size", "sign", "user" +}; + +typedef struct { + storage_t class; + spec_type_t type; + spec_size_t size; + spec_sign_t sign; + data_type_t *user; + bool kconst; + bool kvolatile; + bool kinline; +} decl_spec_t; + +typedef enum { + SPEC_VAR_NULL, + SPEC_VAR_TYPE, + SPEC_VAR_SIZE, + SPEC_VAR_SIGN, + SPEC_VAR_USER +} decl_var_t; + +#define decl_spec_error(X, SELECT) \ + decl_spec_error_impl((X), (SELECT), __LINE__) + +static const char *debug_storage_string(const storage_t class) { + switch (class) { + case STORAGE_AUTO: return "auto"; + case STORAGE_EXTERN: return "extern"; + case STORAGE_REGISTER: return "register"; + case STORAGE_STATIC: return "static"; + case STORAGE_TYPEDEF: return "typedef"; + } + return "default"; +} + +static void decl_spec_error_impl(const decl_spec_t *spec, const decl_var_t select, const size_t line) { + const char *type = spec_type_string[spec->type]; + const char *size = spec_size_string[spec->size]; + const char *sign = spec_sign_string[spec->sign]; + const char *var = spec_var_string[select]; + + if (!type) type = "unspecified"; + if (!size) size = "unspecified"; + if (!sign) sign = "unspecified"; + if (!var) var = "unspecified"; + + compile_ice("declaration specifier 
error %d\n" + "debug info:\n" + " select: %s\n" + " class: %s\n" + " type: %s\n" + " size: %s\n" + " sign: %s\n" + " const: %s\n" + " volatile: %s\n" + " inline: %s\n", + line, + var, + debug_storage_string(spec->class), + type, + size, + sign, + bool_string(spec->kconst), + bool_string(spec->kvolatile), + bool_string(spec->kinline) + ); +} + +static void decl_spec_class(decl_spec_t *spec, const storage_t class) { + if (spec->class != 0) + decl_spec_error(spec, SPEC_VAR_NULL); + spec->class = class; +} + +static void decl_spec_set(decl_spec_t *spec, const decl_var_t select, void *value) { + switch (select) { + case SPEC_VAR_SIGN: + if (spec->sign != SPEC_SIGN_NULL) + decl_spec_error(spec, select); + spec->sign = *(spec_sign_t*)value; + break; + case SPEC_VAR_SIZE: + if (spec->size != SPEC_SIZE_NULL) + decl_spec_error(spec, select); + spec->size = *(spec_size_t*)value; + break; + case SPEC_VAR_TYPE: + if (spec->type != SPEC_TYPE_NULL) + decl_spec_error(spec, select); + spec->type = *(spec_type_t*)value; + break; + case SPEC_VAR_USER: + if (spec->user != 0) + decl_spec_error(spec, select); + spec->user = value; + break; + default: + compile_ice("decl_spec_get state machine got null variable reference"); + break; + } + + /* bool cannot have a sign, it's only legal as it's own entity. */ + if (spec->type == SPEC_TYPE_BOOL && (spec->size != SPEC_SIZE_NULL && spec->sign != SPEC_SIGN_NULL)) + decl_spec_error(spec, select); + + switch (spec->size) { + case SPEC_SIZE_SHORT: + /* + * short and short int are the only legal uses of the short + * size specifier. + */ + if (spec->type != SPEC_TYPE_NULL && spec->type != SPEC_TYPE_INT) + decl_spec_error(spec, select); + break; + + case SPEC_SIZE_LONG: + /* + * long, long int and long double are the only legal uses of + * long size specifier. 
+ */ + if (spec->type != SPEC_TYPE_NULL && spec->type != SPEC_TYPE_INT && spec->type != SPEC_TYPE_DOUBLE) + decl_spec_error(spec, select); + break; + + default: + break; + } + + /* + * sign and unsigned sign specifiers are not legal on void, float and + * double types. + */ + if (spec->sign != SPEC_SIGN_NULL) { + switch (spec->type) { + case SPEC_TYPE_VOID: + case SPEC_TYPE_FLOAT: + case SPEC_TYPE_DOUBLE: + decl_spec_error(spec, select); + break; + default: + break; + } + } + + /* + * user types cannot have additional levels of specification on it, + * for instance 'typedef unsigned int foo; 'signed foo'. + */ + if (spec->user && (spec->type != SPEC_TYPE_NULL || + spec->size != SPEC_SIZE_NULL || + spec->sign != SPEC_SIGN_NULL)) + decl_spec_error(spec, select); +} + +#define decl_spec_seti(SPEC, SELECT, VAR) \ + decl_spec_set((SPEC), (SELECT), &(int){ VAR }) + +static data_type_t *decl_spec_get(const decl_spec_t *spec) { + bool sign = !!(spec->sign != SPEC_SIGN_UNSIGNED); + + switch (spec->type) { + case SPEC_TYPE_VOID: + return ast_data_table[AST_DATA_VOID]; + case SPEC_TYPE_BOOL: + return ast_type_create(TYPE_BOOL, false); + case SPEC_TYPE_CHAR: + return ast_type_create(TYPE_CHAR, sign); + case SPEC_TYPE_FLOAT: + return ast_type_create(TYPE_FLOAT, false); + case SPEC_TYPE_DOUBLE: + if (spec->size == SPEC_SIZE_LONG) + return ast_type_create(TYPE_LDOUBLE, false); + return ast_type_create(TYPE_DOUBLE, false); + default: + break; + } + + switch (spec->size) { + case SPEC_SIZE_SHORT: + return ast_type_create(TYPE_SHORT, sign); + case SPEC_SIZE_LONG: + return ast_type_create(TYPE_LONG, sign); + case SPEC_SIZE_LLONG: + return ast_type_create(TYPE_LLONG, sign); + default: + /* implicit int */ + return ast_type_create(TYPE_INT, sign); + } + compile_ice("declaration specifier"); +} + +data_type_t *decl_spec(storage_t *const class) { + decl_spec_t spec; + memset(&spec, 0, sizeof(spec)); + + for (;;) { + lexer_token_t *token = lexer_next(); + if (!token) + compile_error("type 
specification with unexpected ending"); + + if (token->type != LEXER_TOKEN_IDENTIFIER) { + lexer_unget(token); + break; + } + + if (!strcmp(token->string, "const")) + spec.kconst = true; + else if (!strcmp(token->string, "volatile")) + spec.kvolatile = true; + else if (!strcmp(token->string, "inline")) + spec.kinline = true; + else if (!strcmp(token->string, "typedef")) + decl_spec_class(&spec, STORAGE_TYPEDEF); + else if (!strcmp(token->string, "extern")) + decl_spec_class(&spec, STORAGE_EXTERN); + else if (!strcmp(token->string, "static") || !strcmp(token->string, "__static__")) + decl_spec_class(&spec, STORAGE_STATIC); + else if (!strcmp(token->string, "auto")) + decl_spec_class(&spec, STORAGE_AUTO); + else if (!strcmp(token->string, "register")) + decl_spec_class(&spec, STORAGE_REGISTER); + else if (!strcmp(token->string, "void")) + decl_spec_seti(&spec, SPEC_VAR_TYPE, SPEC_TYPE_VOID); + else if (!strcmp(token->string, "_Bool")) + decl_spec_seti(&spec, SPEC_VAR_TYPE, SPEC_TYPE_BOOL); + else if (!strcmp(token->string, "char")) + decl_spec_seti(&spec, SPEC_VAR_TYPE, SPEC_TYPE_CHAR); + else if (!strcmp(token->string, "int")) + decl_spec_seti(&spec, SPEC_VAR_TYPE, SPEC_TYPE_INT); + else if (!strcmp(token->string, "float")) + decl_spec_seti(&spec, SPEC_VAR_TYPE, SPEC_TYPE_FLOAT); + else if (!strcmp(token->string, "double")) + decl_spec_seti(&spec, SPEC_VAR_TYPE, SPEC_TYPE_DOUBLE); + else if (!strcmp(token->string, "signed")) + decl_spec_seti(&spec, SPEC_VAR_SIGN, SPEC_SIGN_SIGNED); + else if (!strcmp(token->string, "unsigned")) + decl_spec_seti(&spec, SPEC_VAR_SIGN, SPEC_SIGN_UNSIGNED); + else if (!strcmp(token->string, "struct")) + decl_spec_set(&spec, SPEC_VAR_USER, parse_structure()); + else if (!strcmp(token->string, "union")) + decl_spec_set(&spec, SPEC_VAR_USER, parse_union()); + else if (!strcmp(token->string, "enum")) + decl_spec_set(&spec, SPEC_VAR_USER, parse_enumeration()); + else if (!strcmp(token->string, "short")) + decl_spec_seti(&spec, SPEC_VAR_SIZE, 
SPEC_SIZE_SHORT); + else if (!strcmp(token->string, "long")) { + if (spec.size == 0) + decl_spec_seti(&spec, SPEC_VAR_SIZE, SPEC_SIZE_LONG); + else if (spec.size == SPEC_SIZE_LONG) + spec.size = SPEC_SIZE_LLONG; + else + decl_spec_error(&spec, SPEC_VAR_NULL); + } + else if (!strcmp(token->string, "typeof") || !strcmp(token->string, "__typeof__")) + decl_spec_set(&spec, SPEC_VAR_USER, parse_typeof()); + else if (parse_typedef_find(token->string) && !spec.user) + decl_spec_set(&spec, SPEC_VAR_USER, parse_typedef_find(token->string)); + else { + lexer_unget(token); + break; + } + } + + if (class) + *class = spec.class; + if (spec.user) + return spec.user; + + return decl_spec_get(&spec); +} diff --git a/gen.c b/gen.c new file mode 100644 index 0000000..0466de3 --- /dev/null +++ b/gen.c @@ -0,0 +1,298 @@ +/* + * File: gen.c + * Common code generator facilities. + */ +#include +#include + +#include "gen.h" +#include "lice.h" + +char *gen_label_break = NULL; +char *gen_label_continue = NULL; +char *gen_label_switch = NULL; +char *gen_label_break_backup = NULL; +char *gen_label_continue_backup = NULL; +char *gen_label_switch_backup = NULL; + +static void gen_emit_emitter(bool indent, const char *fmt, va_list list) { + if (indent) + fputc('\t', stdout); + + va_list va; + va_copy(va, list); + vprintf(fmt, va); + va_end(va); + + fputc('\n', stdout); +} + +void gen_emit(const char *fmt, ...) { + va_list va; + va_start(va, fmt); + gen_emit_emitter(true, fmt, va); + va_end(va); +} + +void gen_emit_inline(const char *fmt, ...) 
{ + va_list va; + va_start(va, fmt); + gen_emit_emitter(false, fmt, va); + va_end(va); +} + +void gen_jump_backup(void) { + gen_label_break_backup = gen_label_break; + gen_label_continue_backup = gen_label_continue; +} + +void gen_jump_save(char *lbreak, char *lcontinue) { + gen_jump_backup(); + + gen_label_break = lbreak; + gen_label_continue = lcontinue; +} + +void gen_jump_restore(void) { + gen_label_break = gen_label_break_backup; + gen_label_continue = gen_label_continue_backup; +} + +void gen_jump(const char *label) { + if (!label) + compile_ice("gen_jump"); + + gen_emit("jmp %s", label); +} + +void gen_label(const char *label) { + gen_emit("%s:", label); +} + +/* + * Some expressions are architecture-independent thanks to generic generation + * functions. + */ +static void gen_statement_switch(ast_t *ast) { + gen_label_switch_backup = gen_label_switch; + gen_label_break_backup = gen_label_break; + gen_expression(ast->switchstmt.expr); + gen_label_switch = ast_label(); + gen_label_break = ast_label(); + gen_jump(gen_label_switch); + if (ast->switchstmt.body) + gen_expression(ast->switchstmt.body); + gen_label(gen_label_switch); + gen_label(gen_label_break); + gen_label_switch = gen_label_switch_backup; + gen_label_break = gen_label_break_backup; +} + +static void gen_statement_do(ast_t *ast) { + char *begin = ast_label(); + char *end = ast_label(); + gen_jump_save(end, begin); + gen_label(begin); + gen_expression(ast->forstmt.body); + gen_expression(ast->forstmt.cond); + gen_je(end); + gen_jump(begin); + gen_label(end); + gen_jump_restore(); +} + +static void gen_statement_compound(ast_t *ast) { + for (list_iterator_t *it = list_iterator(ast->compound); !list_iterator_end(it); ) + gen_expression(list_iterator_next(it)); +} + +static void gen_statement_goto(ast_t *ast) { + gen_jump(ast->gotostmt.where); +} + +static void gen_statement_label(ast_t *ast) { + if (ast->gotostmt.where) + gen_label(ast->gotostmt.where); +} + +static void gen_statement_cond(ast_t 
*ast) { + gen_expression(ast->ifstmt.cond); + char *ne = ast_label(); + gen_je(ne); + if (ast->ifstmt.then) + gen_expression(ast->ifstmt.then); + if (ast->ifstmt.last) { + char *end = ast_label(); + gen_jump(end); + gen_label(ne); + gen_expression(ast->ifstmt.last); + gen_label(end); + } else { + gen_label(ne); + } +} + +static void gen_statement_for(ast_t *ast) { + if (ast->forstmt.init) + gen_expression(ast->forstmt.init); + char *begin = ast_label(); + char *step = ast_label(); + char *end = ast_label(); + gen_jump_save(end, step); + gen_label(begin); + if (ast->forstmt.cond) { + gen_expression(ast->forstmt.cond); + gen_je(end); + } + gen_expression(ast->forstmt.body); + gen_label(step); + if (ast->forstmt.step) + gen_expression(ast->forstmt.step); + gen_jump(begin); + gen_label(end); + gen_jump_restore(); +} + +static void gen_statement_while(ast_t *ast) { + char *begin = ast_label(); + char *end = ast_label(); + gen_jump_save(end, begin); + gen_label(begin); + gen_expression(ast->forstmt.cond); + gen_je(end); + gen_expression(ast->forstmt.body); + gen_jump(begin); + gen_label(end); + gen_jump_restore(); +} + +static void gen_statement_return(ast_t *ast) { + if (ast->returnstmt) { + gen_expression(ast->returnstmt); + gen_boolean_maybe(ast->returnstmt->ctype); + } + gen_return(); +} + +static void gen_statement_break(void) { + gen_jump(gen_label_break); +} + +static void gen_statement_continue(void) { + gen_jump(gen_label_continue); +} + +static void gen_statement_default(void) { + gen_label(gen_label_switch); + gen_label_switch = ast_label(); +} + +static void gen_comma(ast_t *ast) { + gen_expression(ast->left); + gen_expression(ast->right); +} + +static void gen_data_bss(ast_t *ast) { + gen_emit(".data"); + if (!ast->decl.var->ctype->isstatic) + gen_emit(".global %s", ast->decl.var->variable.name); + gen_emit(".lcomm %s, %d", ast->decl.var->variable.name, ast->decl.var->ctype->size); +} + +static void gen_data_global(ast_t *variable) { + if 
(variable->decl.init) + gen_data(variable, 0, 0); + else + gen_data_bss(variable); +} + +static void gen_declaration_initialization(list_t *init, int offset) { + for (list_iterator_t *it = list_iterator(init); !list_iterator_end(it); ) { + ast_t *node = list_iterator_next(it); + if (node->init.value->type == AST_TYPE_LITERAL && node->init.type->bitfield.size <= 0) + gen_literal_save(node->init.value, node->init.type, node->init.offset + offset); + else { + gen_expression(node->init.value); + gen_save_local(node->init.type, node->init.offset + offset); + } + } +} + +static void gen_declaration(ast_t *ast) { + if (!ast->decl.init) + return; + + gen_zero(ast->decl.var->variable.off, ast->decl.var->variable.off + ast->decl.var->ctype->size); + gen_declaration_initialization(ast->decl.init, ast->decl.var->variable.off); +} + +void gen_ensure_lva(ast_t *ast) { + if (ast->variable.init) { + gen_zero(ast->variable.off, ast->variable.off + ast->ctype->size); + gen_declaration_initialization(ast->variable.init, ast->variable.off); + } + ast->variable.init = NULL; +} + +void gen_expression(ast_t *ast) { + if (!ast) return; + + switch (ast->type) { + case AST_TYPE_STATEMENT_IF: gen_statement_cond(ast); break; + case AST_TYPE_EXPRESSION_TERNARY: gen_statement_cond(ast); break; + case AST_TYPE_STATEMENT_FOR: gen_statement_for(ast); break; + case AST_TYPE_STATEMENT_WHILE: gen_statement_while(ast); break; + case AST_TYPE_STATEMENT_DO: gen_statement_do(ast); break; + case AST_TYPE_STATEMENT_COMPOUND: gen_statement_compound(ast); break; + case AST_TYPE_STATEMENT_SWITCH: gen_statement_switch(ast); break; + case AST_TYPE_STATEMENT_GOTO: gen_statement_goto(ast); break; + case AST_TYPE_STATEMENT_LABEL: gen_statement_label(ast); break; + case AST_TYPE_STATEMENT_RETURN: gen_statement_return(ast); break; + case AST_TYPE_STATEMENT_BREAK: gen_statement_break(); break; + case AST_TYPE_STATEMENT_CONTINUE: gen_statement_continue(); break; + case AST_TYPE_STATEMENT_DEFAULT: 
gen_statement_default(); break; + case AST_TYPE_CALL: gen_function_call(ast); break; + case AST_TYPE_POINTERCALL: gen_function_call(ast); break; + case AST_TYPE_LITERAL: gen_literal(ast); break; + case AST_TYPE_STRING: gen_literal_string(ast); break; + case AST_TYPE_VAR_LOCAL: gen_variable_local(ast); break; + case AST_TYPE_VAR_GLOBAL: gen_variable_global(ast); break; + case AST_TYPE_DECLARATION: gen_declaration(ast); break; + case AST_TYPE_DEREFERENCE: gen_dereference(ast); break; + case AST_TYPE_ADDRESS: gen_address(ast->unary.operand); break; + case AST_TYPE_STATEMENT_CASE: gen_case(ast); break; + case AST_TYPE_VA_START: gen_va_start(ast); break; + case AST_TYPE_VA_ARG: gen_va_arg(ast); break; + case '!': gen_not(ast); break; + case AST_TYPE_NEGATE: gen_negate(ast); break; + case AST_TYPE_AND: gen_and(ast); break; + case AST_TYPE_OR: gen_or(ast); break; + case AST_TYPE_POST_INCREMENT: gen_postfix(ast, "add"); break; + case AST_TYPE_POST_DECREMENT: gen_postfix(ast, "sub"); break; + case AST_TYPE_PRE_INCREMENT: gen_prefix (ast, "add"); break; + case AST_TYPE_PRE_DECREMENT: gen_prefix (ast, "sub"); break; + case AST_TYPE_EXPRESSION_CAST: gen_cast(ast); break; + case AST_TYPE_STRUCT: gen_struct(ast); break; + case '&': gen_bitandor(ast); break; + case '|': gen_bitandor(ast); break; + case '~': gen_bitnot(ast); break; + case ',': gen_comma(ast); break; + case '=': gen_assign(ast); break; + case AST_TYPE_CONVERT: gen_conversion(ast); break; + case AST_TYPE_STATEMENT_GOTO_COMPUTED: gen_goto_computed(ast); break; + case AST_TYPE_STATEMENT_LABEL_COMPUTED: gen_address_label(ast); break; + default: + gen_binary(ast); + } +} + +void gen_toplevel(ast_t *ast) { + gen_function(ast); + if (ast->type == AST_TYPE_FUNCTION) { + gen_function_prologue(ast); + gen_expression(ast->function.body); + gen_function_epilogue(); + } else if (ast->type == AST_TYPE_DECLARATION) { + gen_data_global(ast); + } +} diff --git a/gen_amd64.c b/gen_amd64.c new file mode 100644 index 0000000..1a7080a 
--- /dev/null +++ b/gen_amd64.c @@ -0,0 +1,1201 @@ +#include +#include +#include +#include +#define __STDC_FORMAT_MACROS +#include + +#include "lice.h" +#include "gen.h" + +#define REGISTER_AREA_SIZE 304 +#define REGISTER_MULT_SIZE_XMM 8 +#define REGISTER_MULT_SIZE 6 + +#define SRDI "rdi" +#define SRSI "rsi" +#define SRDX "rdx" +#define SRCX "rcx" +#define SR8 "r8" +#define SR9 "r9" +#define SEDI "edi" +#define SESI "esi" +#define SEDX "edx" +#define SECX "ecx" +#define SR8D "r8d" +#define SR9D "r9d" +#define SDIL "dil" +#define SSIL "sil" +#define SDL "dl" +#define SCL "cl" +#define SR8B "r8b" +#define SR9B "r9b" +#define SRAX "rax" +#define SRBX "rbx" +#define SR11 "r11" + +static const char *register_table[][REGISTER_MULT_SIZE] = { + { SRDI, SRSI, SRDX, SRCX, SR8, SR9 }, + { SEDI, SESI, SEDX, SECX, SR8D, SR9D }, + { SDIL, SSIL, SDL, SCL, SR8B, SR9B } +}; + +#define NREG(I) register_table[0][I] +#define SREG(I) register_table[1][I] +#define MREG(I) register_table[2][I] + +static int stack = 0; +static int gp = 0; +static int fp = 0; + +static void gen_push(const char *reg) { + gen_emit("push %%%s", reg); + stack += 8; +} +static void gen_pop(const char *reg) { + gen_emit("pop %%%s", reg); + stack -= 8; +} +static void gen_push_xmm(int r) { + gen_emit("sub $8, %%rsp"); + gen_emit("movsd %%xmm%d, (%%rsp)", r); + stack += 8; +} +static void gen_pop_xmm(int r) { + gen_emit("movsd (%%rsp), %%xmm%d", r); + gen_emit("add $8, %%rsp"); + stack -= 8; +} + +/* + * Technically not the safest, but also can't legally be optimized with + * strict aliasing optimizations. Volatile will mark the construction + * of the literal from being directly delt with in the optimizer. Plus + * aliasing though the use of a union, while it isn't technically legal, + * all compilers do deal with it to some extent. Restrict on want will + * prevent the compiler from emitting two loads for the same address, since + * it is likely already in an register. 
+ */
+#define TYPEPUN(TYPE, VALUE) \
+    *(((volatile union { __typeof__(VALUE) *have; TYPE *restrict want; }) { &(VALUE) }).want)
+
+/*
+ * Bounds-checked lookup into a table of pointers. Returns table[index];
+ * raises an internal compiler error naming the calling function when the
+ * index is not below length or the selected entry is null.
+ */
+static void *gen_mapping_table(const void **table, size_t index, size_t length, const char *func) {
+    const unsigned char **ptr = (const unsigned char **)table;
+    const unsigned char **end = &ptr[length];
+    const unsigned char **ret = &ptr[index];
+
+    if (ret < ptr || ret >= end || !*ret)
+        compile_ice("gen_mapping_table from %s (index: %zu, length: %zu)", func, index, length);
+
+    return *((void **)ret);
+}
+
+#define gen_mapping(TABLE, INDEX, LENGTH) \
+    gen_mapping_table((const void **)(TABLE), (INDEX), (LENGTH), __func__)
+
+/*
+ * Name of the integer register that holds a value of the given type.
+ * type->size (1, 2, 4 or 8) selects the width; r == 'a' selects the
+ * al/ax/eax/rax family, any other value the cl/cx/ecx/rcx family.
+ * Other sizes hit a null table entry and raise an internal error.
+ */
+static const char *gen_register_integer(data_type_t *type, char r) {
+    static const char *items[] = {
+        "cl", "cx", 0, "ecx", 0, 0, 0, "rcx",
+        "al", "ax", 0, "eax", 0, 0, 0, "rax"
+    };
+    static const size_t length = sizeof(items)/sizeof(*items);
+    return gen_mapping(items, (type->size - 1) + !!(r == 'a') * 8, length);
+}
+
+/*
+ * Load mnemonic for a value of type->size bytes: the sign-extending
+ * movsbq/movswq/movslq for 1, 2 and 4 bytes and a plain mov for 8.
+ */
+static const char *gen_load_instruction(data_type_t *type) {
+    static const char *items[] = {
+        "movsbq", "movswq", 0,
+        "movslq", 0, 0, 0,
+        "mov"
+    };
+    return gen_mapping(items, type->size - 1, sizeof(items)/sizeof(*items));
+}
+
+/*
+ * Mask with the low `size` bits set. Built in 64-bit arithmetic: the
+ * previous `(1 << size) - 1` shifted an int, which overflows for fields of
+ * 31 bits or more and handed an int to a PRIx64 conversion. A full 64-bit
+ * field is special-cased because shifting by the type width is undefined.
+ */
+static uint64_t gen_bitfield_mask(int size) {
+    return (size >= 64) ? UINT64_MAX : ((uint64_t)1 << size) - 1;
+}
+
+/*
+ * Extract a bit-field from the storage unit already loaded in rax: shift
+ * it down by the field offset and mask it to the field width. Does nothing
+ * unless type->bitfield.size is positive. rcx is used as scratch and is
+ * preserved with a push/pop pair.
+ */
+static void gen_shift_load(data_type_t *type) {
+    if (type->bitfield.size <= 0)
+        return;
+    gen_emit("shr $%d, %%rax", type->bitfield.offset);
+    gen_push(SRCX);
+    gen_emit("mov $0x%" PRIx64 ", %%rcx", gen_bitfield_mask(type->bitfield.size));
+    gen_emit("and %%rcx, %%rax");
+    gen_pop(SRCX);
+}
+
+/*
+ * Merge the value in rax into a bit-field stored at `address`: mask the
+ * value to the field width, shift it to the field offset, load the current
+ * storage unit into the c-register, clear the field's bits there and OR
+ * the two together, leaving the merged unit in rax. Does nothing unless
+ * type->bitfield.size is positive. rcx and rdi are preserved.
+ */
+static void gen_shift_save(data_type_t *type, char *address) {
+    if (type->bitfield.size <= 0)
+        return;
+    gen_push(SRCX);
+    gen_push(SRDI);
+
+    gen_emit("mov $0x%" PRIx64 ", %%rdi", gen_bitfield_mask(type->bitfield.size));
+    gen_emit("and %%rdi, %%rax");
+    gen_emit("shl $%d, %%rax", type->bitfield.offset);
+    gen_emit("mov %s, %%%s", address, gen_register_integer(type, 'c'));
+    gen_emit("mov $0x%" PRIx64 ", %%rdi", ~(gen_bitfield_mask(type->bitfield.size) << 
type->bitfield.offset)); + gen_emit("and %%rdi, %%rcx"); + gen_emit("or %%rcx, %%rax"); + + gen_pop(SRDI); + gen_pop(SRCX); +} + +static void gen_load_global(data_type_t *type, char *label, int offset) { + if (type->type == TYPE_ARRAY) { + if (offset) + gen_emit("lea %s+%d(%%rip), %%rax", label, offset); + else + gen_emit("lea %s(%%rip), %%rax", label); + return; + } + gen_emit("%s %s+%d(%%rip), %%rax", gen_load_instruction(type), label, offset); + gen_shift_load(type); +} + +static void gen_cast_int(data_type_t *type) { + if (type->type == TYPE_FLOAT) + gen_emit("cvttss2si %%xmm0, %%eax"); + else if (type->type == TYPE_DOUBLE) + gen_emit("cvttsd2si %%xmm0, %%eax"); +} + +static void gen_cast_bool(data_type_t *type) { + if (ast_type_isfloating(type)) { + gen_push_xmm(1); + gen_emit("xorpd %%xmm1, %%xmm1"); + gen_emit("ucomisd %%xmm1, %%xmm0"); + gen_emit("setne %%al"); + gen_pop_xmm(1); + } else { + gen_emit("cmp $0, %%rax"); + gen_emit("setne %%al"); + } + gen_emit("movzb %%al, %%eax"); +} + +static void gen_load_local(data_type_t *var, const char *base, int offset) { + if (var->type == TYPE_ARRAY) { + gen_emit("lea %d(%%%s), %%rax", offset, base); + } else if (var->type == TYPE_FLOAT) { + gen_emit("movss %d(%%%s), %%xmm0", offset, base); + } else if (var->type == TYPE_DOUBLE || var->type == TYPE_LDOUBLE) { + gen_emit("movsd %d(%%%s), %%xmm0", offset, base); + } else { + gen_emit("%s %d(%%%s), %%rax", gen_load_instruction(var), offset, base); + gen_shift_load(var); + } +} + +void gen_boolean_maybe(data_type_t *type) { + if (type->type != TYPE_BOOL) + return; + + gen_emit("test %%rax, %%rax"); + gen_emit("setne %%al"); +} + +static void gen_save_global(char *name, data_type_t *type, int offset) { + gen_boolean_maybe(type); + + const char *reg = gen_register_integer(type, 'a'); + string_t *str = string_create(); + + if (offset != 0) + string_catf(str, "%s+%d(%%rip)", name, offset); + else + string_catf(str, "%s(%%rip)", name); + + gen_shift_save(type, 
string_buffer(str));
+    gen_emit("mov %%%s, %s", reg, string_buffer(str));
+}
+
+/*
+ * Store the current result into the local slot at offset(%rbp). Floating
+ * types are written from xmm0 with movss/movsd; everything else is written
+ * from the a-register sized by the type, after _Bool normalisation and
+ * bit-field merging.
+ */
+void gen_save_local(data_type_t *type, int offset) {
+    if (type->type == TYPE_FLOAT)
+        gen_emit("movss %%xmm0, %d(%%rbp)", offset);
+    else if (type->type == TYPE_DOUBLE || type->type == TYPE_LDOUBLE)
+        gen_emit("movsd %%xmm0, %d(%%rbp)", offset);
+    else {
+        gen_boolean_maybe(type);
+
+        string_t   *str = string_create();
+        const char *reg = gen_register_integer(type, 'a');
+
+        if (offset != 0)
+            string_catf(str, "%d(%%rbp)", offset);
+        else
+            string_catf(str, "(%%rbp)");
+
+        gen_shift_save(type, string_buffer(str));
+        gen_emit("mov %%%s, %s", reg, string_buffer(str));
+    }
+}
+
+/*
+ * Second half of a store through a pointer. Expects the value to store on
+ * top of the stack and the destination address in rax: reloads the value
+ * into rcx, writes the type-sized c-register to offset(%rax), then pops
+ * the saved value back into rax.
+ */
+static void gen_assignment_dereference_intermediate(data_type_t *type, int offset) {
+    gen_emit("mov (%%rsp), %%rcx");
+
+    const char *reg = gen_register_integer(type, 'c');
+    if (offset)
+        gen_emit("mov %%%s, %d(%%rax)", reg, offset);
+    else
+        gen_emit("mov %%%s, (%%rax)", reg);
+    gen_pop(SRAX);
+}
+
+/*
+ * Leave the address of an lvalue in rax. Handles locals (rbp-relative),
+ * globals (rip-relative), dereferences (the operand's value is the
+ * address) and structure members (base address plus member offset); any
+ * other node type is an internal error.
+ */
+void gen_address(ast_t *ast) {
+    switch (ast->type) {
+        case AST_TYPE_VAR_LOCAL:
+            gen_emit("lea %d(%%rbp), %%rax", ast->variable.off);
+            break;
+        case AST_TYPE_VAR_GLOBAL:
+            gen_emit("lea %s(%%rip), %%rax", ast->variable.label);
+            break;
+        case AST_TYPE_DEREFERENCE:
+            gen_expression(ast->unary.operand);
+            break;
+        case AST_TYPE_STRUCT:
+            gen_address(ast->structure);
+            gen_emit("add $%d, %%rax", ast->ctype->offset);
+            break;
+        default:
+            compile_ice("gen_address (%s)", ast_type_string(ast->ctype));
+    }
+}
+
+/* Load the address of the label named by ast->gotostmt.where into rax. */
+void gen_address_label(ast_t *ast) {
+    gen_emit("mov $%s, %%rax", ast->gotostmt.where);
+}
+
+/* Evaluate the operand and jump to the address it leaves in rax. */
+void gen_goto_computed(ast_t *ast) {
+    gen_expression(ast->unary.operand);
+    gen_emit("jmp *%%rax");
+}
+
+/*
+ * Copy `size` bytes from (%rcx) to (%base) through r11, using the widest
+ * moves that still fit: 8-byte chunks, then 4-byte chunks, then single
+ * bytes.
+ *
+ * The loop bounds follow gen_zero. The previous 8-byte loop ran while
+ * i < size, so it copied up to 7 bytes past the end of the structure and
+ * the narrower loops never ran. Those loops also loaded into the 64-bit
+ * name r11 with movl/movb, which the assembler rejects; they now use
+ * r11d/r11b.
+ */
+static void gen_structure_copy(int size, const char *base) {
+    int i = 0;
+    for (; i <= size - 8; i += 8) {
+        gen_emit("movq %d(%%rcx), %%r11", i);
+        gen_emit("movq %%r11, %d(%%%s)", i, base);
+    }
+
+    for (; i <= size - 4; i += 4) {
+        gen_emit("movl %d(%%rcx), %%r11d", i);
+        gen_emit("movl %%r11d, %d(%%%s)", i, base);
+    }
+
+    for (; i < size; i++) {
+        gen_emit("movb %d(%%rcx), %%r11b", i);
+        gen_emit("movb %%r11b, %d(%%%s)", i, base);
+    }
+}
+
+/*
+ * Structure assignment: take the address of `right` into rcx and of `left`
+ * into rax, then copy left->ctype->size bytes. rcx and r11 are preserved.
+ */
+static void gen_structure_assign(ast_t *left, ast_t *right) {
+    gen_push(SRCX);
+    gen_push(SR11);
+    gen_address(right);
+    gen_emit("mov %%rax, %%rcx");
+    gen_address(left);
+    gen_structure_copy(left->ctype->size, "rax");
+    gen_pop(SR11);
+    gen_pop(SRCX);
+}
+
+/* Round n up to the next multiple of align (n itself when already aligned). */
+static int gen_alignment(int n, int align) {
+    int remainder = n % align;
+    return (remainder == 0)
+        ? n
+        : n - remainder + align;
+}
+
+/* Passing structures by value is not implemented; always reports an error. */
+static int gen_structure_push(int size) {
+    compile_error("cannot pass structure of size: %d bytes by copy (unimplemented)", size);
+}
+
+/*
+ * Zero the rbp-relative byte range [start, end) using 8-, 4- and then
+ * 1-byte stores.
+ */
+void gen_zero(int start, int end) {
+    for (; start <= end - 8; start += 8)
+        gen_emit("movq $0, %d(%%rbp)", start);
+    for (; start <= end - 4; start += 4)
+        gen_emit("movl $0, %d(%%rbp)", start);
+    for (; start < end; start ++)
+        gen_emit("movb $0, %d(%%rbp)", start);
+}
+
+/*
+ * Store rax through a pointer expression: save the value, evaluate the
+ * pointer operand into rax, then write the value to the pointed-to type.
+ */
+static void gen_assignment_dereference(ast_t *var) {
+    gen_push(SRAX);
+    gen_expression(var->unary.operand);
+    gen_assignment_dereference_intermediate(var->unary.operand->ctype->pointer, 0);
+}
+
+/*
+ * Pointer +/- integer: evaluate `left` (the pointer), scale `right` by the
+ * size of the pointed-to type and add or subtract it. The result is left
+ * in rax; rcx is preserved.
+ */
+static void gen_pointer_arithmetic(char op, ast_t *left, ast_t *right) {
+    gen_expression(left);
+    gen_push(SRCX);
+    gen_push(SRAX);
+    gen_expression(right);
+
+    int size = left->ctype->pointer->size;
+    if (size > 1)
+        gen_emit("imul $%d, %%rax", size);
+
+    gen_emit("mov %%rax, %%rcx");
+    gen_pop(SRAX);
+
+    switch (op) {
+        case '+': gen_emit("add %%rcx, %%rax"); break;
+        case '-': gen_emit("sub %%rcx, %%rax"); break;
+    }
+    gen_pop(SRCX);
+}
+
+/*
+ * Store the current result into member `field` of `structure`; `offset`
+ * accumulates the offsets of enclosing members for nested accesses.
+ */
+static void gen_assignment_structure(ast_t *structure, data_type_t *field, int offset) {
+    switch (structure->type) {
+        case AST_TYPE_VAR_LOCAL:
+            gen_ensure_lva(structure);
+            gen_save_local(field, structure->variable.off + field->offset + offset);
+            break;
+        case AST_TYPE_VAR_GLOBAL:
+            gen_save_global(structure->variable.name, field, field->offset + offset);
+            break;
+        case AST_TYPE_STRUCT:
+            
gen_assignment_structure(structure->structure, field, offset + structure->ctype->offset); + break; + case AST_TYPE_DEREFERENCE: + gen_push(SRAX); + gen_expression(structure->unary.operand); + gen_assignment_dereference_intermediate(field, field->offset + offset); + break; + default: + compile_ice("gen_assignment_structure"); + break; + } +} + +static void gen_load_structure(ast_t *structure, data_type_t *field, int offset) { + switch (structure->type) { + case AST_TYPE_VAR_LOCAL: + gen_ensure_lva(structure); + gen_load_local(field, "rbp", structure->variable.off + field->offset + offset); + break; + case AST_TYPE_VAR_GLOBAL: + gen_load_global(field, structure->variable.name, field->offset + offset); + break; + case AST_TYPE_STRUCT: + gen_load_structure(structure->structure, field, structure->ctype->offset + offset); + break; + case AST_TYPE_DEREFERENCE: + gen_expression(structure->unary.operand); + gen_load_local(field, SRAX, field->offset + offset); + break; + default: + compile_ice("gen_assignment_structure"); + break; + } +} + +static void gen_store(ast_t *var) { + switch (var->type) { + case AST_TYPE_DEREFERENCE: + gen_assignment_dereference(var); + break; + case AST_TYPE_STRUCT: + gen_assignment_structure(var->structure, var->ctype, 0); + break; + case AST_TYPE_VAR_LOCAL: + gen_ensure_lva(var); + gen_save_local(var->ctype, var->variable.off); + break; + case AST_TYPE_VAR_GLOBAL: + gen_save_global(var->variable.name, var->ctype, 0); + break; + default: + compile_ice("gen_assignment"); + } +} + +static void gen_comparision(char *operation, ast_t *ast) { + if (ast_type_isfloating(ast->left->ctype)) { + gen_expression(ast->left); + gen_push_xmm(0); + gen_expression(ast->right); + gen_pop_xmm(1); + if (ast->left->ctype->type == TYPE_FLOAT) + gen_emit("ucomiss %%xmm0, %%xmm1"); + else + gen_emit("ucomisd %%xmm0, %%xmm1"); + } else { + gen_expression(ast->left); + gen_push(SRAX); + gen_expression(ast->right); + gen_pop(SRCX); + + int type = ast->left->ctype->type; + 
if (type == TYPE_LONG || type == TYPE_LLONG)
+            gen_emit("cmp %%rax, %%rcx");
+        else
+            gen_emit("cmp %%eax, %%ecx");
+    }
+    gen_emit("%s %%al", operation);
+    gen_emit("movzb %%al, %%eax");
+}
+
+/*
+ * Select the mnemonic for a binary arithmetic node.
+ *
+ * Floating results get adds/subs/muls/divs plus a 'd' suffix for double
+ * and long double or an 's' suffix for float; any other operator on a
+ * floating type is an internal error. Integer results get the plain
+ * mnemonic, except division and modulo, which are returned as the markers
+ * "@/" and "@%" because they need a dedicated idiv sequence.
+ */
+static const char *gen_binary_instruction(ast_t *ast) {
+    string_t *string = string_create();
+    if (ast_type_isfloating(ast->ctype)) {
+        switch (ast->type) {
+            case '+': string_catf(string, "adds"); break;
+            case '-': string_catf(string, "subs"); break;
+            case '*': string_catf(string, "muls"); break;
+            case '/': string_catf(string, "divs"); break;
+        }
+        /*
+         * Check for an unsupported operator before the precision suffix
+         * is appended; afterwards the string is never empty and the
+         * check could not fire.
+         */
+        if (!string_length(string))
+            goto error;
+
+        if (ast->ctype->type == TYPE_DOUBLE || ast->ctype->type == TYPE_LDOUBLE)
+            string_cat(string, 'd');
+        else
+            string_cat(string, 's');
+
+        return string_buffer(string);
+    }
+    /* integer */
+    switch (ast->type) {
+        case '+':              string_catf(string, "add");  break;
+        case '-':              string_catf(string, "sub");  break;
+        case '*':              string_catf(string, "imul"); break;
+        case '^':              string_catf(string, "xor");  break;
+        case AST_TYPE_LSHIFT:  string_catf(string, "sal");  break;
+        case AST_TYPE_RSHIFT:  string_catf(string, "sar");  break;
+        case AST_TYPE_LRSHIFT: string_catf(string, "shr");  break;
+
+        /* need to be handled specially */
+        case '/': return "@/";
+        case '%': return "@%";
+    }
+    return string_buffer(string);
+error:
+    compile_ice("gen_binary_instruction");
+}
+
+/*
+ * Integer binary arithmetic: left operand ends up in rax, right operand in
+ * rcx, result in rax. Division and modulo use cqto/idiv; shifts take their
+ * count from cl and operate on the a-register sized by the left operand.
+ */
+static void gen_binary_arithmetic_integer(ast_t *ast) {
+    const char *op = gen_binary_instruction(ast);
+    gen_expression(ast->left);
+    gen_push(SRAX);
+    gen_expression(ast->right);
+    gen_emit("mov %%rax, %%rcx");
+    gen_pop(SRAX);
+
+    if (*op == '@') {
+        gen_emit("cqto");
+        gen_emit("idiv %%rcx");
+        /*
+         * idiv %rcx leaves a 64-bit remainder in rdx. Copy all of it:
+         * moving only edx dropped the upper half (and the sign) of the
+         * result.
+         */
+        if (op[1] == '%')
+            gen_emit("mov %%rdx, %%rax");
+    } else if (ast->type == AST_TYPE_LSHIFT
+           ||  ast->type == AST_TYPE_RSHIFT
+           ||  ast->type == AST_TYPE_LRSHIFT
+    ) {
+        gen_emit("%s %%cl, %%%s", op, gen_register_integer(ast->left->ctype, 'a'));
+    } else {
+        gen_emit("%s %%rcx, %%rax", op);
+    }
+}
+
+/*
+ * Floating binary arithmetic: left operand ends up in xmm0, right operand
+ * in xmm1, result in xmm0.
+ *
+ * The register copy must match the precision chosen by
+ * gen_binary_instruction. long double is computed with the ...sd
+ * instructions, so it needs movsd; the previous movss copied only the low
+ * 32 bits of the right operand.
+ */
+static void gen_binary_arithmetic_floating(ast_t *ast) {
+    const char *op = gen_binary_instruction(ast);
+    gen_expression(ast->left);
+    gen_push_xmm(0);
+    gen_expression(ast->right);
+    if (ast->ctype->type == TYPE_DOUBLE || ast->ctype->type == TYPE_LDOUBLE)
+        gen_emit("movsd %%xmm0, %%xmm1");
+    else
+        gen_emit("movss %%xmm0, %%xmm1");
+    gen_pop_xmm(0);
+    gen_emit("%s %%xmm1, %%xmm0", op);
+}
+
+/*
+ * Convert the value currently in rax/xmm0 from type `from` to type `to`.
+ * Covers integer to float/double, float <-> double, anything to _Bool and
+ * floating to integer; other combinations emit nothing.
+ */
+void gen_load_convert(data_type_t *to, data_type_t *from) {
+    if (ast_type_isinteger(from) && to->type == TYPE_FLOAT)
+        gen_emit("cvtsi2ss %%eax, %%xmm0");
+    else if (ast_type_isinteger(from) && to->type == TYPE_DOUBLE)
+        gen_emit("cvtsi2sd %%eax, %%xmm0");
+    else if (from->type == TYPE_FLOAT && to->type == TYPE_DOUBLE)
+        gen_emit("cvtps2pd %%xmm0, %%xmm0");
+    else if (from->type == TYPE_DOUBLE && to->type == TYPE_FLOAT)
+        gen_emit("cvtpd2ps %%xmm0, %%xmm0");
+    else if (to->type == TYPE_BOOL)
+        gen_cast_bool(from);
+    else if (ast_type_isinteger(to))
+        gen_cast_int(from);
+}
+
+/* Evaluate the operand, then convert it to the node's own type. */
+void gen_conversion(ast_t *ast) {
+    gen_expression(ast->unary.operand);
+    gen_load_convert(ast->ctype, ast->unary.operand->ctype);
+}
+
+/*
+ * Generate any binary node not handled elsewhere: pointer arithmetic when
+ * the result is a pointer, comparisons via the matching setcc, otherwise
+ * integer or floating arithmetic according to the result type.
+ */
+void gen_binary(ast_t *ast) {
+    if (ast->ctype->type == TYPE_POINTER) {
+        gen_pointer_arithmetic(ast->type, ast->left, ast->right);
+        return;
+    }
+
+    switch (ast->type) {
+        case '<':             gen_comparision("setl",  ast); return;
+        case '>':             gen_comparision("setg",  ast); return;
+        case AST_TYPE_EQUAL:  gen_comparision("sete",  ast); return;
+        case AST_TYPE_GEQUAL: gen_comparision("setge", ast); return;
+        case AST_TYPE_LEQUAL: gen_comparision("setle", ast); return;
+        case AST_TYPE_NEQUAL: gen_comparision("setne", ast); return;
+    }
+
+    if (ast_type_isinteger(ast->ctype))
+        gen_binary_arithmetic_integer(ast);
+    else if (ast_type_isfloating(ast->ctype))
+        gen_binary_arithmetic_floating(ast);
+    else
+        compile_ice("gen_binary");
+}
+
+void gen_literal_save(ast_t *ast, data_type_t *type, int offset) {
+    uint64_t load64  = ((uint64_t)ast->integer);
+    uint32_t load32  = ast->integer;
+    float    loadf32 = ast->floating.value;
+    double 
loadf64 = ast->floating.value; + + gen_emit("# literal save {"); + switch (type->type) { + case TYPE_BOOL: gen_emit("movb $%d, %d(%%rbp)", !!ast->integer, offset); break; + case TYPE_CHAR: gen_emit("movb $%d, %d(%%rbp)", load32, offset); break; + case TYPE_SHORT: gen_emit("movw $%d, %d(%%rbp)", load32, offset); break; + case TYPE_INT: gen_emit("movl $%d, %d(%%rbp)", load32, offset); break; + case TYPE_LONG: + case TYPE_LLONG: + case TYPE_POINTER: + gen_emit("movl $0x%" PRIx64 ", %d(%%rbp)", load64 & 0xFFFFFFFF, offset); + gen_emit("movl $0x%" PRIx64 ", %d(%%rbp)", load64 >> 32, offset + 4); + break; + case TYPE_FLOAT: + load32 = TYPEPUN(uint32_t, loadf32); + gen_emit("movl $0x%" PRIx32 ", %d(%%rbp)", load32, offset); + break; + case TYPE_DOUBLE: + load64 = TYPEPUN(uint64_t, loadf64); + gen_emit("movl $0x%" PRIx64 ", %d(%%rbp)", load64 & 0xFFFFFFFF, offset); + gen_emit("movl $0x%" PRIx64 ", %d(%%rbp)", load64 >> 32, offset + 4); + break; + + default: + compile_ice("gen_literal_save"); + } + gen_emit("# }"); +} + +void gen_prefix(ast_t *ast, const char *op) { + gen_expression(ast->unary.operand); + if (ast->ctype->type == TYPE_POINTER) + gen_emit("%s $%d, %%rax", op, ast->ctype->pointer->size); + else + gen_emit("%s $1, %%rax", op); + gen_store(ast->unary.operand); +} + +void gen_postfix(ast_t *ast, const char *op) { + gen_expression(ast->unary.operand); + gen_push(SRAX); + if (ast->ctype->type == TYPE_POINTER) + gen_emit("%s $%d, %%rax", op, ast->ctype->pointer->size); + else + gen_emit("%s $1, %%rax", op); + gen_store(ast->unary.operand); + gen_pop(SRAX); +} + +static void gen_register_area_calculate(list_t *args) { + gp = 0; + fp = 0; + for (list_iterator_t *it = list_iterator(args); !list_iterator_end(it); ) + (*((ast_type_isfloating(((ast_t*)list_iterator_next(it))->ctype)) ? 
&fp : &gp)) ++; +} + +void gen_je(const char *label) { + gen_emit("test %%rax, %%rax"); + gen_emit("je %s", label); +} + +void gen_cast(ast_t *ast) { + gen_expression(ast->unary.operand); + gen_load_convert(ast->ctype, ast->unary.operand->ctype); +} + +void gen_literal(ast_t *ast) { + switch (ast->ctype->type) { + case TYPE_CHAR: + case TYPE_BOOL: + gen_emit("mov $%d, %%rax", ast->integer); + break; + case TYPE_INT: + gen_emit("mov $%d, %%rax", ast->integer); + break; + case TYPE_LONG: + case TYPE_LLONG: + gen_emit("mov $%" PRIi64 ", %%rax", (uint64_t)ast->integer); + break; + + case TYPE_FLOAT: + if (!ast->floating.label) { + ast->floating.label = ast_label(); + float fval = ast->floating.value; + int *iptr = (int*)&fval; + gen_emit_inline(".data"); + gen_label(ast->floating.label); + gen_emit(".long %d", *iptr); + gen_emit_inline(".text"); + } + gen_emit("movss %s(%%rip), %%xmm0", ast->floating.label); + break; + + case TYPE_DOUBLE: + case TYPE_LDOUBLE: + if (!ast->floating.label) { + ast->floating.label = ast_label(); + double dval = ast->floating.value; + int *iptr = (int*)&dval; + gen_emit_inline(".data"); + gen_label(ast->floating.label); + gen_emit(".long %d", iptr[0]); + gen_emit(".long %d", iptr[1]); + gen_emit_inline(".text"); + } + gen_emit("movsd %s(%%rip), %%xmm0", ast->floating.label); + break; + + default: + compile_ice("gen_expression (%s)", ast_type_string(ast->ctype)); + } +} + +void gen_literal_string(ast_t *ast) { + if (!ast->string.label) { + ast->string.label = ast_label(); + gen_emit_inline(".data"); + gen_label(ast->string.label); + gen_emit(".string \"%s\"", string_quote(ast->string.data)); + gen_emit_inline(".text"); + } + gen_emit("lea %s(%%rip), %%rax", ast->string.label); +} + +void gen_variable_local(ast_t *ast) { + gen_ensure_lva(ast); + gen_load_local(ast->ctype, "rbp", ast->variable.off); +} + +void gen_variable_global(ast_t *ast) { + gen_load_global(ast->ctype, ast->variable.label, 0); +} + +void gen_dereference(ast_t *ast) { + 
gen_expression(ast->unary.operand); + gen_load_local(ast->unary.operand->ctype->pointer, SRAX, 0); + gen_load_convert(ast->ctype, ast->unary.operand->ctype->pointer); +} + +static void gen_function_args_classify(list_t *i, list_t *f, list_t *r, list_t *a) { + int ir = 0; + int xr = 0; + int mi = REGISTER_MULT_SIZE; + int mx = REGISTER_MULT_SIZE_XMM; + + list_iterator_t *it = list_iterator(a); + while (!list_iterator_end(it)) { + ast_t *value = list_iterator_next(it); + if (value->ctype->type == TYPE_STRUCTURE) + list_push(r, value); + else if (ast_type_isfloating(value->ctype)) + list_push((xr++ < mx) ? f : r, value); + else + list_push((ir++ < mi) ? i : r, value); + } +} + +static void gen_function_args_save(int in, int fl) { + gen_emit("# function args save {"); + for (int i = 0; i < in; i++) gen_push(NREG(i)); + for (int i = 1; i < fl; i++) gen_push_xmm(i); + gen_emit("# }"); +} +static void gen_function_args_restore(int in, int fl) { + gen_emit("# function args restore {"); + for (int i = fl - 1; i > 0; i--) gen_pop_xmm(i); + for (int i = in - 1; i >= 0; i--) gen_pop(NREG(i)); + gen_emit("# }"); +} +static void gen_function_args_popi(int l) { + gen_emit("# function args pop {"); + for (int i = l - 1; i >= 0; i--) gen_pop(NREG(i)); + gen_emit("# }"); +} +static void gen_function_args_popf(int l) { + gen_emit("# function args pop (xmm registers) {"); + for (int i = l - 1; i >= 0; i--) gen_pop_xmm(i); + gen_emit("# }"); +} + +static int gen_function_args(list_t *args) { + gen_emit("# functiona arguments { "); + int rest = 0; + list_iterator_t *it = list_iterator(args); + while (!list_iterator_end(it)) { + ast_t *value = list_iterator_next(it); + if (value->ctype->type == TYPE_STRUCTURE) { + gen_address(value); + rest += gen_structure_push(value->ctype->size); + } else if (ast_type_isfloating(value->ctype)) { + gen_expression(value); + gen_push_xmm(0); + rest += 8; + } else { + gen_expression(value); + gen_push(SRAX); + rest += 8; + } + } + gen_emit("# } "); + 
return rest; +} + /* Emit a regular or function-pointer call: save live argument registers, pad the stack by 8 when `stack` is not a multiple of 16, push the leftover (stack-passed) arguments in reverse, push then pop the register arguments into place, call, and undo the stack adjustments. Raises an ICE if `stack` does not return to its entry value. */ +static void gen_function_call_default(ast_t *ast) { + int save = stack; + bool fptr = (ast->type == AST_TYPE_POINTERCALL); + data_type_t *type = fptr ? ast->function.call.functionpointer->ctype->pointer + : ast->function.call.type; + + gen_emit("# function call {"); + + /* deal with arguments */ + list_t *in = list_create(); + list_t *fl = list_create(); + list_t *re = list_create(); + + gen_function_args_classify(in, fl, re, ast->function.call.args); + gen_function_args_save(list_length(in), list_length(fl)); + + bool algn = stack % 16; + if (algn) { + gen_emit("sub $8, %%rsp"); + stack += 8; + } + + int rest = gen_function_args(list_reverse(re)); + + if (fptr) { + gen_expression(ast->function.call.functionpointer); + gen_push(SRAX); + } + + gen_function_args(in); + gen_function_args(fl); + gen_function_args_popf(list_length(fl)); + gen_function_args_popi(list_length(in)); + + if (fptr) + gen_pop(SR11); + + if (type->hasdots) + gen_emit("mov $%d, %%eax", list_length(fl)); + + if (fptr) + gen_emit("call *%%r11"); + else + gen_emit("call %s", ast->function.name); + + gen_boolean_maybe(ast->ctype); + + if (rest > 0) { + gen_emit("add $%d, %%rsp", rest); + stack -= rest; + } + + if (algn) { + gen_emit("add $8, %%rsp"); + stack -= 8; + } + + gen_function_args_restore(list_length(in), list_length(fl)); + + gen_emit("# }"); + + if (stack != save) + compile_ice("gen_function_call (stack out of alignment)"); +} + /* Dispatch a call: everything except a call named __builtin_return_address goes through gen_function_call_default. */ +void gen_function_call(ast_t *ast) { + char *loopbeg; + char *loopend; + + if (!ast->function.name || strcmp(ast->function.name, "__builtin_return_address")) { + gen_function_call_default(ast); + return; + } + + /* + * deal with builtin return address extension. This should be + * as easy as emitting the expression for the return address + * argument and using some loops.
+ */ + gen_push(SR11); + gen_expression(list_head(ast->function.call.args)); + loopbeg = ast_label(); + loopend = ast_label(); + gen_emit("mov %%rbp, %%r11"); + gen_label(loopbeg); + gen_emit("test %%rax, %%rax"); + gen_emit("jz %s", loopend); + gen_emit("mov (%%r11), %%r11"); + gen_emit("dec %%rax"); + gen_jump(loopbeg); + gen_label(loopend); + gen_emit("mov 8(%%r11), %%rax"); + gen_pop(SR11); +} + /* Emit one case test: close the previous case's fall-through label, compare %eax with the single value or the [casebeg, caseend] range, and jump to the next (fresh) switch label on mismatch. */ +void gen_case(ast_t *ast) { + char *skip; + gen_jump((skip = ast_label())); + gen_label(gen_label_switch); + gen_label_switch = ast_label(); + gen_emit("cmp $%d, %%eax", ast->casebeg); + if (ast->casebeg == ast->caseend) + gen_emit("jne %s", gen_label_switch); + else { + gen_emit("jl %s", gen_label_switch); + gen_emit("cmp $%d, %%eax", ast->caseend); + gen_emit("jg %s", gen_label_switch); + } + gen_label(skip); +} + /* Initialise the va_list at the address computed from ast->ap: 32-bit integer offset at 0, 32-bit floating offset at 4, register save area pointer at 16. */ +void gen_va_start(ast_t *ast) { + gen_expression(ast->ap); + gen_push(SRCX); + gen_emit("movl $%d, (%%rax)", gp * 8); + gen_emit("movl $%d, 4(%%rax)", 48 + fp * 16); + gen_emit("lea %d(%%rbp), %%rcx", -REGISTER_AREA_SIZE); + gen_emit("mov %%rcx, 16(%%rax)"); + gen_pop(SRCX); +} + /* Fetch the next variadic argument from the register save area and advance the matching 32-bit offset in the va_list (offset 4 for floating values, offset 0 otherwise). */ +void gen_va_arg(ast_t *ast) { + gen_expression(ast->ap); + gen_emit("nop"); + gen_push(SRCX); + gen_push("rbx"); + gen_emit("mov 16(%%rax), %%rcx"); + if (ast_type_isfloating(ast->ctype)) { + gen_emit("mov 4(%%rax), %%ebx"); + gen_emit("add %%rbx, %%rcx"); + gen_emit("add $16, %%ebx"); + gen_emit("mov %%ebx, 4(%%rax)"); + gen_emit("movsd (%%rcx), %%xmm0"); + if (ast->ctype->type == TYPE_FLOAT) + gen_emit("cvtpd2ps %%xmm0, %%xmm0"); + } else { + gen_emit("mov (%%rax), %%ebx"); + gen_emit("add %%rbx, %%rcx"); + gen_emit("add $8, %%ebx"); + /* The offset at (%rax) is 32 bits wide (gen_va_start writes it with movl); storing %rbx here used to zero the floating offset at 4(%rax). */ + gen_emit("mov %%ebx, (%%rax)"); + gen_emit("mov (%%rcx), %%rax"); + } + gen_pop(SRBX); + gen_pop(SRCX); +} + /* Logical not: %eax becomes 1 when the operand is zero, 0 otherwise. */ +void gen_not(ast_t *ast) { + gen_expression(ast->unary.operand); + gen_emit("cmp $0, %%rax"); + gen_emit("sete %%al"); + gen_emit("movzb %%al, %%eax"); +} + +void gen_and(ast_t *ast) { + char *end = ast_label(); +
gen_expression(ast->left); + gen_emit("test %%rax, %%rax"); + gen_emit("mov $0, %%rax"); + gen_emit("je %s", end); + gen_expression(ast->right); + gen_emit("test %%rax, %%rax"); + gen_emit("mov $0, %%rax"); + gen_emit("je %s", end); + gen_emit("mov $1, %%rax"); + gen_label(end); +} + /* Short-circuit logical or: %rax is 1 as soon as either operand is non-zero, else 0. */ +void gen_or(ast_t *ast) { + char *end = ast_label(); + gen_expression(ast->left); + gen_emit("test %%rax, %%rax"); + gen_emit("mov $1, %%rax"); + gen_emit("jne %s", end); + gen_expression(ast->right); + gen_emit("test %%rax, %%rax"); + gen_emit("mov $1, %%rax"); + gen_emit("jne %s", end); + gen_emit("mov $0, %%rax"); + gen_label(end); +} + +void gen_struct(ast_t *ast) { + gen_load_structure(ast->structure, ast->ctype, 0); +} + /* Bitwise and / or: left operand is parked on the stack, popped into %rcx, and combined into %rax. */ +void gen_bitandor(ast_t *ast) { + static const char *instruction[] = { "and", "or" }; + gen_expression(ast->left); + gen_push(SRAX); + gen_expression(ast->right); + gen_pop(SRCX); + gen_emit("%s %%rcx, %%rax", instruction[!!(ast->type == '|')]); +} + +void gen_bitnot(ast_t *ast) { + gen_expression(ast->left); + gen_emit("not %%rax"); +} + /* Arithmetic negation. Integers use `neg`. Floating values are computed as 0 - x: the operand is moved to %xmm1, %xmm0 is zeroed, and %xmm1 is subtracted from it. The previous sequence zeroed %xmm1 and subtracted it from %xmm0, which left the operand unchanged. Note that 0 - x yields +0.0 for x == +0.0. */ +void gen_negate(ast_t *ast) { + gen_expression(ast->unary.operand); + if (ast_type_isfloating(ast->ctype)) { + gen_push_xmm(1); + gen_emit("movaps %%xmm0, %%xmm1"); + gen_emit("xorpd %%xmm0, %%xmm0"); + /* Only TYPE_FLOAT is held as a single; gen_literal loads double and long double with movsd. */ + if (ast->ctype->type == TYPE_FLOAT) + gen_emit("subss %%xmm1, %%xmm0"); + else + gen_emit("subsd %%xmm1, %%xmm0"); + gen_pop_xmm(1); + return; + } + gen_emit("neg %%rax"); +} + /* Assignment: structures larger than 8 bytes are copied by gen_structure_assign; anything else is evaluated, converted to the assignment's type and stored. */ +void gen_assign(ast_t *ast) { + if (ast->left->ctype->type == TYPE_STRUCTURE) { + if (ast->left->ctype->size > 8) { + gen_structure_assign(ast->left, ast->right); + return; + } + } + gen_expression(ast->right); + gen_load_convert(ast->ctype, ast->right->ctype); + gen_store(ast->left); +} + +int parse_evaluate(ast_t *ast); +static void gen_data_zero(int size) { + for (; size >= 8; size -= 8) gen_emit(".quad 0"); + for (; size >= 4; size -= 4) gen_emit(".long 0"); + for (; size > 0; size --) gen_emit(".byte 0"); +} + +static void gen_data_padding(ast_t *ast, int offset) { + int d =
ast->init.offset - offset; + if (d < 0) + compile_ice("gen_data_padding"); + gen_data_zero(d); +} + /* Emit data directives for the initializer list `inits` of an object of `size` bytes, starting at byte `offset`; gaps between initializers and the unused tail are zero-filled. `depth` selects the `.data` subsection, and nested objects are emitted at depth + 1. */ +static void gen_data_intermediate(list_t *inits, int size, int offset, int depth) { + uint64_t load64; + uint32_t load32; + + list_iterator_t *it = list_iterator(inits); + while (!list_iterator_end(it) && 0 < size) { + ast_t *node = list_iterator_next(it); + ast_t *v = node->init.value; + + gen_data_padding(node, offset); + offset += node->init.type->size; + size -= node->init.type->size; + + if (v->type == AST_TYPE_ADDRESS) { + char *label; + switch (v->unary.operand->type) { + case AST_TYPE_VAR_LOCAL: + label = ast_label(); + gen_emit(".data %d", depth + 1); + gen_label(label); + gen_data_intermediate(v->unary.operand->variable.init, v->unary.operand->ctype->size, 0, depth + 1); + gen_emit(".data %d", depth); + gen_emit(".quad %s", label); + continue; + + case AST_TYPE_VAR_GLOBAL: + gen_emit(".quad %s", v->unary.operand->variable.name); + continue; + + default: + compile_ice("gen_datat_intermediate"); + } + } + + if (node->init.value->type == AST_TYPE_VAR_LOCAL && node->init.value->variable.init) { + gen_data_intermediate(v->variable.init, v->ctype->size, 0, depth); + continue; + } + + if (v->ctype->type == TYPE_ARRAY && v->ctype->pointer->type == TYPE_CHAR) { + char *label = ast_label(); + gen_emit(".data %d", depth + 1); + gen_label(label); + gen_emit(".string \"%s\"", string_quote(v->string.data)); + gen_emit(".data %d", depth); + gen_emit(".quad %s", label); + continue; + } + + + /* load alias -- NOTE(review): both puns read the same floating.value; if TYPEPUN is a plain reinterpretation, load32 is not the float encoding of that value. Confirm against the TYPEPUN definition. */ + load32 = TYPEPUN(uint32_t, node->init.value->floating.value); + load64 = TYPEPUN(uint64_t, node->init.value->floating.value); + + switch (node->init.type->type) { + case TYPE_FLOAT: gen_emit(".long 0x%" PRIx32, load32); break; + case TYPE_DOUBLE: gen_emit(".quad 0x%" PRIx64, load64); break; + case TYPE_CHAR: gen_emit(".byte %d", parse_evaluate(node->init.value)); break; + case TYPE_SHORT: gen_emit(".short %d", parse_evaluate(node->init.value)); break; + case
TYPE_INT: gen_emit(".long %d", parse_evaluate(node->init.value)); break; + + case TYPE_LONG: + case TYPE_LLONG: + case TYPE_POINTER: + if (node->init.value->type == AST_TYPE_VAR_GLOBAL) + gen_emit(".quad %s", node->init.value->variable.name); + else + /* parse_evaluate returns int; %ld needs a long argument. */ + gen_emit(".quad %ld", (long)parse_evaluate(node->init.value)); + break; + + default: + compile_ice("gen_data_intermediate (%s)", ast_type_string(node->init.type)); + } + } + gen_data_zero(size); +} + /* Emit a global object: select the .data subsection `depth`, export the symbol unless it is static, define its label and emit its initializer data. */ +void gen_data(ast_t *ast, int offset, int depth) { + gen_emit(".data %d", depth); + if (!ast->decl.var->ctype->isstatic) + gen_emit_inline(".global %s", ast->decl.var->variable.name); + gen_emit_inline("%s:", ast->decl.var->variable.name); + gen_data_intermediate(ast->decl.init, ast->decl.var->ctype->size, offset, depth); +} + /* Spill the six integer argument registers, then as many xmm registers as %al counts, below %rsp, and reserve REGISTER_AREA_SIZE bytes. Returns the number of bytes reserved. */ +static int gen_register_area(void) { + int top = -REGISTER_AREA_SIZE; + gen_emit("mov %%rdi, %d(%%rsp)", top); + gen_emit("mov %%rsi, %d(%%rsp)", (top += 8)); + gen_emit("mov %%rdx, %d(%%rsp)", (top += 8)); + gen_emit("mov %%rcx, %d(%%rsp)", (top += 8)); + gen_emit("mov %%r8, %d(%%rsp)", (top += 8)); + gen_emit("mov %%r9, %d(%%rsp)", top + 8); + + char *end = ast_label(); + for (int i = 0; i < 16; i++) { + gen_emit("test %%al, %%al"); + gen_emit("jz %s", end); + gen_emit("movsd %%xmm%d, %d(%%rsp)", i, (top += 16)); + gen_emit("sub $1, %%al"); + } + gen_label(end); + gen_emit("sub $%d, %%rsp", REGISTER_AREA_SIZE); + return REGISTER_AREA_SIZE; +} + +static void gen_function_parameters(list_t *parameters, int offset) { + gen_emit("# function parameters { "); + int ir = 0; + int xr = 0; + int ar = REGISTER_MULT_SIZE_XMM - REGISTER_MULT_SIZE; + + for (list_iterator_t *it = list_iterator(parameters); !list_iterator_end(it); ) { + ast_t *value = list_iterator_next(it); + if (value->ctype->type == TYPE_STRUCTURE) { + gen_emit("lea %d(%%rbp), %%rax", ar * 8); + int emit = gen_structure_push(value->ctype->size); + offset -= emit; + ar += emit / 8; + } else if (ast_type_isfloating(value->ctype)) { + if (xr >=
REGISTER_MULT_SIZE_XMM) { + gen_emit("mov %d(%%rbp), %%rax", ar++ * 8); + gen_push(SRAX); + } else { + gen_push_xmm(xr++); + } + offset -= 8; + } else { + if (ir >= REGISTER_MULT_SIZE) { + if (value->ctype->type == TYPE_BOOL) { + gen_emit("mov %d(%%rbp), %%al", ar++ * 8); + gen_emit("movzb %%al, %%eax"); + } else { + /* Load the whole 8-byte slot: all of %rax is pushed below, and a byte load into %al kept stale upper bits. */ + gen_emit("mov %d(%%rbp), %%rax", ar++ * 8); + } + gen_push(SRAX); + } else { + if (value->ctype->type == TYPE_BOOL) + gen_emit("movsb %%%s, %%%s", SREG(ir), MREG(ir)); + gen_push(NREG(ir++)); + } + offset -= 8; + } + value->variable.off = offset; + } + gen_emit("# }"); +} + /* Emit the function label and frame setup, spill the register save area for variadic functions, copy parameters into the frame, and reserve 8-byte-aligned slots for every local (recording each slot's offset from %rbp). */ +void gen_function_prologue(ast_t *ast) { + gen_emit("# function prologue {"); + gen_emit_inline(".text"); + if (!ast->ctype->isstatic) + gen_emit_inline(".global %s", ast->function.name); + gen_emit_inline("%s:", ast->function.name); + gen_emit("nop"); + gen_push("rbp"); + gen_emit("mov %%rsp, %%rbp"); + + int offset = 0; + + if (ast->ctype->hasdots) { + gen_register_area_calculate(ast->function.params); + offset -= gen_register_area(); + } + + gen_function_parameters(ast->function.params, offset); + offset -= list_length(ast->function.params) * 8; + + int localdata = 0; + for (list_iterator_t *it = list_iterator(ast->function.locals); !list_iterator_end(it); ) { + ast_t *value = list_iterator_next(it); + int align = gen_alignment(value->ctype->size, 8); + + offset -= align; + value->variable.off = offset; + localdata += align; + } + + if (localdata) { + gen_emit("sub $%d, %%rsp", localdata); + stack += localdata; + } + gen_emit("# }"); +} + /* Note a non-zero tracked stack depth in the assembly output, then emit the return sequence. */ +void gen_function_epilogue(void) { + if (stack != 0) + gen_emit("# stack misalignment: %d\n", stack); + gen_return(); +} + +void gen_return(void) { + gen_emit("leave"); + gen_emit("ret"); +} + /* Reset the tracked stack depth to 8 at the start of a function; `ast` is unused. */ +void gen_function(ast_t *ast) { + (void)ast; + stack = 8; +} diff --git a/init.c b/init.c new file mode 100644 index 0000000..471e43f --- /dev/null +++ b/init.c @@ -0,0 +1,302 @@ +/* + * This file implements a small state machine for handling all the
forms + * of initialization C offers. It's called from the parser just like + * declaration specification. It's a sub portion of the core parser, + * separated from all that logic due to the nature of initializer + * complexity. + */ +#include "parse.h" +#include "init.h" +#include "lice.h" +#include "lexer.h" +#include "conv.h" + +#include +#include + +static void init_element (list_t *, data_type_t *, int, bool); +static void init_structure (list_t *, data_type_t *, int, bool); +static void init_array (list_t *, data_type_t *, int, bool); +static void init_list (list_t *, data_type_t *, int, bool); + +/* + * Initializer elements need to be sorted by semantic order, instead + * of lexical order since designated initializers are allowed to + * overwrite previously assigned fields lexically, but the order needs + * to stay dependent on semantics. It's also generally more efficient for + * initialization to stay sorted. The predicate orders initializer nodes + * by ascending init.offset. + */ +static int init_sort_predicate(const void *p, const void *q) { + const ast_t *const *restrict a = p; + const ast_t *const *restrict b = q; + + return (*a)->init.offset < (*b)->init.offset ? -1 : + (*a)->init.offset == (*b)->init.offset ? 0 : 1; +} + /* Sort `inits` in place by offset: copy the nodes into a temporary array, qsort it, then empty and refill the list. */ +static void init_sort(list_t *inits) { + size_t length = list_length(inits); + size_t index = 0; + ast_t **temp = memory_allocate(sizeof(ast_t *) * length); + list_iterator_t *it = list_iterator(inits); + + while (!list_iterator_end(it)) + temp[index++] = list_iterator_next(it); + + qsort(temp, length, sizeof(ast_t *), &init_sort_predicate); + + list_empty(inits); + for (index = 0; index < length; index++) + list_push(inits, temp[index]); +} + /* When a designator ('.' or '[') appears in an aggregate that has no brace of its own and was not itself designated, push the token back and report true so the caller returns to the enclosing initializer. */ +static bool init_earlyout(lexer_token_t *token, bool brace, bool designated) { + if ((lexer_ispunct(token, '.') || lexer_ispunct(token, '[')) && !brace && !designated) { + lexer_unget(token); + return true; + } + return false; +} + +/* + * Utility routines to determine and skip to braces for initialization + * involving aggregates.
+ */ +static bool init_skip_brace_maybe(void) { + lexer_token_t *token = lexer_next(); + if (lexer_ispunct(token, '{')) + return true; + lexer_unget(token); + return false; +} + /* Consume a ',' if it is the next token; otherwise leave the token stream untouched. */ +static void init_skip_comma_maybe(void) { + lexer_token_t *token = lexer_next(); + + if (!lexer_ispunct(token, ',')) + lexer_unget(token); +} + /* Discard excess initializer elements up to the closing '}', warning for each element skipped. */ +static void init_skip_brace(void) { + for (;;) { + /* + * Potentially infinite lookahead, got to love C's grammar for + * this sort of crap. + */ + lexer_token_t *token = lexer_next(); + if (lexer_ispunct(token, '}')) + return; + + if (lexer_ispunct(token, '.')) { + lexer_next(); + parse_expect('='); + } else { + lexer_unget(token); + } + + ast_t *ignore = parse_expression_assignment(); + if (!ignore) + return; + + compile_warn("excess elements in initializer"); + init_skip_comma_maybe(); + } +} + +/* + * Structure and array initialization routines: + * deals with standard initialization via aggregate initializer, as well + * as designated initialization, and nested aggregate + designation. In + * the case of array designated initialization array subscripting is + * handled, whereas in the case of structure designated initialization + * field members are indexed by .fieldname. The GCC style of designated + * initializers isn't supported yet, neither is range initialization.
+ */ +static void init_structure_intermediate(list_t *init, data_type_t *type, int offset, bool designated) { + bool brace = init_skip_brace_maybe(); + list_iterator_t *it = list_iterator(table_keys(type->fields)); + + for (;;) { + lexer_token_t *token = lexer_next(); + if (lexer_ispunct(token, '}')) { + if (!brace) + lexer_unget(token); + return; + } + + char *fieldname; + data_type_t *fieldtype; + + if (init_earlyout(token, brace, designated)) + return; + + if (lexer_ispunct(token, '.')) { + if (!(token = lexer_next()) || token->type != LEXER_TOKEN_IDENTIFIER) + compile_error("invalid designated initializer"); + fieldname = token->string; + if (!(fieldtype = table_find(type->fields, fieldname))) + compile_error("field `%s' doesn't exist in designated initializer", fieldname); + /* restart the field iterator and advance it just past the designated field */ + it = list_iterator(table_keys(type->fields)); + while (!list_iterator_end(it)) + if (!strcmp(fieldname, list_iterator_next(it))) + break; + designated = true; + } else { + lexer_unget(token); + if (list_iterator_end(it)) + break; + + fieldname = list_iterator_next(it); + fieldtype = table_find(type->fields, fieldname); + } + init_element(init, fieldtype, offset + fieldtype->offset, designated); + init_skip_comma_maybe(); + designated = false; + /* when isstruct is false, stop after the first initialized member */ + if (!type->isstruct) + break; + } + if (brace) + init_skip_brace(); +} + /* Parse an array initializer into `init`. A length <= 0 is treated as unbounded while parsing; a negative length is replaced by the final index (and the size recomputed) at `complete`. */ +static void init_array_intermediate(list_t *init, data_type_t *type, int offset, bool designated) { + bool brace = init_skip_brace_maybe(); + bool flexible = (type->length <= 0); + int size = type->pointer->size; + int i; + + for (i = 0; flexible || i < type->length; i++) { + lexer_token_t *token = lexer_next(); + if (lexer_ispunct(token, '}')) { + if (!brace) + lexer_unget(token); + goto complete; + } + + if (init_earlyout(token, brace, designated)) + return; + + if (lexer_ispunct(token, '[')) { + /* designated array initializer */ + int index = parse_expression_evaluate(); + if (index < 0 || (!flexible && type->length <= index)) + compile_error("out of bounds"); + i =
index; + parse_expect(']'); + designated = true; + } else { + lexer_unget(token); + } + init_element(init, type->pointer, offset + size * i, designated); + init_skip_comma_maybe(); + designated = false; + } + if (brace) + init_skip_brace(); + +complete: + /* an array declared without a length takes its length and size from the number of elements parsed */ + if (type->length < 0) { + type->length = i; + type->size = size * i; + } +} + +/* + * Intermediate stages deal with all the logic, these functions are + * just tail calls (hopefully optimized) to the intermediate stages followed + * by a sorting of the elements to honor semantic ordering of initialization. + */ +static void init_structure(list_t *init, data_type_t *type, int offset, bool designated) { + init_structure_intermediate(init, type, offset, designated); + init_sort(init); +} + /* Array counterpart of init_structure: parse, then sort by offset. */ +static void init_array(list_t *init, data_type_t *type, int offset, bool designated) { + init_array_intermediate(init, type, offset, designated); + init_sort(init); +} + +/* + * The entry points to the initializers, single element initialization + * and initializer list initialization will dispatch into the appropriate + * initialization parsing routines as defined above.
+ */ +static void init_element(list_t *init, data_type_t *type, int offset, bool designated) { + parse_next('='); + if (type->type == TYPE_ARRAY || type->type == TYPE_STRUCTURE) + init_list(init, type, offset, designated); + else if (parse_next('{')) { + init_element(init, type, offset, designated); + parse_expect('}'); + } else { + ast_t *expression = parse_expression_assignment(); + parse_semantic_assignable(type, expression->ctype); + list_push(init, ast_initializer(expression, type, offset)); + } +} + /* Expand string `p` into one char initializer per byte starting at `offset`, zero-filling up to type->length; a length of -1 is first set to strlen(p) + 1. */ +static void init_string(list_t *init, data_type_t *type, char *p, int offset) { + if (type->length == -1) + type->length = type->size = strlen(p) + 1; + + int i = 0; + for (; i < type->length && *p; i++) { + list_push(init, ast_initializer( + ast_new_integer(ast_data_table[AST_DATA_CHAR], *p++), + ast_data_table[AST_DATA_CHAR], offset + i + )); + } + for (; i < type->length; i++) { + list_push(init, ast_initializer( + ast_new_integer(ast_data_table[AST_DATA_CHAR], 0), + ast_data_table[AST_DATA_CHAR], offset + i + )); + } +} + /* Dispatch an initializer list: string types accept a bare or braced string literal; arrays and structures go to their parsers; any other type is parsed as a one-element array. */ +static void init_list(list_t *init, data_type_t *type, int offset, bool designated) { + lexer_token_t *token = lexer_next(); + if (ast_type_isstring(type)) { + if (token->type == LEXER_TOKEN_STRING) { + init_string(init, type, token->string, offset); + return; + } + + if (lexer_ispunct(token, '{') && lexer_peek()->type == LEXER_TOKEN_STRING) { + token = lexer_next(); + init_string(init, type, token->string, offset); + parse_expect('}'); + return; + } + } + lexer_unget(token); + + if (type->type == TYPE_ARRAY) + init_array(init, type, offset, designated); + else if (type->type == TYPE_STRUCTURE) + init_structure(init, type, offset, designated); + else + init_array(init, ast_array(type, 1), offset, designated); +} + +/* + * Actual entry point of the parser, parses an initializer list, while + * also dispatching into the appropriate parser routines depending on + * certain state like, array/structure, designated or not.
+ */ +list_t *init_entry(data_type_t *type) { + list_t *list = list_create(); + if (lexer_ispunct(lexer_peek(), '{') || ast_type_isstring(type)) { + init_list(list, type, 0, false); + return list; + } + /* plain expression initializer: convert to the target type when the kinds differ and conv_capable allows it */ + ast_t *init = parse_expression_assignment(); + if (conv_capable(init->ctype) && init->ctype->type != type->type) + init = ast_type_convert(type, init); + list_push(list, ast_initializer(init, type, 0)); + + return list; +} diff --git a/lexer.c b/lexer.c new file mode 100644 index 0000000..376cb66 --- /dev/null +++ b/lexer.c @@ -0,0 +1,505 @@ +#include +#include +#include +#include + +#include "lexer.h" +#include "util.h" +#include "lice.h" +#include "opt.h" + +static list_t *lexer_buffer = &SENTINEL_LIST; + +typedef struct { + char *file; + size_t line; + FILE *fp; +} lexer_file_t; + /* lexer_continuation holds one pushed-back character, or -1 when empty */ +static int lexer_continuation = -1; +static lexer_file_t lexer_file; + +__attribute__((constructor)) void lexer_init(void) { + lexer_file.file = "(stdin)"; + lexer_file.line = 1; + lexer_file.fp = stdin; +} + /* Push `ch` back: it becomes the pending character, and any character already pending is handed to ungetc first. Pushing back '\n' decrements the line counter. */ +static void lexer_file_unget(int ch) { + if (ch == '\n') + lexer_file.line --; + if (lexer_continuation >= 0) + ungetc(lexer_continuation, lexer_file.fp); + lexer_continuation = ch; +} + +static int lexer_file_get(void) { + int ch = (lexer_continuation < 0) ?
getc(lexer_file.fp) : lexer_continuation; + lexer_continuation = -1; + if (ch == '\\') { + if ((ch = getc(lexer_file.fp)) == '\n') { + lexer_file.line ++; + return lexer_file_get(); + } + lexer_file_unget(ch); + return '\\'; + + } + if (ch == '\n') + lexer_file.line ++; + + return ch; +} + +static lexer_token_t *lexer_token_copy(lexer_token_t *token) { + return memcpy(malloc(sizeof(lexer_token_t)), token, sizeof(lexer_token_t)); +} + +static lexer_token_t *lexer_identifier(string_t *str) { + return lexer_token_copy(&(lexer_token_t){ + .type = LEXER_TOKEN_IDENTIFIER, + .string = string_buffer(str) + }); +} +static lexer_token_t *lexer_strtok(string_t *str) { + return lexer_token_copy(&(lexer_token_t){ + .type = LEXER_TOKEN_STRING, + .string = string_buffer(str) + }); +} +static lexer_token_t *lexer_punct(int punct) { + return lexer_token_copy(&(lexer_token_t){ + .type = LEXER_TOKEN_PUNCT, + .punct = punct + }); +} +static lexer_token_t *lexer_number(char *string) { + return lexer_token_copy(&(lexer_token_t){ + .type = LEXER_TOKEN_NUMBER, + .string = string + }); +} +static lexer_token_t *lexer_char(char value) { + return lexer_token_copy(&(lexer_token_t){ + .type = LEXER_TOKEN_CHAR, + .character = value + }); +} + +static void lexer_skip_comment_line(void) { + for (;;) { + int c = lexer_file_get(); + if (c == EOF) + return; + if (c == '\n') { + lexer_file_unget(c); + return; + } + } +} + +static void lexer_skip_comment_block(void) { + enum { + comment_outside, + comment_astrick + } state = comment_outside; + + for (;;) { + int c = lexer_file_get(); + if (c == '*') + state = comment_astrick; + else if (state == comment_astrick && c == '/') + return; + else + state = comment_outside; + } +} + +static int lexer_skip(void) { + int c; + while ((c = lexer_file_get()) != EOF) { + if (isspace(c) || c == '\n' || c == '\r') + continue; + lexer_file_unget(c); + return c; + } + return EOF; +} + +static lexer_token_t *lexer_read_number(int c) { + string_t *string = 
string_create(); + string_cat(string, c); + for (;;) { + int p = lexer_file_get(); + if (!isdigit(p) && !isalpha(p) && p != '.') { + lexer_file_unget(p); + return lexer_number(string_buffer(string)); + } + string_cat(string, p); + } + return NULL; +} + +static bool lexer_read_character_octal_brace(int c, int *r) { + if ('0' <= c && c <= '7') { + *r = (*r << 3) | (c - '0'); + return true; + } + return false; +} + +static int lexer_read_character_octal(int c) { + int r = c - '0'; + if (lexer_read_character_octal_brace((c = lexer_file_get()), &r)) { + if (!lexer_read_character_octal_brace((c = lexer_file_get()), &r)) + lexer_file_unget(c); + } else + lexer_file_unget(c); + return r; +} + +static bool lexer_read_character_universal_test(unsigned int c) { + if (0x800 <= c && c<= 0xDFFF) + return false; + return 0xA0 <= c || c == '$' || c == '@' || c == '`'; +} + +static int lexer_read_character_universal(int length) { + unsigned int r = 0; + for (int i = 0; i < length; i++) { + int c = lexer_file_get(); + switch (c) { + case '0' ... '9': r = (r << 4) | (c - '0'); continue; + case 'a' ... 'f': r = (r << 4) | (c - 'a' + 10); continue; + case 'A' ... 'F': r = (r << 4) | (c - 'A' + 10); continue; + default: + compile_error("not a valid universal character: %c", c); + + } + } + if (!lexer_read_character_universal_test(r)) { + compile_error( + "not a valid universal character: \\%c%0*x", + (length == 4) ? 'u' : 'U', + length, + r + ); + } + return r; +} + +static int lexer_read_character_hexadecimal(void) { + int c = lexer_file_get(); + int r = 0; + + if (!isxdigit(c)) + compile_error("malformatted hexadecimal character"); + + for (;; c = lexer_file_get()) { + switch (c) { + case '0' ... '9': r = (r << 4) | (c - '0'); continue; + case 'a' ... 'f': r = (r << 4) | (c - 'a' + 10); continue; + case 'A' ... 
'F': r = (r << 4) | (c - 'A' + 10); continue; + + default: + lexer_file_unget(c); + return r; + } + } + return -1; +} + /* Decode the character following a backslash. Unrecognised escapes yield the character itself; '\e' is accepted and maps to 033 (ESC). */ +static int lexer_read_character_escaped(void) { + int c = lexer_file_get(); + + switch (c) { + case '\'': return '\''; + case '"': return '"'; + case '?': return '?'; + case '\\': return '\\'; + case 'a': return '\a'; + case 'b': return '\b'; + case 'f': return '\f'; + case 'n': return '\n'; + case 'r': return '\r'; + case 't': return '\t'; + case 'v': return '\v'; + case 'e': return '\033'; + case '0' ... '7': return lexer_read_character_octal(c); + case 'x': return lexer_read_character_hexadecimal(); + case 'u': return lexer_read_character_universal(4); + case 'U': return lexer_read_character_universal(8); + case EOF: + compile_error("malformatted escape sequence"); + + default: + return c; + } +} + /* Read the body of a character constant (opening quote already consumed); the value is narrowed to char. */ +static lexer_token_t *lexer_read_character(void) { + int c = lexer_file_get(); + int r = (c == '\\') ? lexer_read_character_escaped() : c; + + if (lexer_file_get() != '\'') + compile_error("unterminated character"); + + return lexer_char((char)r); +} + /* Read a string literal up to the closing quote, decoding escapes as it goes. */ +static lexer_token_t *lexer_read_string(void) { + string_t *string = string_create(); + for (;;) { + int c = lexer_file_get(); + if (c == EOF) + compile_error("Expected termination for string literal"); + + if (c == '"') + break; + if (c == '\\') + c = lexer_read_character_escaped(); + string_cat(string, c); + } + return lexer_strtok(string); +} + /* Read an identifier starting with `c1`; continuation characters are alphanumerics, '_' and '$'. */ +static lexer_token_t *lexer_read_identifier(int c1) { + string_t *string = string_create(); + string_cat(string, (char)c1); + + for (;;) { + int c2 = lexer_file_get(); + if (isalnum(c2) || c2 == '_' || c2 == '$') { + string_cat(string, c2); + } else { + lexer_file_unget(c2); + return lexer_identifier(string); + } + } + return NULL; +} + /* Two-character punctuator helper: yields `a` when the next character is `expect1`, otherwise pushes it back and yields `e`. */ +static lexer_token_t *lexer_read_reclassify_one(int expect1, int a, int e) { + int c = lexer_file_get(); + if (c == expect1) + return lexer_punct(a); + lexer_file_unget(c); + return lexer_punct(e); +} +static
lexer_token_t *lexer_read_reclassify_two(int expect1, int a, int expect2, int b, int e) { + int c = lexer_file_get(); + if (c == expect1) + return lexer_punct(a); + if (c == expect2) + return lexer_punct(b); + lexer_file_unget(c); + return lexer_punct(e); +} + +static lexer_token_t *lexer_read_token(void); + +static lexer_token_t *lexer_minicpp(void) { + string_t *string = string_create(); + string_t *method = string_create(); + char *buffer; + int ch; + + for (const char *p = "pragma"; *p; p++) { + if ((ch = lexer_file_get()) != *p) { + string_cat(string, ch); + goto error; + } + } + + for (ch = lexer_file_get(); ch && ch != '\n'; ch = lexer_file_get()) { + if (isspace(ch)) + continue; + string_cat(method, ch); + } + + buffer = string_buffer(method); + + if (!strcmp(buffer, "warning_disable")) + compile_warning = false; + if (!strcmp(buffer, "warning_enable")) + compile_warning = true; + + goto fall; + +error: + buffer = string_buffer(string); + for (char *beg = &buffer[string_length(string)]; beg != &buffer[-1]; --beg) + lexer_file_unget(*beg); + +fall: + lexer_skip_comment_line(); + return lexer_read_token(); +} + +static lexer_token_t *lexer_read_token(void) { + int c; + int n; + + lexer_skip(); + + switch ((c = lexer_file_get())) { + case '0' ... '9': return lexer_read_number(c); + case '"': return lexer_read_string(); + case '\'': return lexer_read_character(); + case 'a' ... 'z': + case 'A' ... 'K': + case 'M' ... 
'Z': + case '_': + return lexer_read_identifier(c); + case '$': + if (opt_extension_test(EXTENSION_DOLLAR)) + return lexer_read_identifier(c); + break; + /* 'L' is split out of the identifier ranges above so that an L prefix on a string or character literal is consumed; the literal is then read like an unprefixed one */ + case 'L': + switch ((c = lexer_file_get())) { + case '"': return lexer_read_string(); + case '\'': return lexer_read_character(); + } + lexer_file_unget(c); + return lexer_read_identifier('L'); + /* '/' starts a line comment, a block comment, '/=' or plain division */ + case '/': + switch ((c = lexer_file_get())) { + case '/': + lexer_skip_comment_line(); + return lexer_read_token(); + case '*': + lexer_skip_comment_block(); + return lexer_read_token(); + } + if (c == '=') + return lexer_punct(LEXER_TOKEN_COMPOUND_DIV); + lexer_file_unget(c); + return lexer_punct('/'); + + // ignore preprocessor lines for now + case '#': + return lexer_minicpp(); + + case '(': case ')': + case ',': case ';': + case '[': case ']': + case '{': case '}': + case '?': case ':': + case '~': + return lexer_punct(c); + + case '+': return lexer_read_reclassify_two('+', LEXER_TOKEN_INCREMENT, '=', LEXER_TOKEN_COMPOUND_ADD, '+'); + case '&': return lexer_read_reclassify_two('&', LEXER_TOKEN_AND, '=', LEXER_TOKEN_COMPOUND_AND, '&'); + case '|': return lexer_read_reclassify_two('|', LEXER_TOKEN_OR, '=', LEXER_TOKEN_COMPOUND_OR, '|'); + case '*': return lexer_read_reclassify_one('=', LEXER_TOKEN_COMPOUND_MUL, '*'); + case '%': return lexer_read_reclassify_one('=', LEXER_TOKEN_COMPOUND_MOD, '%'); + case '=': return lexer_read_reclassify_one('=', LEXER_TOKEN_EQUAL, '='); + case '!': return lexer_read_reclassify_one('=', LEXER_TOKEN_NEQUAL, '!'); + case '^': return lexer_read_reclassify_one('=', LEXER_TOKEN_COMPOUND_XOR, '^'); + + case '-': + switch ((c = lexer_file_get())) { + case '-': return lexer_punct(LEXER_TOKEN_DECREMENT); + case '>': return lexer_punct(LEXER_TOKEN_ARROW); + case '=': return lexer_punct(LEXER_TOKEN_COMPOUND_SUB); + default: + break; + } + lexer_file_unget(c); + return lexer_punct('-'); + + case '<': + if ((c = lexer_file_get()) == '=') + return lexer_punct(LEXER_TOKEN_LEQUAL); + if (c == '<')
+ return lexer_read_reclassify_one('=', LEXER_TOKEN_COMPOUND_LSHIFT, LEXER_TOKEN_LSHIFT); + lexer_file_unget(c); + return lexer_punct('<'); + case '>': + if ((c = lexer_file_get()) == '=') + return lexer_punct(LEXER_TOKEN_GEQUAL); + if (c == '>') + return lexer_read_reclassify_one('=', LEXER_TOKEN_COMPOUND_RSHIFT, LEXER_TOKEN_RSHIFT); + lexer_file_unget(c); + return lexer_punct('>'); + + case '.': + n = lexer_file_get(); + if (isdigit(n)) { + lexer_file_unget(n); + return lexer_read_number(c); + } + if (n == '.') { + string_t *str = string_create(); + string_catf(str, "..%c", lexer_file_get()); + return lexer_identifier(str); + } + lexer_file_unget(n); + return lexer_punct('.'); + + case EOF: + return NULL; + + default: + compile_error("Unexpected character: `%c`", c); + } + return NULL; +} + /* True when `token` is non-NULL and is the punctuator `c`. */ +bool lexer_ispunct(lexer_token_t *token, int c) { + return token && (token->type == LEXER_TOKEN_PUNCT) && (token->punct == c); +} + /* Push a token back onto the lookahead buffer; NULL (end of input) is ignored. */ +void lexer_unget(lexer_token_t *token) { + if (!token) + return; + list_push(lexer_buffer, token); +} + /* Return a buffered token when one is pending, otherwise read a fresh one. */ +lexer_token_t *lexer_next(void) { + if (list_length(lexer_buffer) > 0) + return list_pop(lexer_buffer); + return lexer_read_token(); +} + +lexer_token_t *lexer_peek(void) { + lexer_token_t *token = lexer_next(); + lexer_unget(token); + return token; +} + /* Render a token for diagnostics. A punctuator other than LEXER_TOKEN_EQUAL falls through into the character case below. */ +char *lexer_token_string(lexer_token_t *token) { + string_t *string = string_create(); + if (!token) + return "(null)"; + switch (token->type) { + case LEXER_TOKEN_PUNCT: + if (token->punct == LEXER_TOKEN_EQUAL) { + string_catf(string, "=="); + return string_buffer(string); + } + case LEXER_TOKEN_CHAR: + string_cat(string, token->character); + return string_buffer(string); + case LEXER_TOKEN_NUMBER: + string_catf(string, "%d", token->integer); + return string_buffer(string); + case LEXER_TOKEN_STRING: + string_catf(string, "\"%s\"", token->string); + return string_buffer(string); + case LEXER_TOKEN_IDENTIFIER: + return token->string; + default: + break; + } + compile_ice("unexpected token");
+ return NULL; +} + /* Build a "file:line" string for the current lexer position. */ +char *lexer_marker(void) { + string_t *string = string_create(); + string_catf(string, "%s:%zu", lexer_file.file, lexer_file.line); + return string_buffer(string); +} -- 2.40.0