From aaf71be0a6e44d207803dedebbc646bf25b6c152 Mon Sep 17 00:00:00 2001 From: unknown <> Date: Mon, 17 Nov 2014 20:15:16 +0000 Subject: [PATCH] autocommit for files dated 2014-11-17 20:15:16 --- LICENSE | 24 ++ Makefile | 46 +++ README.md | 92 ++++++ STANDARD | 30 ++ arch_amd64.h | 42 +++ arch_dummy.h | 41 +++ ast.h | 814 +++++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 1089 insertions(+) create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 README.md create mode 100644 STANDARD create mode 100644 arch_amd64.h create mode 100644 arch_dummy.h create mode 100644 ast.h diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..68a49da --- /dev/null +++ b/LICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..bbbf0c1 --- /dev/null +++ b/Makefile @@ -0,0 +1,46 @@ +CC ?= clang +CFLAGS += -Wall -Wextra -Wno-missing-field-initializers -O3 -std=c99 -MD -DLICE_TARGET_AMD64 +LDFLAGS += + +LICESOURCES = ast.c parse.c lice.c gen.c gen_amd64.c lexer.c util.c conv.c decl.c init.c list.c opt.c +ARGSSOURCES = misc/argsgen.c util.c list.c +TESTSOURCES = test.c util.c list.c +LICEOBJECTS = $(LICESOURCES:.c=.o) +TESTOBJECTS = $(TESTSOURCES:.c=.o) +ARGSOBJECTS = $(ARGSSOURCES:.c=.o) +LICEDEPENDS = $(LICESOURCES:.c=.d) +TESTDEPENDS = $(TESTSOURCES:.c=.d) +ARGSDEPENDS = $(ARGSSOURCES:.c=.d) +LICEBIN = lice +TESTBIN = testsuite +ARGSBIN = argsgenerator + +all: $(LICEBIN) $(TESTBIN) + +$(LICEBIN): $(LICEOBJECTS) + $(CC) $(LDFLAGS) $(LICEOBJECTS) -o $@ + +$(TESTBIN): $(TESTOBJECTS) + $(CC) $(LDFLAGS) $(TESTOBJECTS) -o $@ + +$(ARGSBIN): $(ARGSOBJECTS) + $(CC) $(LDFLAGS) $(ARGSOBJECTS) -o $@ + +c.o: + $(CC) -c $(CFLAGS) $< -o $@ + +args: $(ARGSBIN) + @./$(ARGSBIN) + +clean: + rm -f $(LICEOBJECTS) + rm -f $(TESTOBJECTS) + rm -f $(LICEDEPENDS) + rm -f $(TESTDEPENDS) + rm -f $(ARGSDEPENDS) + rm -f $(LICEBIN) + rm -f $(TESTBIN) + rm -f $(ARGSBIN) + +test: $(LICEBIN) $(TESTBIN) + @./$(TESTBIN) diff --git a/README.md b/README.md new file mode 100644 index 0000000..73240f2 --- /dev/null +++ b/README.md @@ -0,0 +1,92 @@ +### Epilogue +LICE is a work in progress C99 compiler designed as a solution for +teaching myself and others about the internals of the C programming +language, how to write a compiler for it, and code generation. + +Part of the philosophy behind LICE is to provide a public domain +implementation of a working conformant C99 compiler. As well as borrowing +extensions and ideas from existing compilers to ensure a wider range of +support. + +### Status +See the STANDARDS file for the status on which standards LICE supports. + +### Prologue +If you don't find yourself needing any of the stuff which is marked as being +unsupported above then you may find that LICE will happily compile your +source into x86-64 assembly. The code generation is close from optimal. +LICE treats the entire system as a giant stack machine, since it's easier +to generate code that way. The problem is it's hardly efficent. All local +variables are assigned on the stack for operations. All operations operate +from the stack and write back the result to the stack location that is +the destination operand for that operation. + +### Porting +LICE should be farily straightforward to retarget for a specific architecture +or ABI. Start by making a copy of `arch_dummy.h`, naming it `arch_yourarch.h` +create an entry in `lice.h` for that header guarded by conditional include. +Supply that as a default option in the Makefile, remove `gen_amd64.c` from +the Makefile, add your own to it. Then write the code generator. Documentation +may be found in `gen.h`. + + +### Future Endeavors +- Full C90 support + +- Full C99 support + +- Full C11 support + +- Preprocessor + +- Intermediate stage with optimizations (libfirm?) + +- Code generation (directly to elf/coff, et. all) + +- Support for x86, ARM, PPC + +### Sources +The following sources where used in the construction of LICE + +- Aho, Alfred V., and Alfred V. Aho. Compilers: Principles, Techniques, & Tools. Boston: Pearson/Addison Wesley, 2007. Print. + http://www.amazon.ca/Compilers-Principles-Techniques-Alfred-Aho/dp/0201100886 + +- Degener, Jutta. "ANSI C Grammar, Lex Specification." ANSI C Grammar (Lex). N.p., 1995. Web. + http://www.lysator.liu.se/c/ANSI-C-grammar-l.html + +- Matz, Michael, Jan Hubicka, Andreas Jaeger, and Mark Mitchell. "System V Application Binary Interface AMD64 Architecture Processor Supplement." N.p., 07 Oct. 2013. Print. + http://www.x86-64.org/documentation/abi.pdf + +- Kahan, W., Prof. "IEEE Standard 754 for Binary Floating-Point Arithmetic." N.p., 1 Oct. 1997. Print. + http://www.eecs.berkeley.edu/~wkahan/ieee754status/IEEE754.PDF + +- Crenshaw, Jack. "Let's Build a Compiler." I.E.C.C., 1995. Web. + http://compilers.iecc.com/crenshaw/ + +- "C99 Final Draft." ISO/IEC, 06 May 2006. Print. + http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf + +- "C11 Final Draft." ISO/IEC, 12 April 2011. Print. + http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1570.pdf + +- "Instruction Set Reference, A-Z." Intel 64 and IA-32 Architectures Software Developer's Manual. Vol. 2. [Calif.?]: Intel, 2013. N. pag. Print. + http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + +- Bendersky, Eli. "Complete C99 parser in pure Python." pycparser. N.p., N.d. Web. + https://github.com/eliben/pycparser + +### Inspiration +The following projects were seen as inspiration in the construciton of +LICE. + +- SubC + http://www.t3x.org/subc/ + +- TCC + http://bellard.org/tcc/ + +- lcc + https://sites.google.com/site/lccretargetablecompiler/ + +- Kaleidoscope + http://llvm.org/docs/tutorial/index.html diff --git a/STANDARD b/STANDARD new file mode 100644 index 0000000..5ff511b --- /dev/null +++ b/STANDARD @@ -0,0 +1,30 @@ +The status of language standards in LICE. + +C11 status: + Things supported: + Anonymous structures and unions + Static assertions (_Static_assert) + Things unsupported: + Alignment specification (_Alignas) + _Noreturn function specifier + Type-Generic expressions (_Generic) + +C99 status: + Fully supported with exception to the following things: + - Veriable length arrays + +C90 status: + Fully supported with exception to the following things: + - String concatenation of adjacent strings + - May include C99 feautures by accident (allowing invalid C90 code to compile) +K&R status: + Fully supported with exception to the following things: + - The K&R function definition syntax + +LICE also supports these additional types of C: + +GNUC status: + Small subset of common things supported, see EXTENSIONS for a list. + +LICEC status: + LICEC is C11 with GNUC extensions. diff --git a/arch_amd64.h b/arch_amd64.h new file mode 100644 index 0000000..a770cbe --- /dev/null +++ b/arch_amd64.h @@ -0,0 +1,42 @@ +#ifndef LICE_ARCH_AMD64_HDR +/* + * File: arch_amd64.h + * Isolates AMD64 / SystemV ABI specific details that are used in + * a variety of places of the compiler to target AMD64. + */ + +/* + * Constants: Native type sizes + * + * The following are macros which describe the sizes of various native + * data types, they should reflect their true sizes on the given + * architecture unless mentioned otherwise by a specific ABI. + * + * ARCH_TYPE_SIZE_CHAR - Size of a char + * ARCH_TYPE_SIZE_LONG - Size of a long + * ARCH_TYPE_SIZE_LLONG - Size of a long long + * ARCH_TYPE_SIZE_INT - Size of a int + * ARCH_TYPE_SIZE_SHORT - Size of a short + * ARCH_TYPE_SIZE_FLOAT - Size of a float + * ARCH_TYPE_SIZE_DOUBLE - Size of a double + * ARCH_TYPE_SIZE_LDOUBLE - Size of a long double + * ARCH_TYPE_SIZE_POINTER - Size of a pointer + */ +#define ARCH_TYPE_SIZE_CHAR 1 +#define ARCH_TYPE_SIZE_LONG 8 +#define ARCH_TYPE_SIZE_LLONG 8 +#define ARCH_TYPE_SIZE_INT 4 +#define ARCH_TYPE_SIZE_SHORT 2 +#define ARCH_TYPE_SIZE_FLOAT 4 +#define ARCH_TYPE_SIZE_DOUBLE 8 +#define ARCH_TYPE_SIZE_LDOUBLE 8 +#define ARCH_TYPE_SIZE_POINTER 8 + +/* + * Macro: ARCH_ALIGNMENT + * The default alignment of structure elements (padding) for the given + * architecture / ABI + */ +#define ARCH_ALIGNMENT 16 + +#endif diff --git a/arch_dummy.h b/arch_dummy.h new file mode 100644 index 0000000..3cb6935 --- /dev/null +++ b/arch_dummy.h @@ -0,0 +1,41 @@ +#ifndef LICE_ARCH_DUMMY_HDR +/* + * File: arch_dummy.h + * Stubded dummy architecture useful as a template for retargeting + * LICE. + */ + +/* + * Constants: Native type sizes + * + * The following are macros which describe the sizes of various native + * data types, they should reflect their true sizes on the given + * architecture unless mentioned otherwise by a specific ABI. + * + * ARCH_TYPE_SIZE_CHAR - Size of a char + * ARCH_TYPE_SIZE_LONG - Size of a long + * ARCH_TYPE_SIZE_LLONG - Size of a long long + * ARCH_TYPE_SIZE_INT - Size of a int + * ARCH_TYPE_SIZE_SHORT - Size of a short + * ARCH_TYPE_SIZE_FLOAT - Size of a float + * ARCH_TYPE_SIZE_DOUBLE - Size of a double + * ARCH_TYPE_SIZE_LDOUBLE - Size of a long double + * ARCH_TYPE_SIZE_POINTER - Size of a pointer + */ +#define ARCH_TYPE_SIZE_CHAR -1 +#define ARCH_TYPE_SIZE_LONG -1 +#define ARCH_TYPE_SIZE_LLONG -1 +#define ARCH_TYPE_SIZE_INT -1 +#define ARCH_TYPE_SIZE_SHORT -1 +#define ARCH_TYPE_SIZE_FLOAT -1 +#define ARCH_TYPE_SIZE_DOUBLE -1 +#define ARCH_TYPE_SIZE_LDOUBLE -1 +#define ARCH_TYPE_SIZE_POINTER -1 + +/* + * Macro: ARCH_ALIGNMENT + * The default alignment of structure elements (padding) for the given + * architecture / ABI + */ +#define ARCH_ALIGNMENT -1 +#endif diff --git a/ast.h b/ast.h new file mode 100644 index 0000000..e9fc610 --- /dev/null +++ b/ast.h @@ -0,0 +1,814 @@ +#ifndef LICE_AST_HDR +#define LICE_AST_HDR +#include "util.h" + +/* + * File: ast.h + * Implements the interface to LICE's abstract syntax tree + */ +typedef struct ast_s ast_t; + +/* + * Type: ast_type_t + * The type of ast node + * + * Constants: + * + * AST_TYPE_LITERAL - Literal + * AST_TYPE_STRING - String literal + * AST_TYPE_VAR_LOCAL - Local variable + * AST_TYPE_VAR_GLOBAL - Global variable + * AST_TYPE_CALL - Direct function call + * AST_TYPE_POINTERCALL - Indirect function call + * AST_TYPE_FUNCTION - Function + * AST_TYPE_PROTOTYPE - Prototype + * AST_TYPE_DECLARATION - Declaration + * AST_TYPE_INITIALIZER - Initializer + * AST_TYPE_STRUCT - Structure + * AST_TYPE_ADDRESS - Address of operation + * AST_TYPE_DEREFERENCE - Pointer dereference + * AST_TYPE_EXPRESSION_TERNARY - Ternary expression + * AST_TYPE_EXPRESSION_CAST - Type cast expression + * AST_TYPE_STATEMENT_IF - If statement + * AST_TYPE_STATEMENT_FOR - For statement + * AST_TYPE_STATEMENT_WHILE - While statement + * AST_TYPE_STATEMENT_DO - Do statement + * AST_TYPE_STATEMENT_SWITCH - Switch statement + * AST_TYPE_STATEMENT_CASE - Switch statement case + * AST_TYPE_STATEMENT_DEFAULT - Switch statement default case + * AST_TYPE_STATEMENT_RETURN - Return statement + * AST_TYPE_STATEMENT_BREAK - Break statement + * AST_TYPE_STATEMENT_CONTINUE - Continue statement + * AST_TYPE_STATEMENT_COMPOUND - Compound statement + * AST_TYPE_STATEMENT_GOTO - Goto statement + * AST_TYPE_STATEMENT_LABEL - Goto statement label + * AST_TYPE_POST_INCREMENT - Post increment operation + * AST_TYPE_POST_DECREMENT - Post decrement operation + * AST_TYPE_PRE_INCREMENT - Pre increment operation + * AST_TYPE_PRE_DECREMENT - Pre decrement operation + * AST_TYPE_LSHIFT - Left shift operation + * AST_TYPE_RSHIFT - Right shift operation + * AST_TYPE_LRSHIFT - Logical right shift operation + * AST_TYPE_EQUAL - Equality condition + * AST_TYPE_GEQUAL - Greater-or-equal condition + * AST_TYPE_LEQUAL - Less-or-equal condition + * AST_TYPE_NEQUAL - Not-equal condition + * AST_TYPE_NEGATE - Negation + * AST_TYPE_AND - Logical-and operation + * AST_TYPE_OR - Logical-or operation + * AST_TYPE_NEGATE - Unary minus + * AST_TYPE_VA_START - __builtin_va_start + * AST_TYPE_VA_ARG - __builtin_va_arg + * AST_TYPE_DESIGNATOR - Designation on function types + * AST_TYPE_CONVERT - Type conversion + */ +typedef enum { + AST_TYPE_LITERAL = 0x100, + AST_TYPE_STRING, + AST_TYPE_VAR_LOCAL, + AST_TYPE_VAR_GLOBAL, + AST_TYPE_CALL, + AST_TYPE_POINTERCALL, + AST_TYPE_FUNCTION, + AST_TYPE_PROTOTYPE, + AST_TYPE_DECLARATION, + AST_TYPE_INITIALIZER, + AST_TYPE_STRUCT, + AST_TYPE_ADDRESS, + AST_TYPE_DEREFERENCE, + AST_TYPE_EXPRESSION_TERNARY, + AST_TYPE_EXPRESSION_CAST, + AST_TYPE_STATEMENT_IF, + AST_TYPE_STATEMENT_FOR, + AST_TYPE_STATEMENT_WHILE, + AST_TYPE_STATEMENT_DO, + AST_TYPE_STATEMENT_SWITCH, + AST_TYPE_STATEMENT_CASE, + AST_TYPE_STATEMENT_DEFAULT, + AST_TYPE_STATEMENT_RETURN, + AST_TYPE_STATEMENT_BREAK, + AST_TYPE_STATEMENT_CONTINUE, + AST_TYPE_STATEMENT_COMPOUND, + AST_TYPE_STATEMENT_GOTO, + AST_TYPE_STATEMENT_LABEL, + AST_TYPE_STATEMENT_GOTO_COMPUTED, + AST_TYPE_STATEMENT_LABEL_COMPUTED, + AST_TYPE_POST_INCREMENT, + AST_TYPE_POST_DECREMENT, + AST_TYPE_PRE_INCREMENT, + AST_TYPE_PRE_DECREMENT, + AST_TYPE_LSHIFT, + AST_TYPE_RSHIFT, + AST_TYPE_LRSHIFT, + AST_TYPE_EQUAL, + AST_TYPE_GEQUAL, + AST_TYPE_LEQUAL, + AST_TYPE_NEQUAL, + AST_TYPE_NEGATE, + AST_TYPE_AND, + AST_TYPE_OR, + AST_TYPE_VA_START, + AST_TYPE_VA_ARG, + AST_TYPE_DESIGNATOR, + AST_TYPE_CONVERT, +} ast_type_t; + + +/* + * Type: type_t + * Type describing the ast type. + * + * Constants: + * + * TYPE_VOID - void + * TYPE_BOOL - boolean + * TYPE_CHAR - char + * TYPE_SHORT - short + * TYPE_INT - int + * TYPE_LONG - long + * TYPE_LLONG - long long + * TYPE_DOUBLE - double + * TYPE_LDOUBLE - long double + * TYPE_ARRAY - array (also contains a type_t for base type) + * TYPE_POINTER - pointer (also contains a type_t for base type) + * TYPE_STRUCTURE - structure (user defined) + * TYPE_FUNCTION - function (user defined) + * TYPE_CDECL - used by the parser for dealing with declarations + */ +typedef enum { + TYPE_VOID, + TYPE_BOOL, + TYPE_CHAR, + TYPE_SHORT, + TYPE_INT, + TYPE_LONG, + TYPE_LLONG, + TYPE_FLOAT, + TYPE_DOUBLE, + TYPE_LDOUBLE, + TYPE_ARRAY, + TYPE_POINTER, + TYPE_STRUCTURE, + TYPE_FUNCTION, + TYPE_CDECL +} type_t; + +/* + * Type: ast_data_type_t + * Type describing the indice into `ast_data_table` + * + * Constants: + * + * AST_DATA_VOID - void + * AST_DATA_BOOL - boolean + * AST_DATA_LONG - long + * AST_DATA_LLONG - long long + * AST_DATA_INT - int + * AST_DATA_SHORT - short + * AST_DATA_CHAR - char + * AST_DATA_FLOAT - float + * AST_DATA_DOUBLE - double + * AST_DATA_LDOUBLE - long double + * AST_DATA_ULONG - unsigned long + * AST_DATA_ULLONG - unsigned long long + * AST_DATA_FUNCTION - function (current) + */ +typedef enum { + AST_DATA_VOID, + AST_DATA_BOOL, + AST_DATA_LONG, + AST_DATA_LLONG, + AST_DATA_INT, + AST_DATA_SHORT, + AST_DATA_CHAR, + AST_DATA_FLOAT, + AST_DATA_DOUBLE, + AST_DATA_LDOUBLE, + AST_DATA_ULONG, + AST_DATA_ULLONG, + AST_DATA_FUNCTION, + AST_DATA_COUNT +} ast_data_type_t; + +/* + * Type: cdecl_t + * Describes type of declarations + * + * Constants: + * + * CDECL_BODY - function body + * CDECL_PARAMETER - parameters (with name) + * CDECL_TYPEONLY - parameters (without name) + * CDECL_CAST - cast + */ +typedef enum { + CDECL_BODY = 1, + CDECL_PARAMETER, + CDECL_TYPEONLY, + CDECL_CAST +} cdecl_t; + +/* + * Type: storage_t + * Describes the storage class for a given variable + * + * Constants: + * + * STORAGE_TYPEDEF - typedef to another type + * STORAGE_EXTERN - external linkage + * STORAGE_STATIC - static storage + * STORAGE_AUTO - automatic storage (implicit) + * STORAGE_REGISTER - make use of register for storage + */ +typedef enum { + STORAGE_TYPEDEF = 1, + STORAGE_EXTERN, + STORAGE_STATIC, + STORAGE_AUTO, + STORAGE_REGISTER +} storage_t; + +/* + * Struct: data_type_t + * A structure that describes a data type. + */ +typedef struct data_type_s data_type_t; +struct data_type_s { + /* + * Variable: type + * The type of the data type. + * + * See Constants for a list of + * valid constant values. + */ + type_t type; + + /* + * Variable: size + * The size of the given data type, this is often the value + * provided for the given type in arch_[target].h + */ + int size; + + /* + * Variable: sign + * Describes if the type is signed or unsigned. + */ + bool sign; + + /* + * Variable: isstatic + * Describes if the type is static. + */ + bool isstatic; + + /* + * Variable: length + * Instances of the data type. + * + * When used as a base-type, i.e not an array; this will be + * 1, otherwise it will be the length of the array, or -1 + * if the size of the array is unknown. + */ + int length; + + /* + * Variable: pointer + * When the variable is a pointer type, this will point to another + * data type that describes the base type of the pointer, NULL other- + * wise. + */ + data_type_t *pointer; + + /* structure */ + struct { + /* + * Variable: fields + * Pointer to a table of fields (if structure) + */ + table_t *fields; + + /* + * Variable: offset + * Offset of the given field in a structure (if a structure base type) + */ + int offset; + + /* + * Variable: isstruct + * If we're dealing with a structure this will be true, false + * otherwise. + */ + bool isstruct; + + /* bitfields */ + struct { + int offset; + int size; + } bitfield; + + }; + + /* function */ + struct { + /* + * Variable: returntype + * Pointer to a data type which describes the return type + * of the function (if a function) + */ + data_type_t *returntype; + + /* + * Variable: parameters + * Pointer to a list of parameters for a function. + */ + list_t *parameters; + + /* + * Variable: hasdots + * Describes if the given function is variable-argument. + * + * Contains the value `true` when the function has + * three dots `...` in it's prototype, otherwise `false`. + */ + bool hasdots; + }; +}; + +/* + * Struct: ast_string_t + * The *AST_TYPE_STRING* ast node. + */ +typedef struct { + /* + * Variable: data + * String contents + */ + char *data; + + /* + * Variable: label + * Name of the label associated with the string. + */ + char *label; +} ast_string_t; + +/* + * Struct: ast_variable_t + * The *AST_TYPE_VAR_LOCAL* and *AST_TYPE_VAR_GLOBAL* ast node. + */ +typedef struct { + /* + * Variable: name + * Name of the variable + */ + char *name; + + /* + * Variable: off + * Offset of the variable on the stack. + */ + int off; + + /* + * Variable: label + * Name of the label associated with the variable. + */ + char *label; + + /* + * Variable: init + * Compound literal list for initialization + */ + list_t *init; +} ast_variable_t; + +/* + * Struct ast_function_call_t + * Function call + * + * Remarks: + * Not associated with any node. Instead describes the + * data associated with a function call for *ast_function_t* + */ +typedef struct { + /* + * Variable: args + * Pointer to a list of arguments for a function call + */ + list_t *args; + + /* + * Variable: type + * The basetype of the function that is being called. This type + * is the 'function type', ie. accessing the returntype member of + * this will return the FUNCTIONS return type. + */ + data_type_t *type; + + /* + * Variable: functionpointer + * Pointer to function for function pointer call. + * + * Remarks: + * This only associates with AST_TYPE_POINTERCALL, in which this + * node is filled with the function in which to call by indirection + * (address). + */ + ast_t *functionpointer; +} ast_function_call_t; + +/* + * Struct: ast_function_t + * The *AST_TYPE_FUNCTION* ast node. + */ +typedef struct { + /* + * Variable: name + * The function name + */ + char *name; + + /* + * Variable: call + * Data associated with a function call. + */ + ast_function_call_t call; + + /* + * Variable: params + * Pointer to a list of parameters. + */ + list_t *params; + + /* + * Variable: locals + * Pointer to a list of locals. + */ + list_t *locals; + + /* + * Variable: body + * Pointer to an ast node which describes the body. + * + * Remarks: + * A body is usually composed of a serise of ast nodes, + * typically a compound expression, but could also contain + * nested compound expressions. Think of this as a pointer + * to the head of the beginning of a serise of basic-blocks + * which are the forming of the function body. + */ + ast_t *body; +} ast_function_t; + +/* + * Struct: ast_unary_t + * Represents a unary operation in the AST tree + */ +typedef struct { + /* + * Variable: operand + * Pointer to the operand the unary operation is to + * be performed on. + */ + ast_t *operand; +} ast_unary_t; + +/* + * Struct: ast_decl_t + * Represents a declaration in the AST tree + */ +typedef struct { + /* + * Variable: var + * Pointer to the variable node associated with the + * declaration. + */ + ast_t *var; + + /* + * Variable: init + * When the declaration includes an initialization this points + * to a initlization list. + */ + list_t *init; +} ast_decl_t; + +/* + * Struct: ast_ifthan_t + * Represents a if-than node in the AST tree. + * + * Remarks: + * Describes a two-branch gaurded by conditional test node in the AST + * tree for implementing ternary expressions and if statements. + */ +typedef struct { + /* + * Variable: cond + * The condition node + */ + ast_t *cond; + + /* + * Variable: then + * Basic block for truth path in branch + */ + ast_t *then; + + /* + * Variable: last + * Basic block for false path in branch + */ + ast_t *last; +} ast_ifthan_t; + +/* + * Struct: ast_for_t + * Represents a for-loop node in the AST tree. + * + * Remarks: + * Standard for loop with precondition / initilization expression, + * conditionally testsed, and post step / expression, ergo + * for(init; cond; step) body; + */ +typedef struct { + /* Variable: init */ + ast_t *init; + /* Variable: cond */ + ast_t *cond; + /* Variable: step */ + ast_t *step; + /* Variable: body */ + ast_t *body; +} ast_for_t; + + +/* + * Struct: ast_init_t + * Represents an initializer in the AST tree. + * + * Remarks: + * Represents array initializer lists, as well as aggregate initializer + * lists for structure, enum and union. Also represents a designated + * initializer for a structure. + */ +typedef struct { + /* Variable: value */ + ast_t *value; + + /* Variable: offset */ + int offset; + + /* Variable: type */ + data_type_t *type; +} ast_init_t; + +/* + * Struct: ast_switch_t + * Represents a switch statement in the AST tree. + */ +typedef struct { + /* Variable: expr */ + ast_t *expr; + /* Variable: body */ + ast_t *body; +} ast_switch_t; + +/* + * Struct: ast_goto_t + * Represents a goto statement (or label) in the AST tree. + */ +typedef struct { + /* + * Variable: label + * When not used as a goto statement, describes the name of a label + * that may be 'gone to' with 'goto' + */ + char *label; + + /* + * Variable: where + * Where to go (label wise) for a goto statement. + */ + char *where; +} ast_goto_t; + +/* + * Struct: ast_t + * The monolthic ast tree and node at the same time. + * + * Remarks: + * The ast tree is just a doubly-linked list of ast nodes which are + * capable of being all the possible ast nodes at once. This is + * acomplished with a rather large union of all ast nodes. The only + * thing that declares what a node actually is, is the nodes type + * member. This is beneficial to keeping the complexity of the AST + * tree down, while also keeping memory footprint low. One more + * interesting aspect of this is the ability to have the AST tree + * nodes (next, prev), which make up the doubly-linked list part + * of the same union, giving us a free way to terminate the tree + * without using additional space to determine it. + */ +struct ast_s { + int type; + data_type_t *ctype; + + union { + struct { + int casebeg; + int caseend; + }; + + long integer; + char character; + ast_string_t string; + ast_variable_t variable; + ast_function_t function; + ast_unary_t unary; + ast_decl_t decl; + ast_ifthan_t ifstmt; + ast_for_t forstmt; + ast_switch_t switchstmt; + ast_t *returnstmt; + list_t *compound; + ast_init_t init; + ast_goto_t gotostmt; + ast_t *ap; + + struct { + ast_t *left; + ast_t *right; + }; + + struct { + ast_t *structure; + char *field; + data_type_t *fieldtype; + }; + + struct { + double value; + char *label; + } floating; + }; +}; + +extern data_type_t *ast_data_table[AST_DATA_COUNT]; + +extern list_t *ast_locals; +extern list_t *ast_gotos; +extern table_t *ast_globalenv; +extern table_t *ast_localenv; +extern table_t *ast_structures; +extern table_t *ast_unions; +extern table_t *ast_labels; + +/* + * Function: ast_structure_reference + * Creates an structure reference of a given type for a given field + * + * Parameters: + * type - The type of the field for reference + * structure - The structure that contains said field to be referenced + * name - The name of the field in that structure to reference + * + * Returns: + * An ast node referencing that field in that paticular structure on + * success, otherwise NULL. + */ +ast_t *ast_structure_reference(data_type_t *type, ast_t *structure, char *name); + +/* + * Function: ast_structure_new + * Creates a structure data type + * + * Parameters; + * field - A table of data_type_t fields for the structure + * size - The size of the structure + * isstruct - true if structure, false if structure-like + * + * Returns: + * A new structure data type with the specified fields and size on + * success, NULL otherwise. + */ +data_type_t *ast_structure_new(table_t *fields, int size, bool isstruct); + + +ast_t *ast_new_unary(int type, data_type_t *data, ast_t *operand); +ast_t *ast_new_binary(data_type_t *ctype, int type, ast_t *left, ast_t *right); +ast_t *ast_new_integer(data_type_t *type, int value); +ast_t *ast_new_floating(data_type_t *, double value); +ast_t *ast_new_char(char value); +ast_t *ast_new_string(char *value); +ast_t *ast_new_label(char *); + +char *ast_label(void); + +ast_t *ast_declaration(ast_t *var, list_t *init); +ast_t *ast_variable_local(data_type_t *type, char *name); +ast_t *ast_variable_global(data_type_t *type, char *name); +ast_t *ast_call(data_type_t *type, char *name, list_t *args); +ast_t *ast_pointercall(ast_t *ast, list_t *args); +ast_t *ast_function(data_type_t *type, char *name, list_t *params, ast_t *body, list_t *locals); +ast_t *ast_initializer(ast_t *, data_type_t *, int); +ast_t *ast_if(ast_t *cond, ast_t *then, ast_t *last); +ast_t *ast_for(ast_t *init, ast_t *cond, ast_t *step, ast_t *body); +ast_t *ast_while(ast_t *cond, ast_t *body); +ast_t *ast_do(ast_t *cond, ast_t *body); +ast_t *ast_return(ast_t *val); +ast_t *ast_compound(list_t *statements); +ast_t *ast_ternary(data_type_t *type, ast_t *cond, ast_t *then, ast_t *last); +ast_t *ast_switch(ast_t *expr, ast_t *body); +ast_t *ast_case(int begin, int end); +ast_t *ast_goto(char *); +ast_t *ast_goto_computed(ast_t *expression); +ast_t *ast_label_address(char *); +ast_t *ast_make(int type); + +ast_t *ast_va_start(ast_t *); +ast_t *ast_va_arg(data_type_t *, ast_t*); + +ast_t *ast_designator(char *name, ast_t *func); + +data_type_t *ast_prototype(data_type_t *returntype, list_t *paramtypes, bool dots); +data_type_t *ast_pointer(data_type_t *type); +data_type_t *ast_array(data_type_t *type, int size); +data_type_t *ast_array_convert(data_type_t *ast); +data_type_t *ast_result_type(int operation, data_type_t *); + +ast_t *ast_designator_convert(ast_t *ast); + +bool ast_struct_compare(data_type_t *a, data_type_t *b); + +/* + * Function: ast_type_string + * Get the type of a data_type_t as a string. + */ +const char *ast_type_string(data_type_t *type); + +/* + * Function: ast_type_isinteger + * Check if a given data type is an integral type. + * + * Parameters: + * type - pointer to the data_type_t object to check + * + * Returns: + * true if *type* is integral type, false otherwise. + * + * Remarks: + * Integral includes any of the following data types: + * - TYPE_CHAR + * - TYPE_INT + * - TYPE_SHORT + * - TYPE_LONG + * - TYPE_LLONG + */ +bool ast_type_isinteger(data_type_t *type); + +/* + * Function: ast_type_isfloating + * Check if a given data type is a floating-point one. + * + * Parameters: + * type - pointer to the data_type_t object to check + * + * Returns: + * true if *type* is floating point, false otherwise. + * + * Remarks: + * Floating-point includes any of the following data types + * - TYPE_FLOAT + * - TYPE_DOUBLE + * - TYPE_LDOUBLE + */ +bool ast_type_isfloating(data_type_t *type); + +/* + * Function: ast_type_isstring + * Check if a given data type is string + * + * Parameters: + * type - pointer to the data_type object to check + * + * Returns: + * trye if *type* is a string type, false otherwise. + * + * Remarks: + * string type is determined if it's an array and the base + * type of that array (when decayed to pointer type) is + * TYPE_CHAR + */ +bool ast_type_isstring(data_type_t *type); + +data_type_t *ast_type_copy(data_type_t *type); +data_type_t *ast_type_copy_incomplete(data_type_t *type); +data_type_t *ast_type_create(type_t type, bool sign); +data_type_t *ast_type_stub(void); + +ast_t *ast_type_convert(data_type_t *type, ast_t *ast); + + +char *ast_string(ast_t *ast); + +#endif -- 2.40.0