diff options
70 files changed, 8555 insertions, 0 deletions
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..b5d63a1 --- /dev/null +++ b/Makefile @@ -0,0 +1,36 @@ +ifeq ($(mode), release) + CFLAGS = -O2 -Wall +else + CFLAGS = -g -Wall -Wextra -DCIBIC_DEBUG +endif + +all: cibic + +db: + gdb cibic + +cibic: lex.yy.o cibic.tab.o ast.o main.o semantics.o ssa.o mips.o + mkdir -p bin + gcc -o bin/cibic lex.yy.o cibic.tab.o ast.o main.o semantics.o ssa.o mips.o + cp bin/cibic cibic +lex.yy.o: lex.yy.c cibic.tab.c + gcc -c lex.yy.c +cibic.tab.o: cibic.tab.c + gcc -c cibic.tab.c +main.o: main.c +# gcc -c main.c -g -Wall -Wextra +ast.o: ast.c ast.h +# gcc -c ast.c -g -Wall -Wextra -DCIBIC_DEBUG +semantics.o: semantics.c semantics.h +# gcc -c semantics.c -g -Wall -Wextra -DCIBIC_DEBUG +ssa.o: ssa.c ssa.h +# gcc -c ssa.c -g -Wall -Wextra -DCIBIC_DEBUG +mips.o: mips.c mips.h +# gcc -c mips.c -g -Wall -Wextra -DCIBIC_DEBUG +lex.yy.c: cibic.l + flex cibic.l +cibic.tab.c: cibic.y + bison -d cibic.y + +clean: + rm -f cibic lex.yy.c cibic.tab.c cibic.tab.h *.o diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..c2c3a05 --- /dev/null +++ b/README.rst @@ -0,0 +1,22 @@ +CIBIC: C Implemented Bare and Ingenuous Compiler +================================================= + +Build Requirements +------------------ +- flex >= 2.5.37 +- bison >= 2.4.3 +- gcc >= 4.7.3 + +Features +--------- +- Complex declaration support (``int (*a)[10]``, ``int (*f)()``, ``int (*g(int ***e[10]))()``, etc.) +- Complex cast support (``(int (*)())addr``, ``(int (*)[10])addr``, etc.) +- ``typedef`` support (together with complex decl) +- Forward declaration +- Real Single Static Assignment (with dominator frontier, renaming, interval + building and register allocation algorithm) +- Safe and conservative CSE (common subexpression elimination) +- Sophisticated semantic checking +- Small memory footprint +- Sophisticated error reporting +- User-friendly AST printing diff --git a/TODO.rst b/TODO.rst new file mode 100644 index 0000000..2d00ec7 --- /dev/null +++ b/TODO.rst @@ -0,0 +1,32 @@ +TODO +==== + +- More detailed checking in regex for: + + - char constant (done) + - string constant (done) + +- Fix: + + - check global definition (if type is complete) when semantic analysis finishes + - local function declaration is not in a local scope (external linkage issue) (will not be fixed) + - incomplete type issues + - function **definition** requires complete return type (function declaration does not) (done) + - array requires **complete** elem type (done) + - struct or union requires **complete** fields (done) + - pointer may **allow incomplete** type (done) + - calculate type memory footprint at proper time + - function to 'pointer to function' conversion (according the std 6.3.2/4) (done) + - vague var table management (done) + - incorrect address reference ``&`` (done) + - const function name (done) + - remove the redundant edge from blocks that break a loop + - ``<<`` operation in pre-calculation (done) + - support ``addui``, ``sll``, etc. (done) + +- Not Implemented: + + - complex type name (to be in agreement with complex decl) (done) + - initializer checking (done) + - typedef support (via adding mid-rule actions to bision to inform flex) (done) + - ``&&`` and ``||`` (done) @@ -0,0 +1,450 @@ +#include <stdlib.h> +#include <assert.h> +#include <stdio.h> +#include "ast.h" +#include "cibic.tab.h" +#define NEW_CNODE ((CNode *)malloc(sizeof(CNode))) + +CNode *ast_root; + +void cnode_reverse_chd(CNode *node) { + static CNode *chdn[MAX_CHDN]; + CNode *p; + int n = 0; + for (p = node->chd; p; p = p->next) + cnode_reverse_chd(p); + for (p = node->chd; p; p = p->next) + chdn[n++] = p; + if (n) + { + node->chd = chdn[--n]; + for (; n; n--) + chdn[n]->next = chdn[n - 1]; + chdn[0]->next = NULL; + } +} + +CNode *cnode_create_ast(CNode *wrapped) { + cnode_reverse_chd(wrapped); + return wrapped; +} + +CNode *cnode_create_nop(void) { + CNode *nop = NEW_CNODE; + nop->type = NOP; + nop->next = nop->chd = NULL; + return nop; +} + +CNode *cnode_create_general(int type, int subtype, int pnum, va_list ap) { + int i; + CNode *exp = NEW_CNODE; + exp->type = type; + exp->rec.subtype = subtype; + exp->next = exp->chd = NULL; + exp->ext.type = NULL; + exp->ext.offset = 0; + for (i = 0; i < pnum; i++) + { + CNode *subexp = va_arg(ap, CNode*); +#ifdef CIBIC_DEBUG + assert(subexp->next == NULL); +#endif + subexp->next = exp->chd; + exp->chd = subexp; + } + return exp; +} + +CNode *cnode_list_append(CNode *list, CNode *tail) { + if (list->type == NOP) + { + free(list); + return tail; + } + tail->next = list; + return tail; +} + +CNode *cnode_create_identifier(char *val) { + CNode *exp = NEW_CNODE; + exp->type = ID; + exp->chd = exp->next = NULL; + exp->rec.strval = val; + exp->ext.type = NULL; + exp->ext.offset = 0; + return exp; +} + +CNode *cnode_create_int_const(int val) { + /* TODO: overflow checking */ + CNode *exp = NEW_CNODE; + exp->type = INT; + exp->chd = exp->next = NULL; + exp->rec.intval = val; + exp->ext.type = NULL; + return exp; +} + +CNode *cnode_create_char_const(char *val) { + /* TODO: overflow checking */ + CNode *exp = NEW_CNODE; + exp->type = CHAR; + exp->chd = exp->next = NULL; + exp->rec.strval = val; + exp->ext.type = NULL; + return exp; +} + +CNode *cnode_create_str_const(char *val) { + CNode *exp = NEW_CNODE; + exp->type = STR; + exp->chd = exp->next = NULL; + exp->rec.strval = val; + exp->ext.type = NULL; + return exp; +} + +CNode *cnode_create_exp(int exp_type, int pnum, ...) { + CNode *ret; + va_list ap; + va_start(ap, pnum); + ret = cnode_create_general(EXP, exp_type, pnum, ap); + va_end(ap); + return ret; +} + +CNode *cnode_create_type_spec(int spec_type, int pnum, ...) { + CNode *ret; + va_list ap; + va_start(ap, pnum); + ret = cnode_create_general(TYPE_SPEC, spec_type, pnum, ap); + va_end(ap); + return ret; +} + +CNode *cnode_create_declr(int declr_type, int pnum, ...) { + CNode *ret; + va_list ap; + va_start(ap, pnum); + ret = cnode_create_general(DECLR, declr_type, pnum, ap); + va_end(ap); + return ret; +} + +CNode *cnode_create_stmt(int stmt_type, int pnum, ...) { + CNode *ret; + va_list ap; + va_start(ap, pnum); + ret = cnode_create_general(STMT, stmt_type, pnum, ap); + va_end(ap); + return ret; +} + +CNode *cnode_create_initr(int initr_type, CNode *body) { + CNode *initr = NEW_CNODE; + initr->type = INITR; + initr->rec.subtype = initr_type; + initr->chd = body; + initr->next = NULL; + initr->ext.type = NULL; + return initr; +} + +CNode *cnode_create_decl(CNode *type, CNode *init_declrs) { + CNode *decl = NEW_CNODE; +#ifdef CIBIC_DEBUG + assert(type->next == NULL); + assert(init_declrs->next == NULL); +#endif + decl->type = DECL; + decl->next = NULL; + decl->ext.type = NULL; + decl->chd = init_declrs; + init_declrs->next = type; + return decl; +} + +CNode *cnode_create_func(CNode *type, CNode *declr, CNode *stmt) { + CNode *func = NEW_CNODE; +#ifdef CIBIC_DEBUG + assert(type->next == NULL); + assert(declr->next == NULL); + assert(stmt->next == NULL); +#endif + func->type = FUNC_DEF; + func->next = NULL; + func->ext.type = NULL; + func->chd = stmt; + stmt->next = declr; + declr->next = type; + return func; +} + +CNode *cnode_create_init_declr(CNode *declr, CNode *initr) { + CNode *init_declr = NEW_CNODE; +#ifdef CIBIC_DEBUG + assert(declr->next == NULL); + assert(initr->next == NULL); +#endif + init_declr->type = INIT_DECLR; + init_declr->next = NULL; + init_declr->ext.type = NULL; + init_declr->chd = initr; + initr->next = declr; + return init_declr; +} + +CNode *cnode_create_struct_field(CNode *type_spec, CNode *declrs) { + CNode *field = NEW_CNODE; +#ifdef CIBIC_DEBUG + assert(type_spec->next == NULL); + assert(declrs->next == NULL); +#endif + field->type = FIELD; + field->next = NULL; + field->ext.type = NULL; + field->chd = declrs; + declrs->next = type_spec; + return field; +} + +CNode *cnode_create_plain_decl(CNode *type_spec, CNode *declr) { + CNode *pdecl = NEW_CNODE; +#ifdef CIBIC_DEBUG + assert(type_spec->next == NULL); + assert(declr->next == NULL); +#endif + pdecl->type = PLAIN_DECL; + pdecl->next = NULL; + pdecl->ext.type = NULL; + pdecl->chd = declr; + declr->next = type_spec; + return pdecl; +} + +CNode *cnode_create_typedef(CNode *type, CNode *declrs) { +#ifdef CIBIC_DEBUG + assert(type->next == NULL); + assert(declrs->next == NULL); +#endif + CNode *def = NEW_CNODE; + def->type = TYPEDEF; + def->next = NULL; + def->ext.type = NULL; + def->chd = declrs; + declrs->next = type; + return def; +} + +CNode *cnode_list_wrap(int type, CNode *list) { + CNode *wlist = NEW_CNODE; + wlist->type = type; + wlist->next = NULL; + wlist->ext.type = NULL; + wlist->chd = list; + return wlist; +} + +char *cnode_debug_type_repr(CNode *ast) { + static char buffer[MAX_DEBUG_PRINT_BUFF], + abuff[MAX_DEBUG_PRINT_BUFF]; + char *type, *aptr = abuff; + switch (ast->type) + { + case PROG: type = "prog"; break; + case FUNC_DEF: type = "func_def"; break; + case DECLS: type = "prg_decls"; break; + case FUNCS: type = "prg_funcs"; break; + case DECL: type = "decl"; break; + case INIT_DECLR: type = "init_declr"; break; + case PLAIN_DECL: type = "p_decl"; break; + case COMP_STMTS: type = "blk_stmts"; break; + case COMP_DECLS: type = "blk_decls"; break; + case DECLRS: type = "declrs"; break; + case INIT_DECLRS: type = "init_declrs"; break; + case ARGS: type = "args"; break; + case PARAMS: type = "params"; break; + case TYPEDEF: type = "typedef"; break; + case ID: + type = "id"; + aptr += sprintf(abuff, "%s", ast->rec.strval); + break; + case INT: + type = "int"; + aptr += sprintf(abuff, "%d", ast->rec.intval); + break; + case CHAR: + type = "char"; + aptr += sprintf(abuff, "%s", ast->rec.strval); + break; + case STR: + type = "str"; + aptr += sprintf(abuff, "\"%s\"", ast->rec.strval); + break; + case FIELD: type = "field"; break; + case FIELDS: type = "fields"; break; + case NOP: type = "nop"; break; + case EXP: + case INITR: + case TYPE_SPEC: + case STMT: + case DECLR: + type = NULL; break; + } + if (ast->type == EXP) + { + switch (ast->rec.subtype) + { + case ',': type = ","; break; + case '=': type = "="; break; + case ASS_MUL: type = "*="; break; + case ASS_DIV: type = "/="; break; + case ASS_MOD: type = "%="; break; + case ASS_ADD: type = "+="; break; + case ASS_SUB: type = "-="; break; + case ASS_SHL: type = "<<="; break; + case ASS_SHR: type = ">>="; break; + case ASS_AND: type = "&="; break; + case ASS_XOR: type = "^="; break; + case ASS_OR: type = "|="; break; + case OPT_OR: type = "||"; break; + case OPT_AND: type = "&&"; break; + case '|': type = "|"; break; + case '^': type = "^"; break; + case '&': type = "&"; break; + case OPT_EQ: type = "=="; break; + case OPT_NE: type = "!="; break; + case '<': type = "<"; break; + case '>': type = ">"; break; + case OPT_LE: type = "<="; break; + case OPT_GE: type = ">="; break; + case OPT_SHL: type = "<<"; break; + case OPT_SHR: type = ">>"; break; + case '+': type = "+"; break; + case '-': type = "-"; break; + case '*': type = "*"; break; + case '/': type = "/"; break; + case '%': type = "%"; break; + case EXP_CAST: type = "cast"; break; + case OPT_INC: type = "++"; break; + case OPT_DEC: type = "--"; break; + case '~': type = "~"; break; + case '!': type = "!"; break; + case KW_SIZEOF: type = "sizeof"; break; + case EXP_POSTFIX: type = "postfix"; break; + case POSTFIX_ARR: type = "arr"; break; + case POSTFIX_CALL: type = "call"; break; + case POSTFIX_DOT: type = "dot"; break; + case POSTFIX_PTR: type = "ptr"; break; + default: assert(0); + } + } + else if (ast->type == TYPE_SPEC) + { + switch (ast->rec.subtype) + { + case KW_VOID: type = "void"; break; + case KW_CHAR: type = "char"; break; + case KW_INT: type = "int"; break; + case KW_STRUCT: type = "struct"; break; + case KW_UNION: type = "union"; break; + case USER_TYPE: type = "user_type"; break; + default: assert(0); + } + } + else if (ast->type == DECLR) + { + switch (ast->rec.subtype) + { + case DECLR_FUNC: type = "func"; break; + case DECLR_ARR: type = "arr"; break; + case '*': type = "*"; break; + case 0: type = "typename"; break; + default: assert(0); + } + } + else if (ast->type == STMT) { switch (ast->rec.subtype) + { + case STMT_EXP: type = "exp"; break; + case STMT_COMP: type = "blk"; break; + case STMT_IF: type = "if"; break; + case STMT_WHILE: type = "while"; break; + case STMT_FOR: type = "for"; break; + case STMT_CONT: type = "cont"; break; + case STMT_BREAK: type = "break"; break; + case STMT_RET: type = "ret"; break; + default: assert(0); + } + } + else if (ast->type == INITR) + { + switch (ast->rec.subtype) + { + case INITR_NORM: type = "initr_n"; break; + case INITR_ARR: type = "initr_a"; break; + default: assert(0); + } + } + else + { + if (type == NULL) puts(""); + assert(type); + } + { + char *head = buffer; + head += sprintf(head, "%s", type); + if (aptr != abuff) + { + *aptr = '\0'; + head += sprintf(head, ":%s", abuff); + } + head += sprintf(head, "(%d:%d)", ast->loc.row, ast->loc.col); + if (ast->ext.type) + sprintf(head, "->(var:%lx type:%lx ic:%d cv:%ld off:%d)", + (size_t)ast->ext.var, (size_t)ast->ext.type, + ast->ext.is_const, ast->ext.const_val, ast->ext.offset); + } + return buffer; +} + +void cnode_debug_print_plain(CNode *ast) { |