diff options
70 files changed, 8555 insertions, 0 deletions
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..b5d63a1 --- /dev/null +++ b/Makefile @@ -0,0 +1,36 @@ +ifeq ($(mode), release) + CFLAGS = -O2 -Wall +else + CFLAGS = -g -Wall -Wextra -DCIBIC_DEBUG +endif + +all: cibic + +db: + gdb cibic + +cibic: lex.yy.o cibic.tab.o ast.o main.o semantics.o ssa.o mips.o + mkdir -p bin + gcc -o bin/cibic lex.yy.o cibic.tab.o ast.o main.o semantics.o ssa.o mips.o + cp bin/cibic cibic +lex.yy.o: lex.yy.c cibic.tab.c + gcc -c lex.yy.c +cibic.tab.o: cibic.tab.c + gcc -c cibic.tab.c +main.o: main.c +# gcc -c main.c -g -Wall -Wextra +ast.o: ast.c ast.h +# gcc -c ast.c -g -Wall -Wextra -DCIBIC_DEBUG +semantics.o: semantics.c semantics.h +# gcc -c semantics.c -g -Wall -Wextra -DCIBIC_DEBUG +ssa.o: ssa.c ssa.h +# gcc -c ssa.c -g -Wall -Wextra -DCIBIC_DEBUG +mips.o: mips.c mips.h +# gcc -c mips.c -g -Wall -Wextra -DCIBIC_DEBUG +lex.yy.c: cibic.l + flex cibic.l +cibic.tab.c: cibic.y + bison -d cibic.y + +clean: + rm -f cibic lex.yy.c cibic.tab.c cibic.tab.h *.o diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..c2c3a05 --- /dev/null +++ b/README.rst @@ -0,0 +1,22 @@ +CIBIC: C Implemented Bare and Ingenuous Compiler +================================================= + +Build Requirements +------------------ +- flex >= 2.5.37 +- bison >= 2.4.3 +- gcc >= 4.7.3 + +Features +--------- +- Complex declaration support (``int (*a)[10]``, ``int (*f)()``, ``int (*g(int ***e[10]))()``, etc.) +- Complex cast support (``(int (*)())addr``, ``(int (*)[10])addr``, etc.) +- ``typedef`` support (together with complex decl) +- Forward declaration +- Real Single Static Assignment (with dominator frontier, renaming, interval + building and register allocation algorithm) +- Safe and conservative CSE (common subexpression elimination) +- Sophisticated semantic checking +- Small memory footprint +- Sophisticated error reporting +- User-friendly AST printing diff --git a/TODO.rst b/TODO.rst new file mode 100644 index 0000000..2d00ec7 --- /dev/null +++ b/TODO.rst @@ -0,0 +1,32 @@ +TODO +==== + +- More detailed checking in regex for: + + - char constant (done) + - string constant (done) + +- Fix: + + - check global definition (if type is complete) when semantic analysis finishes + - local function declaration is not in a local scope (external linkage issue) (will not be fixed) + - incomplete type issues + - function **definition** requires complete return type (function declaration does not) (done) + - array requires **complete** elem type (done) + - struct or union requires **complete** fields (done) + - pointer may **allow incomplete** type (done) + - calculate type memory footprint at proper time + - function to 'pointer to function' conversion (according the std 6.3.2/4) (done) + - vague var table management (done) + - incorrect address reference ``&`` (done) + - const function name (done) + - remove the redundant edge from blocks that break a loop + - ``<<`` operation in pre-calculation (done) + - support ``addui``, ``sll``, etc. (done) + +- Not Implemented: + + - complex type name (to be in agreement with complex decl) (done) + - initializer checking (done) + - typedef support (via adding mid-rule actions to bision to inform flex) (done) + - ``&&`` and ``||`` (done) @@ -0,0 +1,450 @@ +#include <stdlib.h> +#include <assert.h> +#include <stdio.h> +#include "ast.h" +#include "cibic.tab.h" +#define NEW_CNODE ((CNode *)malloc(sizeof(CNode))) + +CNode *ast_root; + +void cnode_reverse_chd(CNode *node) { + static CNode *chdn[MAX_CHDN]; + CNode *p; + int n = 0; + for (p = node->chd; p; p = p->next) + cnode_reverse_chd(p); + for (p = node->chd; p; p = p->next) + chdn[n++] = p; + if (n) + { + node->chd = chdn[--n]; + for (; n; n--) + chdn[n]->next = chdn[n - 1]; + chdn[0]->next = NULL; + } +} + +CNode *cnode_create_ast(CNode *wrapped) { + cnode_reverse_chd(wrapped); + return wrapped; +} + +CNode *cnode_create_nop(void) { + CNode *nop = NEW_CNODE; + nop->type = NOP; + nop->next = nop->chd = NULL; + return nop; +} + +CNode *cnode_create_general(int type, int subtype, int pnum, va_list ap) { + int i; + CNode *exp = NEW_CNODE; + exp->type = type; + exp->rec.subtype = subtype; + exp->next = exp->chd = NULL; + exp->ext.type = NULL; + exp->ext.offset = 0; + for (i = 0; i < pnum; i++) + { + CNode *subexp = va_arg(ap, CNode*); +#ifdef CIBIC_DEBUG + assert(subexp->next == NULL); +#endif + subexp->next = exp->chd; + exp->chd = subexp; + } + return exp; +} + +CNode *cnode_list_append(CNode *list, CNode *tail) { + if (list->type == NOP) + { + free(list); + return tail; + } + tail->next = list; + return tail; +} + +CNode *cnode_create_identifier(char *val) { + CNode *exp = NEW_CNODE; + exp->type = ID; + exp->chd = exp->next = NULL; + exp->rec.strval = val; + exp->ext.type = NULL; + exp->ext.offset = 0; + return exp; +} + +CNode *cnode_create_int_const(int val) { + /* TODO: overflow checking */ + CNode *exp = NEW_CNODE; + exp->type = INT; + exp->chd = exp->next = NULL; + exp->rec.intval = val; + exp->ext.type = NULL; + return exp; +} + +CNode *cnode_create_char_const(char *val) { + /* TODO: overflow checking */ + CNode *exp = NEW_CNODE; + exp->type = CHAR; + exp->chd = exp->next = NULL; + exp->rec.strval = val; + exp->ext.type = NULL; + return exp; +} + +CNode *cnode_create_str_const(char *val) { + CNode *exp = NEW_CNODE; + exp->type = STR; + exp->chd = exp->next = NULL; + exp->rec.strval = val; + exp->ext.type = NULL; + return exp; +} + +CNode *cnode_create_exp(int exp_type, int pnum, ...) { + CNode *ret; + va_list ap; + va_start(ap, pnum); + ret = cnode_create_general(EXP, exp_type, pnum, ap); + va_end(ap); + return ret; +} + +CNode *cnode_create_type_spec(int spec_type, int pnum, ...) { + CNode *ret; + va_list ap; + va_start(ap, pnum); + ret = cnode_create_general(TYPE_SPEC, spec_type, pnum, ap); + va_end(ap); + return ret; +} + +CNode *cnode_create_declr(int declr_type, int pnum, ...) { + CNode *ret; + va_list ap; + va_start(ap, pnum); + ret = cnode_create_general(DECLR, declr_type, pnum, ap); + va_end(ap); + return ret; +} + +CNode *cnode_create_stmt(int stmt_type, int pnum, ...) { + CNode *ret; + va_list ap; + va_start(ap, pnum); + ret = cnode_create_general(STMT, stmt_type, pnum, ap); + va_end(ap); + return ret; +} + +CNode *cnode_create_initr(int initr_type, CNode *body) { + CNode *initr = NEW_CNODE; + initr->type = INITR; + initr->rec.subtype = initr_type; + initr->chd = body; + initr->next = NULL; + initr->ext.type = NULL; + return initr; +} + +CNode *cnode_create_decl(CNode *type, CNode *init_declrs) { + CNode *decl = NEW_CNODE; +#ifdef CIBIC_DEBUG + assert(type->next == NULL); + assert(init_declrs->next == NULL); +#endif + decl->type = DECL; + decl->next = NULL; + decl->ext.type = NULL; + decl->chd = init_declrs; + init_declrs->next = type; + return decl; +} + +CNode *cnode_create_func(CNode *type, CNode *declr, CNode *stmt) { + CNode *func = NEW_CNODE; +#ifdef CIBIC_DEBUG + assert(type->next == NULL); + assert(declr->next == NULL); + assert(stmt->next == NULL); +#endif + func->type = FUNC_DEF; + func->next = NULL; + func->ext.type = NULL; + func->chd = stmt; + stmt->next = declr; + declr->next = type; + return func; +} + +CNode *cnode_create_init_declr(CNode *declr, CNode *initr) { + CNode *init_declr = NEW_CNODE; +#ifdef CIBIC_DEBUG + assert(declr->next == NULL); + assert(initr->next == NULL); +#endif + init_declr->type = INIT_DECLR; + init_declr->next = NULL; + init_declr->ext.type = NULL; + init_declr->chd = initr; + initr->next = declr; + return init_declr; +} + +CNode *cnode_create_struct_field(CNode *type_spec, CNode *declrs) { + CNode *field = NEW_CNODE; +#ifdef CIBIC_DEBUG + assert(type_spec->next == NULL); + assert(declrs->next == NULL); +#endif + field->type = FIELD; + field->next = NULL; + field->ext.type = NULL; + field->chd = declrs; + declrs->next = type_spec; + return field; +} + +CNode *cnode_create_plain_decl(CNode *type_spec, CNode *declr) { + CNode *pdecl = NEW_CNODE; +#ifdef CIBIC_DEBUG + assert(type_spec->next == NULL); + assert(declr->next == NULL); +#endif + pdecl->type = PLAIN_DECL; + pdecl->next = NULL; + pdecl->ext.type = NULL; + pdecl->chd = declr; + declr->next = type_spec; + return pdecl; +} + +CNode *cnode_create_typedef(CNode *type, CNode *declrs) { +#ifdef CIBIC_DEBUG + assert(type->next == NULL); + assert(declrs->next == NULL); +#endif + CNode *def = NEW_CNODE; + def->type = TYPEDEF; + def->next = NULL; + def->ext.type = NULL; + def->chd = declrs; + declrs->next = type; + return def; +} + +CNode *cnode_list_wrap(int type, CNode *list) { + CNode *wlist = NEW_CNODE; + wlist->type = type; + wlist->next = NULL; + wlist->ext.type = NULL; + wlist->chd = list; + return wlist; +} + +char *cnode_debug_type_repr(CNode *ast) { + static char buffer[MAX_DEBUG_PRINT_BUFF], + abuff[MAX_DEBUG_PRINT_BUFF]; + char *type, *aptr = abuff; + switch (ast->type) + { + case PROG: type = "prog"; break; + case FUNC_DEF: type = "func_def"; break; + case DECLS: type = "prg_decls"; break; + case FUNCS: type = "prg_funcs"; break; + case DECL: type = "decl"; break; + case INIT_DECLR: type = "init_declr"; break; + case PLAIN_DECL: type = "p_decl"; break; + case COMP_STMTS: type = "blk_stmts"; break; + case COMP_DECLS: type = "blk_decls"; break; + case DECLRS: type = "declrs"; break; + case INIT_DECLRS: type = "init_declrs"; break; + case ARGS: type = "args"; break; + case PARAMS: type = "params"; break; + case TYPEDEF: type = "typedef"; break; + case ID: + type = "id"; + aptr += sprintf(abuff, "%s", ast->rec.strval); + break; + case INT: + type = "int"; + aptr += sprintf(abuff, "%d", ast->rec.intval); + break; + case CHAR: + type = "char"; + aptr += sprintf(abuff, "%s", ast->rec.strval); + break; + case STR: + type = "str"; + aptr += sprintf(abuff, "\"%s\"", ast->rec.strval); + break; + case FIELD: type = "field"; break; + case FIELDS: type = "fields"; break; + case NOP: type = "nop"; break; + case EXP: + case INITR: + case TYPE_SPEC: + case STMT: + case DECLR: + type = NULL; break; + } + if (ast->type == EXP) + { + switch (ast->rec.subtype) + { + case ',': type = ","; break; + case '=': type = "="; break; + case ASS_MUL: type = "*="; break; + case ASS_DIV: type = "/="; break; + case ASS_MOD: type = "%="; break; + case ASS_ADD: type = "+="; break; + case ASS_SUB: type = "-="; break; + case ASS_SHL: type = "<<="; break; + case ASS_SHR: type = ">>="; break; + case ASS_AND: type = "&="; break; + case ASS_XOR: type = "^="; break; + case ASS_OR: type = "|="; break; + case OPT_OR: type = "||"; break; + case OPT_AND: type = "&&"; break; + case '|': type = "|"; break; + case '^': type = "^"; break; + case '&': type = "&"; break; + case OPT_EQ: type = "=="; break; + case OPT_NE: type = "!="; break; + case '<': type = "<"; break; + case '>': type = ">"; break; + case OPT_LE: type = "<="; break; + case OPT_GE: type = ">="; break; + case OPT_SHL: type = "<<"; break; + case OPT_SHR: type = ">>"; break; + case '+': type = "+"; break; + case '-': type = "-"; break; + case '*': type = "*"; break; + case '/': type = "/"; break; + case '%': type = "%"; break; + case EXP_CAST: type = "cast"; break; + case OPT_INC: type = "++"; break; + case OPT_DEC: type = "--"; break; + case '~': type = "~"; break; + case '!': type = "!"; break; + case KW_SIZEOF: type = "sizeof"; break; + case EXP_POSTFIX: type = "postfix"; break; + case POSTFIX_ARR: type = "arr"; break; + case POSTFIX_CALL: type = "call"; break; + case POSTFIX_DOT: type = "dot"; break; + case POSTFIX_PTR: type = "ptr"; break; + default: assert(0); + } + } + else if (ast->type == TYPE_SPEC) + { + switch (ast->rec.subtype) + { + case KW_VOID: type = "void"; break; + case KW_CHAR: type = "char"; break; + case KW_INT: type = "int"; break; + case KW_STRUCT: type = "struct"; break; + case KW_UNION: type = "union"; break; + case USER_TYPE: type = "user_type"; break; + default: assert(0); + } + } + else if (ast->type == DECLR) + { + switch (ast->rec.subtype) + { + case DECLR_FUNC: type = "func"; break; + case DECLR_ARR: type = "arr"; break; + case '*': type = "*"; break; + case 0: type = "typename"; break; + default: assert(0); + } + } + else if (ast->type == STMT) { switch (ast->rec.subtype) + { + case STMT_EXP: type = "exp"; break; + case STMT_COMP: type = "blk"; break; + case STMT_IF: type = "if"; break; + case STMT_WHILE: type = "while"; break; + case STMT_FOR: type = "for"; break; + case STMT_CONT: type = "cont"; break; + case STMT_BREAK: type = "break"; break; + case STMT_RET: type = "ret"; break; + default: assert(0); + } + } + else if (ast->type == INITR) + { + switch (ast->rec.subtype) + { + case INITR_NORM: type = "initr_n"; break; + case INITR_ARR: type = "initr_a"; break; + default: assert(0); + } + } + else + { + if (type == NULL) puts(""); + assert(type); + } + { + char *head = buffer; + head += sprintf(head, "%s", type); + if (aptr != abuff) + { + *aptr = '\0'; + head += sprintf(head, ":%s", abuff); + } + head += sprintf(head, "(%d:%d)", ast->loc.row, ast->loc.col); + if (ast->ext.type) + sprintf(head, "->(var:%lx type:%lx ic:%d cv:%ld off:%d)", + (size_t)ast->ext.var, (size_t)ast->ext.type, + ast->ext.is_const, ast->ext.const_val, ast->ext.offset); + } + return buffer; +} + +void cnode_debug_print_plain(CNode *ast) { + fprintf(stderr, "(%s", cnode_debug_type_repr(ast)); + for (ast = ast->chd; ast; ast = ast->next) + { + fprintf(stderr, " "); + cnode_debug_print_plain(ast); + } + fprintf(stderr, ")"); +} + +void cnode_debug_print_fancy(CNode *ast, int lvl) { + static int show[MAX_DEBUG_PRINT_LVL]; + int i; + show[lvl] = 1; + for (i = 0; i < lvl - 1; i++) + fprintf(stderr, "%c ", show[i] ? '|' : ' '); + if (lvl) + fprintf(stderr, "|____"); + fprintf(stderr, "[%s]\n", cnode_debug_type_repr(ast)); + for (ast = ast->chd; ast; ast = ast->next) + { + if (!ast->next) show[lvl] = 0; + cnode_debug_print_fancy(ast, lvl + 1); + } +} + +void cnode_debug_print(CNode *ast, int fancy) { + if (fancy) + cnode_debug_print_fancy(ast, 0); + else + cnode_debug_print_plain(ast); + puts(""); +} + + +CNode *cnode_add_loc(CNode *node, YYLTYPE loc) { + node->loc.row = loc.first_line; + node->loc.col = loc.first_column; + return node; +} @@ -0,0 +1,93 @@ +#ifndef AST_H +#define AST_H +#include <stdarg.h> +#include "const.h" +#include "semantics.h" +#include "cibic.tab.h" + +typedef struct CNode { + enum { + /* Top Level */ + PROG, + FUNC_DEF, + DECL, /* declaration */ + DECLR, /* declarator */ + DECLRS, + INIT_DECLR, + INIT_DECLRS, + INITR, /* initializer */ + TYPE_SPEC, + FIELD, /* struct-or-union field */ + FIELDS, + PLAIN_DECL, + DECLS, + FUNCS, + TYPEDEF, + + /* Statments */ + STMT, + + /* Expressions */ + EXP, + ID, /* identifier */ + INT, /* INT_CONST */ + CHAR, + STR, + NOP, + + COMP_STMTS, + COMP_DECLS, + ARGS, + PARAMS + } type; + union { + int intval; + int subtype; + char *strval; + } rec; + struct { + CType_t type; + CVar_t var; + CVList_t autos; + long const_val; + int is_const; + int offset; /* offset from var */ + } ext; + struct CNode *chd, *next; + /* For error reporting */ + struct { + int row, col; + } loc; +} CNode; + +CNode *cnode_add_loc(CNode *node, YYLTYPE loc); +CNode *cnode_create_ast(CNode *wrapped); +CNode *cnode_create_nop(void); +CNode *cnode_create_general(int type, int subtype, int pnum, va_list ap); +CNode *cnode_list_append(CNode *list, CNode *tail); +CNode *cnode_list_wrap(int type, CNode *list); + +CNode *cnode_create_exp(int exp_type, int pnum, ...); +CNode *cnode_create_type_spec(int spec_type, int pnum, ...); +CNode *cnode_create_declr(int declr_type, int pnum, ...); +CNode *cnode_create_stmt(int stmt_type, int pnum, ...); +CNode *cnode_create_initr(int initr_type, CNode *body); + +CNode *cnode_create_decl(CNode *type, CNode *init_declrs); +CNode *cnode_create_func(CNode *type, CNode *declr, CNode *stmt); +CNode *cnode_create_init_declr(CNode *declr, CNode *initr); +CNode *cnode_create_struct_field(CNode *type_spec, CNode *declrs); +CNode *cnode_create_plain_decl(CNode *type_spec, CNode *declr); +CNode *cnode_create_typedef(CNode *type, CNode *declrs); + +CNode *cnode_create_identifier(char *val); +CNode *cnode_create_int_const(int val); +CNode *cnode_create_char_const(char *val); +CNode *cnode_create_str_const(char *val); + +void cnode_debug_print(CNode *ast, int fancy); + +extern CNode *ast_root; +extern CNode *null; + +#endif @@ -0,0 +1,141 @@ +%{ +#include "cibic.tab.h" +#include "const.h" +#include "semantics.h" +int yycolumn = 1; +char linebuff[MAX_LINEBUFF], *lptr = linebuff; + +#define YY_USER_ACTION \ + do { \ + yylloc.first_line = yylloc.last_line = yylineno; \ + yylloc.first_column = yycolumn; \ + yylloc.last_column = yycolumn + yyleng - 1; \ + yycolumn += yyleng; \ + memmove(lptr, yytext, yyleng); \ + lptr += yyleng; \ + } while (0); + +#define NEW_LINE_USER_ACTION \ + do { \ + yycolumn = 1; \ + lptr = linebuff; \ + } while (0) +%} + +letter [a-zA-Z_$] +digit [0-9] +string ((\\.|[^\n\"\\])*) +char ([^\n'\\]|\\.|\\[0-7]+|\\[xX][0-9a-fA-F]+) + +%s IN_BLOCK_COMMENT IN_INLINE_COMMENT IN_DIRECTIVE +%option yylineno +%% + + +<INITIAL>{ +"/*" BEGIN(IN_BLOCK_COMMENT); +} +<IN_BLOCK_COMMENT>{ +"*/" BEGIN(INITIAL); +[^*\n]+ // eat comment in chunks +"*" // eat the lone star +\n { NEW_LINE_USER_ACTION; } +} + +<INITIAL>{ +"//" BEGIN(IN_INLINE_COMMENT); +} +<IN_INLINE_COMMENT>{ +\n { NEW_LINE_USER_ACTION; BEGIN(INITIAL); } +[^\n]+ +} + +<INITIAL>{ +"#" BEGIN(IN_DIRECTIVE); +} +<IN_DIRECTIVE>{ +\n { NEW_LINE_USER_ACTION; BEGIN(INITIAL); } +[^\n]+ +} + +"void" { return KW_VOID; } +"char" { return KW_CHAR; } +"int" { return KW_INT; } +"struct" { return KW_STRUCT; } +"union" { return KW_UNION; } +"if" { return KW_IF; } +"else" { return KW_ELSE; } +"while" { return KW_WHILE; } +"for" { return KW_FOR; } +"continue" { return KW_CONT; } +"break" { return KW_BREAK; } +"return" { return KW_RET; } +"sizeof" { return KW_SIZEOF; } +"typedef" { return KW_TYPEDEF; } + +{letter}({letter}|{digit})* { + yylval.strval = strdup(yytext); + return is_identifier(yytext) ? IDENTIFIER : USER_TYPE; +} + +({digit}+)|(0[xX][0-9a-fA-F]+) { + if (*yytext == '0') + { + if (*(yytext + 1) == 'x' || *(yytext + 1) == 'X') + sscanf(yytext, "%x", &yylval.intval); + else // FIXME: error report if it is not a valid octal + sscanf(yytext, "%o", &yylval.intval); + } + else yylval.intval = atoi(yytext); + return INT_CONST; +} + +'{char}' { + yylval.strval = strndup(yytext + 1, strlen(yytext) - 2); + return CHAR_CONST; +} + +'{char}? { + yyerror("missing terminating ' character\n"); + exit(1); +} + +\"{string}\" { + yylval.strval = strndup(yytext + 1, strlen(yytext) - 2); + return STR_CONST; +} + +\"{string}? { + yyerror("missing terminating \" character\n"); + exit(1); +} + +"||" { return OPT_OR; } +"&&" { return OPT_AND; } +"==" { return OPT_EQ; } +"!=" { return OPT_NE; } +"<=" { return OPT_LE; } +">=" { return OPT_GE; } +"<<" { return OPT_SHL; } +">>" { return OPT_SHR; } +"++" { return OPT_INC; } +"--" { return OPT_DEC; } +"->" { return OPT_PTR; } + +"*=" { return ASS_MUL; } +"/=" { return ASS_DIV; } +"%=" { return ASS_MOD; } +"+=" { return ASS_ADD; } +"-=" { return ASS_SUB; } +"<<=" { return ASS_SHL; } +">>=" { return ASS_SHR; } +"&=" { return ASS_AND; } +"^=" { return ASS_XOR; } +"|=" { return ASS_OR; } + +[();,={}\[\]*|\^&<>+\-*//%~!.] { return *yytext; } + +[ \t\r] /* skip whitespaces */ +\n { NEW_LINE_USER_ACTION; } +. { return UNKNOWN; } +%% diff --git a/cibic.tex b/cibic.tex new file mode 100644 index 0000000..9324b9d --- /dev/null +++ b/cibic.tex @@ -0,0 +1,328 @@ +\documentclass[10pt, a4paper]{article} + +\usepackage{xeCJK, fontspec} +\usepackage{amsmath, verbatim} +\usepackage{setspace, float} +\usepackage{graphicx, wrapfig} +\usepackage{rotating, enumerate, minted, fancyvrb} +\usepackage[left=1in, right=1in]{geometry} +\usepackage[font=small]{caption} + +\defaultfontfeatures{Mapping=tex-text} +\setCJKmainfont[BoldFont={STHeiti}]{Adobe Song Std} +\XeTeXlinebreaklocale "zh" +\XeTeXlinebreakskip = 0pt plus 1pt minus 0.1pt + +\setlength{\parindent}{2em} +\setlength{\parskip}{1em} +\makeatletter +\def\verbatim@font{\ttfamily\small} +\makeatother + +\makeatletter +\let\@afterindentrestore\@afterindentfalse +\let\@afterindentfalse\@afterindenttrue +\@afterindenttrue +\makeatother + +\xeCJKsetup{CJKglue=\hspace{0pt plus .08 \baselineskip }} + +\newcommand{\ud}{\mathrm{d}} +\usemintedstyle{colorful} +\begin{document} +\title{\Large{CIBIC: C Implemented Bare and Ingenuous Compiler}} + +\author{\textsc{尹茂帆 Ted Yin}\\ \\\multicolumn{1}{p{.7\textwidth}} + {\centering\emph{F1224004 5120309051\\ +Shanghai Jiao Tong University ACM Class}}} +\maketitle +\begin{abstract} + This report presents the design and features of a simple C compiler which + generates MIPS assembly code. Although not all the language requirements + and features are implemented according to the standard, it still supports + major C features, such as basic types (void, int, char), basic flow control + syntax (if, while-loop, for-loop), user-defined types (aka. typedef), + functions, pointers (including function pointers), struct/union (may be + nested), etc. Besides, it makes use of SSA (Single Static Assignment) form + for the IR (Intermediate Representation) and optimization. The whole compiler is + written in pure C, obeying C89/C99 standard. The report first introduces + the lexer and parser generation, then focuses on the data structures being + used in the AST (Abstract Syntax Tree) and symbol tables, and makes a + conclusion on the supported syntactical features. Next, the report shows + the intermediate representation design and claims the techniques that have + been used in the project. Finally, various optimization techniques are + presented and discussed, some are accompanied with code snippets. +\end{abstract} +\section{Lexer and Parser} +CIBIC makes good use of existing lexer and parser generators. It uses Flex to +generate lexer while using Bison as parser generator. Both generators read +boilerplate text which contains C code fragments to be filled in the generated +lexer/parser. The best practice, I suggest, is to avoid embedding too much +concrete code in the boilerplate. Instead, we shall write well-designed +functions in a separate file (``\texttt{ast.c}'' in CIBIC) and invoke functions in the +boilerplate. The reason is that it is almost not practical nor convenient to +debug the generated lexer or parser. Using the seperation method, we can set up +breakpoints in the seperate file easily and debug on the fly. The declarations +of all functions that may be invoked during parsing can be found in ``\texttt{ast.h}''. + +It requires a little more effort to track the location of each token using +Flex. Luckily, Flex provides with a macro called ``\texttt{YY\_USER\_ACTION}'' +to let the user do some extra actions after each token is read. So I maintained +a global variable ``\texttt{yycolumn}'' to keep the current column, a global +char array ``\texttt{linebuff}'' to buffer the text being read for error +report. +\begin{listing}[H] + \centering + \RecustomVerbatimEnvironment{Verbatim}{BVerbatim}{} + \begin{minted}{c} +int yycolumn = 1; +char linebuff[MAX_LINEBUFF], *lptr = linebuff; + +#define YY_USER_ACTION \ + do { \ + yylloc.first_line = yylloc.last_line = yylineno; \ + yylloc.first_column = yycolumn; \ + yylloc.last_column = yycolumn + yyleng - 1; \ + yycolumn += yyleng; \ + memmove(lptr, yytext, yyleng); \ + lptr += yyleng; \ + } while (0); + +#define NEW_LINE_USER_ACTION \ + do { \ + yycolumn = 1; \ + lptr = linebuff; \ + } while (0) +\end{minted} +\caption {Code Snippet to Track Down Location} +\end{listing} + +\section{Semantic Analysis and Implementation} +The parsing process is also an AST (Abstrct Syntax Tree) construction process. +By calling the corresponding functions, the generated parser create tree nodes +and merge them into subtrees. The major issue here is how to design a proper +data structure to represent and maintain the AST. In CIBIC, all nodes in an AST +are instances of struct ``CNode'' in figure. +\begin{figure}[H] + \centering + \texttt { + \begin{tabular}{|c|} + \hline + type \\ + \footnotesize{describe syntax information e.g. expression or declarator} \\ \hline + rec: \\ + \footnotesize{\textbf{union of intval / subtype / strval}} \\ + \footnotesize{stores detailed information e.g. which kind of expression it is} \\ \hline + ext: \\ + \footnotesize{\textbf{struct of type, var, autos, const\_val, is\_const, offset}} \\ + \footnotesize{extended info, where annotation is made} \\ \hline + chd \\ + \footnotesize{the left most child of the node} \\ \hline + next \\ + \footnotesize{the next sibling} \\ \hline + loc \\ + \footnotesize{\textbf{struct of row, col}} \\ + \footnotesize{the location in the source code, for error report} \\ \hline + \end{tabular} + } + \caption{The Structure of a CNode instance} +\end{figure} +Since a node may have variable number of children, the most common and +efficient approach is to implement a left-child right-sibling binary tree. The +field ``\texttt{chd}'' points to the child and ``\texttt{next}'' points to the +next sibling. This implementation is extremely flexible because we do not need +to know the number of children in advance, which brings us the convenience of +appending argument nodes to a function invocation node in the boilerplate. + +After construction of the AST, the tree will be traversed by calling mutually +recursive functions declared in ``\texttt{semantics.h}''. The entry call is ``\texttt{semantics\_check}'' which will set up the symbol tables and call ``\texttt{semantics\_func}'' and ``\texttt{semantics\_decl}'' to analyze function definitions and global declarations. These functions will further invoke more basic functions such as ``\texttt{semantics\_comp}'' (handles compound statements) to fully check the semantics and gather variable typing information. + +The key data structures in the semantic checking are \textbf{symbol tables}. Symbol tables maintain variables and types in different scopes across different name spaces. When a variable is defined, the corresponding type specifer will be checked and binds to the variable. Also, when the scope ends, all variable bindings living within the scope will be removed from the table. Although they are removed from the table, the bindings are still referenced by some nodes on the AST, so that the AST becomes ``\textbf{annotated AST}''. In CIBIC, the variable reference is stored in ``\texttt{ext.var}'' field of ``\texttt{CNode}'' and the type information of an subexpression is annotated at ``\texttt{ext.type}''. Thus, in a word, symbol tables stores all symbols that are currently \textbf{visible}. + +In C language, there are four name spaces: for label names, tags, members of structures or unions and all other identifiers. Goto statments are not implemented in CIBIC, so there're actually three name spaces. Since each kind of structures or unios have its own name space for its fields, we treat them specially and create a new table for each of them. For tags and all other identifiers, we set up two global tables. Besides name spaces, scoping is also a very important concept in C. It seems to be an orthogonal concept to name spaces. + +Considering these two concepts, CIBIC implements a struct named ``\texttt{CScope}'' to maintain all the information as shown in figure. +\begin{figure}[H] + \centering + \texttt { + \begin{tabular}{|c|} + \hline + lvl \\ + \footnotesize{current nesting level of scopes, e.g. 0 for global scope} \\ \hline + func \\ + \footnotesize{current function, helpful when analyzing a return statement} \\ \hline + inside\_loop \\ + \footnotesize{if the scope is inside a loop, help full when analyzing a break statement} \\ \hline + top \\ + \footnotesize{points to the top of the scope stack} \\ \hline + ids \\ + \footnotesize{name space for all other variables} \\ \hline + tags \\ + \footnotesize{name space for all tags (name of structs or unions)} \\ \hline + \end{tabular} + } + \caption{The Structure of a CScope instance} +\end{figure} + +Note that ``\texttt{top}'' points to an instance of ``\texttt{CSNode}'' which +has two fields ``\texttt{symlist}'' and ``\texttt{next}''. ``\texttt{symlist}'' +points to a list of symbols in the same scope while ``\texttt{next}'' links to +the outer scope which is another instance of ``\texttt{CSNode}''. As for +``\texttt{ids}'' and ``\texttt{tags}'', they are pointers to +``\texttt{CTable}'',stores all the current symbols. As mentioned above, for +each struct or union, there is also a pointer to ``\texttt{CTable}'' stores all +field names. ``\texttt{CTable}'' is an open addressing hash table +containing nodes of the type ``\texttt{CTNode}''. The structure of each node is +depicted in figure. + +\begin{figure}[H] + \centering + \texttt { + \begin{tabular}{|c|} + \hline + key \\ + \footnotesize{char *} \\ \hline + val \\ + \footnotesize{void *, in order to also be used in checking duplicate parameters, etc.} \\ \hline + next \\ + \footnotesize{the next element which has the same hash value} \\ \hline + lvl \\ + \footnotesize{scope level} \\ \hline + \end{tabular} + } + \caption{The Structure of a CTNode instance} +\end{figure} + +Thanks to the level information kept in each ``\texttt{CTNode}'', we do not +have to establish a hash table for every scopes, which may be memory consuming. +Instead, whenever a new scope begins, CIBIC simply pushes a new frame to scope +stack. This is achieved by creating an instance of ``\texttt{CSNode}'', setting +its ``\texttt{next}'' field to the ``\texttt{top}'' field of the +``\texttt{CScope}'' then letting the ``\texttt{top}'' points to the new frame, +finally increasing ``\texttt{lvl}'' by one. Whenever a new symbol is being +added to ``\texttt{CScope}'', CIBIC adds the symbol to one of the tables +``\texttt{ids}'' and ``\texttt{tags}'', then also appends the symbol to the +``\texttt{symlist}'' of the top of the scope stack. The most elegant +characteristics of open addressing hash tables is, for the same name appears in +different scopes, the symbol defined at the inner most is always ahead of +others in the chain of the table because it is the most recently added one. So, +for lookups, CIBIC just need to return the first node having the same name in +the chain, which is very time-efficient. At last, when a scope ends, CIBIC +scans the whole ``\texttt{symlist}'' of the top scope frame, and tries to +remove these symbols from the table. Figure presents the content of the scope +stack when the analysis proceeds into the inner most declaration of a. See +chains with hash code 0097 and 0098. +\begin{figure}[H] +% \centering + \begin{minipage}{0.35\textwidth} +% \RecustomVerbatimEnvironment{Verbatim}{BVerbatim}{} + \begin{minted}{c} +int main() { + int a, b; + if (a > 0) + { + int a, b; + if (a > 0) + { + int a, b; + } + } +} + \end{minted} + \caption {Source Code Being Proceeded} +\end{minipage} +% \centering + \begin{minipage}{0.5\textwidth} + \begin{BVerbatim} + [0072]->[int@747e780:0] + [0097]->[a@7484ae0:3]->[a@7484890:2]->[a@7484580:1] + [0098]->[b@7484bb0:3]->[b@7484960:2]->[b@7484710:1] + [0108]->[printf@747f150:0] + [0188]->[scanf@747f640:0] + [0263]->[__print_string@7484010:0] + [0278]->[__print_char@7483fc0:0] + [0623]->[__print_int@747f8b0:0] + [0778]->[malloc@7484100:0] + [0827]->[void@747eee0:0] + [0856]->[main@7484530:0] + [0908]->[memcpy@7484060:0] + [0971]->[char@747e9f0:0] + \end{BVerbatim} + \caption {Dump Info from CIBIC: Scope Stack} +\end{minipage} +\end{figure} + +\subsection{Type System} +C has a small set of basic types. In CIBIC, basic types include \texttt{char}, +\texttt{int} and \texttt{void} (literally, \texttt{void} is not an actual +type). However, C supports two powerful type aggregation: arrays and structures +(unions), and also supports an indirect access tool: pointer. This makes the +type system much more complex and usable in practice. CIBIC uses the concept +``type tree'' to organize types. All basic types are the leaves of a type tree, +while aggregate types and pointers are the intermediate nodes. Figure shows a +typical type tree of a C struct. + +A type tree is good at preserving type hierarchy info which may be extremely +useful in type checking. For example, when checking a expression \texttt{*a}, +compiler first check if the root of the type tree of a is a pointer. If not, +error message will be printed and semantic checking fails. Otherwise, the type +of the expression result is the subtree of the only child of the root. Also, a +type tree enables us to implement complex type nesting, which will be discussed +later on. + +CIBIC has support for user-defined types, which are defined via the keyword +``\texttt{typedef}''. However, ``\texttt{typedef}'' is notoriously difficult +to deal with due to the ambiguity caused by the language design. For example, +in ``\texttt{int A}'', A is a \textbf{variable} of integer type, but in +``\texttt{A a}'', A is a user-defined \textbf{type}. The subtle semantic +difference is caused by context. In former case, A is identified as a +identifier token, while in latter case, identified as a type specifier. The +meaning of a token should be made clear during lexical analysis which does not +take in account of context. So the most direct and effective way to implement +\texttt{typedef} is to hack the lexer and parser. CIBIC maintains a +``\texttt{typedef\_stack}'' to denote current parsing status. When a parser has +just read a type specifier, before it moves on, it invokes +``\texttt{def\_enter(FORCE\_ID)}'' to notify ``\texttt{typedef\_stack}'' the +subsequent string token must be an identifier instead of a type specifier. As +for ``\texttt{typedef}'' statements, the parser will invoke +``\texttt{def\_enter(IN\_TYPEDEF)}'' to record the newly defined typename by +adding an entry to a hash table. In the lexer, when a string token is being +read, it invokes ``\texttt{is\_identifier}'' to make sure whether the string is +an \texttt{IDENTIFIER} or a \texttt{USER\_TYPE}. + +Figure demonstrates a very subtle use of \texttt{typedef}, which can be parsed perfectly by CIBIC. + +\begin{listing}[H] + \centering + \RecustomVerbatimEnvironment{Verbatim}{BVerbatim}{} + \begin{minted}{c} +/* It's quite common to define a shorthand `I' for `struct I'. */ +typedef struct I I; +/* Declaration of a function with parameter `a' of user-defined type `I'. */ +int incomp(I a); +/* The definition of `I' is postponed. */ +struct I { + int i, j; +}; +/* Function definition of previous declared one. */ +int incomp(I a) {} +/* Define `b' as an int type. */ +typedef int b; +int main() { + /* Variable `b' is of type `b', an integer actually. */ + I i; + b b; + incomp(i); +} +\end{minted} +\caption {Code Snippet to Track Down Location} +\end{listing} + + +\subsection{Complex Declaration and Function Pointer} +\section{Intermadiate Representation} +\subsection{Single Static Assignment Form} +\subsection{Phi-functions} +\subsection{Register Allocation} +\section{Performance and Optimization} +\end{document} @@ -0,0 +1,401 @@ +%{ +#include <stdio.h> +#include <stdlib.h> +#include "ast.h" +#include "semantics.h" +%} +%union { + int intval; + char *strval; + struct CNode *cnode; +} +%error-verbose +%token IDENTIFIER "identifier" INT_CONST "integer constant" CHAR_CONST "character constant" STR_CONST "string constant" USER_TYPE "typedef name" +%token KW_VOID "void" KW_CHAR "char" KW_INT "int" KW_STRUCT "struct" KW_UNION "union" KW_IF "if" KW_ELSE "else" KW_WHILE "while" KW_TYPEDEF "typedef" +%token KW_FOR "for" KW_CONT "continue" KW_BREAK "break" KW_RET "ret" KW_SIZEOF "sizeof" +%token OPT_OR "||" OPT_AND "&&" OPT_EQ "==" OPT_NE "!=" OPT_LE "<=" OPT_GE ">=" OPT_SHL "<<" OPT_SHR ">>" OPT_INC "++" OPT_DEC "--" OPT_PTR "->" +%token ASS_MUL "*=" ASS_DIV "/=" ASS_MOD "%=" ASS_ADD "+=" ASS_SUB "-=" ASS_SHL "<<=" ASS_SHR ">>=" ASS_AND "&=" ASS_XOR "^=" ASS_OR "|=" +%token UNKNOWN "stray character" +%token END 0 "end of file" +%type<intval> INT_CONST +%type<strval> IDENTIFIER STR_CONST CHAR_CONST USER_TYPE +%type<intval> additive_operator assignment_operator equality_operator multiplicative_operator relational_operator shift_operator struct_or_union unary_operator +%type<cnode> additive_expression and_expression arguments array_initializer assignment_expression cast_expression comp_decls compound_statement comp_stmts constant_expression declaration declarator declarators equality_expression exclusive_or_expression expression expression_statement function_definition identifier inclusive_or_expression init_declarator init_declarators initializer iteration_statement jump_statement logical_and_expression logical_or_expression multiplicative_expression optional_exp parameters plain_declaration direct_declarator postfix postfix_expression primary_expression prog_list program relational_expression selection_statement shift_expression statement struct_field struct_fields type_name type_specifier unary_expression abstract_declarator direct_abstract_declarator direct_abstract_declarator_opt abstract_declarator_opt user_type +%start program +%% +program + : prog_list { ast_root = cnode_create_ast(cnode_list_wrap(PROG, $1)); } + +prog_list + : declaration + | function_definition + | prog_list declaration { $$ = cnode_list_append($1, $2); } + | prog_list function_definition { $$ = cnode_list_append($1, $2); } + +declaration + : KW_TYPEDEF type_specifier { def_exit(); def_enter(IN_TYPEDEF); } declarators ';' { + $$ = cnode_add_loc(cnode_create_typedef( + $2, + cnode_add_loc(cnode_list_wrap(DECLRS, $4), @4)), @$); + def_exit(); + } + | type_specifier ';' { + $$ = cnode_add_loc(cnode_create_decl( + $1, + cnode_list_wrap(INIT_DECLRS, cnode_create_nop())), @$); + def_exit(); + } + | type_specifier init_declarators ';' { + $$ = cnode_add_loc(cnode_create_decl( + $1, + cnode_add_loc(cnode_list_wrap(INIT_DECLRS, $2), @2)), @$); + def_exit(); + } + +function_definition + : type_specifier declarator { def_exit(); } compound_statement { + $$ = cnode_add_loc(cnode_create_func($1, $2, $4), @$); + } + +parameters + : { $$ = NULL; } + | plain_declaration + | parameters ',' plain_declaration { $$ = cnode_list_append($1, $3); } + +declarators + : declarator + | declarators ',' declarator { $$ = cnode_list_append($1, $3); } + +init_declarators + : init_declarator + | init_declarators ',' init_declarator { $$ = cnode_list_append($1, $3); } + +init_declarator + : declarator { $$ = cnode_add_loc(cnode_create_init_declr($1, cnode_create_nop()), @$); } + | declarator { def_exit(); } '=' initializer { + $$ = cnode_add_loc(cnode_create_init_declr($1, $4), @$); + def_enter(FORCE_ID); + } + +initializer + : assignment_expression { $$ = cnode_add_loc(cnode_create_initr(INITR_NORM, $1), @$); } + | '{' array_initializer '}' { $$ = cnode_add_loc(cnode_create_initr(INITR_ARR, $2), @$); } + +array_initializer + : initializer + | array_initializer ',' initializer { + $$ = cnode_list_append(cnode_add_loc($1, @1), $3); } + +type_specifier + : KW_VOID { $$ = cnode_add_loc(cnode_create_type_spec(KW_VOID, 0), @$); def_enter(FORCE_ID); } + | KW_CHAR { $$ = cnode_add_loc(cnode_create_type_spec(KW_CHAR, 0), @$); def_enter(FORCE_ID); } + | KW_INT { $$ = cnode_add_loc(cnode_create_type_spec(KW_INT, 0), @$); def_enter(FORCE_ID); } + | struct_or_union identifier {def_exit(); }'{' struct_fields '}' { + $$ = cnode_add_loc(cnode_create_type_spec($1, 2, $2, cnode_add_loc(cnode_list_wrap(FIELDS, $5), @5)), @$); + def_enter(FORCE_ID); + } + | struct_or_union {def_exit(); }'{' struct_fields '}' { + $$ = cnode_add_loc(cnode_create_type_spec($1, 2, cnode_create_nop(), cnode_add_loc(cnode_list_wrap(FIELDS, $4), @4)), @$); + def_enter(FORCE_ID); + } + | struct_or_union identifier { + $$ = cnode_add_loc(cnode_create_type_spec($1, 2, $2, cnode_create_nop()), @$); + def_exit(); + def_enter(FORCE_ID); + } + | user_type { $$ = cnode_add_loc(cnode_create_type_spec(USER_TYPE, 1, $1), @$); def_enter(FORCE_ID); } + +user_type + : USER_TYPE { $$ = cnode_add_loc(cnode_create_identifier($1), @$); } + +struct_fields + : struct_field + | struct_fields struct_field { $$ = cnode_list_append($1, $2); } +struct_field + : type_specifier declarators ';' { + $$ = cnode_add_loc( + cnode_create_struct_field( + $1, + cnode_add_loc(cnode_list_wrap(DECLRS, $2), @2)), @$); + def_exit(); + } + +struct_or_union + : KW_STRUCT { $$ = KW_STRUCT; def_enter(FORCE_ID); } + | KW_UNION { $$ = KW_UNION; def_enter(FORCE_ID); } + +plain_declaration + : type_specifier declarator { + $$ = cnode_add_loc(cnode_create_plain_decl($1, $2), @$); + def_exit(); + } + +direct_declarator + : identifier { push($1->rec.strval); } + | '(' declarator ')' { $$ = $2; } + | direct_declarator { def_enter(NONE); } '(' parameters ')' { + $$ = cnode_add_loc(cnode_create_declr( + DECLR_FUNC, 2, $1, + cnode_add_loc(cnode_list_wrap(PARAMS, $4), @4)), @$); + def_exit(); + } + | direct_declarator '[' constant_expression ']' { + $$ = cnode_add_loc(cnode_create_declr(DECLR_ARR, 2, $1, $3), @$); + } + +declarator + : direct_declarator + | '*' declarator { + $$ = cnode_add_loc(cnode_create_declr('*', 1, $2), @$); } + +statement + : expression_statement + | compound_statement + | selection_statement + | iteration_statement + | jump_statement + +expression_statement + : ';' { $$ = cnode_add_loc(cnode_create_stmt(STMT_EXP, 1, cnode_create_nop()), @$); } + | expression ';' { $$ = cnode_add_loc(cnode_create_stmt(STMT_EXP, 1, $1), @$); } + +compound_statement + : { block_enter(); } '{' comp_decls comp_stmts '}' { + $$ = cnode_add_loc( + cnode_create_stmt(STMT_COMP, 2, cnode_add_loc(cnode_list_wrap(COMP_DECLS, $3), @3), + cnode_add_loc(cnode_list_wrap(COMP_STMTS, $4), @4)), @$); + block_exit(); + } + +comp_decls + : { $$ = cnode_create_nop(); } + | comp_decls declaration { $$ = cnode_list_append($1, $2); } + +comp_stmts + : { $$ = cnode_create_nop(); } + | comp_stmts statement { $$ = cnode_list_append($1, $2); } + +selection_statement + : KW_IF '(' expression ')' statement { + $$ = cnode_add_loc( + cnode_create_stmt(STMT_IF, 3, $3, $5, cnode_create_nop()), @$); + } + | KW_IF '(' expression ')' statement KW_ELSE statement { + $$ = cnode_add_loc( + cnode_create_stmt(STMT_IF, 3, $3, $5, $7), @$); + } + +iteration_statement + : KW_WHILE '(' expression ')' statement { + $$ = cnode_add_loc(cnode_create_stmt(STMT_WHILE, 2, $3, $5), @$); + } + | KW_FOR '(' optional_exp ';' optional_exp ';' optional_exp ')' statement { + $$ = cnode_add_loc(cnode_create_stmt(STMT_FOR, 4, $3, $5, $7, $9), @$); + } + +optional_exp + : { $$ = cnode_create_nop(); } + | expression + +jump_statement + : KW_CONT ';' { $$ = cnode_add_loc(cnode_create_stmt(STMT_CONT, 0), @$); } + | KW_BREAK ';' { $$ = cnode_add_loc(cnode_create_stmt(STMT_BREAK, 0), @$); } + | KW_RET optional_exp ';' { $$ = cnode_add_loc(cnode_create_stmt(STMT_RET, 1, $2), @$); } + +expression + : assignment_expression + | expression ',' assignment_expression { + @$ = @2; + $$ = cnode_add_loc(cnode_create_exp(',', 2, $1, $3), @2); } + +assignment_expression + : logical_or_expression + | unary_expression assignment_operator assignment_expression { + @$ = @2; + $$ = cnode_add_loc(cnode_create_exp($2, 2, $1, $3), @2); } + +assignment_operator + : '=' { $$ = '='; } + | ASS_MUL { $$ = ASS_MUL; } + | ASS_DIV { $$ = ASS_DIV; } + | ASS_MOD { $$ = ASS_MOD; } + | ASS_ADD { $$ = ASS_ADD; } + | ASS_SUB { $$ = ASS_SUB; } + | ASS_SHL { $$ = ASS_SHL; } + | ASS_SHR { $$ = ASS_SHR; } + | ASS_AND { $$ = ASS_AND; } + | ASS_XOR { $$ = ASS_XOR; } + | ASS_OR { $$ = ASS_OR; } + +constant_expression: logical_or_expression +logical_or_expression + : logical_and_expression + | logical_or_expression OPT_OR logical_and_expression { + @$ = @2; + $$ = cnode_add_loc(cnode_create_exp(OPT_OR, 2, $1, $3), @2); } + +logical_and_expression + : inclusive_or_expression + | logical_and_expression OPT_AND inclusive_or_expression { + @$ = @2; + $$ = cnode_add_loc(cnode_create_exp(OPT_AND, 2, $1, $3), @2); } + +inclusive_or_expression + : exclusive_or_expression + | inclusive_or_expression '|' exclusive_or_expression { + @$ = @2; + $$ = cnode_add_loc(cnode_create_exp('|', 2, $1, $3), @2); } + +exclusive_or_expression + : and_expression + | exclusive_or_expression '^' and_expression { + @$ = @2; + $$ = cnode_add_loc(cnode_create_exp('^', 2, $1, $3), @2); } + +and_expression + : equality_expression + | and_expression '&' equality_expression { + @$ = @2; + $$ = cnode_add_loc(cnode_create_exp('&', 2, $1, $3), @2); } + +equality_expression + : relational_expression + | equality_expression equality_operator relational_expression { + @$ = @2; + $$ = cnode_add_loc(cnode_create_exp($2, 2, $1, $3), @2); } + +equality_operator + : OPT_EQ { $$ = OPT_EQ; } + | OPT_NE { $$ = OPT_NE; } + +relational_expression + : shift_expression + | relational_expression relational_operator shift_expression { + @$ = @2; + $$ = cnode_add_loc(cnode_create_exp($2, 2, $1, $3), @2); } + +relational_operator + : '<' { $$ = '<'; } + | '>' { $$ = '>'; } + | OPT_LE { $$ = OPT_LE; } + | OPT_GE { $$ = OPT_GE; } + +shift_expression + : additive_expression + | shift_expression shift_operator additive_expression { + @$ = @2; + $$ = cnode_add_loc(cnode_create_exp($2, 2, $1, $3), @2); } + +shift_operator + : OPT_SHL { $$ = OPT_SHL; } + | OPT_SHR { $$ = OPT_SHR; } + +additive_expression + : multiplicative_expression + | additive_expression additive_operator multiplicative_expression { + @$ = @2; + $$ = cnode_add_loc(cnode_create_exp($2, 2, $1, $3), @2); } + +additive_operator + : '+' { $$ = '+'; } + | '-' { $$ = '-'; } + +multiplicative_expression + : cast_expression + | multiplicative_expression multiplicative_operator cast_expression { + @$ = @2; + $$ = cnode_add_loc(cnode_create_exp($2, 2, $1, $3), @2); } + +multiplicative_operator + : '*' { $$ = '*'; } + | '/' { $$ = '/'; } + | '%' { $$ = '%'; } + +cast_expression + : unary_expression + | '(' type_name ')' cast_expression { + $$ = cnode_add_loc(cnode_create_exp(EXP_CAST, 2, $2, $4), @$); } + +type_name + : type_specifier abstract_declarator_opt { + $$ = cnode_add_loc(cnode_create_declr(0, 2, $1, $2), @$); + def_exit(); + } + +abstract_declarator_opt + : { $$ = cnode_create_nop(); } + | abstract_declarator + +abstract_declarator + : '*' { $$ = cnode_add_loc(cnode_create_declr('*', 1, cnode_create_nop()), @$); } + | '*' abstract_declarator { $$ = cnode_add_loc(cnode_create_declr('*', 1, $2), @$); } + | direct_abstract_declarator + +direct_abstract_declarator_opt + : { $$ = cnode_create_nop(); } + | direct_abstract_declarator + +direct_abstract_declarator + : '(' abstract_declarator ')' { $$ = $2; } + | direct_abstract_declarator '(' parameters ')' { + $$ = cnode_add_loc(cnode_create_declr( + DECLR_FUNC, 2, $1, + cnode_add_loc(cnode_list_wrap(PARAMS, $3), @3)), @$); + } + | '(' parameters ')' { + $$ = cnode_add_loc(cnode_create_declr( + DECLR_FUNC, 2, cnode_create_nop(), + cnode_add_loc(cnode_list_wrap(PARAMS, $2), @2)), @$); + } + | direct_abstract_declarator_opt '[' constant_expression ']' { + $$ = cnode_add_loc(cnode_create_declr(DECLR_ARR, 2, $1, $3), @$); + } + +unary_expression + : postfix_expression + | OPT_INC unary_expression { $$ = cnode_add_loc(cnode_create_exp(OPT_INC, 1, $2), @$); } + | OPT_DEC unary_expression { $$ = cnode_add_loc(cnode_create_exp(OPT_DEC, 1, $2), @$); } + | unary_operator cast_expression { $$ = cnode_add_loc(cnode_create_exp($1, 1, $2), @$); } + | KW_SIZEOF unary_expression { $$ = cnode_add_loc(cnode_create_exp(KW_SIZEOF, 1, $2), @$); } + | KW_SIZEOF '(' type_name ')' { $$ = cnode_add_loc(cnode_create_exp(KW_SIZEOF, 1, $3), @$); } + +unary_operator + : '&' { $$ = '&'; } + | '*' { $$ = '*'; } + | '+' { $$ = '+'; } + | '-' { $$ = '-'; } + | '~' { $$ = '~'; } + | '!' { $$ = '!'; } + +postfix_expression + : primary_expression + | postfix_expression postfix { + @$ = @2; + $$ = cnode_add_loc(cnode_create_exp(EXP_POSTFIX, 2, $1, $2), @2); } + +postfix + : '[' expression ']' { $$ = cnode_add_loc(cnode_create_exp(POSTFIX_ARR, 1, $2), @$); } + | '(' arguments ')' { + $$ = cnode_add_loc(cnode_create_exp( + POSTFIX_CALL, 1, + cnode_add_loc(cnode_list_wrap(ARGS, $2), @2)), @$); } + | '.' identifier { $$ = cnode_add_loc(cnode_create_exp(POSTFIX_DOT, 1, $2), @$); } + | OPT_PTR identifier { $$ = cnode_add_loc(cnode_create_exp(POSTFIX_PTR, 1, $2), @$); } + | OPT_INC { $$ = cnode_add_loc(cnode_create_exp(OPT_INC, 0), @$); } + | OPT_DEC { $$ = cnode_add_loc(cnode_create_exp(OPT_DEC, 0), @$); } + +arguments + : { $$ = NULL; } + | assignment_expression + | arguments ',' assignment_expression { $$ = cnode_list_append($1, $3); } + +primary_expression + : identifier + | INT_CONST { $$ = cnode_add_loc(cnode_create_int_const($1), @$); } + | CHAR_CONST { $$ = cnode_add_loc(cnode_create_char_const($1), @$); } + | STR_CONST { $$ = cnode_add_loc(cnode_create_str_const($1), @$); } + | '(' expression ')' { $$ = cnode_add_loc($2, @$); } + +identifier + : IDENTIFIER { $$ = cnode_add_loc(cnode_create_identifier($1), @$); } +%% diff --git a/compile_data/custom_const.c b/compile_data/custom_const.c new file mode 100644 index 0000000..d0126f3 --- /dev/null +++ b/compile_data/custom_const.c @@ -0,0 +1,9 @@ +int sum;
+void f() {
+ sum = 3;
+}
+int main() {
+ sum = 1;
+ f();
+ printf("%d\n", sum);
+}
diff --git a/compile_data/custom_struct.c b/compile_data/custom_struct.c new file mode 100644 index 0000000..0d473f5 --- /dev/null +++ b/compile_data/custom_struct.c @@ -0,0 +1,26 @@ +struct D {
+ int a, b;
+};
+struct A {
+ int a[100];
+ struct B {
+ struct C {
+ int x, y;
+ } c;
+ int z;
+ struct D *p;
+ } b;
+} s;
+int main() {
+ struct D d;
+ int n = 0;
+ s.a[1] = 1;
+ s.a[2] = 2;
+ s.b.z = 4;
+ s.b.c.x = 5;
+ s.b.c.y = 6;
+ s.b.p = &d;
+ s.b.p->a = 7;
+ s.b.p->b = 8;
+ printf("%d %d %d %d %d %d %d\n", s.a[1], s.a[2], s.b.z, s.b.c.x, s.b.c.y, s.b.p->a, s.b.p->b);
+}
diff --git a/compile_data/custom_struct2.c b/compile_data/custom_struct2.c new file mode 100644 index 0000000..e147186 --- /dev/null +++ b/compile_data/custom_struct2.c @@ -0,0 +1,41 @@ +struct A {
+ int x, y;
+} sa;
+struct A print(struct A a, struct A b) {
+ a.x++;
+ a.y++;
+ b.x--;
+ b.y--;
+ printf("args: %d %d\n", a.x, a.y);
+ printf("args: %d %d\n", b.x, b.y);
+ return a;
+}
+int main() {
+ int i;
+ int t;
+ int *a, *b;
+ struct A sb, sc;
+ a = malloc(sizeof(int) * 100);
+ for (i = 0; i < 100; i++)
+ a[i] = i;
+ b = malloc(sizeof(int) * 100);
+ memcpy(b, a, sizeof(int) * 100);
+ for (i = 0; i < 100; i++)
+ printf("%d ", b[i]);
+ sb.x = 1;
+ sb.y = 2;
+ sa = sb;
+ sc = sa;
+ printf("\n%d %d\n", sa.x, sa.y);
+ printf("%d %d\n", sc.x, sc.y);
+ sa.x = 1;
+ sa.y = 2;
+ sb.x = 1;
+ sb.y = 2;
+ sa = print(sa, sb);
+ sb = print(sa, sb);
+ sa = print(sa, sb);
+ sb = print(sa, sb);
+ printf("%d %d\n", sa.x, sa.y);
+ printf("%d %d\n", sb.x, sb.y);
+}
diff --git a/compile_data/custom_struct3.c b/compile_data/custom_struct3.c new file mode 100644 index 0000000..d7060b3 --- /dev/null +++ b/compile_data/custom_struct3.c @@ -0,0 +1,15 @@ +struct A { + int x, y; +} a[10]; +struct A f(struct A a) { + a.x++; + a.y++; + return a; +} +int main(){ + int i; + for (i = 1; i < 10; i++) + a[i] = f(a[i - 1]); + for (i = 0; i < 10; i++) + printf("%d %d\n", a[i].x, a[i].y); +} diff --git a/compile_data/custom_struct4.c b/compile_data/custom_struct4.c new file mode 100644 index 0000000..75f50ea --- /dev/null +++ b/compile_data/custom_struct4.c @@ -0,0 +1,28 @@ +struct A {
+ struct B {
+ int x, y;
+ struct C {
+ int w;
+ } c;
+ } b;
+ int z;
+};
+
+struct B f(struct A a) {
+ printf("z: %d\n", a.z);
+ return a.b;
+}
+
+struct C g(struct B b) {
+ printf("x: %d\ny: %d\n", b.x, b.y);
+ return b.c;
+}
+
+int main() {
+ struct A a;
+ a.z = 1;
+ a.b.x = 2;
+ a.b.y = 3;
+ a.b.c.w = 4;
+ printf("w: %d\n", g(f(a)).w);
+}
diff --git a/compile_data/custom_subexp.c b/compile_data/custom_subexp.c new file mode 100644 index 0000000..bce5bdb --- /dev/null +++ b/compile_data/custom_subexp.c @@ -0,0 +1,9 @@ +int N = 0;
+void f() { N = 2; }
+int main() {
+ int a, b;
+ a = N + 1;
+ f();
+ b = N + 1;
+ printf("%d\n", b);
+}
diff --git a/compile_data/custom_subexp2.c b/compile_data/custom_subexp2.c new file mode 100644 index 0000000..1e4af93 --- /dev/null +++ b/compile_data/custom_subexp2.c @@ -0,0 +1,10 @@ +int flag = 0; +int check(int x, int y) { + return x > 0 && y > 0 && (flag ^= 1); +} +int main() { + int x = 1, y = 2; + printf("%d\n", check(x, y)); + printf("%d\n", check(x, y)); + printf("%d\n", check(x, y)); +} diff --git a/compile_data/func_pointer.c b/compile_data/func_pointer.c new file mode 100644 index 0000000..e7b6484 --- /dev/null +++ b/compile_data/func_pointer.c @@ -0,0 +1,19 @@ +typedef void (*Func_t)(); +void f(Func_t func, int step) { + if (!step) return; + printf("i'm f\n"); + func(func, step - 1); +} +void g(void (*func)(), int step) { + if (!step) return; + printf("i'm g\n"); + func(func, step - 1); +} +int main() { + void (*func)(void (*ifunc)(), int step); + int x = 1; + if (x) func = f; + else func = g; + func(func, 5); + return 0; +} @@ -0,0 +1,39 @@ +#ifndef CONST_H +#define CONST_H +enum { + EXP_POSTFIX = 1024, + POSTFIX_ARR, + POSTFIX_CALL, + POSTFIX_DOT, + POSTFIX_PTR, + EXP_CAST, + INITR_NORM, + INITR_ARR, + DECLR_FUNC, + DECLR_ARR, + STMT_EXP, + STMT_COMP, + STMT_IF, + STMT_WHILE, + STMT_FOR, + STMT_CONT, + STMT_BREAK, + STMT_RET, + NS_ID, + NS_TAG +}; +#define MAX_BLOCK 1024 +#define MAX_CHDN 1024 +#define MAX_DEBUG_PRINT_BUFF 10240000 +#define MAX_DEBUG_PRINT_LVL 10240000 +#define MAX_LINEBUFF 10240000 +#define MAX_TABLE_SIZE 1021 +#define MAX_ERROR_BUFF 10240000 +#define MAX_NAMELEN 1024 +#define INT_SIZE 4 +#define CHAR_SIZE 1 +#define PTR_SIZE 4 +#define FLAG_FUNC_CHK (1 << 0) +#define FLAG_FUNC_DEF (1 << 1) + +#endif @@ -0,0 +1,160 @@ +_func_printf: + addiu $sp, $sp, -64 + sw $31, 60($sp) #printf +# load pos_0 +# load arg_0 +# load ch_0 + lb $8, 12($sp) #ch +# load fmt_0 + lw $9, 64($sp) #fmt +# load x_0 + lw $10, 4($sp) #x +# load len_0 + lw $11, 8($sp) #len +# arg_1 = addr pos_0 + addiu $12, $sp, 68 +# goto __L2 + j __L2 +__L1: +# t2 = ch_2 == 37 +# seq $13, $8, 37 +# if not (t2) goto __L20 + bne $8, 37, __L20 +# fmt_4 = fmt_1 + 1 + addiu $9, $9, 1 +# ch_4 = fmt_4[0] + lb $8, 0($9) +# t4 = ch_4 == 100 +# seq $13, $8, 100 +# if not (t4) goto __L6 + bne $8, 100, __L6 +# t6 = arg_2[0] + lw $a0, 0($12) + li $2, 1 + syscall +# goto __L19 + j __L19 +__L6: +# t7 = ch_4 == 99 +# seq $13, $8, 99 +# if not (t7) goto __L8 + bne $8, 99, __L8 +# t9 = arg_2[0] + lw $a0, 0($12) + li $2, 11 + syscall +# goto __L19 + j __L19 +__L8: +# t10 = ch_4 == 115 +# seq $13, $8, 115 +# if not (t10) goto __L10 + bne $8, 115 __L10 +# t12 = arg_2[0] + lw $a0, 0($12) + li $2, 4 + syscall +# goto __L19 + j __L19 +__L10: +# x_4 = arg_2[0] + lw $10, 0($12) +# t14 = x_4 == 0 +# seq $11, $10, 0 +# if not (t14) goto __L12 + bne $10, 0, __L12 +# len_11 = 1 + li $11, 1 +# goto __L15 + j __L15 +__L12: +# len_8 = 0 + li $11, 0 +# goto __L14 + j __L14 +__L13: +# x_7 = x_6 / 10 + divu $10, $10, 10 +# len_10 = len_9 + 1 + addiu $11, $11, 1 +__L14: +# if (x_6) goto __L13 + bnez $10, __L13 +__L15: +# len_5 = 4 - len_4 + li $2, 4 + subu $11, $2, $11 +# goto __L17 + j __L17 +__L16: +# push 48 + li $a0 48 + li $2, 11 + syscall +# len_7 = len_6 - 1 + subu $11, $11, 1 +__L17: +# if (len_6) goto __L16 + bnez $11, __L16 +# t18 = arg_2[0] + lw $a0, 0($12) + li $2, 1 + syscall +# fmt_6 = fmt_4 + 2 + addiu $9, $9, 2 +__L19: +# arg_4 = arg_2 + 4 + addiu $12, $12, 4 +# goto __L21 + j __L21 +__L20: +# push ch_2 + move $a0, $8 + li $2, 11 + syscall +__L21: +# fmt_3 = fmt_2 + 1 + addiu $9, $9, 1 +__L2: +# ch_2 = fmt_1[0] + lb $8, 0($9) +# if (ch_2) goto __L1 + bnez $8, __L1 +_ret_printf: + lw $31, 60($sp) + addiu $sp, $sp, 64 + jr $31 +_func___print_int: + lw $a0, 0($sp) + li $2, 1 + syscall + jr $31 +_func___print_char: + lb $a0, 0($sp) + li $2, 11 + syscall + jr $31 +_func___print_string: + lw $a0, 0($sp) + li $2, 4 + syscall + jr $31 +_func_malloc: + lw $a0, 0($sp) + li $2, 9 + syscall + jr $31 +_func_memcpy: # the copied mem must be 4-aligned + lw $4, 0($sp) # dest addr + lw $5, 4($sp) # src addr + lw $6, 8($sp) # size + j __COND +__LOOP: + lw $2, 0($5) + sw $2, 0($4) + addiu $4, $4, 4 + addiu $5, $5, 4 + addiu $6, $6, -4 +__COND: + bnez $6, __LOOP + jr $31 @@ -0,0 +1,159 @@ +#include <stdio.h> +#include <getopt.h> +#include <stdlib.h> +#include "cibic.tab.h" +#include "ast.h" +#include "semantics.h" +#include "ssa.h" +#include "mips.h" + +extern char linebuff[]; +extern char *lptr; +extern int yyparse(); +extern FILE *yyin; +char *fname; + +int yywrap() { + return 1; +} + +void print_error(char *err_msg, char *lb, int row, int col, int warn) { + *lptr = '\0'; + fprintf(stderr, "%d:%d: %s: %s\n", + row, col, warn ? "warning" : "error", err_msg); + if (lb) + { + fprintf(stderr, "%s\n", lb); + while (--col) fprintf(stderr, "%c", ' '); + fprintf(stderr, "^\n"); + } + if (!warn) exit(1); +} + +int yyerror(char *err_msg) { + print_error(err_msg, linebuff, + yylloc.first_line, yylloc.first_column, 0); + return 0; +} + +void print_ast() { + if (fname) + fprintf(stderr, "AST for file: \"%s\"\n", fname); + else + fprintf(stderr, "AST for stdin\n"); + cibic_init(); + yyparse(); + cnode_debug_print(ast_root, 1); +} + +void print_sem() { + cibic_init(); + yyparse(); + semantics_check(ast_root, 0); +} + +void lib_generate() { + FILE *f = fopen("lib.s", "r"); + static char buff[1024]; + if (f) + { + size_t size; + while ((size = fread(buff, 1, 1024, f))) + fwrite(buff, 1, size, stdout); + } +} + +void print_ssa() { + cibic_init(); + yyparse(); + semantics_check(ast_root, 1); + ssa_generate(0); +} + +void compile() { + cibic_init(); + yyparse(); + semantics_check(ast_root, 1); + ssa_generate(1); + mips_generate(); + lib_generate(); +} + +void print_help() { + fprintf(stderr, + "CIBIC: C Implemented Bare and Ingenuous Compiler\n\n" + "Copyright (C) 2014 Ted Yin\n" + "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n" + "There is NO WARRANTY, to the extent permitted by law.\n" + "Usage: [options] filename\n" + "Options:\n" + "\t --ast \t\t Print AST Construction\n" + "\t --sem \t\t Print Semantic Information\n" + "\t --ssa \t\t Print Single Static Assignment\n" + "\t --help \t Show this info\n" + ); +} + +static struct option lopts[] = { + { "ast", no_argument, 0, 'a'}, + { "sem", no_argument, 0, 'm'}, + { "ssa", no_argument, 0, 's'}, + { "help", no_argument, 0, 'h'}, + {0, 0, 0, 0} +}; + +enum { + PRINT_AST, + PRINT_HELP, + PRINT_SEM, + PRINT_SSA, + COMPILE +} mode = COMPILE; + +int main(int argc, char **argv) { + int option_index = 0; + while (1) + { + int c = getopt_long(argc, argv, "ah", lopts, &option_index); + if (c == -1) + break; + switch (c) + { + case 0: break; + case 'a': mode = PRINT_AST; break; + case 's': mode = PRINT_SSA; break; + case 'm': mode = PRINT_SEM; break; + case 'h': print_help(); return 0; + default: print_help(); return 0; + } + } + if (optind == argc - 1) + { + fname = argv[argc - 1]; + yyin = fopen(fname, "r"); + if (!yyin) + { + fprintf(stderr, "Error while opening file.\n"); + return 1; + } + } + else if (optind == argc) + { + fname = NULL; + yyin = stdin; + } + else + { + fprintf(stderr, "Only one source file is exepected.\n"); + return 1; + } + switch (mode) + { + case PRINT_AST: print_ast(); break; + case PRINT_SEM: print_sem(); break; + case PRINT_SSA: print_ssa(); break; + case PRINT_HELP: print_help(); break; + case COMPILE: compile(); break; + } + return 0; +} diff --git a/midtermvars.sh b/midtermvars.sh new file mode 100755 index 0000000..62f0ff1 --- /dev/null +++ b/midtermvars.sh @@ -0,0 +1 @@ +export CCHK="./cibic" @@ -0,0 +1,739 @@ +#include <stdio.h> +#include <assert.h> +#include <string.h> +#include "ast.h" +#include "ssa.h" +#include "mips.h" + +int reg_v0 = 2; +int reg_v1 = 3; +int memcpy_cnt; +static int save_pos[32]; +static int used_reg[32]; +static CType_t func; + +void mips_prologue(void) { + CVList_t v; + CSList_t s; + int prev = 0; + printf(".data 0x10000000\n"); + for (v = gvars; v; v = v->next) + { + CVar_t var = v->var; + printf("\t.align 2\n"); + printf("_%s:\n", var->name); + prev += align_shift(prev); + var->start = prev; + prev += calc_size(var->type); + if (var->initr) + { + CNode *initr = var->initr->chd; + assert(var->initr->rec.subtype == INITR_NORM); + switch (initr->ext.type->type) + { + case CINT: + printf("\t.word %ld\n", initr->ext.const_val); + break; + case CCHAR: + printf("\t.byte %ld\n", initr->ext.const_val); + break; + case CPTR: + { + CType_t ref = initr->ext.type->rec.ref; + printf("\t.word "); + switch (ref->type) + { + case CFUNC: + printf("%s\n", initr->rec.strval); + break; + case CCHAR: + printf("_str_%d\n", ((CSList_t)initr->ext.const_val)->id); + break; + default: assert(0); + } + } + break; + default: assert(0); + } + } + else + printf("\t.space %d\n", calc_size(var->type)); + } + printf("\t.align 2\n"); + prev += align_shift(prev); + for (s = cstrs->next; s != cstrs; s = s->next) + { + int len = 1; + char *p = s->str; + printf("_str_%d:\n", s->id); + printf("\t.asciiz \"%s\"\n", s->str); + s->start = prev; + for (; *p != '\0'; p++) + { + len++; + if (*p == '\\') + { + switch (*(++p)) + { + case '0': p += 3; break; + case 'x': p += 2; break; + default: ; + } + } + } + prev += len; + } + /* pre-calc done */ + printf(".text\n"); +} + +#define IN_IMM(x) (-0x8000 <= (x) && (x) < 0x8000) + +void mips_global_addr(int reg, CVar_t var) { + int offset = var->start - 0x8000; + if (IN_IMM(offset)) + printf("\taddiu $%d, $gp, %d #%s\n", reg, offset, var->name); + else + printf("\tla $%d, _%s\n", reg, var->name); +} + +void mips_global(const char *l, int reg, CVar_t var) { + int offset = var->start - 0x8000; + if (IN_IMM(offset)) + printf("\t%s $%d, %d($gp) #%s\n", l, reg, offset, var->name); + else + printf("\t%s $%d, _%s\n", l, reg, var->name); +} + +void mips_load(int reg, COpr_t opr) { + CVar_t var = opr->spill->info.var; + CType_t type = opr->type; + if (opr->kind == VAR && + (type->type == CSTRUCT || + type->type == CUNION || + type->type == CARR)) + { + if (var->loc > 0) + mips_global_addr(reg, var); + else + printf("\taddiu $%d, $sp, %d\n", reg, var->start); + } + else + { + const char *l = type->type == CCHAR ? "lb" : "lw"; + if (var->loc > 0) + mips_global(l, reg, var); + else + printf("\t%s $%d, %d($sp)\t#%s\n", l, reg, var->start, var->name); + } +} + +void mips_store(int reg, COpr_t opr) { + CVar_t var = opr->spill->info.var; + CType_t type = opr->type; + const char *l = type->type == CCHAR ? "sb" : "sw"; + if (var->loc > 0) + mips_global(l, reg, var); + else if (opr->reg == -1) + printf("\t%s $%d, %d($sp)\t#%s\n", l, reg, var->start, var->name); +} + +int mips_to_reg(COpr_t opr, int reg0) { + if (opr->kind == IMM) + { + printf("\tli $%d, %d\n", reg0, opr->info.imm); + return reg0; + } + else if (opr->kind == IMMS) + { + CSList_t cstr = opr->info.cstr; + int offset = cstr->start - 0x8000; + if (IN_IMM(offset)) + printf("\taddiu $%d, $gp, %d # _str_%d\n", reg0, offset, cstr->id); + else + printf("\tla $%d, _str_%d\n", reg0, cstr->id); + return reg0; + } + else if (opr->kind == IMMF) + { + printf("\tla $%d, _func_%s\n", reg0, opr->info.str); + return reg0; + } + if (opr->reg != -1) + return opr->reg; + mips_load(reg0, opr); + return reg0; +} + +void mips_memcpy(void) { + printf("\tj _mem_cond%d\n", memcpy_cnt); + printf("_mem_loop%d:\n", memcpy_cnt); + printf("\tlw $5, 0($%d)\n", reg_v0); + printf("\tsw $5, 0($%d)\n", reg_v1); + printf("\taddiu $%d, $%d, 4\n", reg_v1, reg_v1); + printf("\taddiu $%d, $%d, 4\n", reg_v0, reg_v0); + printf("\taddiu $a0, $a0, -4\n"); + printf("_mem_cond%d:\n", memcpy_cnt); + printf("\tbnez $a0, _mem_loop%d\n", memcpy_cnt); + memcpy_cnt++; +} + +/* memcpy requires three arguments */ +#define RESERVED_CALL_SIZE 12 +void mips_space_alloc(void) { + int local_size = func->rec.func.local_size; + int arg_size = RESERVED_CALL_SIZE; + int bret_size = 0; + int tmp_size = 0; + int offset = 0; + int prev = 0; + int save_size = MAX_AVAIL_REGS * INT_SIZE; + CBlock_t p; + COList_t d; + CVar_t v; + for (d = func_ir->defs; d; d = d->next) + { + COpr_t opr = d->opr; + assert(opr->par == opr); + if (opr->kind == TMP && opr->reg == -1) + { + int t = opr->type->type; + tmp_size += align_shift(tmp_size); + opr->info.var->start = tmp_size; + if (t == CSTRUCT || t == CUNION || t == CARR) + tmp_size += PTR_SIZE; + else if (t == CVOID) + tmp_size += INT_SIZE; + else + tmp_size += calc_size(opr->type); + } + } + for (p = func_ir->entry; p; p = p->next) + { + CInst_t i, ih = p->insts; + for (i = ih->next; i != ih; i = i->next) + { + if (i->op == PUSH) + { + COpr_t arg = i->src1; + offset += align_shift(offset); + i->offset = offset; + if (arg->kind == IMM) + offset += INT_SIZE; + else if (arg->kind == IMMS) + offset += PTR_SIZE; + else if (arg->kind == IMMF) + offset += PTR_SIZE; + else + { + CType_t t = arg->type; + if (t->type == CARR) + offset += PTR_SIZE; + else + offset += calc_size(t); + } + } + else if (i->op == CALL) + { + CType_t rt = i->dest->type; + if (offset > arg_size) + arg_size = offset; + offset = 0; + if (rt->type == CSTRUCT || rt->type == CUNION) + { + bret_size += align_shift(bret_size); + i->bret = bret_size; + bret_size += calc_size(rt); + } + else i->bret = -1; + } + } + } + prev += arg_size; + prev += align_shift(prev); + /* adjust offset for local variables */ + for (v = func->rec.func.local; v; v = v->next) + v->start += prev; + prev += local_size; + prev += align_shift(prev); + { + int i; + for (i = 0; i < MAX_AVAIL_REGS; i++) + save_pos[avail_regs[i]] = prev + i * INT_SIZE; + save_pos[30] = prev + save_size; + save_size += INT_SIZE; + } + prev += save_size; + /* adjust offset for spilled temporaries */ + for (d = func_ir->defs; d; d = d->next) + { + COpr_t opr = d->opr; + assert(opr->par == opr); + if (opr->kind == TMP && opr->reg == -1) + opr->info.var->start += prev; + } + prev += tmp_size; + prev += align_shift(prev); + for (p = func_ir->entry; p; p = p->next) + { + CInst_t i, ih = p->insts; + for (i = ih->next; i != ih; i = i->next) + if (i->op == CALL && i->bret != -1) + i->bret += prev; + } + prev += bret_size; + prev += align_shift(prev); + prev += 4; /* return address */ + for (v = func->rec.func.params; v; v = v->next) + v->start += prev; /* skip the whole frame to reach args */ + func->rec.func.frame_size = prev; +} + +void mips_func_begin(void) { + int fsize = func->rec.func.frame_size; + if (fsize < 0x8000) + printf("\taddiu $sp, $sp, -%d\n", fsize); + else + { + printf("\tli $%d, %d\n", reg_v0, -fsize); + printf("\taddu $sp, $sp, $%d\n", reg_v0); + } + printf("\tsw $31, %d($sp) #%s\n", fsize - 4, func->name); +} + +void mips_func_end(void) { + int fsize = func->rec.func.frame_size; + printf("_ret_%s:\n", func->name); + printf("\tlw $31, %d($sp)\n", fsize - 4); + if (fsize < 0x8000) + printf("\taddiu $sp, $sp, %d\n", fsize); + else + { + printf("\tli $%d, %d\n", reg_v0, fsize); + printf("\taddu $sp, $sp, $%d\n", reg_v0); + } + printf("\tjr $31\n"); +} + +void mips_func(void) { + CBlock_t p; + CType_t rt; + func = func_ir->func; + rt = func->rec.func.ret; + /* int arg_cnt = 0; */ + mips_space_alloc(); + if (strcmp(func->name, "main")) + printf("_func_%s:\n",func->name); + else + printf("main:\n"); + mips_func_begin(); + for (p = func_ir->entry; p; p = p->next) + { + if (p->ref) printf("_L%d:\n", p->id + func_ir->gbbase); + CInst_t i, ih = p->insts; + const char *bop; + for (i = ih->next; i != ih; i = i->next) + { + int flag = 1, swap, rimm; + if (i->dest && i->dest->reg == -2 && i->dest->kind == TMP && i->op != CALL) + continue; + printf("# "); + cinst_print(stdout, i); + switch (i->op) + { + case LOAD: + if (i->dest->kind == VAR && + i->dest->reg > 0) /* && + i->dest->info.var->loc >= 0) */ + mips_load(i->dest->reg, i->dest); + break; + case MOVE: + { + int rs; + int rd = i->dest->reg; + CType_t type = i->dest->type; + if ((type->type == CSTRUCT || type->type == CUNION) && i->dest->kind == VAR) + { + rs = mips_to_reg(i->src1, reg_v0); + /* assert(i->dest->kind == VAR); */ + printf("\tsw $%d, 4($sp)\n", rs); + if (rd < 0) rd = reg_v0; + mips_load(rd, i->dest); + printf("\tsw $%d, 0($sp)\n", rd); + printf("\tli $%d, %d\n", reg_v0, calc_size(type)); + printf("\tsw $%d, 8($sp)\n", reg_v0); + /* don't need to save current registers because + * memcpy doesn't interfere with them */ + printf("\tjal _func_memcpy\n"); + } + else + { + if (i->dest->reg > 0 && i->src1->kind == IMM) + { + printf("\tli $%d %d\n", i->dest->reg, i->src1->info.imm); + } + else + { + rs = mips_to_reg(i->src1, reg_v0); + if (rd > 0) + { + if (rd != rs) printf("\tmove $%d $%d\n", rd, rs); + } + else + rd = rs; + } + if (i->dest->reg == -1 || + (i->dest->kind == VAR && + i->dest->info.var->loc > 0)) + mips_store(rd, i->dest); + } + } + break; + case BEQ: + case BNE: + { + int rs = mips_to_reg(i->src1, reg_v0); + int rt; + const char *b = i->op == BEQ ? "beq" : "bne"; + if (i->src2->kind == IMM) + printf("\t%s $%d, %d, _L%d\n", b, rs, i->src2->info.imm, + i->dest->info.imm); + else + { + rt = mips_to_reg(i->src2, reg_v1); + printf("\t%s $%d, $%d, _L%d\n", b, rs, rt, i->dest->info.imm); + } + } + break; + case GOTO: + printf("\tj _L%d\n", i->dest->info.imm); + break; + case ARR: + { + CType_t type = i->src1->type; + int arr = mips_to_reg(i->src1, reg_v0); + int rd; + const char *l; + if (type->type == CARR) + type = type->rec.arr.elem; + else + type = type->rec.ref; + l = type->type == CCHAR ? "lb" : "lw"; + if (i->src2->kind == IMM) + { + int index = i->src2->info.imm; + rd = i->dest->reg; + if (rd < 0) rd = reg_v1; + printf("\t%s $%d, %d($%d)\n", l, rd, index, arr); + } + else + { + int index = mips_to_reg(i->src2, reg_v1); + rd = i->dest->reg; + if (rd < 0) rd = reg_v0; + printf("\taddu $%d, $%d, $%d\n", reg_v1, arr, index); + printf("\t%s $%d, 0($%d)\n", l, rd, reg_v1); + } + if (i->dest->reg == -1 || i->dest->kind == VAR) + mips_store(rd, i->dest); + } + break; + case WARR: + { + CType_t type = i->wtype; + int arr = mips_to_reg(i->dest, reg_v1); + const char *s; + int rs; + /* + if (type->type == CARR) + type = type->rec.arr.elem; + else + type = type->rec.ref; + */ + s = type->type == CCHAR ? "sb" : "sw"; + + if (type->type == CSTRUCT || type->type == CUNION) + { + if (i->src2->kind == IMM) + printf("\taddiu $%d, $%d, %d\n", reg_v1, arr, i->src2->info.imm); + else + { + int index = mips_to_reg(i->src2, reg_v0); + printf("\taddu $%d, $%d, $%d\n", reg_v1, arr, index); + } + rs = mips_to_reg(i->src1, reg_v0); + printf("\tmove $%d, $%d\n", reg_v0, rs); + printf("\tli $a0, %d\n", calc_size(type)); + mips_memcpy(); + } + else + { + if (i->src2->kind == IMM) + { + rs = mips_to_reg(i->src1, reg_v0); + if (type->type == CSTRUCT || type->type == CUNION) + { + printf("\tmove $%d, $%d\n", reg_v0, rs); + printf("\taddiu $%d, $%d, $%d\n", reg_v1, arr, i->src2->info.imm); + printf("\tli $a0, %d\n", calc_size(type)); + mips_memcpy(); + } + else + printf("\t%s $%d, %d($%d)\n", s, rs, i->src2->info.imm, arr); + } + else + { + int index = mips_to_reg(i->src2, reg_v0); + printf("\taddu $%d, $%d, $%d\n", reg_v0, arr, index); + rs = mips_to_reg(i->src1, reg_v1); + printf("\t%s $%d, 0($%d)\n", s, rs, reg_v0); + } + } + } + break; + case PUSH: + if (!i->sysp) + { + int rs = mips_to_reg(i->src1, reg_v0); + /* + if (arg_cnt < 4) + { + printf("\tmove $%d, $%d\n", arg_cnt + 4, rs); + } + else + { + */ + CType_t type = i->src1->type; + if (type && (type->type == CSTRUCT || type->type == CUNION)) + { + printf("\taddiu $%d, $sp, %d\n", reg_v1, i->offset); + printf("\tmove $%d, $%d\n", reg_v0, rs); + printf("\tli $a0, %d\n", calc_size(type)); + mips_memcpy(); + } + else + printf("\tsw $%d, %d($sp) # push\n", rs, i->offset); + /* } + arg_cnt++; */ + } + break; + case CALL: + { + COList_t p; + int rd = i->dest->reg; + int j; + memset(used_reg, 0, sizeof used_reg); + /* NOTE: bad hack */ + if (i->prev->op == PUSH && i->prev->sysp) + { + char *fname = i->src1->info.str; + int flag = 0; + int rs = mips_to_reg(i->prev->src1, 4); + if (!strcmp(fname, "__print_int")) flag = 1; + else if (!strcmp(fname, "__print_char")) flag = 11; + else if (!strcmp(fname, "__print_string")) flag = 4; + else assert(0); + if (rs != 4) printf("\tmove $4, $%d\n", rs); + printf( "\tli $2, %d\n" + "\tsyscall\n", flag); + break; + } + if (rt->type == CSTRUCT || rt->type == CUNION) + used_reg[30] = 1; /* save $fp */ + for (p = func_ir->defs; p; p = p->next) + { + COpr_t opr = p->opr; + if (opr->reg != -1 && + (opr->kind == TMP || !(opr->info.var->loc > 0)) && + overlap_with_beg(opr, i->id)) + used_reg[opr->reg] = 1; + } + for (j = 0; j < 32; j++) + if (used_reg[j]) + printf("\tsw $%d, %d($sp) # save reg\n", j, save_pos[j]); + if (i->bret != -1) + { + if (i->dest->kind == TMP) + printf("\taddiu $fp, $sp, %d\n", i->bret); + else + mips_load(30, i->dest); + } + if (i->src1->kind == IMMF) + printf("\tjal %s%s\n", + strcmp(i->src1->info.str, "main") ? "_func_" : "", + i->src1->info.str); + else + printf("\tjalr $%d\n", mips_to_reg(i->src1, reg_v0)); + for (j = 0; j < 32; j++) + if (used_reg[j]) + printf("\tlw $%d, %d($sp) # load reg\n", j, save_pos[j]); + for (p = func_ir->defs; p; p = p->next) + { + COpr_t opr = p->opr; + if (opr->reg != -1 && + opr->kind == VAR && + opr->info.var->loc > 0 && + overlap_with_beg(opr, i->id)) + mips_load(opr->reg, opr); + } + if (rd != -2) + { + if (i->bret != -1 && i->dest->kind == VAR) + { + if (rd == -1) + rd = mips_to_reg(i->dest, reg_v1); + if (reg_v1 != rd) + printf("\tmove $%d, $%d\n", reg_v1, rd); + printf("\tli $a0, %d\n", calc_size(i->dest->type)); + mips_memcpy(); + } + else + { + if (rd != -1) + printf("\tmove $%d, $%d\n", rd, reg_v0); + if (i->dest->reg == -1 || i->dest->kind == VAR) + mips_store(reg_v0, i->dest); + } + } + } + break; + case RET: + { + if (i->src1) + { + if (rt->type == CSTRUCT || rt->type == CUNION) + { + int rs = mips_to_reg(i->src1, reg_v0); + assert(i->src1->kind == VAR || i->src1->kind == TMP); + printf("\tsw $%d, 4($sp)\n", rs); + printf("\tsw $fp, 0($sp)\n"); + printf("\tli $%d, %d\n", reg_v0, calc_size(rt)); + printf("\tsw $%d, 8($sp)\n", reg_v0); + printf("\tjal _func_memcpy\n"); + printf("\tmove $%d, $fp\n", reg_v0); + } + else + { + if (i->src1->reg != -1) + { + if (i->src1->kind == IMM) + printf("\tli $%d, %d\n", reg_v0, i->src1->info.imm); + else + printf("\tmove $%d, $%d\n", reg_v0, mips_to_reg(i->src1, reg_v1)); + } + else + { + mips_to_reg(i->src1, reg_v0); + } + } + } + printf("\tj _ret_%s\n", func->name); + } + break; + case ADDR: + { + assert(i->src1->kind == VAR || + i->src1->kind == IMMF); + int rd = i->dest->reg; + if (rd < 0) rd = reg_v0; + if (i->src1->kind == IMMF) + printf("\tla $%d, %s\n", rd, i->src1->info.str); + else + { + CVar_t var = i->src1->spill->info.var; + if (var->loc > 0) + mips_global_addr(rd, var); + else + printf("\taddiu $%d, $sp, %d\n", rd, var->start); + if (i->dest->reg == -1 || i->dest->kind == VAR) + mips_store(rd, i->dest); + } + } + break; + default: flag = 0; + } + if (flag) continue; + flag = 1; + swap = rimm = 0; + switch (i->op) + { + case MUL: bop = "mul"; rimm = 1; break; + case DIV: bop = "divu"; rimm = 1; break; + case MOD: bop = "rem"; rimm = 1; break; + case ADD: bop = "addu"; rimm = swap = 1; break; + case SUB: bop = "subu"; rimm = 1; break; + case SHL: bop = "sllv"; rimm = 1; break; + case SHR: bop = "srlv"; rimm = 1; break; + case AND: bop = "and"; rimm = swap = 1; break; + case XOR: bop = "xor"; rimm = swap = 1; break; + case OR: bop = "or"; rimm = swap = 1; break; + case NOR: bop = "nor"; rimm = 1; break; + case EQ: bop = "seq"; rimm = 1; break; + case NE: bop = "sne"; rimm = 1; break; + case LT: bop = "slt"; rimm = 1; break; + case GT: bop = "sgt"; rimm = 1; break; + case LE: bop = "sle"; rimm = 1; break; + case GE: bop = "sge"; rimm = 1; break; + default: flag = 0; + } + if (flag) + { + int rs, rt; + int rd = i->dest->reg; + if (rd < 0) rd = reg_v0; + if (rimm) + { + COpr_t t; + if (swap && i->src1->kind == IMM) + { + t = i->src1; + i->src1 = i->src2; + i->src2 = t; + } + if (i->src2->kind == IMM && IN_IMM(i->src2->info.imm)) + { + switch (i->op) + { + case ADD: bop = "addiu"; break; + case AND: bop = "andi"; break; + case XOR: bop = "xori"; break; + case OR: bop = "ori"; break; + case SHL: bop = "sll"; break; + case SHR: bop = "srl"; break; + default: ; + } + rs = mips_to_reg(i->src1, reg_v0); + printf("\t%s $%d, $%d, %d\n", + bop, rd, rs, i->src2->info.imm); + } + else rimm = 0; + } + if (!rimm) + { + rs = mips_to_reg(i->src1, reg_v0); + rt = mips_to_reg(i->src2, reg_v1); + printf("\t%s $%d, $%d, $%d\n", bop, rd, rs, rt); + } + if (i->dest->reg == -1 || i->dest->kind == VAR) + mips_store(rd, i->dest); + continue; + } + if (i->op == NEG) + { + int rs = mips_to_reg(i->src1, reg_v0); + int rd = i->dest->reg; + if (rd < 0) rd = reg_v0; + printf("\tnegu $%d, $%d\n", rd, rs); + if (i->dest->reg == -1 || i->dest->kind == VAR) + mips_store(rd, i->dest); + } + } + } + mips_func_end(); +} + +void mips_generate(void) { + mips_prologue(); + for (; func_ir; func_ir = func_ir->next) + mips_func(); +} @@ -0,0 +1,7 @@ +#ifndef MIPS_C +#define MIPS_C + +void mips_prologue(void); +void mips_generate(void); + +#endif diff --git a/obsolete/dom.c b/obsolete/dom.c new file mode 100644 index 0000000..c6eaa97 --- /dev/null +++ b/obsolete/dom.c @@ -0,0 +1,155 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#define MAXN 1000 +#define MAX_HASH 1021 +typedef struct Edge Edge; +typedef struct Set Set; +typedef Set *Set_t; +typedef struct SetNode { + int val; + struct SetNode *next; +} SetNode; +struct Set { + SetNode *head[MAX_HASH]; +}; + +Set_t set_create() { + Set_t set = (Set_t)malloc(sizeof(Set)); + memset(set->head, 0, sizeof set->head); + return set; +} + +int set_belongs(Set_t set, int val) { + SetNode *p; + for (p = set->head[val % MAX_HASH]; p; p = p->next) + if (p->val == val) return 1; + return 0; +} + +void set_push(Set_t set, int val) { + SetNode *p = (SetNode*)malloc(sizeof(SetNode)); + int hv = val % MAX_HASH; + p->next = set->head[hv]; + p->val = val; + set->head[hv] = p; +} + +struct Edge { + int to; + Edge *next; +} *head[MAXN], *rhead[MAXN]; + +typedef struct DFNode DFNode; +struct DFNode { + int id; + DFNode *next; +}; + +void add_edge(int a, int b, Edge **head) { + Edge *p = (Edge*)malloc(sizeof(Edge)); + p->to = b; + p->next = head[a]; + head[a] = p; +} +int dom[MAXN], ord[MAXN], vis[MAXN], par[MAXN]; +int n, n0, m; +DFNode *df[MAXN]; +Set_t dfset[MAXN]; + +void dfs(int u, int v) { + static int ocnt = 0; + Edge *e; + if (vis[u] != -1) return; + par[u] = v; + vis[u] = 0; + for (e = head[u]; e; e = e->next) + dfs(e->to, u); + vis[u] = ocnt; + ord[ocnt++] = u; +} + +int intersect(int b1, int b2) { + while (b1 != b2) + if (b1 < b2) b1 = dom[b1]; + else b2 = dom[b2]; + return b1; +} + +int main() { + int i; + int ch = 1; + scanf("%d %d %d", &n, &m, &n0); + for (i = 0; i < m; i++) + { + int a, b; + scanf("%d %d", &a, &b); + add_edge(a, b, head); + add_edge(b, a, rhead); + } + for (i = 0; i < n; i++) + vis[i] = dom[i] = -1; + dfs(n0, -1); + for (i = 0; i < n; i++) + printf("%d ", ord[i]); puts(""); + dom[vis[n0]] = vis[n0]; + while (ch) + { + int i; + ch = 0; + for (i = n - 2; i >= 0; i--) + { + int id = ord[i]; + Edge *e = rhead[id]; + int new_idom = vis[par[id]]; + for (; e; e = e->next) + { + int p = e->to; + if (vis[p] == new_idom) continue; + if (dom[vis[p]] != -1) + new_idom = intersect(vis[p], new_idom); + } + if (dom[i] != new_idom) + { + ch = 1; + dom[i] = new_idom; + } + } + for (i = 0; i < n; i++) + printf("%d ", ord[dom[vis[i]]]); puts(""); + } + for (i = 0; i < n; i++) + { + dfset[i] = set_create(); + df[i] = NULL; + } + for (i = 0; i < n; i++) + if (rhead[i] && rhead[i]->next) + { + Edge *p = rhead[i]; + for (; p; p = p->next) + { + int runner = p->to; + while (vis[runner] != dom[vis[i]]) + { + if (!set_belongs(dfset[runner], i)) + { + DFNode *np = (DFNode*)malloc(sizeof(DFNode)); + np->id = i; + np->next = df[runner]; + set_push(dfset[runner], i); + df[runner] = np; + } + runner = ord[dom[vis[runner]]]; + } + } + } + for (i = 0; i < n; i++) + { + DFNode *p = df[i]; + printf("%d: ", i); + for (; p; p = p->next) + printf("%d ", p->id); puts(""); + } + return 0; +} diff --git a/obsolete/table_test.c b/obsolete/table_test.c new file mode 100644 index 0000000..46b9b8a --- /dev/null +++ b/obsolete/table_test.c @@ -0,0 +1,92 @@ +#include <stdlib.h> +#include <stdio.h> +#include "semantics.h" +#define PV(str) \ + do \ + { \ + if (cscope_push_var(scope, newvar(str))) \ + fprintf(stderr, "Successfully pushed var: %s\n", str); \ + else \ + fprintf(stderr, "Naming conflicts deteced: %s\n", str); \ + } while(0) + +#define PT(str) \ + do \ + { \ + if (cscope_push_type(scope, newtype(str))) \ + fprintf(stderr, "Successfully pushed type: %s\n", str); \ + else \ + fprintf(stderr, "Naming conflicts deteced: %s\n", str); \ + } while(0) + +CVar_t newvar(const char *name) { + return cvar_create(name, NULL, NULL); +} + +CType_t newtype(const char *name) { + return ctype_create(name, 0, NULL); +} + +void manual() { + CScope_t scope = cscope_create(); + PV("a"); + PV("b"); + PV("asdf"); + PV("fdsa"); + PV("hello"); + cscope_debug_print(scope); + cscope_enter(scope); + PV("a"); + PV("hello"); + PT("CType"); + cscope_debug_print(scope); + cscope_enter(scope); + PV("a"); + PV("yay"); + PV("world"); + PT("CType"); + PV("a"); + cscope_debug_print(scope); + cscope_exit(scope); + cscope_debug_print(scope); + cscope_exit(scope); + cscope_debug_print(scope); +} + +char *str_gen(int len) { + int i; + char *str = malloc(len); + for (i = 0; i < len; i++) + str[i] = rand() % 2 + 'a'; + return str; +} + +void autoforce() { + static const int max_lvl = 100, + max_push = 10; + int i, j; + CScope_t scope = cscope_create(); + for (i = 0; i < max_lvl; i++) + { + cscope_enter(scope); + int push = rand() % max_push; + for (j = 0; j < push; j++) + { + int len = rand() % 3 + 1; + int opt = rand() & 1; + if (opt) PV(str_gen(len)); + else PT(str_gen(len)); + } + } + for (i = 0; i < max_lvl; i++) + { + cscope_debug_print(scope); + cscope_exit(scope); + } +} + +int main() { +/* manual(); */ + autoforce(); + return 0; +} diff --git a/printf.c b/printf.c new file mode 100644 index 0000000..b34ade1 --- /dev/null +++ b/printf.c @@ -0,0 +1,32 @@ +int printf(char *fmt, int pos) { + char *arg, ch; + int len, x; + arg = (int)&pos; + while ((ch = *fmt)) + { + if (ch == '%') + { + ch = *(++fmt); + if (ch == 'd') + __print_int(*(int *)arg); + else if (ch == 'c') + __print_char(*(char *)arg); + else if (ch == 's') + __print_string(*(char **)arg); + else + { + x = *(int *)arg; + if (!x) len = 1; + else + for (len = 0; x; x /= 10, len++); + len = 4 - len; + while (len) __print_char('0'), len--; + __print_int(*(int *)arg); + fmt += 2; + } + arg += 4; + } + else __print_char(ch); + fmt++; + } +} diff --git a/semantics.c b/semantics.c new file mode 100644 index 0000000..e40a26a --- /dev/null +++ b/semantics.c @@ -0,0 +1,2042 @@ +#include <stdlib.h> +#include <assert.h> +#include <string.h> +#include <stdio.h> +#include "semantics.h" +#include "ast.h" + +#define NEW(type) ((type *)malloc(sizeof(type))) +#define CHECK_TYPE(p, _type) assert(p->type == _type) +#define ERROR(x) do { error_print x; } while (0) +#define WARNING(x) do { warning_print x; } while (0) + +#define NOT_IGNORE_VOID(et, ast) \ + if (et->type == CVOID) \ + do { \ + ERROR((ast, "void value not ignored as it ought to be")); \ + } while (0) + +#define INCOMP_TYPE(ast) \ + do { \ + ERROR((ast, "incompatible types when assigning")); \ + } while (0) + +/* pointer to function conversion (std 6.3.2/4) */ +/* also convert array to pointer */ +#define POINTER_CONV(t, p) \ + do { \ + if ((t)->type == CFUNC) \ + { \ + CType_t f = ctype_create("", CPTR, p); \ + f->rec.ref = t; \ + t = f; \ + } \ + else if ((t)->type == CARR) \ + { \ + CType_t a = ctype_create("", CPTR, p); \ + a->rec.ref = t->rec.arr.elem; \ + free(t); \ + t = a; \ + } \ + } while (0) + +#define CHECK_CVOID(name, ast) \ + if (typespec->type == CVOID) \ + do { \ + ERROR((ast, "variable or field '%s' declared void", name)); \ + } while (0) + +#define IS_INT(tt) ((tt) == CINT || (tt) == CCHAR) +#define IS_PTR(tt) ((tt) == CPTR || (tt) == CARR) +#define IS_SCALAR(tt) (!((tt) == CUNION || (tt) == CSTRUCT)) +#define IS_ARITH(tt) IS_INT(tt) + +extern void print_error(char *, char *, int, int, int); +extern char *load_line(int); +static char err_buff[MAX_ERROR_BUFF]; +static CType_t basic_type_int; +static CType_t basic_type_char; +static CType_t basic_type_void; +static CType_t builtin_printf; +static CType_t builtin_scanf; +static CType_t builtin_malloc; +static CType_t builtin_memcpy; +static CType_t builtin_print_int; +static CType_t builtin_print_char; +static CType_t builtin_print_string; + +CTList_t funcs; +CVList_t gvars; +CSList_t cstrs; + +static void error_print(CNode *ast, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + vsprintf(err_buff, fmt, args); + print_error(err_buff, NULL, ast->loc.row, ast->loc.col, 0); + va_end(args); +} + +static void warning_print(CNode *ast, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + vsprintf(err_buff, fmt, args); + print_error(err_buff, NULL, ast->loc.row, ast->loc.col, 1); + va_end(args); +} + +const char *csymbol_getname(CSymbol_t sym) { + switch (sym->kind) + { + case CVAR: return sym->rec.var->name; + case CTYPE: return sym->rec.type->name; + case CDEF: return sym->rec.def->name; + } + return NULL; +} + +CTable_t ctable_create(Hashfunc_t hfunc, Printfunc_t pfunc) { + CTable_t ct = NEW(CTable); + memset(ct->head, 0, sizeof(CTNode*) * MAX_TABLE_SIZE); + ct->hfunc = hfunc; + ct->pfunc = pfunc; + return ct; +} + +void ctable_destory(CTable_t ct) { + int i; + for (i = 0; i < MAX_TABLE_SIZE; i++) + { + CTNode *p, *np; + for (p = ct->head[i]; p; p = np) + { + np = p->next; + free(p); + } + } +} + +void *ctable_lookup(CTable_t ct, const char *key) { + unsigned int hv = ct->hfunc(key) % MAX_TABLE_SIZE; + CTNode *p = ct->head[hv]; + for (; p; p = p->next) + if (!strcmp(p->key, key)) + return p->val; + return NULL; /* not found */ +} + +int ctable_insert(CTable_t ct, const char *key, void *val, int lvl) { + unsigned int hv = ct->hfunc(key) % MAX_TABLE_SIZE; + CTNode *p = ct->head[hv], *np; + for (; p && p->lvl == lvl; p = p->next) + if (!strcmp(p->key, key)) + return 0; /* conflict */ + np = NEW(CTNode); + np->key = key; + np->val = val; + np->lvl = lvl; + np->next = ct->head[hv]; + ct->head[hv] = np; + return 1; +} + +void ctable_clip(CTable_t ct, const char *key, int max_lvl) { + unsigned int hv = ct->hfunc(key) % MAX_TABLE_SIZE; + CTNode *p = ct->head[hv], *np; + for (; p && p->lvl > max_lvl; p = np) + { + np = p->next; + free(p); + } + ct->head[hv] = p; +} + +CScope_t cscope_create(void) { + CScope_t p = NEW(CScope); + p->lvl = -1; + p->top = NULL; + p->func = NULL; + p->inside_loop = 0; + p->ids = ctable_create(bkdr_hash, csymbol_print); + p->tags = ctable_create(bkdr_hash, csymbol_print); + cscope_enter(p); + return p; +} + +static int cscope_push(CScope_t cs, CSymbol_t sym, int nspace) { + CTable_t ct = nspace == NS_ID ? cs->ids : cs->tags; +#ifdef CIBIC_DEBUG + assert(cs->top); +#endif + if (ctable_insert(ct, csymbol_getname(sym), sym, cs->lvl)) + { + CSElem *e = NEW(CSElem); + e->sym = sym; + e->next = cs->top->symlist; + cs->top->symlist = e; + return 1; + } + else return 0; /* naming conflict */ +} + +int cscope_push_var(CScope_t cs, CVar_t var, int nspace) { + CSymbol_t p = NEW(CSymbol); + p->kind = CVAR; + p->rec.var = var; + if (!cscope_push(cs, p, nspace)) + { + free(p); + return 0; + } + return 1; +} + +int cscope_push_type(CScope_t cs, CType_t type, int nspace) { + CSymbol_t p = NEW(CSymbol); + p->kind = CTYPE; + p->rec.type = type; + if (!cscope_push(cs, p, nspace)) + { + free(p); + return 0; + } + return 1; +} + +int cscope_push_def(CScope_t cs, CDef_t def, int nspace) { + CSymbol_t p = NEW(CSymbol); + p->kind = CDEF; + p->rec.def = def; + if (!cscope_push(cs, p, nspace)) + { + free(p); + return 0; + } + return 1; +} + +void cscope_enter(CScope_t cs) { + CSNode *np = NEW(CSNode); + np->next = cs->top; + np->symlist = NULL; + cs->top = np; + cs->lvl++; +} + +void cscope_exit(CScope_t cs) { + CSNode *otop = cs->top; + CSElem *p, *np; + cs->lvl--; + cs->top = otop->next; + for (p = otop->symlist; p; p = np) + { + const char *name = csymbol_getname(p->sym); + ctable_clip(cs->ids, name, cs->lvl); + ctable_clip(cs->tags, name, cs->lvl); + np = p->next; + free(p->sym); /* free CSymbol */ + free(p); /* free CSElem */ + } + free(otop); +} + +CSymbol_t cscope_lookup(CScope_t cs, const char *name, int nspace) { + if (nspace == NS_ID) + return ctable_lookup(cs->ids, name); + else + return ctable_lookup(cs->tags, name); + return NULL; +} + +unsigned int bkdr_hash(const char *str) { + unsigned int seed = 131; + unsigned int hv = 0; + while (*str) + hv = hv * seed + (unsigned)(*str++); + return hv; +} + +CVar_t cvar_create(char *name, CType_t type, CNode *ast) { + CVar_t cv = NEW(CVar); + cv->name = name; + cv->type = type; + cv->ast = ast; + cv->initr = NULL; + cv->defsite = NULL; + cv->loc = 0; + cv->weight = 0; + cv->reload = 0; + cv->cnt = 0; + return cv; +} + +CType_t ctype_create(char *name, int type, CNode *ast) { + CType_t ct = NEW(CType); + ct->name = name; + ct->type = type; + ct->ast = ast; + ct->size = -1; + return ct; +} + +int align_shift(int x) { + return ((4 - (x & 3)) & 3); +} + +int calc_size(CType_t type) { + int size = type->size; + if (size != -1) return size; + /* TODO: correct alignment */ + switch (type->type) + { + case CINT: size = INT_SIZE; break; + case CCHAR: size = CHAR_SIZE; break; + case CPTR: size = PTR_SIZE; break; + case CARR: + size = type->rec.arr.len * calc_size(type->rec.arr.elem); + break; + case CSTRUCT: + { + size = 0; + CVar_t p = type->rec.st.flist; + if (!p) return -1; + for (; p; p = p->next) + { + /* add padding to align to word boundary */ + if (p->type->type != CCHAR) + size += align_shift(size); + p->start = size; + size += calc_size(p->type); + } + } + break; + case CUNION: + { + size = 0; + CVar_t p = type->rec.st.flist; + if (!p) return -1; + for (; p; p = p->next) + { + int t = calc_size(p->type); + if (t > size) size = t; + p->start = 0; + } + } + break; + case CVOID: return -1; + case CFUNC: return 1; + } + return (type->size = size); +} + +static CType_t struct_type_merge(CType_t new, CScope_t scope) { + /* Note: we shall try to lookup first instead of pushing !! */ + CSymbol_t lu = cscope_lookup(scope, new->name, NS_TAG); + CType_t old; + if (!lu) /* create it if it does not exist */ + { + cscope_push_type(scope, new, NS_TAG); + return new; + } /* otherwise we have it */ + old = lu->rec.type; + /* it must be a struct or union */ + if (!new->rec.st.fields) /* use the old definition */ + return old; + /* otherwise it's a complete definition */ + if (cscope_push_type(scope, new, NS_TAG)) + return new; /* try to push the defintion */ + /* conflict appears */ + if (old->rec.st.fields) /* if the old one is complete */ + ERROR((new->ast, "redefinition of '%s'", new->name)); + /* otherwise incomplete, thus complete the type */ + old->rec.st = new->rec.st; + old->ast = new->ast; + free(new); + return old; +} + +static void type_merge(CType_t old, CType_t new) { + /* assume old and new are the same type */ + assert(old->type == new->type); + switch (old->type) + { + case CINT: case CCHAR: case CPTR: + case CUNION: case CSTRUCT: + break; + case CFUNC: + if (new->rec.func.params) + old->rec.func.params = new->rec.func.params; + if (new->rec.func.body) + { + old->rec.func.local = new->rec.func.local; + old->rec.func.body = new->rec.func.body; + } + break; + default: assert(0); + } +} + +int is_same_type(CType_t typea, CType_t typeb) { + if (typea == typeb) return 1; + if (typea->type != typeb->type) return 0; + switch (typea->type) + { + case CSTRUCT: case CUNION: + return typea == typeb; + case CARR: + if (typea->rec.arr.len != typeb->rec.arr.len) + return 0; + return is_same_type(typea->rec.arr.elem, typeb->rec.arr.elem); + case CPTR: + return is_same_type(typea->rec.ref, typeb->rec.ref); + case CFUNC: + { + CVar_t pa = typea->rec.func.params, + pb = typeb->rec.func.params; + if ((pa || typea->rec.func.body) && + (pb || typeb->rec.func.body)) + { + for (;pa && pb; pa = pa->next, pb = pb->next) + if (!is_same_type(pa->type, pb->type)) + return 0; + if (pa || pb) + return 0; /* different number of parameters */ + } + return is_same_type(typea->rec.func.ret, typeb->rec.func.ret); + } + case CINT: case CCHAR: case CVOID: break; + } + return 1; +} + +static CVar_t var_merge(CVar_t new, CScope_t scope) { + CVar_t old; + if (cscope_push_var(scope, new, NS_ID)) + return new; + else + old = cscope_lookup(scope, new->name, NS_ID)->rec.var; + if (!is_same_type(old->type, new->type)) + ERROR((new->ast, "conflicting types of '%s'", new->name)); + else if (scope->lvl) + ERROR((new->ast, "redeclaration of '%s' with no linkage", new->name)); + type_merge(old->type, new->type); + free(new); + return old; +} + +void semantics_fields(CNode *, CType_t, CScope_t scope); +CType_t semantics_typespec(CNode *p, CScope_t scope) { + CHECK_TYPE(p, TYPE_SPEC); + CType_t type; + switch (p->rec.subtype) + { + case KW_VOID: + type = ctype_create("", CVOID, p); break; + case KW_CHAR: + type = ctype_create("", CCHAR, p); break; + case KW_INT: + type = ctype_create("", CINT, p); break; + case KW_STRUCT: case KW_UNION: + { + CNode *id = p->chd, + *fields = p->chd->next; + type = ctype_create(id->type == NOP ? "" : id->rec.strval, + p->rec.subtype == KW_STRUCT ? CSTRUCT : CUNION, + p); + if (fields->type == NOP) + type->rec.st.fields = NULL; /* incomplete type */ + else + semantics_fields(fields, type, scope); + if (id->type != NOP) + type = struct_type_merge(type, scope); + } + break; + case USER_TYPE: + { + CHECK_TYPE(p->chd, ID); + CSymbol_t lu = cscope_lookup(scope, p->chd->rec.strval, NS_ID); + assert(lu && lu->kind == CDEF); /* parser guarantees this */ + type = lu->rec.def->type; + } + break; + default: assert(0); + } + return type; +} + +int type_is_complete(CType_t type) { + switch(type->type) + { + case CINT: case CCHAR: + /* basic types are always complete */ + case CPTR: + /* pointer may point to an incomplete type */ + case CARR: + /* syntax of incomplete arrays is not allowed in `cibic.y` */ + return 1; + case CSTRUCT: case CUNION: + /* fields are guaranteed to be complete if exists, due to + * `semantics_fields` */ + return type->rec.st.fields != NULL; + case CVOID: + /* void type is never complete */ + return 0; + case CFUNC: + /* function body is not required here, it is checked in the last + * phase */ + return 1; + default: assert(0); + } + return 1; +} + +CVar_t semantics_declr(CNode *, CType_t, CScope_t, int); +CVar_t semantics_pdecl(CNode *p, CScope_t scope) { + CHECK_TYPE(p, PLAIN_DECL); + CVar_t var = semantics_declr(p->chd->next, + semantics_typespec(p->chd, scope), + scope, 0); + return var; +} + +CVar_t semantics_params(CNode *p, CScope_t scope) { + CHECK_TYPE(p, PARAMS); + static CVar dummy; + CVar_t params = &dummy, tail = params; + CTable_t ct; + if (!(p = p->chd)) return NULL; /* no parameters */ + ct = ctable_create(bkdr_hash, ctable_cvar_print); + for (; p; p = p->next) + { + CVar_t var = semantics_pdecl(p, scope); + POINTER_CONV(var->type, p); + if (scope) /* params inside a function definition */ + if (!ctable_insert(ct, var->name, var, 0)) + ERROR((var->ast, "redefinition of parameter '%s'", var->name)); + tail->next = var; + tail = var; + } + ctable_destory(ct); + tail->next = NULL; + return params->next; +} + +ExpType semantics_exp(CNode *, CScope_t); +CVar_t semantics_declr(CNode *p, CType_t typespec, CScope_t scope, int flag) { + CVar_t type; + if (p->type == ID) + { + if (!(flag & FLAG_FUNC_CHK)) CHECK_CVOID(p->rec.strval, p); + return cvar_create(p->rec.strval, typespec, p); + } + if (p->type == NOP) /* type name */ + return cvar_create(NULL, typespec, p); + switch (p->rec.subtype) + { + case DECLR_FUNC: + { + CType_t func = ctype_create("", CFUNC, p); /* function declr */ + if (flag & FLAG_FUNC_DEF) /* function def */ + func->rec.func.params = semantics_params(p->chd->next, scope); + else /* function declaration */ + { + cscope_enter(scope); + func->rec.func.params = semantics_params(p->chd->next, scope); + cscope_exit(scope); + /* incomplete type */ + func->rec.func.local = NULL; + func->rec.func.body = NULL; /* not a definition */ + } + func->rec.func.ret = typespec; /* might be an incomplete type */ + type = semantics_declr(p->chd, func, scope, flag | FLAG_FUNC_CHK); + if (typespec->type == CARR) + ERROR((p, "'%s' declared as function returning an array", + type->name)); + if (typespec->type == CFUNC) + ERROR((p, "'%s' declared as function returing a function", + type->name)); + } + break; + case DECLR_ARR: + { + CType_t arr = ctype_create("", CARR, p); /* array declr */ + CNode *rch = p->chd->next; + ExpType tl = semantics_exp(rch, scope); + if (calc_size(typespec) == -1) + ERROR((p, "array type has incomplete element type")); + if (!rch->ext.is_const) + ERROR((p, "size of array must be a constant")); + if (!IS_INT(tl.type->type)) + ERROR((p, "size of array has non-integer type")); + arr->rec.arr.elem = typespec; + arr->rec.arr.len = rch->ext.const_val; + type = semantics_declr(p->chd, arr, scope, 0); + } + break; + case '*': + { + CType_t ptr = ctype_create("", CPTR, p); /* pointer */ + ptr->rec.ref = typespec; + type = semantics_declr(p->chd, ptr, scope, 0); + } + break; + default: assert(0); + } + return type; +} + +void semantics_fields(CNode *p, CType_t type, CScope_t scope) { + CTable_t ct = ctable_create(bkdr_hash, ctable_cvar_print); + type->rec.st.fields = ct; + type->rec.st.flist = NULL; + for (p = p->chd; p; p = p->next) + { + CNode *declr = p->chd->next->chd; + for (; declr; declr = declr->next) + { + CVar_t var = semantics_declr(declr, + semantics_typespec(p->chd, scope), + scope, 0); + if (var->type->type == CFUNC) + ERROR((var->ast, "field '%s' declared as a function", var->name)); + /* types of fields are supposed to be complete */ + if (calc_size(var->type) == -1) + ERROR((var->ast, "field '%s' has incomplete type", var->name)); + if (!ctable_insert(ct, var->name, var, 0)) + ERROR((p, "duplicate member '%s'", var->name)); + var->next = type->rec.st.flist; + type->rec.st.flist = var; + } + } +} + +static void exp_check_aseq_(CType_t lhs, CType_t rhs, CNode *ast) { + NOT_IGNORE_VOID(lhs, ast); + NOT_IGNORE_VOID(rhs, ast); + switch (lhs->type) + { + case CSTRUCT: case CUNION: + if (!is_same_type(lhs, rhs)) + INCOMP_TYPE(ast); + break; + case CARR: case CFUNC: /* constant */ + INCOMP_TYPE(ast); + break; + case CINT: case CCHAR: + switch (rhs->type) + { + case CINT: case CCHAR: + ; break; /* ok */ + case CPTR: case CARR: + WARNING((ast, "assignment makes integer from pointer without a cast")); + break; + default: INCOMP_TYPE(ast); + } + break; + case CPTR: + switch (rhs->type) + { + case CPTR: case CARR: + if (!is_same_type(lhs->rec.ref, rhs->rec.ref)) + WARNING((ast, "assignment from incompatible pointer type")); + break; + case CINT: case CCHAR: + WARNING((ast, "assignment makes pointer from integer without a cast")); + break; + default: INCOMP_TYPE(ast); + } + break; + default: assert(0); + } +} + +void semantics_initr(CNode *p, CScope_t scope, CType_t type) { + switch (p->rec.subtype) + { + case INITR_NORM: + { + ExpType et = semantics_exp(p->chd, scope); + if (!scope->lvl && !p->chd->ext.is_const) /* in global scope */ + ERROR((p->chd, "initializer element is not constant")); + exp_check_aseq_(type, et.type, p); + } + break; + case INITR_ARR: + { + if (type->type == CARR) + type = type->rec.arr.elem; + else + ERROR((p, "invalid initializer")); + for (p = p->chd; p; p = p->next) + semantics_initr(p, scope, type); + } + break; + } +} + +void semantics_typedef(CNode *p, CType_t type, CScope_t scope) { + CNode *declr = p->chd->next; + for (p = declr->chd; p; p = p->next) + { + CVar_t var = semantics_declr(p, type, scope, 0); + CDef_t def = cdef_create(var->name, var->type, var->ast); + if (!cscope_push_def(scope, def, NS_ID)) + { + CSymbol_t lu = cscope_lookup(scope, def->name, NS_ID); + if (lu->kind != CDEF) + ERROR((def->ast, "'%s' redeclared as different kind of symbol", def->name)); + /* FIXME: `typedef int a()` is different from typedef `int a(int)` */ + if (!is_same_type(lu->rec.def->type, def->type)) + ERROR((def->ast, "conflicting types of '%s'", def->name)); + } + } +} + +CVar_t semantics_decl(CNode *p, CScope_t scope) { + CNode *declr = p->chd->next; + CType_t type = semantics_typespec(p->chd, scope); + CVar_t res = NULL; + int useful = 0; + if ((type->type == CSTRUCT || type->type == CUNION) && + (*type->name) != '\0') + { + cscope_push_type(scope, type, NS_TAG); + useful = 1; + } + if (p->type == TYPEDEF) + { + semantics_typedef(p, type, scope); + return NULL; + } + CHECK_TYPE(p, DECL); + if (declr->chd->type != NOP) + { + CNode *p; + for (p = declr->chd; p; p = p->next) + { + CNode *initr = p->chd->next; + CVar_t var = semantics_declr(p->chd, type, scope, 0); + if (var->type->type == CFUNC) + { + CType_t func = var->type; + CSymbol_t lu; + func->name = var->name; + if (initr->type == INITR) + ERROR((var->ast, "function '%s' is initialized like a variable", func->name)); + if (!cscope_push_type(scope, func, NS_ID)) + { + lu = cscope_lookup(scope, func->name, NS_ID); + if (lu->kind != CTYPE) + ERROR((func->ast, "'%s' redeclared as different kind of symbol", func->name)); + if (!is_same_type(lu->rec.type, func)) + ERROR((func->ast, "conflicting types of '%s'", func->name)); + type_merge(lu->rec.type, func); + } + } + else + { + if (scope->lvl && calc_size(var->type) == -1) + ERROR((var->ast, "storage size of '%s' isn’t known", var->name)); + var = var_merge(var, scope); + var->next = res; + res = var; + /* check initializer */ + if (initr->type == INITR) + { + var->initr = initr; + semantics_initr(initr, scope, var->type); + } + } + } + useful = 1; + } + if (!useful) + WARNING((type->ast, "useless declaration")); + return res; +} + +ExpType exp_check_aseq(ExpType lhs, ExpType rhs, CNode *ast) { + exp_check_aseq_(lhs.type, rhs.type, ast); + lhs.lval = 0; + return lhs; +} + +ExpType semantics_cast(CNode *p, CScope_t scope) { + CNode *chd = p->chd->next; + ExpType op = semantics_exp(chd, scope); + CVar_t var = semantics_declr(p->chd->chd->next, + semantics_typespec(p->chd->chd, scope), + scope, 0); + CType_t type = var->type; + free(var); + if (!IS_SCALAR(type->type)) + ERROR((p, "conversion to non-scalar type requested")); + if (!IS_SCALAR(op.type->type)) + ERROR((p, "aggregate value used where a scalar was expected")); + if (type->type == CARR) + ERROR((p, "cast specifies array type")); + if (type->type == CFUNC) + ERROR((p, "cast specifies function type")); + type->ast = p; + op.type = type; + op.lval = 0; + if ((p->ext.is_const &= !IS_INT(type->type))) + { + p->ext.const_val = chd->ext.const_val; + } + return op; +} + +ExpType exp_check_arith(ExpType op1, ExpType op2, CNode *p, char kind) { + CNode *lch = p->chd, + *rch = lch->next; + int t1 = op1.type->type, + t2 = op2.type->type; + ExpType res; + NOT_IGNORE_VOID(op1.type, p); + NOT_IGNORE_VOID(op2.type, p); + res.lval = 0; + res.type = basic_type_int; + if ((p->ext.is_const = lch->ext.is_const && rch->ext.is_const)) + { + long l = lch->ext.const_val, + r = rch->ext.const_val, + *a = &(p->ext.const_val); + switch (kind) + { + case '*': *a = l * r; break; + case '/': *a = l / r; break; + case '%': *a = l % r; break; + } + } + if (!(IS_ARITH(t1) && IS_ARITH(t2))) + ERROR((p, "invalid operands to binary operator")); + return res; +} + +ExpType exp_check_bitwise(ExpType op1, ExpType op2, CNode *p, char kind) { + CNode *lch = p->chd, + *rch = lch->next; + int t1 = op1.type->type, + t2 = op2.type->type; + ExpType res; + NOT_IGNORE_VOID(op1.type, p); + NOT_IGNORE_VOID(op2.type, p); + res.lval = 0; + res.type = basic_type_int; + if ((p->ext.is_const = lch->ext.is_const && rch->ext.is_const)) + { + long l = lch->ext.const_val, + r = rch->ext.const_val, + *a = &(p->ext.const_val); + switch (kind) + { + case 'l': *a = l << r; break; + case 'r': *a = l >> r; break; + case '&': *a = l & r; break; + case '|': *a = l | r; break; + case '^': *a = l ^ r; break; + } + } + if (!(IS_INT(t1) && IS_INT(t2))) + ERROR((p, "invalid operands to binary operator")); + return res; +} + +ExpType exp_check_add(ExpType op1, ExpType op2, CNode *p, char kind) { + CNode *lch = p->chd, + *rch = lch->next; + int t1 = op1.type->type, + t2 = op2.type->type; + NOT_IGNORE_VOID(op1.type, p); + NOT_IGNORE_VOID(op2.type, p); + if (kind == '+' && IS_PTR(t2)) + { + /* place the pointer type in the first place */ + int t = t1; + ExpType te = op1; + + t1 = t2; + t2 = t; + + op1 = op2; + op2 = te; + + CNode *n1 = p->chd; + CNode *n2 = n1->next; + n2->next = n1; + n1->next = NULL; + p->chd = n2; + } + if (t1 == CPTR && calc_size(op1.type->rec.ref) == -1) + ERROR((p, "invalid use of undefined type")); + if (t2 == CPTR && calc_size(op2.type->rec.ref) == -1) + ERROR((p, "invalid use of undefined type")); + if (kind == '-') + { + if (IS_PTR(t2) && !IS_PTR(t1)) + ERROR((p, "invalid operands to binary operator")); + } + else + { + if (!((IS_INT(t1) || IS_PTR(t1)) && IS_INT(t2))) + ERROR((p, "invalid operands to binary operator")); + } + if ((p->ext.is_const = lch->ext.is_const && rch->ext.is_const)) + { + long r = rch->ext.const_val, + *a = &(p->ext.const_val); + if (t1 == CARR) + { + int l = p->chd->ext.offset; + CType_t type; + p->ext.var = p->chd->ext.var; + if (t1 == CPTR) type = op1.type->rec.ref; + else type = op1.type->rec.arr.elem; + r *= calc_size(type); + switch (kind) + { + case '+': p->ext.offset = l + r; break; + case '-': p->ext.offset = l - r; break; + } + } + else + { + int l = lch->ext.const_val; + switch (kind) + { + case '+': *a = l + r; break; + case '-': *a = l - r; break; + } + } + } + op1.lval = 0; + return op1; /* int or pointer */ +} + +ExpType exp_check_int(ExpType op1, CNode *p) { + if (!IS_INT(op1.type->type)) + ERROR((p, "wrong type argument to unary operator")); + op1.lval = 0; + return op1; +} + +ExpType exp_check_scalar(ExpType op1, CNode *p) { + if (!IS_SCALAR(op1.type->type)) + ERROR((p, "wrong type argument to unary operator")); + op1.lval = 0; + return op1; +} + +ExpType exp_check_deref(ExpType op1, CNode *p) { + if (!IS_PTR(op1.type->type)) + ERROR((p, "invalid type argument of unary '*'")); + if (op1.type->rec.ref->type == CFUNC) + return op1; + op1.lval = 1; /* deref changes exp to lval */ + if (calc_size(op1.type = op1.type->rec.ref) == -1) + ERROR((p, "dereferencing pointer to incomplete type")); + return op1; +} + +ExpType exp_check_ref(ExpType op1, CNode *p) { + ExpType res; + CType_t t = op1.type; + if (t->type == CARR || (t->type == CPTR && t->rec.ref->type == CFUNC)) + return op1; + if (!op1.lval) + ERROR((p, "lvalue required as unary '&' operand")); + /* TODO: constant pointer folding */ + p->ext.is_const = 0; + /* should not be constant */ + res.lval = 0; + res.type = ctype_create("", CPTR, p); + res.type->rec.ref = op1.type; + return res; +} + +ExpType exp_check_sizeof(CNode *p, CScope_t scope) { + ExpType res, sub; + if (p->chd->type == DECLR) + { + CVar_t abs_declr = semantics_declr( + p->chd->chd->next, + semantics_typespec(p->chd->chd, scope), + scope, 0); + sub.type = abs_declr->type; + free(abs_declr); + } + else + sub = semantics_exp(p->chd, scope); + res.lval = 0; + res.type = basic_type_int; + p->ext.const_val = calc_size(sub.type); + p->ext.is_const = 1; + return res; +} + +ExpType exp_check_inc(ExpType op1, CNode *p) { + if (!IS_SCALAR(op1.type->type)) + ERROR((p, "wrong type argument to increment/decrement")); + if (!op1.lval) + ERROR((p, "lvalue required as increment/decrement operand")); + op1.lval = 0; + return op1; +} + +ExpType exp_check_logical(ExpType op1, ExpType op2, CNode *p, char kind) { + CNode *lch = p->chd, + *rch = lch->next; + int t1 = op1.type->type, + t2 = op2.type->type; + ExpType res; + NOT_IGNORE_VOID(op1.type, p); + NOT_IGNORE_VOID(op2.type, p); + res.lval = 0; + res.type = basic_type_int; + + if ((p->ext.is_const = lch->ext.is_const && rch->ext.is_const)) + { + long l = lch->ext.const_val, + r = rch->ext.const_val, + *a = &(p->ext.const_val); + switch (kind) + { + case '&': *a = l && r; break; + case '|': *a = l || r; break; + } + } + + if (!(IS_SCALAR(t1) && IS_SCALAR(t2))) + ERROR((p, "invalid operands to binary operator")); + return res; +} + +ExpType exp_check_ass(ExpType lhs, ExpType rhs, CNode *p) { + NOT_IGNORE_VOID(lhs.type, p); + NOT_IGNORE_VOID(rhs.type, p); + if (!lhs.lval) + ERROR((p, "lvalue required as left operand of assignment")); + switch (p->rec.subtype) + { + case '=' : return exp_check_aseq(lhs, rhs, p); + case ASS_MUL: return exp_check_aseq(lhs, exp_check_arith(lhs, rhs, p, '*'), p); + case ASS_DIV: return exp_check_aseq(lhs, exp_check_arith(lhs, rhs, p, '/'), p); + case ASS_MOD: return exp_check_aseq(lhs, exp_check_arith(lhs, rhs, p, '%'), p); + + case ASS_ADD: return exp_check_aseq(lhs, exp_check_add(lhs, rhs, p, '+'), p); + case ASS_SUB: return exp_check_aseq(lhs, exp_check_add(lhs, rhs, p, '-'), p); + + case ASS_SHL: return exp_check_aseq(lhs, exp_check_bitwise(lhs, rhs, p, 'l'), p); + case ASS_SHR: return exp_check_aseq(lhs, exp_check_bitwise(lhs, rhs, p, 'r'), p); + case ASS_AND: return exp_check_aseq(lhs, exp_check_bitwise(lhs, rhs, p, '&'), p); + case ASS_XOR: return exp_check_aseq(lhs, exp_check_bitwise(lhs, rhs, p, '^'), p); + case ASS_OR: return exp_check_aseq(lhs, exp_check_bitwise(lhs, rhs, p, '|'), p); + default: assert(0); + } +} + +ExpType exp_check_equality(ExpType op1, ExpType op2, CNode *p, int kind) { + CNode *lch = p->chd, + *rch = lch->next; + int t1 = op1.type->type, + t2 = op2.type->type; + ExpType res; + NOT_IGNORE_VOID(op1.type, p); + NOT_IGNORE_VOID(op2.type, p); + res.lval = 0; + res.type = basic_type_int; + if ((p->ext.is_const = lch->ext.is_const && rch->ext.is_const)) + { + long l = lch->ext.const_val, + r = rch->ext.const_val, + *a = &(p->ext.const_val); + switch (kind) + { + case OPT_EQ: *a = l == r; break; + case OPT_NE: *a = l != r; break; + case '>': *a = l > r; break; + case '<': *a = l < r; break; + case OPT_LE: *a = l <= r; break; + case OPT_GE: *a = l >= r; break; + } + } + if (IS_ARITH(t1) && IS_ARITH(t2)) + return res; + if (!(IS_SCALAR(t1) && IS_SCALAR(t2))) + ERROR((p, "invalid operands to binary operator")); + if (IS_PTR(t1) && IS_PTR(t2)) + { + if (!is_same_type(op1.type->rec.ref, op2.type->rec.ref)) + WARNING((p, "comparison of distinct pointer types lacks a cast")); + } + else if (IS_PTR(t1) || IS_PTR(t2)) + WARNING((p, "comparison between pointer and integer")); + return res; +} + +ExpType exp_check_postfix(CNode *p, CScope_t scope) { + CNode *post = p->chd->next; + ExpType op1 = semantics_exp(p->chd, scope), op2; + int t1 = op1.type->type, t2; + switch (post->rec.subtype) + { + case POSTFIX_ARR: + if (!IS_PTR(t1)) + ERROR((p, "subscripted value is neither array nor pointer")); + op2 = semantics_exp(post->chd, scope); + t2 = op2.type->type; + if (!IS_INT(t2)) + ERROR((p, "array subscript is not an integer")); + p->ext.is_const = 0; + if (t1 == CARR) + op1.type = op1.type->rec.arr.elem; + else + op1.type = op1.type->rec.ref; + op1.lval = 1; + break; + case POSTFIX_CALL: + if (!(t1 == CPTR && op1.type->rec.ref->type == CFUNC)) + ERROR((p, "called object is not a function")); + { + CNode *arg = post->chd->chd, *t; + CType_t func = p->chd->ext.type; + CVar_t param; + /* pointer to function */ + if (func->type == CPTR) func = func->rec.ref; + for (t = arg; t; t = t->next) + semantics_exp(t, scope); + if ((param = func->rec.func.params)) + { + for (; arg && param; + arg = arg->next, param = param->next) + exp_check_aseq_(param->type, arg->ext.type, arg); + if (arg || param) + ERROR((p, "too many/few arguments to the function")); + } + op1.type = func->rec.func.ret; + op1.lval = 0; + break; + } + case POSTFIX_DOT: + if (!(t1 == CSTRUCT || t1 == CUNION)) + ERROR((p, "request for the member in something not a structure or union")); + { + if (calc_size(op1.type) == -1) + ERROR((op1.type->ast, "invalid use of undefined type '%s'", op1.type->name)); + CVar_t fv = ctable_lookup(op1.type->rec.st.fields, + post->chd->rec.strval); + if (!fv) + ERROR((p, "struct/union has no member named '%s'", post->chd->rec.strval)); + p->ext.var = NULL; + p->ext.offset = fv->start; + op1.type = fv->type; + } + break; + case POSTFIX_PTR: + if (t1 != CPTR) + ERROR((p, "invalid type argument of '->'")); + { + CType_t tref = op1.type->rec.ref; + if (!(tref->type == CSTRUCT || tref->type == CUNION)) + ERROR((p, "request for the member in something not a structure or union")); + if (!tref->rec.st.fields) + ERROR((p, "dereferencing pointer to incomplete type")); + calc_size(tref); + CVar_t fv = ctable_lookup(tref->rec.st.fields, + post->chd->rec.strval); + if (!fv) + ERROR((p, "struct/union has no member named '%s'", post->chd->rec.strval)); + p->ext.var = fv; + p->ext.offset = fv->start; + op1.type = fv->type; + op1.lval = 1; + } + break; + case OPT_INC: case OPT_DEC: + op1 = exp_check_inc(op1, p); + break; + default: assert(0); + } + return op1; +} + +ExpType semantics_exp(CNode *p, CScope_t scope) { + ExpType res; + switch (p->type) + { + case ID: + { + CSymbol_t lu = cscope_lookup(scope, p->rec.strval, NS_ID); + if (!lu) ERROR((p, "'%s' undeclared", p->rec.strval)); + if (lu->kind == CVAR) + { + p->ext.var = lu->rec.var; + res.type = p->ext.var->type; + res.lval = res.type->type != CARR; + } + else + { + p->ext.type = lu->rec.type; + res.type = p->ext.type; + res.lval = res.type->type == CFUNC; + POINTER_CONV(res.type, p); + p->ext.var = cvar_create(p->ext.type->name, res.type, NULL); + } + p->ext.is_const = 0; /* res.type->type == CARR || + res.type->type == CFUNC; */ + } + break; + case INT: + res.type = basic_type_int; + res.lval = 0; + p->ext.is_const = 1; + p->ext.const_val = p->rec.intval; + break; + case CHAR: + res.type = basic_type_char; + res.lval = 0; + p->ext.is_const = 1; + { + char *val = p->rec.strval; + int intval; + int len = strlen(val); + if (*val == '\\') + { + if (len == 2) + switch (val[1]) + { + case 'a': intval = '\a'; break; + case 'b': intval = '\b'; break; + case 'f': intval = '\f'; break; + case 'n': intval = '\n'; break; + case 'r': intval = '\r'; break; + case 't': intval = '\t'; break; + case 'v': intval = '\v'; break; + case '\\': intval = '\\'; break; + case '\'': intval = '\''; break; + case '"': intval = '\"'; break; + case '\?': intval = '\?'; break; + case '0': intval = '\0'; break; + default: + ERROR((p, "unknown escape sequence")); + } + else + { + switch (val[1]) + { + case '0': + sscanf(val + 2, "%o", &intval); + break; + case 'x': + sscanf(val + 2, "%x", &intval); + break; + default: + ERROR((p, "unknown escape sequence")); + } + } + } + else + intval = *val; + p->ext.const_val = intval; + } + break; + case STR: + { + CType_t type = ctype_create("", CPTR, NULL); + CSList_t cstr = NEW(CSList); + + cstr->str = p->rec.strval; + (cstr->prev = cstrs->prev)->next = cstr; + (cstr->next = cstrs)->prev = cstr; + cstr->id = cstr->prev->id + 1; + /* cstrs = cstr; */ + + type->rec.ref = basic_type_char; + res.type = type; + res.lval = 0; + p->ext.is_const = 1; + p->ext.const_val = (uintptr_t)cstr; + } + break; + case EXP: + { + ExpType op1; + ExpType op2; + switch (p->rec.subtype) + { + case EXP_CAST: + res = semantics_cast(p, scope); + break; + case EXP_POSTFIX: + res = exp_check_postfix(p, scope); + break; + case KW_SIZEOF: + res = exp_check_sizeof(p, scope); + break; + default: + { + op1 = semantics_exp(p->chd, scope); + if (p->chd->next) + op2 = semantics_exp(p->chd->next, scope); + switch (p->rec.subtype) + { + /* following cases are binary expressions */ + case ',': + res = op2; + res.lval = 0; + break; + case '=' : + case ASS_MUL: + case ASS_DIV: + case ASS_MOD: + case ASS_ADD: + case ASS_SUB: + case ASS_SHL: + case ASS_SHR: + case ASS_AND: + case ASS_XOR: + case ASS_OR: + res = exp_check_ass(op1, op2, p); + break; + case OPT_OR: + res = exp_check_logical(op1, op2, p, '|'); + break; + case OPT_AND: + res = exp_check_logical(op1, op2, p, '&'); + break; + case OPT_SHL: + res = exp_check_bitwise(op1, op2, p, 'l'); + break; + case OPT_SHR: + res = exp_check_bitwise(op1, op2, p, 'r'); + break; + case '|': + case '^': + res = exp_check_bitwise(op1, op2, p, p->rec.subtype); + break; + case OPT_EQ: + case OPT_NE: + case '<': + case '>' : + case OPT_LE: + case OPT_GE: + res = exp_check_equality(op1, op2, p, p->rec.subtype); + break; + case '/': + case '%': + res = exp_check_arith(op1, op2, p, p->rec.subtype); + break; + case '&': + if (p->chd->next) + res = exp_check_bitwise(op1, op2, p, '&'); + else + res = exp_check_ref(op1, p); + break; + case '*': + if (p->chd->next) + res = exp_check_arith(op1, op2, p, '*'); + else + res = exp_check_deref(op1, p); + break; + case '+': + if (p->chd->next) + res = exp_check_add(op1, op2, p, '+'); + else + { + res = op1; + res.lval = 0; + } + break; + case '-': + if (p->chd->next) + res = exp_check_add(op1, op2, p, '-'); + else + { + res = op1; + res.lval = 0; + } + break; + case '~': + res = exp_check_int(op1, p); + break; + case '!': + res = exp_check_scalar(op1, p); + break; + case OPT_INC: case OPT_DEC: + res = exp_check_inc(op1, p); + break; + default: + printf("%d\n", p->rec.subtype); + assert(0); + } + } + } + } + break; + case NOP: ; break; + default: assert(0); + } + p->ext.type = res.type; + return res; +} + +CVar_t semantics_stmt(CNode *p, CScope_t scope); + +CVar_t semantics_if(CNode *p, CScope_t scope) { + ExpType exp = semantics_exp(p->chd, scope); + CNode *body1 = p->chd->next, + *body2 = body1->next; + CVar_t res; + if (!IS_SCALAR(exp.type->type)) + ERROR((p->chd, "a scalar is required in 'if' condition")); + res = semantics_stmt(body1, scope); + if (body2->type != NOP) + { + CVar_t h, t; + if ((h = semantics_stmt(p->chd->next->next, scope))) + { + for (t = h; t->next; t = t->next); + t->next = res; + res = h; + } + } + return res; +} + +CVar_t semantics_for(CNode *p, CScope_t scope) { + ExpType exp = semantics_exp(p->chd->next, scope); + semantics_exp(p->chd, scope); + semantics_exp(p->chd->next->next, scope); + CVar_t res; + if (p->chd->next->type != NOP && !IS_SCALAR(exp.type->type)) + ERROR((p->chd->next, "a scalar is required in 'for' condition")); + scope->inside_loop++; + res = semantics_stmt(p->chd->next->next->next, scope); + scope->inside_loop--; + return res; +} + +CVar_t semantics_while(CNode *p, CScope_t scope) { + ExpType exp = semantics_exp(p->chd, scope); + CVar_t res; + if (!IS_SCALAR(exp.type->type)) + ERROR((p->chd, "a scalar is required in 'while' condition")); + scope->inside_loop++; + res = semantics_stmt(p->chd->next, scope); + scope->inside_loop--; + return res; +} + +CVar_t semantics_check_loop(CNode *p, CScope_t scope, const char *stmt_name) { + if (!scope->inside_loop) + ERROR((p, "%s statement not within a loop", stmt_name)); + return NULL; +} +CVar_t semantics_return(CNode *p, CScope_t scope) { + assert(scope->func); + CType_t rt = scope->func->rec.func.ret; + if (p->chd->type != NOP) + { + ExpType t = semantics_exp(p->chd, scope); + if (rt->type == CVOID) + { + if (t.type->type != CVOID) + WARNING((p->chd, "'return' with a value, in function returning void")); + } + else + exp_check_aseq_(rt, p->chd->ext.type, p->chd); + } + return NULL; +} + +CVar_t semantics_comp(CNode *, CScope_t); +CVar_t semantics_stmt(CNode *p, CScope_t scope) { + CHECK_TYPE(p, STMT); + switch (p->rec.subtype) + { + case STMT_EXP: + semantics_exp(p->chd, scope); + break; + case STMT_COMP: + { + CVar_t res; + cscope_enter(scope); + res = semantics_comp(p, scope); + cscope_exit(scope); + return res; + } + case STMT_IF: + return semantics_if(p, scope); + case STMT_FOR: + return semantics_for(p, scope); + case STMT_WHILE: + return semantics_while(p, scope); + case STMT_CONT: + return semantics_check_loop(p, scope, "continue"); + case STMT_BREAK: + return semantics_check_loop(p, scope, "break"); + case STMT_RET: + return semantics_return(p, scope); + default: assert(0); + } + return NULL; +} + +CVar_t semantics_comp(CNode *p, CScope_t scope) { + CNode *decls = p->chd, + *stmts = p->chd->next, *i; + CVar_t res = NULL; + if (decls->chd->type != NOP) + { + CVList autos, *tail = &autos; + autos.next = NULL; + for (i = decls->chd; i; i = i->next) + { + CVar_t vlist = semantics_decl(i, scope); + CVList_t sa = NULL, a; + if (vlist) /* collect local vars */ + { + CVar_t v; + for (v = vlist; v->next; v = v->next) + { + a = NEW(CVList); + a->var = v; + a->next = sa; + sa = a; + } + a = NEW(CVList); + a->var = v; + a->next = sa; + sa = a; + v->next = res; + res = vlist; + } + if (sa) + { + tail->next = sa; + for (tail = sa; tail->next; tail = tail->next); + } + } + p->ext.autos = autos.next; + } + if (stmts->chd->type != NOP) + for (i = stmts->chd; i; i = i->next) + { + CVar_t vlist = semantics_stmt(i, scope); + if (vlist) /* collect nested local vars */ + { + CVar_t p; + for (p = vlist; p->next; p = p->next); + p->next = res; + res = vlist; + } + } + return res; +} + +CType_t semantics_func(CNode *p, CScope_t scope) { + CHECK_TYPE(p, FUNC_DEF); + CVar_t head; + CType_t func, efunc = NULL, rt; + cscope_enter(scope); /* enter function local scope */ + head = semantics_declr(p->chd->next, + semantics_typespec(p->chd, scope), + scope, FLAG_FUNC_DEF); + func = head->type; + rt = func->rec.func.ret; + if (rt->type != CVOID && calc_size(rt) == -1) + ERROR((func->rec.func.ret->ast, "return type is an incomplete type")); + + func->rec.func.body = p->chd->next->next; + func->name = head->name; + scope->func = func; + free(head); + { /* Note: here is a dirty hack to forcibly push function definition to + the global scope, while all the types specified in parameters retain in local scope. + The key point is to make sure semantics_params does not push any var */ + CSNode *ntop = scope->top; + CVar_t var; + scope->top = ntop->next; + scope->lvl--; + + if (!cscope_push_type(scope, func, NS_ID)) + { + CSymbol_t lu = cscope_lookup(scope, func->name, NS_ID); + if (lu->kind != CTYPE) + ERROR((func->ast, "'%s' redeclared as different kind of symbol", func->name)); + efunc = lu->rec.type; + if (efunc->type != CFUNC) + ERROR((func->ast, "conflicting types of '%s'", func->name)); + else if (efunc->rec.func.body) + ERROR((func->ast, "redefintion of function '%s'", func->name)); + else if (!is_same_type(efunc, func)) + ERROR((func->ast, "function defintion does not match the prototype")); + type_merge(efunc, func); + } + + scope->top = ntop; + scope->lvl++; + + for (var = func->rec.func.params; var; var = var->next) + { + cscope_push_var(scope, var, NS_ID); + if (calc_size(var->type) == -1) + ERROR((var->ast, "parameter '%s' has incomplete type", var->name)); + } + } + func->rec.func.local = semantics_comp(p->chd->next->next, scope); /* check comp */ + cscope_exit(scope); /* exit from local scope */ + if (efunc) + { + type_merge(efunc, func); + free(func); + func = efunc; + } + return func; +} + +CType_t make_builtin_func(char *name, CType_t rt) { + CType_t func = ctype_create(name, CFUNC, NULL); + func->rec.func.params = NULL; + func->rec.func.body = NULL; + func->rec.func.local = NULL; + func->rec.func.ret = rt; + return func; +} + +typedef struct DNode DNode; +CScope_t typedef_scope; +struct DNode{ + enum DefState kind; + DNode *next; +} *typedef_stack; + +void cibic_init(void) { + typedef_scope = cscope_create(); + typedef_stack = NULL; +} + +int is_identifier(const char *name) { + CSymbol_t lu; + if (typedef_stack) + { + /* struct tag */ + /* the parser is reading declarators */ + if (typedef_stack->kind == FORCE_ID) return 1; + /* the parser is reading typedef */ + if (typedef_stack->kind == IN_TYPEDEF) return 1; + /* no info about name, assume it to be an id by default */ + } + lu = cscope_lookup(typedef_scope, name, NS_ID); + if (!lu) return 1; + return lu->kind == CVAR; +} + +void push(char *name) { + if (typedef_stack && typedef_stack->kind == IN_TYPEDEF) + cscope_push_type(typedef_scope, ctype_create(name, 0, NULL), NS_ID); + else + cscope_push_var(typedef_scope, cvar_create(name, NULL, NULL), NS_ID); +} + +CDef_t cdef_create(const char *name, CType_t type, CNode *ast) { + CDef_t cd = NEW(CDef); + cd->name = name; + cd->type = type; + cd->ast = ast; + return cd; +} + +void def_enter(enum DefState kind) { + DNode *ntop = NEW(DNode); + ntop->kind = kind; + ntop->next = typedef_stack; + typedef_stack = ntop; +} + +void def_exit(void) { + DNode *ntop = typedef_stack->next; + free(typedef_stack); + typedef_stack = ntop; +} + +void block_enter(void) { cscope_enter(typedef_scope); } +void block_exit(void) { cscope_exit(typedef_scope); } + +void ctable_debug_print(CTable_t ct) { + int i; + fprintf(stderr, "*** CTable ***\n"); + for (i = 0; i < MAX_TABLE_SIZE; i++) + if (ct->head[i]) + { + CTNode *p; + fprintf(stderr, "[%04d]", i); + for (p = ct->head[i]; p; p = p->next) + fprintf(stderr, "->[%s:%d]", ct->pfunc(p->val), p->lvl); + fprintf(stderr, "\n"); + } + fprintf(stderr, "*** CTable ***\n"); +} + +void cscope_debug_print(CScope_t cs) { + int lvl = cs->lvl; + CSNode *p; + CSElem *tp; + fprintf(stderr, "\n****** CScope ******\n"); + for (p = cs->top; p; p = p->next) + { + fprintf(stderr, "Level %d:\n", lvl--); + for (tp = p->symlist; tp; tp = tp->next) + fprintf(stderr, "%s ", csymbol_print(tp->sym)); + fprintf(stderr, "\n\n"); + } + fprintf(stderr, "IDs:\n"); + ctable_debug_print(cs->ids); + fprintf(stderr, "Tags:\n"); + ctable_debug_print(cs->tags); + fprintf(stderr, "****** CScope ******\n\n"); +} + +const char *ctable_cvar_print(void *var) { + static char buff[MAX_DEBUG_PRINT_BUFF]; + sprintf(buff, "%s@%lx", ((CVar_t )var)->name, (size_t)var); + return buff; +} + +const char *csymbol_print(void *csym) { + CSymbol_t p = (CSymbol_t)csym; + static char buff[MAX_DEBUG_PRINT_BUFF]; + switch (p->kind) + { + case CVAR: + sprintf(buff, "%s@%lx", p->rec.var->name, (size_t)p->rec.var); + break; + case CTYPE: + sprintf(buff, "%s@%lx", p->rec.type->name, (size_t)p->rec.type); + break; + case CDEF: + sprintf(buff, "%s@%lx", p->rec.def->name, (size_t)p->rec.def); + } + return buff; +} + +void ctype_print_(CType_t, int, CPSet_t); +void print_tabs(int tnum) { while (tnum--) fprintf(stderr, " "); } +void ctype_pre_(CType_t type, int lvl) { + int t= type->type; + if (t == CARR || t == CFUNC || t == CUNION || t == CSTRUCT) + { + fprintf(stderr, "\n"); + print_tabs(lvl); + } +} + +void cvar_print_(CVar_t cv, int lvl, CPSet_t visited) { + fprintf(stderr, "[var@%lx:%s :: ", (size_t)cv, cv->name); + ctype_pre_(cv->type, ++lvl); + ctype_print_(cv->type, lvl, visited); + fprintf(stderr, "]"); +} + +void cdef_print_(CDef_t cd, int lvl, CPSet_t visited) { + fprintf(stderr, "[def@%lx:%s :: ", (size_t)cd, cd->name); + ctype_pre_(cd->type, ++lvl); + ctype_print_(cd->type, lvl, visited); + fprintf(stderr, "]"); +} + +void ctype_print_(CType_t ct, int lvl, CPSet_t visited) { + switch (ct->type) + { + case CINT: + fprintf(stderr, "[int]"); break; + case CCHAR: + fprintf(stderr, "[char]"); break; + case CVOID: + fprintf(stderr, "[void]"); break; + case CSTRUCT: + case CUNION: + { + CTable_t f = ct->rec.st.fields; + int i; + CTNode *fn; + lvl++; + fprintf(stderr, "[%s@%lx:{name:%s}", + ct->type == CSTRUCT ? "struct" : "union", + (size_t)ct, ct->name); + if (cpset_belongs(visited, (uintptr_t)ct)) + { + fprintf(stderr, "]\n"); + return; + } + cpset_insert(visited, (uintptr_t)ct); + fprintf(stderr, "{size:%d}", ct->size); + fprintf(stderr, "{fields:"); + if (f) + { + fprintf(stderr, "\n"); + int first = 1; + for (i = 0; i < MAX_TABLE_SIZE; i++) + for (fn = f->head[i]; fn; fn = fn->next) + { + fprintf(stderr, "%s", first ? (first = 0, "") : ",\n"); + print_tabs(lvl); + cvar_print_((CVar_t)fn->val, lvl, visited); + } + } + fprintf(stderr, "}]"); + } + break; + case CARR: + { + CType_t type = ct->rec.arr.elem; + fprintf(stderr, "[arr:{len:%d}{size:%d}]->", + ct->rec.arr.len, ct->size); + ctype_pre_(type, ++lvl); + ctype_print_(type, lvl, visited); + } + break; + case CPTR: + { + CType_t type = ct->rec.ref; + fprintf(stderr, "[ptr]->"); + ctype_pre_(type, ++lvl); + ctype_print_(type, lvl, visited); + } + break; + case CFUNC: + { + CType_t type = ct->rec.func.ret; + CVar_t p; + lvl++; + fprintf(stderr, "[func:{name:%s}{size:%d}\n", + ct->name, ct->size); + print_tabs(lvl); + fprintf(stderr, "{params:"); + if (ct->rec.func.params) + { + fprintf(stderr, "\n"); + for (p = ct->rec.func.params; p; p = p->next) + { + print_tabs(lvl + 1); + cvar_print_(p, lvl + 1, visited); + if (p->next) fprintf(stderr, ",\n"); + } + } + /* print_tabs(lvl); */ + fprintf(stderr, "}\n"); + print_tabs(lvl); + fprintf(stderr, "{local:"); + if (ct->rec.func.local) + { + fprintf(stderr, "\n"); + for (p = ct->rec.func.local; p; p = p->next) + { + print_tabs(lvl + 1); + cvar_print_(p, lvl + 1, visited); + if (p->next) fprintf(stderr, ",\n"); + } + } + fprintf(stderr, "}]->"); + ctype_pre_(type, lvl); + ctype_print_(type, lvl, visited); + } + break; + } +} + +void ctype_print(CType_t ct) { + CPSet_t visited = cpset_create(); + ctype_print_(ct, 0, visited); + cpset_destroy(visited); +} + +void cvar_print(CVar_t cv) { + CPSet_t visited = cpset_create(); + cvar_print_(cv, 0, visited); + cpset_destroy(visited); +} + +void cdef_print(CDef_t cd) { + CPSet_t visited = cpset_create(); + cdef_print_(cd, 0, visited); + cpset_destroy(visited); +} + +void semantics_check(CNode *p, int quiet) { + CScope_t scope = cscope_create(); + basic_type_int = ctype_create("int", CINT, NULL); + basic_type_char = ctype_create("char", CCHAR, NULL); + basic_type_void = ctype_create("void", CVOID, NULL); + builtin_printf = make_builtin_func("printf", basic_type_int); + builtin_scanf = make_builtin_func("scanf", basic_type_int); + builtin_print_int = make_builtin_func("__print_int", basic_type_int); + builtin_print_char = make_builtin_func("__print_char", basic_type_int); + builtin_print_string = make_builtin_func("__print_string", basic_type_int); + builtin_memcpy = make_builtin_func("memcpy", basic_type_void); + { + CType_t vstar = ctype_create("", CPTR, NULL); + vstar->rec.ref = basic_type_void; + builtin_malloc = make_builtin_func("malloc", vstar); + } + /* add top-level basic types */ + cscope_push_type(scope, basic_type_int, NS_ID); + cscope_push_type(scope, basic_type_char, NS_ID); + cscope_push_type(scope, basic_type_void, NS_ID); + cscope_push_type(scope, builtin_printf, NS_ID); + cscope_push_type(scope, builtin_scanf, NS_ID); + cscope_push_type(scope, builtin_malloc, NS_ID); + cscope_push_type(scope, builtin_memcpy, NS_ID); + cscope_push_type(scope, builtin_print_int, NS_ID); + cscope_push_type(scope, builtin_print_char, NS_ID); + cscope_push_type(scope, builtin_print_string, NS_ID); + /* const string counter */ + cstrs = NEW(CSList); + cstrs->id = -1; + cstrs->prev = cstrs->next = cstrs; + /* check all definitions and declarations */ + for (p = p->chd; p; p = p->next) + { + switch (p->type) + { + case FUNC_DEF: + semantics_func(p, scope); break; + case DECL: case TYPEDEF: + semantics_decl(p, scope); break; + default: assert(0); + } + } + { + int i; + CTNode *p; + funcs = NULL; + gvars = NULL; + for (i = 0; i < MAX_TABLE_SIZE; i++) + for (p = scope->ids->head[i]; p; p = p->next) + { + CSymbol_t tp = (CSymbol_t)(p->val); + CType_t func = tp->rec.type; + if (tp->kind == CTYPE && + func->type == CFUNC && + func->rec.func.body) + { + CTList_t nf = NEW(CTList); + CVar_t p; + int size; + nf->type = func; + nf->next = funcs; + funcs = nf; + size = 0; + for (p = func->rec.func.params; p; p = p->next) + { + CType_t t = p->type; + /* force to a word alignment */ + size += align_shift(size); + p->start = size; + if (t->type == CARR) + size += PTR_SIZE; + else + size += calc_size(t); + } + size = 0; + for (p = func->rec.func.local; p; p = p->next) + { + /* force to a word alignment */ + size += align_shift(size); + p->start = size; + size += calc_size(p->type); + } + func->rec.func.local_size = size; + } + else if (tp->kind == CVAR) + { + CVList_t nv = NEW(CVList); + nv->var = tp->rec.var; + nv->next = gvars; + nv->var->loc = 1; + gvars = nv; + } + } + } + if (!quiet) + { + CTNode *p; + int i; + cscope_debug_print(scope); + for (i = 0; i < MAX_TABLE_SIZE; i++) + for (p = scope->ids->head[i]; p; p = p->next) + { + CSymbol_t tp = (CSymbol_t)(p->val); + switch (tp->kind) + { + case CVAR: cvar_print(tp->rec.var); break; + case CTYPE: ctype_print(tp->rec.type); break; + case CDEF: cdef_print(tp->rec.def); break; + } + fprintf(stderr, "\n\n"); + } + cnode_debug_print(ast_root, 1); + } +} + + +CPSet_t cpset_create(void) { + CPSet_t res = NEW(CPSet); + memset(res->head, 0, sizeof res->head); + return res; +} + +void cpset_destroy(CPSet_t cps) { + int i; + for (i = 0; i < MAX_TABLE_SIZE; i++) + { + CPNode *p, *np; + for (p = cps->head[i]; p; p = np) + { + np = p->next; + free(p); + } + } + free(cps); +} + +int cpset_insert(CPSet_t cps, uintptr_t key) { + unsigned int hv = key % MAX_TABLE_SIZE; + CPNode *p = cps->head[hv], *np; + for (; p; p = p->next) + if (p->key == key) + return 0; + np = NEW(CPNode); + np->key = key; + np->next = cps->head[hv]; + cps->head[hv] = np; + return 1; +} + +void cpset_erase(CPSet_t cps, uintptr_t key) { + unsigned int hv = key % MAX_TABLE_SIZE; + int flag = 0; + CPNode *p = cps->head[hv], *pp = NULL; + for (; p; pp = p, p = p->next) + if (p->key == key) + { + flag = 1; + break; + } + if (!flag) return; + if (pp) + pp->next = p->next; + else + cps->head[hv] = p->next; + free(p); +} + +int cpset_belongs(CPSet_t cps, uintptr_t key) { + unsigned int hv = key % MAX_TABLE_SIZE; + CPNode *p = cps->head[hv]; + for (; p; p = p->next) + if (p->key == key) + return 1; + return 0; +} diff --git a/semantics.h b/semantics.h new file mode 100644 index 0000000..4ba1317 --- /dev/null +++ b/semantics.h @@ -0,0 +1,226 @@ +#ifndef SEMANTICS_H +#define SEMANTICS_H +#include <stdint.h> +#include "const.h" + +typedef struct CNode CNode; +typedef struct CTable *CTable_t; +typedef struct CType CType; +typedef CType *CType_t; +typedef struct CVar CVar; +typedef CVar *CVar_t; +typedef struct CSymbol CSymbol; +typedef CSymbol *CSymbol_t; +typedef struct CDef CDef; +typedef CDef *CDef_t; + +typedef struct CTList CTList; +typedef CTList *CTList_t; +struct CTList { + CType_t type; + CTList_t next; +}; + +typedef struct CVList CVList; +typedef CVList *CVList_t; +struct CVList { + CVar_t var; + CVList_t next; +}; + +typedef struct CSList CSList; +typedef CSList *CSList_t; +struct CSList { + char *str; + int id; + int start; + CSList_t prev, next; +}; + +typedef struct CBList *CBList_t; +typedef struct COList *COList_t; +typedef struct CVList *CVList_t; + +struct CVar { + char *name; + CVar_t next; /* next in the linked list */ + CType_t type; + int start; + CNode *ast; + CNode *initr; + CBList_t defsite; + int loc; + int reload; + /* the following fields are used for renaming */ + int cnt; + int weight; + COList_t stack; +}; + +CVar_t cvar_create(char *name, CType_t type, CNode *ast); +void cvar_print(CVar_t cv); + +struct CType { + enum { + CINT, + CCHAR, + CVOID, + CSTRUCT, + CUNION, + CARR, + CPTR, + CFUNC + } type; + char *name; + union { + struct { + CTable_t fields; /* for a struct or union */ + CVar_t flist; + } st; + CType_t ref; /* for a pointer */ + struct { + CType_t elem; + int len; + } arr; /* for an array */ + struct { + CVar_t params; + CVar_t local; + CType_t ret; + CNode *body; + int local_size; + int frame_size; + } func; /* for a function */ + } rec; + int size; /* memory footprint */ + CNode *ast; +}; + +CType_t ctype_create(char *name, int type, CNode *ast); +void ctype_debug_print(CType_t ct); + +typedef unsigned int (*Hashfunc_t) (const char *); +typedef const char *(*Printfunc_t) (void *); + +typedef struct CTNode CTNode; +struct CTNode { + const char *key; + void *val; + CTNode *next; + int lvl; +}; + +typedef struct CTable { + CTNode *head[MAX_TABLE_SIZE]; + Hashfunc_t hfunc; + Printfunc_t pfunc; +} CTable; + + +CTable_t ctable_create(Hashfunc_t hfunc, Printfunc_t pfunc); +void ctable_destroy(CTable_t ct); +void *ctable_lookup(CTable_t ct, const char *key); +int ctable_insert(CTable_t ct, const char *key, void *val, int lvl); +void ctable_clip(CTable_t ct, const char *key, int max_lvl); +void ctable_debug_print(CTable_t ct); + +typedef struct CSElem CSElem; +struct CSElem { + CSymbol_t sym; + CSElem *next; +}; + +typedef struct CSNode CSNode; +struct CSNode { + CSElem *symlist; + CSNode *next; +}; + +typedef struct CScope CScope; +typedef CScope *CScope_t; +struct CScope { + int lvl; + CType_t func; + int inside_loop; + CSNode *top; + CTable_t ids; /* ordinary identifiers */ + CTable_t tags; /* union & struct tags */ +/* CTable_t ext_link; external linkage */ +}; + +typedef struct ExpType { + CType_t type; + int lval; +} ExpType; + +struct CSymbol { + enum { + CTYPE, + CVAR, + CDEF + } kind; + union { + CType_t type; + CVar_t var; + CDef_t def; + } rec; +}; +const char *csymbol_print(void *csym); +const char *csym_getname(CSymbol_t csym); + +struct CDef { + const char *name; + CType_t type; + CNode *ast; +}; +CDef_t cdef_create(const char *name, CType_t type, CNode *ast); + +CScope_t cscope_create(void); +CSymbol_t cscope_lookup(CScope_t cs, const char *name, int nspace); +int cscope_push_var(CScope_t cs, CVar_t var, int nspace); +int cscope_push_def(CScope_t cs, CDef_t def, int nspace); +int cscope_push_type(CScope_t cs, CType_t type, int nspace); +void cscope_enter(CScope_t cs); +void cscope_exit(CScope_t cs); +void cscope_debug_print(CScope_t cs); + +unsigned int bkdr_hash(const char *str); +const char *ctable_cvar_print(void *var); + +void semantics_check(CNode *ast, int quiet); + +enum DefState{ + FORCE_ID, + IN_TYPEDEF, + NONE +}; + +typedef struct CPNode CPNode; +typedef struct CPSet { + struct CPNode { + uintptr_t key; + CPNode *next; + } *head[MAX_TABLE_SIZE]; +} CPSet; +typedef CPSet *CPSet_t; + +CPSet_t cpset_create(void); +int cpset_insert(CPSet_t cps, uintptr_t key); +int cpset_belongs(CPSet_t cps, uintptr_t key); +void cpset_erase(CPSet_t cps, uintptr_t key); +void cpset_destroy(CPSet_t cps); + +int is_identifier(const char *name); +void push(char *name); +void cibic_init(void); +void block_enter(void); +void block_exit(void); +void def_enter(enum DefState kind); +void def_exit(void); +int calc_size(CType_t type); +int align_shift(int x); + +extern int scnt; +extern CTList_t funcs; +extern CVList_t gvars; +extern CSList_t cstrs; +#endif diff --git a/semantics_data/anonymous_struct1.c b/semantics_data/anonymous_struct1.c new file mode 100644 index 0000000..2709f70 --- /dev/null +++ b/semantics_data/anonymous_struct1.c @@ -0,0 +1,6 @@ +int main() { + struct {int x, y;} a, b, c; + a = b; + b = c; + c = a; +} diff --git a/semantics_data/anonymous_struct2.c b/semantics_data/anonymous_struct2.c new file mode 100644 index 0000000..b17e6c0 --- /dev/null +++ b/semantics_data/anonymous_struct2.c @@ -0,0 +1,7 @@ +int main() { + struct {int x, y;} a; + struct {int x, y;} b, c; + a = b; + b = c; + c = a; +} diff --git a/semantics_data/array_complete.c b/semantics_data/array_complete.c new file mode 100644 index 0000000..79c5c8a --- /dev/null +++ b/semantics_data/array_complete.c @@ -0,0 +1,4 @@ +struct A arr[1]; +struct A {int x;}; +int main() { +} diff --git a/semantics_data/array_decl.c b/semantics_data/array_decl.c new file mode 100644 index 0000000..33973e5 --- /dev/null +++ b/semantics_data/array_decl.c @@ -0,0 +1,6 @@ +int f(int a[1][3][2]) {
+}
+int main() {
+ int a[3][3][2];
+ f(a);
+}
diff --git a/semantics_data/array_decl2.c b/semantics_data/array_decl2.c new file mode 100644 index 0000000..d7f8a04 --- /dev/null +++ b/semantics_data/array_decl2.c @@ -0,0 +1,7 @@ +int f(int a[20][2][2]);
+int f(int a[1][3][2]) {
+}
+int main() {
+ int a[3][3][2];
+ f(a);
+}
diff --git a/semantics_data/cast.c b/semantics_data/cast.c new file mode 100644 index 0000000..b441552 --- /dev/null +++ b/semantics_data/cast.c @@ -0,0 +1,3 @@ +int main() { + ((int (*)(int a))1)(1); +} diff --git a/semantics_data/cast2.c b/semantics_data/cast2.c new file mode 100644 index 0000000..34b3484 --- /dev/null +++ b/semantics_data/cast2.c @@ -0,0 +1,3 @@ +int main() { + (int [3])1; +} diff --git a/semantics_data/cast3.c b/semantics_data/cast3.c new file mode 100644 index 0000000..4500cb1 --- /dev/null +++ b/semantics_data/cast3.c @@ -0,0 +1,3 @@ +int main() { + (int ())1; +} diff --git a/semantics_data/conflict.c b/semantics_data/conflict.c new file mode 100644 index 0000000..f2798e7 --- /dev/null +++ b/semantics_data/conflict.c @@ -0,0 +1,4 @@ +int f(int a, char b); +int f(int a); +int main() { +} diff --git a/semantics_data/decl_comp.c b/semantics_data/decl_comp.c new file mode 100644 index 0000000..f1b8bd2 --- /dev/null +++ b/semantics_data/decl_comp.c @@ -0,0 +1,5 @@ +int f();
+int f(int a);
+int main() {
+ f(1, 2);
+}
diff --git a/semantics_data/decl_comp2.c b/semantics_data/decl_comp2.c new file mode 100644 index 0000000..f1298bd --- /dev/null +++ b/semantics_data/decl_comp2.c @@ -0,0 +1,5 @@ +int f(int a);
+int f() {
+}
+int main() {
+}
diff --git a/semantics_data/decl_comp3.c b/semantics_data/decl_comp3.c new file mode 100644 index 0000000..29d388f --- /dev/null +++ b/semantics_data/decl_comp3.c @@ -0,0 +1,4 @@ +int f(); +int f(int a) { + f(); +} diff --git a/semantics_data/fail1.c b/semantics_data/fail1.c new file mode 100644 index 0000000..f80ecaa --- /dev/null +++ b/semantics_data/fail1.c @@ -0,0 +1,2 @@ +int main(struct C c) { /* fail because of incomplete type of parameter */ +} diff --git a/semantics_data/fail10.c b/semantics_data/fail10.c new file mode 100644 index 0000000..e287c6f --- /dev/null +++ b/semantics_data/fail10.c @@ -0,0 +1,4 @@ +int main() { + int a[2][3]; + a[1][3][3] = 3; +} diff --git a/semantics_data/fail11.c b/semantics_data/fail11.c new file mode 100644 index 0000000..4aa213a --- /dev/null +++ b/semantics_data/fail11.c @@ -0,0 +1,4 @@ +int main() { + int a[2][3]; + a[1] = 3; +} diff --git a/semantics_data/fail2.c b/semantics_data/fail2.c new file mode 100644 index 0000000..a76d4e0 --- /dev/null +++ b/semantics_data/fail2.c @@ -0,0 +1,4 @@ +int main() { + /* fail because of incomplete type of local variable */ + struct C c; +} diff --git a/semantics_data/fail3.c b/semantics_data/fail3.c new file mode 100644 index 0000000..ad47f5f --- /dev/null +++ b/semantics_data/fail3.c @@ -0,0 +1,4 @@ +int main(int main) { + /* fail because `f' is overrided by parameter */ + main(3); +} diff --git a/semantics_data/fail4.c b/semantics_data/fail4.c new file mode 100644 index 0000000..20033af --- /dev/null +++ b/semantics_data/fail4.c @@ -0,0 +1,3 @@ +int main() { + (void)1 + 2; /* not ok */ +} diff --git a/semantics_data/fail5.c b/semantics_data/fail5.c new file mode 100644 index 0000000..0e8fba8 --- /dev/null +++ b/semantics_data/fail5.c @@ -0,0 +1,4 @@ +int main() { + /* fail because of incomplete field */ + struct C {struct B b;} *c; +} diff --git a/semantics_data/fail6.c b/semantics_data/fail6.c new file mode 100644 index 0000000..3419ce9 --- /dev/null +++ b/semantics_data/fail6.c @@ -0,0 +1,5 @@ +int main() { + struct C {int x;} *c; + /* fail because no member called `y' */ + c->y; +} diff --git a/semantics_data/fail7.c b/semantics_data/fail7.c new file mode 100644 index 0000000..5040408 --- /dev/null +++ b/semantics_data/fail7.c @@ -0,0 +1,6 @@ +int *main(int a,int b) { + struct {int x;} c; + /* fail because of wrong argument type */ + main(2, c); + return &a; +} diff --git a/semantics_data/fail8.c b/semantics_data/fail8.c new file mode 100644 index 0000000..21efd5f --- /dev/null +++ b/semantics_data/fail8.c @@ -0,0 +1,5 @@ +struct { + int f(); +}; +int main() { +} diff --git a/semantics_data/function_returns_function.c b/semantics_data/function_returns_function.c new file mode 100644 index 0000000..e590ddb --- /dev/null +++ b/semantics_data/function_returns_function.c @@ -0,0 +1 @@ +int main()() {} diff --git a/semantics_data/global_decl.c b/semantics_data/global_decl.c new file mode 100644 index 0000000..ff49829 --- /dev/null +++ b/semantics_data/global_decl.c @@ -0,0 +1,3 @@ +int x = x;
+int main() {
+}
diff --git a/semantics_data/incomp_initr.c b/semantics_data/incomp_initr.c new file mode 100644 index 0000000..1068284 --- /dev/null +++ b/semantics_data/incomp_initr.c @@ -0,0 +1,4 @@ +int main() { + struct A {int x, y;} b; + int a[(1 + 1 == 2) * 2] = {1, b}; +} diff --git a/semantics_data/incomp_param.c b/semantics_data/incomp_param.c new file mode 100644 index 0000000..c1ece5f --- /dev/null +++ b/semantics_data/incomp_param.c @@ -0,0 +1,5 @@ +int f(struct A a, int b) { + +} +int main() { +} diff --git a/semantics_data/local_struct.c b/semantics_data/local_struct.c new file mode 100644 index 0000000..28cb223 --- /dev/null +++ b/semantics_data/local_struct.c @@ -0,0 +1,7 @@ +struct A {int x; int y; } b;
+int f(struct A {int a;} p) {
+ struct A a;
+ a.x;
+}
+int main() {
+}
diff --git a/semantics_data/param1.c b/semantics_data/param1.c new file mode 100644 index 0000000..6c22a2b --- /dev/null +++ b/semantics_data/param1.c @@ -0,0 +1,5 @@ +int f(); +int f() {} +int main() { + f(1, 2, 3); +} diff --git a/semantics_data/param2.c b/semantics_data/param2.c new file mode 100644 index 0000000..6b1726d --- /dev/null +++ b/semantics_data/param2.c @@ -0,0 +1,4 @@ +int f(); +int f(int a) {} +int main() { +} diff --git a/semantics_data/param3.c b/semantics_data/param3.c new file mode 100644 index 0000000..8492455 --- /dev/null +++ b/semantics_data/param3.c @@ -0,0 +1,4 @@ +int f(int a); +int f() {} +int main() { +} diff --git a/semantics_data/pass.c b/semantics_data/pass.c new file mode 100644 index 0000000..a7d4481 --- /dev/null +++ b/semantics_data/pass.c @@ -0,0 +1,147 @@ +int; +char; +struct {int x;}; +/* useless declarations are ok */ + +int a(int a); +int a(int d); +/* duplicate function declarations are ok */ + +struct A {int x; int y;} b; + +/* struct definitions in parameters is in a local scope + * subsequent parameter can make use of previous defined struct */ + +int foo(struct A {int x;} a, struct A b) { + /* function declaration in local scope is ok */ + int f(char *x); + /* A is defined in parameters */ + struct A c; + c.x = 43; +} + +void bar() { + /* struct definition is ok inside a function */ + struct A {int x;}; + struct A a; + a.x = 1; + { + /* the following `A' is different from previous one */ + struct A {int y;}; + struct A b; + b.y = 2; + } +} + +struct C c; +struct C { + struct D { + int x, y; + } b; + int c; + struct D d; +}; +struct D d; /* feasible here */ + +void nonsense() { + char q; + (void)q; + return "yay"; +} + +int assign() { + int *a; + struct {int *x;} b; + a = b.x; +} + +void incomplete() { + struct E {struct F *f;} e; +} + +void delay() { + struct G *g; + struct G {int x; }; + g->x = 1; +} + +void comma() { + int a; + int *b; + (b++, a++) * 3; +} + +int complex_pointer() { + int (*g(int ***e[10]))(); +} + +int fp(int a, int b, int c) { + int (*f)(int a, int b, int c); + f = ****fp + 1; + (****f)(1, 2, 3); + f = &fp + 1; +} + +int fc(int fc()) { + fc(complex_pointer); +} + +int incomp(struct I a); +struct I { int i, j; }; + +void (*bsd_signal(int sig, void (*func)(int a)))(int b); + +void array() { + int a[(1 + 1 == 2) * 2]; +} + +void local_decl() { + int y = y; + { + int x = x; + int a; + int b = a = 2; + } +} + +struct Node n; +struct Node {int x, y;} n; +/* global forward declaration is ok */ +int again; +int again; + +typedef int def; +int typedef1() { + int def; /* overrides outer typedef */ + def = 1; +} +typedef int *ptr1; +int typedef2() { + typedef int **ptr2; + { + typedef int ***ptr3; + ptr3 ptr2; + ptr3 ptr1; + } +} + +typedef struct TA { + int x; +} TA; +typedef struct TA TA; +int typedef_struct() { + TA a; + a.x = 1; +} + +struct AA {int x; int y; }; +int aa(struct AA {int a;} p) { + struct AA a; + a.a; +} + +int main() { + int self = sizeof self; + n.x = 1; + n.y = 2; +} diff --git a/semantics_data/ref.c b/semantics_data/ref.c new file mode 100644 index 0000000..4644127 --- /dev/null +++ b/semantics_data/ref.c @@ -0,0 +1,4 @@ +int main() { + int a, b; + &(a + b); +} diff --git a/semantics_data/sizeof.c b/semantics_data/sizeof.c new file mode 100644 index 0000000..4d0dceb --- /dev/null +++ b/semantics_data/sizeof.c @@ -0,0 +1,3 @@ +int main() { + int a[sizeof a]; +} diff --git a/semantics_data/typedef1.c b/semantics_data/typedef1.c new file mode 100644 index 0000000..e67c112 --- /dev/null +++ b/semantics_data/typedef1.c @@ -0,0 +1,4 @@ +int main() { + typedef int a; + int a(); +} diff --git a/semantics_data/typedef2.c b/semantics_data/typedef2.c new file mode 100644 index 0000000..be9f3a4 --- /dev/null +++ b/semantics_data/typedef2.c @@ -0,0 +1,4 @@ +int main() { + typedef int a; +} +int a; diff --git a/semantics_data/typedef3.c b/semantics_data/typedef3.c new file mode 100644 index 0000000..71c45d1 --- /dev/null +++ b/semantics_data/typedef3.c @@ -0,0 +1,4 @@ +typedef int a; +int main() { +} +int a; diff --git a/semantics_data/typedef4.c b/semantics_data/typedef4.c new file mode 100644 index 0000000..0aeb0f4 --- /dev/null +++ b/semantics_data/typedef4.c @@ -0,0 +1,10 @@ +typedef struct I I; +int incomp(I a); +struct I { int i, j; }; +int incomp(I a) {} +typedef int b; +int main() { + I i; + b b; + incomp(i); +} diff --git a/semantics_data/void_decl.c b/semantics_data/void_decl.c new file mode 100644 index 0000000..8a68b18 --- /dev/null +++ b/semantics_data/void_decl.c @@ -0,0 +1,3 @@ +void a;
+int main() {
+}
diff --git a/semantics_data/void_decl2.c b/semantics_data/void_decl2.c new file mode 100644 index 0000000..30c7af6 --- /dev/null +++ b/semantics_data/void_decl2.c @@ -0,0 +1,5 @@ +struct {
+ void a;
+};
+int main() {
+}
@@ -0,0 +1,2723 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include "ast.h" +#include "ssa.h" +#include "mips.h" +#define NEW(type) ((type *)malloc(sizeof(type))) +#define DBLINK(from, to) ((from)->next = (to))->prev = (from) + +static CGraph cfg, dtree; +static CBlock_t blks[MAX_BLOCK]; +static COList_t raw_defs; /* defintion of all vars and tmps */ +static int bcnt; /* block counter */ +static int tcnt; /* temporary counter */ + +static int quiet; +static int gbbase; +static CBlock_t entry; +static COList_t defs; /* all defintions that have actual effects */ + +/* for code generation */ +CFuncIR_t func_ir; + +COpr_t copr_create(void) { + COpr_t opr = NEW(COpr); + opr->type = NULL; + opr->cval = NULL; + opr->range = NULL; + opr->same = opr; + opr->dep = 0; + opr->mod = 0; + opr->par = opr; + return opr; +} + +CInst_t cinst_create(void) { + CInst_t inst = NEW(CInst); + inst->dest = NULL; + inst->src1 = NULL; + inst->src2 = NULL; + inst->sysp = 0; + return inst; +} + +CBlock_t cblock_create(int inc) { + CBlock_t cblk = NEW(CBlock); + CInst_t dum = cinst_create(); + CPhi_t pdum = NEW(CPhi); + dum->prev = dum; + dum->next = dum; + pdum->prev = pdum; + pdum->next = pdum; + cblk->insts = dum; + cblk->phis = pdum; + cblk->next = NULL; + if (inc) + cblk->id = bcnt++; + cblk->ref = 0; + return cblk; +} + +void cblock_append(CBlock_t cblk, CInst_t inst) { + CInst_t head = cblk->insts; + (inst->prev = head->prev)->next = inst; + (inst->next = head)->prev = inst; +} + +void cblock_pushfront(CBlock_t cblk, CInst_t inst) { + CInst_t head = cblk->insts; + (inst->next = head->next)->prev = inst; + (inst->prev = head)->next = inst; +} + +void cblock_pappend(CBlock_t cblk, CPhi_t phi) { + CPhi_t head = cblk->phis; + (phi->prev = head->prev)->next = phi; + (phi->next = head)->prev = phi; +} + +void cblock_popback(CBlock_t cblk) { + CInst_t last = cblk->insts->prev; + last->next->prev = last->prev; + last->prev->next = last->next; +} + +void cblock_popfront(CBlock_t cblk) { + CInst_t first = cblk->insts->next; + first->next->prev = first->prev; + first->prev->next = first->next; +} + +CInst_t cblock_getback(CBlock_t cblk) { + CInst_t res = cblk->insts->prev; + return res != cblk->insts ? res : NULL; +} + +int cblock_isempty(CBlock_t cblk) { + return cblk->insts->prev == cblk->insts; +} + +CVar_t ctmp_create(void) { + static char buff[MAX_NAMELEN]; + sprintf(buff, "t%d", tcnt++); + return cvar_create(strdup(buff), NULL, NULL); +} + +void ctmp_destroy(CVar_t type) { + /* allocated dynamically */ + free(type->name); + free(type); +} + +void cfg_clear(void) { + int i; + for (i = 0; i < MAX_BLOCK; i++) + { + CEdge *p, *np; + for (p = cfg.head[i]; p; p = np) + { + np = p->next; + free(p); + } + cfg.head[i] = NULL; + for (p = cfg.rhead[i]; p; p = np) + { + np = p->next; + free(p); + } + cfg.rhead[i] = NULL; + } +} + +void dtree_clear(void) { + int i; + CEdge *p, *np; + for (i = 0; i < MAX_BLOCK; dtree.head[i++] = NULL) + for (p = dtree.head[i]; p; p = np) + { + np = p->next; + free(p); + } +} + +void cfg_add_edge(CBlock_t from, CBlock_t to) { + int fid = from->id, tid = to->id; +#ifdef CIBIC_DEBUG + fprintf(stderr, "%d -> %d\n", from->id, to->id); +#endif + CEdge *e = NEW(CEdge), *re = NEW(CEdge); + e->to = to; + e->next = cfg.head[fid]; + cfg.head[fid] = e; + + re->to = from; + re->next = cfg.rhead[tid]; + cfg.rhead[tid] = re; +} + +void dtree_add_edge(CBlock_t from, CBlock_t to) { +#ifdef CIBIC_DEBUG + fprintf(stderr, "%d d-> %d\n", from->id, to->id); +#endif + int id = from->id; + CEdge *e = NEW(CEdge); + e->to = to; + e->next = dtree.head[id]; + dtree.head[id] = e; +} + +void copr_print(FILE *f, COpr_t opr) { + switch (opr->kind) + { + case VAR: + fprintf(f, "%s_%d", opr->info.var->name, opr->sub); + break; + case TMP: fprintf(f, "%s", opr->info.var->name); + break; + case IMM: fprintf(f, "%d", opr->info.imm); + break; + case IMMS: fprintf(f, "\"%s\"", opr->info.cstr->str); + break; + case IMMF: fprintf(f, "%s", opr->info.str); + break; + } +} + +void cinst_print(FILE *f, CInst_t inst) { + switch (inst->op) + { + case LOAD: + fprintf(f, "load "); + copr_print(f, inst->dest); + break; + case MOVE: + copr_print(f, inst->dest); + fprintf(f, " = "); + copr_print(f, inst->src1); + break; + case BEQ: + fprintf(f, "if ("); + copr_print(f, inst->src1); + fprintf(f, " == "); + copr_print(f, inst->src2); + fprintf(f, ") goto _L"); + copr_print(f, inst->dest); + break; + case BNE: + fprintf(f, "if ("); + copr_print(f, inst->src1); + fprintf(f, " != "); + copr_print(f, inst->src2); + fprintf(f, ") goto _L"); + copr_print(f, inst->dest); + break; + case GOTO: + fprintf(f, "goto _L"); + copr_print(f, inst->dest); + break; + case ARR: + copr_print(f, inst->dest); + fprintf(f, " = "); + copr_print(f, inst->src1); + fprintf(f, "["); + copr_print(f, inst->src2); + fprintf(f, "]"); + break; + case NEG: + copr_print(f, inst->dest); + fprintf(f, " = -"); + copr_print(f, inst->src1); + break; + case WARR: + copr_print(f, inst->dest); + fprintf(f, "["); + copr_print(f, inst->src2); + fprintf(f, "] = "); + copr_print(f, inst->src1); + break; + case PUSH: + fprintf(f, "push "); + copr_print(f, inst->src1); + break; + case CALL: + copr_print(f, inst->dest); + fprintf(f, " = call "); + copr_print(f, inst->src1); + break; + case RET: + if (inst->src1) + { + fprintf(f, "return "); + copr_print(f, inst->src1); + } + else fprintf(f, "return"); + break; + case ADDR: + copr_print(f, inst->dest); + fprintf(f, " = addr "); + copr_print(f, inst->src1); + break; + default: + { + const char *op; + switch (inst->op) + { + case MUL: op = "*"; break; + case DIV: op = "/"; break; + case MOD: op = "%"; break; + case ADD: op = "+"; break; + case SUB: op = "-"; break; + case SHL: op = "<<"; break; + case SHR: op = ">>"; break; + case AND: op = "&"; break; + case XOR: op = "^"; break; + case OR: op = "|"; break; + case LT: op = "<"; break; + case GT: op = ">"; break; + case LE: op = "<="; break; + case GE: op = ">="; break; + case EQ: op = "=="; break; + case NE: op = "!="; break; + case NOR: op = "nor"; break; + default: ; + } + copr_print(f, inst->dest); + fprintf(f, " = "); + copr_print(f, inst->src1); + fprintf(f, " %s ", op); + copr_print(f, inst->src2); + } + } + fprintf(f, "\n"); +} + +void cphi_print(CPhi_t phi, CBlock_t blk) { + int i; + fprintf(stderr, "%s_%d = phi", phi->dest->info.var->name, + phi->dest->sub); + for (i = 0; i < blk->pred; i++) + fprintf(stderr, " %s_%d", phi->oprs[i]->info.var->name, + phi->oprs[i]->sub); + fprintf(stderr, "\n"); +} + +void cblock_print(CBlock_t blk) { + fprintf(stderr, "_L%d:\n", blk->id + gbbase); + { + CPhi_t p, sp = blk->phis; + for (p = sp->next; p != sp; p = p->next) + { + fprintf(stderr, "\t"); + cphi_print(p, blk); + } + } + { + CInst_t p, sp = blk->insts; + for (p = sp->next; p != sp; p = p->next) + { + fprintf(stderr, "%02d\t", p->id); + cinst_print(stderr, p); + } + } +} +void ssa_func_print(CBlock_t p) { + for (; p; p = p->next) + cblock_print(p); +} +void ssa_func(CType_t); +void ssa_generate(int quiet) { + CTList_t f; + CFuncIR_t cf; + func_ir = NULL; + for (f = funcs; f; f = f->next) + { + cf = NEW(CFuncIR); + ssa_func(cf->func = f->type); + cf->gbbase = gbbase; + cf->defs = defs; + cf->entry = entry; + cf->next = func_ir; + func_ir = cf; + gbbase += bcnt; + bcnt = 0; + } + if (!quiet) + { + for (cf = func_ir; cf; cf = cf->next) + { + fprintf(stderr, "%s:\n", cf->func->name); + ssa_func_print(cf->entry); + } + } +} + +#define POINTER_CONV(inst) \ +do { \ + if (rt->type == CARR) \ + { \ + /* convert to pointer type */ \ + CType_t a; \ + a = ctype_create("", CPTR, p); \ + a->rec.ref = rt->rec.arr.elem; \ + (inst)->op = ADD; \ + (inst)->dest->type = a; \ + } \ + else if (rt->type == CSTRUCT || rt->type == CUNION) \ + (inst)->op = ADD; \ + else (inst)->op = ARR; \ +} while (0) + + +COpr_t ssa_exp_(CNode *p, CBlock_t *, CInst_t, CBlock_t); +COpr_t ssa_postfix(CNode *p, CBlock_t *cur, CInst_t lval, CBlock_t succ) { + CNode *post = p->chd->next; + CType_t rt = p->ext.type; + CInst_t base = cinst_create(); + switch (post->rec.subtype) + { + case POSTFIX_ARR: + { + CInst_t off = cinst_create(); + off->dest = copr_create(); + off->dest->kind = TMP; + off->dest->info.var = ctmp_create(); + off->dest->type = post->chd->ext.type; + off->op = MUL; + off->src1 = ssa_exp_(post->chd, cur, NULL, succ); + off->src2 = copr_create(); + off->src2->kind = IMM; + off->src2->info.imm = calc_size(rt); + cblock_append(*cur, off); + + base->dest = copr_create(); + base->dest->kind = TMP; + base->dest->info.var = ctmp_create(); + base->dest->type = rt; + base->src1 = ssa_exp_(p->chd, cur, NULL, succ); + base->src2 = off->dest; + POINTER_CONV(base); + } + break; + case POSTFIX_DOT: + { + base->dest = copr_create(); + base->dest->kind = TMP; + base->dest->info.var = ctmp_create(); + base->dest->type = rt; + base->src1 = ssa_exp_(p->chd, cur, NULL, succ); + base->src2 = copr_create(); + base->src2->kind = IMM; + base->src2->info.imm = p->ext.offset; + POINTER_CONV(base); + } + break; + case POSTFIX_PTR: + { + base->dest = copr_create(); + base->dest->kind = TMP; + base->dest->info.var = ctmp_create(); + base->dest->type = rt; + base->src1 = ssa_exp_(p->chd, cur, NULL, succ); + base->src2 = copr_create(); + base->src2->kind = IMM; + base->src2->info.imm = p->ext.offset; + POINTER_CONV(base); + } + break; + case POSTFIX_CALL: + { + CNode *arg = post->chd->chd; + CInst h; + CInst_t t = &h, n; + base->op = CALL; + base->src1 = ssa_exp_(p->chd, cur, lval, succ); + base->dest = copr_create(); + base->dest->kind = TMP; + base->dest->info.var = ctmp_create(); + base->dest->type = rt; + for (; arg; arg = arg->next) + { + CInst_t pi = cinst_create(); + pi->op = PUSH; + pi->src1 = ssa_exp_(arg, cur, lval, succ); + t->next = pi; + t = pi; + } + t->next = NULL; + for (t = h.next; t; t = n) + { + n = t->next; + cblock_append(*cur, t); + } + } + break; + default: + { + CInst_t tins = cinst_create(); + ssa_exp_(p->chd, cur, tins, succ); + base->op = post->rec.subtype == OPT_INC ? ADD : SUB; + base->src2 = copr_create(); + base->src2->kind = IMM; + base->src2->info.imm = 1; + base->src1 = ssa_exp_(p->chd, cur, NULL, succ); + if (tins->op == MOVE) + { + base->dest = tins->dest; + cblock_append(succ, base); + free(tins); + } + else + { + CInst_t tins2 = cinst_create(); + base->dest = copr_create(); + base->dest->kind = TMP; + base->dest->info.var = ctmp_create(); + base->dest->type = rt; + tins->src1 = base->dest; + tins2->op = ARR; + tins2->src1 = tins->dest; + tins2->src2 = tins->src2; + tins2->dest = copr_create(); + tins2->dest->kind = TMP; + tins2->dest->info.var = ctmp_create(); + tins2->dest->type = rt; + + cblock_append(succ, base); + cblock_append(succ, tins); + cblock_append(succ, tins2); + } + return base->src1; + } + break; + } + + if (lval) + { + lval->op = WARR; + lval->dest = base->src1; + lval->src2 = base->src2; + lval->wtype = p->ext.type; + free(base); + return lval->dest; + } + cblock_append(*cur, base); + return base->dest; +} + +CInst_t compress_branch(COpr_t r, CBlock_t blk, int rev) { + int flag = -1; + CInst_t b; + if (r->kind == TMP) + { + b = cblock_getback(blk); + if (b) + { + assert(r == b->dest); + if (b->op == EQ) + flag = 0; + else if (b->op == NE) + flag = 1; + } + } + if (flag != -1) + b->op = flag ? BNE : BEQ; + else + { + b = cinst_create(); + b->op = BNE; + b->src1 = r; + b->src2 = copr_create(); + b->src2->kind = IMM; + b->src2->info.imm = 0; + cblock_append(blk, b); + } + b->op ^= rev; + b->dest = copr_create(); + b->dest->kind = IMM; + return b; +} + +static CNode *opt_if = NULL; +static CBlock_t opt_if_loop_exit = NULL; + +CBlock_t ssa_stmt(CNode *, CBlock_t, CBlock_t); +#define IS_PTR(tt) ((tt) == CPTR || (tt) == CARR) +COpr_t ssa_exp(CNode *, CBlock_t *, int); +COpr_t ssa_exp_(CNode *p, CBlock_t *cur, CInst_t lval, CBlock_t succ) {/*{{{*/ + COpr_t res; + CInst_t inst = cinst_create(); + switch (p->type) + { + case NOP: ; break; + case ID: + res = copr_create(); + res->kind = VAR; + res->info.var = p->ext.var; + res->type = p->ext.type; + { + CVar_t var = res->info.var; + CType_t type = var->type; + if (type->type == CPTR && + type->rec.ref->type == CFUNC) + { + char *name = type->rec.ref->name; + if (*name != '\0') + { + res->kind = IMMF; + res->info.str = name; + } + } + } + + if (lval) + { + lval->op = MOVE; + lval->dest = res; + } + break; + case STR: + res = copr_create(); + res->kind = IMMS; + res->info.cstr = (CSList_t)(p->ext.const_val); + break; + default: + if (p->ext.is_const) + { + res = copr_create(); + res->kind = IMM; + res->info.imm = p->ext.const_val; + } + else + { + int op = p->rec.subtype; + int rec = 1, auto_dest = 1; + + if (op == ',') + { + ssa_exp(p->chd, cur, 1); + return ssa_exp_(p->chd->next, cur, NULL, succ); + } + else if (op == '=') + { + inst->src1 = ssa_exp_(p->chd->next, cur, NULL, succ); + ssa_exp_(p->chd, cur, inst, succ); + if (inst->op == MOVE) + { + CInst_t last = cblock_getback(*cur); + if (last && last->dest->kind == TMP + && last->dest == inst->src1) + { + free(last->dest); + last->dest = inst->dest; + free(inst); + return last->dest; + } + else + { + cblock_append(*cur, inst); + return inst->dest; + } + } + else + { + CInst_t tins = cinst_create(); + cblock_append(*cur, inst); + tins->op = ARR; + tins->src1 = inst->dest; /* base */ + tins->src2 = inst->src2; /* displacement */ + tins->dest = copr_create(); + tins->dest->kind = TMP; + tins->dest->info.var = ctmp_create(); + tins->dest->type = p->ext.type; + cblock_append(*cur, tins); + return tins->dest; + } + } + else if (op == '*' && !p->chd->next) + { + if (lval) + { + lval->op = WARR; + lval->dest = ssa_exp_(p->chd, cur, NULL, succ); + lval->src2 = copr_create(); + lval->src2->kind = IMM; + lval->src2->info.imm = 0; + lval->wtype = p->ext.type; + return lval->dest; + } + else + { + CType_t rt = p->ext.type; + inst->src1 = ssa_exp_(p->chd, cur, NULL, succ); + inst->src2 = copr_create(); + inst->src2->kind = IMM; + inst->src2->info.imm = 0; + inst->dest = copr_create(); + inst->dest->kind = TMP; + inst->dest->info.var = ctmp_create(); + inst->dest->type = rt; + POINTER_CONV(inst); + cblock_append(*cur, inst); + return inst->dest; + } + } + else if (op == OPT_AND) + { + CBlock_t else_h = cblock_create(1), else_t = else_h, + one_h = cblock_create(1), one_t = one_h, + zero_h = cblock_create(1), zero_t = zero_h, + next_blk = cblock_create(1), sblk; + COpr_t r0, r1, ri; + CInst_t b, a0, a1; + CPhi_t m = NEW(CPhi); + CNode *t; + if (opt_if) + { + CNode *body1 = opt_if->chd->next, *body2 = body1->next; + one_t = ssa_stmt(body1, one_h, opt_if_loop_exit); + if (body2->type != NOP) + zero_t = ssa_stmt(body2, zero_h, opt_if_loop_exit); + opt_if = NULL; + } + else + { + /* constant opt */ + a0 = cinst_create(); + a0->op = MOVE; + a0->dest = copr_create(); + a0->dest->kind = TMP; + a0->dest->info.var = ctmp_create(); + a0->dest->type = p->ext.type; /* int */ + a0->src1 = copr_create(); + a0->src1->kind = IMM; + a0->src1->info.imm = 0; + cblock_append(zero_h, a0); + + a1 = cinst_create(); + a1->op = MOVE; + a1->dest = copr_create(); + a1->dest->kind = TMP; + a1->dest->info.var = ctmp_create(); + a1->dest->type = p->ext.type; + a1->src1 = copr_create(); + a1->src1->kind = IMM; + a1->src1->info.imm = 1; + cblock_append(one_h, a1); + + m->dest = copr_create(); + m->dest->kind = TMP; + m->dest->info.var = ctmp_create(); + m->dest->type = p->ext.type; + m->oprs = (COpr_t *)malloc(sizeof(COpr_t) * 2); + m->oprs[0] = a0->dest; + m->oprs[1] = a1->dest; + cblock_pappend(next_blk, m); + } + + r1 = ssa_exp_(p->chd->next, &else_t, NULL, succ); + compress_branch(r1, else_t, 1)->dest->info.imm = zero_h->id + gbbase; + zero_h->ref = 1; + + sblk = else_h; + for (t = p->chd; t->rec.subtype == OPT_AND; t = t->chd) + { + CBlock_t c_h = cblock_create(1), c_t = c_h; + ri = ssa_exp_(t->chd->next, &c_t, NULL, succ); + compress_branch(ri, c_t, 1)->dest->info.imm = zero_h->id + gbbase; + cfg_add_edge(c_t, zero_h); /* tail */ + DBLINK(c_t, sblk); + cfg_add_edge(c_t, sblk); /* connect to header */ + sblk = c_h; + } + + r0 = ssa_exp_(t, cur, NULL, succ); + compress_branch(r0, *cur, 1)->dest->info.imm = zero_h->id + gbbase; + cfg_add_edge(*cur, zero_h); + DBLINK(*cur, sblk); + cfg_add_edge(*cur, sblk); + + b = cinst_create(); + b->op = GOTO; + b->dest = copr_create(); + b->dest->kind = IMM; + b->dest->info.imm = next_blk->id + gbbase; + cblock_append(one_t, b); + next_blk->ref = 1; + + DBLINK(else_t, one_h); + DBLINK(one_t, zero_h); + DBLINK(zero_t, next_blk); + + cfg_add_edge(else_t, one_h); + cfg_add_edge(else_t, zero_h); + cfg_add_edge(one_t, next_blk); + cfg_add_edge(zero_t, next_blk); + + *cur = next_blk; + return m->dest; + } + else if (op == OPT_OR) + { + CBlock_t else_h = cblock_create(1), else_t = else_h, + one_h = cblock_create(1), one_t = one_h, + zero_h = cblock_create(1), zero_t = zero_h, + next_blk = cblock_create(1), sblk; + COpr_t r0, r1, ri; + CInst_t b, a0, a1; + CPhi_t m = NEW(CPhi); + CNode *t; + if (opt_if) + { + CNode *body1 = opt_if->chd->next, *body2 = body1->next; + one_t = ssa_stmt(body1, one_h, opt_if_loop_exit); + if (body2->type != NOP) + zero_t = ssa_stmt(body2, zero_h, opt_if_loop_exit); + opt_if = NULL; + } + else + { + /* constant opt */ + a0 = cinst_create(); + a0->op = MOVE; + a0->dest = copr_create(); + a0->dest->kind = TMP; + a0->dest->info.var = ctmp_create(); + a0->dest->type = p->ext.type; /* int */ + a0->src1 = copr_create(); + a0->src1->kind = IMM; + a0->src1->info.imm = 0; + cblock_append(zero_h, a0); + + a1 = cinst_create(); + a1->op = MOVE; + a1->dest = copr_create(); + a1->dest->kind = TMP; + a1->dest->info.var = ctmp_create(); + a1->dest->type = p->ext.type; + a1->src1 = copr_create(); + a1->src1->kind = IMM; + a1->src1->info.imm = 1; + cblock_append(one_h, a1); + + m->dest = copr_create(); + m->dest->kind = TMP; + m->dest->info.var = ctmp_create(); + m->dest->type = p->ext.type; + m->oprs = (COpr_t *)malloc(sizeof(COpr_t) * 2); + m->oprs[0] = a1->dest; + m->oprs[1] = a0->dest; + cblock_pappend(next_blk, m); + } + + r1 = ssa_exp_(p->chd->next, &else_t, NULL, succ); + compress_branch(r1, else_t, 0)->dest->info.imm = one_h->id + gbbase; + one_h->ref = 1; + + sblk = else_h; + for (t = p->chd; t->rec.subtype == OPT_OR; t = t->chd) + { + CBlock_t c_h = cblock_create(1), c_t = c_h; + ri = ssa_exp_(t->chd->next, &c_t, NULL, succ); + compress_branch(ri, c_t, 0)->dest->info.imm = one_h->id + gbbase; + cfg_add_edge(c_t, one_h); /* tail */ + DBLINK(c_t, sblk); + cfg_add_edge(c_t, sblk); /* connect to header */ + sblk = c_h; + } + + r0 = ssa_exp_(t, cur, NULL, succ); + compress_branch(r0, *cur, 0)->dest->info.imm = one_h->id + gbbase; + cfg_add_edge(*cur, one_h); + DBLINK(*cur, sblk); + cfg_add_edge(*cur, sblk); + + b = cinst_create(); + b->op = GOTO; + b->dest = copr_create(); + b->dest->kind = IMM; + b->dest->info.imm = next_blk->id + gbbase; + cblock_append(zero_t, b); + next_blk->ref = 1; + + DBLINK(else_t, zero_h); + DBLINK(zero_t, one_h); + DBLINK(one_t, next_blk); + + cfg_add_edge(else_t, zero_h); + cfg_add_edge(else_t, one_h); + cfg_add_edge(zero_t, next_blk); + cfg_add_edge(one_t, next_blk); + + *cur = next_blk; + return m->dest; + } + else if (op == '+' && IS_PTR(p->ext.type->type)) + { + COpr_t lhs = ssa_exp_(p->chd, cur, lval, succ), + rhs = ssa_exp_(p->chd->next, cur, lval, succ); + CInst_t index = cinst_create(); + CType_t et = p->chd->ext.type; + if (et->type == CPTR) + et = et->rec.ref; + else + et = et->rec.arr.elem; + index->op = MUL; + index->dest = copr_create(); + index->dest->kind = TMP; + index->dest->info.var = ctmp_create(); + index->dest->type = p->chd->next->ext.type; + index->src1 = rhs; + index->src2 = copr_create(); + index->src2->kind = IMM; + index->src2->info.imm = calc_size(et); + + inst->op = ADD; + inst->dest = copr_create(); + inst->dest->kind = TMP; + inst->dest->info.var = ctmp_create(); + inst->dest->type = p->ext.type; + inst->src1 = lhs; + inst->src2 = index->dest; + cblock_append(*cur, index); + cblock_append(*cur, inst); + return inst->dest; + } + else if (op == '-' && IS_PTR(p->chd->ext.type->type)) + { + CType_t nt = p->chd->next->ext.type; + CType_t et = p->chd->ext.type; + COpr_t lhs = ssa_exp_(p->chd, cur, lval, succ), + rhs = ssa_exp_(p->chd->next, cur, lval, succ); + CInst_t diff = cinst_create(); + + if (et->type == CPTR) + et = et->rec.ref; + else + et = et->rec.arr.elem; + + if (IS_PTR(nt->type)) + { + diff->op = SUB; + diff->dest = copr_create(); + diff->dest->kind = TMP; + diff->dest->info.var = ctmp_create(); + diff->dest->type = p->ext.type; + diff->src1 = lhs; + diff->src2 = rhs; + + inst->op = DIV; + inst->dest = copr_create(); + inst->dest->kind = TMP; + inst->dest->info.var = ctmp_create(); + inst->dest->type = p->ext.type; + inst->src1 = diff->dest; + inst->src2 = copr_create(); + inst->src2->kind = IMM; + inst->src2->info.imm = calc_size(et); + } + else + { + diff->op = MUL; + diff->dest = copr_create(); + diff->dest->kind = TMP; + diff->dest->info.var = ctmp_create(); + diff->dest->type = p->chd->next->ext.type; + diff->src1 = rhs; + diff->src2 = copr_create(); + diff->src2->kind = IMM; + diff->src2->info.imm = calc_size(et); + + inst->op = SUB; + inst->dest = copr_create(); + inst->dest->kind = TMP; + inst->dest->info.var = ctmp_create(); + inst->dest->type = p->ext.type; + inst->src1 = lhs; + inst->src2 = diff->dest; + } + cblock_append(*cur, diff); + cblock_append(*cur, inst); + return inst->dest; + } + else if (op == '&' && !p->chd->next) + { + ssa_exp_(p->chd, cur, inst, succ); + if (inst->op == MOVE) + { + inst->op = ADDR; + inst->src1 = inst->dest; + } + else + { + inst->op = ADD; + inst->src1 = inst->dest; + } + inst->dest = copr_create(); + inst->dest->kind = TMP; + inst->dest->info.var = ctmp_create(); + inst->dest->type = p->ext.type; + cblock_append(*cur, inst); + return inst->dest; + } + else + { + int unary = 0; + inst->op = (unsigned)-1; + switch (op) + { + case ASS_MUL: inst->op = MUL; break; + case ASS_DIV: inst->op = DIV; break; + case ASS_MOD: inst->op = MOD; break; + case ASS_ADD: inst->op = ADD; break; + case ASS_SUB: inst->op = SUB; break; + case ASS_SHL: inst->op = SHL; break; + case ASS_SHR: inst->op = SHR; break; + case ASS_AND: inst->op = AND; break; + case ASS_XOR: inst->op = XOR; break; + case ASS_OR: inst->op = OR; break; + case OPT_INC: inst->op = ADD; unary = 1; break; + case OPT_DEC: inst->op = SUB; unary = 1; break; + } + if (inst->op != (unsigned)-1) + { + CInst_t tins = cinst_create(); + ssa_exp_(p->chd, cur, tins, succ); /* as lval */ + inst->src1 = ssa_exp_(p->chd, cur, NULL, succ); /* as rval */ + if (unary) + { + inst->src2 = copr_create(); + inst->src2->kind = IMM; + inst->src2->info.imm = 1; + } + else + inst->src2 = ssa_exp_(p->chd->next, cur, NULL, succ); + if (tins->op == MOVE) + { + inst->dest = tins->dest; + cblock_append(*cur, inst); + free(tins); + return inst->dest; + } + else + { + CInst_t tins2 = cinst_create(); + inst->dest = copr_create(); + inst->dest->kind = TMP; + inst->dest->info.var = ctmp_create(); + inst->dest->type = p->ext.type; + tins->src1 = inst->dest; + tins2->op = ARR; + tins2->src1 = tins->dest; /* base */ + tins2->src2 = tins->src2; /* displacement */ + tins2->dest = copr_create(); + tins2->dest->kind = TMP; + tins2->dest->info.var = ctmp_create(); + tins2->dest->type = p->ext.type; + cblock_append(*cur, inst); + cblock_append(*cur, tins); + cblock_append(*cur, tins2); + return tins2->dest; + } + } + } + + switch (op) + { + case EXP_CAST: + { + res = ssa_exp_(p->chd->next, cur, lval, succ); + res->type = p->ext.type; + free(inst); + return res; + } + case EXP_POSTFIX: + free(inst); + return ssa_postfix(p, cur, lval, succ); + /* KW_SIZEOF is eliminated during semantic checking */ + default: + { + COpr_t lhs = ssa_exp_(p->chd, cur, lval, succ), + rhs = NULL; + if (p->chd->next) + rhs = ssa_exp_(p->chd->next, cur, lval, succ); + + inst->src1 = lhs; + inst->src2 = rhs; + switch (op) + { + case OPT_SHL: inst->op = SHL; break; + case OPT_SHR: inst->op = SHR; break; + case '|': inst->op = OR; break; + case '^': inst->op = XOR; break; + case OPT_EQ: inst->op = EQ; break; + case OPT_NE: inst->op = NE; break; + case '<': inst->op = LT; break; + case '>': inst->op = GT; break; + case OPT_LE: inst->op = LE; break; + case OPT_GE: inst->op = GE; break; + case '/': inst->op = DIV; break; + case '%': inst->op = MOD; break; + case '*': + inst->op = MUL; + break; + case '&': + inst->op = AND; + break; + case '+': + if (p->chd->next) + inst->op = ADD; + else res = lhs; + break; + case '-': + if (p->chd->next) + inst->op = SUB; + else + { + inst->op = NEG; + inst->src1 = lhs; + } + break; + case '~': + inst->op = NOR; + inst->src1 = lhs; + inst->src2 = copr_create(); + inst->src2->kind = IMM; + inst->src2->info.imm = 0; + break; + case '!': + inst->op = EQ; + inst->src1 = lhs; + inst->src2 = copr_create(); + inst->src2->kind = IMM; + inst->src2->info.imm = 0; + break; + default: + auto_dest = 0; + } + if (rec) + { + if (auto_dest) + { + inst->dest = copr_create(); + inst->dest->kind = TMP; + inst->dest->info.var = ctmp_create(); + inst->dest->type = p->ext.type; + } + cblock_append(*cur, inst); + res = inst->dest; + } + } + } + } + } + return res; +}/*}}}*/ + +COpr_t ssa_exp(CNode *p, CBlock_t *cur, int discard_last) { + CBlock_t succ = cblock_create(0); + COpr_t res = ssa_exp_(p, cur, NULL, succ); + CInst_t last; + { + CInst_t head = succ->insts, t; + while (head->next != head) + { + t = head->next; + cblock_popfront(succ); + cblock_append(*cur, t); + } + free(succ); + } + last = cblock_getback(*cur); + if (discard_last && last + && last->dest->kind == TMP + && last->op != CALL) /* temporary not used */ + { + ctmp_destroy(last->dest->info.var); + cblock_popback(*cur); + free(last); + } + return res; +} + +CBlock_t ssa_while(CNode *p, CBlock_t cur) {/*{{{*/ + CNode *exp = p->chd; + CBlock_t loop_h = cblock_create(1), loop_t, + cond_h= cblock_create(1), cond_t = cond_h, + next_blk = cblock_create(1); + CInst_t j_inst = cinst_create(); + COpr_t e = ssa_exp(exp, &cond_t, 0); + compress_branch(e, cond_t, 0)->dest->info.imm = loop_h->id + gbbase; + loop_h->ref = 1; + + DBLINK(cond_t, next_blk); + loop_t = ssa_stmt(exp->next, loop_h, next_blk); + + j_inst->op = GOTO; + j_inst->dest = copr_create(); + j_inst->dest->kind = IMM; + j_inst->dest->info.imm = cond_h->id + gbbase; + cond_h->ref = 1; + cblock_append(cur, j_inst); + + cfg_add_edge(cur, cond_h); + cfg_add_edge(cond_t, loop_h); + cfg_add_edge(loop_t, cond_h); + cfg_add_edge(cond_t, next_blk); + + DBLINK(cur, loop_h); + DBLINK(loop_t, cond_h); + + return next_blk; +}/*}}}*/ + +CBlock_t ssa_for(CNode *p, CBlock_t cur) {/*{{{*/ + CNode *exp1 = p->chd, + *exp2 = exp1->next, + *exp3 = exp2->next; + CBlock_t loop_h = cblock_create(1), loop_t, + cond_h = cblock_create(1), cond_t = cond_h, + next_blk = cblock_create(1); + CInst_t j_inst = cinst_create(); + COpr_t e = ssa_exp(exp2, &cond_t, 0); + compress_branch(e, cond_t, 0)->dest->info.imm = loop_h->id + gbbase; + loop_h->ref = 1; + + DBLINK(cond_t, next_blk); + loop_t = ssa_stmt(exp3->next, loop_h, next_blk); + + ssa_exp(exp1, &cur, 1); + ssa_exp(exp3, &loop_t, 1); + + j_inst->op = GOTO; + j_inst->dest = copr_create(); + j_inst->dest->kind = IMM; + j_inst->dest->info.imm = cond_h->id + gbbase; + cond_h->ref = 1; + cblock_append(cur, j_inst); + + cfg_add_edge(cur, cond_h); + cfg_add_edge(cond_t, loop_h); + cfg_add_edge(loop_t, cond_h); + cfg_add_edge(cond_t, next_blk); + + DBLINK(cur, loop_h); + DBLINK(loop_t, cond_h); + + return next_blk; +}/*}}}*/ + +CBlock_t ssa_if(CNode *p, CBlock_t cur, CBlock_t loop_exit) {/*{{{*/ + CNode *body1 = p->chd->next, + *body2 = body1->next; + CBlock_t then_blk, then_t, next_blk, + else_blk, else_t; + CInst_t if_inst; /* = cinst_create(); */ + COpr_t rt; + opt_if = p; + opt_if_loop_exit = loop_exit; + rt = ssa_exp(p->chd, &cur, 0); /* calculate cond */ + if (!opt_if) return cur; + else + { + opt_if = NULL; + opt_if_loop_exit = NULL; + } + if (rt->kind == IMM) + { + if (rt->info.imm) + return ssa_stmt(body1, cur, loop_exit); + else if (body2->type != NOP) + return ssa_stmt(body2, cur, loop_exit); + else + return cur; + } + then_blk = cblock_create(1); + if_inst = compress_branch(rt, cur, 1); + + cfg_add_edge(cur, then_blk); + DBLINK(cur, then_blk); + then_t = ssa_stmt(body1, then_blk, loop_exit); + if (body2->type != NOP) + { + CInst_t j_inst = cinst_create(); + j_inst->op = GOTO; + j_inst->dest = copr_create(); + j_inst->dest->kind = IMM; + + else_blk = cblock_create(1); + if_inst->dest->info.imm = else_blk->id + gbbase; + else_blk->ref = 1; + DBLINK(then_t, else_blk); + else_t = ssa_stmt(body2, else_blk, loop_exit); + if (cblock_isempty(else_t)) + next_blk = else_t; + else + { + next_blk = cblock_create(1); + DBLINK(else_t, next_blk); + cfg_add_edge(else_t, next_blk); + } + + j_inst->dest->info.imm = next_blk->id + gbbase; + next_blk->ref = 1; + cblock_append(then_t, j_inst); + + cfg_add_edge(cur, else_blk); + cfg_add_edge(then_t, next_blk); + } + else + { + if (cblock_isempty(then_t)) + next_blk = then_t; + else + { + next_blk = cblock_create(1); + DBLINK(then_t, next_blk); + cfg_add_edge(then_t, next_blk); + } + cfg_add_edge(cur, next_blk); + if_inst->dest->info.imm = next_blk->id + gbbase; + next_blk->ref = 1; + } + return next_blk; +}/*}}}*/ + +CBlock_t ssa_ret(CNode *p, CBlock_t cur) { + CInst_t inst = cinst_create(); + inst->op = RET; + if (p->chd->type != NOP) + inst->src1 = ssa_exp(p->chd, &cur, 0); + cblock_append(cur, inst); + return cur; +} + +CBlock_t ssa_break(CBlock_t cur, CBlock_t loop_exit) { + CInst_t inst = cinst_create(); + assert(loop_exit); + inst->op = GOTO; + inst->dest = copr_create(); + inst->dest->kind = IMM; + inst->dest->info.imm = loop_exit->id + gbbase; + loop_exit->ref = 1; + cblock_append(cur, inst); + cfg_add_edge(cur, loop_exit); + return cur; +} + +CBlock_t ssa_cont(CBlock_t cur, CBlock_t loop_exit) { + CInst_t inst = cinst_create(); + assert(loop_exit); + loop_exit = loop_exit->prev; /* loop cond */ + inst->op = GOTO; + inst->dest = copr_create(); + inst->dest->kind = IMM; + inst->dest->info.imm = loop_exit->id + gbbase; + loop_exit->ref = 1; + cblock_append(cur, inst); + cfg_add_edge(cur, loop_exit); + return cur; +} + +CBlock_t ssa_comp(CNode *, CBlock_t, CBlock_t loop_exit); +CBlock_t ssa_stmt(CNode *p, CBlock_t cur, CBlock_t loop_exit) { + switch (p->rec.subtype) + { + case STMT_EXP: + ssa_exp(p->chd, &cur, 1); + break; + case STMT_COMP: + cur = ssa_comp(p, cur, loop_exit); + break; + case STMT_IF: + return ssa_if(p, cur, loop_exit); + case STMT_FOR: + return ssa_for(p, cur); + case STMT_WHILE: + return ssa_while(p, cur); + case STMT_CONT: + return ssa_cont(cur, loop_exit); + case STMT_BREAK: + return ssa_break(cur, loop_exit); + case STMT_RET: + return ssa_ret(p, cur); + } + return cur; +} + +CBlock_t ssa_comp(CNode *p, CBlock_t cur, CBlock_t loop_exit) { + CNode *decls = p->chd, + *stmts = p->chd->next, *i; + if (decls->chd->type != NOP) + { + CVList_t a; + for (a = p->ext.autos; a; a = a->next) + { + CNode *initr = a->var->initr; + CInst_t last, inst; + if (!initr) continue; + assert(initr->rec.subtype == INITR_NORM); + inst = cinst_create(); + inst->src1 = ssa_exp(initr->chd, &cur, 0); + last = cblock_getback(cur); + if (last && last->dest->kind == TMP) + { + last->dest->kind = VAR; + free(last->dest->info.var); + free(inst); + last->dest->info.var = a->var; + last->dest->type = a->var->type; + } + else + { + inst->op = MOVE; + inst->dest = copr_create(); + inst->dest->kind = VAR; + inst->dest->info.var = a->var; + inst->dest->type = a->var->type; + cblock_append(cur, inst); + } + } + } + if (stmts->chd->type != NOP) + for (i = stmts->chd; i; i = i->next) + cur = ssa_stmt(i, cur, loop_exit); + return cur; +} + +int dom[MAX_BLOCK], ord[MAX_BLOCK], vis[MAX_BLOCK], par[MAX_BLOCK], ocnt; +int loop_tail[MAX_BLOCK]; +CPSet_t dfset[MAX_BLOCK], phi[MAX_BLOCK]; +CBList_t df[MAX_BLOCK]; + +void dfs(CBlock_t u, int v) { + CEdge *e; + par[u->id] = v; + vis[u->id] = -2; + for (e = cfg.head[u->id]; e; e = e->next) + { + CBlock_t v = e->to; + if (vis[v->id] == -1) + dfs(v, u->id); + else if (vis[v->id] == -2) + loop_tail[v->id] = u->id; + } + vis[u->id] = ocnt; + ord[ocnt++] = u->id; +} + +int intersect(int b1, int b2) { + while (b1 != b2) + if (b1 < b2) b1 = dom[b1]; + else b2 = dom[b2]; + return b1; +} + +void calc_dominant_frontier(void) { + int i; + int ch = 1; + ocnt = 0; + memset(vis, -1, sizeof vis); + memset(dom, -1, sizeof dom); + memset(loop_tail, -1, sizeof loop_tail); + dfs(entry, -1); + dom[vis[entry->id]] = vis[entry->id]; + while (ch) + { + int i; + ch = 0; + for (i = bcnt - 2; i >= 0; i--) + { + int id = ord[i]; + CEdge *e = cfg.rhead[id]; + int new_idom = vis[par[id]]; + for (; e; e = e->next) + { + int p = e->to->id; + if (vis[p] == new_idom) continue; + if (dom[vis[p]] != -1) + new_idom = intersect(vis[p], new_idom); + } + if (dom[i] != new_idom) + { + ch = 1; + dom[i] = new_idom; + } + } + } + for (i = 0; i < bcnt; i++) + dfset[i] = cpset_create(); + for (i = 0; i < bcnt; i++) + if (cfg.rhead[i] && cfg.rhead[i]->next) + { + CEdge *p = cfg.rhead[i]; + for (; p; p = p->next) + { + int runner = p->to->id; + while (vis[runner] != dom[vis[i]]) + { + if (!cpset_belongs(dfset[runner], i)) + { + CBList_t np = NEW(CBList); + np->cblk = blks[i]; + np->next = df[runner]; + cpset_insert(dfset[runner], i); + df[runner] = np; + } + runner = ord[dom[vis[runner]]]; + } + } + } + for (i = 1; i < bcnt; i++) + dtree_add_edge(blks[ord[dom[vis[i]]]], blks[i]); +} + +void insert_phi(CVList_t vars) { + CVList_t vp; + int i; + for (i = 0; i < bcnt; i++) + phi[i] = cpset_create(); + for (vp = vars; vp; vp = vp->next) + { /* for each variable */ + CVar_t var = vp->var; + CBList_t t = var->defsite; + CBList_t def = NULL; + static int indef[MAX_BLOCK]; + for (t = var->defsite; t; t = t->next) + if (++indef[t->cblk->id] == 1) + { + CBList_t p = NEW(CBList); + p->cblk = t->cblk; + p->next = def; + def = p; + } + for (t = var->defsite; t; t = t->next) + indef[t->cblk->id] = 0; /* clear */ + while (def) /* while def not empty */ + { + CBList_t n = def, i; /* remove some node n from def */ + def = def->next; + for (i = df[n->cblk->id]; i; i = i->next) + { + CBlock_t y = i->cblk; + CPSet_t phiy = phi[y->id]; + if (!cpset_belongs(phiy, (uintptr_t)var)) + { + CPhi_t phi = NEW(CPhi); + CBList_t ndef; + phi->dest = copr_create(); + phi->dest->kind = VAR; + phi->dest->info.var = var; + phi->dest->type = var->type; + phi->oprs = (COpr_t *)malloc(sizeof(COpr_t) * y->pred); + cblock_pappend(y, phi); + cpset_insert(phiy, (uintptr_t)var); + ndef = NEW(CBList); + ndef->cblk = y; + ndef->next = def; + def = ndef; + } + } + } + } + for (i = 0; i < bcnt; i++) + { + CBList_t p, np; + for (p = df[i]; p; p = np) + { + np = p->next; + free(p); + } + df[i] = NULL; + if (phi[i]) cpset_destroy(phi[i]); + if (dfset[i]) cpset_destroy(dfset[i]); + } +} + +void renaming_dfs(CBlock_t blk) { + CInst_t ih = blk->insts, i; + CPhi_t ph = blk->phis, pi; + CEdge *e = cfg.head[blk->id]; + COList_t defl = NULL, dn; + for (pi = ph->next; pi != ph; pi = pi->next) + { + COpr_t dest = pi->dest; + CVar_t var = dest->info.var; + COList_t n = NEW(COList), n2; + dest->sub = var->cnt++; +/* dest->def = ih->next; *//* the first inst */ + n->opr = dest; + n->next = var->stack; + var->stack = n; + n2 = NEW(COList); + n2->opr = dest; + n2->next = defl; + defl = n2; + } + for (i = ih->next; i != ih; i = i->next) + { + COpr_t dest = i->dest; + COpr_t *opr[3] = {NULL, &(i->src1), &(i->src2)}; + int t; + if (i->op == WARR) + opr[0] = &(i->dest); + for (t = 0; t < 3; t++) + { + COpr_t p; + if (!opr[t]) continue; + p = *(opr[t]); + if (!(p && (p->kind == VAR || p->kind == TMP))) continue; + /* free(p); */ /* memory leak */ + (*opr[t] = p->info.var->stack->opr)->type = p->type; + (*opr[t])->info.var->weight++; + } + if (dest) + { + if (i->op == WARR) + i->dest = dest->info.var->stack->opr; + else + { + if (dest->kind == VAR || dest->kind == TMP) + { + CVar_t var = dest->info.var; + COList_t n = NEW(COList), n2; + dest->sub = var->cnt++; + /* dest->def = i; */ + n->opr = dest; + n->next = var->stack; + var->stack = n; + n2 = NEW(COList); + n2->opr = dest; + n2->next = defl; + defl = n2; + } + } + } + } + for (; e; e = e->next) /* for each successor */ + { + CBlock_t y = e->to; + int j = 0; + CEdge *pre = cfg.rhead[y->id]; + for (; pre->to != blk; pre = pre->next) j++; + ph = y->phis; + for (pi = ph->next; pi != ph; pi = pi->next) + { + if (pi->dest->kind == VAR) + pi->oprs[j] = pi->dest->info.var->stack->opr; + pi->oprs[j]->dep = 1; + } + } + for (e = dtree.head[blk->id]; e; e = e->next) + renaming_dfs(e->to); + for (; defl; defl = dn) + { + CVar_t var = defl->opr->info.var; + COList_t nf = var->stack->next; + free(var->stack); + var->stack = nf; + dn = defl->next; + free(defl); + } +} + +void renaming_vars(COList_t oprs) { + COList_t p; + for (p = oprs; p; p = p->next) + if (p->opr->kind == VAR) + { + CInst_t ld = cinst_create(); + CVar_t var = p->opr->info.var; + var->cnt = 0; + var->reload = var->loc > 0 && var->type->type != CARR; + ld->op = LOAD; + ld->dest = copr_create(); + ld->dest->kind = VAR; + ld->dest->info.var = var; + ld->dest->type = var->type; + cblock_pushfront(entry, ld); + } + renaming_dfs(entry); +} + +void mark_insts(void) { + int i, icnt = 0; + for (i = bcnt - 1; i >= 0; i--) + { + CBlock_t b = blks[ord[i]]; + CInst_t ih = b->insts, ii; + CPhi_t ph = b->phis, pi; + for (pi = ph->next; pi != ph; pi = pi->next) + { + pi->dest->def = ih->next; + icnt++; + } + if (cblock_isempty(b)) + b->first = b->last = icnt++; + else + { + for (ii = ih->next; ii != ih; ii = ii->next) + { + if (ii->op != WARR && ii->dest) + ii->dest->def = ii; + ii->id = icnt++; + } + b->first = ih->next->id; + b->last = ih->prev->id; + } + } +} + +CPSet_t liveset[MAX_BLOCK]; +COList_t live[MAX_BLOCK]; + +CRange_t crange_merge(CRange_t a, CRange_t b) { + CRange res; + CRange_t tail = &res; + res.next = NULL; + res.r = -1; + for (; a || b;) + { + if (a && (!b || (a->l < b->l || (a->l == b->l && a->r < b->r)))) + { + if (tail->r >= a->l) + { + if (a->r > tail->r) + tail->r = a->r; + } + else + { + assert(tail->r < a->l); + tail->next = a; + tail = a; + } + a = a->next; + } + else + { + if (tail->r >= b->l) + { + if (b->r > tail->r) + tail->r = b->r; + } + else + { + assert(tail->r < b->l); + tail->next = b; + tail = b; + } + b = b->next; + } + } + tail->next = NULL; + return res.next; +} + +void add_range_(COpr_t opr, int begin, int end) { + CRange_t range; + range = NEW(CRange); + range->l = begin; + range->r = end; + range->next = NULL; + opr->range = crange_merge(opr->range, range); +} + +void add_range(COpr_t opr, CBlock_t blk, int end) { + int dfid = opr->def->id; + int begin; + if (blk->first <= dfid && dfid <= blk->last) + begin = dfid; + else + begin = blk->first; + add_range_(opr, begin, end); +} + +void build_intervals(void) { + int i; + for (i = 0; i < bcnt; i++) + liveset[i] = cpset_create(); + for (i = 0; i < bcnt; i++) + { + int id = ord[i]; + CBlock_t b = blks[id]; + CEdge *e = cfg.head[id]; + CPSet_t curlive = liveset[id]; + for (; e; e = e->next) + { + int sid = e->to->id; + CBlock_t s = blks[sid]; + COList_t p = live[sid]; + CPhi_t ph = s->phis, i; + for (i = ph->prev; i != ph; i = i->prev) + { + CEdge *pe; + int t; + for (t = 0, pe = cfg.rhead[sid]; pe->to != b; pe = pe->next) t++; + COpr_t opr = i->oprs[t]; + if (opr && + (opr->kind == VAR || + opr->kind == TMP) && !cpset_belongs(curlive, (uintptr_t)opr)) + { + COList_t np = NEW(COList); + np->opr = opr; + np->next = live[id]; + live[id] = np; + cpset_insert(curlive, (uintptr_t)opr); + } + } + for (; p; p = p->next) + if (cpset_belongs(liveset[sid], (uintptr_t)p->opr) && + cpset_insert(curlive, (uintptr_t)p->opr)) + { + COList_t np = NEW(COList); + np->opr = p->opr; + np->next = live[id]; + live[id] = np; + } + } + { + COList_t p; + for (p = live[id]; p; p = p->next) + add_range(p->opr, b, b->last + 1); + } + { + CInst_t ih = b->insts, i; + for (i = ih->prev; i != ih; i = i->prev) + { + int t; + COpr_t oprs[3] = {NULL, i->src1, i->src2}; + if (i->dest && + (i->dest->kind == VAR || + i->dest->kind == TMP) && i->op != WARR) /* def */ + { + i->is_def = 1; + cpset_erase(curlive, (uintptr_t)i->dest); + } + else + { + if (i->op == WARR) + oprs[0] = i->dest; + i->is_def = 0; + } + for (t = 0; t < 3; t++) + { + COpr_t opr = oprs[t]; + if (opr && + (opr->kind == VAR || + opr->kind == TMP) && !cpset_belongs(curlive, (uintptr_t)opr)) + { + COList_t np = NEW(COList); + np->opr = opr; + np->next = live[id]; + live[id] = np; + cpset_insert(curlive, (uintptr_t)opr); + add_range(opr, b, i->id); + } + } + } + } + { + CPhi_t ph = b->phis, i; + for (i = ph->prev; i != ph; i = i->prev) + cpset_erase(curlive, (uintptr_t)i->dest); + } + if (loop_tail[id] != -1) + { + COList_t p; + for (p = live[id]; p; p = p->next) + if (cpset_belongs(curlive, (uintptr_t)p->opr)) + add_range_(p->opr, b->first, blks[loop_tail[id]]->last + 1); + } + } + for (i = 0; i < bcnt; i++) + { + COList_t p = live[i], np; + for (; p; p = np) + { + np = p->next; + free(p); + } + live[i] = NULL; + if (liveset[i]) cpset_destroy(liveset[i]); + } +} + +COpr_t cinterv_repr(COpr_t opr) { + return opr->par == opr ? opr : (opr->par = cinterv_repr(opr->par)); +} + +void cinterv_union(COpr_t a, COpr_t b) { + a = cinterv_repr(a); + b = cinterv_repr(b); +#ifdef CIBIC_DEBUG + fprintf(stderr, "merging "); + copr_print(stderr, a); + fprintf(stderr, " "); + copr_print(stderr, b); + fprintf(stderr, "\n"); +#endif + if (a == b) return; + b->range = crange_merge(b->range, a->range); + a->par = b; + b->mod |= a->mod; +} + +void init_def(void) { + CBlock_t p; + COList_t def; + raw_defs = NULL; + for (p = entry; p; p = p->next) + { + CInst_t i, ih = p->insts; + CPhi_t pi, ph = p->phis; + for (i = ih->next; i != ih; i = i->next) + if (i->is_def) + { + def = NEW(COList); + def->opr = i->dest; + def->next = raw_defs; + raw_defs = def; + } + for (pi = ph->next; pi != ph; pi = pi->next) + { + def = NEW(COList); + def->opr = pi->dest; + def->next = raw_defs; + raw_defs = def; + } + } + for (p = entry; p; p = p->next) + { + CPhi_t pi, ph = p->phis; + for (pi = ph->next; pi != ph; pi = pi->next) + { + int i; + for (i = 0; i < p->pred; i++) + cinterv_union(pi->dest, pi->oprs[i]); + } + } + for (def = raw_defs; def; def = def->next) + { + COpr_t opr = def->opr; + opr->spill = cinterv_repr(opr); + } + /* coalescing */ + for (p = entry; p; p = p->next) + { + CInst_t i, ih = p->insts; + for (i = ih->next; i != ih; i = i->next) + if (i->op == MOVE && i->dest->kind == TMP && + (i->src1->kind == TMP || i->src1->kind == VAR)) + cinterv_union(i->dest, i->src1); + } +} + +void colist_remove(COList_t node) { + node->prev->next = node->next; + node->next->prev = node->prev; +} + +void colist_add(COList_t head, COList_t p) { + (p->next = head->next)->prev = p; + (p->prev = head)->next = p; +} + +int overlap_with_beg(COpr_t i, int beg) { + CRange_t r; + for (r = i->range; r && r->l <= beg; r = r->next) + if (r->r > beg) return 1; + return 0; +} + +int overlap_with_interv(COpr_t i, COpr_t cur) { + CRange_t pi, pc; + for (pi = i->range, pc = cur->range; pi && pc;) + { + if (pi->r <= pc->l) pi = pi->next; + else if (pc->r <= pi->l) pc = pc->next; + else return 1; + } + return 0; +} + +int copr_comp(const void *a, const void *b) { + return (*(COpr_t *)a)->range->l - (*(COpr_t *)b)->range->l; +} + +const int avail_regs[] = {8, 9, 10, 11, + 12, 13, 14, 15, + 16, 17, 24, 25}; + +const int MAX_AVAIL_REGS = sizeof(avail_regs) / sizeof(avail_regs[0]); + +void register_alloc(void) { + /* Algorithm from the paper: + * Linear Scan Register Allocation + * in the Context of SSA Form and Register Constraints */ + static int freg[32], f[32]; + int dn = 0, i; + COpr_t *unhandled; + COList_t p; + COList_t active = NEW(COList), + inactive = NEW(COList); + active->next = active->prev = active; + inactive->next = inactive->prev = inactive; + memset(freg, -1, sizeof freg); + init_def(); + for (i = 0; i < MAX_AVAIL_REGS; i++) + freg[avail_regs[i]] = 1; /* available */ + for (p = raw_defs; p; p = p->next) + { + COpr_t opr = p->opr; + /* + if (opr->info.var->loc < 0) + { + opr->reg = 3 - opr->info.var->loc; + continue; + } */ /* arguments */ + opr->reg = -2; + if (opr->par != opr) continue; + if (cinterv_repr(opr)->range) + { + opr->reg = -1; + dn++; + } + } + unhandled = (COpr_t *)malloc(dn * sizeof(COpr_t)); + i = 0; + for (p = raw_defs; p; p = p->next) + if (p->opr->reg == -1) + unhandled[i++] = p->opr; + for (i = 0; i < dn; i++) + { + COpr_t opr = unhandled[i]; + CRange_t r; + for (r = opr->range; r->next; r = r->next); + opr->begin = opr->range->l; + opr->end = r->r; + } + qsort(unhandled, dn, sizeof(COpr_t), copr_comp); + /* preparation done */ + for (i = 0; i < dn; i++) + { + COList_t c = NEW(COList); + COpr_t cur; + COList_t np; + int reg, t; + cur = c->opr = unhandled[i]; + /* for each interval in active */ + for (p = active->next; p != active; p = np) + { + COpr_t i = p->opr; + np = p->next; + if (i->end <= cur->begin) /* if i ends before cur.beg */ + { + colist_remove(p); + free(p); /* move i to handled */ + freg[i->reg] = 1; /* add i.reg to free */ + } + else if (!overlap_with_beg(i, cur->begin)) + { + colist_remove(p); + colist_add(inactive, p); /* move i to inactive */ + freg[i->reg] = 1; /* add i.reg to free */ + } + } + /* for each interval i in inactive */ + for (p = inactive->next; p != inactive; p = np) + { + COpr_t i = p->opr; + np = p->next; + if (i->end <= cur->begin) /* if i ends before cur.beg */ + { + colist_remove(p); + free(p); /* move i to handled */ + } + else if (overlap_with_beg(i, cur->begin)) + { + colist_remove(p); + colist_add(active, p); /* move i to active */ + freg[i->reg] = 0; /* remove i.reg from free */ + } + } + memmove(f, freg, sizeof f); + /* for each interval i in inactive that overlaps cur do + * f <- f - {i.reg} */ + for (p = inactive->next; p != inactive; p = p->next) + if (overlap_with_interv(p->opr, cur)) + f[p->opr->reg] = 0; + reg = -1; + for (t = 0; t < 32; t++) + if (f[t] > 0) { reg = t; break; } + if (reg == -1) /* if f = {} */ + { /* assign mem loc */ + static int w[32]; + int min = 0x7fffffff; + memset(w, 0, sizeof w); + for (p = active->next; p != active; p = p->next) + if (overlap_with_interv(p->opr, cur)) + w[p->opr->reg] += p->opr->info.var->weight; + for (p = inactive->next; p != inactive; p = p->next) + if (overlap_with_interv(p->opr, cur)) + w[p->opr->reg] += p->opr->info.var->weight; + for (t = 0; t < 32; t++) + if (f[t] != -1 && w[t] < min) min = t, reg = t; + if (reg == -1 || cur->info.var->weight < w[reg]) + { + cur->reg = -1; /* assign a memory location to cur */ + free(c); /* and move cur to handled */ + } + else + { + /* move all active or inactive intervals to which r was assigned to handled + * assign memory locations to them */ + for (p = active->next; p != active; p = np) + { + np = p->next; + if (p->opr->reg == reg) + { + p->opr->reg = -1; + colist_remove(p); + free(p); + } + } + for (p = inactive->next; p != inactive; p = np) + { + np = p->next; + if (p->opr->reg == reg) + { + p->opr->reg = -1; + colist_remove(p); + free(p); + } + } + cur->reg = reg; + colist_add(active, c); /* move cur to active */ + } + } + else if (cur->mod) /* may be referenced by a pointer */ + { + cur->reg = -1; /* assign a memory location to cur */ + free(c); /* and move cur to handled */ + } + else + { + cur->reg = reg; /* cur.reg <- any register in f */ + freg[reg] = 0; /* free <- free - {cur.reg} */ + colist_add(active, c); /* move cur to active */ + } + } + for (p = raw_defs; p; p = p->next) + { + COpr_t opr = p->opr; +/* opr->spill = cinterv_repr(opr); */ + if (cinterv_repr(opr)->range) + opr->reg = cinterv_repr(opr)->reg; + } + defs = NULL; + for (i = 0; i < dn; i++) + { + COList_t p = NEW(COList); + p->opr = unhandled[i]; + p->next = defs; + defs = p; + } + if (!quiet) + { + for (i = 0; i < dn; i++) + { + COpr_t opr = unhandled[i]; + CRange_t p; + copr_print(stderr, opr); + fprintf(stderr, ":"); + for (p = opr->range; p; p = p->next) + fprintf(stderr, " [%d, %d)", p->l, p->r); + fprintf(stderr, " (begin: %d, end: %d, weight: %d, reg: %d)\n", + opr->begin, opr->end, opr->info.var->weight, opr->reg); + } + } + free(unhandled); +} + +void const_propagation(void) { +#define IS_STATIC(opr) (!((opr)->mod || (opr)->info.var->reload)) + int i; + for (i = bcnt - 1; i >= 0; i--) + { + CBlock_t b = blks[vis[i]]; + CInst_t i, ni, ih = b->insts; + for (i = ih->next; i != ih; i = ni) + { + int flag = 0; + COpr_t t; + if (i->op == ADDR) + i->src1->mod = 1; + if (i->src1 && i->src1->cval && IS_STATIC(i->src1)) + { + t = i->src1->cval; + i->src1 = copr_create(); + *(i->src1) = *t; + } + if (i->src2 && i->src2->cval && IS_STATIC(i->src2)) + { + t = i->src2->cval; + i->src2 = copr_create(); + *(i->src2) = *t; + } + if (!i->dest) + { + ni = i->next; + continue; + } + if (i->src1 && i->src1->kind == IMM) + { + if (i->op == MOVE) + flag = 1; + else if (i->op == NEG) + { + i->op = MOVE; + i->src1->info.imm *= -1; + flag = 1; + } + else if (i->src2 && i->src2->kind == IMM) + { + COpr_t c; + int immd; + int imm1 = i->src1->info.imm; + int imm2 = i->src2->info.imm; + flag = 1; + switch (i->op) + { + case MUL: immd = imm1 * imm2; break; + case DIV: immd = imm1 / imm2; break; + case MOD: immd = imm1 % imm2; break; + case ADD: immd = imm1 + imm2; break; + case SUB: immd = imm1 - imm2; break; + case SHL: immd = imm1 << imm2; break; + case SHR: immd = imm1 >> imm2; break; + case AND: immd = imm1 & imm2; break; + case XOR: immd = imm1 ^ imm2; break; + case OR: immd = imm1 | imm2; break; + case NOR: immd = ~(imm1 | imm2); break; + case EQ: immd = imm1 == imm2; break; + case NE: immd = imm1 != imm2; break; + case LT: immd = imm1 < imm2; break; + case GT: immd = imm1 > imm2; break; + case LE: immd = imm1 <= imm2; break; + case GE: immd = imm1 >= imm2; break; + default: flag = 0; + } + if (flag) + { + c = copr_create(); + c->kind = IMM; + free(i->src1); + free(i->src2); + i->op = MOVE; + i->src1 = c; + c->info.imm = immd; + } + } + } + ni = i->next; + if (flag) + { + i->dest->cval = i->src1; + if (i->dest->kind == TMP && !i->dest->dep) + { + i->next->prev = i->prev; + i->prev->next = i->next; + free(i); + } + } + } + } +} + +void strength_reduction(void) { +#define SWAP_IMM \ + do { \ + if (i->src1->kind == IMM) \ + { \ + COpr_t t = i->src1; \ + i->src1 = i->src2; \ + i->src2 = t; \ + } \ + } while (0) + +#define PRINT_BARE_STRING \ + do { \ + if (bp != buff) \ + { \ + CInst_t print = cinst_create(); \ + CSList_t cstr = NEW(CSList); \ + print->op = CALL; \ + print->dest = copr_create(); \ + print->dest->kind = TMP; \ + print->dest->info.var = ctmp_create(); \ + print->dest->type = i->dest->type; \ + print->src1 = copr_create(); \ + print->src1->kind = IMMF; \ + *bp = '\0'; \ + print->src1->kind = IMMF; \ + cstr->str = strdup(buff); \ + (cstr->prev = cstrs->prev)->next = cstr; \ + (cstr->next = cstrs)->prev = cstr; \ + cstr->id = cstr->prev->id + 1; \ + inst = cinst_create(); \ + inst->op = PUSH; \ + inst->sysp = 1; \ + inst->src1 = copr_create(); \ + inst->src1->kind = IMMS; \ + inst->src1->info.cstr = cstr; \ + cblock_append(ibuff, inst); \ + print->src1->info.str = "__print_string"; \ + cblock_append(ibuff, print); \ + bp = buff; \ + } \ + } while (0) + + int i; + int call_cnt = 0; + for (i = bcnt - 1; i >= 0; i--) + { + CBlock_t b = blks[vis[i]]; + CInst_t i, ni, ih = b->insts; + for (i = ih->next; i != ih; i = ni) + { + ni = i->next; + switch (i->op) + { + case ADD: + SWAP_IMM; + case SUB: + if (i->src2->kind == IMM && !i->src2->info.imm) + { + i->op = MOVE; + i->src2 = NULL; + } + break; + case MUL: + SWAP_IMM; + if (i->src2->kind == IMM) + { + int p = 0, n = i->src2->info.imm; + while (!(n & 1)) n >>= 1, p++; + if (n == 1) + { + i->op = SHL; + i->src2 = copr_create(); + i->src2->kind = IMM; + i->src2->info.imm = p; + } + } + break; + case DIV: + SWAP_IMM; + if (i->src2->kind == IMM) + { + int p = 0, n = i->src2->info.imm; + while (!(n & 1)) n >>= 1, p++; + if (n == 1) + { + i->op = SHR; + i->src2 = copr_create(); + i->src2->kind = IMM; + i->src2->info.imm = p; + } + } + break; + case PUSH: + call_cnt++; + break; + case CALL: + if (i->src1->kind == IMMF && + !strcmp(i->src1->info.str, "printf")) + { + static char buff[1024]; + char *bp = buff, *sp; + CInst_t push = i, inst, phead, np; + CBlock_t ibuff; + CSList_t fmt; + /* move to the first push */ + while (call_cnt--) push = push->prev; + if (push->src1->kind != IMMS) break; /* not a const pattern */ + ibuff = cblock_create(0); + fmt = push->src1->info.cstr; + push->prev->next = i->next; + i->next->prev = push->prev; + phead = push->prev; + np = push->next; + free(push); + push = np; + for (sp = fmt->str; *sp != '\0'; sp++) + { + if (*sp == '%') + { + CInst_t print = cinst_create(); + print->op = CALL; + print->dest = copr_create(); + print->dest->kind = TMP; + print->dest->info.var = ctmp_create(); + print->dest->type = i->dest->type; + print->src1 = copr_create(); + print->src1->kind = IMMF; + PRINT_BARE_STRING; + switch (*(++sp)) + { + case 'd': + { + if (push != i) + { + np = push->next; + push->sysp = 1; + cblock_append(ibuff, push); + push = np; + } + print->src1->info.str = "__print_int"; + cblock_append(ibuff, print); + } + break; + case 'c': + { + if (push != i) + { + np = push->next; + push->sysp = 1; + cblock_append(ibuff, push); + push = np; + } + print->src1->info.str = "__print_char"; + cblock_append(ibuff, print); + } + break; + case 's': + { + + if (push != i) + { + np = push->next; + push->sysp = 1; + cblock_append(ibuff, push); + push = np; + } + print->src1->info.str = "__print_string"; + cblock_append(ibuff, print); + } + break; + default: + { + CSList_t cstr = NEW(CSList); + cstr->str = strdup(sp - 1); + (cstr->prev = cstrs->prev)->next = cstr; + (cstr->next = cstrs)->prev = cstr; + cstr->id = cstr->prev->id + 1; + inst = cinst_create(); + inst->op = PUSH; + inst->src1 = copr_create(); + inst->src1->kind = IMMS; + inst->src1->info.cstr = cstr; + cblock_append(ibuff, inst); + for (; push != i; push = np) + { + np = push->next; + cblock_append(ibuff, push); + } + print->src1->info.str = "printf"; + cblock_append(ibuff, print); + sp = fmt->str + strlen(fmt->str) - 1; + } + break; + } + } + else *bp++ = *sp; + } + PRINT_BARE_STRING; + fmt->prev->next = fmt->next; + fmt->next->prev = fmt->prev; + free(fmt); + (i->next->prev = ibuff->insts->prev)->next = i->next; + (phead->next = ibuff->insts->next)->prev = phead; + free(ibuff); + } + call_cnt = 0; + break; + default: ; + } + } + } +} + +unsigned int copr_hash(COpr_t opr) { + if (!opr) return 0; + switch (opr->kind) + { + case VAR: + case TMP: + return (unsigned long)opr; + default: + return (unsigned int)opr->info.imm; + } +} + +int copr_eq(COpr_t a, COpr_t b) { + if (a->kind != b->kind) return 0; + switch (a->kind) + { + case VAR: + case TMP: + return a == b; + case IMM: + return a->info.imm == b->info.imm; + case IMMS: + return a->info.cstr == b->info.cstr; + case IMMF: + return !strcmp(a->info.str, b->info.str); + default: + return 0; + } +} + +unsigned int cexpmap_hash(CInst_t exp) { + unsigned int res = 0; + res = ((unsigned int)exp->op) << 10; + res ^= copr_hash(exp->src1); + res = (res << 1) ^ copr_hash(exp->src2); + return res; +} + +CExpMap_t cexpmap_create(void) { + CExpMap_t res = NEW(CExpMap); + memset(res->head, 0, sizeof res->head); + return res; +} + +CExpMap_t cexpmap_dup(CExpMap_t cem) { + int i; + CExpMap_t res = cexpmap_create(); + for (i = 0; i < MAX_TABLE_SIZE; i++) + { + CENode *p, *t; + for (p = cem->head[i]; p; p = p->next) + { + t = NEW(CENode); + t->exp = p->exp; + t->next = res->head[i]; + res->head[i] = t; + } + } + return res; +} + +int cexpmap_comp(CInst_t exp1, CInst_t exp2) { + if (exp1->op != exp2->op) return 0; + if (!copr_eq(exp1->src1, exp2->src1)) return 0; + return copr_eq(exp1->src2, exp2->src2); +} + +void cexpmap_insert(CExpMap_t cem, CInst_t exp) { + int hv = cexpmap_hash(exp) % MAX_TABLE_SIZE; + CENode *np = NEW(CENode); + np->exp = exp; + np->next = cem->head[hv]; + cem->head[hv] = np; +} + +CInst_t cexpmap_lookup(CExpMap_t cem, CInst_t exp) { + int hv = cexpmap_hash(exp) % MAX_TABLE_SIZE; + CENode *p; + for (p = cem->head[hv]; p; p = p->next) + if (cexpmap_comp(p->exp, exp)) + return p->exp; + return NULL; +} + +void cexpmap_clear(CExpMap_t cem) { + int i; + CENode *p, *np; + for (i = 0; i < MAX_TABLE_SIZE; cem->head[i++] = NULL) + for (p = cem->head[i]; p; p = np) + { + np = p->next; + free(p); + } +} + +void cexpmap_destroy(CExpMap_t cem) { + cexpmap_clear(cem); + free(cem); +} +void copr_shortcut(COpr_t *opr) { + COpr_t t = *opr; + if (!t) return; + t = t->same; + if (t->kind == TMP) + *opr = t->same; +} + +void subexp_elimination_(CBlock_t b, CExpMap_t cem) { + CInst_t i, ih = b->insts, ni; + CEdge *e; + for (i = ih->next; i != ih; i = ni) + { + CInst_t t; + ni = i->next; + if (i->op == MOVE) + { + i->dest->same = i->src1->same; + continue; + } + else if (i->op == CALL) + { + cexpmap_clear(cem); + continue; + } + copr_shortcut(&i->src1); + copr_shortcut(&i->src2); + t = cexpmap_lookup(cem, i); + if (t) + { + i->op = MOVE; + i->src1 = t->dest; + i->src2 = NULL; + i->dest->same = i->src1; + } + else + { + switch (i->op) + { + case MUL: case DIV: case MOD: case ADD: case SUB: + case SHL: case SHR: case AND: case XOR: case OR: case NOR: + case EQ: case NE: case LT: case GT: case LE: case GE: + case NEG: + if (i->dest->kind == TMP) cexpmap_insert(cem, i); + break; + default: ; + } + } + } + for (e = dtree.head[b->id]; e; e = e->next) + { + CExpMap_t scem = cexpmap_dup(cem); + subexp_elimination_(e->to, scem); + cexpmap_destroy(scem); + } +} + +void subexp_elimination(void) { + CExpMap_t cem = cexpmap_create(); + subexp_elimination_(entry, cem); + cexpmap_destroy(cem); +} + +void deadcode_elimination() { + int i; + for (i = bcnt - 1; i >= 0; i--) + { + CBlock_t b = blks[vis[i]]; + CInst_t i, ih = b->insts; + for (i = ih->next; i != ih; i = i->next) + { + if (i->src1) i->src1->dep = 1; + if (i->src2) i->src2->dep = 1; + if (i->op == WARR && i->dest) i->dest->dep = 1; + } + } + for (i = bcnt - 1; i >= 0; i--) + { + CBlock_t b = blks[vis[i]]; + CInst_t i, ih = b->insts; + for (i = ih->next; i != ih; i = i->next) + if (i->op != CALL && i->dest && i->dest->kind == TMP && !i->dest->dep) + { + i->next->prev = i->prev; + i->prev->next = i->next; + free(i); + } + } +} + +void ssa_func(CType_t func) { +#define OPRS_ADD(_opr) \ + do { \ + if (cpset_insert(avs, (uintptr_t)((_opr)->info.var))) \ + { \ + COList_t n = NEW(COList); \ + n->next = oprs; \ + n->opr = _opr; \ + oprs = n; \ + } \ + } while (0) + +#define VS_ADD(_d) \ + do { \ + if (cpset_insert(vs, (uintptr_t)(_d))) \ + { \ + CVList_t n = NEW(CVList); \ + n->next = vars; \ + n->var = _d; \ + vars = n; \ + } \ + } while (0) + + CBlock_t p; + entry = cblock_create(1); + CPSet_t vs = cpset_create(), avs = cpset_create(); + CVList_t vars = NULL; + COList_t oprs = NULL; + /* CVar_t pr; */ + cfg_clear(); + dtree_clear(); + ssa_comp(func->rec.func.body, entry, NULL); + /* + for (i = 0, pr = func->rec.func.params; + i < 4 && pr; + i++, pr = pr->next) + pr->loc = -(i + 1); */ /* mark arguments */ + + for (p = entry; p; p = p->next) + { + CInst_t head = p->insts, i; + CEdge *e; + p->pred = 0; + for (e = cfg.rhead[p->id]; e; e = e->next) + p->pred++; + for (i = head->next; i != head; i = i->next) + { + if (i->src1 && (i->src1->kind == VAR || i->src1->kind == TMP)) + OPRS_ADD(i->src1); + if (i->src2 && (i->src2->kind == VAR || i->src2->kind == TMP)) + OPRS_ADD(i->src2); + if (i->op == WARR) + OPRS_ADD(i->dest); + else if (i->dest && i->dest->kind == VAR) + { + CVar_t d = i->dest->info.var; + CBList_t b = NEW(CBList); + VS_ADD(d); + OPRS_ADD(i->dest); + b->next = d->defsite; + b->cblk = p; + d->defsite = b; + } + } + blks[p->id] = p; + } + cpset_destroy(avs); + cpset_destroy(vs); + calc_dominant_frontier(); + /* build SSA */ + insert_phi(vars); + renaming_vars(oprs); + /* optimization on SSA */ + const_propagation(); + subexp_elimination(); + const_propagation(); + strength_reduction(); + deadcode_elimination(); + /* out of SSA */ + mark_insts(); + build_intervals(); + register_alloc(); +} @@ -0,0 +1,167 @@ +#ifndef SSA_H +#define SSA_H +#include "const.h" +#include "semantics.h" + +typedef struct CInst CInst; +typedef CInst *CInst_t; + +typedef struct CRange CRange; +typedef struct CRange *CRange_t; +struct CRange { + int l, r; /* [l, r) */ + CRange_t next; +}; + +typedef struct COpr COpr; +typedef COpr *COpr_t; +struct COpr { + enum { + VAR, + TMP, + IMM, + IMMS, + IMMF + } kind; + union { + CVar_t var; + CSList_t cstr; + char *str; + int imm; + } info; + + int sub; + int dep; + int reg; /* -1 for spilled, -2 for discarded */ + int begin, end; /* for reg allocation */ + int mod; + CType_t type; + CInst_t def; + CRange_t range; + COpr_t par; /* union-find */ + COpr_t cval; + COpr_t spill; /* check this reference if spilled */ + COpr_t same; /* for common exp elimination */ +}; + +typedef struct COList COList; +typedef COList *COList_t; +struct COList { + COpr_t opr; + COList_t next, prev; +}; + +void colist_remove(COList_t node); + +struct CInst { + enum OpCode { + BEQ = 0, /* conditional jump */ + BNE = 1, + GOTO, /* unconditional jump */ + ARR, /* displacement */ + PUSH, /* push to stack top */ + CALL, /* call function */ + RET, /* return */ + WARR, + MOVE, + LOAD, /* load from memory */ + ADDR, /* get address */ + MUL, DIV, MOD, ADD, SUB, + SHL, SHR, AND, XOR, OR, NOR, + EQ, NE, LT, GT, LE, GE, + NEG + } op; + COpr_t dest, src1, src2; + CInst_t next, prev; + CType_t wtype; /* for WARR */ + int id; + int is_def; + int sysp; + int bret; /* for CALL */ + int offset; /* for PUSH */ +}; + +typedef struct CPhi CPhi; +typedef CPhi *CPhi_t; +struct CPhi { + COpr_t dest; + COpr_t *oprs; + CPhi_t next, prev; +}; + +typedef struct CBlock CBlock; +typedef CBlock *CBlock_t; +struct CBlock { + CPhi_t phis; + CInst_t insts; /* instructions */ + CBlock_t next, prev; + int id; + int ref; /* if referenced by any gotos */ + int pred; /* the number of predecessors */ + int first, last; +}; + +typedef struct CBList CBList; +typedef CBList *CBList_t; +struct CBList { + CBlock_t cblk; + CBList_t next; +}; + +CBlock_t cblock_create(int inc); +void cblock_append(CBlock_t cblk, CInst_t inst); +void cblock_pappend(CBlock_t cblk, CPhi_t phi); +CInst_t cblock_getback(CBlock_t cblk); +int cblock_isempty(CBlock_t cblk); + +typedef struct CEdge CEdge; +typedef struct CGraph { + struct CEdge { + CBlock *to; + CEdge *next; + } *head[MAX_BLOCK], *rhead[MAX_BLOCK]; +} CGraph; + +typedef struct CInterv { + COpr_t opr; + CRange_t range; +} CInterv; + +typedef struct CENode CENode; +typedef struct CExpMap { + struct CENode { + CInst_t exp; + CENode *next; + } *head[MAX_TABLE_SIZE]; +} CExpMap; +typedef CExpMap *CExpMap_t; + +CExpMap_t cexpmap_create(void); +CExpMap_t cexpmap_dup(CExpMap_t cem); +unsigned int cexpmap_hash(CInst_t exp); +int cexpmap_comp(CInst_t exp1, CInst_t exp2); +void cexpmap_insert(CExpMap_t cem, CInst_t exp); +CInst_t cexpmap_lookup(CExpMap_t cem, CInst_t exp); +void cexpmap_clear(CExpMap_t cem); +void cexpmap_destroy(CExpMap_t cem); + +typedef struct CFuncIR CFuncIR; +typedef CFuncIR *CFuncIR_t; +struct CFuncIR { + int gbbase; + CBlock_t entry; + COList_t defs; + CType_t func; + CFuncIR_t next; +}; + +void ssa_generate(int quiet); +COpr_t cinterv_repr(COpr_t opr); +void cinst_print(FILE *stream, CInst_t inst); +int overlap_with_beg(COpr_t i, int beg); + +extern CFuncIR_t func_ir; +extern const int avail_regs[]; +extern const int MAX_AVAIL_REGS; + +#endif diff --git a/test_compile.sh b/test_compile.sh new file mode 100755 index 0000000..073231c --- /dev/null +++ b/test_compile.sh @@ -0,0 +1,18 @@ +#! /bin/bash +cp cibic compile_data/ +cp lib.s compile_data/ +cd compile_data/ +for f in *.c +do + echo $f + ./cibic $f > mips.s 2> /dev/null + gcc $f -m32 -std=c99 2> /dev/null + ./spim -stat -file mips.s > out + ./a.out > std + diff std out + if [[ "$?" != 0 ]]; then + echo "Wrong Answer!" + break; + fi +done +echo "OK." diff --git a/test_semantics.sh b/test_semantics.sh new file mode 100755 index 0000000..c84fc2a --- /dev/null +++ b/test_semantics.sh @@ -0,0 +1,20 @@ +#! /bin/bash +dir=semantics_data/*.c +if [ "$#" != 0 ]; then + dir=$1 +fi +res=0 +for file in $dir +do + gcc $file -o /dev/null &> /dev/null + gcc_ret="$?" + ./cibic $file &> /dev/null + ret=$? + if [ $ret -ne $gcc_ret ]; then + echo "Failed on $file" + res=1 + else + echo "ok $file: $ret" + fi +done +exit $res |