From 97fb20538cdac668dda0a0aa82884505bc0df61a Mon Sep 17 00:00:00 2001
From: Teddy
Date: Sun, 23 Mar 2014 16:54:19 +0800
Subject: lexical analysis almost done

---
 Makefile | 11 ++++++++
 cibic.l  | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 cibic.y  | 58 +++++++++++++++++++++++++++++++++++++
 3 files changed, 168 insertions(+)
 create mode 100644 Makefile
 create mode 100644 cibic.l
 create mode 100644 cibic.y

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..6deae78
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,11 @@
+all: cibic
+
+run: cibic
+	./cibic
+
+cibic: lex.yy.c cibic.tab.c
+	gcc -o cibic lex.yy.c cibic.tab.c
+lex.yy.c: cibic.l
+	flex cibic.l
+cibic.tab.c: cibic.y
+	bison -d cibic.y
diff --git a/cibic.l b/cibic.l
new file mode 100644
index 0000000..37de015
--- /dev/null
+++ b/cibic.l
@@ -0,0 +1,99 @@
+%{
+/* Lexer for CIBIC: turns source text into the tokens declared in cibic.y. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "cibic.tab.h"
+%}
+
+letter [a-zA-Z_$]
+digit [0-9]
+/* Exclusive states, so ordinary token rules never fire inside a comment. */
+%x IN_BLOCK_COMMENT IN_INLINE_COMMENT
+%%
+
+<INITIAL>{
+"/*" BEGIN(IN_BLOCK_COMMENT);
+}
+<IN_BLOCK_COMMENT>{
+"*/" BEGIN(INITIAL);
+[^*\n]+ /* eat comment in chunks */
+"*" /* eat the lone star */
+\n /* block comments may span lines */
+}
+
+<INITIAL>{
+"//" BEGIN(IN_INLINE_COMMENT);
+}
+<IN_INLINE_COMMENT>{
+\n BEGIN(INITIAL);
+[^\n]+ /* discard the rest of the line */
+}
+
+"void" { return KW_VOID; }
+"char" { return KW_CHAR; }
+"int" { return KW_INT; }
+"struct" { return KW_STRUCT; }
+"union" { return KW_UNION; }
+"if" { return KW_IF; }
+"else" { return KW_ELSE; }
+"while" { return KW_WHILE; }
+"for" { return KW_FOR; }
+"continue" { return KW_CONT; }
+"break" { return KW_BREAK; }
+"return" { return KW_RET; }
+"sizeof" { return KW_SIZEOF; }
+
+{letter}({letter}|{digit})* {
+    /* The token text is copied to the heap; the consumer must free it. */
+    yylval.strval = strdup(yytext);
+    return IDENTIFIER;
+}
+
+({digit}+)|(0[xX][0-9a-fA-F]+) {
+    /* Base 0 picks hex (0x), octal (leading 0) or decimal from the prefix. */
+    /* FIXME: error report if it is not a valid octal */
+    yylval.intval = (int)strtol(yytext, NULL, 0);
+    return INT_CONST;
+}
+
+'{letter}' {
+    /* yytext still holds both quotes, so the character sits at index 1. */
+    yylval.intval = yytext[1];
+    return CHAR_CONST;
+}
+
+\"[^\n\"]*\" {
+    /* Copy only the text between the quotes; the consumer must free it. */
+    yylval.strval = strndup(yytext + 1, (size_t)(yyleng - 2));
+    return STR_CONST;
+}
+
+"||" { return OPT_OR; }
+"&&" { return OPT_AND; }
+"==" { return OPT_EQ; }
+"!=" { return OPT_NE; }
+"<=" { return OPT_LE; }
+">=" { return OPT_GE; }
+"<<" { return OPT_SHL; }
+">>" { return OPT_SHR; }
+"++" { return OPT_INC; }
+"--" { return OPT_DEC; }
+"->" { return OPT_PTR; }
+
+"*=" { return ASS_MUL; }
+"/=" { return ASS_DIV; }
+"%=" { return ASS_MOD; }
+"+=" { return ASS_ADD; }
+"-=" { return ASS_SUB; }
+"<<=" { return ASS_SHL; }
+">>=" { return ASS_SHR; }
+"&=" { return ASS_AND; }
+"^=" { return ASS_XOR; }
+"|=" { return ASS_OR; }
+
+[();,={}\[\]*|\^&<>+\-/%~!.] { return *yytext; }
+
+[ \t\n\r] /* skip whitespaces */
+. { return UNKNOWN; }
+%%
diff --git a/cibic.y b/cibic.y
new file mode 100644
index 0000000..92f0cfe
--- /dev/null
+++ b/cibic.y
@@ -0,0 +1,58 @@
+%{
+/* Grammar stub plus a token-dump driver for exercising the CIBIC lexer. */
+#include <stdio.h>
+#include <stdlib.h>
+
+int yylex(void);
+int yyerror(const char *s);
+%}
+%token IDENTIFIER INT_CONST CHAR_CONST STR_CONST
+%token KW_VOID KW_CHAR KW_INT KW_STRUCT KW_UNION KW_IF KW_ELSE KW_WHILE
+%token KW_FOR KW_CONT KW_BREAK KW_RET KW_SIZEOF
+%token OPT_OR OPT_AND OPT_EQ OPT_NE OPT_LE OPT_GE OPT_SHL OPT_SHR OPT_INC OPT_DEC OPT_PTR
+%token ASS_MUL ASS_DIV ASS_MOD ASS_ADD ASS_SUB ASS_SHL ASS_SHR ASS_AND ASS_XOR ASS_OR
+%token UNKNOWN
+%union {
+    int intval;
+    char *strval;
+}
+%%
+program
+    : body { printf("\n"); }
+    ;
+body
+    : IDENTIFIER
+    ;
+%%
+/* Tells flex that no further input follows once the current stream ends. */
+int yywrap(void) {
+    return 1;
+}
+
+/* Prints a parser diagnostic to stderr. */
+int yyerror(const char *s) {
+    fprintf(stderr, "%s\n", s);
+    return 0;
+}
+
+/* Prints every token the lexer produces, plus its value when it has one. */
+int main(void) {
+    int ret;
+    while ((ret = yylex()) != 0)
+    {
+        printf("%d\n", ret);
+        if (ret == IDENTIFIER)
+        {
+            printf("id: %s\n", yylval.strval);
+            free(yylval.strval); /* allocated by the lexer */
+        }
+        else if (ret == INT_CONST)
+            printf("int: %d\n", yylval.intval);
+        else if (ret == STR_CONST)
+        {
+            printf("str: %s\n", yylval.strval);
+            free(yylval.strval); /* allocated by the lexer */
+        }
+    }
+    return 0;
+}
-- 
cgit v1.2.3