aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Teddy <ted.sybil@gmail.com> 2014-03-23 16:54:19 +0800
committer Teddy <ted.sybil@gmail.com> 2014-03-23 16:54:19 +0800
commit 97fb20538cdac668dda0a0aa82884505bc0df61a (patch)
tree 755ef3900575f19db533963610db72b79680fc03
parent 7670b6968af891d164d6cae0e42fc35415276b65 (diff)
lexical analysis almost done
-rw-r--r-- Makefile 11
-rw-r--r-- cibic.l 95
-rw-r--r-- cibic.y 43
3 files changed, 149 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..6deae78
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,11 @@
+# Builds the cibic binary from the flex scanner and the bison grammar.
+# `all` and `run` name actions, not files, so they are declared phony.
+.PHONY: all run
+
+all: cibic
+
+# Depends on the binary so that `make run` never executes a missing or
+# out-of-date build.
+run: cibic
+	./cibic
+
+# lex.yy.c includes cibic.tab.h, which `bison -d` writes together with
+# cibic.tab.c; both generated sources must exist before compiling.
+cibic: lex.yy.c cibic.tab.c
+	gcc -o cibic lex.yy.c cibic.tab.c
+
+lex.yy.c: cibic.l
+	flex cibic.l
+
+cibic.tab.c: cibic.y
+	bison -d cibic.y
diff --git a/cibic.l b/cibic.l
new file mode 100644
index 0000000..37de015
--- /dev/null
+++ b/cibic.l
@@ -0,0 +1,95 @@
+%{
+/* Token codes and yylval used by the rules below come from this header. */
+#include "cibic.tab.h"
+%}
+
+letter [a-zA-Z_$]
+digit [0-9]
+
+/*
+ * The comment states are exclusive (%x): while the scanner is inside a
+ * comment only the rules tagged with that state are active.  With inclusive
+ * (%s) states the untagged token rules stay active too, so text such as
+ * "*=" inside a block comment would be returned as a token.
+ */
+%x IN_BLOCK_COMMENT IN_INLINE_COMMENT
+%%
+
+    /* Block comments: switch state on the opener and discard everything
+     * up to the closing delimiter. */
+<INITIAL>"/*"               { BEGIN(IN_BLOCK_COMMENT); }
+<IN_BLOCK_COMMENT>"*/"      { BEGIN(INITIAL); }
+<IN_BLOCK_COMMENT>[^*\n]+   { /* comment text, consumed in chunks */ }
+<IN_BLOCK_COMMENT>"*"       { /* a star that does not close the comment */ }
+<IN_BLOCK_COMMENT>\n        { /* newline inside the comment */ }
+
+    /* Line comments: discard the rest of the line, newline included. */
+<INITIAL>"//"               { BEGIN(IN_INLINE_COMMENT); }
+<IN_INLINE_COMMENT>\n       { BEGIN(INITIAL); }
+<IN_INLINE_COMMENT>[^\n]+   { /* comment text */ }
+
+    /* Reserved words.  These rules come before the identifier rule, so a
+     * word that matches both with the same length is reported as a keyword. */
+"void"      { return KW_VOID; }
+"char"      { return KW_CHAR; }
+"int"       { return KW_INT; }
+"struct"    { return KW_STRUCT; }
+"union"     { return KW_UNION; }
+"if"        { return KW_IF; }
+"else"      { return KW_ELSE; }
+"while"     { return KW_WHILE; }
+"for"       { return KW_FOR; }
+"continue"  { return KW_CONT; }
+"break"     { return KW_BREAK; }
+"return"    { return KW_RET; }
+"sizeof"    { return KW_SIZEOF; }
+
+    /* Identifier: a letter (as defined above) followed by letters or digits.
+     * The lexeme is copied to the heap and handed over in yylval.strval. */
+{letter}({letter}|{digit})* {
+    char *name = strndup(yytext, yyleng);
+    yylval.strval = name;
+    return IDENTIFIER;
+}
+
+    /* Integer constant: decimal, octal (leading 0) or hexadecimal (0x/0X). */
+({digit}+)|(0[xX][0-9a-fA-F]+) {
+    /* Base 0 makes strtoul choose the radix from the literal's prefix. */
+    char *end;
+    unsigned long value = strtoul(yytext, &end, 0);
+
+    if (*end != '\0') {
+        /* The pattern only lets this happen for a leading-zero literal
+         * that contains the digit 8 or 9. */
+        fprintf(stderr, "invalid octal constant: %s\n", yytext);
+    }
+    /* NOTE(review): values wider than int are narrowed by this conversion. */
+    yylval.intval = (int)value;
+    return INT_CONST;
+}
+
+    /* Character constant.
+     * NOTE(review): the pattern only accepts a single {letter} between the
+     * quotes; digits, punctuation and escape sequences are not matched. */
+'{letter}' {
+    /* yytext holds the whole lexeme, quotes included: index 0 is the opening
+     * quote and index 1 is the character itself. */
+    yylval.intval = yytext[1];
+    return CHAR_CONST;
+}
+
+    /* String literal on a single line; no escape handling is done here. */
+\"[^\n\"]*\" {
+    /* Skip the opening quote and leave out the closing one. */
+    const char *body = yytext + 1;
+    size_t body_len = strlen(yytext) - 2;
+
+    yylval.strval = strndup(body, body_len);
+    return STR_CONST;
+}
+
+    /* Multi-character operators.  ">=" is needed so that OPT_GE, which the
+     * grammar declares, is actually produced instead of '>' followed by '='. */
+"||"    { return OPT_OR; }
+"&&"    { return OPT_AND; }
+"=="    { return OPT_EQ; }
+"!="    { return OPT_NE; }
+"<="    { return OPT_LE; }
+">="    { return OPT_GE; }
+"<<"    { return OPT_SHL; }
+">>"    { return OPT_SHR; }
+"++"    { return OPT_INC; }
+"--"    { return OPT_DEC; }
+"->"    { return OPT_PTR; }
+
+    /* Compound assignment operators. */
+"*="    { return ASS_MUL; }
+"/="    { return ASS_DIV; }
+"%="    { return ASS_MOD; }
+"+="    { return ASS_ADD; }
+"-="    { return ASS_SUB; }
+"<<="   { return ASS_SHL; }
+">>="   { return ASS_SHR; }
+"&="    { return ASS_AND; }
+"^="    { return ASS_XOR; }
+"|="    { return ASS_OR; }
+
+    /* Single-character operators and punctuation are returned as their own
+     * character code. */
+[(){}\[\];,.=<>!~&|\^+\-*/%]    { return yytext[0]; }
+
+[ \t\r\n]   { /* skip whitespace */ }
+
+    /* Any character that no rule above accepts. */
+.           { return UNKNOWN; }
+%%
diff --git a/cibic.y b/cibic.y
new file mode 100644
index 0000000..92f0cfe
--- /dev/null
+++ b/cibic.y
@@ -0,0 +1,43 @@
+%{
+#include <stdio.h>
+
+/*
+ * The generated parser calls yylex() and yyerror(), and main() below calls
+ * yylex(); declare both so none of those calls relies on an implicit
+ * function declaration.
+ */
+int yylex(void);
+int yyerror(char *s);
+%}
+/*
+ * Token declarations.  The order is significant: it fixes the numeric codes
+ * that the driver in this file prints.
+ */
+
+/* Identifiers and literal constants */
+%token IDENTIFIER INT_CONST CHAR_CONST STR_CONST
+
+/* Keywords */
+%token KW_VOID KW_CHAR KW_INT KW_STRUCT KW_UNION
+%token KW_IF KW_ELSE KW_WHILE KW_FOR KW_CONT KW_BREAK KW_RET KW_SIZEOF
+
+/* Multi-character operators */
+%token OPT_OR OPT_AND OPT_EQ OPT_NE OPT_LE OPT_GE
+%token OPT_SHL OPT_SHR OPT_INC OPT_DEC OPT_PTR
+
+/* Compound assignment operators */
+%token ASS_MUL ASS_DIV ASS_MOD ASS_ADD ASS_SUB
+%token ASS_SHL ASS_SHR ASS_AND ASS_XOR ASS_OR
+
+/* Returned by the scanner for a character it does not recognise */
+%token UNKNOWN
+
+/* Semantic value: a number for integer/character constants, or a string
+ * for identifiers and string literals. */
+%union {
+    int intval;
+    char *strval;
+}
+%%
+/* Placeholder grammar: a program is a single identifier. */
+program
+    : body { printf("\n"); }   /* the statement needs its ';' to compile */
+    ;
+
+body
+    : IDENTIFIER
+    ;
+%%
+/*
+ * End-of-input hook for the scanner.  Always returns 1, which per the flex
+ * manual tells the scanner there is no further input to continue with.
+ */
+int yywrap()
+{
+    enum { NO_MORE_INPUT = 1 };
+
+    return NO_MORE_INPUT;
+}
+
+/*
+ * Error-reporting callback.
+ *
+ * s: message text describing the error.
+ * Writes the message followed by a newline to stderr and returns 0, so the
+ * function no longer falls off its end without a value and errors are no
+ * longer discarded silently.
+ */
+int yyerror(char *s) {
+    fprintf(stderr, "%s\n", s);
+    return 0;
+}
+
+/* Scanner input stream; not reassigned here, so the scanner's default
+ * input is used. */
+extern FILE *yyin;
+
+/* Declared here so the call below does not rely on an implicit declaration. */
+int yylex(void);
+
+/*
+ * Scanner test driver: pulls tokens from yylex() until it returns 0 and
+ * prints each token's numeric code, followed by the semantic value for
+ * identifiers, integer constants and string constants.  The parser itself
+ * is not invoked.
+ */
+int main() {
+    int ret;
+
+    while ((ret = yylex()) != 0)
+    {
+        printf("%d\n", ret);
+        if (ret == IDENTIFIER)
+            printf("id: %s\n", yylval.strval);
+        else if (ret == INT_CONST)
+            printf("int: %d\n", yylval.intval);
+        else if (ret == STR_CONST)
+            printf("str: %s\n", yylval.strval);
+    }
+    return 0;
+}