From 5dab7df6830018c5c28ebcc7cc3b242ccad2736b Mon Sep 17 00:00:00 2001 From: Teddy Date: Mon, 5 Aug 2013 23:11:39 +0800 Subject: added string quoting support in the parser --- consts.cpp | 5 +++-- consts.h | 3 ++- parser.cpp | 73 ++++++++++++++++++++++++++++++++++++++++++++++---------------- parser.h | 1 + 4 files changed, 61 insertions(+), 21 deletions(-) diff --git a/consts.cpp b/consts.cpp index e41c981..51267a1 100644 --- a/consts.cpp +++ b/consts.cpp @@ -3,12 +3,13 @@ const char *ERR_MSG[] = { "\"%s\" is not an valid identifier", "Cannot apply the operation \"%s\"", - "Unbound variable: \"%s\"", + "Unbound variable: %s", "Missing or extra expression in (%s)", "Empty parameter list in (%s)", "Wrong number of arguments to procedure (%s)", "Illegal empty combination ()", "Unexpected \")\"", "Wrong type (expecting %s)", - "Internal Error !!! File a bug please!" + "Internal Error !!! File a bug please!", + "Illegal character in escape sequence: #\\%s" }; diff --git a/consts.h b/consts.h index e88cd31..cf07edb 100644 --- a/consts.h +++ b/consts.h @@ -11,7 +11,8 @@ enum ErrCode { SYN_ERR_EMPTY_COMB, READ_ERR_UNEXPECTED_RIGHT_BRACKET, RUN_ERR_WRONG_TYPE, - INT_ERR + INT_ERR, + PAR_ERR_ILLEGAL_CHAR_IN_ESC }; extern const char *ERR_MSG[]; diff --git a/parser.cpp b/parser.cpp index 499b832..cb8695f 100644 --- a/parser.cpp +++ b/parser.cpp @@ -12,7 +12,7 @@ static char buff[TOKEN_BUFF_SIZE]; static EvalObj *parse_stack[PARSE_STACK_SIZE]; extern Cons *empty_list; -Tokenizor::Tokenizor() : stream(stdin), buff_ptr(buff) {} +Tokenizor::Tokenizor() : stream(stdin), buff_ptr(buff), escaping(false) {} void Tokenizor::set_stream(FILE *_stream) { stream = _stream; } @@ -27,37 +27,74 @@ void Tokenizor::set_stream(FILE *_stream) { (IS_BRACKET(ch) || IS_SPACE(ch)) #define IS_COMMENT(ch) \ ((ch) == ';') +#define IS_QUOTE(ch) \ + ((ch) == '\"') +#define IS_SLASH(ch) \ + ((ch) == '\\') #define POP \ -do { \ - *buff_ptr = 0; \ - ret = string(buff); \ - buff_ptr = buff; \ -} while (0) + do { \ + *buff_ptr = 0; \ + ret = string(buff); \ + buff_ptr = buff; \ + } while (0) bool Tokenizor::get_token(string &ret) { char ch; bool flag = false; while (fread(&ch, 1, 1, stream)) { - if (buff_ptr != buff && - (IS_BRACKET(*buff) || - IS_DELIMITER(ch) || - IS_COMMENT(ch))) + if (escaping) { - if (IS_COMMENT(*buff)) + escaping = false; + switch (ch) { - if (IS_NEWLINE(ch)) buff_ptr = buff; - else buff_ptr = buff + 1; + case '\\': *buff_ptr++ = '\\'; break; + case '\"': *buff_ptr++ = '\"'; break; + case 'n': *buff_ptr++ = '\n'; break; + case 't': *buff_ptr++ = '\t'; break; + default: { + buff_ptr = buff; + throw TokenError(string("") + ch, + PAR_ERR_ILLEGAL_CHAR_IN_ESC); + } } - else + } + else + { + bool in_quote = buff_ptr != buff && IS_QUOTE(*buff); + if (buff_ptr != buff && + (IS_BRACKET(*buff) || + IS_DELIMITER(ch) || + IS_COMMENT(ch) || + IS_QUOTE(ch))) + { + if (IS_COMMENT(*buff)) + { + if (IS_NEWLINE(ch)) buff_ptr = buff; + else buff_ptr = buff + 1; + } + else if (!in_quote) + { + POP; + flag = true; + } + else if (IS_QUOTE(ch)) + { + *buff_ptr++ = '\"'; + POP; + return true; // discard current slash + } + } + if (in_quote || !IS_SPACE(ch)) { - POP; - flag = true; + if (in_quote && IS_SLASH(ch)) + escaping = true; + else + *buff_ptr++ = ch; } + if (flag) return true; } - if (!IS_SPACE(ch)) *buff_ptr++ = ch; - if (flag) return true; } if (buff_ptr != buff) POP; return false; // can't read more diff --git a/parser.h b/parser.h index fa00ccc..11a03a5 100644 --- a/parser.h +++ b/parser.h @@ -16,6 +16,7 @@ class Tokenizor { private: FILE *stream; char *buff_ptr; + bool escaping; public: Tokenizor(); /** Set the stream to be read from (without setting this, the default -- cgit v1.2.3