From 5dab7df6830018c5c28ebcc7cc3b242ccad2736b Mon Sep 17 00:00:00 2001 From: Teddy Date: Mon, 5 Aug 2013 23:11:39 +0800 Subject: added string quoting support in the parser --- parser.cpp | 73 ++++++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 18 deletions(-) (limited to 'parser.cpp') diff --git a/parser.cpp b/parser.cpp index 499b832..cb8695f 100644 --- a/parser.cpp +++ b/parser.cpp @@ -12,7 +12,7 @@ static char buff[TOKEN_BUFF_SIZE]; static EvalObj *parse_stack[PARSE_STACK_SIZE]; extern Cons *empty_list; -Tokenizor::Tokenizor() : stream(stdin), buff_ptr(buff) {} +Tokenizor::Tokenizor() : stream(stdin), buff_ptr(buff), escaping(false) {} void Tokenizor::set_stream(FILE *_stream) { stream = _stream; } @@ -27,37 +27,74 @@ void Tokenizor::set_stream(FILE *_stream) { (IS_BRACKET(ch) || IS_SPACE(ch)) #define IS_COMMENT(ch) \ ((ch) == ';') +#define IS_QUOTE(ch) \ + ((ch) == '\"') +#define IS_SLASH(ch) \ + ((ch) == '\\') #define POP \ -do { \ - *buff_ptr = 0; \ - ret = string(buff); \ - buff_ptr = buff; \ -} while (0) + do { \ + *buff_ptr = 0; \ + ret = string(buff); \ + buff_ptr = buff; \ + } while (0) bool Tokenizor::get_token(string &ret) { char ch; bool flag = false; while (fread(&ch, 1, 1, stream)) { - if (buff_ptr != buff && - (IS_BRACKET(*buff) || - IS_DELIMITER(ch) || - IS_COMMENT(ch))) + if (escaping) { - if (IS_COMMENT(*buff)) + escaping = false; + switch (ch) { - if (IS_NEWLINE(ch)) buff_ptr = buff; - else buff_ptr = buff + 1; + case '\\': *buff_ptr++ = '\\'; break; + case '\"': *buff_ptr++ = '\"'; break; + case 'n': *buff_ptr++ = '\n'; break; + case 't': *buff_ptr++ = '\t'; break; + default: { + buff_ptr = buff; + throw TokenError(string("") + ch, + PAR_ERR_ILLEGAL_CHAR_IN_ESC); + } } - else + } + else + { + bool in_quote = buff_ptr != buff && IS_QUOTE(*buff); + if (buff_ptr != buff && + (IS_BRACKET(*buff) || + IS_DELIMITER(ch) || + IS_COMMENT(ch) || + IS_QUOTE(ch))) + { + if (IS_COMMENT(*buff)) + { + if (IS_NEWLINE(ch)) buff_ptr = buff; + else buff_ptr = buff + 1; + } + else if (!in_quote) + { + POP; + flag = true; + } + else if (IS_QUOTE(ch)) + { + *buff_ptr++ = '\"'; + POP; + return true; // discard current slash + } + } + if (in_quote || !IS_SPACE(ch)) { - POP; - flag = true; + if (in_quote && IS_SLASH(ch)) + escaping = true; + else + *buff_ptr++ = ch; } + if (flag) return true; } - if (!IS_SPACE(ch)) *buff_ptr++ = ch; - if (flag) return true; } if (buff_ptr != buff) POP; return false; // can't read more -- cgit v1.2.3