diff options
-rw-r--r-- | consts.cpp | 5 | ||||
-rw-r--r-- | consts.h | 3 | ||||
-rw-r--r-- | parser.cpp | 73 | ||||
-rw-r--r-- | parser.h | 1 |
4 files changed, 61 insertions, 21 deletions
@@ -3,12 +3,13 @@ const char *ERR_MSG[] = { "\"%s\" is not an valid identifier", "Cannot apply the operation \"%s\"", - "Unbound variable: \"%s\"", + "Unbound variable: %s", "Missing or extra expression in (%s)", "Empty parameter list in (%s)", "Wrong number of arguments to procedure (%s)", "Illegal empty combination ()", "Unexpected \")\"", "Wrong type (expecting %s)", - "Internal Error !!! File a bug please!" + "Internal Error !!! File a bug please!", + "Illegal character in escape sequence: #\\%s" }; @@ -11,7 +11,8 @@ enum ErrCode { SYN_ERR_EMPTY_COMB, READ_ERR_UNEXPECTED_RIGHT_BRACKET, RUN_ERR_WRONG_TYPE, - INT_ERR + INT_ERR, + PAR_ERR_ILLEGAL_CHAR_IN_ESC }; extern const char *ERR_MSG[]; @@ -12,7 +12,7 @@ static char buff[TOKEN_BUFF_SIZE]; static EvalObj *parse_stack[PARSE_STACK_SIZE]; extern Cons *empty_list; -Tokenizor::Tokenizor() : stream(stdin), buff_ptr(buff) {} +Tokenizor::Tokenizor() : stream(stdin), buff_ptr(buff), escaping(false) {} void Tokenizor::set_stream(FILE *_stream) { stream = _stream; } @@ -27,37 +27,74 @@ void Tokenizor::set_stream(FILE *_stream) { (IS_BRACKET(ch) || IS_SPACE(ch)) #define IS_COMMENT(ch) \ ((ch) == ';') +#define IS_QUOTE(ch) \ + ((ch) == '\"') +#define IS_SLASH(ch) \ + ((ch) == '\\') #define POP \ -do { \ - *buff_ptr = 0; \ - ret = string(buff); \ - buff_ptr = buff; \ -} while (0) + do { \ + *buff_ptr = 0; \ + ret = string(buff); \ + buff_ptr = buff; \ + } while (0) bool Tokenizor::get_token(string &ret) { char ch; bool flag = false; while (fread(&ch, 1, 1, stream)) { - if (buff_ptr != buff && - (IS_BRACKET(*buff) || - IS_DELIMITER(ch) || - IS_COMMENT(ch))) + if (escaping) { - if (IS_COMMENT(*buff)) + escaping = false; + switch (ch) { - if (IS_NEWLINE(ch)) buff_ptr = buff; - else buff_ptr = buff + 1; + case '\\': *buff_ptr++ = '\\'; break; + case '\"': *buff_ptr++ = '\"'; break; + case 'n': *buff_ptr++ = '\n'; break; + case 't': *buff_ptr++ = '\t'; break; + default: { + buff_ptr = buff; + throw TokenError(string("") + ch, + PAR_ERR_ILLEGAL_CHAR_IN_ESC); + } } - else + } + else + { + bool in_quote = buff_ptr != buff && IS_QUOTE(*buff); + if (buff_ptr != buff && + (IS_BRACKET(*buff) || + IS_DELIMITER(ch) || + IS_COMMENT(ch) || + IS_QUOTE(ch))) + { + if (IS_COMMENT(*buff)) + { + if (IS_NEWLINE(ch)) buff_ptr = buff; + else buff_ptr = buff + 1; + } + else if (!in_quote) + { + POP; + flag = true; + } + else if (IS_QUOTE(ch)) + { + *buff_ptr++ = '\"'; + POP; + return true; // discard current slash + } + } + if (in_quote || !IS_SPACE(ch)) { - POP; - flag = true; + if (in_quote && IS_SLASH(ch)) + escaping = true; + else + *buff_ptr++ = ch; } + if (flag) return true; } - if (!IS_SPACE(ch)) *buff_ptr++ = ch; - if (flag) return true; } if (buff_ptr != buff) POP; return false; // can't read more @@ -16,6 +16,7 @@ class Tokenizor { private: FILE *stream; char *buff_ptr; + bool escaping; public: Tokenizor(); /** Set the stream to be read from (without setting this, the default |