about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Teddy <[email protected]> 2013-08-05 23:11:39 +0800
committer Teddy <[email protected]> 2013-08-05 23:11:39 +0800
commit 5dab7df6830018c5c28ebcc7cc3b242ccad2736b (patch)
tree ae6a0cbc8c63c10185047abdd218f9f3b1d30bb6
parent 45dec735ec131c18d70ad202ed1446982b99ed9f (diff)
added string quoting support in the parser [numeric_types]
-rw-r--r-- consts.cpp 5
-rw-r--r-- consts.h 3
-rw-r--r-- parser.cpp 73
-rw-r--r-- parser.h 1
4 files changed, 61 insertions, 21 deletions
diff --git a/consts.cpp b/consts.cpp
index e41c981..51267a1 100644
--- a/consts.cpp
+++ b/consts.cpp
@@ -3,12 +3,13 @@
const char *ERR_MSG[] = {
"\"%s\" is not an valid identifier",
"Cannot apply the operation \"%s\"",
- "Unbound variable: \"%s\"",
+ "Unbound variable: %s",
"Missing or extra expression in (%s)",
"Empty parameter list in (%s)",
"Wrong number of arguments to procedure (%s)",
"Illegal empty combination ()",
"Unexpected \")\"",
"Wrong type (expecting %s)",
- "Internal Error !!! File a bug please!"
+ "Internal Error !!! File a bug please!",
+ "Illegal character in escape sequence: #\\%s"
};
diff --git a/consts.h b/consts.h
index e88cd31..cf07edb 100644
--- a/consts.h
+++ b/consts.h
@@ -11,7 +11,8 @@ enum ErrCode {
SYN_ERR_EMPTY_COMB,
READ_ERR_UNEXPECTED_RIGHT_BRACKET,
RUN_ERR_WRONG_TYPE,
- INT_ERR
+ INT_ERR,
+ PAR_ERR_ILLEGAL_CHAR_IN_ESC
};
extern const char *ERR_MSG[];
diff --git a/parser.cpp b/parser.cpp
index 499b832..cb8695f 100644
--- a/parser.cpp
+++ b/parser.cpp
@@ -12,7 +12,7 @@ static char buff[TOKEN_BUFF_SIZE];
static EvalObj *parse_stack[PARSE_STACK_SIZE];
extern Cons *empty_list;
-Tokenizor::Tokenizor() : stream(stdin), buff_ptr(buff) {}
+Tokenizor::Tokenizor() : stream(stdin), buff_ptr(buff), escaping(false) {}
void Tokenizor::set_stream(FILE *_stream) {
stream = _stream;
}
@@ -27,37 +27,74 @@ void Tokenizor::set_stream(FILE *_stream) {
(IS_BRACKET(ch) || IS_SPACE(ch))
#define IS_COMMENT(ch) \
((ch) == ';')
+#define IS_QUOTE(ch) \
+ ((ch) == '\"')
+#define IS_SLASH(ch) \
+ ((ch) == '\\')
#define POP \
-do { \
- *buff_ptr = 0; \
- ret = string(buff); \
- buff_ptr = buff; \
-} while (0)
+ do { \
+ *buff_ptr = 0; \
+ ret = string(buff); \
+ buff_ptr = buff; \
+ } while (0)
bool Tokenizor::get_token(string &ret) {
char ch;
bool flag = false;
while (fread(&ch, 1, 1, stream))
{
- if (buff_ptr != buff &&
- (IS_BRACKET(*buff) ||
- IS_DELIMITER(ch) ||
- IS_COMMENT(ch)))
+ if (escaping)
{
- if (IS_COMMENT(*buff))
+ escaping = false;
+ switch (ch)
{
- if (IS_NEWLINE(ch)) buff_ptr = buff;
- else buff_ptr = buff + 1;
+ case '\\': *buff_ptr++ = '\\'; break;
+ case '\"': *buff_ptr++ = '\"'; break;
+ case 'n': *buff_ptr++ = '\n'; break;
+ case 't': *buff_ptr++ = '\t'; break;
+ default: {
+ buff_ptr = buff;
+ throw TokenError(string("") + ch,
+ PAR_ERR_ILLEGAL_CHAR_IN_ESC);
+ }
}
- else
+ }
+ else
+ {
+ bool in_quote = buff_ptr != buff && IS_QUOTE(*buff);
+ if (buff_ptr != buff &&
+ (IS_BRACKET(*buff) ||
+ IS_DELIMITER(ch) ||
+ IS_COMMENT(ch) ||
+ IS_QUOTE(ch)))
+ {
+ if (IS_COMMENT(*buff))
+ {
+ if (IS_NEWLINE(ch)) buff_ptr = buff;
+ else buff_ptr = buff + 1;
+ }
+ else if (!in_quote)
+ {
+ POP;
+ flag = true;
+ }
+ else if (IS_QUOTE(ch))
+ {
+ *buff_ptr++ = '\"';
+ POP;
+ return true; // discard current slash
+ }
+ }
+ if (in_quote || !IS_SPACE(ch))
{
- POP;
- flag = true;
+ if (in_quote && IS_SLASH(ch))
+ escaping = true;
+ else
+ *buff_ptr++ = ch;
}
+ if (flag) return true;
}
- if (!IS_SPACE(ch)) *buff_ptr++ = ch;
- if (flag) return true;
}
if (buff_ptr != buff) POP;
return false; // can't read more
diff --git a/parser.h b/parser.h
index fa00ccc..11a03a5 100644
--- a/parser.h
+++ b/parser.h
@@ -16,6 +16,7 @@ class Tokenizor {
private:
FILE *stream;
char *buff_ptr;
+ bool escaping;
public:
Tokenizor();
/** Set the stream to be read from (without setting this, the default