blob: 8fbbc4e771a7ea63dbabaa1e1e67a44178a8787b (
plain) (
tree)
|
|
#include <cstdio>
#include <cctype>
#include <sstream>
#include "parser.h"
#include "exc.h"
#include "consts.h"
#include "builtin.h"
#include "gc.h"
using std::stringstream;
// Character buffer in which Tokenizor::get_token assembles the current token.
// It is file-static, and every Tokenizor points its buff_ptr at it on
// construction, so all Tokenizor instances in this file share the same storage.
static char buff[TOKEN_BUFF_SIZE];
// Explicit stack used by ASTGenerator::absorb while assembling a datum; it
// holds both bracket markers and already-parsed objects.
static FrameObj *parse_stack[PARSE_STACK_SIZE];
// Defined in another translation unit; used below as the terminator when
// building lists.
extern Pair *empty_list;
// Initial tokenizer state: input is taken from stdin, the write cursor sits
// at the start of the shared token buffer, and no escape sequence is pending.
Tokenizor::Tokenizor()
    : stream(stdin),
      buff_ptr(buff),
      escaping(false)
{
}
void Tokenizor::set_stream(FILE *_stream) {
stream = _stream;
}
// Character-class predicates used by Tokenizor::get_token. Each expands to a
// plain boolean expression; the compound ones evaluate `ch` more than once.

// Line feed only.
#define IS_NEWLINE(ch) \
((ch) == '\n')
// Double-quote character.
#define IS_QUOTE(ch) \
((ch) == '\"')
// Backslash (despite the name); get_token treats it as the escape introducer
// inside a double-quoted token.
#define IS_SLASH(ch) \
((ch) == '\\')
// Opening or closing parenthesis.
#define IS_BRACKET(ch) \
((ch) == '(' || (ch) == ')')
// Space, tab or line feed.
#define IS_SPACE(ch) \
((ch) == ' ' || (ch) == '\t' || IS_NEWLINE(ch))
// Semicolon, which starts a comment.
#define IS_COMMENT(ch) \
((ch) == ';')
// Single quote, the shorthand for `quote`.
#define IS_LITERAL(ch) \
((ch) == '\'')
// Any character that ends the token currently being collected.
#define IS_DELIMITER(ch) \
(IS_BRACKET(ch) || IS_SPACE(ch) || \
IS_COMMENT(ch) || IS_QUOTE(ch))
// Emit the buffered token: NUL-terminate the buffer, copy it into `ret`, and
// rewind the write cursor. Expects `ret`, `buff_ptr` and `buff` to be in scope
// at the point of use.
#define POP \
do { \
*buff_ptr = 0; \
ret = string(buff); \
buff_ptr = buff; \
} while (0)
// Most recently buffered character. Reads one position before `buff_ptr`, so
// it is only meaningful while the buffer is non-empty.
#define TOP (*(buff_ptr - 1))
// Return a copy of `str` in which every ASCII upper-case letter ('A'..'Z') is
// replaced by its lower-case counterpart. All other bytes are left untouched.
string str_to_lower(string str) {
    const char case_shift = 'a' - 'A';
    for (string::iterator it = str.begin(); it != str.end(); ++it)
    {
        const bool is_upper = (*it >= 'A') && (*it <= 'Z');
        if (is_upper)
            *it += case_shift;
    }
    return str;
}
// Read characters from `stream` one at a time until a complete token has been
// collected, and store that token in `ret`.
//
// State that survives between calls lives in the members `buff_ptr` and
// `escaping` and in the file-level `buff`: the character that terminated one
// token is usually already buffered as the start of the next one.
//
// Returns true when a token was emitted into `ret`. Returns false when fread
// yields no more data; in that case any text still buffered is copied into
// `ret` before returning, but the return value is still false.
//
// Throws TokenError (PAR_ERR_ILLEGAL_CHAR_IN_ESC) when a backslash inside a
// double-quoted token is followed by anything other than \, ", n or t.
//
// NOTE(review): nothing in this function checks buff_ptr against the end of
// buff (TOKEN_BUFF_SIZE) before writing.
bool Tokenizor::get_token(string &ret) {
char ch;
// Set once a token has been emitted by POP; the function still has to buffer
// the current character before it returns.
bool flag = false;
while (fread(&ch, 1, 1, stream))
{
if (escaping)
{
// Previous character was a backslash inside a string: translate the escape.
escaping = false;
switch (ch)
{
case '\\': *buff_ptr++ = '\\'; break;
case '\"': *buff_ptr++ = '\"'; break;
case 'n': *buff_ptr++ = '\n'; break;
case 't': *buff_ptr++ = '\t'; break;
default: {
// Unsupported escape: discard the partial token and report the character.
buff_ptr = buff;
throw TokenError(string("") + ch,
PAR_ERR_ILLEGAL_CHAR_IN_ESC);
}
}
}
else
{
// The buffered token is a string literal iff it starts with a double quote.
bool in_quote = buff_ptr != buff && IS_QUOTE(*buff);
if (buff_ptr != buff)
{
// A lone single quote is a complete token by itself.
if (buff_ptr - buff == 1 && IS_LITERAL(TOP))
{
POP;
flag = true;
}
// Either the buffer ends with a bracket, or the incoming character is a
// delimiter: the buffered text may be finished.
else if ((IS_BRACKET(TOP) || IS_DELIMITER(ch)))
{
if (IS_COMMENT(*buff))
{
// Inside a comment: a newline ends it (buffer cleared); otherwise the
// buffer is cut back to just the leading ';' so the comment text is dropped.
if (IS_NEWLINE(ch)) buff_ptr = buff;
else buff_ptr = buff + 1;
}
else if (!in_quote) // not in a double-quote
{
// Keep "#(" together as one token; everything else is emitted now.
if (!(buff_ptr - buff == 1 && ch == '(' && TOP == '#'))
{
POP;
flag = true;
}
}
else if (IS_QUOTE(ch))
{
// inside a double-quoted token that is now being closed
*buff_ptr++ = '\"';
POP;
return true; // return now so the closing quote is not buffered again below
}
}
}
// Buffer the current character: whitespace is dropped unless inside a string,
// and a backslash inside a string only arms the escape state.
if (in_quote || !IS_SPACE(ch))
{
if (in_quote && IS_SLASH(ch))
escaping = true;
else
*buff_ptr++ = ch;
}
if (flag) return true;
}
}
// Input exhausted: hand back whatever is left in the buffer via `ret`.
if (buff_ptr != buff) POP;
return false; // can't read more
}
// Nothing to initialise: the generator sets up no state of its own here.
ASTGenerator::ASTGenerator()
{
}
// Convert the text of a single token into an object.
//
// Each literal type's from_string is tried in a fixed order (boolean,
// character, integer, rational, real, complex, string) and the first non-null
// result is returned. If none of them accepts the token, it becomes a symbol
// whose name is the ASCII-lower-cased token text.
EvalObj *ASTGenerator::to_obj(const string &str) {
    EvalObj *parsed = BoolObj::from_string(str);
    if (!parsed) parsed = CharObj::from_string(str);
    if (!parsed) parsed = IntNumObj::from_string(str);
    if (!parsed) parsed = RatNumObj::from_string(str);
    if (!parsed) parsed = RealNumObj::from_string(str);
    if (!parsed) parsed = CompNumObj::from_string(str);
    if (!parsed) parsed = StrObj::from_string(str);
    if (parsed)
        return parsed;
    // No literal parser matched, so the token is treated as a symbol.
    return new SymObj(str_to_lower(str));
}
// Helpers for inspecting entries of the parse stack in ASTGenerator::absorb.

// Downcast a parse-stack entry to EvalObj*. This is an unchecked static_cast,
// so the caller must already know the entry is not a bracket marker.
#define TO_EVAL(ptr) \
(static_cast<EvalObj*>(ptr))
// Downcast a parse-stack entry to ParseBracket*. Also unchecked; use only
// after IS_BRAKET has returned true for the entry.
#define TO_BRACKET(ptr) \
(static_cast<ParseBracket*>(ptr))
// True when the entry reports itself as a bracket marker via
// is_parse_bracket(). (The macro name is spelled "BRAKET" throughout.)
#define IS_BRAKET(ptr) \
((ptr)->is_parse_bracket())
// Pull tokens from `tk` until one complete datum has been assembled.
//
// The file-level `parse_stack` is used as an explicit stack. Opening tokens
// push a ParseBracket marker (btype 0 for "(", 1 for "#(", 2 for "'"), atoms
// push the object built by to_obj(), and ")" collapses everything above the
// nearest marker into a list or a vector.
//
// Returns a freshly allocated one-element Pair whose car is the datum, or
// NULL as soon as the tokenizer reports that it cannot deliver another token.
//
// Throws TokenError (RUN_ERR_STACK_OVERFLOW) when the parse stack is full, and
// NormalError for a ")" on an empty stack (READ_ERR_UNEXPECTED_RIGHT_BRACKET),
// a misplaced "." (PAR_ERR_IMPROPER_PAIR), or a "." inside a vector
// (PAR_ERR_IMPROPER_VECT).
Pair *ASTGenerator::absorb(Tokenizor *tk) {
    FrameObj **top_ptr = parse_stack;   // one past the current top entry
    for (;;)
    {
        if (top_ptr == parse_stack + PARSE_STACK_SIZE)
            throw TokenError("Parser", RUN_ERR_STACK_OVERFLOW);

        // Quote sugar: a datum sitting directly on a quote marker is rewritten
        // to (quote datum). This has to repeat, because the rewritten datum may
        // itself sit on another quote marker (e.g. ''a); folding only once
        // would leave the outer marker pending and read past the datum.
        while (top_ptr - parse_stack > 1 &&
               !IS_BRAKET(*(top_ptr - 1)) &&
               IS_BRAKET(*(top_ptr - 2)) &&
               TO_BRACKET(*(top_ptr - 2))->btype == 2)
        {
            top_ptr -= 2;
            Pair *lst_cdr = new Pair(TO_EVAL(*(top_ptr + 1)), empty_list);
            Pair *lst = new Pair(new SymObj("quote"), lst_cdr);
            *top_ptr++ = lst;
        }

        // A non-marker at the bottom of the stack means a whole datum is ready.
        if (top_ptr > parse_stack && !IS_BRAKET(*parse_stack))
            return new Pair(TO_EVAL(*(top_ptr - 1)), empty_list);

        string token;
        if (!tk->get_token(token)) return NULL;

        if (token == "(")           // a list
            *top_ptr++ = new ParseBracket(0);
        else if (token == "#(")     // a vector
            *top_ptr++ = new ParseBracket(1);
        else if (token == "\'")     // syntactic sugar for quote
            *top_ptr++ = new ParseBracket(2);
        else if (token == ")")
        {
            if (top_ptr == parse_stack)
                throw NormalError(READ_ERR_UNEXPECTED_RIGHT_BRACKET);

            // Pop entries down to the nearest marker, consing them onto `lst`
            // so the resulting list keeps source order.
            EvalObj *lst = empty_list;
            bool improper = false;
            while (top_ptr >= parse_stack && !IS_BRAKET(*(--top_ptr)))
            {
                EvalObj *obj = TO_EVAL(*top_ptr);
                if (obj->is_sym_obj() && static_cast<SymObj*>(obj)->val == ".")
                {
                    // A "." is only accepted once, and only when exactly one
                    // element has been collected after it; that element then
                    // becomes the tail of the (improper) list.
                    if (improper ||
                        lst == empty_list ||
                        TO_PAIR(lst)->cdr != empty_list)
                        throw NormalError(PAR_ERR_IMPROPER_PAIR);
                    improper = true;
                    Pair *_lst = TO_PAIR(lst);
                    lst = _lst->car;
                    delete _lst;
                    delete obj;
                }
                else
                {
                    Pair *_lst = new Pair(obj, lst);    // Collect the list
                    lst = _lst;
                }
            }

            // top_ptr now points at the marker; the result overwrites it.
            // NOTE(review): if that marker is a quote marker (btype 2), neither
            // branch below runs, so the marker is dropped and nothing is
            // pushed -- confirm whether this should be reported as an error.
            ParseBracket *bptr = TO_BRACKET(*top_ptr);
            if (bptr->btype == 0)
                *top_ptr++ = lst;
            else if (bptr->btype == 1)
            {
                if (improper) throw NormalError(PAR_ERR_IMPROPER_VECT);
                VecObj *vec = new VecObj();
                for (Pair *ptr = TO_PAIR(lst); ptr != empty_list; ptr = TO_PAIR(ptr->cdr))
                    vec->push_back(ptr->car);
                *top_ptr++ = vec;
                // The temporary list was only needed to restore element order.
                if (lst != empty_list) delete lst;
            }
        }
        else
            *top_ptr++ = ASTGenerator::to_obj(token);
    }
}
|