diff options
Diffstat (limited to 'src/lex.cpp')
-rw-r--r-- | src/lex.cpp | 282 |
1 files changed, 216 insertions, 66 deletions
diff --git a/src/lex.cpp b/src/lex.cpp
index 5b6eb73..2ac838f 100644
--- a/src/lex.cpp
+++ b/src/lex.cpp
@@ -16,8 +16,7 @@
  */
 
 /** @file
- * @brief Lexing (build a CST from a string).
- * A CST is a lexeme, or a tuple of CST's.
+ * @brief Parsing (build an AST from text)
  */
 
 #include <stdio.h>
@@ -26,8 +25,8 @@
 
 using namespace std;
 
-inline int
-readChar(Cursor& cur, istream& in)
+static inline int
+read_char(Cursor& cur, istream& in)
 {
 	int ch = in.get();
 	switch (ch) {
@@ -37,81 +36,232 @@ readChar(Cursor& cur, istream& in)
 	return ch;
 }
 
+/// Consume any run of whitespace at the current position of @a in.
+static inline void
+skip_space(Cursor& cur, istream& in)
+{
+	while (isspace(in.peek()))
+		read_char(cur, in);
+}
+
+/// Consume one character, asserting that it is @a character.
+static inline void
+eat_char(Cursor& cur, istream& in, const char character)
+{
+	const char c = read_char(cur, in);
+	assert(c == character);
+	(void)c; // silence unused-variable warnings when NDEBUG disables assert
+}
+
+/// Read a double-quoted string literal starting at the opening quote.
+///
+/// Only the escapes backslash-quote and backslash-backslash are recognised.
+/// @throw Error on an unknown escape or if the input ends inside the string.
+static AST*
+read_string(Cursor& cur, istream& in)
+{
+	string str;
+	Cursor loc = cur;
+	eat_char(cur, in, '"');
+	while (true) {
+		// Keep the character as int so EOF stays distinct from every real char
+		int c = read_char(cur, in);
+		if (c == EOF)
+			throw Error(cur, "unexpected end of file");
+		if (c == '"')
+			break;
+		if (c == '\\') { // string escape
+			switch (c = read_char(cur, in)) {
+			case '"':
+				str.push_back('"');
+				break;
+			case '\\':
+				str.push_back('\\');
+				break;
+			case EOF:
+				throw Error(cur, "unexpected end of file");
+			default:
+				in.putback(static_cast<char>(c)); // the stream being parsed, not cin
+				throw Error(cur, string("unknown string escape `\\") + (char)c + "'");
+			}
+		} else { // any other character
+			str.push_back(static_cast<char>(c));
+		}
+	}
+	return new AString(loc, str);
+}
+
+/// Skip the remainder of the current line (a `;' comment).
+/// @return Always NULL, since a comment produces no expression.
+static AST*
+read_line_comment(Cursor& cur, istream& in)
+{
+	int c; // int, so EOF is not confused with a character
+	do {
+		c = read_char(cur, in);
+	} while (c != '\n' && c != EOF); // a final line may lack a trailing newline
+	return NULL;
+}
+
+/// Read a `#' form: a `#| ... |#' block comment, or the literals #t and #f.
+/// @return The literal, or NULL if a block comment was skipped.
+/// @throw Error on an unknown `#' form or an unterminated block comment.
+static AST*
+read_special(Cursor& cur, istream& in)
+{
+	eat_char(cur, in, '#');
+	switch (in.peek()) {
+	case '|':
+		{
+			eat_char(cur, in, '|'); // the opening bar must not also close the comment
+			int prev = 0;
+			while (true) {
+				const int c = read_char(cur, in);
+				if (c == EOF)
+					throw Error(cur, "unexpected end of file");
+				if (prev == '|' && c == '#')
+					return NULL;
+				prev = c;
+			}
+		}
+	case 't':
+		eat_char(cur, in, 't');
+		return new ALiteral<bool>(T_BOOL, true, cur);
+	case 'f':
+		eat_char(cur, in, 'f');
+		return new ALiteral<bool>(T_BOOL, false, cur);
+	default:
+		throw Error(cur, (format("unknown special lexeme `%1%'") % static_cast<char>(in.peek())).str());
+	}
+	assert(false);
+	return NULL;
+}
+
+/// Read a parenthesised list starting at the opening `('.
+///
+/// Comments between elements are skipped here, because read_expression()
+/// rejects the closing `)' if it meets it after a comment.
+/// @throw Error if the input ends before the closing `)'.
+static AST*
+read_list(PEnv& penv, Cursor& cur, istream& in)
+{
+	List<ATuple, AST> list;
+
+	eat_char(cur, in, '(');
+	while (true) {
+		skip_space(cur, in);
+		switch (in.peek()) {
+		case EOF:
+			throw Error(cur, "missing `)'");
+		case ')':
+			eat_char(cur, in, ')');
+			return list.head;
+		case ';':
+			read_line_comment(cur, in);
+			continue;
+		case '#':
+			{
+				// NULL means a block comment, which contributes no element
+				AST* special = read_special(cur, in);
+				if (special)
+					list.push_back(special);
+				continue;
+			}
+		default:
+			break;
+		}
+
+		AST* expr = read_expression(penv, cur, in);
+		if (!expr) // read_expression() returns NULL only at end of input
+			throw Error(cur, "missing `)'");
+		list.push_back(expr);
+	}
+}
+
+/// Read an integer or floating point literal with an optional leading sign.
+///
+/// A literal containing a `.' is read as a float, anything else as an int32.
+static AST*
+read_number(Cursor& cur, istream& in)
+{
+	string str;
+	Cursor loc = cur;
+
+	// read_expression() hands over signed numbers with the sign still unread
+	if (in.peek() == '-' || in.peek() == '+')
+		str += static_cast<char>(read_char(cur, in));
+
+	// peek() is kept as int: EOF stays distinct and isdigit() gets a valid argument
+	for (int c = in.peek(); isdigit(c) || c == '.'; c = in.peek())
+		str += static_cast<char>(read_char(cur, in));
+
+	if (str.find('.') == string::npos)
+		return new ALiteral<int32_t>(T_INT32, strtol(str.c_str(), NULL, 10), loc);
+	else
+		return new ALiteral<float>(T_FLOAT, strtod(str.c_str(), NULL), loc);
+}
+
+/// Read a symbol: every character up to whitespace, a parenthesis, or EOF.
+static AST*
+read_symbol(PEnv& penv, Cursor& cur, istream& in)
+{
+	string str;
+	for (int c = in.peek(); c != EOF && !isspace(c) && c != ')' && c != '('; c = in.peek())
+		str += static_cast<char>(read_char(cur, in));
+
+	return penv.sym(str);
+}
+
 /// Read an expression from @a in
+///
+/// Leading whitespace and comments are skipped.
+/// @return The expression, or NULL if the input ends first.
+/// @throw Error on an unexpected `)'.
 AST*
-readExpression(Cursor& cur, istream& in)
+read_expression(PEnv& penv, Cursor& cur, istream& in)
 {
-#define PUSH(s, t)  { if (t != "") { s.top().push_back(new ALexeme(loc, t)); t = ""; } }
-#define YIELD(s, t) { if (s.empty()) { return new ALexeme(loc, t); } else PUSH(s, t) }
-	stack< List<ATuple, AST> > stk;
-	string tok;
-	Cursor loc; // start of tok
-	while (int c = readChar(cur, in)) {
+	while (!in.eof()) {
+		skip_space(cur, in);
+		const int c = in.peek(); // int, so EOF is distinct from every character
 		switch (c) {
 		case EOF:
-			THROW_IF(!stk.empty(), cur, "unexpected end of file");
-			return new ATuple(cur);
+			return NULL;
 		case ';':
-			while ((c = readChar(cur, in)) != '\n') {}
-		case '\n': case ' ': case '\t': case '\r': case '\f':
-			if (tok != "") YIELD(stk, tok);
+			read_line_comment(cur, in);
 			break;
 		case '"':
-			loc = cur;
-			tok.push_back(c); // leading quote
-			while ((c = readChar(cur, in)) != '"') {
-				if (c == '\\') { // string escape
-					switch (c = readChar(cur, in)) {
-					case '"':
-						tok.push_back('"');
-						break;
-					case '\\':
-						tok.push_back('\\');
-						break;
-					default:
-						cin.putback(c);
-						throw Error(cur, string("unknown string escape `\\") + (char)c + "'");
-					}
-				} else { // any other character
-					tok.push_back(c);
-				}
-			}
-			tok.push_back(c); // trailing quote
-			YIELD(stk, tok);
-			break;
+			return read_string(cur, in);
 		case '(':
-			stk.push(List<ATuple, AST>());
-			break;
+			return read_list(penv, cur, in);
 		case ')':
-			switch (stk.size()) {
-			case 0:
-				cin.putback(c);
-				throw Error(cur, "unexpected `)'");
-			case 1:
-				PUSH(stk, tok);
-				return stk.top().head;
-			default:
-				PUSH(stk, tok);
-				List<ATuple, AST> l = stk.top();
-				stk.pop();
-				stk.top().push_back(l.head);
-			}
-			break;
+			throw Error(cur, "unexpected `)'");
 		case '#':
-			if (in.peek() == '|') {
-				while (!(readChar(cur, in) == '|' && readChar(cur, in) == '#')) {}
-				break;
+			{
+				AST* ret = read_special(cur, in);
+				if (ret)
+					return ret;
+				break;
+			}
+		case '-':
+		case '+':
+			{
+				// A sign starts a number only if a digit follows, so look one
+				// character further, then restore both the stream and cursor.
+				Cursor start = cur;
+				read_char(cur, in);
+				const bool is_number = isdigit(in.peek());
+				in.putback(static_cast<char>(c));
+				cur = start;
+				if (is_number)
+					return read_number(cur, in);
+				return read_symbol(penv, cur, in);
 			}
 		default:
-			if (tok == "") loc = cur;
-			tok += c;
+			if (isdigit(c))
+				return read_number(cur, in);
+			else
+				return read_symbol(penv, cur, in);
 		}
 	}
-	switch (stk.size()) {
-	case 0:  return new AString(loc, tok);
-	case 1:  return stk.top().head;
-	default: throw Error(cur, "missing `)'");
-	}
-	assert(false);
-	return new ATuple(cur); // never reached
+	return NULL;
 }