aboutsummaryrefslogtreecommitdiffstats
path: root/src/lex.cpp
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2010-12-07 23:21:08 +0000
committerDavid Robillard <d@drobilla.net>2010-12-07 23:21:08 +0000
commit7682c4ceab935d39aafac369c9b110b658b1e575 (patch)
tree96fcddeb789f72419a5416201ee8944a58baac4e /src/lex.cpp
parent5e8c55c15f42aff343d0b6189a6c8f8c50d12775 (diff)
downloadresp-7682c4ceab935d39aafac369c9b110b658b1e575.tar.gz
resp-7682c4ceab935d39aafac369c9b110b658b1e575.tar.bz2
resp-7682c4ceab935d39aafac369c9b110b658b1e575.zip
Saner recursive descent lexer/parser.
git-svn-id: http://svn.drobilla.net/resp/resp@306 ad02d1e2-f140-0410-9f75-f8b11f17cedd
Diffstat (limited to 'src/lex.cpp')
-rw-r--r--src/lex.cpp222
1 files changed, 156 insertions, 66 deletions
diff --git a/src/lex.cpp b/src/lex.cpp
index 5b6eb73..2ac838f 100644
--- a/src/lex.cpp
+++ b/src/lex.cpp
@@ -16,8 +16,7 @@
*/
/** @file
- * @brief Lexing (build a CST from a string).
- * A CST is a lexeme, or a tuple of CST's.
+ * @brief Parsing (build an AST from text)
*/
#include <stdio.h>
@@ -26,8 +25,8 @@
using namespace std;
-inline int
-readChar(Cursor& cur, istream& in)
+static inline int
+read_char(Cursor& cur, istream& in)
{
int ch = in.get();
switch (ch) {
@@ -37,81 +36,172 @@ readChar(Cursor& cur, istream& in)
return ch;
}
+/// Consume whitespace from @a in, one read_char() call per character, and
+/// stop in front of the first character for which isspace() is false.
+static inline void
+skip_space(Cursor& cur, istream& in)
+{
+	for (;;) {
+		if (!isspace(in.peek()))
+			return;
+		read_char(cur, in);
+	}
+}
+
+/// Consume the next character of @a in via read_char() and assert that it is
+/// @a character.  The check is an assert(), so it is compiled out when NDEBUG
+/// is defined; the character is consumed either way.
+static inline void
+eat_char(Cursor& cur, istream& in, const char character)
+{
+	const char consumed = read_char(cur, in);
+	assert(consumed == character);
+}
+
+/// Read a double-quoted string literal from @a in.
+///
+/// The stream must be positioned at the opening quote.  Only the escapes
+/// `\"' and `\\' are recognised.
+///
+/// @return A new AString holding the unescaped text, located where the
+///         literal started.
+/// @throws Error on an unknown escape, or if the input ends before the
+///         closing quote.
+static AST*
+read_string(Cursor& cur, istream& in)
+{
+	string str;
+	Cursor loc = cur;
+	eat_char(cur, in, '"');
+	while (true) {
+		// Keep the int handed back by read_char(): narrowing it to char would
+		// make end-of-input indistinguishable from data and loop forever.
+		const int c = read_char(cur, in);
+		if (c == EOF)
+			throw Error(loc, "unterminated string");
+		if (c == '"')
+			break;
+		if (c != '\\') { // any ordinary character
+			str.push_back(static_cast<char>(c));
+			continue;
+		}
+
+		const int esc = read_char(cur, in); // character after the backslash
+		switch (esc) {
+		case '"':
+		case '\\':
+			str.push_back(static_cast<char>(esc));
+			break;
+		case EOF:
+			throw Error(loc, "unterminated string");
+		default:
+			// Push back onto the stream being parsed (not the global cin)
+			in.putback(static_cast<char>(esc));
+			throw Error(cur, string("unknown string escape `\\") + static_cast<char>(esc) + "'");
+		}
+	}
+	return new AString(loc, str);
+}
+
+/// Discard the rest of the current line of @a in, including its newline.
+///
+/// Also stops at end of input, so a comment on an unterminated last line
+/// cannot spin forever.
+///
+/// @return Always NULL; a comment produces no expression.
+static AST*
+read_line_comment(Cursor& cur, istream& in)
+{
+	int c; // int, so EOF is distinguishable from every character
+	do {
+		c = read_char(cur, in);
+	} while (c != '\n' && c != EOF);
+	return NULL;
+}
+
+/// Read a parenthesised list from @a in.
+///
+/// The stream must be positioned at the opening `('.  Elements are read with
+/// read_expression() until the matching `)' is consumed.  Comments between
+/// elements are skipped here: read_expression() continues past a comment and
+/// would raise "unexpected `)'" if the closing parenthesis came next.
+///
+/// @return The head of the list that was built.
+/// @throws Error if the input ends before the closing `)'.
+static AST*
+read_list(PEnv& penv, Cursor& cur, istream& in)
+{
+	List<ATuple, AST> list;
+
+	eat_char(cur, in, '(');
+	while (true) {
+		skip_space(cur, in);
+		switch (in.peek()) {
+		case ')':
+			eat_char(cur, in, ')');
+			// NOTE(review): what head holds for an empty list `()' is not
+			// visible here; read_expression() uses NULL for end of input.
+			return list.head;
+		case EOF:
+			throw Error(cur, "missing `)'");
+		case ';':
+			read_line_comment(cur, in);
+			continue;
+		case '#':
+			// read_special() returns NULL for a block comment
+			if (AST* special = read_special(cur, in))
+				list.push_back(special);
+			continue;
+		}
+
+		list.push_back(read_expression(penv, cur, in));
+	}
+}
+
+/// Read a lexeme introduced by `#' from @a in.
+///
+/// Recognised forms are the block comment `#| ... |#' and the boolean
+/// literals `#t' and `#f'.
+///
+/// @return A new boolean literal, or NULL if a block comment was skipped.
+/// @throws Error on any other character after `#', or if the input ends
+///         inside a block comment.
+static AST*
+read_special(Cursor& cur, istream& in)
+{
+	eat_char(cur, in, '#');
+	switch (in.peek()) {
+	case '|': {
+		eat_char(cur, in, '|');
+		// Scan for the closing "|#".  Tracking the previous character means a
+		// run such as "||#" still ends the comment.
+		int prev = 0;
+		while (true) {
+			const int c = read_char(cur, in);
+			if (c == EOF)
+				throw Error(cur, "unterminated block comment");
+			if (prev == '|' && c == '#')
+				return NULL;
+			prev = c;
+		}
+	}
+	case 't':
+		eat_char(cur, in, 't');
+		return new ALiteral<bool>(T_BOOL, true, cur);
+	case 'f':
+		// Consume the `f' as well, otherwise it is read again as a symbol
+		eat_char(cur, in, 'f');
+		return new ALiteral<bool>(T_BOOL, false, cur);
+	default:
+		// peek() yields an int; cast so the message shows the character
+		throw Error(cur, (format("unknown special lexeme `%1%'") % static_cast<char>(in.peek())).str());
+	}
+}
+
+/// Read a numeric literal from @a in.
+///
+/// Accepts an optional leading `-' or `+' followed by digits and `.'
+/// characters.  read_expression() leaves the sign in the stream before
+/// dispatching here, so it has to be consumed by this function.
+///
+/// @return A new T_FLOAT literal if the text contains a `.', otherwise a new
+///         T_INT32 literal; both are located where the number started.
+static AST*
+read_number(Cursor& cur, istream& in)
+{
+	string str;
+	Cursor loc = cur;
+
+	if (in.peek() == '-' || in.peek() == '+')
+		str += static_cast<char>(read_char(cur, in));
+
+	// peek() is kept as int so EOF and bytes >= 0x80 are handled correctly
+	for (int c = in.peek(); c != EOF && (isdigit(c) || c == '.'); c = in.peek())
+		str += static_cast<char>(read_char(cur, in));
+
+	if (str.find('.') == string::npos)
+		return new ALiteral<int32_t>(T_INT32, strtol(str.c_str(), NULL, 10), loc);
+	else
+		return new ALiteral<float>(T_FLOAT, strtod(str.c_str(), NULL), loc);
+}
+
+/// Read a symbol from @a in.
+///
+/// A symbol extends up to, but not including, the next whitespace character,
+/// `(' or `)', or the end of input.
+///
+/// @return The result of PEnv::sym() for the text that was read.
+static AST*
+read_symbol(PEnv& penv, Cursor& cur, istream& in)
+{
+	string str;
+
+	// peek() is kept as int: narrowing to char makes the EOF test unreliable
+	// and passes negative values to isspace() for bytes >= 0x80.
+	for (int c = in.peek();
+	     c != EOF && !isspace(c) && c != '(' && c != ')';
+	     c = in.peek()) {
+		str += static_cast<char>(read_char(cur, in));
+	}
+
+	return penv.sym(str);
+}
+
/// Read an expression from @a in
+///
+/// Skips whitespace, `;' line comments and `#|...|#' block comments, then
+/// dispatches on the next character to the matching read_* helper.
+///
+/// @return The expression that was read, or NULL at the end of @a in.
+/// @throws Error on a `)' that does not close a list; the helpers may throw too.
AST*
-readExpression(Cursor& cur, istream& in)
+read_expression(PEnv& penv, Cursor& cur, istream& in)
{
-#define PUSH(s, t) { if (t != "") { s.top().push_back(new ALexeme(loc, t)); t = ""; } }
-#define YIELD(s, t) { if (s.empty()) { return new ALexeme(loc, t); } else PUSH(s, t) }
- stack< List<ATuple, AST> > stk;
- string tok;
- Cursor loc; // start of tok
- while (int c = readChar(cur, in)) {
+ while (!in.eof()) { // test the stream being parsed, not the global cin
+ skip_space(cur, in);
+ const int c = in.peek(); // int, so EOF stays distinct from every character
switch (c) {
case EOF:
- THROW_IF(!stk.empty(), cur, "unexpected end of file");
- return new ATuple(cur);
+ return NULL;
case ';':
- while ((c = readChar(cur, in)) != '\n') {}
- case '\n': case ' ': case '\t': case '\r': case '\f':
- if (tok != "") YIELD(stk, tok);
+ read_line_comment(cur, in);
break;
case '"':
- loc = cur;
- tok.push_back(c); // leading quote
- while ((c = readChar(cur, in)) != '"') {
- if (c == '\\') { // string escape
- switch (c = readChar(cur, in)) {
- case '"':
- tok.push_back('"');
- break;
- case '\\':
- tok.push_back('\\');
- break;
- default:
- cin.putback(c);
- throw Error(cur, string("unknown string escape `\\") + (char)c + "'");
- }
- } else { // any other character
- tok.push_back(c);
- }
- }
- tok.push_back(c); // trailing quote
- YIELD(stk, tok);
- break;
+ return read_string(cur, in);
case '(':
- stk.push(List<ATuple, AST>());
- break;
+ return read_list(penv, cur, in);
case ')':
- switch (stk.size()) {
- case 0:
- cin.putback(c);
- throw Error(cur, "unexpected `)'");
- case 1:
- PUSH(stk, tok);
- return stk.top().head;
- default:
- PUSH(stk, tok);
- List<ATuple, AST> l = stk.top();
- stk.pop();
- stk.top().push_back(l.head);
- }
- break;
+ throw Error(cur, "unexpected `)'");
case '#':
- if (in.peek() == '|') {
- while (!(readChar(cur, in) == '|' && readChar(cur, in) == '#')) {}
- break;
+ {
+ AST* ret = read_special(cur, in);
+ if (ret) // NULL means a block comment was skipped; keep scanning
+ return ret;
+ break;
+ }
+ case '-':
+ case '+':
+ {
+ // Look one character past the sign to tell a number from a symbol.
+ // get()/unget() are used directly so the sign is not passed through
+ // read_char() twice (once here and once by the reader called below).
+ in.get();
+ const bool is_number = isdigit(in.peek());
+ in.unget();
+ if (is_number)
+ return read_number(cur, in);
+ return read_symbol(penv, cur, in);
}
default:
- if (tok == "") loc = cur;
- tok += c;
+ if (isdigit(c))
+ return read_number(cur, in);
+ else
+ return read_symbol(penv, cur, in);
}
}
- switch (stk.size()) {
- case 0: return new AString(loc, tok);
- case 1: return stk.top().head;
- default: throw Error(cur, "missing `)'");
- }
- assert(false);
- return new ATuple(cur); // never reached
+ return NULL;
}