diff options
author | David Robillard <d@drobilla.net> | 2010-08-19 03:25:43 +0000 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2010-08-19 03:25:43 +0000 |
commit | 9a1c3c7cfc96cb6ee1c9f7bc103b99b89da43d6e (patch) | |
tree | 3c0f7348198b9c722c67ed6f2e819fa8e8150767 /src/lex.cpp | |
parent | 1bc26254cf83449017b24afd90420916d8f512aa (diff) | |
download | resp-9a1c3c7cfc96cb6ee1c9f7bc103b99b89da43d6e.tar.gz resp-9a1c3c7cfc96cb6ee1c9f7bc103b99b89da43d6e.tar.bz2 resp-9a1c3c7cfc96cb6ee1c9f7bc103b99b89da43d6e.zip |
Coherent AString and Lexeme implementation.
A Lexeme is any "token" read from input; a lexeme has not yet been parsed
and could parse to anything, e.g. a string, an expression, a number, etc.
Lexemes are not (yet?) exposed to the language or ever compiled.
A String is a string literal, which can contain any character directly
except " and \. There are two special escapes: \" and \\; any other
character following a \ is a syntax error.
Fix garbage collection of REPL objects, which led to type errors from
type variable re-use: a type variable for a given AST's /address/ still
exists, but that address has actually been deleted and reused by a new
object (i.e. make top level REPL expressions and types be GC roots).
git-svn-id: http://svn.drobilla.net/resp/resp@261 ad02d1e2-f140-0410-9f75-f8b11f17cedd
Diffstat (limited to 'src/lex.cpp')
-rw-r--r-- | src/lex.cpp | 32 |
1 files changed, 26 insertions, 6 deletions
diff --git a/src/lex.cpp b/src/lex.cpp index 0097346..f633b00 100644 --- a/src/lex.cpp +++ b/src/lex.cpp @@ -16,7 +16,8 @@ */ /** @file - * @brief Lexing (build an unparsed textual AST from a string) + * @brief Lexing (build a CST from a string). + * A CST is a lexeme, or a tuple of CST's. */ #include <stdio.h> @@ -40,8 +41,8 @@ readChar(Cursor& cur, istream& in) AST* readExpression(Cursor& cur, istream& in) { -#define PUSH(s, t) { if (t != "") { s.top()->push_back(new AString(loc, t)); t = ""; } } -#define YIELD(s, t) { if (s.empty()) { return new AString(loc, t); } else PUSH(s, t) } +#define PUSH(s, t) { if (t != "") { s.top()->push_back(new ALexeme(loc, t)); t = ""; } } +#define YIELD(s, t) { if (s.empty()) { return new ALexeme(loc, t); } else PUSH(s, t) } stack<ATuple*> stk; string tok; Cursor loc; // start of tok @@ -52,13 +53,31 @@ readExpression(Cursor& cur, istream& in) return new ATuple(cur); case ';': while ((c = readChar(cur, in)) != '\n') {} - case '\n': case ' ': case '\t': + case '\n': case ' ': case '\t': case '\r': case '\f': if (tok != "") YIELD(stk, tok); break; case '"': loc = cur; - do { tok.push_back(c); } while ((c = readChar(cur, in)) != '"'); - YIELD(stk, tok + '"'); + tok.push_back(c); // leading quote + while ((c = readChar(cur, in)) != '"') { + if (c == '\\') { // string escape + switch (c = readChar(cur, in)) { + case '"': + tok.push_back('"'); + break; + case '\\': + tok.push_back('\\'); + break; + default: + cin.putback(c); + throw Error(cur, string("unknown string escape `\\") + (char)c + "'"); + } + } else { // any other character + tok.push_back(c); + } + } + tok.push_back(c); // trailing quote + YIELD(stk, tok); break; case '(': stk.push(new ATuple(cur)); @@ -66,6 +85,7 @@ readExpression(Cursor& cur, istream& in) case ')': switch (stk.size()) { case 0: + cin.putback(c); throw Error(cur, "unexpected `)'"); case 1: PUSH(stk, tok); |