aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2010-08-19 03:25:43 +0000
committerDavid Robillard <d@drobilla.net>2010-08-19 03:25:43 +0000
commit9a1c3c7cfc96cb6ee1c9f7bc103b99b89da43d6e (patch)
tree3c0f7348198b9c722c67ed6f2e819fa8e8150767
parent1bc26254cf83449017b24afd90420916d8f512aa (diff)
downloadresp-9a1c3c7cfc96cb6ee1c9f7bc103b99b89da43d6e.tar.gz
resp-9a1c3c7cfc96cb6ee1c9f7bc103b99b89da43d6e.tar.bz2
resp-9a1c3c7cfc96cb6ee1c9f7bc103b99b89da43d6e.zip
Coherent AString and Lexeme implementation.
A Lexeme is any "token" read from input; a lexeme has not yet been parsed and could parse to anything, e.g. a string, an expression, a number, etc. Lexemes are not (yet?) exposed to the language or ever compiled. A String is a string literal, which can contain any character directly except " and \. There are two special escapes: \" and \\; any other character following a \ is a syntax error. Fix garbage collection of REPL objects, which led to type errors from type variable re-use: a type variable for a given AST's /address/ exists, but that address has actually been deleted and reused by new (i.e. make top level REPL expressions and types be GC roots). git-svn-id: http://svn.drobilla.net/resp/resp@261 ad02d1e2-f140-0410-9f75-f8b11f17cedd
-rw-r--r--src/c.cpp7
-rw-r--r--src/compile.cpp12
-rw-r--r--src/constrain.cpp6
-rw-r--r--src/lex.cpp32
-rw-r--r--src/llvm.cpp35
-rw-r--r--src/parse.cpp5
-rw-r--r--src/pprint.cpp6
-rw-r--r--src/repl.cpp23
-rw-r--r--src/resp.hpp33
-rwxr-xr-xtest.sh1
-rw-r--r--test/string.resp4
11 files changed, 128 insertions, 36 deletions
diff --git a/src/c.cpp b/src/c.cpp
index ecf7d92..d6d71f3 100644
--- a/src/c.cpp
+++ b/src/c.cpp
@@ -152,6 +152,7 @@ struct CEngine : public Engine {
CVal compileTup(CEnv& cenv, const AType* type, const vector<CVal>& fields);
CVal compileDot(CEnv& cenv, CVal tup, int32_t index);
CVal compileLiteral(CEnv& cenv, AST* lit);
+ CVal compileString(CEnv& cenv, const char* str);
CVal compilePrimitive(CEnv& cenv, APrimitive* prim);
CVal compileIf(CEnv& cenv, AIf* aif);
CVal compileGlobal(CEnv& cenv, const AType* type, const string& sym, CVal val);
@@ -197,6 +198,12 @@ CEngine::compileLiteral(CEnv& cenv, AST* lit)
return new Value(lit->str());
}
+CVal
+CEngine::compileString(CEnv& cenv, const char* str)
+{
+ return new Value(str);
+}
+
CFunc
CEngine::compileFunction(CEnv& cenv, AFn* fn, const AType* type)
{
diff --git a/src/compile.cpp b/src/compile.cpp
index 70056db..f5a4128 100644
--- a/src/compile.cpp
+++ b/src/compile.cpp
@@ -32,6 +32,18 @@ COMPILE_LITERAL(float);
COMPILE_LITERAL(bool);
CVal
+AString::compile(CEnv& cenv) throw()
+{
+ return cenv.engine()->compileString(cenv, c_str());
+}
+
+CVal
+ALexeme::compile(CEnv& cenv) throw()
+{
+ return cenv.engine()->compileString(cenv, c_str());
+}
+
+CVal
ASymbol::compile(CEnv& cenv) throw()
{
if (cenv.vals.topLevel(this) && cenv.type(this)->head()->str() != "Fn") {
diff --git a/src/constrain.cpp b/src/constrain.cpp
index 94a27c3..969e87d 100644
--- a/src/constrain.cpp
+++ b/src/constrain.cpp
@@ -40,6 +40,12 @@ AString::constrain(TEnv& tenv, Constraints& c) const throw(Error)
}
void
+ALexeme::constrain(TEnv& tenv, Constraints& c) const throw(Error)
+{
+ c.constrain(tenv, this, tenv.named("Lexeme"));
+}
+
+void
ASymbol::constrain(TEnv& tenv, Constraints& c) const throw(Error)
{
const AType** ref = tenv.ref(this);
diff --git a/src/lex.cpp b/src/lex.cpp
index 0097346..f633b00 100644
--- a/src/lex.cpp
+++ b/src/lex.cpp
@@ -16,7 +16,8 @@
*/
/** @file
- * @brief Lexing (build an unparsed textual AST from a string)
+ * @brief Lexing (build a CST from a string).
+ * A CST is a lexeme, or a tuple of CST's.
*/
#include <stdio.h>
@@ -40,8 +41,8 @@ readChar(Cursor& cur, istream& in)
AST*
readExpression(Cursor& cur, istream& in)
{
-#define PUSH(s, t) { if (t != "") { s.top()->push_back(new AString(loc, t)); t = ""; } }
-#define YIELD(s, t) { if (s.empty()) { return new AString(loc, t); } else PUSH(s, t) }
+#define PUSH(s, t) { if (t != "") { s.top()->push_back(new ALexeme(loc, t)); t = ""; } }
+#define YIELD(s, t) { if (s.empty()) { return new ALexeme(loc, t); } else PUSH(s, t) }
stack<ATuple*> stk;
string tok;
Cursor loc; // start of tok
@@ -52,13 +53,31 @@ readExpression(Cursor& cur, istream& in)
return new ATuple(cur);
case ';':
while ((c = readChar(cur, in)) != '\n') {}
- case '\n': case ' ': case '\t':
+ case '\n': case ' ': case '\t': case '\r': case '\f':
if (tok != "") YIELD(stk, tok);
break;
case '"':
loc = cur;
- do { tok.push_back(c); } while ((c = readChar(cur, in)) != '"');
- YIELD(stk, tok + '"');
+ tok.push_back(c); // leading quote
+ while ((c = readChar(cur, in)) != '"') {
+ if (c == '\\') { // string escape
+ switch (c = readChar(cur, in)) {
+ case '"':
+ tok.push_back('"');
+ break;
+ case '\\':
+ tok.push_back('\\');
+ break;
+ default:
+ cin.putback(c);
+ throw Error(cur, string("unknown string escape `\\") + (char)c + "'");
+ }
+ } else { // any other character
+ tok.push_back(c);
+ }
+ }
+ tok.push_back(c); // trailing quote
+ YIELD(stk, tok);
break;
case '(':
stk.push(new ATuple(cur));
@@ -66,6 +85,7 @@ readExpression(Cursor& cur, istream& in)
case ')':
switch (stk.size()) {
case 0:
+ cin.putback(c);
throw Error(cur, "unexpected `)'");
case 1:
PUSH(stk, tok);
diff --git a/src/llvm.cpp b/src/llvm.cpp
index e4d7c07..043b5fc 100644
--- a/src/llvm.cpp
+++ b/src/llvm.cpp
@@ -93,6 +93,7 @@ struct LLVMEngine : public Engine {
if (t->head()->str() == "Bool") return Type::getInt1Ty(context);
if (t->head()->str() == "Int") return Type::getInt32Ty(context);
if (t->head()->str() == "Float") return Type::getFloatTy(context);
+ if (t->head()->str() == "String") return PointerType::get(Type::getInt8Ty(context), NULL);
throw Error(t->loc, string("Unknown primitive type `") + t->str() + "'");
} else if (t->kind == AType::EXPR && t->head()->str() == "Fn") {
AType::const_iterator i = t->begin();
@@ -188,6 +189,7 @@ struct LLVMEngine : public Engine {
CVal compileTup(CEnv& cenv, const AType* type, const vector<CVal>& fields);
CVal compileDot(CEnv& cenv, CVal tup, int32_t index);
CVal compileLiteral(CEnv& cenv, AST* lit);
+ CVal compileString(CEnv& cenv, const char* str);
CVal compilePrimitive(CEnv& cenv, APrimitive* prim);
CVal compileIf(CEnv& cenv, AIf* aif);
CVal compileGlobal(CEnv& cenv, const AType* type, const string& sym, CVal val);
@@ -206,16 +208,33 @@ struct LLVMEngine : public Engine {
THROW_IF(!t, Cursor(), "function with non-concrete return type called");
std::stringstream ss;
- if (t == Type::getInt32Ty(context))
+ if (t == Type::getInt32Ty(context)) {
ss << ((int32_t (*)())fp)();
- else if (t == Type::getFloatTy(context))
+ } else if (t == Type::getFloatTy(context)) {
ss << showpoint << ((float (*)())fp)();
- else if (t == Type::getInt1Ty(context))
+ } else if (t == Type::getInt1Ty(context)) {
ss << (((bool (*)())fp)() ? "#t" : "#f");
- else if (t != Type::getVoidTy(context))
+ } else if (retT->head()->str() == "String") {
+ const std::string s(((char* (*)())fp)());
+ ss << "\"";
+ for (std::string::const_iterator i = s.begin(); i != s.end(); ++i) {
+ switch (*i) {
+ case '\"':
+ case '\\':
+ ss << '\\';
+ default:
+ ss << *i;
+ break;
+ }
+ }
+ ss << "\"";
+ } else if (retT->head()->str() == "Lexeme") {
+ ss << ((char* (*)())fp)();
+ } else if (t != Type::getVoidTy(context)) {
ss << ((void* (*)())fp)();
- else
+ } else {
((void (*)())fp)();
+ }
return ss.str();
}
@@ -293,6 +312,12 @@ LLVMEngine::compileLiteral(CEnv& cenv, AST* lit)
throw Error(lit->loc, "Unknown literal type");
}
+CVal
+LLVMEngine::compileString(CEnv& cenv, const char* str)
+{
+ return builder.CreateGlobalStringPtr(str);
+}
+
CFunc
LLVMEngine::compileFunction(CEnv& cenv, AFn* fn, const AType* type)
{
diff --git a/src/parse.cpp b/src/parse.cpp
index 1c448db..20d0816 100644
--- a/src/parse.cpp
+++ b/src/parse.cpp
@@ -36,7 +36,7 @@ macDef(PEnv& penv, const AST* exp)
THROW_IF(i == tup->end(), tup->loc, "Unexpected end of `def' macro call");
const AST* name = *(++i);
THROW_IF(i == tup->end(), name->loc, "Unexpected end of `def' macro call");
- if (name->to<const AString*>()) {
+ if (name->to<const ALexeme*>()) {
return const_cast<AST*>(exp);
} else {
const ATuple* pat = name->to<const ATuple*>();
@@ -48,7 +48,7 @@ macDef(PEnv& penv, const AST* exp)
argsExp->push_back(*j);
const AST* body = *(++i);
ATuple* fnExp = new ATuple(body->loc);
- fnExp->push_back(new AString(exp->loc, "fn"));
+ fnExp->push_back(new ALexeme(exp->loc, "fn"));
fnExp->push_back(argsExp);
for (; i != tup->end(); ++i)
fnExp->push_back(*i);
@@ -107,6 +107,7 @@ initLang(PEnv& penv, TEnv& tenv)
tenv.def(penv.sym("Int"), new AType(penv.sym("Int")));
tenv.def(penv.sym("Float"), new AType(penv.sym("Float")));
tenv.def(penv.sym("String"), new AType(penv.sym("String")));
+ tenv.def(penv.sym("Lexeme"), new AType(penv.sym("Lexeme")));
// Literals
static bool trueVal = true;
diff --git a/src/pprint.cpp b/src/pprint.cpp
index 5ee5736..391609a 100644
--- a/src/pprint.cpp
+++ b/src/pprint.cpp
@@ -24,6 +24,10 @@
ostream&
operator<<(ostream& out, const AST* ast)
{
+ const ALexeme* lexeme = ast->to<const ALexeme*>();
+ if (lexeme)
+ return out << *lexeme;
+
const ALiteral<float>* flit = ast->to<const ALiteral<float>*>();
if (flit)
return out << showpoint << flit->val;
@@ -38,7 +42,7 @@ operator<<(ostream& out, const AST* ast)
const AString* str = ast->to<const AString*>();
if (str)
- return out << *str;
+ return out << '"' << *str << '"';
const ASymbol* sym = ast->to<const ASymbol*>();
if (sym)
diff --git a/src/repl.cpp b/src/repl.cpp
index 472d1a5..977976b 100644
--- a/src/repl.cpp
+++ b/src/repl.cpp
@@ -29,7 +29,13 @@ using namespace std;
static bool
readParseType(CEnv& cenv, Cursor& cursor, istream& is, AST*& exp, AST*& ast)
{
- exp = readExpression(cursor, is);
+ try {
+ exp = readExpression(cursor, is);
+ } catch (Error e) {
+ is.ignore(std::numeric_limits<std::streamsize>::max(), '\n'); // Skip REPL junk
+ throw e;
+ }
+
if (exp->to<ATuple*>() && exp->to<ATuple*>()->empty())
return false;
@@ -40,9 +46,10 @@ readParseType(CEnv& cenv, Cursor& cursor, istream& is, AST*& exp, AST*& ast)
const Subst subst = unify(c); // Solve type constraints
for (Subst::const_iterator i = subst.begin(); i != subst.end(); ++i) {
- if (!cenv.tsubst.contains(i->first)) {
- //cout << "New variable " << i->first << " = " << i->second << endl;
- cenv.tsubst.push_back(*i);
+ if (!cenv.tsubst.contains(i->first)) { // Substitution's LHS is a new variable
+ cenv.tsubst.push_back(*i); // Add substitution to global type substitution
+ Object::pool.addRoot(i->first);
+ Object::pool.addRoot(i->second);
}
}
@@ -50,12 +57,8 @@ readParseType(CEnv& cenv, Cursor& cursor, istream& is, AST*& exp, AST*& ast)
//cout << "**** CENV.SUBST\n" << cenv.tsubst << "********" << endl;
//cenv.tsubst = Subst::compose(cenv.tsubst, subst);
- // Add types in type substition as GC roots
- for (Subst::iterator i = cenv.tsubst.begin(); i != cenv.tsubst.end(); ++i) {
- Object::pool.addRoot(i->first);
- Object::pool.addRoot(i->second);
- }
-
+ Object::pool.addRoot(ast); // Make parsed expression a GC root so it is not deleted
+
return true;
}
diff --git a/src/resp.hpp b/src/resp.hpp
index 752796b..a76aed7 100644
--- a/src/resp.hpp
+++ b/src/resp.hpp
@@ -241,12 +241,20 @@ struct ALiteral : public AST {
const T val;
};
+/// Lexeme (any atom in the CST, e.g. "a", "3.4", ""hello"", etc.
+struct ALexeme : public AST, public std::string {
+ ALexeme(Cursor c, const string& s) : AST(c), std::string(s) {}
+ bool operator==(const AST& rhs) const { return this == &rhs; }
+ void constrain(TEnv& tenv, Constraints& c) const throw(Error);
+ CVal compile(CEnv& cenv) throw();
+};
+
/// String, e.g. ""a""
struct AString : public AST, public std::string {
AString(Cursor c, const string& s) : AST(c), std::string(s) {}
bool operator==(const AST& rhs) const { return this == &rhs; }
void constrain(TEnv& tenv, Constraints& c) const throw(Error);
- CVal compile(CEnv& cenv) throw() { return NULL; }
+ CVal compile(CEnv& cenv) throw();
};
/// Symbol, e.g. "a"
@@ -490,7 +498,7 @@ struct PEnv : private map<const string, ASymbol*> {
void defmac(const string& s, const MF f) {
macros.insert(make_pair(s, f));
}
- MF mac(const AString& s) const {
+ MF mac(const ALexeme& s) const {
map<string, MF>::const_iterator i = macros.find(s);
return (i != macros.end()) ? i->second : NULL;
}
@@ -517,32 +525,32 @@ struct PEnv : private map<const string, ASymbol*> {
if (tup) {
if (tup->empty()) throw Error(exp->loc, "call to empty list");
if (!tup->head()->to<const ATuple*>()) {
- MF mf = mac(*tup->head()->to<const AString*>());
+ MF mf = mac(*tup->head()->to<const ALexeme*>());
const AST* expanded = (mf ? mf(*this, exp) : exp);
const ATuple* expanded_tup = expanded->to<const ATuple*>();
- const PEnv::Handler* h = handler(true, *expanded_tup->head()->to<const AString*>());
+ const PEnv::Handler* h = handler(true, *expanded_tup->head()->to<const ALexeme*>());
if (h)
return h->func(*this, expanded, h->arg);
}
ATuple* parsed_tup = parseTuple(tup);
return new ACall(parsed_tup); // Parse as regular call
}
- const AString* str = exp->to<const AString*>();
- assert(str);
- if (isdigit((*str)[0])) {
- const std::string& s = *str;
+ const ALexeme* lex = exp->to<const ALexeme*>();
+ assert(lex);
+ if (isdigit((*lex)[0])) {
+ const std::string& s = *lex;
if (s.find('.') == string::npos)
return new ALiteral<int32_t>(strtol(s.c_str(), NULL, 10), exp->loc);
else
return new ALiteral<float>(strtod(s.c_str(), NULL), exp->loc);
- } else if ((*str)[0] == '\"') {
- return new AString(exp->loc, str->substr(1, str->length() - 2));
+ } else if ((*lex)[0] == '\"') {
+ return new AString(exp->loc, lex->substr(1, lex->length() - 2));
} else {
- const PEnv::Handler* h = handler(false, *str);
+ const PEnv::Handler* h = handler(false, *lex);
if (h)
return h->func(*this, exp, h->arg);
}
- return sym(*exp->to<const AString*>(), exp->loc);
+ return sym(*lex, exp->loc);
}
unsigned symID;
};
@@ -691,6 +699,7 @@ struct Engine {
virtual CVal compileTup(CEnv& cenv, const AType* t, ValVec& f) = 0;
virtual CVal compileDot(CEnv& cenv, CVal tup, int32_t index) = 0;
virtual CVal compileLiteral(CEnv& cenv, AST* lit) = 0;
+ virtual CVal compileString(CEnv& cenv, const char* str) = 0;
virtual CVal compileCall(CEnv& cenv, CFunc f, const AType* fT, ValVec& args) = 0;
virtual CVal compilePrimitive(CEnv& cenv, APrimitive* prim) = 0;
virtual CVal compileIf(CEnv& cenv, AIf* aif) = 0;
diff --git a/test.sh b/test.sh
index e800625..d344aaf 100755
--- a/test.sh
+++ b/test.sh
@@ -21,5 +21,6 @@ run './test/fac.resp' '720 : Int'
run './test/inlinefn.resp' '2 : Int'
run './test/nest.resp' '8 : Int'
run './test/tup.resp' '5 : Int'
+run './test/string.resp' '"Hello, world!" : String'
#run './test/poly.resp' '#t : Bool'
diff --git a/test/string.resp b/test/string.resp
new file mode 100644
index 0000000..ff980a9
--- /dev/null
+++ b/test/string.resp
@@ -0,0 +1,4 @@
+(def greeting "Hello, world!")
+
+greeting
+