From 9a1c3c7cfc96cb6ee1c9f7bc103b99b89da43d6e Mon Sep 17 00:00:00 2001 From: David Robillard Date: Thu, 19 Aug 2010 03:25:43 +0000 Subject: Coherent AString and Lexeme implementation. A Lexeme is any "token" read from input; a lexeme has not yet been parsed and could parse to anything, e.g. a string, an expression, a number, etc. Lexemes are not (yet?) exposed to the language or ever compiled. A String is a string literal, which can contain any character directly except " and \. There are two special escapes: \" and \\; any other character following a \ is a syntax error. Fix garbage collection of REPL objects, leading to type errors from type variable re-use because a type variable for a given AST's /address/ exists, but that address has actually been deleted and reused by new (i.e. make top level REPL expressions and types be GC roots). git-svn-id: http://svn.drobilla.net/resp/resp@261 ad02d1e2-f140-0410-9f75-f8b11f17cedd --- src/c.cpp | 7 +++++++ src/compile.cpp | 12 ++++++++++++ src/constrain.cpp | 6 ++++++ src/lex.cpp | 32 ++++++++++++++++++++++++++------ src/llvm.cpp | 35 ++++++++++++++++++++++++++++++----- src/parse.cpp | 5 +++-- src/pprint.cpp | 6 +++++- src/repl.cpp | 23 +++++++++++++---------- src/resp.hpp | 33 +++++++++++++++++++++------------ test.sh | 1 + test/string.resp | 4 ++++ 11 files changed, 128 insertions(+), 36 deletions(-) create mode 100644 test/string.resp diff --git a/src/c.cpp b/src/c.cpp index ecf7d92..d6d71f3 100644 --- a/src/c.cpp +++ b/src/c.cpp @@ -152,6 +152,7 @@ struct CEngine : public Engine { CVal compileTup(CEnv& cenv, const AType* type, const vector& fields); CVal compileDot(CEnv& cenv, CVal tup, int32_t index); CVal compileLiteral(CEnv& cenv, AST* lit); + CVal compileString(CEnv& cenv, const char* str); CVal compilePrimitive(CEnv& cenv, APrimitive* prim); CVal compileIf(CEnv& cenv, AIf* aif); CVal compileGlobal(CEnv& cenv, const AType* type, const string& sym, CVal val); @@ -197,6 +198,12 @@ CEngine::compileLiteral(CEnv& 
cenv, AST* lit) return new Value(lit->str()); } +CVal +CEngine::compileString(CEnv& cenv, const char* str) +{ + return new Value(str); +} + CFunc CEngine::compileFunction(CEnv& cenv, AFn* fn, const AType* type) { diff --git a/src/compile.cpp b/src/compile.cpp index 70056db..f5a4128 100644 --- a/src/compile.cpp +++ b/src/compile.cpp @@ -31,6 +31,18 @@ COMPILE_LITERAL(int32_t); COMPILE_LITERAL(float); COMPILE_LITERAL(bool); +CVal +AString::compile(CEnv& cenv) throw() +{ + return cenv.engine()->compileString(cenv, c_str()); +} + +CVal +ALexeme::compile(CEnv& cenv) throw() +{ + return cenv.engine()->compileString(cenv, c_str()); +} + CVal ASymbol::compile(CEnv& cenv) throw() { diff --git a/src/constrain.cpp b/src/constrain.cpp index 94a27c3..969e87d 100644 --- a/src/constrain.cpp +++ b/src/constrain.cpp @@ -39,6 +39,12 @@ AString::constrain(TEnv& tenv, Constraints& c) const throw(Error) c.constrain(tenv, this, tenv.named("String")); } +void +ALexeme::constrain(TEnv& tenv, Constraints& c) const throw(Error) +{ + c.constrain(tenv, this, tenv.named("Lexeme")); +} + void ASymbol::constrain(TEnv& tenv, Constraints& c) const throw(Error) { diff --git a/src/lex.cpp b/src/lex.cpp index 0097346..f633b00 100644 --- a/src/lex.cpp +++ b/src/lex.cpp @@ -16,7 +16,8 @@ */ /** @file - * @brief Lexing (build an unparsed textual AST from a string) + * @brief Lexing (build a CST from a string). + * A CST is a lexeme, or a tuple of CST's. 
*/ #include @@ -40,8 +41,8 @@ readChar(Cursor& cur, istream& in) AST* readExpression(Cursor& cur, istream& in) { -#define PUSH(s, t) { if (t != "") { s.top()->push_back(new AString(loc, t)); t = ""; } } -#define YIELD(s, t) { if (s.empty()) { return new AString(loc, t); } else PUSH(s, t) } +#define PUSH(s, t) { if (t != "") { s.top()->push_back(new ALexeme(loc, t)); t = ""; } } +#define YIELD(s, t) { if (s.empty()) { return new ALexeme(loc, t); } else PUSH(s, t) } stack stk; string tok; Cursor loc; // start of tok @@ -52,13 +53,31 @@ readExpression(Cursor& cur, istream& in) return new ATuple(cur); case ';': while ((c = readChar(cur, in)) != '\n') {} - case '\n': case ' ': case '\t': + case '\n': case ' ': case '\t': case '\r': case '\f': if (tok != "") YIELD(stk, tok); break; case '"': loc = cur; - do { tok.push_back(c); } while ((c = readChar(cur, in)) != '"'); - YIELD(stk, tok + '"'); + tok.push_back(c); // leading quote + while ((c = readChar(cur, in)) != '"') { + if (c == '\\') { // string escape + switch (c = readChar(cur, in)) { + case '"': + tok.push_back('"'); + break; + case '\\': + tok.push_back('\\'); + break; + default: + cin.putback(c); + throw Error(cur, string("unknown string escape `\\") + (char)c + "'"); + } + } else { // any other character + tok.push_back(c); + } + } + tok.push_back(c); // trailing quote + YIELD(stk, tok); break; case '(': stk.push(new ATuple(cur)); @@ -66,6 +85,7 @@ readExpression(Cursor& cur, istream& in) case ')': switch (stk.size()) { case 0: + cin.putback(c); throw Error(cur, "unexpected `)'"); case 1: PUSH(stk, tok); diff --git a/src/llvm.cpp b/src/llvm.cpp index e4d7c07..043b5fc 100644 --- a/src/llvm.cpp +++ b/src/llvm.cpp @@ -93,6 +93,7 @@ struct LLVMEngine : public Engine { if (t->head()->str() == "Bool") return Type::getInt1Ty(context); if (t->head()->str() == "Int") return Type::getInt32Ty(context); if (t->head()->str() == "Float") return Type::getFloatTy(context); + if (t->head()->str() == "String") return 
PointerType::get(Type::getInt8Ty(context), NULL); throw Error(t->loc, string("Unknown primitive type `") + t->str() + "'"); } else if (t->kind == AType::EXPR && t->head()->str() == "Fn") { AType::const_iterator i = t->begin(); @@ -188,6 +189,7 @@ struct LLVMEngine : public Engine { CVal compileTup(CEnv& cenv, const AType* type, const vector& fields); CVal compileDot(CEnv& cenv, CVal tup, int32_t index); CVal compileLiteral(CEnv& cenv, AST* lit); + CVal compileString(CEnv& cenv, const char* str); CVal compilePrimitive(CEnv& cenv, APrimitive* prim); CVal compileIf(CEnv& cenv, AIf* aif); CVal compileGlobal(CEnv& cenv, const AType* type, const string& sym, CVal val); @@ -206,16 +208,33 @@ struct LLVMEngine : public Engine { THROW_IF(!t, Cursor(), "function with non-concrete return type called"); std::stringstream ss; - if (t == Type::getInt32Ty(context)) + if (t == Type::getInt32Ty(context)) { ss << ((int32_t (*)())fp)(); - else if (t == Type::getFloatTy(context)) + } else if (t == Type::getFloatTy(context)) { ss << showpoint << ((float (*)())fp)(); - else if (t == Type::getInt1Ty(context)) + } else if (t == Type::getInt1Ty(context)) { ss << (((bool (*)())fp)() ? 
"#t" : "#f"); - else if (t != Type::getVoidTy(context)) + } else if (retT->head()->str() == "String") { + const std::string s(((char* (*)())fp)()); + ss << "\""; + for (std::string::const_iterator i = s.begin(); i != s.end(); ++i) { + switch (*i) { + case '\"': + case '\\': + ss << '\\'; + default: + ss << *i; + break; + } + } + ss << "\""; + } else if (retT->head()->str() == "Lexeme") { + ss << ((char* (*)())fp)(); + } else if (t != Type::getVoidTy(context)) { ss << ((void* (*)())fp)(); - else + } else { ((void (*)())fp)(); + } return ss.str(); } @@ -293,6 +312,12 @@ LLVMEngine::compileLiteral(CEnv& cenv, AST* lit) throw Error(lit->loc, "Unknown literal type"); } +CVal +LLVMEngine::compileString(CEnv& cenv, const char* str) +{ + return builder.CreateGlobalStringPtr(str); +} + CFunc LLVMEngine::compileFunction(CEnv& cenv, AFn* fn, const AType* type) { diff --git a/src/parse.cpp b/src/parse.cpp index 1c448db..20d0816 100644 --- a/src/parse.cpp +++ b/src/parse.cpp @@ -36,7 +36,7 @@ macDef(PEnv& penv, const AST* exp) THROW_IF(i == tup->end(), tup->loc, "Unexpected end of `def' macro call"); const AST* name = *(++i); THROW_IF(i == tup->end(), name->loc, "Unexpected end of `def' macro call"); - if (name->to()) { + if (name->to()) { return const_cast(exp); } else { const ATuple* pat = name->to(); @@ -48,7 +48,7 @@ macDef(PEnv& penv, const AST* exp) argsExp->push_back(*j); const AST* body = *(++i); ATuple* fnExp = new ATuple(body->loc); - fnExp->push_back(new AString(exp->loc, "fn")); + fnExp->push_back(new ALexeme(exp->loc, "fn")); fnExp->push_back(argsExp); for (; i != tup->end(); ++i) fnExp->push_back(*i); @@ -107,6 +107,7 @@ initLang(PEnv& penv, TEnv& tenv) tenv.def(penv.sym("Int"), new AType(penv.sym("Int"))); tenv.def(penv.sym("Float"), new AType(penv.sym("Float"))); tenv.def(penv.sym("String"), new AType(penv.sym("String"))); + tenv.def(penv.sym("Lexeme"), new AType(penv.sym("Lexeme"))); // Literals static bool trueVal = true; diff --git a/src/pprint.cpp 
b/src/pprint.cpp index 5ee5736..391609a 100644 --- a/src/pprint.cpp +++ b/src/pprint.cpp @@ -24,6 +24,10 @@ ostream& operator<<(ostream& out, const AST* ast) { + const ALexeme* lexeme = ast->to(); + if (lexeme) + return out << *lexeme; + const ALiteral* flit = ast->to*>(); if (flit) return out << showpoint << flit->val; @@ -38,7 +42,7 @@ operator<<(ostream& out, const AST* ast) const AString* str = ast->to(); if (str) - return out << *str; + return out << '"' << *str << '"'; const ASymbol* sym = ast->to(); if (sym) diff --git a/src/repl.cpp b/src/repl.cpp index 472d1a5..977976b 100644 --- a/src/repl.cpp +++ b/src/repl.cpp @@ -29,7 +29,13 @@ using namespace std; static bool readParseType(CEnv& cenv, Cursor& cursor, istream& is, AST*& exp, AST*& ast) { - exp = readExpression(cursor, is); + try { + exp = readExpression(cursor, is); + } catch (Error e) { + is.ignore(std::numeric_limits::max(), '\n'); // Skip REPL junk + throw e; + } + if (exp->to() && exp->to()->empty()) return false; @@ -40,9 +46,10 @@ readParseType(CEnv& cenv, Cursor& cursor, istream& is, AST*& exp, AST*& ast) const Subst subst = unify(c); // Solve type constraints for (Subst::const_iterator i = subst.begin(); i != subst.end(); ++i) { - if (!cenv.tsubst.contains(i->first)) { - //cout << "New variable " << i->first << " = " << i->second << endl; - cenv.tsubst.push_back(*i); + if (!cenv.tsubst.contains(i->first)) { // Substitution's LHS is a new variable + cenv.tsubst.push_back(*i); // Add substitution to global type substitution + Object::pool.addRoot(i->first); + Object::pool.addRoot(i->second); } } @@ -50,12 +57,8 @@ readParseType(CEnv& cenv, Cursor& cursor, istream& is, AST*& exp, AST*& ast) //cout << "**** CENV.SUBST\n" << cenv.tsubst << "********" << endl; //cenv.tsubst = Subst::compose(cenv.tsubst, subst); - // Add types in type substition as GC roots - for (Subst::iterator i = cenv.tsubst.begin(); i != cenv.tsubst.end(); ++i) { - Object::pool.addRoot(i->first); - 
Object::pool.addRoot(i->second); - } - + Object::pool.addRoot(ast); // Make parsed expression a GC root so it is not deleted + return true; } diff --git a/src/resp.hpp b/src/resp.hpp index 752796b..a76aed7 100644 --- a/src/resp.hpp +++ b/src/resp.hpp @@ -241,12 +241,20 @@ struct ALiteral : public AST { const T val; }; +/// Lexeme (any atom in the CST, e.g. "a", "3.4", ""hello"", etc. +struct ALexeme : public AST, public std::string { + ALexeme(Cursor c, const string& s) : AST(c), std::string(s) {} + bool operator==(const AST& rhs) const { return this == &rhs; } + void constrain(TEnv& tenv, Constraints& c) const throw(Error); + CVal compile(CEnv& cenv) throw(); +}; + /// String, e.g. ""a"" struct AString : public AST, public std::string { AString(Cursor c, const string& s) : AST(c), std::string(s) {} bool operator==(const AST& rhs) const { return this == &rhs; } void constrain(TEnv& tenv, Constraints& c) const throw(Error); - CVal compile(CEnv& cenv) throw() { return NULL; } + CVal compile(CEnv& cenv) throw(); }; /// Symbol, e.g. "a" @@ -490,7 +498,7 @@ struct PEnv : private map { void defmac(const string& s, const MF f) { macros.insert(make_pair(s, f)); } - MF mac(const AString& s) const { + MF mac(const ALexeme& s) const { map::const_iterator i = macros.find(s); return (i != macros.end()) ? i->second : NULL; } @@ -517,32 +525,32 @@ struct PEnv : private map { if (tup) { if (tup->empty()) throw Error(exp->loc, "call to empty list"); if (!tup->head()->to()) { - MF mf = mac(*tup->head()->to()); + MF mf = mac(*tup->head()->to()); const AST* expanded = (mf ? 
mf(*this, exp) : exp); const ATuple* expanded_tup = expanded->to(); - const PEnv::Handler* h = handler(true, *expanded_tup->head()->to()); + const PEnv::Handler* h = handler(true, *expanded_tup->head()->to()); if (h) return h->func(*this, expanded, h->arg); } ATuple* parsed_tup = parseTuple(tup); return new ACall(parsed_tup); // Parse as regular call } - const AString* str = exp->to(); - assert(str); - if (isdigit((*str)[0])) { - const std::string& s = *str; + const ALexeme* lex = exp->to(); + assert(lex); + if (isdigit((*lex)[0])) { + const std::string& s = *lex; if (s.find('.') == string::npos) return new ALiteral(strtol(s.c_str(), NULL, 10), exp->loc); else return new ALiteral(strtod(s.c_str(), NULL), exp->loc); - } else if ((*str)[0] == '\"') { - return new AString(exp->loc, str->substr(1, str->length() - 2)); + } else if ((*lex)[0] == '\"') { + return new AString(exp->loc, lex->substr(1, lex->length() - 2)); } else { - const PEnv::Handler* h = handler(false, *str); + const PEnv::Handler* h = handler(false, *lex); if (h) return h->func(*this, exp, h->arg); } - return sym(*exp->to(), exp->loc); + return sym(*lex, exp->loc); } unsigned symID; }; @@ -691,6 +699,7 @@ struct Engine { virtual CVal compileTup(CEnv& cenv, const AType* t, ValVec& f) = 0; virtual CVal compileDot(CEnv& cenv, CVal tup, int32_t index) = 0; virtual CVal compileLiteral(CEnv& cenv, AST* lit) = 0; + virtual CVal compileString(CEnv& cenv, const char* str) = 0; virtual CVal compileCall(CEnv& cenv, CFunc f, const AType* fT, ValVec& args) = 0; virtual CVal compilePrimitive(CEnv& cenv, APrimitive* prim) = 0; virtual CVal compileIf(CEnv& cenv, AIf* aif) = 0; diff --git a/test.sh b/test.sh index e800625..d344aaf 100755 --- a/test.sh +++ b/test.sh @@ -21,5 +21,6 @@ run './test/fac.resp' '720 : Int' run './test/inlinefn.resp' '2 : Int' run './test/nest.resp' '8 : Int' run './test/tup.resp' '5 : Int' +run './test/string.resp' '"Hello, world!" 
: String' #run './test/poly.resp' '#t : Bool' diff --git a/test/string.resp b/test/string.resp new file mode 100644 index 0000000..ff980a9 --- /dev/null +++ b/test/string.resp @@ -0,0 +1,4 @@ +(def greeting "Hello, world!") + +greeting + -- cgit v1.2.1