From 2d925912c38c2557ac853fb1b6de5fd6e5d4c5b5 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sun, 28 Jun 2009 23:29:27 +0000 Subject: Move code into src directory. git-svn-id: http://svn.drobilla.net/resp/tuplr@160 ad02d1e2-f140-0410-9f75-f8b11f17cedd --- src/tuplr.hpp | 654 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 654 insertions(+) create mode 100644 src/tuplr.hpp (limited to 'src/tuplr.hpp') diff --git a/src/tuplr.hpp b/src/tuplr.hpp new file mode 100644 index 0000000..5275dfd --- /dev/null +++ b/src/tuplr.hpp @@ -0,0 +1,654 @@ +/* Tuplr: A programming language + * Copyright (C) 2008-2009 David Robillard + * + * Tuplr is free software: you can redistribute it and/or modify it under + * the terms of the GNU Affero General Public License as published by the + * Free Software Foundation, either version 3 of the License, or (at your + * option) any later version. + * + * Tuplr is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General + * Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with Tuplr. If not, see . + */ + +#ifndef TUPLR_HPP +#define TUPLR_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define FOREACH(IT, i, c) for (IT i = (c).begin(); i != (c).end(); ++i) +#define THROW_IF(cond, error, ...) { if (cond) throw Error(error, __VA_ARGS__); } + +using namespace std; +using boost::format; + + +/*************************************************************************** + * Basic Utility Classes * + ***************************************************************************/ + +/// Location in textual code +struct Cursor { + Cursor(const string& n="", unsigned l=1, unsigned c=0) : name(n), line(l), col(c) {} + operator bool() const { return !(line == 1 && col == 0); } + string str() const { return (format("%1%:%2%:%3%") % name % line % col).str(); } + string name; + unsigned line; + unsigned col; +}; + +/// Compiler error +struct Error { + Error(Cursor c, const string& m) : loc(c), msg(m) {} + const string what() const { return (loc ? loc.str() + ": " : "") + "error: " + msg; } + Cursor loc; + string msg; +}; + +/// Expression ::= Atom | (SubExp*) +template +struct Exp : public std::vector< Exp > { + Exp(Cursor c) : type(LIST), loc(c) {} + Exp(Cursor c, const Atom& a) : type(ATOM), loc(c), atom(a) {} + enum { ATOM, LIST } type; + Cursor loc; + Atom atom; +}; + +template +extern ostream& operator<<(ostream& out, const Exp& exp); + +/// Lexical Address +struct LAddr { + LAddr(unsigned u=0, unsigned o=0) : up(u), over(o) {} + operator bool() const { return !(up == 0 && over == 0); } + unsigned up, over; +}; + +/// Generic Lexical Environment +template +struct Env : public list< vector< pair > > { + typedef vector< pair > Frame; + Env() : list(1) {} + virtual void push(Frame f=Frame()) { list::push_front(f); } + virtual void pop() { assert(!this->empty()); list::pop_front(); } + const V& def(const K& k, const V& v) { + for (typename Frame::iterator b = this->begin()->begin(); b != this->begin()->end(); ++b) + if (b->first == k) + return (b->second = v); + this->front().push_back(make_pair(k, v)); + return v; + } + V* ref(const K& key) { + for (typename Env::iterator f = this->begin(); f != this->end(); ++f) + for (typename Frame::iterator b = f->begin(); b != f->end(); ++b) + if (b->first == key) + return &b->second; + return NULL; + } + LAddr lookup(const K& key) const { + unsigned up = 0; + for (typename Env::const_iterator f = this->begin(); f != this->end(); ++f, ++up) + for (unsigned over = 0; over < f->size(); ++over) + if ((*f)[over].first == key) + return LAddr(up + 1, over + 1); + return LAddr(); + } + V& deref(LAddr addr) { + assert(addr); + typename Env::iterator f = this->begin(); + for (unsigned u = 1; u < addr.up; ++u, ++f) { assert(f != this->end()); } + assert(f->size() > addr.over - 1); + return (*f)[addr.over - 1].second; + } +}; + + +/*************************************************************************** + * Lexer: Text (istream) -> S-Expressions (SExp) * + ***************************************************************************/ + +typedef Exp SExp; ///< Textual S-Expression + +SExp readExpression(Cursor& cur, std::istream& in); + + +/*************************************************************************** + * Backend Types * + ***************************************************************************/ + +typedef void* CValue; ///< Compiled value (opaque) +typedef void* CFunction; ///< Compiled function (opaque) + + +/*************************************************************************** + * Garbage Collector * + ***************************************************************************/ + +struct Object; ///< Object (AST nodes and runtime data) + +struct GC { + enum Tag { + TAG_AST = 2, ///< Abstract syntax tree node + TAG_FRAME = 4 ///< Stack frame + }; + typedef std::list Roots; + typedef std::list Heap; + void* alloc(size_t size, Tag tag); + void collect(const Roots& roots); + void addRoot(const Object* obj) { if (obj) _roots.push_back(obj); } + void lock() { _roots.insert(_roots.end(), _heap.begin(), _heap.end()); } + const Roots& roots() const { return _roots; } +private: + Heap _heap; + Roots _roots; +}; + +/// Dynamic (garbage-collected) object +struct Object { + struct Header { + uint8_t mark; + uint8_t tag; + }; + + /// Always allocated with pool.alloc, so this - sizeof(Header) is a valid Header*. + inline Header* header() const { return (Header*)((char*)this - sizeof(Header)); } + + inline bool marked() const { return header()->mark != 0; } + inline void mark(bool b) const { header()->mark = 1; } + inline GC::Tag tag() const { return (GC::Tag)header()->tag; } + + static void* operator new(size_t size) { return pool.alloc(size, GC::TAG_AST); } + static void operator delete(void* ptr) {} + static GC pool; +}; + + +/*************************************************************************** + * Abstract Syntax Tree * + ***************************************************************************/ + +struct Constraint; ///< Type Constraint +struct TEnv; ///< Type-Time Environment +struct Constraints; ///< Type Constraints +struct Subst; ///< Type substitutions +struct CEnv; ///< Compile-Time Environment + +struct AST; +extern ostream& operator<<(ostream& out, const AST* ast); + +/// Base class for all AST nodes +struct AST : public Object { + AST(Cursor c=Cursor()) : loc(c) {} + virtual ~AST() {} + virtual bool value() const { return true; } + virtual bool operator==(const AST& o) const = 0; + virtual bool contains(const AST* child) const { return false; } + virtual void constrain(TEnv& tenv, Constraints& c) const {} + virtual AST* cps(TEnv& tenv, AST* cont); + virtual void lift(CEnv& cenv) {} + virtual CValue compile(CEnv& cenv) = 0; + string str() const { ostringstream ss; ss << this; return ss.str(); } + template T to() { return dynamic_cast(this); } + template T to() const { return dynamic_cast(this); } + template T as() { + T t = dynamic_cast(this); + if (!t) throw Error(loc, "internal error: bad cast"); + return t; + } + Cursor loc; +}; + +template +static T* tup(Cursor c, AST* ast, ...) +{ + va_list args; + va_start(args, ast); + T* ret = new T(c, ast, args); + va_end(args); + return ret; +} + +/// Literal value +template +struct ALiteral : public AST { + ALiteral(VT v, Cursor c) : AST(c), val(v) {} + bool operator==(const AST& rhs) const { + const ALiteral* r = rhs.to*>(); + return (r && (val == r->val)); + } + void constrain(TEnv& tenv, Constraints& c) const; + CValue compile(CEnv& cenv); + const VT val; +}; + +/// Symbol, e.g. "a" +struct ASymbol : public AST { + bool operator==(const AST& rhs) const { return this == &rhs; } + void constrain(TEnv& tenv, Constraints& c) const; + CValue compile(CEnv& cenv); + mutable LAddr addr; + const string cppstr; +private: + friend class PEnv; + ASymbol(const string& s, Cursor c) : AST(c), cppstr(s) {} +}; + +/// Tuple (heterogeneous sequence of fixed length), e.g. "(a b c)" +struct ATuple : public AST, public vector { + ATuple(Cursor c, const vector& v=vector()) : AST(c), vector(v) {} + ATuple(Cursor c, AST* ast, va_list args) : AST(c) { + if (!ast) return; + push_back(ast); + for (AST* a = va_arg(args, AST*); a; a = va_arg(args, AST*)) + push_back(a); + } + bool value() const { return false; } + bool operator==(const AST& rhs) const { + const ATuple* rt = rhs.to(); + if (!rt || rt->size() != size()) return false; + const_iterator l = begin(); + FOREACH(const_iterator, r, *rt) + if (!(*(*l++) == *(*r))) + return false; + return true; + } + bool contains(AST* child) const { + if (*this == *child) return true; + FOREACH(const_iterator, p, *this) + if (**p == *child || (*p)->contains(child)) + return true; + return false; + } + void constrain(TEnv& tenv, Constraints& c) const; + void lift(CEnv& cenv) { FOREACH(iterator, t, *this) (*t)->lift(cenv); } + + CValue compile(CEnv& cenv) { throw Error(loc, "tuple compiled"); } +}; + +/// Type Expression, e.g. "Int", "(Fn (Int Int) Float)" +struct AType : public ATuple { + AType(ASymbol* s) : ATuple(s->loc), kind(PRIM), id(0) { push_back(s); } + AType(Cursor c, unsigned i, LAddr a) : ATuple(c), kind(VAR), id(i) {} + AType(Cursor c, AST* ast, ...) : ATuple(c), kind(EXPR), id(0) { + if (!ast) return; + va_list args; + va_start(args, ast); + push_back(ast); + for (AST* a = va_arg(args, AST*); a; a = va_arg(args, AST*)) + push_back(a); + va_end(args); + } + AType(Cursor c, AST* ast, va_list args) : ATuple(c, ast, args), kind(EXPR), id(0) {} + CValue compile(CEnv& cenv) { return NULL; } + bool var() const { return kind == VAR; } + bool concrete() const { + switch (kind) { + case VAR: return false; + case PRIM: return at(0)->str() != "Nothing"; + case EXPR: + FOREACH(const_iterator, t, *this) { + AType* kid = (*t)->to(); + if (kid && !kid->concrete()) + return false; + } + } + return true; + } + bool operator==(const AST& rhs) const { + const AType* rt = rhs.to(); + if (!rt || kind != rt->kind) + return false; + else + switch (kind) { + case VAR: return id == rt->id; + case PRIM: return at(0)->str() == rt->at(0)->str(); + case EXPR: return ATuple::operator==(rhs); + } + return false; // never reached + } + enum { VAR, PRIM, EXPR } kind; + unsigned id; +}; + +/// Type substitution +struct Subst : public list< pair > { + Subst(AType* s=0, AType* t=0) { if (s && t) { assert(s != t); push_back(make_pair(s, t)); } } + static Subst compose(const Subst& delta, const Subst& gamma); + void add(const AType* from, AType* to) { push_back(make_pair(from, to)); } + const_iterator find(const AType* t) const { + for (const_iterator j = begin(); j != end(); ++j) + if (*j->first == *t) + return j; + return end(); + } + AST* apply(AST* ast) const { + AType* in = ast->to(); + if (!in) return ast; + if (in->kind == AType::EXPR) { + AType* out = tup(in->loc, NULL); + for (size_t i = 0; i < in->size(); ++i) + out->push_back(apply(in->at(i))); + return out; + } else { + const_iterator i = find(in); + if (i != end()) { + AST* out = i->second; + AType* outT = out->to(); + if (outT && outT->kind == AType::EXPR && !outT->concrete()) + out = apply(out); + return out; + } else { + return in; + } + } + } +}; + +inline ostream& operator<<(ostream& out, const Subst& s) { + for (Subst::const_iterator i = s.begin(); i != s.end(); ++i) + out << i->first << " => " << i->second << endl; + return out; +} + +/// Fn (first-class function with captured lexical bindings) +struct AFn : public ATuple { + AFn(Cursor c, AST* ast, va_list args) : ATuple(c, ast, args) {} + bool operator==(const AST& rhs) const { return this == &rhs; } + void constrain(TEnv& tenv, Constraints& c) const; + AST* cps(TEnv& tenv, AST* cont); + void lift(CEnv& cenv); + void liftCall(CEnv& cenv, const AType& argsT); + CValue compile(CEnv& cenv); + ATuple* prot() const { return at(1)->to(); } + /// System level implementations of this (polymorphic) fn + struct Impls : public list< pair > { + CFunction find(AType* type) const { + for (const_iterator f = begin(); f != end(); ++f) + if (*f->first == *type) + return f->second; + return NULL; + } + }; + Impls impls; + mutable Subst subst; + string name; +}; + +/// Function call/application, e.g. "(func arg1 arg2)" +struct ACall : public ATuple { + ACall(const SExp& e, const ATuple& t) : ATuple(e.loc, t) {} + ACall(Cursor c, AST* ast, va_list args) : ATuple(c, ast, args) {} + void constrain(TEnv& tenv, Constraints& c) const; + AST* cps(TEnv& tenv, AST* cont); + void lift(CEnv& cenv); + CValue compile(CEnv& cenv); +}; + +/// Definition special form, e.g. "(def x 2)" +struct ADef : public ACall { + ADef(const SExp& e, const ATuple& t) : ACall(e, t) {} + ADef(Cursor c, AST* ast, va_list args) : ACall(c, ast, args) {} + ASymbol* sym() const { + ASymbol* sym = at(1)->to(); + if (!sym) { + ATuple* tup = at(1)->to(); + if (tup && !tup->empty()) + return tup->at(0)->to(); + } + return sym; + } + void constrain(TEnv& tenv, Constraints& c) const; + AST* cps(TEnv& tenv, AST* cont); + void lift(CEnv& cenv); + CValue compile(CEnv& cenv); +}; + +/// Conditional special form, e.g. "(if cond thenexp elseexp)" +struct AIf : public ACall { + AIf(const SExp& e, const ATuple& t) : ACall(e, t) {} + AIf(Cursor c, AST* ast, va_list args) : ACall(c, ast, args) {} + void constrain(TEnv& tenv, Constraints& c) const; + AST* cps(TEnv& tenv, AST* cont); + CValue compile(CEnv& cenv); +}; + +/// Primitive (builtin arithmetic function), e.g. "(+ 2 3)" +struct APrimitive : public ACall { + APrimitive(const SExp& e, const ATuple& t) : ACall(e, t) {} + bool value() const { + for (size_t i = 1; i < size(); ++i) + if (!at(i)->value()) + return false;; + return true; + } + void constrain(TEnv& tenv, Constraints& c) const; + AST* cps(TEnv& tenv, AST* cont); + CValue compile(CEnv& cenv); +}; + + +/*************************************************************************** + * Parser: S-Expressions (SExp) -> AST Nodes (AST) * + ***************************************************************************/ + +/// Parse Time Environment (really just a symbol table) +struct PEnv : private map { + PEnv() : symID(0) {} + typedef AST* (*PF)(PEnv&, const SExp&, void*); ///< Parse Function + typedef SExp (*MF)(PEnv&, const SExp&); ///< Macro Function + struct Handler { Handler(PF f, void* a=0) : func(f), arg(a) {} PF func; void* arg; }; + map aHandlers; ///< Atom parse functions + map lHandlers; ///< List parse functions + map macros; ///< Macro functions + void reg(bool list, const string& s, const Handler& h) { + (list ? lHandlers : aHandlers).insert(make_pair(sym(s)->str(), h)); + } + const Handler* handler(bool list, const string& s) const { + const map& handlers = list ? lHandlers : aHandlers; + map::const_iterator i = handlers.find(s); + return (i != handlers.end()) ? &i->second : NULL; + } + void defmac(const string& s, const MF f) { + macros.insert(make_pair(s, f)); + } + MF mac(const string& s) const { + map::const_iterator i = macros.find(s); + return (i != macros.end()) ? i->second : NULL; + } + string gensymstr(const char* s="_") { return (format("%s%d") % s % symID++).str(); } + ASymbol* gensym(const char* s="_") { return sym(gensymstr(s)); } + ASymbol* sym(const string& s, Cursor c=Cursor()) { + const const_iterator i = find(s); + if (i != end()) { + return i->second; + } else { + ASymbol* sym = new ASymbol(s, c); + insert(make_pair(s, sym)); + return sym; + } + } + ATuple parseTuple(const SExp& e) { + ATuple ret(e.loc, vector(e.size())); + size_t n = 0; + FOREACH(SExp::const_iterator, i, e) + ret[n++] = parse(*i); + return ret; + } + AST* parse(const SExp& exp) { + if (exp.type == SExp::LIST) { + if (exp.empty()) throw Error(exp.loc, "call to empty list"); + if (exp.front().type == SExp::ATOM) { + MF mf = mac(exp.front().atom); + SExp expanded = (mf ? mf(*this, exp) : exp); + + const PEnv::Handler* h = handler(true, expanded.front().atom); + if (h) + return h->func(*this, expanded, h->arg); + } + return new ACall(exp, parseTuple(exp)); // Parse as regular call + } else if (isdigit(exp.atom[0])) { + if (exp.atom.find('.') == string::npos) + return new ALiteral(strtol(exp.atom.c_str(), NULL, 10), exp.loc); + else + return new ALiteral(strtod(exp.atom.c_str(), NULL), exp.loc); + } else { + const PEnv::Handler* h = handler(false, exp.atom); + if (h) + return h->func(*this, exp, h->arg); + } + return sym(exp.atom, exp.loc); + } + unsigned symID; +}; + + +/*************************************************************************** + * Typing * + ***************************************************************************/ + +struct Constraint : public pair { + Constraint(AType* a, AType* b, Cursor c) : pair(a, b), loc(c) {} + Cursor loc; +}; + +struct Constraints : public list { + void constrain(TEnv& tenv, const AST* o, AType* t); +}; + +inline ostream& operator<<(ostream& out, const Constraints& c) { + for (Constraints::const_iterator i = c.begin(); i != c.end(); ++i) + out << i->first << " : " << i->second << endl; + return out; +} + +/// Type-Time Environment +struct TEnv : public Env< const ASymbol*, pair > { + TEnv(PEnv& p) : penv(p), varID(1) {} + AType* fresh(const ASymbol* sym) { + assert(sym); + AType* ret = new AType(sym->loc, varID++, LAddr()); + def(sym, make_pair((AST*)NULL, ret)); + return ret; + } + AType* var(const AST* ast=0) { + const ASymbol* sym = ast->to(); + if (sym) + return deref(lookup(sym)).second; + + Vars::iterator v = vars.find(ast); + if (v != vars.end()) + return v->second; + + AType* ret = new AType(ast ? ast->loc : Cursor(), varID++, LAddr()); + if (ast) + vars[ast] = ret; + + return ret; + } + AType* named(const string& name) { + return ref(penv.sym(name))->second; + } + AST* resolve(AST* ast) { + ASymbol* sym = ast->to(); + return (sym && sym->addr) ? ref(sym)->first : ast; + } + + static Subst unify(const Constraints& c); + + typedef map Vars; + typedef map GenericTypes; + Vars vars; + GenericTypes genericTypes; + PEnv& penv; + unsigned varID; +}; + + +/*************************************************************************** + * Code Generation * + ***************************************************************************/ + +struct Engine { + virtual CFunction startFunction(CEnv& cenv, const std::string& name, + const AType* retT, const ATuple& argsT, + const vector argNames=vector()) = 0; + + virtual void finishFunction(CEnv& cenv, CFunction f, const AType* retT, CValue ret) = 0; + virtual void eraseFunction(CEnv& cenv, CFunction f) = 0; + virtual void writeModule(CEnv& cenv, std::ostream& os) = 0; + + virtual const string call(CEnv& cenv, CFunction f, AType* retT) = 0; +}; + +Engine* tuplr_new_engine(); +void tuplr_free_engine(Engine* engine); + +/// Compile-Time Environment +struct CEnv { + CEnv(PEnv& p, TEnv& t, Engine* e, ostream& os=std::cout, ostream& es=std::cerr) + : out(os), err(es), penv(p), tenv(t), _engine(e) + {} + + ~CEnv() { Object::pool.collect(GC::Roots()); } + + typedef Env Vals; + + Engine* engine() { return _engine; } + void push() { tenv.push(); vals.push(); } + void pop() { tenv.pop(); vals.pop(); } + void precompile(AST* obj, CValue value) { vals.def(obj, value); } + CValue compile(AST* obj) { + CValue* v = vals.ref(obj); + return (v && *v) ? *v : vals.def(obj, obj->compile(*this)); + } + void lock(AST* ast) { Object::pool.addRoot(ast); Object::pool.addRoot(type(ast)); } + AType* type(AST* ast, const Subst& subst = Subst()) const { + ASymbol* sym = ast->to(); + if (sym) + return sym->addr ? tenv.deref(sym->addr).second : NULL; + return tsubst.apply(subst.apply(tenv.vars[ast]))->to(); + } + void def(ASymbol* sym, AST* c, AType* t, CValue v) { + tenv.def(sym, make_pair(c, t)); + vals.def(sym, v); + } + + ostream& out; + ostream& err; + PEnv& penv; + TEnv& tenv; + Vals vals; + + Subst tsubst; + + map args; + +private: + Engine* _engine; +}; + + +/*************************************************************************** + * EVAL/REPL/MAIN * + ***************************************************************************/ + +void pprint(std::ostream& out, const AST* ast); +void initLang(PEnv& penv, TEnv& tenv); +int eval(CEnv& cenv, const string& name, istream& is); +int repl(CEnv& cenv); + +#endif // TUPLR_HPP + -- cgit v1.2.1