6 files changed, 125 insertions, 82 deletions
diff --git a/serd/serd.h b/serd/serd.h
index 351b8b8b..1e7da13c 100644
--- a/serd/serd.h
+++ b/serd/serd.h
@@ -113,28 +113,26 @@ typedef enum {
 
    This is more precise than the type of an abstract RDF node.  An abstract
    node is either a resource, literal, or blank.  In syntax there are two ways
-   to refer to both a resource (by URI or CURIE) and a blank (by ID or
-   anonymously).
+   to refer to a resource (by URI or CURIE) and two ways to refer to a blank
+   (by ID or anonymously).
 
-   Serd represents all nodes as an unquoted UTF-8 string "value" associated
-   with a @ref SerdType, which is precise enough to preserve the syntactic
-   information required for streaming abbreviation.  A non-abbreviating sink
-   may simply consider @ref SERD_ANON_BEGIN and @ref SERD_ANON equivalent to
-   @ref SERD_BLANK_ID.
+   Serd represents a node as a string "value" associated with a @ref SerdType,
+   which is precise enough to support streaming abbreviation.  If abbreviation
+   is not applicable, @ref SERD_ANON_BEGIN and @ref SERD_ANON may simply be
+   considered equivalent to @ref SERD_BLANK_ID.
 */
 typedef enum {
 	/**
 	   The type of a nonexistent node.
 
-	   This type is occasionally useful, but is never emitted by the reader.
+	   This type is useful as a sentinel, but is never emitted by the reader.
 	*/
 	SERD_NOTHING = 0,
 
 	/**
 	   Literal value.
 
-	   A literal optionally has either an associated language, or an associated
-	   datatype (not both).
+	   A literal optionally has either a language, or a datatype (not both).
 	*/
 	SERD_LITERAL = 1,
 
@@ -142,8 +140,8 @@ typedef enum {
 	   URI (absolute or relative).
 
 	   Value is an unquoted URI string, which is either a relative reference
-	   with respect to the current base URI, or an absolute URI.  A URI is an
-	   ID with universal scope.
+	   with respect to the current base URI (e.g. "foo/bar"), or an absolute
+	   URI (e.g. "http://example.org/foo").
 	   @see <a href="http://tools.ietf.org/html/rfc3986">RFC3986</a>.
 	*/
 	SERD_URI = 2,
@@ -160,8 +158,8 @@ typedef enum {
 	/**
 	   A blank node ID.
 
-	   Value is a blank node ID, e.g. "id3", which is valid only within this
-	   serialisation.
+	   Value is a blank node ID, e.g. "id3", which is meaningful only within
+	   this serialisation.
 	   @see <a href="http://www.w3.org/TeamSubmission/turtle#nodeID">Turtle
 	   <tt>nodeID</tt></a>
 	*/
@@ -185,12 +183,21 @@ typedef enum {
 } SerdType;
 
 /**
+   Flags indicating certain string properties relevant to serialisation.
+*/
+typedef enum {
+	SERD_HAS_NEWLINE = 1,      /**< Contains line breaks ('\\n' or '\\r') */
+	SERD_HAS_QUOTE   = 1 << 1  /**< Contains quotes ('"') */
+} SerdNodeFlag;
+
+/**
    A syntactic RDF node.
 */
 typedef struct {
-	const uint8_t* buf;      /**< Buffer */
+	const uint8_t* buf;      /**< Value string */
 	size_t         n_bytes;  /**< Size in bytes (including null) */
 	size_t         n_chars;  /**< Length in characters */
+	uint32_t       flags;    /**< Bitwise OR of SerdNodeFlag values */
 	SerdType       type;     /**< Node type */
 } SerdNode;
 
@@ -223,8 +230,8 @@ typedef struct {
 
    The style of the writer output can be controlled by ORing together
    values from this enumeration.  Note that some options are only supported
-   for some syntaxes (e.g. NTriples does not support any options except
-   @ref SERD_STYLE_ASCII, which is required).
+   for some syntaxes (e.g. NTriples does not support abbreviation and is
+   always ASCII).
 */
 typedef enum {
 	SERD_STYLE_ABBREVIATED = 1,       /**< Abbreviate triples when possible. */
@@ -279,7 +286,7 @@ serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream);
    @{
 */
 
-static const SerdNode SERD_NODE_NULL = { 0, 0, 0, SERD_NOTHING };
+static const SerdNode SERD_NODE_NULL = { 0, 0, 0, 0, SERD_NOTHING };
 
 /**
    Make a (shallow) node from @a str.
diff --git a/src/env.c b/src/env.c
index 6671d683..0d9bc128 100644
--- a/src/env.c
+++ b/src/env.c
@@ -206,6 +206,7 @@ serd_env_expand_node(const SerdEnv*  env,
 		SerdNode ret = { NULL,
 		                 prefix.len + suffix.len + 1,
 		                 prefix.len + suffix.len,  // FIXME: UTF-8
+		                 0,
 		                 SERD_URI };
 		ret.buf = malloc(ret.n_bytes);
 		snprintf((char*)ret.buf, ret.n_bytes, "%s%s", prefix.buf, suffix.buf);
diff --git a/src/node.c b/src/node.c
index b6288ebc..edd0db86 100644
--- a/src/node.c
+++ b/src/node.c
@@ -23,9 +23,10 @@ SERD_API
 SerdNode
 serd_node_from_string(SerdType type, const uint8_t* buf)
 {
-	size_t buf_n_bytes;
-	const size_t buf_n_chars = serd_strlen(buf, &buf_n_bytes);
-	SerdNode ret = { buf, buf_n_bytes, buf_n_chars, type };
+	uint32_t flags = 0;  // serd_strlen() only ORs bits in, so start from 0
+	size_t   buf_n_bytes;
+	const size_t buf_n_chars = serd_strlen(buf, &buf_n_bytes, &flags);
+	SerdNode ret = { buf, buf_n_bytes, buf_n_chars, flags, type };
 	return ret;
 }
 
@@ -118,7 +119,7 @@ serd_node_new_uri(const SerdURI* uri, const SerdURI* base, SerdURI* out)
 	const size_t len = serd_uri_string_length(&abs_uri);
 	uint8_t*     buf = malloc(len + 1);
 
-	SerdNode node = { buf, len + 1, len, SERD_URI };  // FIXME: UTF-8
+	SerdNode node = { buf, len + 1, len, 0, SERD_URI };  // FIXME: UTF-8
 
 	uint8_t*     ptr        = buf;
 	const size_t actual_len = serd_uri_serialise(&abs_uri, string_sink, &ptr);
diff --git a/src/reader.c b/src/reader.c
index e4e91f4c..b0ad02e5 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -269,9 +269,9 @@ public_node_from_ref(SerdReader* reader, SerdType type, Ref ref)
 	if (!ref) {
 		return SERD_NODE_NULL;
 	}
-	const SerdString* str    = deref(reader, ref);
-	const SerdNode    public = { str->buf, str->n_bytes, str->n_chars, type };
-	return public;
+	const SerdString* str  = deref(reader, ref);
+	const SerdNode    node = { str->buf, str->n_bytes, str->n_chars, 0, type };
+	return node;
 }
 
 static inline SerdNode
@@ -287,16 +287,17 @@ public_node(SerdReader* reader, const Node* private)
 static inline bool
 emit_statement(SerdReader* reader,
                const Node* g, const Node* s, const Node* p, const Node* o,
-               const Node* d, Ref l)
+               const Node* d, Ref l, uint32_t f)
 {
 	assert(s && p && o);
 	assert(s->value && p->value && o->value);
 	const SerdNode graph     = public_node(reader, g);
 	const SerdNode subject   = public_node(reader, s);
 	const SerdNode predicate = public_node(reader, p);
-	const SerdNode object    = public_node(reader, o);
+	SerdNode       object    = public_node(reader, o);
 	const SerdNode datatype  = public_node(reader, d);
 	const SerdNode lang      = public_node_from_ref(reader, SERD_LITERAL, l);
+	object.flags = f;
 	return !reader->statement_sink(reader->handle,
 	                               &graph,
 	                               &subject,
@@ -389,7 +390,7 @@ read_character_escape(SerdReader* reader, Ref dest)
 }
 
 static inline bool
-read_echaracter_escape(SerdReader* reader, Ref dest)
+read_echaracter_escape(SerdReader* reader, Ref dest, uint32_t* flags)
 {
 	switch (peek_byte(reader)) {
 	case 't':
@@ -397,10 +398,12 @@ read_echaracter_escape(SerdReader* reader, Ref dest)
 		push_byte(reader, dest, '\t');
 		return true;
 	case 'n':
+		*flags |= SERD_HAS_NEWLINE;
 		eat_byte(reader, 'n');
 		push_byte(reader, dest, '\n');
 		return true;
 	case 'r':
+		*flags |= SERD_HAS_NEWLINE;
 		eat_byte(reader, 'r');
 		push_byte(reader, dest, '\r');
 		return true;
@@ -410,26 +413,28 @@ read_echaracter_escape(SerdReader* reader, Ref dest)
 }
 
 static inline bool
-read_scharacter_escape(SerdReader* reader, Ref dest)
+read_scharacter_escape(SerdReader* reader, Ref dest, uint32_t* flags)
 {
 	switch (peek_byte(reader)) {
 	case '"':
+		*flags |= SERD_HAS_QUOTE;
 		push_byte(reader, dest, eat_byte(reader, '"'));
 		return true;
 	default:
-		return read_echaracter_escape(reader, dest);
+		return read_echaracter_escape(reader, dest, flags);
 	}
 }
 
 static inline bool
 read_ucharacter_escape(SerdReader* reader, Ref dest)
 {
+	uint32_t flags = 0;
 	switch (peek_byte(reader)) {
 	case '>':
 		push_byte(reader, dest, eat_byte(reader, '>'));
 		return true;
 	default:
-		return read_echaracter_escape(reader, dest);
+		return read_echaracter_escape(reader, dest, &flags);
 	}
 }
 
@@ -477,11 +482,12 @@ read_character(SerdReader* reader, Ref dest)
 static inline SerdStatus
 read_echaracter(SerdReader* reader, Ref dest)
 {
-	uint8_t c = peek_byte(reader);
+	uint32_t flags = 0;  // Escape flags are not reported by this function
+	uint8_t  c     = peek_byte(reader);
 	switch (c) {
 	case '\\':
 		eat_byte(reader, '\\');
-		if (read_echaracter_escape(reader, peek_byte(reader))) {
+		if (read_echaracter_escape(reader, dest, &flags)) {  // push to dest
 			return SERD_SUCCESS;
 		} else {
 			error(reader, "illegal escape `\\%c'\n", peek_byte(reader));
@@ -494,31 +500,34 @@ read_echaracter(SerdReader* reader, Ref dest)
 
 // [43] lcharacter ::= echaracter | '\"' | #x9 | #xA | #xD
 static inline SerdStatus
-read_lcharacter(SerdReader* reader, Ref dest)
+read_lcharacter(SerdReader* reader, Ref dest, uint32_t* flags)
 {
 	const uint8_t c = peek_byte(reader);
-	uint8_t       pre[3];
+	uint8_t       pre[4];  // '"""' plus one byte of lookahead (pre[3])
 	switch (c) {
 	case '"':
-		peek_string(reader, pre, 3);
-		if (pre[1] == '\"' && pre[2] == '\"') {
+		peek_string(reader, pre, 4);
+		if (pre[1] == '\"' && pre[2] == '\"' && pre[3] != '\"') {
 			eat_byte(reader, '\"');
 			eat_byte(reader, '\"');
 			eat_byte(reader, '\"');
 			return SERD_FAILURE;
 		} else {
+			*flags |= SERD_HAS_QUOTE;
 			push_byte(reader, dest, eat_byte(reader, '"'));
 			return SERD_SUCCESS;
 		}
 	case '\\':
 		eat_byte(reader, '\\');
-		if (read_scharacter_escape(reader, dest)) {
+		if (read_scharacter_escape(reader, dest, flags)) {
 			return SERD_SUCCESS;
 		} else {
 			error(reader, "illegal escape `\\%c'\n", peek_byte(reader));
 			return SERD_ERR_BAD_SYNTAX;
 		}
-	case 0x9: case 0xA: case 0xD:
+	case 0xA: case 0xD:
+		*flags |= SERD_HAS_NEWLINE;  // fall-through
+	case 0x9:
 		push_byte(reader, dest, eat_byte(reader, c));
 		return SERD_SUCCESS;
 	default:
@@ -528,13 +537,13 @@ read_lcharacter(SerdReader* reader, Ref dest)
 
 // [42] scharacter ::= ( echaracter - #x22 ) | '\"'
 static inline SerdStatus
-read_scharacter(SerdReader* reader, Ref dest)
+read_scharacter(SerdReader* reader, Ref dest, uint32_t* flags)
 {
 	uint8_t c = peek_byte(reader);
 	switch (c) {
 	case '\\':
 		eat_byte(reader, '\\');
-		if (read_scharacter_escape(reader, dest)) {
+		if (read_scharacter_escape(reader, dest, flags)) {
 			return SERD_SUCCESS;
 		} else {
 			error(reader, "illegal escape `\\%c'\n", peek_byte(reader));
@@ -612,12 +621,12 @@ read_ws_plus(SerdReader* reader)
 
 // [37] longString ::= #x22 #x22 #x22 lcharacter* #x22 #x22 #x22
 static Ref
-read_longString(SerdReader* reader)
+read_longString(SerdReader* reader, uint32_t* flags)
 {
 	eat_string(reader, "\"\"\"", 3);
 	Ref        str = push_string(reader, "", 1);
 	SerdStatus st;
-	while (!(st = read_lcharacter(reader, str))) {}
+	while (!(st = read_lcharacter(reader, str, flags))) {}
 	if (st < SERD_ERR_UNKNOWN) {
 		return str;
 	}
@@ -627,12 +636,12 @@ read_longString(SerdReader* reader)
 
 // [36] string ::= #x22 scharacter* #x22
 static Ref
-read_string(SerdReader* reader)
+read_string(SerdReader* reader, uint32_t* flags)
 {
 	eat_byte(reader, '\"');
 	Ref        str = push_string(reader, "", 1);
 	SerdStatus st;
-	while (!(st = read_scharacter(reader, str))) {}
+	while (!(st = read_scharacter(reader, str, flags))) {}
 	if (st < SERD_ERR_UNKNOWN) {
 		eat_byte(reader, '\"');
 		return str;
@@ -643,7 +652,7 @@ read_string(SerdReader* reader)
 
 // [35] quotedString ::= string | longString
 static Ref
-read_quotedString(SerdReader* reader)
+read_quotedString(SerdReader* reader, uint32_t* flags)
 {
 	uint8_t pre[3];
 	peek_string(reader, pre, 3);
@@ -651,11 +660,11 @@ read_quotedString(SerdReader* reader)
 	switch (pre[1]) {
 	case '\"':
 		if (pre[2] == '\"')
-			return read_longString(reader);
+			return read_longString(reader, flags);
 		else
-			return read_string(reader);
+			return read_string(reader, flags);
 	default:
-		return read_string(reader);
+		return read_string(reader, flags);
 	}
 }
 
@@ -893,14 +902,15 @@ read_resource(SerdReader* reader, Node* dest)
 // [14] literal ::= quotedString ( '@' language )? | datatypeString
 //    | integer | double | decimal | boolean
 static bool
-read_literal(SerdReader* reader, Node* dest, Node* datatype, Ref* lang)
+read_literal(SerdReader* reader, Node* dest,
+             Node* datatype, Ref* lang, uint32_t* flags)
 {
 	Ref           str = 0;
 	const uint8_t c   = peek_byte(reader);
 	if (c == '-' || c == '+' || c == '.' || is_digit(c)) {
 		return read_number(reader, dest, datatype);
 	} else if (c == '\"') {
-		str = read_quotedString(reader);
+		str = read_quotedString(reader, flags);
 		if (!str) {
 			return false;
 		}
@@ -994,7 +1004,7 @@ read_blank(SerdReader* reader, ReadContext ctx, Node* dest)
 			if (ctx.subject) {
 				TRY_RET(emit_statement(reader,
 				                       ctx.graph, ctx.subject, ctx.predicate,
-				                       dest, NULL, 0));
+				                       dest, NULL, 0, 0));
 			}
 			return true;
 		}
@@ -1002,7 +1012,7 @@ read_blank(SerdReader* reader, ReadContext ctx, Node* dest)
 		if (ctx.subject) {
 			TRY_RET(emit_statement(reader,
 			                       ctx.graph, ctx.subject, ctx.predicate,
-			                       dest, NULL, 0));
+			                       dest, NULL, 0, 0));
 			dest->type = SERD_ANON;
 		}
 		ctx.subject = dest;
@@ -1019,7 +1029,7 @@ read_blank(SerdReader* reader, ReadContext ctx, Node* dest)
 			if (ctx.subject) {
 				TRY_RET(emit_statement(reader,
 				                       ctx.graph, ctx.subject, ctx.predicate,
-				                       dest, NULL, 0));
+				                       dest, NULL, 0, 0));
 			}
 			return true;
 		}
@@ -1060,6 +1070,7 @@ read_object(SerdReader* reader, ReadContext ctx)
 	Node          o        = INTERNAL_NODE_NULL;
 	Node          datatype = INTERNAL_NODE_NULL;
 	Ref           lang     = 0;
+	uint32_t      flags    = 0;
 	const uint8_t c        = peek_byte(reader);
 	switch (c) {
 	case '\0':
@@ -1077,10 +1088,10 @@ read_object(SerdReader* reader, ReadContext ctx)
 	case '\"': case '+': case '-':
 	case '0': case '1': case '2': case '3': case '4':
 	case '5': case '6': case '7': case '8': case '9':
-		TRY_THROW(ret = read_literal(reader, &o, &datatype, &lang));
+		TRY_THROW(ret = read_literal(reader, &o, &datatype, &lang, &flags));
 		break;
 	case '.':
-		TRY_THROW(ret = read_literal(reader, &o, &datatype, &lang));
+		TRY_THROW(ret = read_literal(reader, &o, &datatype, &lang, &flags));
 		break;
 	default:
 		/* Either a boolean literal, or a qname.
@@ -1108,7 +1119,7 @@ read_object(SerdReader* reader, ReadContext ctx)
 		assert(o.value);
 		ret = emit_statement(reader,
 		                     ctx.graph, ctx.subject, ctx.predicate,
-		                     &o, &datatype, lang);
+		                     &o, &datatype, lang, flags);
 	}
 
 except:
@@ -1188,14 +1199,14 @@ read_collection_rec(SerdReader* reader, ReadContext ctx)
 		TRY_RET(emit_statement(reader, NULL,
 		                       ctx.subject,
 		                       &reader->rdf_rest,
-		                       &reader->rdf_nil, NULL, 0));
+		                       &reader->rdf_nil, NULL, 0, 0));
 		return false;
 	} else {
 		const Node rest = make_node(SERD_BLANK_ID, blank_id(reader));
 		TRY_RET(emit_statement(reader, ctx.graph,
 		                       ctx.subject,
 		                       &reader->rdf_rest,
-		                       &rest, NULL, 0));
+		                       &rest, NULL, 0, 0));
 		ctx.subject = &rest;
 		ctx.predicate = &reader->rdf_first;
 		if (read_object(reader, ctx)) {
diff --git a/src/serd_internal.h b/src/serd_internal.h
index dd57af1e..e573a806 100644
--- a/src/serd_internal.h
+++ b/src/serd_internal.h
@@ -105,7 +105,10 @@ is_digit(const uint8_t c)
    @param n_bytes (Output) Set to the size of @a str in bytes (incl. NULL).
+   @param flags (Output) If non-NULL, the SerdNodeFlag bits that apply to
+   @a str are ORed into this value.  Existing bits are kept, so the caller
+   must initialise it (normally to 0) before the call.
 */
 static inline size_t
-serd_strlen(const uint8_t* str, size_t* n_bytes)
+serd_strlen(const uint8_t* str, size_t* n_bytes, uint32_t* flags)
 {
 	size_t n_chars = 0;
 	size_t i       = 0;
@@ -113,6 +116,19 @@ serd_strlen(const uint8_t* str, size_t* n_bytes)
 		if ((str[i] & 0xC0) != 0x80) {
 			// Does not start with `10', start of a new character
 			++n_chars;
+			if (flags) {
+				switch (str[i]) {
+				case '\r':
+				case '\n':
+					*flags |= SERD_HAS_NEWLINE;
+					break;
+				case '"':
+					*flags |= SERD_HAS_QUOTE;
+					break;
+				default:
+					break;
+				}
+			}
 		}
 	}
 	if (n_bytes) {
diff --git a/src/writer.c b/src/writer.c
index 4e4ee2eb..a9cdf6ac 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -31,7 +31,7 @@ typedef struct {
 } WriteContext;
 
 static const WriteContext WRITE_CONTEXT_NULL = {
-	{ 0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}
+	{ 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}
 };
 
 struct SerdWriterImpl {
@@ -47,9 +47,9 @@ struct SerdWriterImpl {
 };
 
 typedef enum {
-	WRITE_NORMAL,
 	WRITE_URI,
-	WRITE_STRING
+	WRITE_STRING,
+	WRITE_LONG_STRING
 } TextContext;
 
 static inline WriteContext*
@@ -67,23 +67,33 @@ write_text(SerdWriter* writer, TextContext ctx,
 	char escape[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 	for (size_t i = 0; i < n_bytes;) {
 		uint8_t in = utf8[i++];
-		switch (in) {
-		case '\\': writer->sink("\\\\", 2, writer->stream); continue;
-		case '\n': writer->sink("\\n", 2, writer->stream);  continue;
-		case '\r': writer->sink("\\r", 2, writer->stream);  continue;
-		case '\t': writer->sink("\\t", 2, writer->stream);  continue;
-		case '"':
-			if (terminator == '"') {
-				writer->sink("\\\"", 2, writer->stream);
-				continue;
-			}  // else fall-through
-		default: break;
-		}
+		if (ctx == WRITE_LONG_STRING) {
+			if (in == '\\') {
+				writer->sink("\\\\", 2, writer->stream); continue;
+			} else if (in == '"' && (i == n_bytes || utf8[i] == '"')) {
+				// A raw quote here could be read as (part of) the closing
+				// '"""', so escape quotes that end the value or start a run
+				writer->sink("\\\"", 2, writer->stream); continue;
+			}
+		} else {
+			switch (in) {
+			case '\\': writer->sink("\\\\", 2, writer->stream); continue;
+			case '\n': writer->sink("\\n", 2, writer->stream);  continue;
+			case '\r': writer->sink("\\r", 2, writer->stream);  continue;
+			case '\t': writer->sink("\\t", 2, writer->stream);  continue;
+			case '"':
+				if (terminator == '"') {
+					writer->sink("\\\"", 2, writer->stream);
+					continue;
+				}  // else fall-through
+			default: break;
+			}
 
-		if (in == terminator) {
-			snprintf(escape, 7, "\\u%04X", terminator);
-			writer->sink(escape, 6, writer->stream);
-			continue;
+			if (in == terminator) {
+				snprintf(escape, 7, "\\u%04X", terminator);
+				writer->sink(escape, 6, writer->stream);
+				continue;
+			}
 		}
 
 		uint32_t c    = 0;
@@ -109,7 +119,8 @@ write_text(SerdWriter* writer, TextContext ctx,
 			return false;
 		}
 
-		if (ctx == WRITE_STRING && !(writer->style & SERD_STYLE_ASCII)) {
+		if ((ctx == WRITE_STRING || ctx == WRITE_LONG_STRING)
+		    && !(writer->style & SERD_STYLE_ASCII)) {
 			// Write UTF-8 character directly to UTF-8 output
 			// TODO: Scan to next escape and write entire range at once
 			writer->sink(utf8 + i - 1, size, writer->stream);
@@ -228,9 +239,17 @@ write_node(SerdWriter*     writer,
 				break;
 			}
 		}
-		writer->sink("\"", 1, writer->stream);
-		write_text(writer, WRITE_STRING, node->buf, node->n_bytes - 1, '"');
-		writer->sink("\"", 1, writer->stream);
+		if (writer->syntax != SERD_NTRIPLES
+		    && ((node->flags & SERD_HAS_NEWLINE)
+		        || (node->flags & SERD_HAS_QUOTE))) {
+			writer->sink("\"\"\"", 3, writer->stream);
+			write_text(writer, WRITE_LONG_STRING, node->buf, node->n_bytes - 1, '\0');
+			writer->sink("\"\"\"", 3, writer->stream);
+		} else {
+			writer->sink("\"", 1, writer->stream);
+			write_text(writer, WRITE_STRING, node->buf, node->n_bytes - 1, '"');
+			writer->sink("\"", 1, writer->stream);
+		}
 		if (lang && lang->buf) {
 			writer->sink("@", 1, writer->stream);
 			writer->sink(lang->buf, lang->n_bytes - 1, writer->stream);