aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: David Robillard <d@drobilla.net> 2011-01-21 22:58:53 +0000
committer: David Robillard <d@drobilla.net> 2011-01-21 22:58:53 +0000
commit: 187a60b1e0e80e44b5be52a7054585f0be35ca9f (patch)
tree: 5a9568f5c802d2796d82d13f294ecf391767731f /src
parent: fbf024e07f80f547677f9d39e64dd4bdd4be99f3 (diff)
download: serd-187a60b1e0e80e44b5be52a7054585f0be35ca9f.tar.gz
serd-187a60b1e0e80e44b5be52a7054585f0be35ca9f.tar.bz2
serd-187a60b1e0e80e44b5be52a7054585f0be35ca9f.zip
Full round-trip test coverage for Turtle reading and writing.
Fix writing UTF-8 URIs. Improved test coverage. git-svn-id: http://svn.drobilla.net/serd/trunk@28 490d8e77-9747-427b-9fa3-0b8f29cee8a0
Diffstat (limited to 'src')
-rw-r--r-- src/reader.c 36
-rw-r--r-- src/serdi.c 3
-rw-r--r-- src/writer.c 26
3 files changed, 51 insertions, 14 deletions
diff --git a/src/reader.c b/src/reader.c
index bfdfa8ba..a769fe55 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -449,7 +449,7 @@ read_character(SerdReader parser, Ref dest)
case '\0':
error(parser, "unexpected end of file\n", peek_byte(parser));
return SERD_ERROR;
- case '\\':
+ case '\\': // 0x5C
eat_byte(parser, '\\');
if (read_character_escape(parser, dest)) {
return SERD_SUCCESS;
@@ -458,11 +458,32 @@ read_character(SerdReader parser, Ref dest)
return SERD_ERROR;
}
default:
- if (in_range(c, 0x20, 0x5B) || in_range(c, 0x5D, 0x10FFF)) {
+ if (c < 0x20) { // ASCII control character
+ error(parser, "unexpected control character\n");
+ return SERD_ERROR;
+ } else if (c <= 0x7E) { // Printable ASCII
push_byte(parser, dest, eat_byte(parser, c));
return SERD_SUCCESS;
- } else {
- return SERD_FAILURE;
+ } else { // Wide UTF-8 character
+ unsigned size = 1;
+ if ((c & 0xE0) == 0xC0) { // Starts with `110'
+ size = 2;
+ } else if ((c & 0xF0) == 0xE0) { // Starts with `1110'
+ size = 3;
+ } else if ((c & 0xF8) == 0xF0) { // Starts with `11110'
+ size = 4;
+ } else if ((c & 0xFC) == 0xF8) { // Starts with `111110'
+ size = 5;
+ } else if ((c & 0xFE) == 0xFC) { // Starts with `1111110'
+ size = 6;
+ } else {
+ error(parser, "invalid character\n");
+ return SERD_ERROR;
+ }
+ for (unsigned i = 0; i < size; ++i) {
+ push_byte(parser, dest, eat_byte(parser, peek_byte(parser)));
+ }
+ return SERD_SUCCESS;
}
}
}
@@ -663,7 +684,8 @@ read_relativeURI(SerdReader parser)
if (st != SERD_ERROR) {
return str;
}
- return st;
+ pop_string(parser, str);
+ return 0;
}
// [30] nameStartChar ::= [A-Z] | "_" | [a-z]
@@ -769,11 +791,9 @@ read_uriref(SerdReader parser)
{
TRY_RET(eat_byte(parser, '<'));
Ref const str = read_relativeURI(parser);
- if (str) {
- TRY_THROW(eat_byte(parser, '>'));
+ if (str && eat_byte(parser, '>')) {
return str;
}
-except:
pop_string(parser, str);
return 0;
}
diff --git a/src/serdi.c b/src/serdi.c
index b14df3d3..00e687de 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -46,7 +46,6 @@ event_base(void* handle,
SerdURI abs_base_uri;
if (!serd_uri_resolve(&uri, &state->base_uri, &abs_base_uri)) {
fprintf(stderr, "error: failed to resolve new base URI\n");
- assert(false);
return false;
}
base_uri_str = serd_string_new_from_uri(&abs_base_uri, &base_uri);
@@ -173,7 +172,7 @@ main(int argc, char** argv)
if (strncmp((const char*)in_filename, "file:", 5)) {
fprintf(stderr, "unsupported URI scheme `%s'\n", in_filename);
return 1;
- } else if (!strncmp((const char*)in_filename, "file:///", 7)) {
+ } else if (!strncmp((const char*)in_filename, "file://", 7)) {
in_filename += 7;
} else {
in_filename += 5;
diff --git a/src/writer.c b/src/writer.c
index eb6310a7..17179c60 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -115,10 +115,10 @@ write_text(SerdWriter writer, WriteContext ctx,
return false;
}
- if (!(writer->style & SERD_STYLE_ASCII)) {
- // Write UTF-8 input directly to UTF-8 output
- writer->sink(utf8, n_bytes, writer->stream);
- i += n_bytes - 1;
+ if (ctx == WRITE_STRING && !(writer->style & SERD_STYLE_ASCII)) {
+ // Write UTF-8 character directly to UTF-8 output
+ writer->sink(utf8 + i - 1, size, writer->stream);
+ i += size - 1;
continue;
}
@@ -370,6 +370,17 @@ serd_writer_set_base_uri(SerdWriter writer,
const SerdURI* uri)
{
writer->base_uri = *uri;
+ if (writer->syntax != SERD_NTRIPLES) {
+ if (writer->prev_g || writer->prev_s) {
+ writer->sink(" .\n\n", 4, writer->stream);
+ writer->prev_g = writer->prev_s =
+ writer->prev_p = writer->prev_o = 0;
+ }
+ writer->sink("@base ", 6, writer->stream);
+ writer->sink(" <", 2, writer->stream);
+ serd_uri_serialise(uri, writer->sink, writer->stream);
+ writer->sink("> .\n", 4, writer->stream);
+ }
}
SERD_API
@@ -379,6 +390,13 @@ serd_writer_set_prefix(SerdWriter writer,
const SerdString* uri)
{
if (writer->syntax != SERD_NTRIPLES) {
+ if (writer->prev_g || writer->prev_s) {
+ writer->sink(" .\n\n", 4, writer->stream);
+ writer->prev_g = 0;
+ writer->prev_s = 0;
+ writer->prev_p = 0;
+ writer->prev_o = 0;
+ }
writer->sink("@prefix ", 8, writer->stream);
writer->sink(name->buf, name->n_bytes - 1, writer->stream);
writer->sink(": <", 3, writer->stream);