From 187a60b1e0e80e44b5be52a7054585f0be35ca9f Mon Sep 17 00:00:00 2001 From: David Robillard Date: Fri, 21 Jan 2011 22:58:53 +0000 Subject: Full round-trip test coverage for Turtle reading and writing. Fix writing UTF-8 URIs. Improved test coverage. git-svn-id: http://svn.drobilla.net/serd/trunk@28 490d8e77-9747-427b-9fa3-0b8f29cee8a0 --- src/reader.c | 36 ++++++++++++++++++++++++++++-------- src/serdi.c | 3 +-- src/writer.c | 26 ++++++++++++++++++++++---- wscript | 48 +++++++++++++++++++++++++++++++++--------------- 4 files changed, 84 insertions(+), 29 deletions(-) diff --git a/src/reader.c b/src/reader.c index bfdfa8ba..a769fe55 100644 --- a/src/reader.c +++ b/src/reader.c @@ -449,7 +449,7 @@ read_character(SerdReader parser, Ref dest) case '\0': error(parser, "unexpected end of file\n", peek_byte(parser)); return SERD_ERROR; - case '\\': + case '\\': // 0x5C eat_byte(parser, '\\'); if (read_character_escape(parser, dest)) { return SERD_SUCCESS; @@ -458,11 +458,32 @@ read_character(SerdReader parser, Ref dest) return SERD_ERROR; } default: - if (in_range(c, 0x20, 0x5B) || in_range(c, 0x5D, 0x10FFF)) { + if (c < 0x20) { // ASCII control character + error(parser, "unexpected control character\n"); + return SERD_ERROR; + } else if (c <= 0x7E) { // Printable ASCII push_byte(parser, dest, eat_byte(parser, c)); return SERD_SUCCESS; - } else { - return SERD_FAILURE; + } else { // Wide UTF-8 character + unsigned size = 1; + if ((c & 0xE0) == 0xC0) { // Starts with `110' + size = 2; + } else if ((c & 0xF0) == 0xE0) { // Starts with `1110' + size = 3; + } else if ((c & 0xF8) == 0xF0) { // Starts with `11110' + size = 4; + } else if ((c & 0xFC) == 0xF8) { // Starts with `111110' + size = 5; + } else if ((c & 0xFE) == 0xFC) { // Starts with `1111110' + size = 6; + } else { + error(parser, "invalid character\n"); + return SERD_ERROR; + } + for (unsigned i = 0; i < size; ++i) { + push_byte(parser, dest, eat_byte(parser, peek_byte(parser))); + } + return SERD_SUCCESS; } } } @@ 
-663,7 +684,8 @@ read_relativeURI(SerdReader parser) if (st != SERD_ERROR) { return str; } - return st; + pop_string(parser, str); + return 0; } // [30] nameStartChar ::= [A-Z] | "_" | [a-z] @@ -769,11 +791,9 @@ read_uriref(SerdReader parser) { TRY_RET(eat_byte(parser, '<')); Ref const str = read_relativeURI(parser); - if (str) { - TRY_THROW(eat_byte(parser, '>')); + if (str && eat_byte(parser, '>')) { return str; } -except: pop_string(parser, str); return 0; } diff --git a/src/serdi.c b/src/serdi.c index b14df3d3..00e687de 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -46,7 +46,6 @@ event_base(void* handle, SerdURI abs_base_uri; if (!serd_uri_resolve(&uri, &state->base_uri, &abs_base_uri)) { fprintf(stderr, "error: failed to resolve new base URI\n"); - assert(false); return false; } base_uri_str = serd_string_new_from_uri(&abs_base_uri, &base_uri); @@ -173,7 +172,7 @@ main(int argc, char** argv) if (strncmp((const char*)in_filename, "file:", 5)) { fprintf(stderr, "unsupported URI scheme `%s'\n", in_filename); return 1; - } else if (!strncmp((const char*)in_filename, "file:///", 7)) { + } else if (!strncmp((const char*)in_filename, "file://", 7)) { in_filename += 7; } else { in_filename += 5; diff --git a/src/writer.c b/src/writer.c index eb6310a7..17179c60 100644 --- a/src/writer.c +++ b/src/writer.c @@ -115,10 +115,10 @@ write_text(SerdWriter writer, WriteContext ctx, return false; } - if (!(writer->style & SERD_STYLE_ASCII)) { - // Write UTF-8 input directly to UTF-8 output - writer->sink(utf8, n_bytes, writer->stream); - i += n_bytes - 1; + if (ctx == WRITE_STRING && !(writer->style & SERD_STYLE_ASCII)) { + // Write UTF-8 character directly to UTF-8 output + writer->sink(utf8 + i - 1, size, writer->stream); + i += size - 1; continue; } @@ -370,6 +370,17 @@ serd_writer_set_base_uri(SerdWriter writer, const SerdURI* uri) { writer->base_uri = *uri; + if (writer->syntax != SERD_NTRIPLES) { + if (writer->prev_g || writer->prev_s) { + writer->sink(" .\n\n", 4, 
writer->stream); + writer->prev_g = writer->prev_s = + writer->prev_p = writer->prev_o = 0; + } + writer->sink("@base ", 6, writer->stream); + writer->sink(" <", 2, writer->stream); + serd_uri_serialise(uri, writer->sink, writer->stream); + writer->sink("> .\n", 4, writer->stream); + } } SERD_API @@ -379,6 +390,13 @@ serd_writer_set_prefix(SerdWriter writer, const SerdString* uri) { if (writer->syntax != SERD_NTRIPLES) { + if (writer->prev_g || writer->prev_s) { + writer->sink(" .\n\n", 4, writer->stream); + writer->prev_g = 0; + writer->prev_s = 0; + writer->prev_p = 0; + writer->prev_o = 0; + } writer->sink("@prefix ", 8, writer->stream); writer->sink(name->buf, name->n_bytes - 1, writer->stream); writer->sink(": <", 3, writer->stream); diff --git a/wscript b/wscript index e8f18366..e76c88a6 100644 --- a/wscript +++ b/wscript @@ -124,15 +124,19 @@ def test(ctx): autowaf.pre_test(ctx, APPNAME) - autowaf.run_tests(ctx, APPNAME, - ['./serdi_static > /dev/null', - './serdi_static ftp://example.org/unsupported.ttl > /dev/null'], - 1, name='serdi-fail') - autowaf.run_tests(ctx, APPNAME, ['./serdi_static file:../tests/manifest.ttl > /dev/null', + './serdi_static file://../tests/manifest.ttl > /dev/null', './serdi_static ../tests/UTF-8.ttl > /dev/null'], - 0, name='serdi-succeed') + 0, name='serdi-cmd-good') + + autowaf.run_tests(ctx, APPNAME, + ['./serdi_static > /dev/null', + './serdi_static ftp://example.org/unsupported.ttl > /dev/null', + './serdi_static -o > /dev/null', + './serdi_static -z > /dev/null', + './serdi_static -o illegal > /dev/null'], + 1, name='serdi-cmd-bad') commands = [] for test in good_tests: @@ -141,7 +145,7 @@ def test(ctx): autowaf.run_tests(ctx, APPNAME, commands, 0, name='good') - Logs.pprint('BOLD', '\nVerifying output') + Logs.pprint('BOLD', '\nVerifying turtle => ntriples') for test in good_tests: out_filename = test + '.out' if not os.access(out_filename, os.F_OK): @@ -149,7 +153,7 @@ def test(ctx): elif filecmp.cmp('../' + 
test.replace('.ttl', '.out'), test + '.out', False) != 1: - Logs.pprint('RED', 'FAIL: %s output is incorrect' % test) + Logs.pprint('RED', 'FAIL: %s is incorrect' % out_filename) else: Logs.pprint('GREEN', 'Pass: %s' % test) @@ -159,12 +163,26 @@ def test(ctx): autowaf.run_tests(ctx, APPNAME, commands, 1, name='bad') -# commands = [] -# for test in good_tests: -# out_filename = test + '.thru' -# commands += [ './serdi_static -o turtle ../%s \'%s\' | ./serdi_static - \'%s\' > %s.out' % (test, base_uri, base_uri, test) ] -# -# autowaf.run_tests(ctx, APPNAME, commands, 0, name='turtle-write') -# + commands = [] + for test in good_tests: + base_uri = 'http://www.w3.org/2001/sw/DataAccess/df1/' + test + out_filename = test + '.thru' + commands += [ + '%s -o turtle ../%s \'%s\' | %s - \'%s\' > %s.thru' % ( + './serdi_static', test, base_uri, + './serdi_static', base_uri, test) ] + + autowaf.run_tests(ctx, APPNAME, commands, 0, name='turtle-round-trip') + Logs.pprint('BOLD', '\nVerifying ntriples => turtle => ntriples') + for test in good_tests: + out_filename = test + '.thru' + if not os.access(out_filename, os.F_OK): + Logs.pprint('RED', 'FAIL: %s output is missing' % test) + elif filecmp.cmp('../' + test.replace('.ttl', '.out'), + test + '.thru', + False) != 1: + Logs.pprint('RED', 'FAIL: %s is incorrect' % out_filename) + else: + Logs.pprint('GREEN', 'Pass: %s' % test) autowaf.post_test(ctx, APPNAME) -- cgit v1.2.1