From 1184a657618a42af2e3f882fb7f90d873a6ee9b8 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Mon, 4 Mar 2013 01:36:52 +0000 Subject: Improve dot support. Extend test suite to cover more of the new Turtle draft. git-svn-id: http://svn.drobilla.net/serd/trunk@433 490d8e77-9747-427b-9fa3-0b8f29cee8a0 --- src/reader.c | 70 +++++++++--------- tests/extra/manifest.ttl | 86 ++++++++++++++++++++++ .../turtle-syntax-bad-blank-label-dot-end.ttl | 2 + tests/extra/turtle-syntax-bad-ln-dash-start.ttl | 2 + tests/extra/turtle-syntax-bad-ln-escape-start.ttl | 2 + tests/extra/turtle-syntax-bad-ln-escape.ttl | 2 + .../extra/turtle-syntax-bad-missing-ns-dot-end.ttl | 1 + .../turtle-syntax-bad-missing-ns-dot-start.ttl | 1 + tests/extra/turtle-syntax-bad-ns-dot-end.ttl | 2 + tests/extra/turtle-syntax-bad-ns-dot-start.ttl | 2 + .../extra/turtle-syntax-bad-number-dot-in-anon.ttl | 6 ++ tests/extra/turtle-syntax-blank-label.nt | 3 + tests/extra/turtle-syntax-blank-label.ttl | 4 + tests/extra/turtle-syntax-ns-dots.ttl | 2 + tests/extra/turtle-syntax-pname-dots.ttl | 2 + wscript | 4 +- 16 files changed, 155 insertions(+), 36 deletions(-) create mode 100644 tests/extra/manifest.ttl create mode 100644 tests/extra/turtle-syntax-bad-blank-label-dot-end.ttl create mode 100644 tests/extra/turtle-syntax-bad-ln-dash-start.ttl create mode 100644 tests/extra/turtle-syntax-bad-ln-escape-start.ttl create mode 100644 tests/extra/turtle-syntax-bad-ln-escape.ttl create mode 100644 tests/extra/turtle-syntax-bad-missing-ns-dot-end.ttl create mode 100644 tests/extra/turtle-syntax-bad-missing-ns-dot-start.ttl create mode 100644 tests/extra/turtle-syntax-bad-ns-dot-end.ttl create mode 100644 tests/extra/turtle-syntax-bad-ns-dot-start.ttl create mode 100644 tests/extra/turtle-syntax-bad-number-dot-in-anon.ttl create mode 100644 tests/extra/turtle-syntax-blank-label.nt create mode 100644 tests/extra/turtle-syntax-blank-label.ttl create mode 100644 tests/extra/turtle-syntax-ns-dots.ttl create mode 100644 
tests/extra/turtle-syntax-pname-dots.ttl diff --git a/src/reader.c b/src/reader.c index 5dd9578e..3a245877 100644 --- a/src/reader.c +++ b/src/reader.c @@ -671,7 +671,7 @@ read_PN_LOCAL(SerdReader* reader, Ref dest) default: if ((st = read_PLX(reader, dest)) > SERD_FAILURE) { return st; - } else if (st != SERD_SUCCESS && !read_PN_CHARS(reader, dest)) { + } else if (st != SERD_SUCCESS && !read_PN_CHARS_BASE(reader, dest)) { return SERD_FAILURE; } } @@ -679,12 +679,10 @@ read_PN_LOCAL(SerdReader* reader, Ref dest) while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.' | ';')* if (/*c == '.' || */c == ':') { push_byte(reader, dest, eat_byte_safe(reader, c)); - } else if (!read_PN_CHARS(reader, dest)) { - if ((st = read_PLX(reader, dest)) > SERD_FAILURE) { - return st; - } else if (st != SERD_SUCCESS) { - break; - } + } else if ((st = read_PLX(reader, dest)) > SERD_FAILURE) { + return st; + } else if (st != SERD_SUCCESS && !read_PN_CHARS(reader, dest)) { + break; } } @@ -707,8 +705,9 @@ read_PN_PREFIX(SerdReader* reader, Ref dest) } } - if (c == '.' && !read_PN_CHARS(reader, dest)) { // Last: PN_CHARS - r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid prefix character\n"); + const SerdNode* const n = deref(reader, dest); + if (n->buf[n->n_bytes - 1] == '.' && !read_PN_CHARS(reader, dest)) { + r_err(reader, SERD_ERR_BAD_SYNTAX, "prefix ends with `.'\n"); return SERD_ERR_BAD_SYNTAX; } @@ -960,7 +959,7 @@ read_BLANK_NODE_LABEL(SerdReader* reader) uint8_t c = peek_byte(reader); // First: (PN_CHARS | '_' | [0-9]) if (is_digit(c) || c == '_') { - push_byte(reader, ref, c); + push_byte(reader, ref, eat_byte_safe(reader, c)); } else if (!read_PN_CHARS(reader, ref)) { r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid name start character\n"); return pop_node(reader, ref); @@ -974,18 +973,18 @@ read_BLANK_NODE_LABEL(SerdReader* reader) } } - if (c == '.' 
&& !read_PN_CHARS(reader, ref)) { // Last: PN_CHARS - r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid name character\n"); + const SerdNode* n = deref(reader, ref); + if (n->buf[n->n_bytes - 1] == '.' && !read_PN_CHARS(reader, ref)) { + r_err(reader, SERD_ERR_BAD_SYNTAX, "name ends with `.'\n"); return pop_node(reader, ref); } if (reader->syntax == SERD_TURTLE) { - const char* const buf = (const char*)deref(reader, ref)->buf; - if (is_digit(buf[1])) { - if (buf[0] == 'b') { - ((char*)buf)[0] = 'B'; // Prevent clash + if (is_digit(n->buf[1])) { + if (n->buf[0] == 'b') { + ((char*)n->buf)[0] = 'B'; // Prevent clash reader->seen_genid = true; - } else if (reader->seen_genid && buf[0] == 'B') { + } else if (reader->seen_genid && n->buf[0] == 'B') { r_err(reader, SERD_ERR_ID_CLASH, "found both `b' and `B' blank IDs, prefix required\n"); return pop_node(reader, ref); @@ -1152,29 +1151,30 @@ read_objectList(SerdReader* reader, ReadContext ctx, bool* ate_dot) static bool read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot) { - TRY_RET(read_verb(reader, &ctx.predicate)); - read_ws_star(reader); - TRY_THROW(read_objectList(reader, ctx, ate_dot)); - ctx.predicate = pop_node(reader, ctx.predicate); - if (*ate_dot) { - return true; - } - while (eat_delim(reader, ';')) { - switch (peek_byte(reader)) { - case ';': - continue; - case '.': case ']': + uint8_t c; + while (true) { + TRY_THROW(read_verb(reader, &ctx.predicate)); + read_ws_star(reader); + + TRY_THROW(read_objectList(reader, ctx, ate_dot)); + ctx.predicate = pop_node(reader, ctx.predicate); + if (*ate_dot) { return true; - default: - TRY_THROW(read_verb(reader, &ctx.predicate)); + } + + do { read_ws_star(reader); - TRY_THROW(read_objectList(reader, ctx, ate_dot)); - ctx.predicate = pop_node(reader, ctx.predicate); - if (*ate_dot) { + switch (c = peek_byte(reader)) { + case 0: + return false; + case '.': case ']': return true; + case ';': + eat_byte_safe(reader, c); } - } + } while (c == ';'); } + 
pop_node(reader, ctx.predicate); return true; except: diff --git a/tests/extra/manifest.ttl b/tests/extra/manifest.ttl new file mode 100644 index 00000000..f373f3a2 --- /dev/null +++ b/tests/extra/manifest.ttl @@ -0,0 +1,86 @@ +@prefix mf: . +@prefix qt: . +@prefix rdf: . +@prefix rdfs: . +@prefix rdft: . + +<> + rdf:type mf:Manifest ; + rdfs:comment "Extra Turtle tests from Serd" ; + mf:entries ( + <#turtle-syntax-bad-blank-label-dot-end> + <#turtle-syntax-bad-ln-dash-start> + <#turtle-syntax-bad-ln-escape> + <#turtle-syntax-bad-ns-dot-end> + <#turtle-syntax-bad-ns-dot-start> + <#turtle-syntax-bad-ns-missing-dot-end> + <#turtle-syntax-bad-ns-missing-dot-start> + <#turtle-syntax-blank-label> + <#turtle-syntax-ns-dots> + ) . + +<#turtle-syntax-bad-blank-label-dot-end> + rdf:type rdft:TestTurtleNegativeSyntax ; + rdfs:comment "Blank node label must not end in dot" ; + mf:name "turtle-syntax-bad-blank-label-dot-end" ; + mf:action . + +<#turtle-syntax-bad-number-dot-in-anon> + rdf:type rdft:TestTurtleNegativeSyntax ; + rdfs:comment "Dot delimiter may not appear in anonymous nodes" ; + mf:name "turtle-syntax-bad-number-dot-in-anon" ; + mf:action . + +<#turtle-syntax-bad-ln-dash-start> + rdf:type rdft:TestTurtleNegativeSyntax ; + rdfs:comment "Local name must not begin with dash" ; + mf:name "turtle-syntax-bad-ln-dash-start" ; + mf:action . + +<#turtle-syntax-bad-ln-escape> + rdf:type rdft:TestTurtleNegativeSyntax ; + rdfs:comment "Bad hex escape in local name" ; + mf:name "turtle-syntax-bad-ln-escape" ; + mf:action . + +<#turtle-syntax-bad-ln-escape-start> + rdf:type rdft:TestTurtleNegativeSyntax ; + rdfs:comment "Bad hex escape at start of local name" ; + mf:name "turtle-syntax-bad-ln-escape-start" ; + mf:action . + +<#turtle-syntax-bad-ns-dot-end> + rdf:type rdft:TestTurtleNegativeSyntax ; + rdfs:comment "Prefix must not end in dot" ; + mf:name "turtle-syntax-bad-ns-dot-end" ; + mf:action . 
+ +<#turtle-syntax-bad-ns-dot-start> + rdf:type rdft:TestTurtleNegativeSyntax ; + rdfs:comment "Prefix must not start with dot" ; + mf:name "turtle-syntax-bad-ns-dot-start" ; + mf:action . + +<#turtle-syntax-bad-ns-missing-dot-end> + rdf:type rdft:TestTurtleNegativeSyntax ; + rdfs:comment "Prefix must not end in dot (error in triple, not prefix directive like turtle-syntax-bad-ns-dot-end)" ; + mf:name "turtle-syntax-bad-missing-ns-dot-end" ; + mf:action . + +<#turtle-syntax-bad-ns-missing-dot-start> + rdf:type rdft:TestTurtleNegativeSyntax ; + rdfs:comment "Prefix must not start with dot (error in triple, not prefix directive like turtle-syntax-bad-ns-dot-start)" ; + mf:name "turtle-syntax-bad-missing-ns-dot-start" ; + mf:action . + +<#turtle-syntax-ns-dots> + rdf:type rdft:TestTurtlePositiveSyntax ; + rdfs:comment "Dots in namespace names" ; + mf:name "turtle-syntax-ns-dots" ; + mf:action . + +<#turtle-syntax-blank-label> + rdf:type rdft:TestTurtlePositiveSyntax ; + rdfs:comment "Characters allowed in blank node labels" ; + mf:name "turtle-syntax-blank-label" ; + mf:action . diff --git a/tests/extra/turtle-syntax-bad-blank-label-dot-end.ttl b/tests/extra/turtle-syntax-bad-blank-label-dot-end.ttl new file mode 100644 index 00000000..613e775f --- /dev/null +++ b/tests/extra/turtle-syntax-bad-blank-label-dot-end.ttl @@ -0,0 +1,2 @@ +@prefix : . +_:b1. :p :o . diff --git a/tests/extra/turtle-syntax-bad-ln-dash-start.ttl b/tests/extra/turtle-syntax-bad-ln-dash-start.ttl new file mode 100644 index 00000000..291bdda3 --- /dev/null +++ b/tests/extra/turtle-syntax-bad-ln-dash-start.ttl @@ -0,0 +1,2 @@ +@prefix : . +:s :p :-o . diff --git a/tests/extra/turtle-syntax-bad-ln-escape-start.ttl b/tests/extra/turtle-syntax-bad-ln-escape-start.ttl new file mode 100644 index 00000000..d1661bd7 --- /dev/null +++ b/tests/extra/turtle-syntax-bad-ln-escape-start.ttl @@ -0,0 +1,2 @@ +@prefix : . +:s :p :%2o . 
diff --git a/tests/extra/turtle-syntax-bad-ln-escape.ttl b/tests/extra/turtle-syntax-bad-ln-escape.ttl new file mode 100644 index 00000000..585e28b5 --- /dev/null +++ b/tests/extra/turtle-syntax-bad-ln-escape.ttl @@ -0,0 +1,2 @@ +@prefix : . +:s :p :o%2 . diff --git a/tests/extra/turtle-syntax-bad-missing-ns-dot-end.ttl b/tests/extra/turtle-syntax-bad-missing-ns-dot-end.ttl new file mode 100644 index 00000000..4a593eb7 --- /dev/null +++ b/tests/extra/turtle-syntax-bad-missing-ns-dot-end.ttl @@ -0,0 +1 @@ +undefined.:s undefined.:p undefined.:o . diff --git a/tests/extra/turtle-syntax-bad-missing-ns-dot-start.ttl b/tests/extra/turtle-syntax-bad-missing-ns-dot-start.ttl new file mode 100644 index 00000000..23514bcc --- /dev/null +++ b/tests/extra/turtle-syntax-bad-missing-ns-dot-start.ttl @@ -0,0 +1 @@ +.undefined:s .undefined:p .undefined:o . diff --git a/tests/extra/turtle-syntax-bad-ns-dot-end.ttl b/tests/extra/turtle-syntax-bad-ns-dot-end.ttl new file mode 100644 index 00000000..a99fc98d --- /dev/null +++ b/tests/extra/turtle-syntax-bad-ns-dot-end.ttl @@ -0,0 +1,2 @@ +@prefix eg. : . +eg.:s eg.:p eg.:o . diff --git a/tests/extra/turtle-syntax-bad-ns-dot-start.ttl b/tests/extra/turtle-syntax-bad-ns-dot-start.ttl new file mode 100644 index 00000000..b3993dd7 --- /dev/null +++ b/tests/extra/turtle-syntax-bad-ns-dot-start.ttl @@ -0,0 +1,2 @@ +@prefix .eg : . +.eg:s .eg:p .eg:o . diff --git a/tests/extra/turtle-syntax-bad-number-dot-in-anon.ttl b/tests/extra/turtle-syntax-bad-number-dot-in-anon.ttl new file mode 100644 index 00000000..46d15f9a --- /dev/null +++ b/tests/extra/turtle-syntax-bad-number-dot-in-anon.ttl @@ -0,0 +1,6 @@ +@prefix : . + +:s + :p [ + :p1 27. + ] . diff --git a/tests/extra/turtle-syntax-blank-label.nt b/tests/extra/turtle-syntax-blank-label.nt new file mode 100644 index 00000000..05dcc396 --- /dev/null +++ b/tests/extra/turtle-syntax-blank-label.nt @@ -0,0 +1,3 @@ +_:0b . +_:_b . +_:b.0 . 
diff --git a/tests/extra/turtle-syntax-blank-label.ttl b/tests/extra/turtle-syntax-blank-label.ttl new file mode 100644 index 00000000..3e3e516d --- /dev/null +++ b/tests/extra/turtle-syntax-blank-label.ttl @@ -0,0 +1,4 @@ +@prefix : . +_:0b :p :o . # Starts with digit +_:_b :p :o . # Starts with underscore +_:b.0 :p :o . # Contains dot, ends with digit diff --git a/tests/extra/turtle-syntax-ns-dots.ttl b/tests/extra/turtle-syntax-ns-dots.ttl new file mode 100644 index 00000000..78465870 --- /dev/null +++ b/tests/extra/turtle-syntax-ns-dots.ttl @@ -0,0 +1,2 @@ +@prefix e.g: . +e.g:s e.g:p e.g:o . diff --git a/tests/extra/turtle-syntax-pname-dots.ttl b/tests/extra/turtle-syntax-pname-dots.ttl new file mode 100644 index 00000000..a708cf70 --- /dev/null +++ b/tests/extra/turtle-syntax-pname-dots.ttl @@ -0,0 +1,2 @@ +@prefix : . +:s.ubj :p.r.ed :o.bject. diff --git a/wscript b/wscript index b787760f..8abfa67d 100644 --- a/wscript +++ b/wscript @@ -385,7 +385,7 @@ def test_manifest(ctx, srcdir, testdir, report, test_base, parse_base): def test(ctx): blddir = autowaf.build_dir(APPNAME, 'tests') - for i in ['', 'bad', 'good', 'new', 'tests-ttl']: + for i in ['', 'bad', 'good', 'new', 'tests-ttl', 'extra']: try: os.makedirs(os.path.join(blddir, i)) except: @@ -522,6 +522,8 @@ def test(ctx): rdf_turtle + 'coverage/tests/', 'http://example/base/') test_manifest(ctx, srcdir, 'tests-ttl', report, rdf_turtle + 'tests-ttl/', 'http://example/base/') + test_manifest(ctx, srcdir, 'extra', report, + rdf_turtle + 'extra/', 'http://example/base/') report.close() -- cgit v1.2.1