diff options
author | David Robillard <d@drobilla.net> | 2013-03-04 04:42:56 +0000 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2013-03-04 04:42:56 +0000 |
commit | a27bd15451c9c27c25d3429badfe90cc2b0e9b82 (patch) | |
tree | cbd245a816ded86b5fb9058f696fabcfcbbb0961 | |
parent | 3a4d1a415691e45ffef6f64c32c0301fef577458 (diff) | |
download | serd-a27bd15451c9c27c25d3429badfe90cc2b0e9b82.tar.gz serd-a27bd15451c9c27c25d3429badfe90cc2b0e9b82.tar.bz2 serd-a27bd15451c9c27c25d3429badfe90cc2b0e9b82.zip |
Support dots and colons in pnames.
git-svn-id: http://svn.drobilla.net/serd/trunk@436 490d8e77-9747-427b-9fa3-0b8f29cee8a0
-rw-r--r-- | src/reader.c | 79 | ||||
-rw-r--r-- | tests/bad/bad-subject.ttl | 1 | ||||
-rw-r--r-- | tests/extra/manifest.ttl | 24 | ||||
-rw-r--r-- | tests/extra/turtle-syntax-bad-missing-ns-dot-end.ttl | 2 | ||||
-rw-r--r-- | tests/extra/turtle-syntax-ln-colons.nt | 5 | ||||
-rw-r--r-- | tests/extra/turtle-syntax-ln-colons.ttl | 6 | ||||
-rw-r--r-- | tests/extra/turtle-syntax-ln-dots.nt | 3 | ||||
-rw-r--r-- | tests/extra/turtle-syntax-ln-dots.ttl | 4 |
8 files changed, 90 insertions, 34 deletions
diff --git a/src/reader.c b/src/reader.c index f322c992..fe2e309b 100644 --- a/src/reader.c +++ b/src/reader.c @@ -659,7 +659,7 @@ read_PLX(SerdReader* reader, Ref dest) } static SerdStatus -read_PN_LOCAL(SerdReader* reader, Ref dest) +read_PN_LOCAL(SerdReader* reader, Ref dest, bool* ate_dot) { uint8_t c = peek_byte(reader); SerdStatus st; @@ -677,7 +677,7 @@ read_PN_LOCAL(SerdReader* reader, Ref dest) } while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.' | ':')* - if (/*c == '.' || */c == ':') { + if (c == '.' || c == ':') { push_byte(reader, dest, eat_byte_safe(reader, c)); } else if ((st = read_PLX(reader, dest)) > SERD_FAILURE) { return st; @@ -686,16 +686,21 @@ read_PN_LOCAL(SerdReader* reader, Ref dest) } } + SerdNode* const n = deref(reader, dest); + if (n->buf[n->n_bytes - 1] == '.') { + // Ate trailing dot, pop it from stack/node and inform caller + --n->n_bytes; + serd_stack_pop(&reader->stack, 1); + *ate_dot = true; + } + return SERD_SUCCESS; } +// Read the remainder of a PN_PREFIX after some initial characters static SerdStatus -read_PN_PREFIX(SerdReader* reader, Ref dest) +read_PN_PREFIX_tail(SerdReader* reader, Ref dest) { - if (!read_PN_CHARS_BASE(reader, dest)) { // First: PN_CHARS_BASE - return SERD_FAILURE; - } - uint8_t c; while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.')* if (c == '.') { @@ -714,6 +719,15 @@ read_PN_PREFIX(SerdReader* reader, Ref dest) return SERD_SUCCESS; } +static SerdStatus +read_PN_PREFIX(SerdReader* reader, Ref dest) +{ + if (read_PN_CHARS_BASE(reader, dest)) { + return read_PN_PREFIX_tail(reader, dest); + } + return SERD_FAILURE; +} + // [29] language ::= [a-z]+ ('-' [a-z0-9]+ )* static Ref read_language(SerdReader* reader) @@ -773,7 +787,7 @@ read_IRIREF(SerdReader* reader) } static bool -read_PrefixedName(SerdReader* reader, Ref dest, bool read_prefix) +read_PrefixedName(SerdReader* reader, Ref dest, bool read_prefix, bool* ate_dot) { if (read_prefix) { if (read_PN_PREFIX(reader, dest) > 
SERD_FAILURE) { @@ -786,7 +800,7 @@ read_PrefixedName(SerdReader* reader, Ref dest, bool read_prefix) } push_byte(reader, dest, ':'); - return read_PN_LOCAL(reader, dest) <= SERD_FAILURE; + return read_PN_LOCAL(reader, dest, ate_dot) <= SERD_FAILURE; } static bool @@ -867,7 +881,7 @@ except: } static bool -read_iri(SerdReader* reader, Ref* dest) +read_iri(SerdReader* reader, Ref* dest, bool* ate_dot) { switch (peek_byte(reader)) { case '<': @@ -875,7 +889,7 @@ read_iri(SerdReader* reader, Ref* dest) break; default: *dest = push_node(reader, SERD_CURIE, "", 0); - if (!read_PrefixedName(reader, *dest, true)) { + if (!read_PrefixedName(reader, *dest, true, ate_dot)) { *dest = pop_node(reader, *dest); } } @@ -884,7 +898,7 @@ read_iri(SerdReader* reader, Ref* dest) static bool read_literal(SerdReader* reader, Ref* dest, - Ref* datatype, Ref* lang, SerdNodeFlags* flags) + Ref* datatype, Ref* lang, SerdNodeFlags* flags, bool* ate_dot) { Ref str = read_String(reader, flags); if (!str) { @@ -899,7 +913,7 @@ read_literal(SerdReader* reader, Ref* dest, case '^': eat_byte_safe(reader, '^'); eat_byte_check(reader, '^'); - TRY_THROW(read_iri(reader, datatype)); + TRY_THROW(read_iri(reader, datatype, ate_dot)); break; } *dest = str; @@ -933,13 +947,16 @@ read_verb(SerdReader* reader, Ref* dest) "a", produce that instead. 
*/ *dest = push_node(reader, SERD_CURIE, "", 0); - SerdNode* node = deref(reader, *dest); - SerdStatus st = read_PN_PREFIX(reader, *dest); + SerdNode* node = deref(reader, *dest); + const SerdStatus st = read_PN_PREFIX(reader, *dest); + bool ate_dot = false; if (!st && node->n_bytes == 1 && node->buf[0] == 'a' && is_token_end(peek_byte(reader))) { pop_node(reader, *dest); return (*dest = push_node(reader, SERD_URI, NS_RDF "type", 47)); - } else if (st > SERD_FAILURE || !read_PrefixedName(reader, *dest, false)) { + } else if (st > SERD_FAILURE || + !read_PrefixedName(reader, *dest, false, &ate_dot) || + ate_dot) { return (*dest = pop_node(reader, *dest)); } else { return true; @@ -1084,7 +1101,6 @@ read_object(SerdReader* reader, ReadContext ctx, bool* ate_dot) Ref lang = 0; uint32_t flags = 0; const uint8_t c = peek_byte(reader); - SerdStatus st; switch (c) { case '\0': case ')': @@ -1096,7 +1112,7 @@ read_object(SerdReader* reader, ReadContext ctx, bool* ate_dot) TRY_THROW(ret = read_blank(reader, ctx, false, &o)); break; case '<': case ':': - TRY_THROW(ret = read_iri(reader, &o)); + TRY_THROW(ret = read_iri(reader, &o, ate_dot)); break; case '+': case '-': case '.': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -1104,22 +1120,24 @@ read_object(SerdReader* reader, ReadContext ctx, bool* ate_dot) break; case '\"': case '\'': - TRY_THROW(ret = read_literal(reader, &o, &datatype, &lang, &flags)); + TRY_THROW(ret = read_literal(reader, &o, &datatype, &lang, &flags, ate_dot)); break; default: /* Either a boolean literal, or a qname. Read the prefix first, and if it is in fact a "true" or "false" literal, produce that instead. 
*/ node = deref(reader, o = push_node(reader, SERD_CURIE, "", 0)); - st = read_PN_PREFIX(reader, o); - if (!st && ((node->n_bytes == 4 && !memcmp(node->buf, "true", 4)) || - (node->n_bytes == 5 && !memcmp(node->buf, "false", 5)))) { + while (read_PN_CHARS_BASE(reader, o)) {} + if ((node->n_bytes == 4 && !memcmp(node->buf, "true", 4)) || + (node->n_bytes == 5 && !memcmp(node->buf, "false", 5))) { node->type = SERD_LITERAL; datatype = push_node( reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN); ret = true; + } else if (read_PN_PREFIX_tail(reader, o) > SERD_FAILURE) { + ret = false; } else { - ret = read_PrefixedName(reader, o, false); + ret = read_PrefixedName(reader, o, false, ate_dot); } } @@ -1142,7 +1160,7 @@ static bool read_objectList(SerdReader* reader, ReadContext ctx, bool* ate_dot) { TRY_RET(read_object(reader, ctx, ate_dot)); - while (eat_delim(reader, ',')) { + while (!*ate_dot && eat_delim(reader, ',')) { TRY_RET(read_object(reader, ctx, ate_dot)); } return true; @@ -1253,7 +1271,8 @@ read_collection(SerdReader* reader, ReadContext ctx, Ref* dest) static Ref read_subject(SerdReader* reader, ReadContext ctx, bool* nested) { - Ref subject = 0; + Ref subject = 0; + bool ate_dot = false; switch (peek_byte(reader)) { case '[': case '(': *nested = true; @@ -1262,9 +1281,9 @@ read_subject(SerdReader* reader, ReadContext ctx, bool* nested) read_blank(reader, ctx, true, &subject); break; default: - read_iri(reader, &subject); + read_iri(reader, &subject, &ate_dot); } - return subject; + return ate_dot ? 
pop_node(reader, subject) : subject; } static bool @@ -1277,7 +1296,9 @@ read_triples(SerdReader* reader, ReadContext ctx, bool* ate_dot) ctx.subject = subject; if (nested) { read_ws_star(reader); - read_predicateObjectList(reader, ctx, ate_dot); + if (peek_byte(reader) != '.') { + read_predicateObjectList(reader, ctx, ate_dot); + } ret = true; } else { TRY_RET(read_ws_plus(reader)); @@ -1358,7 +1379,7 @@ read_statement(SerdReader* reader) SerdStatementFlags flags = 0; ReadContext ctx = { 0, 0, 0, &flags }; read_ws_star(reader); - bool ate_dot; + bool ate_dot = false; switch (peek_byte(reader)) { case '\0': reader->eof = true; diff --git a/tests/bad/bad-subject.ttl b/tests/bad/bad-subject.ttl new file mode 100644 index 00000000..b98ea39b --- /dev/null +++ b/tests/bad/bad-subject.ttl @@ -0,0 +1 @@ +invalid.:thing a invalid.Thing . diff --git a/tests/extra/manifest.ttl b/tests/extra/manifest.ttl index f373f3a2..49e7436d 100644 --- a/tests/extra/manifest.ttl +++ b/tests/extra/manifest.ttl @@ -10,12 +10,16 @@ mf:entries ( <#turtle-syntax-bad-blank-label-dot-end> <#turtle-syntax-bad-ln-dash-start> + <#turtle-syntax-bad-ln-escape-start> <#turtle-syntax-bad-ln-escape> + <#turtle-syntax-bad-missing-ns-dot-end> + <#turtle-syntax-bad-missing-ns-dot-start> <#turtle-syntax-bad-ns-dot-end> <#turtle-syntax-bad-ns-dot-start> - <#turtle-syntax-bad-ns-missing-dot-end> - <#turtle-syntax-bad-ns-missing-dot-start> + <#turtle-syntax-bad-number-dot-in-anon> <#turtle-syntax-blank-label> + <#turtle-syntax-ln-colons> + <#turtle-syntax-ln-dots> <#turtle-syntax-ns-dots> ) . @@ -61,18 +65,30 @@ mf:name "turtle-syntax-bad-ns-dot-start" ; mf:action <turtle-syntax-bad-ns-dot-start.ttl> . 
-<#turtle-syntax-bad-ns-missing-dot-end> +<#turtle-syntax-bad-missing-ns-dot-end> rdf:type rdft:TestTurtleNegativeSyntax ; rdfs:comment "Prefix must not end in dot (error in triple, not prefix directive like turtle-syntax-bad-ns-dot-end)" ; mf:name "turtle-syntax-bad-missing-ns-dot-end" ; mf:action <turtle-syntax-bad-missing-ns-dot-end.ttl> . -<#turtle-syntax-bad-ns-missing-dot-start> +<#turtle-syntax-bad-missing-ns-dot-start> rdf:type rdft:TestTurtleNegativeSyntax ; rdfs:comment "Prefix must not start with dot (error in triple, not prefix directive like turtle-syntax-bad-ns-dot-end)" ; mf:name "turtle-syntax-bad-missing-ns-dot-start" ; mf:action <turtle-syntax-bad-missing-ns-dot-start.ttl> . +<#turtle-syntax-ln-dots> + rdf:type rdft:TestTurtlePositiveSyntax ; + rdfs:comment "Dots in pname local names" ; + mf:name "turtle-syntax-ln-dots" ; + mf:action <turtle-syntax-ln-dots.ttl> . + +<#turtle-syntax-ln-colons> + rdf:type rdft:TestTurtlePositiveSyntax ; + rdfs:comment "Colons in pname local names" ; + mf:name "turtle-syntax-ln-colons" ; + mf:action <turtle-syntax-ln-colons.ttl> . + <#turtle-syntax-ns-dots> rdf:type rdft:TestTurtlePositiveSyntax ; rdfs:comment "Dots in namespace names" ; diff --git a/tests/extra/turtle-syntax-bad-missing-ns-dot-end.ttl b/tests/extra/turtle-syntax-bad-missing-ns-dot-end.ttl index 4a593eb7..d06f4e69 100644 --- a/tests/extra/turtle-syntax-bad-missing-ns-dot-end.ttl +++ b/tests/extra/turtle-syntax-bad-missing-ns-dot-end.ttl @@ -1 +1 @@ -undefined.:s undefined.:p undefined.:o . +valid:s valid:p invalid.:o . diff --git a/tests/extra/turtle-syntax-ln-colons.nt b/tests/extra/turtle-syntax-ln-colons.nt new file mode 100644 index 00000000..9b3c223f --- /dev/null +++ b/tests/extra/turtle-syntax-ln-colons.nt @@ -0,0 +1,5 @@ +<http://example/s:1> <http://example/p:1> <http://example/o:1> . +<http://example/s::2> <http://example/p::2> <http://example/o::2> . +<http://example/3:s> <http://example/3:p> <http://example/3> . 
+<http://example/:s> <http://example/:p> <http://example/:o> . +<http://example/:s:> <http://example/:p:> <http://example/:o:> . diff --git a/tests/extra/turtle-syntax-ln-colons.ttl b/tests/extra/turtle-syntax-ln-colons.ttl new file mode 100644 index 00000000..397989db --- /dev/null +++ b/tests/extra/turtle-syntax-ln-colons.ttl @@ -0,0 +1,6 @@ +@prefix : <http://example/> . +:s:1 :p:1 :o:1 . +:s::2 :p::2 :o::2 . +:3:s :3:p :3 . +::s ::p ::o . +::s: ::p: ::o: . diff --git a/tests/extra/turtle-syntax-ln-dots.nt b/tests/extra/turtle-syntax-ln-dots.nt new file mode 100644 index 00000000..d24052bb --- /dev/null +++ b/tests/extra/turtle-syntax-ln-dots.nt @@ -0,0 +1,3 @@ +<http://example/s.1> <http://example/p.1> <http://example/o.1> . +<http://example/s..2> <http://example/p..2> <http://example/o..2> . +<http://example/3.s> <http://example/3.p> <http://example/3> . diff --git a/tests/extra/turtle-syntax-ln-dots.ttl b/tests/extra/turtle-syntax-ln-dots.ttl new file mode 100644 index 00000000..230381a1 --- /dev/null +++ b/tests/extra/turtle-syntax-ln-dots.ttl @@ -0,0 +1,4 @@ +@prefix : <http://example/> . +:s.1 :p.1 :o.1 . +:s..2 :p..2 :o..2. +:3.s :3.p :3. |