about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2013-03-04 04:42:56 +0000
committer David Robillard <d@drobilla.net> 2013-03-04 04:42:56 +0000
commit a27bd15451c9c27c25d3429badfe90cc2b0e9b82 (patch)
tree cbd245a816ded86b5fb9058f696fabcfcbbb0961
parent 3a4d1a415691e45ffef6f64c32c0301fef577458 (diff)
download serd-a27bd15451c9c27c25d3429badfe90cc2b0e9b82.tar.gz
serd-a27bd15451c9c27c25d3429badfe90cc2b0e9b82.tar.bz2
serd-a27bd15451c9c27c25d3429badfe90cc2b0e9b82.zip
Support dots and colons in pnames.
git-svn-id: http://svn.drobilla.net/serd/trunk@436 490d8e77-9747-427b-9fa3-0b8f29cee8a0
-rw-r--r-- src/reader.c 79
-rw-r--r-- tests/bad/bad-subject.ttl 1
-rw-r--r-- tests/extra/manifest.ttl 24
-rw-r--r-- tests/extra/turtle-syntax-bad-missing-ns-dot-end.ttl 2
-rw-r--r-- tests/extra/turtle-syntax-ln-colons.nt 5
-rw-r--r-- tests/extra/turtle-syntax-ln-colons.ttl 6
-rw-r--r-- tests/extra/turtle-syntax-ln-dots.nt 3
-rw-r--r-- tests/extra/turtle-syntax-ln-dots.ttl 4
8 files changed, 90 insertions, 34 deletions
diff --git a/src/reader.c b/src/reader.c
index f322c992..fe2e309b 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -659,7 +659,7 @@ read_PLX(SerdReader* reader, Ref dest)
}
static SerdStatus
-read_PN_LOCAL(SerdReader* reader, Ref dest)
+read_PN_LOCAL(SerdReader* reader, Ref dest, bool* ate_dot)
{
uint8_t c = peek_byte(reader);
SerdStatus st;
@@ -677,7 +677,7 @@ read_PN_LOCAL(SerdReader* reader, Ref dest)
}
while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.' | ';')*
- if (/*c == '.' || */c == ':') {
+ if (c == '.' || c == ':') {
push_byte(reader, dest, eat_byte_safe(reader, c));
} else if ((st = read_PLX(reader, dest)) > SERD_FAILURE) {
return st;
@@ -686,16 +686,21 @@ read_PN_LOCAL(SerdReader* reader, Ref dest)
}
}
+ SerdNode* const n = deref(reader, dest);
+ if (n->buf[n->n_bytes - 1] == '.') {
+ // Ate trailing dot, pop it from stack/node and inform caller
+ --n->n_bytes;
+ serd_stack_pop(&reader->stack, 1);
+ *ate_dot = true;
+ }
+
return SERD_SUCCESS;
}
+// Read the remainder of a PN_PREFIX after some initial characters
static SerdStatus
-read_PN_PREFIX(SerdReader* reader, Ref dest)
+read_PN_PREFIX_tail(SerdReader* reader, Ref dest)
{
- if (!read_PN_CHARS_BASE(reader, dest)) { // First: PN_CHARS_BASE
- return SERD_FAILURE;
- }
-
uint8_t c;
while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.')*
if (c == '.') {
@@ -714,6 +719,15 @@ read_PN_PREFIX(SerdReader* reader, Ref dest)
return SERD_SUCCESS;
}
+static SerdStatus
+read_PN_PREFIX(SerdReader* reader, Ref dest)
+{
+ if (read_PN_CHARS_BASE(reader, dest)) {
+ return read_PN_PREFIX_tail(reader, dest);
+ }
+ return SERD_FAILURE;
+}
+
// [29] language ::= [a-z]+ ('-' [a-z0-9]+ )*
static Ref
read_language(SerdReader* reader)
@@ -773,7 +787,7 @@ read_IRIREF(SerdReader* reader)
}
static bool
-read_PrefixedName(SerdReader* reader, Ref dest, bool read_prefix)
+read_PrefixedName(SerdReader* reader, Ref dest, bool read_prefix, bool* ate_dot)
{
if (read_prefix) {
if (read_PN_PREFIX(reader, dest) > SERD_FAILURE) {
@@ -786,7 +800,7 @@ read_PrefixedName(SerdReader* reader, Ref dest, bool read_prefix)
}
push_byte(reader, dest, ':');
- return read_PN_LOCAL(reader, dest) <= SERD_FAILURE;
+ return read_PN_LOCAL(reader, dest, ate_dot) <= SERD_FAILURE;
}
static bool
@@ -867,7 +881,7 @@ except:
}
static bool
-read_iri(SerdReader* reader, Ref* dest)
+read_iri(SerdReader* reader, Ref* dest, bool* ate_dot)
{
switch (peek_byte(reader)) {
case '<':
@@ -875,7 +889,7 @@ read_iri(SerdReader* reader, Ref* dest)
break;
default:
*dest = push_node(reader, SERD_CURIE, "", 0);
- if (!read_PrefixedName(reader, *dest, true)) {
+ if (!read_PrefixedName(reader, *dest, true, ate_dot)) {
*dest = pop_node(reader, *dest);
}
}
@@ -884,7 +898,7 @@ read_iri(SerdReader* reader, Ref* dest)
static bool
read_literal(SerdReader* reader, Ref* dest,
- Ref* datatype, Ref* lang, SerdNodeFlags* flags)
+ Ref* datatype, Ref* lang, SerdNodeFlags* flags, bool* ate_dot)
{
Ref str = read_String(reader, flags);
if (!str) {
@@ -899,7 +913,7 @@ read_literal(SerdReader* reader, Ref* dest,
case '^':
eat_byte_safe(reader, '^');
eat_byte_check(reader, '^');
- TRY_THROW(read_iri(reader, datatype));
+ TRY_THROW(read_iri(reader, datatype, ate_dot));
break;
}
*dest = str;
@@ -933,13 +947,16 @@ read_verb(SerdReader* reader, Ref* dest)
"a", produce that instead.
*/
*dest = push_node(reader, SERD_CURIE, "", 0);
- SerdNode* node = deref(reader, *dest);
- SerdStatus st = read_PN_PREFIX(reader, *dest);
+ SerdNode* node = deref(reader, *dest);
+ const SerdStatus st = read_PN_PREFIX(reader, *dest);
+ bool ate_dot = false;
if (!st && node->n_bytes == 1 && node->buf[0] == 'a' &&
is_token_end(peek_byte(reader))) {
pop_node(reader, *dest);
return (*dest = push_node(reader, SERD_URI, NS_RDF "type", 47));
- } else if (st > SERD_FAILURE || !read_PrefixedName(reader, *dest, false)) {
+ } else if (st > SERD_FAILURE ||
+ !read_PrefixedName(reader, *dest, false, &ate_dot) ||
+ ate_dot) {
return (*dest = pop_node(reader, *dest));
} else {
return true;
@@ -1084,7 +1101,6 @@ read_object(SerdReader* reader, ReadContext ctx, bool* ate_dot)
Ref lang = 0;
uint32_t flags = 0;
const uint8_t c = peek_byte(reader);
- SerdStatus st;
switch (c) {
case '\0':
case ')':
@@ -1096,7 +1112,7 @@ read_object(SerdReader* reader, ReadContext ctx, bool* ate_dot)
TRY_THROW(ret = read_blank(reader, ctx, false, &o));
break;
case '<': case ':':
- TRY_THROW(ret = read_iri(reader, &o));
+ TRY_THROW(ret = read_iri(reader, &o, ate_dot));
break;
case '+': case '-': case '.': case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7': case '8': case '9':
@@ -1104,22 +1120,24 @@ read_object(SerdReader* reader, ReadContext ctx, bool* ate_dot)
break;
case '\"':
case '\'':
- TRY_THROW(ret = read_literal(reader, &o, &datatype, &lang, &flags));
+ TRY_THROW(ret = read_literal(reader, &o, &datatype, &lang, &flags, ate_dot));
break;
default:
/* Either a boolean literal, or a qname. Read the prefix first, and if
it is in fact a "true" or "false" literal, produce that instead.
*/
node = deref(reader, o = push_node(reader, SERD_CURIE, "", 0));
- st = read_PN_PREFIX(reader, o);
- if (!st && ((node->n_bytes == 4 && !memcmp(node->buf, "true", 4)) ||
- (node->n_bytes == 5 && !memcmp(node->buf, "false", 5)))) {
+ while (read_PN_CHARS_BASE(reader, o)) {}
+ if ((node->n_bytes == 4 && !memcmp(node->buf, "true", 4)) ||
+ (node->n_bytes == 5 && !memcmp(node->buf, "false", 5))) {
node->type = SERD_LITERAL;
datatype = push_node(
reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN);
ret = true;
+ } else if (read_PN_PREFIX_tail(reader, o) > SERD_FAILURE) {
+ ret = false;
} else {
- ret = read_PrefixedName(reader, o, false);
+ ret = read_PrefixedName(reader, o, false, ate_dot);
}
}
@@ -1142,7 +1160,7 @@ static bool
read_objectList(SerdReader* reader, ReadContext ctx, bool* ate_dot)
{
TRY_RET(read_object(reader, ctx, ate_dot));
- while (eat_delim(reader, ',')) {
+ while (!*ate_dot && eat_delim(reader, ',')) {
TRY_RET(read_object(reader, ctx, ate_dot));
}
return true;
@@ -1253,7 +1271,8 @@ read_collection(SerdReader* reader, ReadContext ctx, Ref* dest)
static Ref
read_subject(SerdReader* reader, ReadContext ctx, bool* nested)
{
- Ref subject = 0;
+ Ref subject = 0;
+ bool ate_dot = false;
switch (peek_byte(reader)) {
case '[': case '(':
*nested = true;
@@ -1262,9 +1281,9 @@ read_subject(SerdReader* reader, ReadContext ctx, bool* nested)
read_blank(reader, ctx, true, &subject);
break;
default:
- read_iri(reader, &subject);
+ read_iri(reader, &subject, &ate_dot);
}
- return subject;
+ return ate_dot ? pop_node(reader, subject) : subject;
}
static bool
@@ -1277,7 +1296,9 @@ read_triples(SerdReader* reader, ReadContext ctx, bool* ate_dot)
ctx.subject = subject;
if (nested) {
read_ws_star(reader);
- read_predicateObjectList(reader, ctx, ate_dot);
+ if (peek_byte(reader) != '.') {
+ read_predicateObjectList(reader, ctx, ate_dot);
+ }
ret = true;
} else {
TRY_RET(read_ws_plus(reader));
@@ -1358,7 +1379,7 @@ read_statement(SerdReader* reader)
SerdStatementFlags flags = 0;
ReadContext ctx = { 0, 0, 0, &flags };
read_ws_star(reader);
- bool ate_dot;
+ bool ate_dot = false;
switch (peek_byte(reader)) {
case '\0':
reader->eof = true;
diff --git a/tests/bad/bad-subject.ttl b/tests/bad/bad-subject.ttl
new file mode 100644
index 00000000..b98ea39b
--- /dev/null
+++ b/tests/bad/bad-subject.ttl
@@ -0,0 +1 @@
+invalid.:thing a invalid.Thing .
diff --git a/tests/extra/manifest.ttl b/tests/extra/manifest.ttl
index f373f3a2..49e7436d 100644
--- a/tests/extra/manifest.ttl
+++ b/tests/extra/manifest.ttl
@@ -10,12 +10,16 @@
mf:entries (
<#turtle-syntax-bad-blank-label-dot-end>
<#turtle-syntax-bad-ln-dash-start>
+ <#turtle-syntax-bad-ln-escape-start>
<#turtle-syntax-bad-ln-escape>
+ <#turtle-syntax-bad-missing-ns-dot-end>
+ <#turtle-syntax-bad-missing-ns-dot-start>
<#turtle-syntax-bad-ns-dot-end>
<#turtle-syntax-bad-ns-dot-start>
- <#turtle-syntax-bad-ns-missing-dot-end>
- <#turtle-syntax-bad-ns-missing-dot-start>
+ <#turtle-syntax-bad-number-dot-in-anon>
<#turtle-syntax-blank-label>
+ <#turtle-syntax-ln-colons>
+ <#turtle-syntax-ln-dots>
<#turtle-syntax-ns-dots>
) .
@@ -61,18 +65,30 @@
mf:name "turtle-syntax-bad-ns-dot-start" ;
mf:action <turtle-syntax-bad-ns-dot-start.ttl> .
-<#turtle-syntax-bad-ns-missing-dot-end>
+<#turtle-syntax-bad-missing-ns-dot-end>
rdf:type rdft:TestTurtleNegativeSyntax ;
rdfs:comment "Prefix must not end in dot (error in triple, not prefix directive like turtle-syntax-bad-ns-dot-end)" ;
mf:name "turtle-syntax-bad-missing-ns-dot-end" ;
mf:action <turtle-syntax-bad-missing-ns-dot-end.ttl> .
-<#turtle-syntax-bad-ns-missing-dot-start>
+<#turtle-syntax-bad-missing-ns-dot-start>
rdf:type rdft:TestTurtleNegativeSyntax ;
rdfs:comment "Prefix must not start with dot (error in triple, not prefix directive like turtle-syntax-bad-ns-dot-end)" ;
mf:name "turtle-syntax-bad-missing-ns-dot-start" ;
mf:action <turtle-syntax-bad-missing-ns-dot-start.ttl> .
+<#turtle-syntax-ln-dots>
+ rdf:type rdft:TestTurtlePositiveSyntax ;
+ rdfs:comment "Dots in pname local names" ;
+ mf:name "turtle-syntax-ln-dots" ;
+ mf:action <turtle-syntax-ln-dots.ttl> .
+
+<#turtle-syntax-ln-colons>
+ rdf:type rdft:TestTurtlePositiveSyntax ;
+ rdfs:comment "Colons in pname local names" ;
+ mf:name "turtle-syntax-ln-colons" ;
+ mf:action <turtle-syntax-ln-colons.ttl> .
+
<#turtle-syntax-ns-dots>
rdf:type rdft:TestTurtlePositiveSyntax ;
rdfs:comment "Dots in namespace names" ;
diff --git a/tests/extra/turtle-syntax-bad-missing-ns-dot-end.ttl b/tests/extra/turtle-syntax-bad-missing-ns-dot-end.ttl
index 4a593eb7..d06f4e69 100644
--- a/tests/extra/turtle-syntax-bad-missing-ns-dot-end.ttl
+++ b/tests/extra/turtle-syntax-bad-missing-ns-dot-end.ttl
@@ -1 +1 @@
-undefined.:s undefined.:p undefined.:o .
+valid:s valid:p invalid.:o .
diff --git a/tests/extra/turtle-syntax-ln-colons.nt b/tests/extra/turtle-syntax-ln-colons.nt
new file mode 100644
index 00000000..9b3c223f
--- /dev/null
+++ b/tests/extra/turtle-syntax-ln-colons.nt
@@ -0,0 +1,5 @@
+<http://example/s:1> <http://example/p:1> <http://example/o:1> .
+<http://example/s::2> <http://example/p::2> <http://example/o::2> .
+<http://example/3:s> <http://example/3:p> <http://example/3> .
+<http://example/:s> <http://example/:p> <http://example/:o> .
+<http://example/:s:> <http://example/:p:> <http://example/:o:> .
diff --git a/tests/extra/turtle-syntax-ln-colons.ttl b/tests/extra/turtle-syntax-ln-colons.ttl
new file mode 100644
index 00000000..397989db
--- /dev/null
+++ b/tests/extra/turtle-syntax-ln-colons.ttl
@@ -0,0 +1,6 @@
+@prefix : <http://example/> .
+:s:1 :p:1 :o:1 .
+:s::2 :p::2 :o::2 .
+:3:s :3:p :3 .
+::s ::p ::o .
+::s: ::p: ::o: .
diff --git a/tests/extra/turtle-syntax-ln-dots.nt b/tests/extra/turtle-syntax-ln-dots.nt
new file mode 100644
index 00000000..d24052bb
--- /dev/null
+++ b/tests/extra/turtle-syntax-ln-dots.nt
@@ -0,0 +1,3 @@
+<http://example/s.1> <http://example/p.1> <http://example/o.1> .
+<http://example/s..2> <http://example/p..2> <http://example/o..2> .
+<http://example/3.s> <http://example/3.p> <http://example/3> .
diff --git a/tests/extra/turtle-syntax-ln-dots.ttl b/tests/extra/turtle-syntax-ln-dots.ttl
new file mode 100644
index 00000000..230381a1
--- /dev/null
+++ b/tests/extra/turtle-syntax-ln-dots.ttl
@@ -0,0 +1,4 @@
+@prefix : <http://example/> .
+:s.1 :p.1 :o.1 .
+:s..2 :p..2 :o..2.
+:3.s :3.p :3.