aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2021-06-29 18:33:55 -0400
committerDavid Robillard <d@drobilla.net>2022-01-14 19:37:51 -0500
commit72d8d4af247fed78657806fca82d72f34f12d1ef (patch)
tree8c3e87191bd1b02476e011277cebb0fc2f49af3b
parent8974148437e9c98bc715a96a0cdeef8f7a9726f2 (diff)
downloadserd-72d8d4af247fed78657806fca82d72f34f12d1ef.tar.gz
serd-72d8d4af247fed78657806fca82d72f34f12d1ef.tar.bz2
serd-72d8d4af247fed78657806fca82d72f34f12d1ef.zip
Reduce complexity of Turtle object reading function
-rw-r--r--src/n3.c129
1 files changed, 73 insertions, 56 deletions
diff --git a/src/n3.c b/src/n3.c
index c2ed8f72..0381def8 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -674,22 +674,69 @@ read_anon(SerdReader* const reader,
return eat_byte_check(reader, ']');
}
-/* If emit is true: recurses, calling statement_sink for every statement
- encountered, and leaves stack in original calling state (i.e. pops
- everything it pushes). */
+// Read a "named" object: a boolean literal or a prefixed name
static SerdStatus
-read_object(SerdReader* const reader,
- ReadContext* const ctx,
- const bool emit,
- bool* const ate_dot)
+read_named_object(SerdReader* const reader,
+ SerdNode** const dest,
+ bool* const ate_dot)
{
static const char* const XSD_BOOLEAN = NS_XSD "boolean";
static const size_t XSD_BOOLEAN_LEN = 40;
+ /* This function deals with nodes that start with some letters. Unlike
+ everything else, the cases here aren't nicely distinguished by leading
+ characters, so this is more tedious to deal with in a non-tokenizing
+ parser like this one.
+
+ Deal with this here by first reading the prefix into a tentative node. If
+ it turns out to be "true" or "false", switch it to a boolean literal after
+ the fact. */
+
+ if (!(*dest = push_node(reader, SERD_URI, "", 0))) {
+ return SERD_ERR_OVERFLOW;
+ }
+
+ const size_t string_start_offset = reader->stack.size;
+ SerdNode* const node = *dest;
+ SerdStatus st = SERD_SUCCESS;
+ while (!(st = read_PN_CHARS_BASE(reader, node))) {
+ }
+
+ if (st > SERD_FAILURE) {
+ return st;
+ }
+
+ if ((node->length == 4 && !memcmp(serd_node_string(node), "true", 4)) ||
+ (node->length == 5 && !memcmp(serd_node_string(node), "false", 5))) {
+ node->flags |= SERD_HAS_DATATYPE;
+ node->type = SERD_LITERAL;
+ return push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN)
+ ? SERD_SUCCESS
+ : SERD_ERR_OVERFLOW;
+ }
+
+ if ((st = read_PN_PREFIX_tail(reader, node)) > SERD_FAILURE ||
+ (st = read_PrefixedName(
+ reader, node, false, ate_dot, string_start_offset))) {
+ st = (st > SERD_FAILURE) ? st : SERD_ERR_BAD_SYNTAX;
+ return r_err(reader, st, "expected prefixed name");
+ }
+
+ return SERD_SUCCESS;
+}
+
+// Read an object and emit statements, possibly recursively
+static SerdStatus
+read_object(SerdReader* const reader,
+ ReadContext* const ctx,
+ bool* const ate_dot)
+{
const size_t orig_stack_size = reader->stack.size;
- SerdStatus ret = SERD_FAILURE;
- bool simple = (ctx->subject != 0);
+ assert(ctx->subject);
+
+ SerdStatus st = SERD_FAILURE;
+ bool simple = true;
SerdNode* o = 0;
const int c = peek_byte(reader);
@@ -699,24 +746,24 @@ read_object(SerdReader* const reader,
return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected object");
case '$':
case '?':
- ret = read_Var(reader, &o);
+ st = read_Var(reader, &o);
break;
case '[':
simple = false;
- ret = read_anon(reader, *ctx, false, &o);
+ st = read_anon(reader, *ctx, false, &o);
break;
case '(':
simple = false;
- ret = read_collection(reader, *ctx, &o);
+ st = read_collection(reader, *ctx, &o);
break;
case '_':
- ret = read_BLANK_NODE_LABEL(reader, &o, ate_dot);
+ st = read_BLANK_NODE_LABEL(reader, &o, ate_dot);
break;
case '<':
- ret = read_IRIREF(reader, &o);
+ st = read_IRIREF(reader, &o);
break;
case ':':
- ret = read_iri(reader, &o, ate_dot);
+ st = read_iri(reader, &o, ate_dot);
break;
case '+':
case '-':
@@ -731,66 +778,36 @@ read_object(SerdReader* const reader,
case '7':
case '8':
case '9':
- ret = read_number(reader, &o, ate_dot);
+ st = read_number(reader, &o, ate_dot);
break;
case '\"':
case '\'':
- ret = read_literal(reader, &o, ate_dot);
+ st = read_literal(reader, &o, ate_dot);
break;
- default: {
- /* Either a boolean literal, or a qname. Read the prefix first, and if
- it is in fact a "true" or "false" literal, produce that instead.
- */
- if (!(o = push_node(reader, SERD_URI, "", 0))) {
- return SERD_ERR_OVERFLOW;
- }
-
- const size_t string_start_offset = reader->stack.size;
- while (!(ret = read_PN_CHARS_BASE(reader, o))) {
- }
-
- if (ret > SERD_FAILURE) {
- return ret;
- }
-
- if ((o->length == 4 && !memcmp(serd_node_string(o), "true", 4)) ||
- (o->length == 5 && !memcmp(serd_node_string(o), "false", 5))) {
- o->flags |= SERD_HAS_DATATYPE;
- o->type = SERD_LITERAL;
- if (!(push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN))) {
- ret = SERD_ERR_OVERFLOW;
- } else {
- ret = SERD_SUCCESS;
- }
- } else if ((ret = read_PN_PREFIX_tail(reader, o)) > SERD_FAILURE ||
- (ret = read_PrefixedName(
- reader, o, false, ate_dot, string_start_offset))) {
- ret = (ret > SERD_FAILURE) ? ret : SERD_ERR_BAD_SYNTAX;
- return r_err(reader, ret, "expected prefixed name");
- }
- }
+ default:
+ // Either a boolean literal or a prefixed name
+ st = read_named_object(reader, &o, ate_dot);
}
- ctx->object = o;
- if (!ret && emit && simple && o) {
- ret = emit_statement(reader, *ctx, o);
+ if (!st && simple && o) {
+ st = emit_statement(reader, *ctx, o);
}
serd_stack_pop_to(&reader->stack, orig_stack_size);
#ifndef NDEBUG
assert(reader->stack.size == orig_stack_size);
#endif
- return ret;
+ return st;
}
static SerdStatus
read_objectList(SerdReader* const reader, ReadContext ctx, bool* const ate_dot)
{
SerdStatus st = SERD_SUCCESS;
- TRY(st, read_object(reader, &ctx, true, ate_dot));
+ TRY(st, read_object(reader, &ctx, ate_dot));
while (st <= SERD_FAILURE && !*ate_dot && eat_delim(reader, ',')) {
- st = read_object(reader, &ctx, true, ate_dot);
+ st = read_object(reader, &ctx, ate_dot);
}
return st;
@@ -890,7 +907,7 @@ read_collection(SerdReader* const reader,
// _:node rdf:first object
ctx.predicate = reader->rdf_first;
bool ate_dot = false;
- if ((st = read_object(reader, &ctx, true, &ate_dot)) || ate_dot) {
+ if ((st = read_object(reader, &ctx, &ate_dot)) || ate_dot) {
return end_collection(reader, st);
}