diff options
author | David Robillard <d@drobilla.net> | 2021-06-29 18:33:55 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2023-12-02 18:49:07 -0500 |
commit | 045751da92836b31548661c48f125f229067b84f (patch) | |
tree | 69aed6fa569f83aec68238d7fa4824cca5a315ad /src/n3.c | |
parent | 13892da5556b962d749b42c851b29237e380e36c (diff) | |
download | serd-045751da92836b31548661c48f125f229067b84f.tar.gz serd-045751da92836b31548661c48f125f229067b84f.tar.bz2 serd-045751da92836b31548661c48f125f229067b84f.zip |
Reduce complexity of Turtle object reading code
Diffstat (limited to 'src/n3.c')
-rw-r--r-- | src/n3.c | 107 |
1 files changed, 66 insertions, 41 deletions
```diff
@@ -17,6 +17,7 @@
 #include "serd/sink.h"
 #include "serd/statement.h"
 #include "serd/status.h"
+#include "serd/string_view.h"
 #include "serd/syntax.h"
 
 #include <assert.h>
@@ -534,22 +535,73 @@ read_anon(SerdReader* const reader,
   return st > SERD_FAILURE ? st : eat_byte_check(reader, ']');
 }
 
-/* If emit is true: recurses, calling statement_sink for every statement
-   encountered, and leaves stack in original calling state (i.e. pops
-   everything it pushes). */
+static bool
+node_has_string(const SerdNode* const node, const SerdStringView string)
+{
+  return node->length == string.length &&
+         !memcmp(serd_node_string(node), string.data, string.length);
+}
+
+// Read a "named" object: a boolean literal or a prefixed name
+static SerdStatus
+read_named_object(SerdReader* const reader,
+                  SerdNode** const dest,
+                  bool* const ate_dot)
+{
+  static const char* const XSD_BOOLEAN = NS_XSD "boolean";
+  static const size_t XSD_BOOLEAN_LEN = 40;
+  static const SerdStringView true_string = {"true", 4U};
+  static const SerdStringView false_string = {"false", 5U};
+
+  /* This function deals with nodes that start with some letters. Unlike
+     everything else, the cases here aren't nicely distinguished by leading
+     characters, so this is more tedious to deal with in a non-tokenizing
+     parser like this one.
+
+     Deal with this here by trying to read a prefixed node, then if it turns
+     out to actually be "true" or "false", switch it to a boolean literal. */
+
+  if (!(*dest = push_node(reader, SERD_CURIE, "", 0))) {
+    return SERD_BAD_STACK;
+  }
+
+  SerdNode* node = *dest;
+  SerdStatus st = SERD_SUCCESS;
+
+  // Attempt to read a prefixed name
+  st = read_PrefixedName(reader, node, true, ate_dot);
+
+  // Check if this is actually a special boolean node
+  if (st == SERD_FAILURE && (node_has_string(node, true_string) ||
+                             node_has_string(node, false_string))) {
+    node->flags = SERD_HAS_DATATYPE;
+    node->type = SERD_LITERAL;
+    return push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN)
+             ? SERD_SUCCESS
+             : SERD_BAD_STACK;
+  }
+
+  // Any other failure is a syntax error
+  if (st) {
+    st = st > SERD_FAILURE ? st : SERD_BAD_SYNTAX;
+    return r_err(reader, st, "expected prefixed name or boolean");
+  }
+
+  return SERD_SUCCESS;
+}
+
+// Read an object and emit statements, possibly recursively
 static SerdStatus
 read_object(SerdReader* const reader,
             ReadContext* const ctx,
-            const bool emit,
             bool* const ate_dot)
 {
-  static const char* const XSD_BOOLEAN = NS_XSD "boolean";
-  static const size_t XSD_BOOLEAN_LEN = 40;
-
   const size_t orig_stack_size = reader->stack.size;
 
+  assert(ctx->subject);
+
   SerdStatus st = SERD_FAILURE;
-  bool simple = (ctx->subject != 0);
+  bool simple = true;
   SerdNode* o = 0;
 
   const int c = peek_byte(reader);
@@ -594,38 +646,11 @@ read_object(SerdReader* const reader,
     st = read_literal(reader, &o, ate_dot);
     break;
   default:
-    /* Either a boolean literal, or a qname. Read the prefix first, and if
-       it is in fact a "true" or "false" literal, produce that instead.
-    */
-    if (!(o = push_node(reader, SERD_CURIE, "", 0))) {
-      return SERD_BAD_STACK;
-    }
-
-    while (!(st = read_PN_CHARS_BASE(reader, o))) {
-    }
-
-    if (st > SERD_FAILURE) {
-      return st;
-    }
-
-    if ((o->length == 4 && !memcmp(serd_node_string(o), "true", 4)) ||
-        (o->length == 5 && !memcmp(serd_node_string(o), "false", 5))) {
-      o->flags |= SERD_HAS_DATATYPE;
-      o->type = SERD_LITERAL;
-      if (!(push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN))) {
-        st = SERD_BAD_STACK;
-      } else {
-        st = SERD_SUCCESS;
-      }
-    } else if ((st = read_PN_PREFIX_tail(reader, o)) > SERD_FAILURE ||
-               (st = read_PrefixedName(reader, o, false, ate_dot))) {
-      st = (st > SERD_FAILURE) ? st : SERD_BAD_SYNTAX;
-      return r_err(reader, st, "expected prefixed name");
-    }
+    // Either a boolean literal or a prefixed name
+    st = read_named_object(reader, &o, ate_dot);
   }
 
-  ctx->object = o;
-  if (!st && emit && simple && o) {
+  if (!st && simple && o) {
     st = emit_statement(reader, *ctx, o);
   }
 
@@ -640,10 +665,10 @@ static SerdStatus
 read_objectList(SerdReader* const reader, ReadContext ctx, bool* const ate_dot)
 {
   SerdStatus st = SERD_SUCCESS;
-  TRY(st, read_object(reader, &ctx, true, ate_dot));
+  TRY(st, read_object(reader, &ctx, ate_dot));
 
   while (st <= SERD_FAILURE && !*ate_dot && eat_delim(reader, ',')) {
-    st = read_object(reader, &ctx, true, ate_dot);
+    st = read_object(reader, &ctx, ate_dot);
   }
 
   return st;
@@ -743,7 +768,7 @@ read_collection(SerdReader* const reader,
     // _:node rdf:first object
     ctx.predicate = reader->rdf_first;
     bool ate_dot = false;
-    if ((st = read_object(reader, &ctx, true, &ate_dot)) || ate_dot) {
+    if ((st = read_object(reader, &ctx, &ate_dot)) || ate_dot) {
       return end_collection(reader, st);
     }
 
```