diff options
author | David Robillard <d@drobilla.net> | 2021-06-29 18:33:55 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2022-01-14 19:37:51 -0500 |
commit | 72d8d4af247fed78657806fca82d72f34f12d1ef (patch) | |
tree | 8c3e87191bd1b02476e011277cebb0fc2f49af3b /src | |
parent | 8974148437e9c98bc715a96a0cdeef8f7a9726f2 (diff) | |
download | serd-72d8d4af247fed78657806fca82d72f34f12d1ef.tar.gz serd-72d8d4af247fed78657806fca82d72f34f12d1ef.tar.bz2 serd-72d8d4af247fed78657806fca82d72f34f12d1ef.zip |
Reduce complexity of Turtle object reading function
Diffstat (limited to 'src')
-rw-r--r-- | src/n3.c | 129 |
1 files changed, 73 insertions, 56 deletions
@@ -674,22 +674,69 @@ read_anon(SerdReader* const reader, return eat_byte_check(reader, ']'); } -/* If emit is true: recurses, calling statement_sink for every statement - encountered, and leaves stack in original calling state (i.e. pops - everything it pushes). */ +// Read a "named" object: a boolean literal or a prefixed name static SerdStatus -read_object(SerdReader* const reader, - ReadContext* const ctx, - const bool emit, - bool* const ate_dot) +read_named_object(SerdReader* const reader, + SerdNode** const dest, + bool* const ate_dot) { static const char* const XSD_BOOLEAN = NS_XSD "boolean"; static const size_t XSD_BOOLEAN_LEN = 40; + /* This function deals with nodes that start with some letters. Unlike + everything else, the cases here aren't nicely distinguished by leading + characters, so this is more tedious to deal with in a non-tokenizing + parser like this one. + + Deal with this here by first reading the prefix into a tentative node. If + it turns out to be "true" or "false", switch it to a boolean literal after + the fact. */ + + if (!(*dest = push_node(reader, SERD_URI, "", 0))) { + return SERD_ERR_OVERFLOW; + } + + const size_t string_start_offset = reader->stack.size; + SerdNode* const node = *dest; + SerdStatus st = SERD_SUCCESS; + while (!(st = read_PN_CHARS_BASE(reader, node))) { + } + + if (st > SERD_FAILURE) { + return st; + } + + if ((node->length == 4 && !memcmp(serd_node_string(node), "true", 4)) || + (node->length == 5 && !memcmp(serd_node_string(node), "false", 5))) { + node->flags |= SERD_HAS_DATATYPE; + node->type = SERD_LITERAL; + return push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN) + ? SERD_SUCCESS + : SERD_ERR_OVERFLOW; + } + + if ((st = read_PN_PREFIX_tail(reader, node)) > SERD_FAILURE || + (st = read_PrefixedName( + reader, node, false, ate_dot, string_start_offset))) { + st = (st > SERD_FAILURE) ? st : SERD_ERR_BAD_SYNTAX; + return r_err(reader, st, "expected prefixed name"); + } + + return SERD_SUCCESS; +} + +// Read an object and emit statements, possibly recursively +static SerdStatus +read_object(SerdReader* const reader, + ReadContext* const ctx, + bool* const ate_dot) +{ const size_t orig_stack_size = reader->stack.size; - SerdStatus ret = SERD_FAILURE; - bool simple = (ctx->subject != 0); + assert(ctx->subject); + + SerdStatus st = SERD_FAILURE; + bool simple = true; SerdNode* o = 0; const int c = peek_byte(reader); @@ -699,24 +746,24 @@ read_object(SerdReader* const reader, return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected object"); case '$': case '?': - ret = read_Var(reader, &o); + st = read_Var(reader, &o); break; case '[': simple = false; - ret = read_anon(reader, *ctx, false, &o); + st = read_anon(reader, *ctx, false, &o); break; case '(': simple = false; - ret = read_collection(reader, *ctx, &o); + st = read_collection(reader, *ctx, &o); break; case '_': - ret = read_BLANK_NODE_LABEL(reader, &o, ate_dot); + st = read_BLANK_NODE_LABEL(reader, &o, ate_dot); break; case '<': - ret = read_IRIREF(reader, &o); + st = read_IRIREF(reader, &o); break; case ':': - ret = read_iri(reader, &o, ate_dot); + st = read_iri(reader, &o, ate_dot); break; case '+': case '-': @@ -731,66 +778,36 @@ read_object(SerdReader* const reader, case '7': case '8': case '9': - ret = read_number(reader, &o, ate_dot); + st = read_number(reader, &o, ate_dot); break; case '\"': case '\'': - ret = read_literal(reader, &o, ate_dot); + st = read_literal(reader, &o, ate_dot); break; - default: { - /* Either a boolean literal, or a qname. Read the prefix first, and if - it is in fact a "true" or "false" literal, produce that instead. - */ - if (!(o = push_node(reader, SERD_URI, "", 0))) { - return SERD_ERR_OVERFLOW; - } - - const size_t string_start_offset = reader->stack.size; - while (!(ret = read_PN_CHARS_BASE(reader, o))) { - } - - if (ret > SERD_FAILURE) { - return ret; - } - - if ((o->length == 4 && !memcmp(serd_node_string(o), "true", 4)) || - (o->length == 5 && !memcmp(serd_node_string(o), "false", 5))) { - o->flags |= SERD_HAS_DATATYPE; - o->type = SERD_LITERAL; - if (!(push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN))) { - ret = SERD_ERR_OVERFLOW; - } else { - ret = SERD_SUCCESS; - } - } else if ((ret = read_PN_PREFIX_tail(reader, o)) > SERD_FAILURE || - (ret = read_PrefixedName( - reader, o, false, ate_dot, string_start_offset))) { - ret = (ret > SERD_FAILURE) ? ret : SERD_ERR_BAD_SYNTAX; - return r_err(reader, ret, "expected prefixed name"); - } - } + default: + // Either a boolean literal or a prefixed name + st = read_named_object(reader, &o, ate_dot); } - ctx->object = o; - if (!ret && emit && simple && o) { - ret = emit_statement(reader, *ctx, o); + if (!st && simple && o) { + st = emit_statement(reader, *ctx, o); } serd_stack_pop_to(&reader->stack, orig_stack_size); #ifndef NDEBUG assert(reader->stack.size == orig_stack_size); #endif - return ret; + return st; } static SerdStatus read_objectList(SerdReader* const reader, ReadContext ctx, bool* const ate_dot) { SerdStatus st = SERD_SUCCESS; - TRY(st, read_object(reader, &ctx, true, ate_dot)); + TRY(st, read_object(reader, &ctx, ate_dot)); while (st <= SERD_FAILURE && !*ate_dot && eat_delim(reader, ',')) { - st = read_object(reader, &ctx, true, ate_dot); + st = read_object(reader, &ctx, ate_dot); } return st; @@ -890,7 +907,7 @@ read_collection(SerdReader* const reader, // _:node rdf:first object ctx.predicate = reader->rdf_first; bool ate_dot = false; - if ((st = read_object(reader, &ctx, true, &ate_dot)) || ate_dot) { + if ((st = read_object(reader, &ctx, &ate_dot)) || ate_dot) { return end_collection(reader, st); } |