diff options
author | David Robillard <d@drobilla.net> | 2023-12-01 20:39:44 -0500 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2023-12-02 18:49:08 -0500 |
commit | 02d56e83931e53e1cde57247c64d56fda3804f77 (patch) | |
tree | 2d1ac467bc56f4f4f3570497427be32d7e36bd1a /src/read_turtle.c | |
parent | d094448c095a59117febc8bd4687df071ce9759a (diff) | |
download | serd-02d56e83931e53e1cde57247c64d56fda3804f77.tar.gz serd-02d56e83931e53e1cde57247c64d56fda3804f77.tar.bz2 serd-02d56e83931e53e1cde57247c64d56fda3804f77.zip |
[WIP] Tighten up reader node management
[WIP] Broken on 32-bit
This makes the reader stack manipulations stricter, to make the code more
regular and avoid redundant work and bad cache activity. Now, functions that
push node headers and their bodies are responsible for (more or less)
immediately pushing any trailing null bytes required for termination and
alignment.
This makes the writes to the node in the stack more local, ensures nodes are
terminated as early as possible (to reduce the risk of using non-terminated
strings), and avoids the need to calculate aligned stack allocations.
Diffstat (limited to 'src/read_turtle.c')
-rw-r--r-- | src/read_turtle.c | 107 |
1 files changed, 58 insertions, 49 deletions
diff --git a/src/read_turtle.c b/src/read_turtle.c index c6982327..a041e873 100644 --- a/src/read_turtle.c +++ b/src/read_turtle.c @@ -2,7 +2,8 @@ // SPDX-License-Identifier: ISC #include "read_turtle.h" -#include "byte_source.h" + +#include "caret.h" #include "env.h" #include "namespaces.h" #include "node.h" @@ -14,7 +15,6 @@ #include "try.h" #include "turtle.h" -#include "serd/caret.h" #include "serd/env.h" #include "serd/node.h" #include "serd/reader.h" @@ -49,12 +49,14 @@ read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot); static SerdStatus read_whitespace(SerdReader* const reader) { - switch (peek_byte(reader)) { + const int c = peek_byte(reader); + + switch (c) { case '\t': case '\n': case '\r': case ' ': - return serd_byte_source_advance(reader->source); + return skip_byte(reader, c); case '#': return read_comment(reader); default: @@ -338,7 +340,7 @@ resolve_IRIREF(SerdReader* const reader, } // Push a new temporary node for constructing the resolved URI - SerdNode* const temp = push_node(reader, SERD_URI, "", 0); + SerdNode* const temp = push_node_head(reader, SERD_URI); if (!temp) { return SERD_BAD_STACK; } @@ -346,10 +348,12 @@ resolve_IRIREF(SerdReader* const reader, // Write resolved URI to the temporary node WriteNodeContext ctx = {reader, temp, SERD_SUCCESS}; temp->length = serd_write_uri(uri, write_to_stack, &ctx); + if (!ctx.status) { // Replace the destination with the new expanded node memmove(dest, temp, sizeof(SerdNode) + serd_node_pad_length(temp->length)); serd_stack_pop_to(&reader->stack, string_start_offset + dest->length); + TRY(ctx.status, push_node_tail(reader)); } return ctx.status; @@ -361,7 +365,7 @@ read_IRIREF(SerdReader* const reader, SerdNode** const dest) SerdStatus st = SERD_SUCCESS; TRY(st, eat_byte_check(reader, '<')); - if (!(*dest = push_node(reader, SERD_URI, "", 0))) { + if (!(*dest = push_node_head(reader, SERD_URI))) { return SERD_BAD_STACK; } @@ -372,8 +376,10 @@ read_IRIREF(SerdReader* const reader, SerdNode** const dest) return st; } + TRY(st, push_node_tail(reader)); + return (reader->flags & SERD_READ_RELATIVE) - ? SERD_SUCCESS + ? st : resolve_IRIREF(reader, *dest, string_start_offset); } @@ -411,7 +417,7 @@ read_PrefixedName(SerdReader* const reader, return st; } - return SERD_SUCCESS; + return push_node_tail(reader); } static SerdStatus @@ -439,12 +445,11 @@ read_number(SerdReader* const reader, #define XSD_DOUBLE NS_XSD "double" #define XSD_INTEGER NS_XSD "integer" - *dest = push_node(reader, SERD_LITERAL, "", 0); - SerdStatus st = SERD_SUCCESS; int c = peek_byte(reader); bool has_decimal = false; - if (!*dest) { + + if (!(*dest = push_node_head(reader, SERD_LITERAL))) { return SERD_BAD_STACK; } @@ -467,8 +472,8 @@ read_number(SerdReader* const reader, skip_byte(reader, c); c = peek_byte(reader); if (!is_digit(c) && c != 'e' && c != 'E') { - *ate_dot = true; // Force caller to deal with stupid grammar - return SERD_SUCCESS; // Next byte is not a number character + *ate_dot = true; // Force caller to deal with stupid grammar + return push_node_tail(reader); // Next byte is not a number character } TRY(st, push_byte(reader, *dest, '.')); @@ -491,12 +496,13 @@ read_number(SerdReader* const reader, break; } TRY(st, read_0_9(reader, *dest, true)); + TRY(st, push_node_tail(reader)); meta = push_node(reader, SERD_URI, XSD_DOUBLE, sizeof(XSD_DOUBLE) - 1); - (*dest)->flags |= SERD_HAS_DATATYPE; } else if (has_decimal) { + TRY(st, push_node_tail(reader)); meta = push_node(reader, SERD_URI, XSD_DECIMAL, sizeof(XSD_DECIMAL) - 1); - (*dest)->flags |= SERD_HAS_DATATYPE; } else { + TRY(st, push_node_tail(reader)); meta = push_node(reader, SERD_URI, XSD_INTEGER, sizeof(XSD_INTEGER) - 1); } @@ -513,11 +519,14 @@ read_turtle_iri(SerdReader* const reader, return read_IRIREF(reader, dest); } - if (!(*dest = push_node(reader, SERD_LITERAL, "", 0))) { + if (!(*dest = push_node_head(reader, SERD_LITERAL))) { return SERD_BAD_STACK; } - return read_PrefixedName(reader, *dest, true, ate_dot, reader->stack.size); + const SerdStatus st = + read_PrefixedName(reader, *dest, true, ate_dot, reader->stack.size); + + return st; } static SerdStatus @@ -525,14 +534,14 @@ read_literal(SerdReader* const reader, SerdNode** const dest, bool* const ate_dot) { - if (!(*dest = push_node(reader, SERD_LITERAL, "", 0))) { + SerdStatus st = SERD_SUCCESS; + + if (!(*dest = push_node_head(reader, SERD_LITERAL))) { return SERD_BAD_STACK; } - SerdStatus st = read_String(reader, *dest); - if (st) { - return st; - } + TRY(st, read_String(reader, *dest)); + TRY(st, push_node_tail(reader)); SerdNode* datatype = NULL; switch (peek_byte(reader)) { @@ -546,9 +555,11 @@ read_literal(SerdReader* const reader, TRY(st, eat_byte_check(reader, '^')); (*dest)->flags |= SERD_HAS_DATATYPE; TRY(st, read_turtle_iri(reader, &datatype, ate_dot)); + assert(datatype == serd_node_meta_c(*dest)); break; } - return SERD_SUCCESS; + + return st; } static SerdStatus @@ -567,7 +578,7 @@ read_verb(SerdReader* reader, SerdNode** const dest) /* Either a qname, or "a". Read the prefix first, and if it is in fact "a", produce that instead. */ - if (!(*dest = push_node(reader, SERD_URI, "", 0))) { + if (!(*dest = push_node_head(reader, SERD_URI))) { return SERD_BAD_STACK; } @@ -593,7 +604,7 @@ read_verb(SerdReader* reader, SerdNode** const dest) reader, st > SERD_FAILURE ? st : SERD_BAD_SYNTAX, "expected verb"); } - return SERD_SUCCESS; + return SERD_SUCCESS; // push_node_tail(reader); } static SerdStatus @@ -670,7 +681,7 @@ read_named_object(SerdReader* const reader, Deal with this here by trying to read a prefixed node, then if it turns out to actually be "true" or "false", switch it to a boolean literal. */ - if (!(*dest = push_node(reader, SERD_URI, "", 0))) { + if (!(*dest = push_node_head(reader, SERD_URI))) { return SERD_BAD_STACK; } @@ -685,6 +696,7 @@ read_named_object(SerdReader* const reader, node_has_string(node, false_string))) { node->flags = SERD_HAS_DATATYPE; node->type = SERD_LITERAL; + TRY(st, push_node_tail(reader)); return push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN) ? SERD_SUCCESS : SERD_BAD_STACK; @@ -696,7 +708,7 @@ read_named_object(SerdReader* const reader, return r_err(reader, st, "expected prefixed name or boolean"); } - return SERD_SUCCESS; + return SERD_SUCCESS; // push_node_tail(reader); } // Read an object and emit statements, possibly recursively @@ -705,8 +717,8 @@ read_object(SerdReader* const reader, ReadContext* const ctx, bool* const ate_dot) { - const size_t orig_stack_size = reader->stack.size; - SerdCaret orig_caret = reader->source->caret; + const size_t orig_stack_size = reader->stack.size; + struct SerdCaretImpl orig_caret = reader->source.caret; assert(ctx->subject); @@ -809,7 +821,7 @@ read_predicateObjectList(SerdReader* const reader, int c = 0; do { read_turtle_ws_star(reader); - switch (c = peek_byte(reader)) { + switch ((c = peek_byte(reader))) { case EOF: serd_stack_pop_to(&reader->stack, orig_stack_size); return r_err(reader, SERD_BAD_SYNTAX, "unexpected end of file"); @@ -869,11 +881,9 @@ read_collection(SerdReader* const reader, /* The order of node allocation here is necessarily not in stack order, so we create two nodes and recycle them throughout. */ - SerdNode* n1 = - push_node_padded(reader, genid_length(reader), SERD_BLANK, "", 0); - + SerdNode* n1 = push_node_padding(reader, SERD_BLANK, genid_length(reader)); SerdNode* node = n1; - SerdNode* rest = 0; + SerdNode* rest = NULL; if (!n1) { return SERD_BAD_STACK; @@ -985,7 +995,6 @@ read_turtle_base(SerdReader* const reader, const bool sparql, const bool token) return SERD_BAD_STACK; } - serd_node_zero_pad(uri); TRY(st, serd_env_set_base_uri(reader->env, serd_node_string_view(uri))); TRY(st, serd_sink_write_base(reader->sink, uri)); @@ -1012,25 +1021,22 @@ read_turtle_prefixID(SerdReader* const reader, } read_turtle_ws_star(reader); - SerdNode* name = push_node(reader, SERD_LITERAL, "", 0); + + SerdNode* const name = push_node_head(reader, SERD_LITERAL); if (!name) { return SERD_BAD_STACK; } + // Read (possibly empty) name node TRY_LAX(st, read_PN_PREFIX(reader, name)); + TRY(st, push_node_tail(reader)); TRY(st, eat_byte_check(reader, ':')); read_turtle_ws_star(reader); + // Read URI node SerdNode* uri = NULL; TRY(st, read_IRIREF(reader, &uri)); - if (reader->stack.size + sizeof(SerdNode) > reader->stack.buf_size) { - return SERD_BAD_STACK; - } - - serd_node_zero_pad(name); - serd_node_zero_pad(uri); - TRY(st, serd_env_set_prefix( reader->env, serd_node_string_view(name), serd_node_string_view(uri))); @@ -1064,11 +1070,15 @@ read_turtle_directive(SerdReader* const reader) static SerdStatus read_sparql_directive(SerdReader* const reader, const SerdNode* const token) { + SerdStatus st = SERD_SUCCESS; + if (!tokcmp(token, "base", 4)) { + TRY(st, push_node_tail(reader)); return read_turtle_base(reader, true, false); } if (!tokcmp(token, "prefix", 6)) { + TRY(st, push_node_tail(reader)); return read_turtle_prefixID(reader, true, false); } @@ -1078,11 +1088,10 @@ read_sparql_directive(SerdReader* const reader, const SerdNode* const token) static SerdStatus read_block(SerdReader* const reader, ReadContext* const ctx) { - SerdStatus st = SERD_SUCCESS; - // Try to read a subject, though it may actually be a directive or graph name - SerdNode* token = NULL; - int s_type = 0; + SerdNode* token = NULL; + SerdStatus st = SERD_SUCCESS; + int s_type = 0; TRY_LAX(st, read_turtle_subject(reader, *ctx, &token, &s_type)); // Try to interpret as a SPARQL "PREFIX" or "BASE" directive @@ -1121,7 +1130,7 @@ read_turtle_statement(SerdReader* const reader) return SERD_FAILURE; case '\0': - eat_byte(reader); + skip_byte(reader, '\0'); return SERD_FAILURE; case '@': @@ -1138,7 +1147,7 @@ read_turtle_statement(SerdReader* const reader) SerdStatus read_turtleDoc(SerdReader* const reader) { - while (!reader->source->eof) { + while (!reader->source.eof) { const size_t orig_stack_size = reader->stack.size; const SerdStatus st = read_turtle_statement(reader); |