diff options
author | David Robillard <d@drobilla.net> | 2023-12-01 20:39:44 -0500 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2023-12-02 18:49:08 -0500 |
commit | 02d56e83931e53e1cde57247c64d56fda3804f77 (patch) | |
tree | 2d1ac467bc56f4f4f3570497427be32d7e36bd1a /src/read_ntriples.c | |
parent | d094448c095a59117febc8bd4687df071ce9759a (diff) | |
download | serd-02d56e83931e53e1cde57247c64d56fda3804f77.tar.gz serd-02d56e83931e53e1cde57247c64d56fda3804f77.tar.bz2 serd-02d56e83931e53e1cde57247c64d56fda3804f77.zip |
[WIP] Tighten up reader node management
[WIP] Broken on 32-bit
This makes the reader stack manipulations stricter, to make the code more
regular and avoid redundant work and bad cache activity. Now, functions that
push node headers and their bodies are responsible for (more or less)
immediately pushing any trailing null bytes required for termination and
alignment.
This makes the writes to the node in the stack more local, ensures nodes are
terminated as early as possible (to reduce the risk of using non-terminated
strings), and avoids the need to calculate aligned stack allocations.
Diffstat (limited to 'src/read_ntriples.c')
-rw-r--r-- | src/read_ntriples.c | 86 |
1 files changed, 50 insertions, 36 deletions
diff --git a/src/read_ntriples.c b/src/read_ntriples.c index 5c02abfe..e5101522 100644 --- a/src/read_ntriples.c +++ b/src/read_ntriples.c @@ -42,23 +42,29 @@ read_LANGTAG(SerdReader* const reader) return r_err(reader, SERD_BAD_SYNTAX, "expected A-Z or a-z"); } - SerdNode* node = push_node(reader, SERD_LITERAL, "", 0); + SerdNode* const node = push_node_head(reader, SERD_LITERAL); if (!node) { return SERD_BAD_STACK; } SerdStatus st = SERD_SUCCESS; - TRY(st, push_byte(reader, node, eat_byte_safe(reader, c))); + TRY(st, skip_byte(reader, c)); + TRY(st, push_byte(reader, node, c)); while ((c = peek_byte(reader)) && is_alpha(c)) { TRY(st, push_byte(reader, node, eat_byte_safe(reader, c))); } while (peek_byte(reader) == '-') { TRY(st, push_byte(reader, node, eat_byte_safe(reader, '-'))); - while ((c = peek_byte(reader)) && (is_alpha(c) || is_digit(c))) { - TRY(st, push_byte(reader, node, eat_byte_safe(reader, c))); + + c = peek_byte(reader); + while (is_alpha(c) || is_digit(c)) { + TRY(st, push_byte(reader, node, c)); + TRY(st, skip_byte(reader, c)); + c = peek_byte(reader); } } - return SERD_SUCCESS; + + return push_node_tail(reader); } static bool @@ -71,13 +77,16 @@ is_EOL(const int c) SerdStatus read_EOL(SerdReader* const reader) { - if (!is_EOL(peek_byte(reader))) { + int c = peek_byte(reader); + + if (!is_EOL(c)) { return r_err(reader, SERD_BAD_SYNTAX, "expected a line ending"); } - while (is_EOL(peek_byte(reader))) { - eat_byte(reader); - } + do { + skip_byte(reader, c); + c = peek_byte(reader); + } while (is_EOL(c)); return SERD_SUCCESS; } @@ -176,7 +185,9 @@ read_IRIREF_suffix(SerdReader* const reader, SerdNode* const node) uint32_t code = 0U; while (st <= SERD_FAILURE) { - const int c = eat_byte(reader); + const int c = peek_byte(reader); + skip_byte(reader, c); + switch (c) { case ' ': case '"': @@ -239,9 +250,10 @@ static SerdStatus read_IRI(SerdReader* const reader, SerdNode** const dest) { SerdStatus st = SERD_SUCCESS; - TRY(st, eat_byte_check(reader, '<')); - if (!(*dest = push_node(reader, SERD_URI, "", 0))) { + TRY(st, skip_byte(reader, '<')); + + if (!(*dest = push_node_head(reader, SERD_URI))) { return SERD_BAD_STACK; } @@ -249,7 +261,8 @@ read_IRI(SerdReader* const reader, SerdNode** const dest) return r_err(reader, st, "expected IRI scheme"); } - return read_IRIREF_suffix(reader, *dest); + TRY(st, read_IRIREF_suffix(reader, *dest)); + return push_node_tail(reader); } SerdStatus @@ -287,7 +300,7 @@ read_STRING_LITERAL(SerdReader* const reader, case '\r': return r_err(reader, SERD_BAD_SYNTAX, "line end in short string"); case '\\': - skip_byte(reader, c); + TRY(st, skip_byte(reader, c)); TRY(st, read_string_escape(reader, ref)); break; default: @@ -330,7 +343,7 @@ read_BLANK_NODE_LABEL(SerdReader* const reader, { SerdStatus st = SERD_SUCCESS; - skip_byte(reader, '_'); + TRY(st, skip_byte(reader, '_')); TRY(st, eat_byte_check(reader, ':')); int c = peek_byte(reader); @@ -339,8 +352,7 @@ read_BLANK_NODE_LABEL(SerdReader* const reader, return r_err(reader, SERD_BAD_SYNTAX, "expected blank node label"); } - if (!(*dest = push_node( - reader, SERD_BLANK, reader->bprefix, reader->bprefix_len))) { + if (!(*dest = push_node_head(reader, SERD_BLANK))) { return SERD_BAD_STACK; } @@ -373,7 +385,7 @@ read_BLANK_NODE_LABEL(SerdReader* const reader, // Adjust ID to avoid clashes with generated IDs if necessary st = adjust_blank_id(reader, buf); - return tolerate_status(reader, st) ? SERD_SUCCESS : st; + return tolerate_status(reader, st) ? push_node_tail(reader) : st; } static unsigned @@ -592,19 +604,22 @@ read_VARNAME(SerdReader* const reader, SerdNode** const dest) SerdStatus read_Var(SerdReader* const reader, SerdNode** const dest) { + SerdStatus st = SERD_SUCCESS; + const int c = peek_byte(reader); + assert(c == '$' || c == '?'); + if (!(reader->flags & SERD_READ_VARIABLES)) { return r_err(reader, SERD_BAD_SYNTAX, "syntax does not support variables"); } - const int c = peek_byte(reader); - assert(c == '$' || c == '?'); - skip_byte(reader, c); - - if (!(*dest = push_node(reader, SERD_VARIABLE, "", 0))) { + if (!(*dest = push_node_head(reader, SERD_VARIABLE))) { return SERD_BAD_STACK; } - return read_VARNAME(reader, dest); + TRY(st, skip_byte(reader, c)); + TRY(st, read_VARNAME(reader, dest)); + + return st ? st : push_node_tail(reader); } // Nonterminals @@ -613,14 +628,16 @@ read_Var(SerdReader* const reader, SerdNode** const dest) SerdStatus read_comment(SerdReader* const reader) { - skip_byte(reader, '#'); + SerdStatus st = SERD_SUCCESS; + + TRY(st, skip_byte(reader, '#')); for (int c = peek_byte(reader); c && c != '\n' && c != '\r' && c != EOF;) { - skip_byte(reader, c); + TRY(st, skip_byte(reader, c)); c = peek_byte(reader); } - return SERD_SUCCESS; + return st; } /// [6] literal @@ -629,22 +646,23 @@ read_literal(SerdReader* const reader, SerdNode** const dest) { SerdStatus st = SERD_SUCCESS; - if (!(*dest = push_node(reader, SERD_LITERAL, "", 0))) { + if (!(*dest = push_node_head(reader, SERD_LITERAL))) { return SERD_BAD_STACK; } - skip_byte(reader, '"'); + TRY(st, skip_byte(reader, '"')); TRY(st, read_STRING_LITERAL(reader, *dest, '"')); + TRY(st, push_node_tail(reader)); SerdNode* datatype = NULL; switch (peek_byte(reader)) { case '@': - skip_byte(reader, '@'); + TRY(st, skip_byte(reader, '@')); TRY(st, read_LANGTAG(reader)); (*dest)->flags |= SERD_HAS_LANGUAGE; break; case '^': - skip_byte(reader, '^'); + TRY(st, skip_byte(reader, '^')); TRY(st, eat_byte_check(reader, '^')); TRY(st, read_IRI(reader, &datatype)); (*dest)->flags |= SERD_HAS_DATATYPE; @@ -724,7 +742,7 @@ read_triple(SerdReader* const reader) } // Preserve the caret for error reporting and read object - SerdCaret orig_caret = reader->source->caret; + SerdCaret orig_caret = reader->source.caret; if ((st = read_nt_object(reader, &ctx.object, &ate_dot)) || (st = skip_horizontal_whitespace(reader))) { return st; @@ -734,10 +752,6 @@ read_triple(SerdReader* const reader) return st; } - if (ctx.object) { - serd_node_zero_pad(ctx.object); - } - const SerdStatement statement = { {ctx.subject, ctx.predicate, ctx.object, ctx.graph}, &orig_caret}; |