diff options
-rw-r--r-- | src/n3.c | 107 | ||||
-rw-r--r-- | src/reader.c | 27 | ||||
-rw-r--r-- | src/reader.h | 4 | ||||
-rw-r--r-- | src/stack.h | 33 |
4 files changed, 69 insertions, 102 deletions
@@ -319,7 +319,7 @@ read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) read_UCHAR(reader, ref, &code)) { r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape `\\%c'\n", peek_byte(reader)); - return pop_node(reader, ref); + return NULL; } } else if (c == q) { eat_byte_safe(reader, q); @@ -334,7 +334,7 @@ read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) st = read_character(reader, ref, flags, (uint8_t)q2); } else if (c == EOF) { r_err(reader, SERD_ERR_BAD_SYNTAX, "end of file in long string\n"); - return pop_node(reader, ref); + return NULL; } else { st = read_character( reader, ref, flags, (uint8_t)eat_byte_safe(reader, c)); @@ -357,17 +357,17 @@ read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) switch (c) { case EOF: r_err(reader, SERD_ERR_BAD_SYNTAX, "end of file in short string\n"); - return pop_node(reader, ref); + return NULL; case '\n': case '\r': r_err(reader, SERD_ERR_BAD_SYNTAX, "line end in short string\n"); - return pop_node(reader, ref); + return NULL; case '\\': eat_byte_safe(reader, c); if (read_ECHAR(reader, ref, flags) && read_UCHAR(reader, ref, &code)) { r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape `\\%c'\n", peek_byte(reader)); - return pop_node(reader, ref); + return NULL; } break; default: @@ -383,14 +383,10 @@ read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) if (st) { reader->status = st; - return pop_node(reader, ref); - } - - if (!eat_byte_check(reader, q)) { - return pop_node(reader, ref); + return NULL; } - return ref; + return eat_byte_check(reader, q) ? ref : NULL; } static SerdNode* @@ -655,7 +651,7 @@ read_IRIREF(SerdReader* reader) TRY_RET(eat_byte_check(reader, '<')); SerdNode* ref = push_node(reader, SERD_URI, "", 0); if (!ref || (!fancy_syntax(reader) && !read_IRIREF_scheme(reader, ref))) { - return pop_node(reader, ref); + return NULL; } SerdStatus st = SERD_SUCCESS; @@ -666,19 +662,19 @@ read_IRIREF(SerdReader* reader) case '"': case '<': case '^': case '`': case '{': case '|': case '}': r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI character `%c'\n", c); - return pop_node(reader, ref); + return NULL; case '>': return ref; case '\\': if (read_UCHAR(reader, ref, &code)) { r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n"); - return pop_node(reader, ref); + return NULL; } switch (code) { case 0: case ' ': case '<': case '>': r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escaped IRI character %X %c\n", code, code); - return pop_node(reader, ref); + return NULL; } break; default: @@ -693,7 +689,7 @@ read_IRIREF(SerdReader* reader) (unsigned)c); } if (reader->strict) { - return pop_node(reader, ref); + return NULL; } reader->status = SERD_FAILURE; push_byte(reader, ref, c); @@ -701,13 +697,12 @@ read_IRIREF(SerdReader* reader) push_byte(reader, ref, c); } else if ((st = read_utf8_character(reader, ref, (uint8_t)c))) { if (reader->strict) { - reader->status = SERD_FAILURE; - return pop_node(reader, ref); + return NULL; } } } } - return pop_node(reader, ref); + return NULL; } static bool @@ -804,8 +799,6 @@ read_number(SerdReader* reader, *dest = ref; return true; except: - pop_node(reader, *datatype); - pop_node(reader, ref); return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad number syntax\n"); } @@ -853,15 +846,15 @@ read_literal(SerdReader* reader, *dest = str; return true; except: - *datatype = pop_node(reader, *datatype); - *lang = pop_node(reader, *lang); - pop_node(reader, str); + *datatype = NULL; + *lang = NULL; return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad literal syntax\n"); } static bool read_verb(SerdReader* reader, SerdNode** dest) { + const size_t orig_stack_size = reader->stack.size; if (peek_byte(reader) == '<') { return (*dest = read_IRIREF(reader)); } @@ -880,12 +873,12 @@ read_verb(SerdReader* reader, SerdNode** dest) if (!st && node->n_bytes == 1 && serd_node_get_string(node)[0] == 'a' && next != ':' && !is_PN_CHARS_BASE((uint32_t)next)) { - pop_node(reader, *dest); + serd_stack_pop_to(&reader->stack, orig_stack_size); return (*dest = push_node(reader, SERD_URI, NS_RDF "type", 47)); } else if (st > SERD_FAILURE || !read_PrefixedName(reader, *dest, false, &ate_dot) || ate_dot) { - *dest = pop_node(reader, *dest); + *dest = NULL; return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad verb\n"); } @@ -909,7 +902,7 @@ read_BLANK_NODE_LABEL(SerdReader* reader, bool* ate_dot) push_byte(reader, n, eat_byte_safe(reader, c)); } else if (read_PN_CHARS(reader, n)) { r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid name start character\n"); - return pop_node(reader, n); + return NULL; } while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.')* @@ -936,7 +929,7 @@ read_BLANK_NODE_LABEL(SerdReader* reader, bool* ate_dot) } else if (reader->seen_genid && buf[reader->bprefix_len] == 'B') { r_err(reader, SERD_ERR_ID_CLASH, "found both `b' and `B' blank IDs, prefix required\n"); - return pop_node(reader, n); + return NULL; } } } @@ -1013,9 +1006,7 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot) static const char* const XSD_BOOLEAN = NS_XSD "boolean"; static const size_t XSD_BOOLEAN_LEN = 40; -#ifndef NDEBUG const size_t orig_stack_size = reader->stack.size; -#endif bool ret = false; bool simple = (ctx->subject != 0); @@ -1094,9 +1085,7 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot) } except: - pop_node(reader, lang); - pop_node(reader, datatype); - pop_node(reader, o); + serd_stack_pop_to(&reader->stack, orig_stack_size); #ifndef NDEBUG assert(reader->stack.size == orig_stack_size); #endif @@ -1121,11 +1110,13 @@ read_objectList(SerdReader* reader, ReadContext ctx, bool* ate_dot) static bool read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot) { + const size_t orig_stack_size = reader->stack.size; + while (read_verb(reader, &ctx.predicate) && read_ws_star(reader) && read_objectList(reader, ctx, ate_dot)) { - ctx.predicate = pop_node(reader, ctx.predicate); if (*ate_dot) { + serd_stack_pop_to(&reader->stack, orig_stack_size); return true; } @@ -1135,9 +1126,11 @@ read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot) read_ws_star(reader); switch (c = peek_byte(reader)) { case EOF: case '\0': + serd_stack_pop_to(&reader->stack, orig_stack_size); return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected end of file\n"); case '.': case ']': case '}': + serd_stack_pop_to(&reader->stack, orig_stack_size); return true; case ';': eat_byte_safe(reader, c); @@ -1146,18 +1139,19 @@ read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot) } while (c == ';'); if (!ate_semi) { + serd_stack_pop_to(&reader->stack, orig_stack_size); return r_err(reader, SERD_ERR_BAD_SYNTAX, "missing ';' or '.'\n"); } } - return pop_node(reader, ctx.predicate); + serd_stack_pop_to(&reader->stack, orig_stack_size); + ctx.predicate = 0; + return false; } static bool -end_collection(SerdReader* reader, ReadContext ctx, SerdNode* n1, SerdNode* n2, bool ret) +end_collection(SerdReader* reader, ReadContext ctx, bool ret) { - pop_node(reader, n2); - pop_node(reader, n1); *ctx.flags &= ~(unsigned)SERD_LIST_CONT; return ret && (eat_byte_safe(reader, ')') == ')'); } @@ -1178,7 +1172,7 @@ read_collection(SerdReader* reader, ReadContext ctx, SerdNode** dest) } if (end) { - return end_collection(reader, ctx, 0, 0, true); + return end_collection(reader, ctx, true); } /* The order of node allocation here is necessarily not in stack order, @@ -1198,7 +1192,7 @@ read_collection(SerdReader* reader, ReadContext ctx, SerdNode** dest) ctx.predicate = reader->rdf_first; bool ate_dot = false; if (!read_object(reader, &ctx, true, &ate_dot) || ate_dot) { - return end_collection(reader, ctx, n1, n2, false); + return end_collection(reader, ctx, false); } if (!(end = peek_delim(reader, ')'))) { @@ -1221,7 +1215,7 @@ read_collection(SerdReader* reader, ReadContext ctx, SerdNode** dest) node = ctx.subject; // invariant } - return end_collection(reader, ctx, n1, n2, true); + return end_collection(reader, ctx, true); } static SerdNode* @@ -1241,7 +1235,7 @@ read_subject(SerdReader* reader, ReadContext ctx, SerdNode** dest, int* s_type) default: TRY_RET(read_iri(reader, dest, &ate_dot)); } - return ate_dot ? pop_node(reader, *dest) : *dest; + return ate_dot ? NULL : *dest; } static SerdNode* @@ -1296,7 +1290,6 @@ read_base(SerdReader* reader, bool sparql, bool token) } else if (reader->sink->base) { reader->sink->base(reader->sink->handle, uri); } - pop_node(reader, uri); read_ws_star(reader); if (!sparql) { @@ -1321,25 +1314,22 @@ read_prefixID(SerdReader* reader, bool sparql, bool token) if (!name) { return false; } else if (read_PN_PREFIX(reader, name) > SERD_FAILURE) { - return pop_node(reader, name); + return false; } if (eat_byte_check(reader, ':') != ':') { - return pop_node(reader, name); + return false; } read_ws_star(reader); const SerdNode* uri = read_IRIREF(reader); if (!uri) { - pop_node(reader, name); return false; } if (reader->sink->prefix) { ret = !reader->sink->prefix(reader->sink->handle, name, uri); } - pop_node(reader, uri); - pop_node(reader, name); if (!sparql) { read_ws_star(reader); return eat_byte_check(reader, '.'); @@ -1376,8 +1366,10 @@ read_wrappedGraph(SerdReader* reader, ReadContext* ctx) TRY_RET(eat_byte_check(reader, '{')); read_ws_star(reader); while (peek_byte(reader) != '}') { - bool ate_dot = false; - int s_type = 0; + const size_t orig_stack_size = reader->stack.size; + bool ate_dot = false; + int s_type = 0; + ctx->subject = 0; SerdNode* subj = read_subject(reader, *ctx, &ctx->subject, &s_type); if (!subj && ctx->subject) { @@ -1388,7 +1380,7 @@ read_wrappedGraph(SerdReader* reader, ReadContext* ctx) return r_err(reader, SERD_ERR_BAD_SYNTAX, "missing predicate object list\n"); } - pop_node(reader, subj); + serd_stack_pop_to(&reader->stack, orig_stack_size); read_ws_star(reader); if (peek_byte(reader) == '.') { eat_byte_safe(reader, '.'); @@ -1447,7 +1439,6 @@ read_n3_statement(SerdReader* reader) TRY_RET((ctx.graph = read_labelOrSubject(reader))); read_ws_star(reader); TRY_RET(read_wrappedGraph(reader, &ctx)); - pop_node(reader, ctx.graph); ctx.graph = 0; read_ws_star(reader); } else if (read_ws_star(reader) && peek_byte(reader) == '{') { @@ -1458,7 +1449,6 @@ read_n3_statement(SerdReader* reader) ctx.graph = subj; ctx.subject = subj = 0; TRY_RET(read_wrappedGraph(reader, &ctx)); - pop_node(reader, ctx.graph); read_ws_star(reader); } else if (!subj) { ret = r_err(reader, SERD_ERR_BAD_SYNTAX, "bad subject\n"); @@ -1471,7 +1461,6 @@ read_n3_statement(SerdReader* reader) read_ws_star(reader); ret = (eat_byte_check(reader, '.') == '.'); } - pop_node(reader, subj); break; } return ret; @@ -1489,13 +1478,16 @@ SerdStatus read_turtleTrigDoc(SerdReader* reader) { while (!reader->source.eof) { + const size_t orig_stack_size = reader->stack.size; if (!read_n3_statement(reader)) { if (reader->strict) { + serd_stack_pop_to(&reader->stack, orig_stack_size); return SERD_ERR_UNKNOWN; } skip_until(reader, '\n'); reader->status = SERD_SUCCESS; } + serd_stack_pop_to(&reader->stack, orig_stack_size); } return reader->status; } @@ -1504,6 +1496,8 @@ SerdStatus read_nquadsDoc(SerdReader* reader) { while (!reader->source.eof) { + const size_t orig_stack_size = reader->stack.size; + SerdStatementFlags flags = 0; ReadContext ctx = { 0, 0, 0, 0, 0, 0, &flags }; bool ate_dot = false; @@ -1549,10 +1543,7 @@ read_nquadsDoc(SerdReader* reader) break; } - pop_node(reader, ctx.graph); - pop_node(reader, ctx.lang); - pop_node(reader, ctx.datatype); - pop_node(reader, ctx.object); + serd_stack_pop_to(&reader->stack, orig_stack_size); } return reader->status; } diff --git a/src/reader.c b/src/reader.c index 585a1cd2..3eccacc7 100644 --- a/src/reader.c +++ b/src/reader.c @@ -76,6 +76,13 @@ SerdNode* push_node_padded(SerdReader* reader, size_t maxlen, SerdType type, const char* str, size_t n_bytes) { + // Push a null byte to ensure the previous node was null terminated + char* terminator = (char*)serd_stack_push(&reader->stack, 1); + if (!terminator) { + return NULL; + } + *terminator = 0; + void* mem = serd_stack_push_aligned( &reader->stack, sizeof(SerdNode) + maxlen + 1, sizeof(SerdNode)); @@ -95,7 +102,7 @@ push_node_padded(SerdReader* reader, size_t maxlen, #ifdef SERD_STACK_CHECK reader->allocs = realloc( reader->allocs, sizeof(reader->allocs) * (++reader->n_allocs)); - reader->allocs[reader->n_allocs - 1] = (mem - reader->stack.buf); + reader->allocs[reader->n_allocs - 1] = node; #endif return node; } @@ -106,21 +113,6 @@ push_node(SerdReader* reader, SerdType type, const char* str, size_t n_bytes) return push_node_padded(reader, n_bytes, type, str, n_bytes); } -SerdNode* -pop_node(SerdReader* reader, const SerdNode* node) -{ - if (node && node != reader->rdf_first && node != reader->rdf_rest - && node != reader->rdf_nil) { -#ifdef SERD_STACK_CHECK - SERD_STACK_ASSERT_TOP(reader, node); - --reader->n_allocs; -#endif - char* const top = reader->stack.buf + reader->stack.size; - serd_stack_pop_aligned(&reader->stack, (size_t)(top - (char*)node)); - } - return NULL; -} - bool emit_statement(SerdReader* reader, ReadContext ctx, SerdNode* o) { @@ -190,9 +182,6 @@ serd_reader_set_strict(SerdReader* reader, bool strict) void serd_reader_free(SerdReader* reader) { - pop_node(reader, reader->rdf_nil); - pop_node(reader, reader->rdf_rest); - pop_node(reader, reader->rdf_first); serd_node_free(reader->default_graph); #ifdef SERD_STACK_CHECK diff --git a/src/reader.h b/src/reader.h index 30663210..64e00eb7 100644 --- a/src/reader.h +++ b/src/reader.h @@ -94,8 +94,6 @@ size_t genid_size(SerdReader* reader); SerdNode* blank_id(SerdReader* reader); void set_blank_id(SerdReader* reader, SerdNode* node, size_t buf_size); -SerdNode* pop_node(SerdReader* reader, const SerdNode* node); - bool emit_statement(SerdReader* reader, ReadContext ctx, SerdNode* o); bool read_n3_statement(SerdReader* reader); @@ -156,7 +154,7 @@ static inline SerdStatus push_byte(SerdReader* reader, SerdNode* node, const int c) { assert(c != EOF); - SERD_STACK_ASSERT_TOP(reader, ref); + SERD_STACK_ASSERT_TOP(reader, node); char* const s = (char*)serd_stack_push(&reader->stack, 1); if (!s) { diff --git a/src/stack.h b/src/stack.h index 2356731b..5bd86f7a 100644 --- a/src/stack.h +++ b/src/stack.h @@ -22,6 +22,7 @@ #include <stddef.h> #include <stdint.h> #include <stdlib.h> +#include <string.h> /** An offset to start the stack at. Note 0 is reserved for NULL. */ #define SERD_STACK_BOTTOM sizeof(void*) @@ -77,40 +78,28 @@ serd_stack_pop(SerdStack* stack, size_t n_bytes) stack->size -= n_bytes; } +static inline void +serd_stack_pop_to(SerdStack* stack, size_t n_bytes) +{ + assert(stack->size >= n_bytes); + stack->size = n_bytes; +} + static inline void* serd_stack_push_aligned(SerdStack* stack, size_t n_bytes, size_t align) { - // Push one byte to ensure space for a pad count - if (!serd_stack_push(stack, 1)) { - return NULL; - } - // Push padding if necessary const size_t pad = align - stack->size % align; if (pad > 0) { - if (!serd_stack_push(stack, pad)) { + void* padding = serd_stack_push(stack, pad); + if (!padding) { return NULL; } + memset(padding, 0, pad); } - // Set top of stack to pad count so we can properly pop later - stack->buf[stack->size - 1] = (char)pad; - // Push requested space at aligned location return serd_stack_push(stack, n_bytes); } -static inline void -serd_stack_pop_aligned(SerdStack* stack, size_t n_bytes) -{ - // Pop requested space down to aligned location - serd_stack_pop(stack, n_bytes); - - // Get amount of padding from top of stack - const uint8_t pad = (uint8_t)stack->buf[stack->size - 1]; - - // Pop padding and pad count - serd_stack_pop(stack, pad + 1u); -} - #endif // SERD_STACK_H |