diff options
-rw-r--r-- | serd/serd.h | 6 | ||||
-rw-r--r-- | src/n3.c | 321 | ||||
-rw-r--r-- | src/reader.c | 60 | ||||
-rw-r--r-- | src/reader.h | 77 | ||||
-rw-r--r-- | src/serdi.c | 16 | ||||
-rw-r--r-- | src/stack.h | 11 | ||||
-rw-r--r-- | src/string.c | 1 | ||||
-rw-r--r-- | src/writer.c | 2 | ||||
-rw-r--r-- | tests/read_chunk_test.c | 2 | ||||
-rw-r--r-- | tests/serd_test.c | 2 |
10 files changed, 277 insertions, 221 deletions
diff --git a/serd/serd.h b/serd/serd.h index e7b47693..514fccf4 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -128,7 +128,8 @@ typedef enum { SERD_ERR_NOT_FOUND, /**< Not found */ SERD_ERR_ID_CLASH, /**< Encountered clashing blank node IDs */ SERD_ERR_BAD_CURIE, /**< Invalid CURIE (e.g. prefix does not exist) */ - SERD_ERR_INTERNAL /**< Unexpected internal error (should not happen) */ + SERD_ERR_INTERNAL, /**< Unexpected internal error (should not happen) */ + SERD_ERR_OVERFLOW /**< Stack overflow */ } SerdStatus; /** @@ -928,7 +929,8 @@ SERD_API SerdReader* serd_reader_new(SerdWorld* world, SerdSyntax syntax, - const SerdSinkInterface* sink); + const SerdSinkInterface* sink, + size_t stack_size); /** Enable or disable strict parsing. @@ -37,7 +37,7 @@ fancy_syntax(const SerdReader* reader) } static bool -read_collection(SerdReader* reader, ReadContext ctx, Ref* dest); +read_collection(SerdReader* reader, ReadContext ctx, SerdNode** dest); static bool read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot); @@ -54,8 +54,8 @@ read_HEX(SerdReader* reader) } // Read UCHAR escape, initial \ is already eaten by caller -static inline bool -read_UCHAR(SerdReader* reader, Ref dest, uint32_t* char_code) +static inline SerdStatus +read_UCHAR(SerdReader* reader, SerdNode* dest, uint32_t* char_code) { const uint8_t b = peek_byte(reader); unsigned length = 0; @@ -67,14 +67,14 @@ read_UCHAR(SerdReader* reader, Ref dest, uint32_t* char_code) length = 4; break; default: - return false; + return SERD_ERR_BAD_SYNTAX; } eat_byte_safe(reader, b); uint8_t buf[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; for (unsigned i = 0; i < length; ++i) { if (!(buf[i] = read_HEX(reader))) { - return false; + return SERD_ERR_BAD_SYNTAX; } } @@ -93,9 +93,9 @@ read_UCHAR(SerdReader* reader, Ref dest, uint32_t* char_code) } else { r_err(reader, SERD_ERR_BAD_SYNTAX, "unicode character 0x%X out of range\n", code); - push_bytes(reader, dest, replacement_char, 3); *char_code = 0xFFFD; - return true; + const SerdStatus st = push_bytes(reader, dest, replacement_char, 3); + return st ? st : SERD_SUCCESS; } // Build output in buf @@ -118,44 +118,37 @@ read_UCHAR(SerdReader* reader, Ref dest, uint32_t* char_code) buf[0] = (uint8_t)c; } - push_bytes(reader, dest, buf, size); *char_code = code; - return true; + return push_bytes(reader, dest, buf, size); } // Read ECHAR escape, initial \ is already eaten by caller -static inline bool -read_ECHAR(SerdReader* reader, Ref dest, SerdNodeFlags* flags) +static inline SerdStatus +read_ECHAR(SerdReader* reader, SerdNode* dest, SerdNodeFlags* flags) { const uint8_t c = peek_byte(reader); switch (c) { case 't': eat_byte_safe(reader, 't'); - push_byte(reader, dest, '\t'); - return true; + return push_byte(reader, dest, '\t'); case 'b': eat_byte_safe(reader, 'b'); - push_byte(reader, dest, '\b'); - return true; + return push_byte(reader, dest, '\b'); case 'n': *flags |= SERD_HAS_NEWLINE; eat_byte_safe(reader, 'n'); - push_byte(reader, dest, '\n'); - return true; + return push_byte(reader, dest, '\n'); case 'r': *flags |= SERD_HAS_NEWLINE; eat_byte_safe(reader, 'r'); - push_byte(reader, dest, '\r'); - return true; + return push_byte(reader, dest, '\r'); case 'f': eat_byte_safe(reader, 'f'); - push_byte(reader, dest, '\f'); - return true; + return push_byte(reader, dest, '\f'); case '\\': case '"': case '\'': - push_byte(reader, dest, eat_byte_safe(reader, c)); - return true; + return push_byte(reader, dest, eat_byte_safe(reader, c)); default: - return false; + return SERD_ERR_BAD_SYNTAX; } } @@ -193,21 +186,21 @@ read_utf8_bytes(SerdReader* reader, uint8_t bytes[4], uint32_t* size, uint8_t c) } static SerdStatus -read_utf8_character(SerdReader* reader, Ref dest, uint8_t c) +read_utf8_character(SerdReader* reader, SerdNode* dest, uint8_t c) { uint32_t size; uint8_t bytes[4]; SerdStatus st = read_utf8_bytes(reader, bytes, &size, c); if (st) { push_bytes(reader, dest, replacement_char, 3); - } else { - push_bytes(reader, dest, bytes, size); + return st; } - return st; + + return push_bytes(reader, dest, bytes, size); } static SerdStatus -read_utf8_code(SerdReader* reader, Ref dest, uint32_t* code, uint8_t c) +read_utf8_code(SerdReader* reader, SerdNode* dest, uint32_t* code, uint8_t c) { uint32_t size; uint8_t bytes[4]; @@ -217,15 +210,17 @@ read_utf8_code(SerdReader* reader, Ref dest, uint32_t* code, uint8_t c) return st; } - push_bytes(reader, dest, bytes, size); - *code = parse_counted_utf8_char(bytes, size); + if (!(st = push_bytes(reader, dest, bytes, size))) { + *code = parse_counted_utf8_char(bytes, size); + } + return st; } // Read one character (possibly multi-byte) // The first byte, c, has already been eaten by caller static inline SerdStatus -read_character(SerdReader* reader, Ref dest, SerdNodeFlags* flags, uint8_t c) +read_character(SerdReader* reader, SerdNode* dest, SerdNodeFlags* flags, uint8_t c) { if (!(c & 0x80)) { switch (c) { @@ -236,8 +231,9 @@ read_character(SerdReader* reader, Ref dest, SerdNodeFlags* flags, uint8_t c) *flags |= SERD_HAS_QUOTE; break; } - push_byte(reader, dest, c); - return SERD_SUCCESS; + + const SerdStatus st = push_byte(reader, dest, c); + return st ? st : SERD_SUCCESS; } return read_utf8_character(reader, dest, c); } @@ -296,18 +292,19 @@ eat_delim(SerdReader* reader, const char delim) // STRING_LITERAL_LONG_QUOTE and STRING_LITERAL_LONG_SINGLE_QUOTE // Initial triple quotes are already eaten by caller -static Ref +static SerdNode* read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) { - Ref ref = push_node(reader, SERD_LITERAL, "", 0); + SerdNode* ref = push_node(reader, SERD_LITERAL, "", 0); + SerdStatus st = SERD_SUCCESS; while (!reader->status) { const uint8_t c = peek_byte(reader); uint32_t code; switch (c) { case '\\': eat_byte_safe(reader, c); - if (!read_ECHAR(reader, ref, flags) && - !read_UCHAR(reader, ref, &code)) { + if (read_ECHAR(reader, ref, flags) && + read_UCHAR(reader, ref, &code)) { r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape `\\%c'\n", peek_byte(reader)); return pop_node(reader, ref); @@ -324,21 +321,22 @@ read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) } *flags |= SERD_HAS_QUOTE; push_byte(reader, ref, c); - read_character(reader, ref, flags, q2); + st = read_character(reader, ref, flags, q2); } else { - read_character(reader, ref, flags, eat_byte_safe(reader, c)); + st = read_character(reader, ref, flags, eat_byte_safe(reader, c)); } } } - return ref; + return st ? NULL : ref; } // STRING_LITERAL_QUOTE and STRING_LITERAL_SINGLE_QUOTE // Initial quote is already eaten by caller -static Ref +static SerdNode* read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) { - Ref ref = push_node(reader, SERD_LITERAL, "", 0); + SerdNode* ref = push_node(reader, SERD_LITERAL, "", 0); + SerdStatus st = SERD_SUCCESS; while (!reader->status) { const uint8_t c = peek_byte(reader); uint32_t code = 0; @@ -348,8 +346,8 @@ read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) return pop_node(reader, ref); case '\\': eat_byte_safe(reader, c); - if (!read_ECHAR(reader, ref, flags) && - !read_UCHAR(reader, ref, &code)) { + if (read_ECHAR(reader, ref, flags) && + read_UCHAR(reader, ref, &code)) { r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape `\\%c'\n", peek_byte(reader)); return pop_node(reader, ref); @@ -360,15 +358,24 @@ read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) eat_byte_check(reader, q); return ref; } else { - read_character(reader, ref, flags, eat_byte_safe(reader, c)); + st = read_character(reader, ref, flags, eat_byte_safe(reader, c)); } } } - eat_byte_check(reader, q); + + if (st) { + reader->status = st; + return NULL; + } + + if (!eat_byte_check(reader, q)) { + return pop_node(reader, ref); + } + return ref; } -static Ref +static SerdNode* read_String(SerdReader* reader, SerdNodeFlags* flags) { const uint8_t q1 = peek_byte(reader); @@ -386,8 +393,9 @@ read_String(SerdReader* reader, SerdNodeFlags* flags) } if (!fancy_syntax(reader)) { - return r_err(reader, SERD_ERR_BAD_SYNTAX, - "syntax does not support long literals\n"); + r_err(reader, SERD_ERR_BAD_SYNTAX, + "syntax does not support long literals\n"); + return NULL; } eat_byte_safe(reader, q3); @@ -406,7 +414,7 @@ is_PN_CHARS_BASE(const uint32_t c) } static SerdStatus -read_PN_CHARS_BASE(SerdReader* reader, Ref dest) +read_PN_CHARS_BASE(SerdReader* reader, SerdNode* dest) { uint32_t code; const uint8_t c = peek_byte(reader); @@ -436,7 +444,7 @@ is_PN_CHARS(const uint32_t c) } static SerdStatus -read_PN_CHARS(SerdReader* reader, Ref dest) +read_PN_CHARS(SerdReader* reader, SerdNode* dest) { uint32_t code; const uint8_t c = peek_byte(reader); @@ -456,7 +464,7 @@ read_PN_CHARS(SerdReader* reader, Ref dest) } static bool -read_PERCENT(SerdReader* reader, Ref dest) +read_PERCENT(SerdReader* reader, SerdNode* dest) { push_byte(reader, dest, eat_byte_safe(reader, '%')); const uint8_t h1 = read_HEX(reader); @@ -470,7 +478,7 @@ read_PERCENT(SerdReader* reader, Ref dest) } static SerdStatus -read_PLX(SerdReader* reader, Ref dest) +read_PLX(SerdReader* reader, SerdNode* dest) { uint8_t c = peek_byte(reader); switch (c) { @@ -494,7 +502,7 @@ read_PLX(SerdReader* reader, Ref dest) } static SerdStatus -read_PN_LOCAL(SerdReader* reader, Ref dest, bool* ate_dot) +read_PN_LOCAL(SerdReader* reader, SerdNode* dest, bool* ate_dot) { uint8_t c = peek_byte(reader); SerdStatus st = SERD_SUCCESS; @@ -523,10 +531,9 @@ read_PN_LOCAL(SerdReader* reader, Ref dest, bool* ate_dot) trailing_unescaped_dot = (c == '.'); } - SerdNode* const n = deref(reader, dest); if (trailing_unescaped_dot) { // Ate trailing dot, pop it from stack/node and inform caller - --n->n_bytes; + --dest->n_bytes; serd_stack_pop(&reader->stack, 1); *ate_dot = true; } @@ -536,7 +543,7 @@ read_PN_LOCAL(SerdReader* reader, Ref dest, bool* ate_dot) // Read the remainder of a PN_PREFIX after some initial characters static SerdStatus -read_PN_PREFIX_tail(SerdReader* reader, Ref dest) +read_PN_PREFIX_tail(SerdReader* reader, SerdNode* dest) { uint8_t c; while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.')* @@ -547,8 +554,7 @@ read_PN_PREFIX_tail(SerdReader* reader, Ref dest) } } - const SerdNode* const n = deref(reader, dest); - if (serd_node_get_string(n)[n->n_bytes - 1] == '.' && + if (serd_node_get_string(dest)[dest->n_bytes - 1] == '.' && read_PN_CHARS(reader, dest)) { r_err(reader, SERD_ERR_BAD_SYNTAX, "prefix ends with `.'\n"); return SERD_ERR_BAD_SYNTAX; @@ -558,7 +564,7 @@ read_PN_PREFIX_tail(SerdReader* reader, Ref dest) } static SerdStatus -read_PN_PREFIX(SerdReader* reader, Ref dest) +read_PN_PREFIX(SerdReader* reader, SerdNode* dest) { if (!read_PN_CHARS_BASE(reader, dest)) { return read_PN_PREFIX_tail(reader, dest); @@ -566,14 +572,20 @@ read_PN_PREFIX(SerdReader* reader, Ref dest) return SERD_FAILURE; } -static Ref +static SerdNode* read_LANGTAG(SerdReader* reader) { uint8_t c = peek_byte(reader); if (!is_alpha(c)) { - return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected `%c'\n", c); + r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected `%c'\n", c); + return NULL; + } + + SerdNode* ref = push_node(reader, SERD_LITERAL, "", 0); + if (!ref) { + return NULL; } - Ref ref = push_node(reader, SERD_LITERAL, "", 0); + push_byte(reader, ref, eat_byte_safe(reader, c)); while ((c = peek_byte(reader)) && is_alpha(c)) { push_byte(reader, ref, eat_byte_safe(reader, c)); @@ -588,7 +600,7 @@ read_LANGTAG(SerdReader* reader) } static bool -read_IRIREF_scheme(SerdReader* reader, Ref dest) +read_IRIREF_scheme(SerdReader* reader, SerdNode* dest) { uint8_t c = peek_byte(reader); if (!is_alpha(c)) { @@ -613,12 +625,12 @@ read_IRIREF_scheme(SerdReader* reader, Ref dest) return false; } -static Ref +static SerdNode* read_IRIREF(SerdReader* reader) { TRY_RET(eat_byte_check(reader, '<')); - Ref ref = push_node(reader, SERD_URI, "", 0); - if (!fancy_syntax(reader) && !read_IRIREF_scheme(reader, ref)) { + SerdNode* ref = push_node(reader, SERD_URI, "", 0); + if (!ref || (!fancy_syntax(reader) && !read_IRIREF_scheme(reader, ref))) { return pop_node(reader, ref); } @@ -633,7 +645,7 @@ read_IRIREF(SerdReader* reader) case '>': return ref; case '\\': - if (!read_UCHAR(reader, ref, &code)) { + if (read_UCHAR(reader, ref, &code)) { r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n"); return pop_node(reader, ref); } @@ -672,7 +684,7 @@ read_IRIREF(SerdReader* reader) } static bool -read_PrefixedName(SerdReader* reader, Ref dest, bool read_prefix, bool* ate_dot) +read_PrefixedName(SerdReader* reader, SerdNode* dest, bool read_prefix, bool* ate_dot) { if (read_prefix && read_PN_PREFIX(reader, dest) > SERD_FAILURE) { return false; @@ -685,7 +697,7 @@ read_PrefixedName(SerdReader* reader, Ref dest, bool read_prefix, bool* ate_dot) } static bool -read_0_9(SerdReader* reader, Ref str, bool at_least_one) +read_0_9(SerdReader* reader, SerdNode* str, bool at_least_one) { unsigned count = 0; for (uint8_t c; is_digit((c = peek_byte(reader))); ++count) { @@ -699,20 +711,23 @@ read_0_9(SerdReader* reader, Ref str, bool at_least_one) static bool read_number(SerdReader* reader, - Ref* dest, - Ref* datatype, + SerdNode** dest, + SerdNode** datatype, SerdNodeFlags* flags, bool* ate_dot) { #define XSD_DECIMAL NS_XSD "decimal" #define XSD_DOUBLE NS_XSD "double" #define XSD_INTEGER NS_XSD "integer" - Ref ref = push_node(reader, SERD_LITERAL, "", 0); - uint8_t c = peek_byte(reader); - bool has_decimal = false; - if (c == '-' || c == '+') { + SerdNode* ref = push_node(reader, SERD_LITERAL, "", 0); + uint8_t c = peek_byte(reader); + bool has_decimal = false; + if (!ref) { + return false; + } else if (c == '-' || c == '+') { push_byte(reader, ref, eat_byte_safe(reader, c)); } + if ((c = peek_byte(reader)) == '.') { has_decimal = true; // decimal case 2 (e.g. '.0' or `-.0' or `+.0') @@ -767,23 +782,29 @@ except: } static bool -read_iri(SerdReader* reader, Ref* dest, bool* ate_dot) +read_iri(SerdReader* reader, SerdNode** dest, bool* ate_dot) { switch (peek_byte(reader)) { case '<': *dest = read_IRIREF(reader); return true; default: - *dest = push_node(reader, SERD_CURIE, "", 0); + if (!(*dest = push_node(reader, SERD_CURIE, "", 0))) { + return false; + } return read_PrefixedName(reader, *dest, true, ate_dot); } } static bool -read_literal(SerdReader* reader, Ref* dest, - Ref* datatype, Ref* lang, SerdNodeFlags* flags, bool* ate_dot) +read_literal(SerdReader* reader, + SerdNode** dest, + SerdNode** datatype, + SerdNode** lang, + SerdNodeFlags* flags, + bool* ate_dot) { - Ref str = read_String(reader, flags); + SerdNode* str = read_String(reader, flags); if (!str) { return false; } @@ -823,7 +844,7 @@ is_token_end(uint8_t c) } static bool -read_verb(SerdReader* reader, Ref* dest) +read_verb(SerdReader* reader, SerdNode** dest) { if (peek_byte(reader) == '<') { return (*dest = read_IRIREF(reader)); @@ -832,10 +853,13 @@ read_verb(SerdReader* reader, Ref* dest) /* Either a qname, or "a". Read the prefix first, and if it is in fact "a", produce that instead. */ - *dest = push_node(reader, SERD_CURIE, "", 0); + if (!(*dest = push_node(reader, SERD_CURIE, "", 0))) { + return false; + } + const SerdStatus st = read_PN_PREFIX(reader, *dest); bool ate_dot = false; - SerdNode* node = deref(reader, *dest); + SerdNode* node = *dest; if (!st && node->n_bytes == 1 && serd_node_get_string(node)[0] == 'a' && is_token_end(peek_byte(reader))) { @@ -851,34 +875,36 @@ read_verb(SerdReader* reader, Ref* dest) return true; } -static Ref +static SerdNode* read_BLANK_NODE_LABEL(SerdReader* reader, bool* ate_dot) { eat_byte_safe(reader, '_'); eat_byte_check(reader, ':'); - Ref ref = push_node(reader, SERD_BLANK, - reader->bprefix ? reader->bprefix : "", - reader->bprefix_len); + SerdNode* n = push_node(reader, SERD_BLANK, + reader->bprefix ? reader->bprefix : "", + reader->bprefix_len); + if (!n) { + return NULL; + } uint8_t c = peek_byte(reader); // First: (PN_CHARS | '_' | [0-9]) if (is_digit(c) || c == '_') { - push_byte(reader, ref, eat_byte_safe(reader, c)); - } else if (read_PN_CHARS(reader, ref)) { + push_byte(reader, n, eat_byte_safe(reader, c)); + } else if (read_PN_CHARS(reader, n)) { r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid name start character\n"); - return pop_node(reader, ref); + return pop_node(reader, n); } while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.')* if (c == '.') { - push_byte(reader, ref, eat_byte_safe(reader, c)); - } else if (read_PN_CHARS(reader, ref)) { + push_byte(reader, n, eat_byte_safe(reader, c)); + } else if (read_PN_CHARS(reader, n)) { break; } } - SerdNode* n = deref(reader, ref); - char* buf = serd_node_buffer(n); - if (buf[n->n_bytes - 1] == '.' && read_PN_CHARS(reader, ref)) { + char* buf = serd_node_buffer(n); + if (buf[n->n_bytes - 1] == '.' && read_PN_CHARS(reader, n)) { // Ate trailing dot, pop it from stack/node and inform caller --n->n_bytes; serd_stack_pop(&reader->stack, 1); @@ -893,30 +919,31 @@ read_BLANK_NODE_LABEL(SerdReader* reader, bool* ate_dot) } else if (reader->seen_genid && buf[reader->bprefix_len] == 'B') { r_err(reader, SERD_ERR_ID_CLASH, "found both `b' and `B' blank IDs, prefix required\n"); - return pop_node(reader, ref); + return pop_node(reader, n); } } } - return ref; + return n; } -static Ref +static SerdNode* read_blankName(SerdReader* reader) { eat_byte_safe(reader, '='); if (eat_byte_check(reader, '=') != '=') { - return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected `='\n"); + r_err(reader, SERD_ERR_BAD_SYNTAX, "expected `='\n"); + return NULL; } - Ref subject = 0; - bool ate_dot = false; + SerdNode* subject = 0; + bool ate_dot = false; read_ws_star(reader); read_iri(reader, &subject, &ate_dot); return subject; } static bool -read_anon(SerdReader* reader, ReadContext ctx, bool subject, Ref* dest) +read_anon(SerdReader* reader, ReadContext ctx, bool subject, SerdNode** dest) { const SerdStatementFlags old_flags = *ctx.flags; bool empty; @@ -953,7 +980,7 @@ read_anon(SerdReader* reader, ReadContext ctx, bool subject, Ref* dest) } read_ws_star(reader); if (reader->sink->end) { - reader->sink->end(reader->sink->handle, deref(reader, *dest)); + reader->sink->end(reader->sink->handle, *dest); } *ctx.flags = old_flags; } @@ -975,10 +1002,9 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot) bool ret = false; bool simple = (ctx->subject != 0); - SerdNode* node = NULL; - Ref o = 0; - Ref datatype = 0; - Ref lang = 0; + SerdNode* o = 0; + SerdNode* datatype = 0; + SerdNode* lang = 0; uint32_t flags = 0; const uint8_t c = peek_byte(reader); if (!fancy_syntax(reader)) { @@ -1017,17 +1043,16 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot) /* Either a boolean literal, or a qname. Read the prefix first, and if it is in fact a "true" or "false" literal, produce that instead. */ - o = push_node(reader, SERD_CURIE, "", 0); + TRY_THROW(o = push_node(reader, SERD_CURIE, "", 0)); while (!read_PN_CHARS_BASE(reader, o)) {} - node = deref(reader, o); - if ((node->n_bytes == 4 && - !memcmp(serd_node_get_string(node), "true", 4)) || - (node->n_bytes == 5 && - !memcmp(serd_node_get_string(node), "false", 5))) { - flags = flags | SERD_HAS_DATATYPE; - node->type = SERD_LITERAL; - datatype = push_node( - reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN); + if ((o->n_bytes == 4 && + !memcmp(serd_node_get_string(o), "true", 4)) || + (o->n_bytes == 5 && + !memcmp(serd_node_get_string(o), "false", 5))) { + flags = flags | SERD_HAS_DATATYPE; + o->type = SERD_LITERAL; + TRY_THROW(datatype = push_node( + reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN)); ret = true; } else if (read_PN_PREFIX_tail(reader, o) > SERD_FAILURE) { ret = false; @@ -1039,7 +1064,7 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot) } if (simple && o) { - deref(reader, o)->flags = flags; + o->flags = flags; } if (ret && emit && simple) { @@ -1112,7 +1137,7 @@ read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot) } static bool -end_collection(SerdReader* reader, ReadContext ctx, Ref n1, Ref n2, bool ret) +end_collection(SerdReader* reader, ReadContext ctx, SerdNode* n1, SerdNode* n2, bool ret) { pop_node(reader, n2); pop_node(reader, n1); @@ -1121,7 +1146,7 @@ end_collection(SerdReader* reader, ReadContext ctx, Ref n1, Ref n2, bool ret) } static bool -read_collection(SerdReader* reader, ReadContext ctx, Ref* dest) +read_collection(SerdReader* reader, ReadContext ctx, SerdNode** dest) { eat_byte_safe(reader, '('); bool end = peek_delim(reader, ')'); @@ -1141,10 +1166,14 @@ read_collection(SerdReader* reader, ReadContext ctx, Ref* dest) /* The order of node allocation here is necessarily not in stack order, so we create two nodes and recycle them throughout. */ - Ref n1 = push_node_padded(reader, genid_size(reader), SERD_BLANK, "", 0); - Ref n2 = 0; - Ref node = n1; - Ref rest = 0; + SerdNode* n1 = push_node_padded(reader, genid_size(reader), SERD_BLANK, "", 0); + SerdNode* n2 = 0; + SerdNode* node = n1; + SerdNode* rest = 0; + + if (!n1) { + return false; + } ctx.subject = *dest; while (!(end = peek_delim(reader, ')'))) { @@ -1179,8 +1208,8 @@ read_collection(SerdReader* reader, ReadContext ctx, Ref* dest) return end_collection(reader, ctx, n1, n2, true); } -static Ref -read_subject(SerdReader* reader, ReadContext ctx, Ref* dest, char* s_type) +static SerdNode* +read_subject(SerdReader* reader, ReadContext ctx, SerdNode** dest, char* s_type) { bool ate_dot = false; switch ((*s_type = peek_byte(reader))) { @@ -1199,11 +1228,11 @@ read_subject(SerdReader* reader, ReadContext ctx, Ref* dest, char* s_type) return ate_dot ? pop_node(reader, *dest) : *dest; } -static Ref +static SerdNode* read_labelOrSubject(SerdReader* reader, ReadContext ctx) { - Ref subject = 0; - bool ate_dot = false; + SerdNode* subject = 0; + bool ate_dot = false; switch (peek_byte(reader)) { case '[': eat_byte_safe(reader, '['); @@ -1244,11 +1273,12 @@ read_base(SerdReader* reader, bool sparql, bool token) TRY_RET(eat_string(reader, "base", 4)); } - Ref uri; read_ws_star(reader); - TRY_RET(uri = read_IRIREF(reader)); - if (reader->sink->base) { - reader->sink->base(reader->sink->handle, deref(reader, uri)); + SerdNode* uri = read_IRIREF(reader); + if (!uri) { + return false; + } else if (reader->sink->base) { + reader->sink->base(reader->sink->handle, uri); } pop_node(reader, uri); @@ -1270,9 +1300,11 @@ read_prefixID(SerdReader* reader, bool sparql, bool token) } read_ws_star(reader); - bool ret = true; - Ref name = push_node(reader, SERD_LITERAL, "", 0); - if (read_PN_PREFIX(reader, name) > SERD_FAILURE) { + bool ret = true; + SerdNode* name = push_node(reader, SERD_LITERAL, "", 0); + if (!name) { + return false; + } else if (read_PN_PREFIX(reader, name) > SERD_FAILURE) { return pop_node(reader, name); } @@ -1281,16 +1313,14 @@ read_prefixID(SerdReader* reader, bool sparql, bool token) } read_ws_star(reader); - const Ref uri = read_IRIREF(reader); + const SerdNode* uri = read_IRIREF(reader); if (!uri) { pop_node(reader, name); return false; } if (reader->sink->prefix) { - ret = !reader->sink->prefix(reader->sink->handle, - deref(reader, name), - deref(reader, uri)); + ret = !reader->sink->prefix(reader->sink->handle, name, uri); } pop_node(reader, uri); pop_node(reader, name); @@ -1333,7 +1363,7 @@ read_wrappedGraph(SerdReader* reader, ReadContext* ctx) read_ws_star(reader); while (peek_byte(reader) != '}') { ctx->subject = 0; - Ref subj = read_subject(reader, *ctx, &ctx->subject, &s_type); + SerdNode* subj = read_subject(reader, *ctx, &ctx->subject, &s_type); if (!subj && ctx->subject) { return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad subject\n"); } else if (!subj) { @@ -1353,9 +1383,8 @@ read_wrappedGraph(SerdReader* reader, ReadContext* ctx) } static int -tokcmp(SerdReader* reader, Ref ref, const char* tok, size_t n) +tokcmp(SerdReader* reader, SerdNode* node, const char* tok, size_t n) { - SerdNode* node = deref(reader, ref); if (!node || node->n_bytes != n) { return -1; } @@ -1367,7 +1396,7 @@ read_n3_statement(SerdReader* reader) { SerdStatementFlags flags = 0; ReadContext ctx = { 0, 0, 0, 0, 0, 0, &flags }; - Ref subj = 0; + SerdNode* subj = 0; bool ate_dot = false; char s_type = 0; bool ret = true; diff --git a/src/reader.c b/src/reader.c index 1233bb19..31ee5287 100644 --- a/src/reader.c +++ b/src/reader.c @@ -43,9 +43,8 @@ r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...) } void -set_blank_id(SerdReader* reader, Ref ref, size_t buf_size) +set_blank_id(SerdReader* reader, SerdNode* node, size_t buf_size) { - SerdNode* node = deref(reader, ref); char* buf = (char*)(node + 1); const char* prefix = reader->bprefix ? (const char*)reader->bprefix : ""; node->n_bytes = snprintf(buf, buf_size, "%sb%u", prefix, reader->next_id++); @@ -57,21 +56,29 @@ genid_size(SerdReader* reader) return reader->bprefix_len + 1 + 10 + 1; // + "b" + UINT32_MAX + \0 } -Ref +SerdNode* blank_id(SerdReader* reader) { - Ref ref = push_node_padded(reader, genid_size(reader), SERD_BLANK, "", 0); - set_blank_id(reader, ref, genid_size(reader)); + SerdNode* ref = push_node_padded( + reader, genid_size(reader), SERD_BLANK, "", 0); + if (ref) { + set_blank_id(reader, ref, genid_size(reader)); + } return ref; } -Ref +SerdNode* push_node_padded(SerdReader* reader, size_t maxlen, SerdType type, const char* str, size_t n_bytes) { void* mem = serd_stack_push_aligned( &reader->stack, sizeof(SerdNode) + maxlen + 1, sizeof(SerdNode)); + if (!mem) { + reader->status = SERD_ERR_OVERFLOW; + return NULL; + } + SerdNode* const node = (SerdNode*)mem; node->n_bytes = n_bytes; node->flags = 0; @@ -85,49 +92,45 @@ push_node_padded(SerdReader* reader, size_t maxlen, reader->allocs, sizeof(reader->allocs) * (++reader->n_allocs)); reader->allocs[reader->n_allocs - 1] = (mem - reader->stack.buf); #endif - return (char*)node - reader->stack.buf; + return node; } -Ref +SerdNode* push_node(SerdReader* reader, SerdType type, const char* str, size_t n_bytes) { return push_node_padded(reader, n_bytes, type, str, n_bytes); } SerdNode* -deref(SerdReader* reader, const Ref ref) +pop_node(SerdReader* reader, const SerdNode* node) { - return ref ? (SerdNode*)(reader->stack.buf + ref) : NULL; -} - -Ref -pop_node(SerdReader* reader, Ref ref) -{ - if (ref && ref != reader->rdf_first && ref != reader->rdf_rest - && ref != reader->rdf_nil) { + if (node && node != reader->rdf_first && node != reader->rdf_rest + && node != reader->rdf_nil) { #ifdef SERD_STACK_CHECK - SERD_STACK_ASSERT_TOP(reader, ref); + SERD_STACK_ASSERT_TOP(reader, node); --reader->n_allocs; #endif - SerdNode* const node = deref(reader, ref); - char* const top = reader->stack.buf + reader->stack.size; + char* const top = reader->stack.buf + reader->stack.size; serd_stack_pop_aligned(&reader->stack, top - (char*)node); } - return 0; + return NULL; } bool -emit_statement(SerdReader* reader, ReadContext ctx, Ref o, Ref d, Ref l) +emit_statement(SerdReader* reader, + ReadContext ctx, + SerdNode* o, + SerdNode* d, + SerdNode* l) { - SerdNode* graph = deref(reader, ctx.graph); + SerdNode* graph = ctx.graph; if (!graph && reader->default_graph) { graph = reader->default_graph; } bool ret = !reader->sink->statement || !reader->sink->statement( reader->sink->handle, *ctx.flags, graph, - deref(reader, ctx.subject), deref(reader, ctx.predicate), - deref(reader, o)); + ctx.subject, ctx.predicate, o); *ctx.flags &= SERD_ANON_CONT|SERD_LIST_CONT; // Preserve only cont flags return ret; } @@ -157,15 +160,16 @@ serd_reader_read_document(SerdReader* reader) } SerdReader* -serd_reader_new(SerdWorld* world, +serd_reader_new(SerdWorld* world, SerdSyntax syntax, - const SerdSinkInterface* sink) + const SerdSinkInterface* sink, + size_t stack_size) { SerdReader* me = (SerdReader*)calloc(1, sizeof(SerdReader)); me->world = world; me->sink = sink; me->default_graph = NULL; - me->stack = serd_stack_new(SERD_PAGE_SIZE); + me->stack = serd_stack_new(stack_size); me->syntax = syntax; me->next_id = 1; me->strict = true; diff --git a/src/reader.h b/src/reader.h index abae5d92..92baac3f 100644 --- a/src/reader.h +++ b/src/reader.h @@ -35,18 +35,13 @@ # define SERD_STACK_ASSERT_TOP(reader, ref) #endif -/* Reference to a node in the stack (we can not use pointers since the - stack may be reallocated, invalidating any pointers to elements). -*/ -typedef size_t Ref; - typedef struct { - Ref graph; - Ref subject; - Ref predicate; - Ref object; - Ref datatype; - Ref lang; + SerdNode* graph; + SerdNode* subject; + SerdNode* predicate; + SerdNode* object; + SerdNode* datatype; + SerdNode* lang; SerdStatementFlags* flags; } ReadContext; @@ -55,9 +50,9 @@ struct SerdReaderImpl { const SerdSinkInterface* sink; SerdErrorSink error_sink; void* error_handle; - Ref rdf_first; - Ref rdf_rest; - Ref rdf_nil; + SerdNode* rdf_first; + SerdNode* rdf_rest; + SerdNode* rdf_nil; SerdNode* default_graph; SerdByteSource source; SerdStack stack; @@ -70,33 +65,35 @@ struct SerdReaderImpl { bool strict; ///< True iff strict parsing bool seen_genid; #ifdef SERD_STACK_CHECK - Ref* allocs; ///< Stack of push offsets + SerdNode** allocs; ///< Stack of push offsets size_t n_allocs; ///< Number of stack pushes #endif }; int r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...); -Ref push_node_padded(SerdReader* reader, - size_t maxlen, - SerdType type, - const char* str, - size_t n_bytes); - -Ref push_node(SerdReader* reader, - SerdType type, - const char* str, - size_t n_bytes); +SerdNode* push_node_padded(SerdReader* reader, + size_t maxlen, + SerdType type, + const char* str, + size_t n_bytes); -size_t genid_size(SerdReader* reader); -Ref blank_id(SerdReader* reader); -void set_blank_id(SerdReader* reader, Ref ref, size_t buf_size); +SerdNode* push_node(SerdReader* reader, + SerdType type, + const char* str, + size_t n_bytes); -SerdNode* deref(SerdReader* reader, Ref ref); +size_t genid_size(SerdReader* reader); +SerdNode* blank_id(SerdReader* reader); +void set_blank_id(SerdReader* reader, SerdNode* node, size_t buf_size); -Ref pop_node(SerdReader* reader, Ref ref); +SerdNode* pop_node(SerdReader* reader, const SerdNode* node); -bool emit_statement(SerdReader* reader, ReadContext ctx, Ref o, Ref d, Ref l); +bool emit_statement(SerdReader* reader, + ReadContext ctx, + SerdNode* o, + SerdNode* d, + SerdNode* l); bool read_n3_statement(SerdReader* reader); SerdStatus read_nquadsDoc(SerdReader* reader); @@ -149,23 +146,31 @@ eat_string(SerdReader* reader, const char* str, unsigned n) } static inline SerdStatus -push_byte(SerdReader* reader, Ref ref, const uint8_t c) +push_byte(SerdReader* reader, SerdNode* node, const uint8_t c) { SERD_STACK_ASSERT_TOP(reader, ref); - char* const s = (char*)serd_stack_push(&reader->stack, 1); - SerdNode* const node = (SerdNode*)(reader->stack.buf + ref); + char* const s = (char*)serd_stack_push(&reader->stack, 1); + if (!s) { + return SERD_ERR_OVERFLOW; + } + ++node->n_bytes; *(s - 1) = c; *s = '\0'; return SERD_SUCCESS; } -static inline void -push_bytes(SerdReader* reader, Ref ref, const uint8_t* bytes, unsigned len) +static inline SerdStatus +push_bytes(SerdReader* reader, SerdNode* ref, const uint8_t* bytes, unsigned len) { + if (reader->stack.buf_size < reader->stack.size + len) { + return SERD_ERR_OVERFLOW; + } + for (unsigned i = 0; i < len; ++i) { push_byte(reader, ref, bytes[i]); } + return SERD_SUCCESS; } #endif // SERD_READER_H diff --git a/src/serdi.c b/src/serdi.c index 1f12812b..20690c37 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -20,9 +20,11 @@ #include "serd/serd.h" +#include <limits.h> #include <stdbool.h> +#include <stdint.h> #include <stdio.h> -#include <string.h> +#include <stdlib.h> #define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg); #define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__); @@ -135,6 +137,7 @@ main(int argc, char** argv) bool lax = false; bool use_model = false; bool quiet = false; + long stack_size = 4194304; const char* add_prefix = NULL; const char* chop_prefix = NULL; const char* root_uri = NULL; @@ -171,6 +174,15 @@ main(int argc, char** argv) } else if (!(input_syntax = get_syntax(argv[a]))) { return print_usage(argv[0], true); } + } else if (argv[a][1] == 'k') { + if (++a == argc) { + return missing_arg(argv[0], 'k'); + } + stack_size = strtol(argv[a], NULL, 10); + if (stack_size <= 0 || stack_size == LONG_MAX) { + SERDI_ERRORF("stack size `%ld' out of range\n", stack_size); + return 1; + } } else if (argv[a][1] == 'o') { if (++a == argc) { return missing_arg(argv[0], 'o'); @@ -263,7 +275,7 @@ main(int argc, char** argv) sink = serd_writer_get_sink_interface(writer); } - reader = serd_reader_new(world, input_syntax, sink); + reader = serd_reader_new(world, input_syntax, sink, stack_size); serd_reader_set_strict(reader, !lax); if (quiet) { serd_world_set_error_sink(world, quiet_error_sink, NULL); diff --git a/src/stack.h b/src/stack.h index 122b3b14..3848af66 100644 --- a/src/stack.h +++ b/src/stack.h @@ -61,8 +61,7 @@ serd_stack_push(SerdStack* stack, size_t n_bytes) { const size_t new_size = stack->size + n_bytes; if (stack->buf_size < new_size) { - stack->buf_size += (stack->buf_size >> 1); // *= 1.5 - stack->buf = (char*)realloc(stack->buf, stack->buf_size); + return NULL; } char* const ret = (stack->buf + stack->size); stack->size = new_size; @@ -80,12 +79,16 @@ static inline void* serd_stack_push_aligned(SerdStack* stack, size_t n_bytes, size_t align) { // Push one byte to ensure space for a pad count - serd_stack_push(stack, 1); + if (!serd_stack_push(stack, 1)) { + return NULL; + } // Push padding if necessary const uint8_t pad = align - stack->size % align; if (pad > 0) { - serd_stack_push(stack, pad); + if (!serd_stack_push(stack, pad)) { + return NULL; + } } // Set top of stack to pad count so we can properly pop later diff --git a/src/string.c b/src/string.c index 3ca5bd98..b32df6e3 100644 --- a/src/string.c +++ b/src/string.c @@ -36,6 +36,7 @@ serd_strerror(SerdStatus status) case SERD_ERR_ID_CLASH: return "Blank node ID clash"; case SERD_ERR_BAD_CURIE: return "Invalid CURIE"; case SERD_ERR_INTERNAL: return "Internal error"; + case SERD_ERR_OVERFLOW: return "Stack overflow"; } return "Unknown error"; // never reached } diff --git a/src/writer.c b/src/writer.c index 2f17d86d..f4f60413 100644 --- a/src/writer.c +++ b/src/writer.c @@ -877,7 +877,7 @@ serd_writer_new(SerdWorld* world, writer->env = env; writer->root_node = NULL; writer->root_uri = SERD_URI_NULL; - writer->anon_stack = serd_stack_new(4 * sizeof(WriteContext)); + writer->anon_stack = serd_stack_new(SERD_PAGE_SIZE); writer->context = context; writer->list_subj = NULL; writer->empty = true; diff --git a/tests/read_chunk_test.c b/tests/read_chunk_test.c index 1e41ca8a..82cb4277 100644 --- a/tests/read_chunk_test.c +++ b/tests/read_chunk_test.c @@ -61,7 +61,7 @@ main(int argc, char** argv) { SerdWorld* world = serd_world_new(); SerdSinkInterface sink = { 0, on_base, on_prefix, on_statement, on_end }; - SerdReader* reader = serd_reader_new(world, SERD_TURTLE, &sink); + SerdReader* reader = serd_reader_new(world, SERD_TURTLE, &sink, 4096); if (!reader) { FAIL("Failed to create reader\n"); } diff --git a/tests/serd_test.c b/tests/serd_test.c index e63e9594..5bb7db12 100644 --- a/tests/serd_test.c +++ b/tests/serd_test.c @@ -675,7 +675,7 @@ main(void) ReaderTest rt = { 0, NULL }; SerdSinkInterface sink = { &rt, NULL, NULL, test_sink, NULL }; - SerdReader* reader = serd_reader_new(world, SERD_TURTLE, &sink); + SerdReader* reader = serd_reader_new(world, SERD_TURTLE, &sink, 4096); if (!reader) { FAIL("Failed to create reader\n"); } |