diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/base64.c | 4 | ||||
-rw-r--r-- | src/n3.c | 221 | ||||
-rw-r--r-- | src/node.c | 10 | ||||
-rw-r--r-- | src/node.h | 36 | ||||
-rw-r--r-- | src/reader.c | 30 | ||||
-rw-r--r-- | src/reader.h | 14 | ||||
-rw-r--r-- | src/serd_config.h | 2 | ||||
-rw-r--r-- | src/stack.h | 5 | ||||
-rw-r--r-- | src/string_utils.h | 6 | ||||
-rw-r--r-- | src/try.h | 2 | ||||
-rw-r--r-- | src/writer.c | 78 |
11 files changed, 197 insertions, 211 deletions
diff --git a/src/base64.c b/src/base64.c index b12db9ff..07fdcbfd 100644 --- a/src/base64.c +++ b/src/base64.c @@ -51,7 +51,7 @@ encode_chunk(uint8_t out[4], const uint8_t in[3], const size_t n_in) size_t serd_base64_get_length(const size_t size, const bool wrap_lines) { - return (size + 2) / 3 * 4 + (wrap_lines * ((size - 1) / 57)); + return ((size + 2) / 3 * 4) + (wrap_lines * ((size - 1) / 57)); } bool @@ -102,7 +102,7 @@ serd_base64_decode(const uint8_t* const str, assert(str); assert(size); - void* buf = malloc((len * 3) / 4 + 2); + void* buf = malloc(((len * 3) / 4) + 2); *size = 0; for (size_t i = 0, j = 0; i < len; j += 3) { @@ -1,9 +1,8 @@ -// Copyright 2011-2023 David Robillard <d@drobilla.net> +// Copyright 2011-2025 David Robillard <d@drobilla.net> // SPDX-License-Identifier: ISC #include "reader.h" #include "serd_internal.h" -#include "stack.h" #include "string_utils.h" #include "try.h" #include "uri_utils.h" @@ -46,7 +45,7 @@ read_HEX(SerdReader* const reader) return (uint8_t)eat_byte_safe(reader, c); } - r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid hexadecimal digit '%c'\n", c); + r_err_char(reader, "hexadecimal", c); return 0; } @@ -354,11 +353,11 @@ read_STRING_LITERAL_LONG(SerdReader* const reader, push_byte(reader, ref, c); st = read_character(reader, ref, flags, (uint8_t)q2); } - } else if (c == EOF) { - st = r_err(reader, SERD_ERR_BAD_SYNTAX, "end of file in long string\n"); - } else { + } else if (c > 0) { st = read_character(reader, ref, flags, (uint8_t)eat_byte_safe(reader, c)); + } else { + return r_err(reader, SERD_ERR_BAD_SYNTAX, "end of file in long string\n"); } } @@ -457,7 +456,7 @@ read_PN_CHARS_BASE(SerdReader* const reader, const Ref dest) return push_byte(reader, dest, eat_byte_safe(reader, c)); } - if (c == EOF || !(c & 0x80)) { + if (c < 0x80) { return SERD_FAILURE; } @@ -465,11 +464,7 @@ read_PN_CHARS_BASE(SerdReader* const reader, const Ref dest) read_utf8_code(reader, dest, &code, (uint8_t)c); if (!is_PN_CHARS_BASE(code)) { - r_err( - reader, SERD_ERR_BAD_SYNTAX, "invalid character U+%04X in name\n", code); - if (reader->strict) { - return SERD_ERR_BAD_SYNTAX; - } + st = r_err_char(reader, "name", (int)code); } return st; @@ -493,7 +488,7 @@ read_PN_CHARS(SerdReader* const reader, const Ref dest) return push_byte(reader, dest, eat_byte_safe(reader, c)); } - if (c == EOF || !(c & 0x80)) { + if (c < 0x80) { return SERD_FAILURE; } @@ -501,8 +496,7 @@ read_PN_CHARS(SerdReader* const reader, const Ref dest) TRY(st, read_utf8_code(reader, dest, &code, (uint8_t)c)); if (!is_PN_CHARS(code)) { - return r_err( - reader, SERD_ERR_BAD_SYNTAX, "invalid character U+%04X in name\n", code); + st = r_err_char(reader, "name", (int)code); } return st; @@ -531,7 +525,7 @@ read_PN_LOCAL_ESC(SerdReader* const reader, const Ref dest) return ((c == '!') || in_range(c, '#', '/') || (c == ';') || (c == '=') || (c == '?') || (c == '@') || (c == '_') || (c == '~')) ? push_byte(reader, dest, eat_byte_safe(reader, c)) - : r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape\n"); + : r_err(reader, SERD_ERR_BAD_SYNTAX, "bad escape\n"); } static SerdStatus @@ -589,9 +583,7 @@ read_PN_LOCAL(SerdReader* const reader, const Ref dest, bool* const ate_dot) SerdNode* const n = deref(reader, dest); if (trailing_unescaped_dot) { // Ate trailing dot, pop it from stack/node and inform caller - --n->n_bytes; - serd_stack_pop(&reader->stack, 1); - *ate_dot = true; + *ate_dot = pop_last_node_char(reader, n); } return (st > SERD_FAILURE) ? st : SERD_SUCCESS; @@ -599,31 +591,37 @@ read_PN_LOCAL(SerdReader* const reader, const Ref dest, bool* const ate_dot) // Read the remainder of a PN_PREFIX after some initial characters static SerdStatus -read_PN_PREFIX_tail(SerdReader* const reader, const Ref dest) +read_PN_PREFIX_tail(SerdReader* const reader, + const Ref dest, + bool* const ate_dot) { - int c = 0; - while ((c = peek_byte(reader)) > 0) { // Middle: (PN_CHARS | '.')* + SerdStatus st = SERD_SUCCESS; + bool trailing_unescaped_dot = false; + + while (!st) { // Middle: (PN_CHARS | '.')* + const int c = peek_byte(reader); if (c == '.') { push_byte(reader, dest, eat_byte_safe(reader, c)); - } else if (read_PN_CHARS(reader, dest)) { - break; + trailing_unescaped_dot = true; + } else if (!(st = read_PN_CHARS(reader, dest))) { + trailing_unescaped_dot = false; } } - const SerdNode* const n = deref(reader, dest); - if (n->buf[n->n_bytes - 1] == '.' && read_PN_CHARS(reader, dest)) { - return r_err(reader, SERD_ERR_BAD_SYNTAX, "prefix ends with '.'\n"); + if (trailing_unescaped_dot) { + SerdNode* const n = deref(reader, dest); + *ate_dot = pop_last_node_char(reader, n); } - return SERD_SUCCESS; + return st; } static SerdStatus -read_PN_PREFIX(SerdReader* const reader, const Ref dest) +read_PN_PREFIX(SerdReader* const reader, const Ref dest, bool* const ate_dot) { const SerdStatus st = read_PN_CHARS_BASE(reader, dest); - return st ? st : read_PN_PREFIX_tail(reader, dest); + return st ? st : read_PN_PREFIX_tail(reader, dest, ate_dot); } static SerdStatus @@ -631,7 +629,7 @@ read_LANGTAG(SerdReader* const reader, Ref* const dest) { int c = peek_byte(reader); if (!is_alpha(c)) { - return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected '%c'\n", c); + return r_err_char(reader, "language", c); } *dest = push_node(reader, SERD_LITERAL, "", 0); @@ -657,7 +655,7 @@ read_IRIREF_scheme(SerdReader* const reader, const Ref dest) { int c = peek_byte(reader); if (!is_alpha(c)) { - return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad IRI scheme start '%c'\n", c); + return r_err_char(reader, "IRI scheme start", c); } while ((c = peek_byte(reader)) > 0) { @@ -666,11 +664,7 @@ read_IRIREF_scheme(SerdReader* const reader, const Ref dest) } if (!is_uri_scheme_char(c)) { - return r_err(reader, - SERD_ERR_BAD_SYNTAX, - "bad IRI scheme char U+%04X (%c)\n", - (unsigned)c, - (char)c); + return r_err_char(reader, "IRI scheme", c); } push_byte(reader, dest, eat_byte_safe(reader, c)); @@ -704,8 +698,7 @@ read_IRIREF(SerdReader* const reader, Ref* const dest) case '"': case '<': *dest = pop_node(reader, *dest); - return r_err( - reader, SERD_ERR_BAD_SYNTAX, "invalid IRI character '%c'\n", c); + return r_err_char(reader, "IRI", c); case '>': return SERD_SUCCESS; @@ -713,7 +706,7 @@ read_IRIREF(SerdReader* const reader, Ref* const dest) case '\\': if (read_UCHAR(reader, *dest, &code)) { *dest = pop_node(reader, *dest); - return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n"); + return r_err_char(reader, "IRI escape", c); } if (code == ' ' || code == '<' || code == '>') { @@ -731,11 +724,12 @@ read_IRIREF(SerdReader* const reader, Ref* const dest) case '|': case '}': *dest = pop_node(reader, *dest); - return r_err( - reader, SERD_ERR_BAD_SYNTAX, "invalid IRI character '%c'\n", c); + return r_err_char(reader, "IRI", c); default: - if (c <= 0x20) { + if (c <= 0) { + st = r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected end of file\n"); + } else if (c <= 0x20) { st = r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI character (escape %%%02X)\n", @@ -743,8 +737,6 @@ read_IRIREF(SerdReader* const reader, Ref* const dest) if (!reader->strict) { st = SERD_FAILURE; push_byte(reader, *dest, c); - } else { - break; } } else if (!(c & 0x80)) { push_byte(reader, *dest, c); @@ -762,15 +754,10 @@ read_IRIREF(SerdReader* const reader, Ref* const dest) } static SerdStatus -read_PrefixedName(SerdReader* const reader, - const Ref dest, - const bool read_prefix, - bool* const ate_dot) +read_PrefixedName(SerdReader* const reader, const Ref dest, bool* const ate_dot) { SerdStatus st = SERD_SUCCESS; - if (read_prefix) { - TRY_FAILING(st, read_PN_PREFIX(reader, dest)); - } + TRY_FAILING(st, read_PN_PREFIX(reader, dest, ate_dot)); if (peek_byte(reader) != ':') { return SERD_FAILURE; @@ -825,18 +812,16 @@ read_number(SerdReader* const reader, // all other cases ::= ( '-' | '+' ) [0-9]+ ( . )? ( [0-9]+ )? ... TRY(st, read_0_9(reader, *dest, true)); if ((c = peek_byte(reader)) == '.') { - has_decimal = true; - // Annoyingly, dot can be end of statement, so tentatively eat skip_byte(reader, c); c = peek_byte(reader); if (!is_digit(c) && c != 'e' && c != 'E') { - *ate_dot = true; // Force caller to deal with stupid grammar - return SERD_SUCCESS; // Next byte is not a number character + *ate_dot = true; // Force caller to deal with silly grammar + } else { + has_decimal = true; + push_byte(reader, *dest, '.'); + read_0_9(reader, *dest, false); } - - push_byte(reader, *dest, '.'); - read_0_9(reader, *dest, false); } } c = peek_byte(reader); @@ -868,7 +853,7 @@ read_iri(SerdReader* const reader, Ref* const dest, bool* const ate_dot) } *dest = push_node(reader, SERD_CURIE, "", 0); - return read_PrefixedName(reader, *dest, true, ate_dot); + return read_PrefixedName(reader, *dest, ate_dot); } static SerdStatus @@ -920,29 +905,31 @@ read_verb(SerdReader* const reader, Ref* const dest) return read_IRIREF(reader, dest); } - /* Either a qname, or "a". Read the prefix first, and if it is in fact - "a", produce that instead. - */ - *dest = push_node(reader, SERD_CURIE, "", 0); + Ref p = push_node(reader, SERD_CURIE, "", 0); - SerdStatus st = read_PN_PREFIX(reader, *dest); - bool ate_dot = false; - const SerdNode* const node = deref(reader, *dest); - const int next = peek_byte(reader); - if (!st && node->n_bytes == 1 && node->buf[0] == 'a' && next != ':' && - !is_PN_CHARS_BASE((uint32_t)next)) { - pop_node(reader, *dest); - *dest = push_node(reader, SERD_URI, NS_RDF "type", 47); - return SERD_SUCCESS; + // Try to read as a prefixed name + bool ate_dot = false; + SerdStatus st = read_PrefixedName(reader, p, &ate_dot); + + if (st == SERD_FAILURE) { + // Check if this is actually the "a" shorthand + const SerdNode* const node = deref(reader, p); + if (node->n_bytes == 1 && node->buf[0] == 'a') { + pop_node(reader, p); + p = push_node(reader, SERD_URI, NS_RDF "type", 47); + st = SERD_SUCCESS; + } else { + st = SERD_ERR_BAD_SYNTAX; + } } - if (st > SERD_FAILURE || - (st = read_PrefixedName(reader, *dest, false, &ate_dot)) || ate_dot) { - *dest = pop_node(reader, *dest); - st = st > SERD_FAILURE ? st : SERD_ERR_BAD_SYNTAX; + if (st) { + pop_node(reader, p); + *dest = 0; return r_err(reader, st, "bad verb\n"); } + *dest = p; return SERD_SUCCESS; } @@ -981,9 +968,7 @@ read_BLANK_NODE_LABEL(SerdReader* const reader, SerdNode* n = deref(reader, ref); if (n->buf[n->n_bytes - 1] == '.' && read_PN_CHARS(reader, ref)) { // Ate trailing dot, pop it from stack/node and inform caller - --n->n_bytes; - serd_stack_pop(&reader->stack, 1); - *ate_dot = true; + *ate_dot = pop_last_node_char(reader, n); } if (fancy_syntax(reader)) { @@ -1078,6 +1063,40 @@ read_anon(SerdReader* const reader, : SERD_ERR_BAD_SYNTAX; } +// Read a "named" object: a boolean literal or a prefixed name +static SerdStatus +read_named_object(SerdReader* const reader, + Ref* const dest, + Ref* const datatype, + bool* const ate_dot) +{ + static const char* const XSD_BOOLEAN = NS_XSD "boolean"; + static const size_t XSD_BOOLEAN_LEN = 40; + + // Try to read as a prefixed name + const Ref o = push_node(reader, SERD_CURIE, "", 0); + SerdStatus st = read_PrefixedName(reader, o, ate_dot); + + if (st == SERD_FAILURE) { + // Check if this is actually a boolean literal + SerdNode* const node = deref(reader, o); + if ((node->n_bytes == 4 && !memcmp(node->buf, "true", 4)) || + (node->n_bytes == 5 && !memcmp(node->buf, "false", 5))) { + node->type = SERD_LITERAL; + *datatype = push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN); + st = SERD_SUCCESS; + } + } + + if (st) { + pop_node(reader, o); + return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected prefixed name\n"); + } + + *dest = o; + return SERD_SUCCESS; +} + /* If emit is true: recurses, calling statement_sink for every statement encountered, and leaves stack in original calling state (i.e. pops everything it pushes). */ @@ -1087,9 +1106,6 @@ read_object(SerdReader* const reader, const bool emit, bool* const ate_dot) { - static const char* const XSD_BOOLEAN = NS_XSD "boolean"; - static const size_t XSD_BOOLEAN_LEN = 40; - #ifndef NDEBUG const size_t orig_stack_size = reader->stack.size; #endif @@ -1097,7 +1113,6 @@ read_object(SerdReader* const reader, SerdStatus st = SERD_FAILURE; bool simple = (ctx->subject != 0); - SerdNode* node = NULL; Ref o = 0; Ref datatype = 0; Ref lang = 0; @@ -1147,27 +1162,8 @@ read_object(SerdReader* const reader, st = read_literal(reader, &o, &datatype, &lang, &flags, ate_dot); break; default: - /* Either a boolean literal, or a qname. Read the prefix first, and if - it is in fact a "true" or "false" literal, produce that instead. - */ - o = push_node(reader, SERD_CURIE, "", 0); - while (!read_PN_CHARS_BASE(reader, o)) { - } - node = deref(reader, o); - if ((node->n_bytes == 4 && !memcmp(node->buf, "true", 4)) || - (node->n_bytes == 5 && !memcmp(node->buf, "false", 5))) { - node->type = SERD_LITERAL; - datatype = push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN); - st = SERD_SUCCESS; - } else if (read_PN_PREFIX_tail(reader, o) > SERD_FAILURE) { - st = SERD_ERR_BAD_SYNTAX; - } else { - if ((st = read_PrefixedName(reader, o, false, ate_dot))) { - st = st > SERD_FAILURE ? st : SERD_ERR_BAD_SYNTAX; - pop_node(reader, o); - return r_err(reader, st, "expected prefixed name\n"); - } - } + // Either a boolean literal or a prefixed name + st = read_named_object(reader, &o, &datatype, ate_dot); } if (!st && simple && o) { @@ -1319,7 +1315,10 @@ read_collection(SerdReader* const reader, ReadContext ctx, Ref* const dest) // _:node rdf:rest _:rest *ctx.flags |= SERD_LIST_CONT; ctx.predicate = reader->rdf_rest; - TRY(st, emit_statement(reader, ctx, (end ? reader->rdf_nil : rest), 0, 0)); + st = emit_statement(reader, ctx, (end ? reader->rdf_nil : rest), 0, 0); + if (st) { + break; + } ctx.subject = rest; // _:node = _:rest rest = node; // _:rest = (old)_:node @@ -1444,12 +1443,12 @@ read_prefixID(SerdReader* const reader, const bool sparql, const bool token) } read_ws_star(reader); - Ref name = push_node(reader, SERD_LITERAL, "", 0); - TRY_FAILING(st, read_PN_PREFIX(reader, name)); - - if (eat_byte_check(reader, ':') != ':') { + Ref name = push_node(reader, SERD_LITERAL, "", 0); + bool ate_dot = false; + TRY_FAILING(st, read_PN_PREFIX(reader, name, &ate_dot)); + if (ate_dot || eat_byte_check(reader, ':') != ':') { pop_node(reader, name); - return SERD_ERR_BAD_SYNTAX; + return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected a prefix name\n"); } read_ws_star(reader); @@ -1,8 +1,6 @@ // Copyright 2011-2023 David Robillard <d@drobilla.net> // SPDX-License-Identifier: ISC -#include "node.h" - #include "base64.h" #include "string_utils.h" @@ -17,6 +15,12 @@ #include <stdlib.h> #include <string.h> +struct SerdNodeImpl { + size_t n_bytes; /**< Size in bytes (not including null) */ + SerdNodeFlags flags; /**< Node flags (e.g. string properties) */ + SerdType type; /**< Node type */ +}; + static size_t serd_uri_string_length(const SerdURI* const uri) { @@ -288,7 +292,7 @@ serd_node_new_decimal(const double d, const unsigned frac_digits) char* t = s - 1; uint64_t dec = (uint64_t)int_part; do { - *t-- = (char)('0' + dec % 10); + *t-- = (char)('0' + (dec % 10)); } while ((dec /= 10) > 0); *s++ = '.'; diff --git a/src/node.h b/src/node.h deleted file mode 100644 index ce53a1be..00000000 --- a/src/node.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2011-2023 David Robillard <d@drobilla.net> -// SPDX-License-Identifier: ISC - -#ifndef SERD_SRC_NODE_H -#define SERD_SRC_NODE_H - -#include <serd/serd.h> - -#include <stddef.h> - -struct SerdNodeImpl { - size_t n_bytes; /**< Size in bytes (not including null) */ - SerdNodeFlags flags; /**< Node flags (e.g. string properties) */ - SerdType type; /**< Node type */ -}; - -static inline char* SERD_NONNULL -serd_node_buffer(SerdNode* const SERD_NONNULL node) -{ - return (char*)(node + 1); -} - -static inline const char* SERD_NONNULL -serd_node_buffer_c(const SerdNode* const SERD_NONNULL node) -{ - return (const char*)(node + 1); -} - -SerdNode* SERD_ALLOCATED -serd_node_malloc(size_t n_bytes, SerdNodeFlags flags, SerdType type); - -void -serd_node_set(SerdNode* SERD_NULLABLE* SERD_NONNULL dst, - const SerdNode* SERD_NULLABLE src); - -#endif // SERD_SRC_NODE_H diff --git a/src/reader.c b/src/reader.c index b2563c49..98500fcd 100644 --- a/src/reader.c +++ b/src/reader.c @@ -30,6 +30,17 @@ r_err(SerdReader* const reader, const SerdStatus st, const char* const fmt, ...) return st; } +SerdStatus +r_err_char(SerdReader* const reader, const char* const kind, const int c) +{ + const SerdStatus st = SERD_ERR_BAD_SYNTAX; + + return (c < 0x20 || c == 0x7F) ? r_err(reader, st, "bad %s character\n", kind) + : (c == '\'' || c >= 0x80) + ? r_err(reader, st, "bad %s character U+%04X\n", kind, (uint32_t)c) + : r_err(reader, st, "bad %s character '%c'\n", kind, c); +} + void set_blank_id(SerdReader* const reader, const Ref ref, const size_t buf_size) { @@ -91,12 +102,16 @@ push_node_padded(SerdReader* const reader, uint8_t* buf = (uint8_t*)(node + 1); memcpy(buf, str, n_bytes + 1); + const Ref ref = (Ref)((uint8_t*)node - reader->stack.buf); + #ifdef SERD_STACK_CHECK - reader->allocs = (Ref*)realloc(reader->allocs, + reader->allocs = (Ref*)realloc(reader->allocs, sizeof(reader->allocs) * (++reader->n_allocs)); - reader->allocs[reader->n_allocs - 1] = ((uint8_t*)mem - reader->stack.buf); + + reader->allocs[reader->n_allocs - 1] = ref; #endif - return (Ref)((uint8_t*)node - reader->stack.buf); + + return ref; } Ref @@ -119,6 +134,14 @@ deref(SerdReader* const reader, const Ref ref) return NULL; } +bool +pop_last_node_char(SerdReader* const reader, SerdNode* const node) +{ + --node->n_bytes; + serd_stack_pop(&reader->stack, 1); + return true; +} + Ref pop_node(SerdReader* const reader, const Ref ref) { @@ -130,6 +153,7 @@ pop_node(SerdReader* const reader, const Ref ref) #endif SerdNode* const node = deref(reader, ref); const uint8_t* const top = reader->stack.buf + reader->stack.size; + assert(top > (uint8_t*)node); serd_stack_pop_aligned(&reader->stack, (size_t)(top - (uint8_t*)node)); } return 0; diff --git a/src/reader.h b/src/reader.h index d7b06a98..74255862 100644 --- a/src/reader.h +++ b/src/reader.h @@ -17,7 +17,7 @@ #ifdef SERD_STACK_CHECK # define SERD_STACK_ASSERT_TOP(reader, ref) \ - assert(ref == reader->allocs[reader->n_allocs - 1]); + assert(ref == reader->allocs[reader->n_allocs - 1]) #else # define SERD_STACK_ASSERT_TOP(reader, ref) #endif @@ -69,6 +69,9 @@ SERD_LOG_FUNC(3, 4) SerdStatus r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...); +SerdStatus +r_err_char(SerdReader* reader, const char* kind, int c); + Ref push_node_padded(SerdReader* reader, size_t maxlen, @@ -91,6 +94,9 @@ set_blank_id(SerdReader* reader, Ref ref, size_t buf_size); SerdNode* deref(SerdReader* reader, Ref ref); +bool +pop_last_node_char(SerdReader* reader, SerdNode* node); + Ref pop_node(SerdReader* reader, Ref ref); @@ -127,7 +133,7 @@ skip_byte(SerdReader* const reader, const int byte) return serd_byte_source_advance(&reader->source); } -static inline int SERD_NODISCARD +SERD_NODISCARD static inline int eat_byte_safe(SerdReader* const reader, const int byte) { (void)byte; @@ -138,12 +144,12 @@ eat_byte_safe(SerdReader* const reader, const int byte) return byte; } -static inline int SERD_NODISCARD +SERD_NODISCARD static inline int eat_byte_check(SerdReader* const reader, const int byte) { const int c = peek_byte(reader); if (c != byte) { - r_err(reader, SERD_ERR_BAD_SYNTAX, "expected '%c', not '%c'\n", byte, c); + r_err(reader, SERD_ERR_BAD_SYNTAX, "expected '%c'\n", byte); return 0; } return eat_byte_safe(reader, byte); diff --git a/src/serd_config.h b/src/serd_config.h index cb356e40..d4165a20 100644 --- a/src/serd_config.h +++ b/src/serd_config.h @@ -36,7 +36,7 @@ #define SERD_SRC_SERD_CONFIG_H // Define version unconditionally so a warning will catch a mismatch -#define SERD_VERSION "0.32.4" +#define SERD_VERSION "0.32.5" #if !defined(SERD_NO_DEFAULT_CONFIG) diff --git a/src/stack.h b/src/stack.h index f82de9d2..63c6ba01 100644 --- a/src/stack.h +++ b/src/stack.h @@ -20,9 +20,6 @@ typedef struct { size_t size; ///< Conceptual size of stack in buf } SerdStack; -/** An offset to start the stack at. Note 0 is reserved for NULL. */ -#define SERD_STACK_BOTTOM sizeof(void*) - static inline SerdStack serd_stack_new(const size_t size) { @@ -79,7 +76,7 @@ serd_stack_push_aligned(SerdStack* const stack, serd_stack_push(stack, 1); // Push padding if necessary - const size_t pad = align - stack->size % align; + const size_t pad = align - (stack->size % align); serd_stack_push(stack, pad); // Set top of stack to pad count so we can properly pop later diff --git a/src/string_utils.h b/src/string_utils.h index 6a098991..f4b27bcc 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -51,13 +51,13 @@ is_xdigit(const int c) static inline bool is_space(const char c) { - return c == ' ' || (c >= '\t' && c <= '\r'); + return c == ' ' || in_range(c, '\t', '\r'); } static inline bool is_print(const int c) { - return c >= 0x20 && c <= 0x7E; + return in_range(c, 0x20, 0x7E); } static inline bool @@ -88,7 +88,7 @@ hex_digit_value(const uint8_t c) static inline char serd_to_upper(const char c) { - return (char)((c >= 'a' && c <= 'z') ? c - 32 : c); + return (char)(in_range(c, 'a', 'z') ? (c - 32) : c); } SERD_PURE_FUNC static inline int @@ -4,6 +4,8 @@ #ifndef SERD_SRC_TRY_H #define SERD_SRC_TRY_H +#include <serd/serd.h> + #define TRY(st, exp) \ do { \ if (((st) = (exp))) { \ diff --git a/src/writer.c b/src/writer.c index f0a9ad16..aa7fd980 100644 --- a/src/writer.c +++ b/src/writer.c @@ -1,4 +1,4 @@ -// Copyright 2011-2023 David Robillard <d@drobilla.net> +// Copyright 2011-2025 David Robillard <d@drobilla.net> // SPDX-License-Identifier: ISC #include "attributes.h" @@ -37,20 +37,12 @@ typedef enum { typedef struct { ContextType type; + bool comma_indented; SerdNode graph; SerdNode subject; SerdNode predicate; - bool predicates; - bool comma_indented; } WriteContext; -static const WriteContext WRITE_CONTEXT_NULL = {CTX_NAMED, - {0, 0, 0, 0, SERD_NOTHING}, - {0, 0, 0, 0, SERD_NOTHING}, - {0, 0, 0, 0, SERD_NOTHING}, - 0U, - 0U}; - typedef enum { SEP_NONE, ///< Sentinel after "nothing" SEP_NEWLINE, ///< Sentinel after a line end @@ -205,7 +197,7 @@ push_context(SerdWriter* const writer, *(WriteContext*)top = writer->context; // Update the current context - const WriteContext current = {type, graph, subject, predicate, 0U, 0U}; + const WriteContext current = {type, false, graph, subject, predicate}; writer->context = current; } @@ -238,7 +230,7 @@ sink(const void* const buf, const size_t len, SerdWriter* const writer) return written; } -SERD_NODISCARD static inline SerdStatus +SERD_NODISCARD static SerdStatus esink(const void* const buf, const size_t len, SerdWriter* const writer) { return sink(buf, len, writer) == len ? SERD_SUCCESS : SERD_ERR_BAD_WRITE; @@ -550,10 +542,13 @@ write_sep(SerdWriter* const writer, const Sep sep) : 0); } - // If this is the first comma, bump the increment for the following object + // Adjust indentation for object comma if necessary if (sep == SEP_END_O && !writer->context.comma_indented) { ++writer->indent; writer->context.comma_indented = true; + } else if (sep == SEP_END_P && writer->context.comma_indented) { + --writer->indent; + writer->context.comma_indented = false; } // Write newline or space before separator if necessary @@ -579,7 +574,6 @@ write_sep(SerdWriter* const writer, const Sep sep) // Reset context and write a blank line after ends of subjects if (sep == SEP_END_S) { writer->indent = writer->context.graph.type ? 1 : 0; - writer->context.predicates = false; writer->context.comma_indented = false; TRY(st, esink("\n", 1, writer)); } @@ -612,7 +606,6 @@ reset_context(SerdWriter* const writer, const unsigned flags) writer->context.type = CTX_NAMED; writer->context.subject.type = SERD_NOTHING; writer->context.predicate.type = SERD_NOTHING; - writer->context.predicates = false; writer->context.comma_indented = false; return SERD_SUCCESS; } @@ -864,7 +857,6 @@ write_pred(SerdWriter* const writer, TRY(st, write_sep(writer, SEP_P_O)); copy_node(&writer->context.predicate, pred); - writer->context.predicates = true; writer->context.comma_indented = false; return st; } @@ -926,10 +918,6 @@ serd_writer_write_statement(SerdWriter* const writer, SerdStatus st = SERD_SUCCESS; - if (!is_resource(subject) || !is_resource(predicate) || !object->buf) { - return SERD_ERR_BAD_ARG; - } - if ((flags & SERD_LIST_O_BEGIN) && !strcmp((const char*)object->buf, NS_RDF "nil")) { /* Tolerate LIST_O_BEGIN for "()" objects, even though it doesn't make @@ -938,6 +926,17 @@ serd_writer_write_statement(SerdWriter* const writer, flags &= (SerdStatementFlags)~SERD_LIST_O_BEGIN; } + // Refuse to write incoherent statements + if (!is_resource(subject) || !is_resource(predicate) || + object->type == SERD_NOTHING || !object->buf || + (datatype && datatype->buf && lang && lang->buf) || + ((flags & SERD_ANON_S_BEGIN) && (flags & SERD_LIST_S_BEGIN)) || + ((flags & SERD_EMPTY_S) && (flags & SERD_LIST_S_BEGIN)) || + ((flags & SERD_ANON_O_BEGIN) && (flags & SERD_LIST_O_BEGIN)) || + ((flags & SERD_EMPTY_O) && (flags & SERD_LIST_O_BEGIN))) { + return SERD_ERR_BAD_ARG; + } + // Simple case: write a line of NTriples or NQuads if (writer->syntax == SERD_NTRIPLES || writer->syntax == SERD_NQUADS) { TRY(st, write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags)); @@ -953,23 +952,21 @@ serd_writer_write_statement(SerdWriter* const writer, return SERD_SUCCESS; } - SERD_DISABLE_NULL_WARNINGS - // Separate graphs if necessary - if ((graph && !serd_node_equals(graph, &writer->context.graph)) || - (!graph && writer->context.graph.type)) { + const SerdNode* const out_graph = writer->syntax == SERD_TRIG ? graph : NULL; + if ((out_graph && !serd_node_equals(out_graph, &writer->context.graph)) || + (!out_graph && writer->context.graph.type)) { TRY(st, terminate_context(writer)); reset_context(writer, RESET_GRAPH | RESET_INDENT); TRY(st, write_newline(writer)); - if (graph) { - TRY(st, write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags)); + if (out_graph) { + TRY(st, + write_node(writer, out_graph, datatype, lang, FIELD_GRAPH, flags)); TRY(st, write_sep(writer, SEP_GRAPH_BEGIN)); - copy_node(&writer->context.graph, graph); + copy_node(&writer->context.graph, out_graph); } } - SERD_RESTORE_WARNINGS - if ((flags & SERD_LIST_CONT)) { // Continue a list if (!strcmp((const char*)predicate->buf, NS_RDF "first") && @@ -1003,11 +1000,6 @@ serd_writer_write_statement(SerdWriter* const writer, } else { // Elide S (write P and O) - if (writer->context.comma_indented) { - --writer->indent; - writer->context.comma_indented = false; - } - const bool first = !writer->context.predicate.type; TRY(st, write_sep(writer, first ? SEP_S_P : SEP_END_P)); TRY(st, write_pred(writer, flags, predicate)); @@ -1052,7 +1044,7 @@ serd_writer_write_statement(SerdWriter* const writer, const bool is_list = (flags & SERD_LIST_S_BEGIN); push_context(writer, is_list ? CTX_LIST : CTX_BLANK, - serd_node_copy(graph), + serd_node_copy(out_graph), serd_node_copy(subject), is_list ? SERD_NODE_NULL : serd_node_copy(predicate)); } @@ -1061,7 +1053,7 @@ serd_writer_write_statement(SerdWriter* const writer, // Push context for anonymous or list object if necessary push_context(writer, (flags & SERD_LIST_O_BEGIN) ? CTX_LIST : CTX_BLANK, - serd_node_copy(graph), + serd_node_copy(out_graph), serd_node_copy(object), SERD_NODE_NULL); } @@ -1089,14 +1081,11 @@ serd_writer_end_anon(SerdWriter* const writer, const SerdNode* const node) TRY(st, write_sep(writer, SEP_ANON_END)); pop_context(writer); - SERD_DISABLE_NULL_WARNINGS - if (node && serd_node_equals(node, &writer->context.subject)) { // Now-finished anonymous node is the new subject with no other context writer->context.predicate.type = SERD_NOTHING; } - SERD_RESTORE_WARNINGS return st; } @@ -1123,8 +1112,7 @@ serd_writer_new(const SerdSyntax syntax, assert(env); assert(ssink); - const WriteContext context = WRITE_CONTEXT_NULL; - SerdWriter* writer = (SerdWriter*)calloc(1, sizeof(SerdWriter)); + SerdWriter* writer = (SerdWriter*)calloc(1, sizeof(SerdWriter)); writer->syntax = syntax; writer->style = style; @@ -1133,7 +1121,6 @@ serd_writer_new(const SerdSyntax syntax, writer->root_uri = SERD_URI_NULL; writer->base_uri = base_uri ? *base_uri : SERD_URI_NULL; writer->anon_stack = serd_stack_new(SERD_PAGE_SIZE); - writer->context = context; writer->byte_sink = serd_byte_sink_new( ssink, stream, (style & SERD_STYLE_BULK) ? SERD_PAGE_SIZE : 1); @@ -1225,7 +1212,12 @@ serd_writer_set_prefix(SerdWriter* const writer, TRY(st, serd_env_set_prefix(writer->env, name, uri)); if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) { + const bool had_subject = writer->context.subject.type; TRY(st, terminate_context(writer)); + if (had_subject) { + TRY(st, esink("\n", 1, writer)); + } + TRY(st, esink("@prefix ", 8, writer)); TRY(st, esink(name->buf, name->n_bytes, writer)); TRY(st, esink(": <", 3, writer)); @@ -1244,9 +1236,7 @@ serd_writer_free(SerdWriter* const writer) return; } - SERD_DISABLE_NULL_WARNINGS serd_writer_finish(writer); - SERD_RESTORE_WARNINGS free_context(&writer->context); free_anon_stack(writer); serd_stack_free(&writer->anon_stack); |