diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/base64.c | 8 | ||||
-rw-r--r-- | src/base64.h | 2 | ||||
-rw-r--r-- | src/byte_sink.h | 10 | ||||
-rw-r--r-- | src/byte_source.c | 2 | ||||
-rw-r--r-- | src/byte_source.h | 6 | ||||
-rw-r--r-- | src/env.c | 2 | ||||
-rw-r--r-- | src/n3.c | 239 | ||||
-rw-r--r-- | src/node.c | 19 | ||||
-rw-r--r-- | src/node.h | 36 | ||||
-rw-r--r-- | src/reader.c | 37 | ||||
-rw-r--r-- | src/reader.h | 39 | ||||
-rw-r--r-- | src/serd_config.h | 2 | ||||
-rw-r--r-- | src/serd_internal.h | 6 | ||||
-rw-r--r-- | src/serdi.c | 30 | ||||
-rw-r--r-- | src/stack.h | 21 | ||||
-rw-r--r-- | src/string.c | 2 | ||||
-rw-r--r-- | src/string_utils.h | 16 | ||||
-rw-r--r-- | src/system.c | 1 | ||||
-rw-r--r-- | src/try.h | 2 | ||||
-rw-r--r-- | src/uri.c | 18 | ||||
-rw-r--r-- | src/uri_utils.h | 16 | ||||
-rw-r--r-- | src/writer.c | 267 |
22 files changed, 387 insertions, 394 deletions
diff --git a/src/base64.c b/src/base64.c index 42d296b3..07fdcbfd 100644 --- a/src/base64.c +++ b/src/base64.c @@ -6,7 +6,7 @@ #include "serd_internal.h" #include "string_utils.h" -#include "serd/serd.h" +#include <serd/serd.h> #include <assert.h> #include <stdbool.h> @@ -51,7 +51,7 @@ encode_chunk(uint8_t out[4], const uint8_t in[3], const size_t n_in) size_t serd_base64_get_length(const size_t size, const bool wrap_lines) { - return (size + 2) / 3 * 4 + (wrap_lines * ((size - 1) / 57)); + return ((size + 2) / 3 * 4) + (wrap_lines * ((size - 1) / 57)); } bool @@ -91,7 +91,7 @@ decode_chunk(const uint8_t in[4], uint8_t out[3]) out[0] = (uint8_t)(((unmap(in[0]) << 2)) | unmap(in[1]) >> 4); out[1] = (uint8_t)(((unmap(in[1]) << 4) & 0xF0) | unmap(in[2]) >> 2); out[2] = (uint8_t)(((unmap(in[2]) << 6) & 0xC0) | unmap(in[3])); - return 1U + (in[2] != '=') + ((in[2] != '=') && (in[3] != '=')); + return (size_t)1U + (in[2] != '=') + ((in[2] != '=') && (in[3] != '=')); } void* @@ -102,7 +102,7 @@ serd_base64_decode(const uint8_t* const str, assert(str); assert(size); - void* buf = malloc((len * 3) / 4 + 2); + void* buf = malloc(((len * 3) / 4) + 2); *size = 0; for (size_t i = 0, j = 0; i < len; j += 3) { diff --git a/src/base64.h b/src/base64.h index 21a6878c..4a4675e7 100644 --- a/src/base64.h +++ b/src/base64.h @@ -4,7 +4,7 @@ #ifndef SERD_SRC_BASE64_H #define SERD_SRC_BASE64_H -#include "serd/serd.h" +#include <serd/serd.h> #include <stdbool.h> #include <stddef.h> diff --git a/src/byte_sink.h b/src/byte_sink.h index 65b5eb12..327c1b82 100644 --- a/src/byte_sink.h +++ b/src/byte_sink.h @@ -7,7 +7,7 @@ #include "serd_internal.h" #include "system.h" -#include "serd/serd.h" +#include <serd/serd.h> #include <stddef.h> #include <stdint.h> @@ -22,7 +22,7 @@ typedef struct SerdByteSinkImpl { } SerdByteSink; static inline SerdByteSink -serd_byte_sink_new(SerdSink sink, void* stream, size_t block_size) +serd_byte_sink_new(SerdSink sink, void* const stream, const size_t block_size) { SerdByteSink bsink = {sink, stream, NULL, 0, block_size}; @@ -34,7 +34,7 @@ serd_byte_sink_new(SerdSink sink, void* stream, size_t block_size) } static inline SerdStatus -serd_byte_sink_flush(SerdByteSink* bsink) +serd_byte_sink_flush(SerdByteSink* const bsink) { if (bsink->block_size > 1 && bsink->size > 0) { const size_t size = bsink->size; @@ -48,7 +48,7 @@ serd_byte_sink_flush(SerdByteSink* bsink) } static inline void -serd_byte_sink_free(SerdByteSink* bsink) +serd_byte_sink_free(SerdByteSink* const bsink) { serd_byte_sink_flush(bsink); serd_free_aligned(bsink->buf); @@ -56,7 +56,7 @@ serd_byte_sink_free(SerdByteSink* bsink) } static inline size_t -serd_byte_sink_write(const void* buf, size_t len, SerdByteSink* bsink) +serd_byte_sink_write(const void* buf, size_t len, SerdByteSink* const bsink) { if (len == 0) { return 0; diff --git a/src/byte_source.c b/src/byte_source.c index 122dc0f9..74827741 100644 --- a/src/byte_source.c +++ b/src/byte_source.c @@ -5,7 +5,7 @@ #include "system.h" -#include "serd/serd.h" +#include <serd/serd.h> #include <assert.h> #include <stdbool.h> diff --git a/src/byte_source.h b/src/byte_source.h index afd9ccb9..ac0bc306 100644 --- a/src/byte_source.h +++ b/src/byte_source.h @@ -4,7 +4,7 @@ #ifndef SERD_SRC_BYTE_SOURCE_H #define SERD_SRC_BYTE_SOURCE_H -#include "serd/serd.h" +#include <serd/serd.h> #include <assert.h> #include <stdbool.h> @@ -58,14 +58,14 @@ SerdStatus serd_byte_source_page(SerdByteSource* source); static inline SERD_PURE_FUNC uint8_t -serd_byte_source_peek(SerdByteSource* source) +serd_byte_source_peek(SerdByteSource* const source) { assert(source->prepared); return source->read_buf[source->read_head]; } static inline SerdStatus -serd_byte_source_advance(SerdByteSource* source) +serd_byte_source_advance(SerdByteSource* const source) { SerdStatus st = SERD_SUCCESS; @@ -1,7 +1,7 @@ // Copyright 2011-2023 David Robillard <d@drobilla.net> // SPDX-License-Identifier: ISC -#include "serd/serd.h" +#include <serd/serd.h> #include <assert.h> #include <stdbool.h> @@ -1,14 +1,13 @@ -// Copyright 2011-2023 David Robillard <d@drobilla.net> +// Copyright 2011-2025 David Robillard <d@drobilla.net> // SPDX-License-Identifier: ISC #include "reader.h" #include "serd_internal.h" -#include "stack.h" #include "string_utils.h" #include "try.h" #include "uri_utils.h" -#include "serd/serd.h" +#include <serd/serd.h> #include <assert.h> #include <stdbool.h> @@ -46,7 +45,7 @@ read_HEX(SerdReader* const reader) return (uint8_t)eat_byte_safe(reader, c); } - r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid hexadecimal digit '%c'\n", c); + r_err_char(reader, "hexadecimal", c); return 0; } @@ -172,7 +171,7 @@ bad_char(SerdReader* const reader, const char* const fmt, const uint8_t c) static SerdStatus read_utf8_bytes(SerdReader* const reader, uint8_t bytes[4], - uint32_t* const size, + uint8_t* const size, const uint8_t c) { *size = utf8_num_bytes(c); @@ -181,9 +180,9 @@ read_utf8_bytes(SerdReader* const reader, } bytes[0] = c; - for (unsigned i = 1; i < *size; ++i) { + for (uint8_t i = 1U; i < *size; ++i) { const int b = peek_byte(reader); - if (b == EOF || ((uint8_t)b & 0x80) == 0) { + if (b == EOF || ((uint8_t)b & 0x80U) == 0U) { return bad_char(reader, "invalid UTF-8 continuation 0x%X\n", (uint8_t)b); } @@ -196,7 +195,7 @@ read_utf8_bytes(SerdReader* const reader, static SerdStatus read_utf8_character(SerdReader* const reader, const Ref dest, const uint8_t c) { - uint32_t size = 0; + uint8_t size = 0U; uint8_t bytes[4] = {0, 0, 0, 0}; SerdStatus st = read_utf8_bytes(reader, bytes, &size, c); if (st) { @@ -214,7 +213,7 @@ read_utf8_code(SerdReader* const reader, uint32_t* const code, const uint8_t c) { - uint32_t size = 0; + uint8_t size = 0U; uint8_t bytes[4] = {0, 0, 0, 0}; SerdStatus st = read_utf8_bytes(reader, bytes, &size, c); if (st) { @@ -354,11 +353,11 @@ read_STRING_LITERAL_LONG(SerdReader* const reader, push_byte(reader, ref, c); st = read_character(reader, ref, flags, (uint8_t)q2); } - } else if (c == EOF) { - st = r_err(reader, SERD_ERR_BAD_SYNTAX, "end of file in long string\n"); - } else { + } else if (c > 0) { st = read_character(reader, ref, flags, (uint8_t)eat_byte_safe(reader, c)); + } else { + return r_err(reader, SERD_ERR_BAD_SYNTAX, "end of file in long string\n"); } } @@ -457,7 +456,7 @@ read_PN_CHARS_BASE(SerdReader* const reader, const Ref dest) return push_byte(reader, dest, eat_byte_safe(reader, c)); } - if (c == EOF || !(c & 0x80)) { + if (c < 0x80) { return SERD_FAILURE; } @@ -465,11 +464,7 @@ read_PN_CHARS_BASE(SerdReader* const reader, const Ref dest) read_utf8_code(reader, dest, &code, (uint8_t)c); if (!is_PN_CHARS_BASE(code)) { - r_err( - reader, SERD_ERR_BAD_SYNTAX, "invalid character U+%04X in name\n", code); - if (reader->strict) { - return SERD_ERR_BAD_SYNTAX; - } + st = r_err_char(reader, "name", (int)code); } return st; @@ -493,7 +488,7 @@ read_PN_CHARS(SerdReader* const reader, const Ref dest) return push_byte(reader, dest, eat_byte_safe(reader, c)); } - if (c == EOF || !(c & 0x80)) { + if (c < 0x80) { return SERD_FAILURE; } @@ -501,8 +496,7 @@ read_PN_CHARS(SerdReader* const reader, const Ref dest) TRY(st, read_utf8_code(reader, dest, &code, (uint8_t)c)); if (!is_PN_CHARS(code)) { - return r_err( - reader, SERD_ERR_BAD_SYNTAX, "invalid character U+%04X in name\n", code); + st = r_err_char(reader, "name", (int)code); } return st; @@ -531,7 +525,7 @@ read_PN_LOCAL_ESC(SerdReader* const reader, const Ref dest) return ((c == '!') || in_range(c, '#', '/') || (c == ';') || (c == '=') || (c == '?') || (c == '@') || (c == '_') || (c == '~')) ? push_byte(reader, dest, eat_byte_safe(reader, c)) - : r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape\n"); + : r_err(reader, SERD_ERR_BAD_SYNTAX, "bad escape\n"); } static SerdStatus @@ -589,9 +583,7 @@ read_PN_LOCAL(SerdReader* const reader, const Ref dest, bool* const ate_dot) SerdNode* const n = deref(reader, dest); if (trailing_unescaped_dot) { // Ate trailing dot, pop it from stack/node and inform caller - --n->n_bytes; - serd_stack_pop(&reader->stack, 1); - *ate_dot = true; + *ate_dot = pop_last_node_char(reader, n); } return (st > SERD_FAILURE) ? st : SERD_SUCCESS; @@ -599,31 +591,37 @@ read_PN_LOCAL(SerdReader* const reader, const Ref dest, bool* const ate_dot) // Read the remainder of a PN_PREFIX after some initial characters static SerdStatus -read_PN_PREFIX_tail(SerdReader* const reader, const Ref dest) +read_PN_PREFIX_tail(SerdReader* const reader, + const Ref dest, + bool* const ate_dot) { - int c = 0; - while ((c = peek_byte(reader)) > 0) { // Middle: (PN_CHARS | '.')* + SerdStatus st = SERD_SUCCESS; + bool trailing_unescaped_dot = false; + + while (!st) { // Middle: (PN_CHARS | '.')* + const int c = peek_byte(reader); if (c == '.') { push_byte(reader, dest, eat_byte_safe(reader, c)); - } else if (read_PN_CHARS(reader, dest)) { - break; + trailing_unescaped_dot = true; + } else if (!(st = read_PN_CHARS(reader, dest))) { + trailing_unescaped_dot = false; } } - const SerdNode* const n = deref(reader, dest); - if (n->buf[n->n_bytes - 1] == '.' && read_PN_CHARS(reader, dest)) { - return r_err(reader, SERD_ERR_BAD_SYNTAX, "prefix ends with '.'\n"); + if (trailing_unescaped_dot) { + SerdNode* const n = deref(reader, dest); + *ate_dot = pop_last_node_char(reader, n); } - return SERD_SUCCESS; + return st; } static SerdStatus -read_PN_PREFIX(SerdReader* const reader, const Ref dest) +read_PN_PREFIX(SerdReader* const reader, const Ref dest, bool* const ate_dot) { const SerdStatus st = read_PN_CHARS_BASE(reader, dest); - return st ? st : read_PN_PREFIX_tail(reader, dest); + return st ? st : read_PN_PREFIX_tail(reader, dest, ate_dot); } static SerdStatus @@ -631,7 +629,7 @@ read_LANGTAG(SerdReader* const reader, Ref* const dest) { int c = peek_byte(reader); if (!is_alpha(c)) { - return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected '%c'\n", c); + return r_err_char(reader, "language", c); } *dest = push_node(reader, SERD_LITERAL, "", 0); @@ -657,7 +655,7 @@ read_IRIREF_scheme(SerdReader* const reader, const Ref dest) { int c = peek_byte(reader); if (!is_alpha(c)) { - return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad IRI scheme start '%c'\n", c); + return r_err_char(reader, "IRI scheme start", c); } while ((c = peek_byte(reader)) > 0) { @@ -666,11 +664,7 @@ read_IRIREF_scheme(SerdReader* const reader, const Ref dest) } if (!is_uri_scheme_char(c)) { - return r_err(reader, - SERD_ERR_BAD_SYNTAX, - "bad IRI scheme char U+%04X (%c)\n", - (unsigned)c, - (char)c); + return r_err_char(reader, "IRI scheme", c); } push_byte(reader, dest, eat_byte_safe(reader, c)); @@ -704,8 +698,7 @@ read_IRIREF(SerdReader* const reader, Ref* const dest) case '"': case '<': *dest = pop_node(reader, *dest); - return r_err( - reader, SERD_ERR_BAD_SYNTAX, "invalid IRI character '%c'\n", c); + return r_err_char(reader, "IRI", c); case '>': return SERD_SUCCESS; @@ -713,7 +706,7 @@ read_IRIREF(SerdReader* const reader, Ref* const dest) case '\\': if (read_UCHAR(reader, *dest, &code)) { *dest = pop_node(reader, *dest); - return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n"); + return r_err_char(reader, "IRI escape", c); } if (code == ' ' || code == '<' || code == '>') { @@ -731,21 +724,20 @@ read_IRIREF(SerdReader* const reader, Ref* const dest) case '|': case '}': *dest = pop_node(reader, *dest); - return r_err( - reader, SERD_ERR_BAD_SYNTAX, "invalid IRI character '%c'\n", c); + return r_err_char(reader, "IRI", c); default: - if (c <= 0x20) { + if (c <= 0) { + st = r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected end of file\n"); + } else if (c <= 0x20) { st = r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI character (escape %%%02X)\n", (unsigned)c); - if (reader->strict) { - break; + if (!reader->strict) { + st = SERD_FAILURE; + push_byte(reader, *dest, c); } - - st = SERD_FAILURE; - push_byte(reader, *dest, c); } else if (!(c & 0x80)) { push_byte(reader, *dest, c); } else if (read_utf8_character(reader, *dest, (uint8_t)c)) { @@ -762,15 +754,10 @@ read_IRIREF(SerdReader* const reader, Ref* const dest) } static SerdStatus -read_PrefixedName(SerdReader* const reader, - const Ref dest, - const bool read_prefix, - bool* const ate_dot) +read_PrefixedName(SerdReader* const reader, const Ref dest, bool* const ate_dot) { SerdStatus st = SERD_SUCCESS; - if (read_prefix) { - TRY_FAILING(st, read_PN_PREFIX(reader, dest)); - } + TRY_FAILING(st, read_PN_PREFIX(reader, dest, ate_dot)); if (peek_byte(reader) != ':') { return SERD_FAILURE; @@ -825,18 +812,16 @@ read_number(SerdReader* const reader, // all other cases ::= ( '-' | '+' ) [0-9]+ ( . )? ( [0-9]+ )? ... TRY(st, read_0_9(reader, *dest, true)); if ((c = peek_byte(reader)) == '.') { - has_decimal = true; - // Annoyingly, dot can be end of statement, so tentatively eat skip_byte(reader, c); c = peek_byte(reader); if (!is_digit(c) && c != 'e' && c != 'E') { - *ate_dot = true; // Force caller to deal with stupid grammar - return SERD_SUCCESS; // Next byte is not a number character + *ate_dot = true; // Force caller to deal with silly grammar + } else { + has_decimal = true; + push_byte(reader, *dest, '.'); + read_0_9(reader, *dest, false); } - - push_byte(reader, *dest, '.'); - read_0_9(reader, *dest, false); } } c = peek_byte(reader); @@ -868,7 +853,7 @@ read_iri(SerdReader* const reader, Ref* const dest, bool* const ate_dot) } *dest = push_node(reader, SERD_CURIE, "", 0); - return read_PrefixedName(reader, *dest, true, ate_dot); + return read_PrefixedName(reader, *dest, ate_dot); } static SerdStatus @@ -920,29 +905,31 @@ read_verb(SerdReader* const reader, Ref* const dest) return read_IRIREF(reader, dest); } - /* Either a qname, or "a". Read the prefix first, and if it is in fact - "a", produce that instead. - */ - *dest = push_node(reader, SERD_CURIE, "", 0); + Ref p = push_node(reader, SERD_CURIE, "", 0); - SerdStatus st = read_PN_PREFIX(reader, *dest); + // Try to read as a prefixed name bool ate_dot = false; - SerdNode* node = deref(reader, *dest); - const int next = peek_byte(reader); - if (!st && node->n_bytes == 1 && node->buf[0] == 'a' && next != ':' && - !is_PN_CHARS_BASE((uint32_t)next)) { - pop_node(reader, *dest); - *dest = push_node(reader, SERD_URI, NS_RDF "type", 47); - return SERD_SUCCESS; + SerdStatus st = read_PrefixedName(reader, p, &ate_dot); + + if (st == SERD_FAILURE) { + // Check if this is actually the "a" shorthand + const SerdNode* const node = deref(reader, p); + if (node->n_bytes == 1 && node->buf[0] == 'a') { + pop_node(reader, p); + p = push_node(reader, SERD_URI, NS_RDF "type", 47); + st = SERD_SUCCESS; + } else { + st = SERD_ERR_BAD_SYNTAX; + } } - if (st > SERD_FAILURE || - (st = read_PrefixedName(reader, *dest, false, &ate_dot)) || ate_dot) { - *dest = pop_node(reader, *dest); - st = st > SERD_FAILURE ? st : SERD_ERR_BAD_SYNTAX; + if (st) { + pop_node(reader, p); + *dest = 0; return r_err(reader, st, "bad verb\n"); } + *dest = p; return SERD_SUCCESS; } @@ -981,9 +968,7 @@ read_BLANK_NODE_LABEL(SerdReader* const reader, SerdNode* n = deref(reader, ref); if (n->buf[n->n_bytes - 1] == '.' && read_PN_CHARS(reader, ref)) { // Ate trailing dot, pop it from stack/node and inform caller - --n->n_bytes; - serd_stack_pop(&reader->stack, 1); - *ate_dot = true; + *ate_dot = pop_last_node_char(reader, n); } if (fancy_syntax(reader)) { @@ -1078,6 +1063,40 @@ read_anon(SerdReader* const reader, : SERD_ERR_BAD_SYNTAX; } +// Read a "named" object: a boolean literal or a prefixed name +static SerdStatus +read_named_object(SerdReader* const reader, + Ref* const dest, + Ref* const datatype, + bool* const ate_dot) +{ + static const char* const XSD_BOOLEAN = NS_XSD "boolean"; + static const size_t XSD_BOOLEAN_LEN = 40; + + // Try to read as a prefixed name + const Ref o = push_node(reader, SERD_CURIE, "", 0); + SerdStatus st = read_PrefixedName(reader, o, ate_dot); + + if (st == SERD_FAILURE) { + // Check if this is actually a boolean literal + SerdNode* const node = deref(reader, o); + if ((node->n_bytes == 4 && !memcmp(node->buf, "true", 4)) || + (node->n_bytes == 5 && !memcmp(node->buf, "false", 5))) { + node->type = SERD_LITERAL; + *datatype = push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN); + st = SERD_SUCCESS; + } + } + + if (st) { + pop_node(reader, o); + return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected prefixed name\n"); + } + + *dest = o; + return SERD_SUCCESS; +} + /* If emit is true: recurses, calling statement_sink for every statement encountered, and leaves stack in original calling state (i.e. pops everything it pushes). */ @@ -1087,9 +1106,6 @@ read_object(SerdReader* const reader, const bool emit, bool* const ate_dot) { - static const char* const XSD_BOOLEAN = NS_XSD "boolean"; - static const size_t XSD_BOOLEAN_LEN = 40; - #ifndef NDEBUG const size_t orig_stack_size = reader->stack.size; #endif @@ -1097,7 +1113,6 @@ read_object(SerdReader* const reader, SerdStatus st = SERD_FAILURE; bool simple = (ctx->subject != 0); - SerdNode* node = NULL; Ref o = 0; Ref datatype = 0; Ref lang = 0; @@ -1147,27 +1162,8 @@ read_object(SerdReader* const reader, st = read_literal(reader, &o, &datatype, &lang, &flags, ate_dot); break; default: - /* Either a boolean literal, or a qname. Read the prefix first, and if - it is in fact a "true" or "false" literal, produce that instead. - */ - o = push_node(reader, SERD_CURIE, "", 0); - while (!read_PN_CHARS_BASE(reader, o)) { - } - node = deref(reader, o); - if ((node->n_bytes == 4 && !memcmp(node->buf, "true", 4)) || - (node->n_bytes == 5 && !memcmp(node->buf, "false", 5))) { - node->type = SERD_LITERAL; - datatype = push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN); - st = SERD_SUCCESS; - } else if (read_PN_PREFIX_tail(reader, o) > SERD_FAILURE) { - st = SERD_ERR_BAD_SYNTAX; - } else { - if ((st = read_PrefixedName(reader, o, false, ate_dot))) { - st = st > SERD_FAILURE ? st : SERD_ERR_BAD_SYNTAX; - pop_node(reader, o); - return r_err(reader, st, "expected prefixed name\n"); - } - } + // Either a boolean literal or a prefixed name + st = read_named_object(reader, &o, &datatype, ate_dot); } if (!st && simple && o) { @@ -1319,7 +1315,10 @@ read_collection(SerdReader* const reader, ReadContext ctx, Ref* const dest) // _:node rdf:rest _:rest *ctx.flags |= SERD_LIST_CONT; ctx.predicate = reader->rdf_rest; - TRY(st, emit_statement(reader, ctx, (end ? reader->rdf_nil : rest), 0, 0)); + st = emit_statement(reader, ctx, (end ? reader->rdf_nil : rest), 0, 0); + if (st) { + break; + } ctx.subject = rest; // _:node = _:rest rest = node; // _:rest = (old)_:node @@ -1444,12 +1443,12 @@ read_prefixID(SerdReader* const reader, const bool sparql, const bool token) } read_ws_star(reader); - Ref name = push_node(reader, SERD_LITERAL, "", 0); - TRY_FAILING(st, read_PN_PREFIX(reader, name)); - - if (eat_byte_check(reader, ':') != ':') { + Ref name = push_node(reader, SERD_LITERAL, "", 0); + bool ate_dot = false; + TRY_FAILING(st, read_PN_PREFIX(reader, name, &ate_dot)); + if (ate_dot || eat_byte_check(reader, ':') != ':') { pop_node(reader, name); - return SERD_ERR_BAD_SYNTAX; + return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected a prefix name\n"); } read_ws_star(reader); @@ -1540,7 +1539,7 @@ token_equals(SerdReader* const reader, const char* const tok, const size_t n) { - SerdNode* const node = deref(reader, ref); + const SerdNode* const node = deref(reader, ref); if (!node || node->n_bytes != n) { return false; } @@ -1,12 +1,10 @@ // Copyright 2011-2023 David Robillard <d@drobilla.net> // SPDX-License-Identifier: ISC -#include "node.h" - #include "base64.h" #include "string_utils.h" -#include "serd/serd.h" +#include <serd/serd.h> #include <assert.h> #include <float.h> @@ -17,14 +15,11 @@ #include <stdlib.h> #include <string.h> -#ifdef _WIN32 -# ifndef isnan -# define isnan(x) _isnan(x) -# endif -# ifndef isinf -# define isinf(x) (!_finite(x)) -# endif -#endif +struct SerdNodeImpl { + size_t n_bytes; /**< Size in bytes (not including null) */ + SerdNodeFlags flags; /**< Node flags (e.g. string properties) */ + SerdType type; /**< Node type */ +}; static size_t serd_uri_string_length(const SerdURI* const uri) @@ -297,7 +292,7 @@ serd_node_new_decimal(const double d, const unsigned frac_digits) char* t = s - 1; uint64_t dec = (uint64_t)int_part; do { - *t-- = (char)('0' + dec % 10); + *t-- = (char)('0' + (dec % 10)); } while ((dec /= 10) > 0); *s++ = '.'; diff --git a/src/node.h b/src/node.h deleted file mode 100644 index a4d5dcd5..00000000 --- a/src/node.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2011-2023 David Robillard <d@drobilla.net> -// SPDX-License-Identifier: ISC - -#ifndef SERD_SRC_NODE_H -#define SERD_SRC_NODE_H - -#include "serd/serd.h" - -#include <stddef.h> - -struct SerdNodeImpl { - size_t n_bytes; /**< Size in bytes (not including null) */ - SerdNodeFlags flags; /**< Node flags (e.g. string properties) */ - SerdType type; /**< Node type */ -}; - -static inline char* SERD_NONNULL -serd_node_buffer(SerdNode* SERD_NONNULL node) -{ - return (char*)(node + 1); -} - -static inline const char* SERD_NONNULL -serd_node_buffer_c(const SerdNode* SERD_NONNULL node) -{ - return (const char*)(node + 1); -} - -SerdNode* SERD_ALLOCATED -serd_node_malloc(size_t n_bytes, SerdNodeFlags flags, SerdType type); - -void -serd_node_set(SerdNode* SERD_NULLABLE* SERD_NONNULL dst, - const SerdNode* SERD_NULLABLE src); - -#endif // SERD_SRC_NODE_H diff --git a/src/reader.c b/src/reader.c index aa24a9ca..98500fcd 100644 --- a/src/reader.c +++ b/src/reader.c @@ -4,10 +4,11 @@ #include "reader.h" #include "byte_source.h" +#include "serd_internal.h" #include "stack.h" #include "system.h" -#include "serd_internal.h" +#include <serd/serd.h> #include <assert.h> #include <errno.h> @@ -29,6 +30,17 @@ r_err(SerdReader* const reader, const SerdStatus st, const char* const fmt, ...) return st; } +SerdStatus +r_err_char(SerdReader* const reader, const char* const kind, const int c) +{ + const SerdStatus st = SERD_ERR_BAD_SYNTAX; + + return (c < 0x20 || c == 0x7F) ? r_err(reader, st, "bad %s character\n", kind) + : (c == '\'' || c >= 0x80) + ? r_err(reader, st, "bad %s character U+%04X\n", kind, (uint32_t)c) + : r_err(reader, st, "bad %s character '%c'\n", kind, c); +} + void set_blank_id(SerdReader* const reader, const Ref ref, const size_t buf_size) { @@ -90,12 +102,16 @@ push_node_padded(SerdReader* const reader, uint8_t* buf = (uint8_t*)(node + 1); memcpy(buf, str, n_bytes + 1); + const Ref ref = (Ref)((uint8_t*)node - reader->stack.buf); + #ifdef SERD_STACK_CHECK - reader->allocs = (Ref*)realloc(reader->allocs, + reader->allocs = (Ref*)realloc(reader->allocs, sizeof(reader->allocs) * (++reader->n_allocs)); - reader->allocs[reader->n_allocs - 1] = ((uint8_t*)mem - reader->stack.buf); + + reader->allocs[reader->n_allocs - 1] = ref; #endif - return (Ref)((uint8_t*)node - reader->stack.buf); + + return ref; } Ref @@ -118,6 +134,14 @@ deref(SerdReader* const reader, const Ref ref) return NULL; } +bool +pop_last_node_char(SerdReader* const reader, SerdNode* const node) +{ + --node->n_bytes; + serd_stack_pop(&reader->stack, 1); + return true; +} + Ref pop_node(SerdReader* const reader, const Ref ref) { @@ -127,8 +151,9 @@ pop_node(SerdReader* const reader, const Ref ref) SERD_STACK_ASSERT_TOP(reader, ref); --reader->n_allocs; #endif - SerdNode* const node = deref(reader, ref); - uint8_t* const top = reader->stack.buf + reader->stack.size; + SerdNode* const node = deref(reader, ref); + const uint8_t* const top = reader->stack.buf + reader->stack.size; + assert(top > (uint8_t*)node); serd_stack_pop_aligned(&reader->stack, (size_t)(top - (uint8_t*)node)); } return 0; diff --git a/src/reader.h b/src/reader.h index 9b558d1f..74255862 100644 --- a/src/reader.h +++ b/src/reader.h @@ -8,16 +8,16 @@ #include "byte_source.h" #include "stack.h" -#include "serd/serd.h" +#include <serd/serd.h> #include <assert.h> #include <stdbool.h> +#include <stddef.h> #include <stdint.h> -#include <stdio.h> #ifdef SERD_STACK_CHECK # define SERD_STACK_ASSERT_TOP(reader, ref) \ - assert(ref == reader->allocs[reader->n_allocs - 1]); + assert(ref == reader->allocs[reader->n_allocs - 1]) #else # define SERD_STACK_ASSERT_TOP(reader, ref) #endif @@ -69,6 +69,9 @@ SERD_LOG_FUNC(3, 4) SerdStatus r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...); +SerdStatus +r_err_char(SerdReader* reader, const char* kind, int c); + Ref push_node_padded(SerdReader* reader, size_t maxlen, @@ -91,6 +94,9 @@ set_blank_id(SerdReader* reader, Ref ref, size_t buf_size); SerdNode* deref(SerdReader* reader, Ref ref); +bool +pop_last_node_char(SerdReader* reader, SerdNode* node); + Ref pop_node(SerdReader* reader, Ref ref); @@ -110,15 +116,15 @@ SerdStatus read_turtleTrigDoc(SerdReader* reader); static inline int -peek_byte(SerdReader* reader) +peek_byte(SerdReader* const reader) { SerdByteSource* source = &reader->source; - return source->eof ? EOF : (int)source->read_buf[source->read_head]; + return source->eof ? -1 : (int)source->read_buf[source->read_head]; } static inline SerdStatus -skip_byte(SerdReader* reader, const int byte) +skip_byte(SerdReader* const reader, const int byte) { (void)byte; @@ -127,8 +133,8 @@ skip_byte(SerdReader* reader, const int byte) return serd_byte_source_advance(&reader->source); } -static inline int SERD_NODISCARD -eat_byte_safe(SerdReader* reader, const int byte) +SERD_NODISCARD static inline int +eat_byte_safe(SerdReader* const reader, const int byte) { (void)byte; @@ -138,19 +144,19 @@ eat_byte_safe(SerdReader* reader, const int byte) return byte; } -static inline int SERD_NODISCARD -eat_byte_check(SerdReader* reader, const int byte) +SERD_NODISCARD static inline int +eat_byte_check(SerdReader* const reader, const int byte) { const int c = peek_byte(reader); if (c != byte) { - r_err(reader, SERD_ERR_BAD_SYNTAX, "expected '%c', not '%c'\n", byte, c); + r_err(reader, SERD_ERR_BAD_SYNTAX, "expected '%c'\n", byte); return 0; } return eat_byte_safe(reader, byte); } static inline SerdStatus -eat_string(SerdReader* reader, const char* str, unsigned n) +eat_string(SerdReader* const reader, const char* const str, const unsigned n) { for (unsigned i = 0; i < n; ++i) { if (!eat_byte_check(reader, ((const uint8_t*)str)[i])) { @@ -161,9 +167,9 @@ eat_string(SerdReader* reader, const char* str, unsigned n) } static inline SerdStatus -push_byte(SerdReader* reader, Ref ref, const int c) +push_byte(SerdReader* const reader, const Ref ref, const int c) { - assert(c != EOF); + assert(c >= 0); SERD_STACK_ASSERT_TOP(reader, ref); uint8_t* const s = (uint8_t*)serd_stack_push(&reader->stack, 1); @@ -180,7 +186,10 @@ push_byte(SerdReader* reader, Ref ref, const int c) } static inline void -push_bytes(SerdReader* reader, Ref ref, const uint8_t* bytes, unsigned len) +push_bytes(SerdReader* const reader, + const Ref ref, + const uint8_t* const bytes, + const unsigned len) { for (unsigned i = 0; i < len; ++i) { push_byte(reader, ref, bytes[i]); diff --git a/src/serd_config.h b/src/serd_config.h index 150ffb81..d4165a20 100644 --- a/src/serd_config.h +++ b/src/serd_config.h @@ -36,7 +36,7 @@ #define SERD_SRC_SERD_CONFIG_H // Define version unconditionally so a warning will catch a mismatch -#define SERD_VERSION "0.32.3" +#define SERD_VERSION "0.32.5" #if !defined(SERD_NO_DEFAULT_CONFIG) diff --git a/src/serd_internal.h b/src/serd_internal.h index 388c12ec..ead6d796 100644 --- a/src/serd_internal.h +++ b/src/serd_internal.h @@ -4,7 +4,7 @@ #ifndef SERD_SRC_SERD_INTERNAL_H #define SERD_SRC_SERD_INTERNAL_H -#include "serd/serd.h" +#include <serd/serd.h> #include <stdio.h> @@ -20,7 +20,9 @@ /* Error reporting */ static inline void -serd_error(SerdErrorSink error_sink, void* handle, const SerdError* e) +serd_error(const SerdErrorSink error_sink, + void* const handle, + const SerdError* const e) { if (error_sink) { error_sink(handle, e); diff --git a/src/serdi.c b/src/serdi.c index 9d0a8f44..c19bd212 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -4,7 +4,7 @@ #include "serd_config.h" #include "string_utils.h" -#include "serd/serd.h" +#include <serd/serd.h> #ifdef _WIN32 # ifdef _MSC_VER @@ -85,20 +85,20 @@ print_usage(const char* const name, const bool error) static const char* const description = "Read and write RDF syntax.\n" "Use - for INPUT to read from standard input.\n\n" - " -a Write ASCII output.\n" - " -b Write output in blocks for performance.\n" - " -c PREFIX Chop PREFIX from matching blank node IDs.\n" - " -e Eat input one character at a time.\n" - " -f Fast and loose URI pass-through.\n" - " -h Display this help and exit.\n" - " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n" - " -l Lax (non-strict) parsing.\n" - " -o SYNTAX Output syntax: turtle/ntriples/nquads.\n" - " -p PREFIX Add PREFIX to blank node IDs.\n" - " -q Suppress all output except data.\n" - " -r ROOT_URI Keep relative URIs within ROOT_URI.\n" - " -s INPUT Parse INPUT as string (terminates options).\n" - " -v Display version information and exit.\n"; + " -a Write ASCII output\n" + " -b Write output in blocks for performance\n" + " -c PREFIX Chop PREFIX from matching blank node IDs\n" + " -e Eat input one character at a time\n" + " -f Fast and loose URI pass-through\n" + " -h Display this help and exit\n" + " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads\n" + " -l Lax (non-strict) parsing\n" + " -o SYNTAX Output syntax: turtle/ntriples/nquads\n" + " -p PREFIX Add PREFIX to blank node IDs\n" + " -q Suppress all output except data\n" + " -r ROOT_URI Keep relative URIs within ROOT_URI\n" + " -s INPUT Parse INPUT as string (terminates options)\n" + " -v Display version information and exit\n"; FILE* const os = error ? stderr : stdout; fprintf(os, "%s", error ? "\n" : ""); diff --git a/src/stack.h b/src/stack.h index 388dd054..63c6ba01 100644 --- a/src/stack.h +++ b/src/stack.h @@ -20,11 +20,8 @@ typedef struct { size_t size; ///< Conceptual size of stack in buf } SerdStack; -/** An offset to start the stack at. Note 0 is reserved for NULL. */ -#define SERD_STACK_BOTTOM sizeof(void*) - static inline SerdStack -serd_stack_new(size_t size) +serd_stack_new(const size_t size) { SerdStack stack; stack.buf = (uint8_t*)calloc(size, 1); @@ -34,13 +31,13 @@ serd_stack_new(size_t size) } static inline bool -serd_stack_is_empty(const SerdStack* stack) +serd_stack_is_empty(const SerdStack* const stack) { return stack->size <= SERD_STACK_BOTTOM; } static inline void -serd_stack_free(SerdStack* stack) +serd_stack_free(SerdStack* const stack) { free(stack->buf); stack->buf = NULL; @@ -49,7 +46,7 @@ serd_stack_free(SerdStack* stack) } static inline void* -serd_stack_push(SerdStack* stack, size_t n_bytes) +serd_stack_push(SerdStack* const stack, const size_t n_bytes) { const size_t new_size = stack->size + n_bytes; if (stack->buf_size < new_size) { @@ -64,20 +61,22 @@ serd_stack_push(SerdStack* stack, size_t n_bytes) } static inline void -serd_stack_pop(SerdStack* stack, size_t n_bytes) +serd_stack_pop(SerdStack* const stack, const size_t n_bytes) { assert(stack->size >= n_bytes); stack->size -= n_bytes; } static inline void* -serd_stack_push_aligned(SerdStack* stack, size_t n_bytes, size_t align) +serd_stack_push_aligned(SerdStack* const stack, + const size_t n_bytes, + const size_t align) { // Push one byte to ensure space for a pad count serd_stack_push(stack, 1); // Push padding if necessary - const size_t pad = align - stack->size % align; + const size_t pad = align - (stack->size % align); serd_stack_push(stack, pad); // Set top of stack to pad count so we can properly pop later @@ -89,7 +88,7 @@ serd_stack_push_aligned(SerdStack* stack, size_t n_bytes, size_t align) } static inline void -serd_stack_pop_aligned(SerdStack* stack, size_t n_bytes) +serd_stack_pop_aligned(SerdStack* const stack, const size_t n_bytes) { // Pop requested space down to aligned location serd_stack_pop(stack, n_bytes); diff --git a/src/string.c b/src/string.c index 936989c2..1a9d62d4 100644 --- a/src/string.c +++ b/src/string.c @@ -3,7 +3,7 @@ #include "string_utils.h" -#include "serd/serd.h" +#include <serd/serd.h> #include <assert.h> #include <math.h> diff --git a/src/string_utils.h b/src/string_utils.h index 2ce90ac9..f4b27bcc 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -4,7 +4,7 @@ #ifndef SERD_SRC_STRING_UTILS_H #define SERD_SRC_STRING_UTILS_H -#include "serd/serd.h" +#include <serd/serd.h> #include <stdbool.h> #include <stddef.h> @@ -51,13 +51,13 @@ is_xdigit(const int c) static inline bool is_space(const char c) { - return c == ' ' || (c >= '\t' && c <= '\r'); + return c == ' ' || in_range(c, '\t', '\r'); } static inline bool is_print(const int c) { - return c >= 0x20 && c <= 0x7E; + return in_range(c, 0x20, 0x7E); } static inline bool @@ -88,7 +88,7 @@ hex_digit_value(const uint8_t c) static inline char serd_to_upper(const char c) { - return (char)((c >= 'a' && c <= 'z') ? c - 32 : c); + return (char)(in_range(c, 'a', 'z') ? (c - 32) : c); } SERD_PURE_FUNC static inline int @@ -107,7 +107,7 @@ serd_strcasecmp(const char* s1, const char* s2) return (c1 == c2) ? 0 : (c1 < c2) ? -1 : +1; } -static inline uint32_t +static inline uint8_t utf8_num_bytes(const uint8_t leading) { return ((leading & 0x80U) == 0x00U) ? 1U // Starts with `0' @@ -119,18 +119,18 @@ utf8_num_bytes(const uint8_t leading) /// Return the code point of a UTF-8 character with known length static inline uint32_t -parse_counted_utf8_char(const uint8_t* utf8, size_t size) +parse_counted_utf8_char(const uint8_t* const utf8, const uint8_t size) { uint32_t c = utf8[0] & ((1U << (8U - size)) - 1U); for (size_t i = 1; i < size; ++i) { - c = (c << 6) | (utf8[i] & 0x3FU); + c = (c << 6U) | (utf8[i] & 0x3FU); } return c; } /// Parse a UTF-8 character, set *size to the length, and return the code point static inline uint32_t -parse_utf8_char(const uint8_t* utf8, size_t* size) +parse_utf8_char(const uint8_t* const utf8, uint8_t* const size) { switch (*size = utf8_num_bytes(utf8[0])) { case 1: diff --git a/src/system.c b/src/system.c index 072d2ed5..84916060 100644 --- a/src/system.c +++ b/src/system.c @@ -15,7 +15,6 @@ #endif #include <errno.h> -#include <stdio.h> #include <stdlib.h> #include <string.h> @@ -4,6 +4,8 @@ #ifndef SERD_SRC_TRY_H #define SERD_SRC_TRY_H +#include <serd/serd.h> + #define TRY(st, exp) \ do { \ if (((st) = (exp))) { \ @@ -5,7 +5,7 @@ #include "uri_utils.h" #include "warnings.h" -#include "serd/serd.h" +#include <serd/serd.h> #include <assert.h> #include <stdbool.h> @@ -15,7 +15,7 @@ #include <string.h> const uint8_t* -serd_uri_to_path(const uint8_t* uri) +serd_uri_to_path(const uint8_t* const uri) { assert(uri); @@ -344,7 +344,7 @@ serd_uri_resolve(const SerdURI* const r, /** Write the path of `uri` starting at index `i` */ static size_t -write_path_tail(SerdSink sink, +write_path_tail(const SerdSink sink, void* const stream, const SerdURI* const uri, const size_t i) @@ -372,7 +372,7 @@ write_path_tail(SerdSink sink, /** Write the path of `uri` relative to the path of `base`. */ static size_t -write_rel_path(SerdSink sink, +write_rel_path(const SerdSink sink, void* const stream, const SerdURI* const uri, const SerdURI* const base) @@ -413,7 +413,7 @@ write_rel_path(SerdSink sink, } static uint8_t -serd_uri_path_starts_without_slash(const SerdURI* uri) +serd_uri_path_starts_without_slash(const SerdURI* const uri) { return ((uri->path_base.len || uri->path.len) && ((!uri->path_base.len || uri->path_base.buf[0] != '/') && @@ -425,7 +425,7 @@ size_t serd_uri_serialise_relative(const SerdURI* const uri, const SerdURI* const base, const SerdURI* const root, - SerdSink sink, + const SerdSink sink, void* const stream) { assert(uri); @@ -441,7 +441,7 @@ serd_uri_serialise_relative(const SerdURI* const uri, SERD_DISABLE_NULL_WARNINGS - if (!relative || (!len && base->query.buf)) { + if (!relative || (!len && base && base->query.buf)) { if (uri->scheme.buf) { len += sink(uri->scheme.buf, uri->scheme.len, stream); len += sink(":", 1, stream); @@ -481,7 +481,9 @@ serd_uri_serialise_relative(const SerdURI* const uri, /// See http://tools.ietf.org/html/rfc3986#section-5.3 size_t -serd_uri_serialise(const SerdURI* const uri, SerdSink sink, void* const stream) +serd_uri_serialise(const SerdURI* const uri, + const SerdSink sink, + void* const stream) { assert(uri); assert(sink); diff --git a/src/uri_utils.h b/src/uri_utils.h index 0d3bd74e..1e7adb82 100644 --- a/src/uri_utils.h +++ b/src/uri_utils.h @@ -4,10 +4,10 @@ #ifndef SERD_SRC_URI_UTILS_H #define SERD_SRC_URI_UTILS_H -#include "serd/serd.h" - #include "string_utils.h" +#include <serd/serd.h> + #include <stdbool.h> #include <stdint.h> #include <string.h> @@ -18,20 +18,20 @@ typedef struct { } SlashIndexes; static inline bool -chunk_equals(const SerdChunk* a, const SerdChunk* b) +chunk_equals(const SerdChunk* const a, const SerdChunk* const b) { return a->len == b->len && !strncmp((const char*)a->buf, (const char*)b->buf, a->len); } static inline size_t -uri_path_len(const SerdURI* uri) +uri_path_len(const SerdURI* const uri) { return uri->path_base.len + uri->path.len; } static inline uint8_t -uri_path_at(const SerdURI* uri, size_t i) +uri_path_at(const SerdURI* const uri, const size_t i) { return (i < uri->path_base.len) ? uri->path_base.buf[i] : uri->path.buf[i - uri->path_base.len]; @@ -46,7 +46,7 @@ uri_path_at(const SerdURI* uri, size_t i) otherwise it may merely share some leading path components). */ static inline SERD_PURE_FUNC SlashIndexes -uri_rooted_index(const SerdURI* uri, const SerdURI* root) +uri_rooted_index(const SerdURI* const uri, const SerdURI* const root) { SlashIndexes indexes = {SIZE_MAX, SIZE_MAX}; @@ -84,14 +84,14 @@ uri_rooted_index(const SerdURI* uri, const SerdURI* root) /** Return true iff `uri` shares path components with `root` */ static inline SERD_PURE_FUNC bool -uri_is_related(const SerdURI* uri, const SerdURI* root) +uri_is_related(const SerdURI* const uri, const SerdURI* const root) { return uri_rooted_index(uri, root).shared != SIZE_MAX; } /** Return true iff `uri` is within the base of `root` */ static inline SERD_PURE_FUNC bool -uri_is_under(const SerdURI* uri, const SerdURI* root) +uri_is_under(const SerdURI* const uri, const SerdURI* const root) { const SlashIndexes indexes = uri_rooted_index(uri, root); return indexes.shared && indexes.shared != SIZE_MAX && diff --git a/src/writer.c b/src/writer.c index e4ef5651..aa7fd980 100644 --- a/src/writer.c +++ b/src/writer.c @@ -1,4 +1,4 @@ -// Copyright 2011-2023 David Robillard <d@drobilla.net> +// Copyright 2011-2025 David Robillard <d@drobilla.net> // SPDX-License-Identifier: ISC #include "attributes.h" @@ -10,7 +10,7 @@ #include "uri_utils.h" #include "warnings.h" -#include "serd/serd.h" +#include <serd/serd.h> #include <assert.h> #include <errno.h> @@ -37,20 +37,12 @@ typedef enum { typedef struct { ContextType type; + bool comma_indented; SerdNode graph; SerdNode subject; SerdNode predicate; - bool predicates; - bool comma_indented; } WriteContext; -static const WriteContext WRITE_CONTEXT_NULL = {CTX_NAMED, - {0, 0, 0, 0, SERD_NOTHING}, - {0, 0, 0, 0, SERD_NOTHING}, - {0, 0, 0, 0, SERD_NOTHING}, - 0U, - 0U}; - typedef enum { SEP_NONE, ///< Sentinel after "nothing" SEP_NEWLINE, ///< Sentinel after a line end @@ -143,7 +135,7 @@ write_node(SerdWriter* writer, SerdStatementFlags flags); SERD_NODISCARD static bool -supports_abbrev(const SerdWriter* writer) +supports_abbrev(const SerdWriter* const writer) { return writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG; } @@ -162,7 +154,7 @@ free_context(WriteContext* const ctx) SERD_LOG_FUNC(3, 4) static SerdStatus -w_err(SerdWriter* writer, SerdStatus st, const char* fmt, ...) +w_err(SerdWriter* const writer, const SerdStatus st, const char* const fmt, ...) { /* TODO: This results in errors with no file information, which is not helpful when re-serializing a file (particularly for "undefined @@ -179,7 +171,7 @@ w_err(SerdWriter* writer, SerdStatus st, const char* fmt, ...) } static void -copy_node(SerdNode* dst, const SerdNode* src) +copy_node(SerdNode* const dst, const SerdNode* const src) { const size_t new_size = src->n_bytes + 1U; uint8_t* const new_buf = (uint8_t*)realloc((char*)dst->buf, new_size); @@ -205,12 +197,12 @@ push_context(SerdWriter* const writer, *(WriteContext*)top = writer->context; // Update the current context - const WriteContext current = {type, graph, subject, predicate, 0U, 0U}; + const WriteContext current = {type, false, graph, subject, predicate}; writer->context = current; } static void -pop_context(SerdWriter* writer) +pop_context(SerdWriter* const writer) { // Replace the current context with the top of the stack free_context(&writer->context); @@ -223,7 +215,7 @@ pop_context(SerdWriter* writer) } SERD_NODISCARD static size_t -sink(const void* buf, size_t len, SerdWriter* writer) +sink(const void* const buf, const size_t len, SerdWriter* const writer) { const size_t written = serd_byte_sink_write(buf, len, &writer->byte_sink); if (written != len) { @@ -238,8 +230,8 @@ sink(const void* buf, size_t len, SerdWriter* writer) return written; } -SERD_NODISCARD static inline SerdStatus -esink(const void* buf, size_t len, SerdWriter* writer) +SERD_NODISCARD static SerdStatus +esink(const void* const buf, const size_t len, SerdWriter* const writer) { return sink(buf, len, writer) == len ? SERD_SUCCESS : SERD_ERR_BAD_WRITE; } @@ -247,10 +239,10 @@ esink(const void* buf, size_t len, SerdWriter* writer) // Write a single character, as an escape for single byte characters // (Caller prints any single byte characters that don't need escaping) static size_t -write_character(SerdWriter* writer, - const uint8_t* utf8, - size_t* size, - SerdStatus* st) +write_character(SerdWriter* const writer, + const uint8_t* const utf8, + uint8_t* const size, + SerdStatus* const st) { char escape[11] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; const uint32_t c = parse_utf8_char(utf8, size); @@ -288,10 +280,10 @@ uri_must_escape(const uint8_t c) } static size_t -write_uri(SerdWriter* writer, - const uint8_t* utf8, - size_t n_bytes, - SerdStatus* st) +write_uri(SerdWriter* const writer, + const uint8_t* const utf8, + const size_t n_bytes, + SerdStatus* const st) { size_t len = 0; for (size_t i = 0; i < n_bytes;) { @@ -315,14 +307,14 @@ write_uri(SerdWriter* writer, } // Write UTF-8 character - size_t size = 0; + uint8_t size = 0U; len += write_character(writer, utf8 + i, &size, st); i += size; if (*st && (writer->style & SERD_STYLE_STRICT)) { break; } - if (size == 0) { + if (!size) { // Corrupt input, write percent-encoded bytes and scan to next start char escape[4] = {0, 0, 0, 0}; for (; i < n_bytes && (utf8[i] & 0x80); ++i) { @@ -336,7 +328,9 @@ write_uri(SerdWriter* writer, } SERD_NODISCARD static SerdStatus -ewrite_uri(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes) +ewrite_uri(SerdWriter* const writer, + const uint8_t* const utf8, + const size_t n_bytes) { SerdStatus st = SERD_SUCCESS; write_uri(writer, utf8, n_bytes, &st); @@ -347,7 +341,7 @@ ewrite_uri(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes) } SERD_NODISCARD static SerdStatus -write_uri_from_node(SerdWriter* writer, const SerdNode* node) +write_uri_from_node(SerdWriter* const writer, const SerdNode* const node) { return ewrite_uri(writer, node->buf, node->n_bytes); } @@ -369,7 +363,9 @@ lname_must_escape(const uint8_t c) } SERD_NODISCARD static SerdStatus -write_lname(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes) +write_lname(SerdWriter* const writer, + const uint8_t* const utf8, + const size_t n_bytes) { SerdStatus st = SERD_SUCCESS; for (size_t i = 0; i < n_bytes; ++i) { @@ -395,10 +391,10 @@ write_lname(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes) } SERD_NODISCARD static SerdStatus -write_text(SerdWriter* writer, - TextContext ctx, - const uint8_t* utf8, - size_t n_bytes) +write_text(SerdWriter* const writer, + const TextContext ctx, + const uint8_t* const utf8, + const size_t n_bytes) { size_t n_consecutive_quotes = 0; SerdStatus st = SERD_SUCCESS; @@ -484,19 +480,19 @@ write_text(SerdWriter* writer, } // Write UTF-8 character - size_t size = 0; + uint8_t size = 0U; write_character(writer, utf8 + i - 1, &size, &st); if (st && (writer->style & SERD_STYLE_STRICT)) { return st; } - if (size == 0) { + if (size > 0U) { + i += size - 1U; + } else { // Corrupt input, write replacement character and scan to the next start st = esink(replacement_char, sizeof(replacement_char), writer); - for (; i < n_bytes && (utf8[i] & 0x80); ++i) { + for (; i < n_bytes && (utf8[i] & 0x80U); ++i) { } - } else { - i += size - 1; } } @@ -509,7 +505,7 @@ typedef struct { } UriSinkContext; SERD_NODISCARD static size_t -uri_sink(const void* buf, size_t len, void* stream) +uri_sink(const void* const buf, const size_t len, void* const stream) { UriSinkContext* const context = (UriSinkContext*)stream; SerdWriter* const writer = context->writer; @@ -518,7 +514,7 @@ uri_sink(const void* buf, size_t len, void* stream) } SERD_NODISCARD static SerdStatus -write_newline(SerdWriter* writer) +write_newline(SerdWriter* const writer) { SerdStatus st = SERD_SUCCESS; @@ -531,7 +527,7 @@ write_newline(SerdWriter* writer) } SERD_NODISCARD static SerdStatus -write_sep(SerdWriter* writer, const Sep sep) +write_sep(SerdWriter* const writer, const Sep sep) { SerdStatus st = SERD_SUCCESS; const SepRule* const rule = &rules[sep]; @@ -546,10 +542,13 @@ write_sep(SerdWriter* writer, const Sep sep) : 0); } - // If this is the first comma, bump the increment for the following object + // Adjust indentation for object comma if necessary if (sep == SEP_END_O && !writer->context.comma_indented) { ++writer->indent; writer->context.comma_indented = true; + } else if (sep == SEP_END_P && writer->context.comma_indented) { + --writer->indent; + writer->context.comma_indented = false; } // Write newline or space before separator if necessary @@ -575,7 +574,6 @@ write_sep(SerdWriter* writer, const Sep sep) // Reset context and write a blank line after ends of subjects if (sep == SEP_END_S) { writer->indent = writer->context.graph.type ? 1 : 0; - writer->context.predicates = false; writer->context.comma_indented = false; TRY(st, esink("\n", 1, writer)); } @@ -585,7 +583,7 @@ write_sep(SerdWriter* writer, const Sep sep) } static void -free_anon_stack(SerdWriter* writer) +free_anon_stack(SerdWriter* const writer) { while (!serd_stack_is_empty(&writer->anon_stack)) { pop_context(writer); @@ -593,7 +591,7 @@ free_anon_stack(SerdWriter* writer) } static SerdStatus -reset_context(SerdWriter* writer, const unsigned flags) +reset_context(SerdWriter* const writer, const unsigned flags) { free_anon_stack(writer); @@ -608,7 +606,6 @@ reset_context(SerdWriter* writer, const unsigned flags) writer->context.type = CTX_NAMED; writer->context.subject.type = SERD_NOTHING; writer->context.predicate.type = SERD_NOTHING; - writer->context.predicates = false; writer->context.comma_indented = false; return SERD_SUCCESS; } @@ -639,11 +636,11 @@ get_xsd_name(const SerdEnv* const env, const SerdNode* const datatype) } SERD_NODISCARD static SerdStatus -write_literal(SerdWriter* writer, - const SerdNode* node, - const SerdNode* datatype, - const SerdNode* lang, - SerdStatementFlags flags) +write_literal(SerdWriter* const writer, + const SerdNode* const node, + const SerdNode* const datatype, + const SerdNode* const lang, + const SerdStatementFlags flags) { SerdStatus st = SERD_SUCCESS; @@ -679,7 +676,7 @@ write_literal(SerdWriter* writer, // Return true iff `buf` is a valid prefixed name prefix or suffix static bool -is_name(const uint8_t* buf, const size_t len) +is_name(const uint8_t* const buf, const size_t len) { // TODO: This is more strict than it should be for (size_t i = 0; i < len; ++i) { @@ -692,9 +689,9 @@ is_name(const uint8_t* buf, const size_t len) } SERD_NODISCARD static SerdStatus -write_uri_node(SerdWriter* const writer, - const SerdNode* node, - const Field field) +write_uri_node(SerdWriter* const writer, + const SerdNode* const node, + const Field field) { SerdStatus st = SERD_SUCCESS; SerdNode prefix = SERD_NODE_NULL; @@ -741,8 +738,8 @@ write_uri_node(SerdWriter* const writer, serd_uri_parse(node->buf, &uri); SERD_RESTORE_WARNINGS serd_uri_resolve(&uri, &in_base_uri, &abs_uri); - bool rooted = uri_is_under(&writer->base_uri, &writer->root_uri); - SerdURI* root = rooted ? &writer->root_uri : &writer->base_uri; + const bool rooted = uri_is_under(&writer->base_uri, &writer->root_uri); + const SerdURI* root = rooted ? &writer->root_uri : &writer->base_uri; UriSinkContext ctx = {writer, SERD_SUCCESS}; if (!uri_is_under(&abs_uri, root) || writer->syntax == SERD_NTRIPLES || writer->syntax == SERD_NQUADS) { @@ -789,7 +786,7 @@ write_curie(SerdWriter* const writer, const SerdNode* const node) SERD_NODISCARD static SerdStatus write_blank(SerdWriter* const writer, - const SerdNode* node, + const SerdNode* const node, const Field field, const SerdStatementFlags flags) { @@ -828,12 +825,12 @@ write_blank(SerdWriter* const writer, } SERD_NODISCARD static SerdStatus -write_node(SerdWriter* writer, - const SerdNode* node, - const SerdNode* datatype, - const SerdNode* lang, - Field field, - SerdStatementFlags flags) +write_node(SerdWriter* const writer, + const SerdNode* const node, + const SerdNode* const datatype, + const SerdNode* const lang, + const Field field, + const SerdStatementFlags flags) { return (node->type == SERD_LITERAL) ? write_literal(writer, node, datatype, lang, flags) @@ -844,13 +841,15 @@ write_node(SerdWriter* writer, } static bool -is_resource(const SerdNode* node) +is_resource(const SerdNode* const node) { return node->buf && node->type > SERD_LITERAL; } SERD_NODISCARD static SerdStatus -write_pred(SerdWriter* writer, SerdStatementFlags flags, const SerdNode* pred) +write_pred(SerdWriter* const writer, + const SerdStatementFlags flags, + const SerdNode* const pred) { SerdStatus st = SERD_SUCCESS; @@ -858,18 +857,17 @@ write_pred(SerdWriter* writer, SerdStatementFlags flags, const SerdNode* pred) TRY(st, write_sep(writer, SEP_P_O)); copy_node(&writer->context.predicate, pred); - writer->context.predicates = true; writer->context.comma_indented = false; return st; } SERD_NODISCARD static SerdStatus -write_list_next(SerdWriter* writer, - SerdStatementFlags flags, - const SerdNode* predicate, - const SerdNode* object, - const SerdNode* datatype, - const SerdNode* lang) +write_list_next(SerdWriter* const writer, + const SerdStatementFlags flags, + const SerdNode* const predicate, + const SerdNode* const object, + const SerdNode* const datatype, + const SerdNode* const lang) { SerdStatus st = SERD_SUCCESS; @@ -888,7 +886,7 @@ write_list_next(SerdWriter* writer, } SERD_NODISCARD static SerdStatus -terminate_context(SerdWriter* writer) +terminate_context(SerdWriter* const writer) { SerdStatus st = SERD_SUCCESS; @@ -904,14 +902,14 @@ terminate_context(SerdWriter* writer) } SerdStatus -serd_writer_write_statement(SerdWriter* writer, - SerdStatementFlags flags, - const SerdNode* graph, - const SerdNode* subject, - const SerdNode* predicate, - const SerdNode* object, - const SerdNode* datatype, - const SerdNode* lang) +serd_writer_write_statement(SerdWriter* const writer, + SerdStatementFlags flags, + const SerdNode* const graph, + const SerdNode* const subject, + const SerdNode* const predicate, + const SerdNode* const object, + const SerdNode* const datatype, + const SerdNode* const lang) { assert(writer); assert(subject); @@ -920,10 +918,6 @@ serd_writer_write_statement(SerdWriter* writer, SerdStatus st = SERD_SUCCESS; - if (!is_resource(subject) || !is_resource(predicate) || !object->buf) { - return SERD_ERR_BAD_ARG; - } - if ((flags & SERD_LIST_O_BEGIN) && !strcmp((const char*)object->buf, NS_RDF "nil")) { /* Tolerate LIST_O_BEGIN for "()" objects, even though it doesn't make @@ -932,6 +926,17 @@ serd_writer_write_statement(SerdWriter* writer, flags &= (SerdStatementFlags)~SERD_LIST_O_BEGIN; } + // Refuse to write incoherent statements + if (!is_resource(subject) || !is_resource(predicate) || + object->type == SERD_NOTHING || !object->buf || + (datatype && datatype->buf && lang && lang->buf) || + ((flags & SERD_ANON_S_BEGIN) && (flags & SERD_LIST_S_BEGIN)) || + ((flags & SERD_EMPTY_S) && (flags & SERD_LIST_S_BEGIN)) || + ((flags & SERD_ANON_O_BEGIN) && (flags & SERD_LIST_O_BEGIN)) || + ((flags & SERD_EMPTY_O) && (flags & SERD_LIST_O_BEGIN))) { + return SERD_ERR_BAD_ARG; + } + // Simple case: write a line of NTriples or NQuads if (writer->syntax == SERD_NTRIPLES || writer->syntax == SERD_NQUADS) { TRY(st, write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags)); @@ -947,23 +952,21 @@ serd_writer_write_statement(SerdWriter* writer, return SERD_SUCCESS; } - SERD_DISABLE_NULL_WARNINGS - // Separate graphs if necessary - if ((graph && !serd_node_equals(graph, &writer->context.graph)) || - (!graph && writer->context.graph.type)) { + const SerdNode* const out_graph = writer->syntax == SERD_TRIG ? graph : NULL; + if ((out_graph && !serd_node_equals(out_graph, &writer->context.graph)) || + (!out_graph && writer->context.graph.type)) { TRY(st, terminate_context(writer)); reset_context(writer, RESET_GRAPH | RESET_INDENT); TRY(st, write_newline(writer)); - if (graph) { - TRY(st, write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags)); + if (out_graph) { + TRY(st, + write_node(writer, out_graph, datatype, lang, FIELD_GRAPH, flags)); TRY(st, write_sep(writer, SEP_GRAPH_BEGIN)); - copy_node(&writer->context.graph, graph); + copy_node(&writer->context.graph, out_graph); } } - SERD_RESTORE_WARNINGS - if ((flags & SERD_LIST_CONT)) { // Continue a list if (!strcmp((const char*)predicate->buf, NS_RDF "first") && @@ -997,11 +1000,6 @@ serd_writer_write_statement(SerdWriter* writer, } else { // Elide S (write P and O) - if (writer->context.comma_indented) { - --writer->indent; - writer->context.comma_indented = false; - } - const bool first = !writer->context.predicate.type; TRY(st, write_sep(writer, first ? SEP_S_P : SEP_END_P)); TRY(st, write_pred(writer, flags, predicate)); @@ -1046,7 +1044,7 @@ serd_writer_write_statement(SerdWriter* writer, const bool is_list = (flags & SERD_LIST_S_BEGIN); push_context(writer, is_list ? CTX_LIST : CTX_BLANK, - serd_node_copy(graph), + serd_node_copy(out_graph), serd_node_copy(subject), is_list ? SERD_NODE_NULL : serd_node_copy(predicate)); } @@ -1055,7 +1053,7 @@ serd_writer_write_statement(SerdWriter* writer, // Push context for anonymous or list object if necessary push_context(writer, (flags & SERD_LIST_O_BEGIN) ? CTX_LIST : CTX_BLANK, - serd_node_copy(graph), + serd_node_copy(out_graph), serd_node_copy(object), SERD_NODE_NULL); } @@ -1064,7 +1062,7 @@ serd_writer_write_statement(SerdWriter* writer, } SerdStatus -serd_writer_end_anon(SerdWriter* writer, const SerdNode* node) +serd_writer_end_anon(SerdWriter* const writer, const SerdNode* const node) { assert(writer); @@ -1083,19 +1081,16 @@ serd_writer_end_anon(SerdWriter* writer, const SerdNode* node) TRY(st, write_sep(writer, SEP_ANON_END)); pop_context(writer); - SERD_DISABLE_NULL_WARNINGS - if (node && serd_node_equals(node, &writer->context.subject)) { // Now-finished anonymous node is the new subject with no other context writer->context.predicate.type = SERD_NOTHING; } - SERD_RESTORE_WARNINGS return st; } SerdStatus -serd_writer_finish(SerdWriter* writer) +serd_writer_finish(SerdWriter* const writer) { assert(writer); @@ -1107,18 +1102,17 @@ serd_writer_finish(SerdWriter* writer) } SerdWriter* -serd_writer_new(SerdSyntax syntax, - SerdStyle style, - SerdEnv* env, - const SerdURI* base_uri, - SerdSink ssink, - void* stream) +serd_writer_new(const SerdSyntax syntax, + const SerdStyle style, + SerdEnv* const env, + const SerdURI* const base_uri, + SerdSink ssink, + void* const stream) { assert(env); assert(ssink); - const WriteContext context = WRITE_CONTEXT_NULL; - SerdWriter* writer = (SerdWriter*)calloc(1, sizeof(SerdWriter)); + SerdWriter* writer = (SerdWriter*)calloc(1, sizeof(SerdWriter)); writer->syntax = syntax; writer->style = style; @@ -1127,7 +1121,6 @@ serd_writer_new(SerdSyntax syntax, writer->root_uri = SERD_URI_NULL; writer->base_uri = base_uri ? *base_uri : SERD_URI_NULL; writer->anon_stack = serd_stack_new(SERD_PAGE_SIZE); - writer->context = context; writer->byte_sink = serd_byte_sink_new( ssink, stream, (style & SERD_STYLE_BULK) ? SERD_PAGE_SIZE : 1); @@ -1135,9 +1128,9 @@ serd_writer_new(SerdSyntax syntax, } void -serd_writer_set_error_sink(SerdWriter* writer, - SerdErrorSink error_sink, - void* error_handle) +serd_writer_set_error_sink(SerdWriter* const writer, + const SerdErrorSink error_sink, + void* const error_handle) { assert(writer); assert(error_sink); @@ -1146,7 +1139,8 @@ serd_writer_set_error_sink(SerdWriter* writer, } void -serd_writer_chop_blank_prefix(SerdWriter* writer, const uint8_t* prefix) +serd_writer_chop_blank_prefix(SerdWriter* const writer, + const uint8_t* const prefix) { assert(writer); @@ -1163,7 +1157,7 @@ serd_writer_chop_blank_prefix(SerdWriter* writer, const uint8_t* prefix) } SerdStatus -serd_writer_set_base_uri(SerdWriter* writer, const SerdNode* uri) +serd_writer_set_base_uri(SerdWriter* const writer, const SerdNode* const uri) { assert(writer); @@ -1185,7 +1179,7 @@ serd_writer_set_base_uri(SerdWriter* writer, const SerdNode* uri) } SerdStatus -serd_writer_set_root_uri(SerdWriter* writer, const SerdNode* uri) +serd_writer_set_root_uri(SerdWriter* const writer, const SerdNode* const uri) { assert(writer); @@ -1205,9 +1199,9 @@ serd_writer_set_root_uri(SerdWriter* writer, const SerdNode* uri) } SerdStatus -serd_writer_set_prefix(SerdWriter* writer, - const SerdNode* name, - const SerdNode* uri) +serd_writer_set_prefix(SerdWriter* const writer, + const SerdNode* const name, + const SerdNode* const uri) { assert(writer); assert(name); @@ -1218,7 +1212,12 @@ serd_writer_set_prefix(SerdWriter* writer, TRY(st, serd_env_set_prefix(writer->env, name, uri)); if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) { + const bool had_subject = writer->context.subject.type; TRY(st, terminate_context(writer)); + if (had_subject) { + TRY(st, esink("\n", 1, writer)); + } + TRY(st, esink("@prefix ", 8, writer)); TRY(st, esink(name->buf, name->n_bytes, writer)); TRY(st, esink(": <", 3, writer)); @@ -1231,15 +1230,13 @@ serd_writer_set_prefix(SerdWriter* writer, } void -serd_writer_free(SerdWriter* writer) +serd_writer_free(SerdWriter* const writer) { if (!writer) { return; } - SERD_DISABLE_NULL_WARNINGS serd_writer_finish(writer); - SERD_RESTORE_WARNINGS free_context(&writer->context); free_anon_stack(writer); serd_stack_free(&writer->anon_stack); @@ -1250,14 +1247,14 @@ serd_writer_free(SerdWriter* writer) } SerdEnv* -serd_writer_get_env(SerdWriter* writer) +serd_writer_get_env(SerdWriter* const writer) { assert(writer); return writer->env; } size_t -serd_file_sink(const void* buf, size_t len, void* stream) +serd_file_sink(const void* const buf, const size_t len, void* const stream) { assert(buf); assert(stream); @@ -1265,7 +1262,7 @@ serd_file_sink(const void* buf, size_t len, void* stream) } size_t -serd_chunk_sink(const void* buf, size_t len, void* stream) +serd_chunk_sink(const void* const buf, const size_t len, void* const stream) { assert(buf); assert(stream); @@ -1281,7 +1278,7 @@ serd_chunk_sink(const void* buf, size_t len, void* stream) } uint8_t* -serd_chunk_sink_finish(SerdChunk* stream) +serd_chunk_sink_finish(SerdChunk* const stream) { assert(stream); serd_chunk_sink("", 1, stream); |