From e0e5b83f30b784699a5ae039e8ae7e1dd69410d4 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Fri, 9 Jul 2021 21:25:23 -0400 Subject: Improve writer error handling --- NEWS | 1 + include/serd/serd.h | 5 +- src/serdi.c | 3 +- src/string.c | 4 + src/writer.c | 475 ++++++++++++++++++++++++++++++---------------------- test/test_string.c | 2 +- test/test_writer.c | 101 +++++++++++ 7 files changed, 391 insertions(+), 200 deletions(-) diff --git a/NEWS b/NEWS index c28a7ab8..aa1087b6 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,7 @@ serd (1.0.1) unstable; * Add support for writing terse collections * Add support for xsd:float and xsd:double literals * Bring read/write interface closer to C standard + * Improve error handling * Make nodes opaque * Remove serd_uri_to_path() * Remove support for Turtle named inline nodes extension diff --git a/include/serd/serd.h b/include/serd/serd.h index bfab6192..3263027b 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -203,6 +203,8 @@ typedef enum { SERD_ERR_BAD_CURIE, ///< Invalid CURIE or unknown namespace prefix SERD_ERR_INTERNAL, ///< Unexpected internal error SERD_ERR_OVERFLOW, ///< Stack overflow + SERD_ERR_BAD_TEXT, ///< Invalid text encoding + SERD_ERR_BAD_WRITE, ///< Error writing to file/stream } SerdStatus; /** @@ -1595,7 +1597,8 @@ typedef enum { SERD_WRITE_ASCII = 1u << 0u, ///< Escape all non-ASCII characters SERD_WRITE_UNQUALIFIED = 1u << 1u, ///< Do not shorten URIs into CURIEs SERD_WRITE_UNRESOLVED = 1u << 2u, ///< Do not make URIs relative - SERD_WRITE_TERSE = 1u << 3u ///< Write terser output without newlines + SERD_WRITE_TERSE = 1u << 3u, ///< Write terser output without newlines + SERD_WRITE_STRICT = 1u << 4u ///< Abort with error on lossy output } SerdWriterFlag; /// Bitwise OR of SerdWriterFlag values diff --git a/src/serdi.c b/src/serdi.c index eb0b0185..2aea8657 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -98,7 +98,7 @@ main(int argc, char** argv) SerdSyntax input_syntax = (SerdSyntax)0; 
SerdSyntax output_syntax = (SerdSyntax)0; - SerdWriterFlags writer_flags = 0u; + SerdWriterFlags writer_flags = SERD_WRITE_STRICT; bool from_string = false; bool from_stdin = false; bool bulk_read = true; @@ -130,6 +130,7 @@ main(int argc, char** argv) } else if (opt == 'h') { return print_usage(prog, false); } else if (opt == 'l') { + writer_flags &= ~(unsigned)SERD_WRITE_STRICT; lax = true; } else if (opt == 'q') { quiet = true; diff --git a/src/string.c b/src/string.c index 842beeef..8393a3fb 100644 --- a/src/string.c +++ b/src/string.c @@ -52,6 +52,10 @@ serd_strerror(const SerdStatus status) return "Internal error"; case SERD_ERR_OVERFLOW: return "Stack overflow"; + case SERD_ERR_BAD_TEXT: + return "Invalid text encoding"; + case SERD_ERR_BAD_WRITE: + return "Error writing to file"; } return "Unknown error"; diff --git a/src/writer.c b/src/writer.c index 8c96b2a8..74d858c7 100644 --- a/src/writer.c +++ b/src/writer.c @@ -20,18 +20,33 @@ #include "sink.h" #include "stack.h" #include "string_utils.h" +#include "system.h" #include "uri_utils.h" #include "world.h" #include "serd/serd.h" #include +#include #include #include #include #include #include +#ifndef _MSC_VER +# define SERD_WARN_UNUSED_RESULT __attribute__((warn_unused_result)) +#else +# define SERD_WARN_UNUSED_RESULT +#endif + +#define TRY(st, exp) \ + do { \ + if (((st) = (exp))) { \ + return (st); \ + } \ + } while (0) + typedef enum { CTX_NAMED, ///< Normal non-anonymous context CTX_BLANK, ///< Anonymous blank node @@ -130,7 +145,7 @@ serd_writer_set_prefix(SerdWriter* writer, const SerdNode* name, const SerdNode* uri); -static bool +SERD_WARN_UNUSED_RESULT static SerdStatus write_node(SerdWriter* writer, const SerdNode* node, SerdField field, @@ -213,16 +228,38 @@ ctx(SerdWriter* writer, const SerdField field) return node && node->type ? 
node : NULL; } -static size_t +SERD_WARN_UNUSED_RESULT static size_t sink(const void* buf, size_t len, SerdWriter* writer) { - return writer->write_func(buf, 1, len, writer->stream); + const size_t written = writer->write_func(buf, 1, len, writer->stream); + if (written != len) { + if (errno) { + char message[1024] = {0}; + serd_system_strerror(errno, message, sizeof(message)); + + serd_world_errorf( + writer->world, SERD_ERR_BAD_WRITE, "write error (%s)\n", message); + } else { + serd_world_errorf(writer->world, SERD_ERR_BAD_WRITE, "write error\n"); + } + } + + return written; +} + +SERD_WARN_UNUSED_RESULT static SerdStatus +esink(const void* buf, size_t len, SerdWriter* writer) +{ + return sink(buf, len, writer) == len ? SERD_SUCCESS : SERD_ERR_BAD_WRITE; } // Write a single character, as an escape for single byte characters // (Caller prints any single byte characters that don't need escaping) static size_t -write_character(SerdWriter* writer, const uint8_t* utf8, size_t* size) +write_character(SerdWriter* writer, + const uint8_t* utf8, + size_t* size, + SerdStatus* st) { char escape[11] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; const uint32_t c = parse_utf8_char(utf8, size); @@ -230,6 +267,7 @@ write_character(SerdWriter* writer, const uint8_t* utf8, size_t* size) case 0: serd_world_errorf( writer->world, SERD_ERR_BAD_ARG, "invalid UTF-8 start: %X\n", utf8[0]); + *st = SERD_ERR_BAD_TEXT; return 0; case 1: snprintf(escape, sizeof(escape), "\\u%04X", utf8[0]); @@ -273,7 +311,7 @@ uri_must_escape(const int c) } static size_t -write_uri(SerdWriter* writer, const char* utf8, size_t n_bytes) +write_uri(SerdWriter* writer, const char* utf8, size_t n_bytes, SerdStatus* st) { size_t len = 0; for (size_t i = 0; i < n_bytes;) { @@ -285,15 +323,25 @@ write_uri(SerdWriter* writer, const char* utf8, size_t n_bytes) } // Bulk write all characters up to this special one - len += sink(&utf8[i], j - i, writer); + const size_t n_bulk = sink(&utf8[i], j - i, writer); + len += n_bulk; + 
if (n_bulk != j - i) { + *st = SERD_ERR_BAD_WRITE; + return len; + } + if ((i = j) == n_bytes) { break; // Reached end } // Write UTF-8 character size_t size = 0; - len += write_character(writer, (const uint8_t*)utf8 + i, &size); + len += write_character(writer, (const uint8_t*)utf8 + i, &size, st); i += size; + if (*st && (writer->flags & SERD_WRITE_STRICT)) { + break; + } + if (size == 0) { // Corrupt input, write percent-encoded bytes and scan to next start char escape[4] = {0, 0, 0, 0}; @@ -303,13 +351,25 @@ write_uri(SerdWriter* writer, const char* utf8, size_t n_bytes) } } } + return len; } -static size_t +SERD_WARN_UNUSED_RESULT static SerdStatus +ewrite_uri(SerdWriter* writer, const char* utf8, size_t n_bytes) +{ + SerdStatus st = SERD_SUCCESS; + write_uri(writer, utf8, n_bytes, &st); + + return (st == SERD_ERR_BAD_WRITE || (writer->flags & SERD_WRITE_STRICT)) + ? st + : SERD_SUCCESS; +} + +SERD_WARN_UNUSED_RESULT static SerdStatus write_uri_from_node(SerdWriter* writer, const SerdNode* node) { - return write_uri(writer, serd_node_string(node), node->length); + return ewrite_uri(writer, serd_node_string(node), node->length); } static bool @@ -349,10 +409,10 @@ lname_must_escape(const char c) return false; } -static size_t +SERD_WARN_UNUSED_RESULT static SerdStatus write_lname(SerdWriter* writer, const char* utf8, size_t n_bytes) { - size_t len = 0; + SerdStatus st = SERD_SUCCESS; for (size_t i = 0; i < n_bytes; ++i) { size_t j = i; // Index of next character that must be escaped for (; j < n_bytes; ++j) { @@ -362,28 +422,28 @@ write_lname(SerdWriter* writer, const char* utf8, size_t n_bytes) } // Bulk write all characters up to this special one - len += sink(&utf8[i], j - i, writer); + TRY(st, esink(&utf8[i], j - i, writer)); if ((i = j) == n_bytes) { break; // Reached end } // Write escape - len += sink("\\", 1, writer); - len += sink(&utf8[i], 1, writer); + TRY(st, esink("\\", 1, writer)); + TRY(st, esink(&utf8[i], 1, writer)); } - return len; + return 
st; } -static size_t +SERD_WARN_UNUSED_RESULT static SerdStatus write_text(SerdWriter* writer, TextContext ctx, const char* utf8, size_t n_bytes) { - size_t len = 0; - size_t n_consecutive_quotes = 0; - for (size_t i = 0; i < n_bytes;) { + size_t n_consecutive_quotes = 0; + SerdStatus st = SERD_SUCCESS; + for (size_t i = 0; !st && i < n_bytes;) { if (utf8[i] != '"') { n_consecutive_quotes = 0; } @@ -397,7 +457,7 @@ write_text(SerdWriter* writer, } } - len += sink(&utf8[i], j - i, writer); + st = esink(&utf8[i], j - i, writer); if ((i = j) == n_bytes) { break; // Reached end } @@ -408,23 +468,23 @@ write_text(SerdWriter* writer, switch (in) { case '\\': - len += sink("\\\\", 2, writer); + st = esink("\\\\", 2, writer); continue; case '\b': - len += sink("\\b", 2, writer); + st = esink("\\b", 2, writer); continue; case '\n': case '\r': case '\t': case '\f': - len += sink(&in, 1, writer); // Write character as-is + st = esink(&in, 1, writer); // Write character as-is continue; case '\"': if (n_consecutive_quotes >= 3 || i == n_bytes) { // Two quotes in a row, or quote at string end, escape - len += sink("\\\"", 2, writer); + st = esink("\\\"", 2, writer); } else { - len += sink(&in, 1, writer); + st = esink(&in, 1, writer); } continue; default: @@ -433,19 +493,19 @@ write_text(SerdWriter* writer, } else if (ctx == WRITE_STRING) { switch (in) { case '\\': - len += sink("\\\\", 2, writer); + st = esink("\\\\", 2, writer); continue; case '\n': - len += sink("\\n", 2, writer); + st = esink("\\n", 2, writer); continue; case '\r': - len += sink("\\r", 2, writer); + st = esink("\\r", 2, writer); continue; case '\t': - len += sink("\\t", 2, writer); + st = esink("\\t", 2, writer); continue; case '"': - len += sink("\\\"", 2, writer); + st = esink("\\\"", 2, writer); continue; default: break; @@ -453,10 +513,10 @@ write_text(SerdWriter* writer, if (writer->syntax == SERD_TURTLE) { switch (in) { case '\b': - len += sink("\\b", 2, writer); + st = esink("\\b", 2, writer); 
continue; case '\f': - len += sink("\\f", 2, writer); + st = esink("\\f", 2, writer); continue; default: break; @@ -466,52 +526,69 @@ write_text(SerdWriter* writer, // Write UTF-8 character size_t size = 0; - len += write_character(writer, (const uint8_t*)utf8 + i - 1, &size); + write_character(writer, (const uint8_t*)utf8 + i - 1, &size, &st); + if (st && (writer->flags & SERD_WRITE_STRICT)) { + return st; + } if (size == 0) { // Corrupt input, write replacement character and scan to the next start - len += sink(replacement_char, sizeof(replacement_char), writer); + st = esink(replacement_char, sizeof(replacement_char), writer); for (; i < n_bytes && (utf8[i] & 0x80); ++i) { } } else { i += size - 1; } } - return len; + + return (writer->flags & SERD_WRITE_STRICT) ? st : SERD_SUCCESS; } -static size_t +typedef struct { + SerdWriter* writer; + SerdStatus status; +} UriSinkContext; + +SERD_WARN_UNUSED_RESULT static size_t uri_sink(const void* buf, size_t size, size_t nmemb, void* stream) { (void)size; assert(size == 1); - return write_uri((SerdWriter*)stream, (const char*)buf, nmemb); + + UriSinkContext* const context = (UriSinkContext*)stream; + SerdWriter* const writer = context->writer; + + return write_uri(writer, (const char*)buf, nmemb, &context->status); } -static void +SERD_WARN_UNUSED_RESULT static SerdStatus write_newline(SerdWriter* writer, bool terse) { if (terse || (writer->flags & SERD_WRITE_TERSE)) { - sink(" ", 1, writer); - } else { - sink("\n", 1, writer); - for (int i = 0; i < writer->indent; ++i) { - sink("\t", 1, writer); - } + return esink(" ", 1, writer); + } + + SerdStatus st = SERD_SUCCESS; + TRY(st, esink("\n", 1, writer)); + for (int i = 0; i < writer->indent; ++i) { + TRY(st, esink("\t", 1, writer)); } + + return st; } -static void +SERD_WARN_UNUSED_RESULT static SerdStatus write_top_level_sep(SerdWriter* writer) { - if (!writer->empty && !(writer->flags & SERD_WRITE_TERSE)) { - write_newline(writer, false); - } + return ((!writer->empty 
&& !(writer->flags & SERD_WRITE_TERSE)) + ? write_newline(writer, false) + : SERD_SUCCESS); } -static bool +SERD_WARN_UNUSED_RESULT static SerdStatus write_sep(SerdWriter* writer, const SerdStatementFlags flags, Sep sep) { + SerdStatus st = SERD_SUCCESS; const SepRule* const rule = &rules[sep]; const bool terse = (((flags & SERD_TERSE_S) && (flags & SERD_LIST_S)) || @@ -531,17 +608,17 @@ write_sep(SerdWriter* writer, const SerdStatementFlags flags, Sep sep) // Write newline or space before separator if necessary if (rule->pre_line_after & (1u << writer->last_sep)) { - write_newline(writer, terse); + TRY(st, write_newline(writer, terse)); } else if (rule->pre_space_after & (1u << writer->last_sep)) { - sink(" ", 1, writer); + TRY(st, esink(" ", 1, writer)); } // Write actual separator string - sink(rule->str, rule->len, writer); + TRY(st, esink(rule->str, rule->len, writer)); // Write newline after separator if necessary if (rule->post_line_after & (1u << writer->last_sep)) { - write_newline(writer, terse); + TRY(st, write_newline(writer, terse)); writer->last_sep = SEP_NONE; } else { writer->last_sep = sep; @@ -551,10 +628,10 @@ write_sep(SerdWriter* writer, const SerdStatementFlags flags, Sep sep) writer->indent = 0; } - return true; + return st; } -static SerdStatus +static void reset_context(SerdWriter* writer, bool graph) { // Free any lingering contexts in case there was an error @@ -578,8 +655,6 @@ reset_context(SerdWriter* writer, bool graph) writer->empty = false; serd_stack_clear(&writer->anon_stack); - - return SERD_SUCCESS; } static bool @@ -592,7 +667,7 @@ is_inline_start(const SerdWriter* writer, (field == SERD_OBJECT && (flags & SERD_ANON_O)))); } -static bool +SERD_WARN_UNUSED_RESULT static SerdStatus write_literal(SerdWriter* const writer, const SerdNode* const node, const SerdStatementFlags flags) @@ -606,8 +681,7 @@ write_literal(SerdWriter* const writer, if (supports_abbrev(writer) && type_uri) { if (serd_node_equals(datatype, 
writer->world->xsd_boolean) || serd_node_equals(datatype, writer->world->xsd_integer)) { - sink(node_str, node->length, writer); - return true; + return esink(node_str, node->length, writer); } if (serd_node_equals(datatype, writer->world->xsd_decimal) && @@ -616,29 +690,29 @@ write_literal(SerdWriter* const writer, not be written bare in Turtle. We could add a 0 which is prettier, but changes the text and breaks round tripping. */ - sink(node_str, node->length, writer); - return true; + return esink(node_str, node->length, writer); } } + SerdStatus st = SERD_SUCCESS; if (supports_abbrev(writer) && (node->flags & (SERD_HAS_NEWLINE | SERD_HAS_QUOTE))) { - sink("\"\"\"", 3, writer); - write_text(writer, WRITE_LONG_STRING, node_str, node->length); - sink("\"\"\"", 3, writer); + TRY(st, esink("\"\"\"", 3, writer)); + TRY(st, write_text(writer, WRITE_LONG_STRING, node_str, node->length)); + TRY(st, esink("\"\"\"", 3, writer)); } else { - sink("\"", 1, writer); - write_text(writer, WRITE_STRING, node_str, node->length); - sink("\"", 1, writer); + TRY(st, esink("\"", 1, writer)); + TRY(st, write_text(writer, WRITE_STRING, node_str, node->length)); + TRY(st, esink("\"", 1, writer)); } if (lang && serd_node_string(lang)) { - sink("@", 1, writer); - sink(serd_node_string(lang), lang->length, writer); + TRY(st, esink("@", 1, writer)); + TRY(st, esink(serd_node_string(lang), lang->length, writer)); } else if (type_uri) { - sink("^^", 2, writer); + TRY(st, esink("^^", 2, writer)); return write_node(writer, datatype, (SerdField)-1, flags); } - return true; + return st; } // Return true iff `buf` is a valid prefixed name prefix or suffix @@ -655,11 +729,12 @@ is_name(const char* buf, const size_t len) return true; } -static bool +SERD_WARN_UNUSED_RESULT static SerdStatus write_uri_node(SerdWriter* const writer, const SerdNode* const node, const SerdField field) { + SerdStatus st = SERD_SUCCESS; const SerdNode* prefix = NULL; SerdStringView suffix = {NULL, 0}; const char* node_str 
= serd_node_string(node); @@ -667,21 +742,20 @@ write_uri_node(SerdWriter* const writer, if (supports_abbrev(writer)) { if (field == SERD_PREDICATE && serd_node_equals(node, writer->world->rdf_type)) { - return sink("a", 1, writer) == 1; + return esink("a", 1, writer); } if (serd_node_equals(node, writer->world->rdf_nil)) { - return sink("()", 2, writer) == 2; + return esink("()", 2, writer); } if (has_scheme && !(writer->flags & SERD_WRITE_UNQUALIFIED) && serd_env_qualify_in_place(writer->env, node, &prefix, &suffix) && is_name(serd_node_string(prefix), serd_node_length(prefix)) && is_name(suffix.buf, suffix.len)) { - write_uri_from_node(writer, prefix); - sink(":", 1, writer); - write_uri(writer, suffix.buf, suffix.len); - return true; + TRY(st, write_uri_from_node(writer, prefix)); + TRY(st, esink(":", 1, writer)); + return ewrite_uri(writer, suffix.buf, suffix.len); } } @@ -691,10 +765,10 @@ write_uri_node(SerdWriter* const writer, SERD_ERR_BAD_ARG, "syntax does not support URI reference <%s>\n", node_str); - return false; + return SERD_ERR_BAD_ARG; } - sink("<", 1, writer); + TRY(st, esink("<", 1, writer)); if (!(writer->flags & SERD_WRITE_UNRESOLVED) && serd_env_base_uri(writer->env)) { const SerdURIView base_uri = serd_env_base_uri_view(writer->env); @@ -702,22 +776,22 @@ write_uri_node(SerdWriter* const writer, SerdURIView abs_uri = serd_resolve_uri(uri, base_uri); bool rooted = uri_is_under(&base_uri, &writer->root_uri); const SerdURIView* root = rooted ? &writer->root_uri : &base_uri; + UriSinkContext ctx = {writer, SERD_SUCCESS}; + const bool write_abs = + (!supports_abbrev(writer) || !uri_is_under(&abs_uri, root)); + + write_abs + ? 
serd_write_uri(abs_uri, uri_sink, &ctx) + : serd_write_uri(serd_relative_uri(uri, base_uri), uri_sink, &ctx); - if (writer->syntax == SERD_NTRIPLES || writer->syntax == SERD_NQUADS || - !uri_is_under(&abs_uri, root) || !uri_is_related(&abs_uri, &base_uri)) { - serd_write_uri(abs_uri, uri_sink, writer); - } else { - serd_write_uri(serd_relative_uri(uri, base_uri), uri_sink, writer); - } } else { - write_uri_from_node(writer, node); + TRY(st, write_uri_from_node(writer, node)); } - sink(">", 1, writer); - return true; + return st ? st : esink(">", 1, writer); } -static bool +SERD_WARN_UNUSED_RESULT static SerdStatus write_curie(SerdWriter* const writer, const SerdNode* const node) { writer->last_sep = SEP_NONE; @@ -733,23 +807,23 @@ write_curie(SerdWriter* const writer, const SerdNode* const node) st, "undefined namespace prefix `%s'\n", serd_node_string(node)); - return false; + return st; } - sink("<", 1, writer); - write_uri(writer, prefix.buf, prefix.len); - write_uri(writer, suffix.buf, suffix.len); - sink(">", 1, writer); + TRY(st, esink("<", 1, writer)); + TRY(st, ewrite_uri(writer, prefix.buf, prefix.len)); + TRY(st, ewrite_uri(writer, suffix.buf, suffix.len)); + TRY(st, esink(">", 1, writer)); break; case SERD_TURTLE: case SERD_TRIG: - write_lname(writer, serd_node_string(node), node->length); + TRY(st, write_lname(writer, serd_node_string(node), node->length)); break; } - return true; + return st; } -static bool +SERD_WARN_UNUSED_RESULT static SerdStatus write_blank(SerdWriter* const writer, const SerdNode* node, const SerdField field, @@ -768,42 +842,50 @@ write_blank(SerdWriter* const writer, if (field == SERD_SUBJECT && (flags & SERD_EMPTY_S)) { writer->last_sep = SEP_NONE; // Treat "[]" like a node - return sink("[]", 2, writer) == 2; + return esink("[]", 2, writer); } } - sink("_:", 2, writer); - if (writer->bprefix && + SerdStatus st = SERD_SUCCESS; + TRY(st, esink("_:", 2, writer)); + if (!st && writer->bprefix && !strncmp(node_str, writer->bprefix, 
writer->bprefix_len)) { - sink(node_str + writer->bprefix_len, - node->length - writer->bprefix_len, - writer); + TRY(st, + esink(node_str + writer->bprefix_len, + node->length - writer->bprefix_len, + writer)); } else { - sink(node_str, node->length, writer); + TRY(st, esink(node_str, node->length, writer)); } writer->last_sep = SEP_NONE; - return true; + return st; } -static bool +SERD_WARN_UNUSED_RESULT static SerdStatus write_node(SerdWriter* const writer, const SerdNode* const node, const SerdField field, const SerdStatementFlags flags) { + SerdStatus st = SERD_SUCCESS; + switch (node->type) { case SERD_LITERAL: - return write_literal(writer, node, flags); + st = write_literal(writer, node, flags); + break; case SERD_URI: - return write_uri_node(writer, node, field); + st = write_uri_node(writer, node, field); + break; case SERD_CURIE: - return write_curie(writer, node); + st = write_curie(writer, node); + break; case SERD_BLANK: - return write_blank(writer, node, field, flags); + st = write_blank(writer, node, field, flags); + break; } - return false; + return st; } static bool @@ -812,32 +894,31 @@ is_resource(const SerdNode* node) return node && node->type > SERD_LITERAL; } -static void +SERD_WARN_UNUSED_RESULT static SerdStatus write_pred(SerdWriter* writer, SerdStatementFlags flags, const SerdNode* pred) { - write_node(writer, pred, SERD_PREDICATE, flags); - write_sep(writer, flags, SEP_P_O); + SerdStatus st = SERD_SUCCESS; + TRY(st, write_node(writer, pred, SERD_PREDICATE, flags)); + TRY(st, write_sep(writer, flags, SEP_P_O)); serd_node_set(&writer->context.predicate, pred); + return st; } -static bool +SERD_WARN_UNUSED_RESULT static SerdStatus write_list_obj(SerdWriter* const writer, const SerdStatementFlags flags, const SerdNode* const predicate, - const SerdNode* const object) + const SerdNode* const object, + bool* const is_end) { if (serd_node_equals(object, writer->world->rdf_nil)) { - write_sep(writer, writer->context.flags, SEP_LIST_END); - 
return true; - } - - if (serd_node_equals(predicate, writer->world->rdf_first)) { - write_node(writer, object, SERD_OBJECT, flags); - } else { - write_sep(writer, writer->context.flags, SEP_LIST_SEP); + *is_end = true; + return write_sep(writer, writer->context.flags, SEP_LIST_END); } - return false; + return (serd_node_equals(predicate, writer->world->rdf_first) + ? write_node(writer, object, SERD_OBJECT, flags) + : write_sep(writer, writer->context.flags, SEP_LIST_SEP)); } static SerdStatus @@ -862,49 +943,44 @@ serd_writer_write_statement(SerdWriter* const writer, return SERD_ERR_BAD_ARG; } -#define TRY(write_result) \ - do { \ - if (!(write_result)) { \ - return SERD_ERR_UNKNOWN; \ - } \ - } while (0) - if (writer->syntax == SERD_NTRIPLES || writer->syntax == SERD_NQUADS) { - TRY(write_node(writer, subject, SERD_SUBJECT, flags)); - sink(" ", 1, writer); - TRY(write_node(writer, predicate, SERD_PREDICATE, flags)); - sink(" ", 1, writer); - TRY(write_node(writer, object, SERD_OBJECT, flags)); + TRY(st, write_node(writer, subject, SERD_SUBJECT, flags)); + TRY(st, esink(" ", 1, writer)); + TRY(st, write_node(writer, predicate, SERD_PREDICATE, flags)); + TRY(st, esink(" ", 1, writer)); + TRY(st, write_node(writer, object, SERD_OBJECT, flags)); if (writer->syntax == SERD_NQUADS && graph) { - sink(" ", 1, writer); - TRY(write_node(writer, graph, SERD_GRAPH, flags)); + TRY(st, esink(" ", 1, writer)); + TRY(st, write_node(writer, graph, SERD_GRAPH, flags)); } - sink(" .\n", 3, writer); + TRY(st, esink(" .\n", 3, writer)); return SERD_SUCCESS; } if ((graph && !serd_node_equals(graph, writer->context.graph)) || (!graph && ctx(writer, SERD_GRAPH))) { if (ctx(writer, SERD_SUBJECT)) { - write_sep(writer, writer->context.flags, SEP_END_S); + TRY(st, write_sep(writer, writer->context.flags, SEP_END_S)); } if (ctx(writer, SERD_GRAPH)) { - write_sep(writer, writer->context.flags, SEP_GRAPH_END); + TRY(st, write_sep(writer, writer->context.flags, SEP_GRAPH_END)); } - 
write_top_level_sep(writer); + TRY(st, write_top_level_sep(writer)); reset_context(writer, true); if (graph) { - TRY(write_node(writer, graph, SERD_GRAPH, flags)); - write_sep(writer, flags, SEP_GRAPH_BEGIN); + TRY(st, write_node(writer, graph, SERD_GRAPH, flags)); + TRY(st, write_sep(writer, flags, SEP_GRAPH_BEGIN)); serd_node_set(&writer->context.graph, graph); } } if (writer->context.type == CTX_LIST) { - if (write_list_obj(writer, flags, predicate, object)) { + bool is_end = false; + TRY(st, write_list_obj(writer, flags, predicate, object, &is_end)); + if (is_end) { // Reached end of list pop_context(writer); return SERD_SUCCESS; @@ -918,8 +994,8 @@ serd_writer_write_statement(SerdWriter* const writer, writer->context.indented_object = true; } - write_sep(writer, writer->context.flags, SEP_END_O); - write_node(writer, object, SERD_OBJECT, flags); + TRY(st, write_sep(writer, writer->context.flags, SEP_END_O)); + TRY(st, write_node(writer, object, SERD_OBJECT, flags)); } else { // Abbreviate S if (writer->context.indented_object && writer->indent > 0) { @@ -927,10 +1003,10 @@ serd_writer_write_statement(SerdWriter* const writer, writer->context.indented_object = false; } - Sep sep = ctx(writer, SERD_PREDICATE) ? SEP_END_P : SEP_S_P; - write_sep(writer, writer->context.flags, sep); - write_pred(writer, writer->context.flags, predicate); - write_node(writer, object, SERD_OBJECT, flags); + const Sep sep = ctx(writer, SERD_PREDICATE) ? 
SEP_END_P : SEP_S_P; + TRY(st, write_sep(writer, writer->context.flags, sep)); + TRY(st, write_pred(writer, writer->context.flags, predicate)); + TRY(st, write_node(writer, object, SERD_OBJECT, flags)); } } else { // No abbreviation @@ -941,30 +1017,30 @@ serd_writer_write_statement(SerdWriter* const writer, if (serd_stack_is_empty(&writer->anon_stack)) { if (ctx(writer, SERD_SUBJECT)) { // Terminate last subject - write_sep(writer, writer->context.flags, SEP_END_S); + TRY(st, write_sep(writer, writer->context.flags, SEP_END_S)); } - write_top_level_sep(writer); + TRY(st, write_top_level_sep(writer)); } if (serd_stack_is_empty(&writer->anon_stack)) { - write_node(writer, subject, SERD_SUBJECT, flags); + TRY(st, write_node(writer, subject, SERD_SUBJECT, flags)); if (!(flags & (SERD_ANON_S | SERD_LIST_S))) { - write_sep(writer, writer->context.flags, SEP_S_P); + TRY(st, write_sep(writer, writer->context.flags, SEP_S_P)); } else if (flags & SERD_ANON_S) { - write_sep(writer, writer->context.flags, SEP_ANON_S_P); + TRY(st, write_sep(writer, writer->context.flags, SEP_ANON_S_P)); } } else { - write_sep(writer, writer->context.flags, SEP_ANON_S_P); + TRY(st, write_sep(writer, writer->context.flags, SEP_ANON_S_P)); } reset_context(writer, false); serd_node_set(&writer->context.subject, subject); if (!(flags & SERD_LIST_S)) { - write_pred(writer, flags, predicate); + TRY(st, write_pred(writer, flags, predicate)); } - write_node(writer, object, SERD_OBJECT, flags); + TRY(st, write_node(writer, object, SERD_OBJECT, flags)); } // Push context for list or anonymous subject if necessary @@ -997,7 +1073,7 @@ serd_writer_write_statement(SerdWriter* const writer, return st; } -static SerdStatus +SERD_WARN_UNUSED_RESULT static SerdStatus serd_writer_end_anon(SerdWriter* writer, const SerdNode* node) { if (writer->syntax == SERD_NTRIPLES || writer->syntax == SERD_NQUADS) { @@ -1011,7 +1087,7 @@ serd_writer_end_anon(SerdWriter* writer, const SerdNode* node) serd_node_string(node)); } 
- write_sep(writer, writer->context.flags, SEP_ANON_END); + SerdStatus st = write_sep(writer, writer->context.flags, SEP_ANON_END); pop_context(writer); if (writer->context.predicate && @@ -1020,7 +1096,7 @@ serd_writer_end_anon(SerdWriter* writer, const SerdNode* node) memset(writer->context.predicate, 0, sizeof(SerdNode)); } - return SERD_SUCCESS; + return st; } static SerdStatus @@ -1045,12 +1121,13 @@ serd_writer_on_event(SerdWriter* writer, const SerdEvent* event) SerdStatus serd_writer_finish(SerdWriter* writer) { + SerdStatus st = SERD_SUCCESS; if (ctx(writer, SERD_SUBJECT)) { - write_sep(writer, writer->context.flags, SEP_END_S); + st = write_sep(writer, writer->context.flags, SEP_END_S); } - if (ctx(writer, SERD_GRAPH)) { - write_sep(writer, writer->context.flags, SEP_GRAPH_END); + if (!st && ctx(writer, SERD_GRAPH)) { + st = write_sep(writer, writer->context.flags, SEP_GRAPH_END); } // Free any lingering contexts in case there was an error @@ -1062,7 +1139,7 @@ serd_writer_finish(SerdWriter* writer) writer->indent = 0; writer->context = WRITE_CONTEXT_NULL; writer->empty = true; - return SERD_SUCCESS; + return st; } SerdWriter* @@ -1116,20 +1193,24 @@ serd_writer_set_base_uri(SerdWriter* writer, const SerdNode* uri) return SERD_ERR_BAD_ARG; } - serd_env_set_base_uri(writer->env, serd_node_string_view(uri)); + SerdStatus st = + serd_env_set_base_uri(writer->env, serd_node_string_view(uri)); - if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) { - if (ctx(writer, SERD_GRAPH) || ctx(writer, SERD_SUBJECT)) { - sink(" .\n\n", 4, writer); - reset_context(writer, true); + if (!st) { + if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) { + if (ctx(writer, SERD_GRAPH) || ctx(writer, SERD_SUBJECT)) { + TRY(st, esink(" .\n\n", 4, writer)); + reset_context(writer, true); + } + TRY(st, esink("@base <", 7, writer)); + TRY(st, esink(serd_node_string(uri), uri->length, writer)); + TRY(st, esink("> .\n", 4, writer)); } - sink("@base <", 7, 
writer); - sink(serd_node_string(uri), uri->length, writer); - sink("> .\n", 4, writer); + writer->indent = 0; + reset_context(writer, true); } - writer->indent = 0; - return reset_context(writer, true); + return st; } SerdStatus @@ -1152,32 +1233,32 @@ serd_writer_set_prefix(SerdWriter* writer, const SerdNode* name, const SerdNode* uri) { - SerdStatus st = SERD_SUCCESS; - if (name->type != SERD_LITERAL || uri->type != SERD_URI) { return SERD_ERR_BAD_ARG; } - if ((st = serd_env_set_prefix(writer->env, - serd_node_string_view(name), - serd_node_string_view(uri)))) { - return st; - } + SerdStatus st = serd_env_set_prefix( + writer->env, serd_node_string_view(name), serd_node_string_view(uri)); - if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) { - if (ctx(writer, SERD_GRAPH) || ctx(writer, SERD_SUBJECT)) { - sink(" .\n\n", 4, writer); - reset_context(writer, true); + if (!st) { + if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) { + if (ctx(writer, SERD_GRAPH) || ctx(writer, SERD_SUBJECT)) { + TRY(st, esink(" .\n\n", 4, writer)); + reset_context(writer, true); + } + + TRY(st, esink("@prefix ", 8, writer)); + TRY(st, esink(serd_node_string(name), name->length, writer)); + TRY(st, esink(": <", 3, writer)); + TRY(st, write_uri_from_node(writer, uri)); + TRY(st, esink("> .\n", 4, writer)); } - sink("@prefix ", 8, writer); - sink(serd_node_string(name), name->length, writer); - sink(": <", 3, writer); - write_uri_from_node(writer, uri); - sink("> .\n", 4, writer); + + writer->indent = 0; + reset_context(writer, true); } - writer->indent = 0; - return reset_context(writer, true); + return st; } void diff --git a/test/test_string.c b/test/test_string.c index a3fb9247..2456e17a 100644 --- a/test/test_string.c +++ b/test/test_string.c @@ -39,7 +39,7 @@ test_strerror(void) { const char* msg = serd_strerror(SERD_SUCCESS); assert(!strcmp(msg, "Success")); - for (int i = SERD_FAILURE; i <= SERD_ERR_OVERFLOW; ++i) { + for (int i = SERD_FAILURE; i 
<= SERD_ERR_BAD_WRITE; ++i) { msg = serd_strerror((SerdStatus)i); assert(strcmp(msg, "Success")); } diff --git a/test/test_writer.c b/test/test_writer.c index 0611a40d..169c460a 100644 --- a/test/test_writer.c +++ b/test/test_writer.c @@ -19,6 +19,8 @@ #include "serd/serd.h" #include +#include +#include #include #include @@ -170,6 +172,103 @@ test_writer_stack_overflow(void) serd_world_free(world); } +static void +test_strict_write(void) +{ + SerdWorld* world = serd_world_new(); + SerdNodes* nodes = serd_world_nodes(world); + const char* path = "serd_strict_write_test.ttl"; + FILE* fd = fopen(path, "wb"); + assert(fd); + + SerdEnv* env = serd_env_new(SERD_EMPTY_STRING()); + SerdWriter* writer = serd_writer_new( + world, SERD_TURTLE, SERD_WRITE_STRICT, env, (SerdWriteFunc)fwrite, fd); + + assert(writer); + + const SerdSink* sink = serd_writer_sink(writer); + const uint8_t bad_str[] = {0xFF, 0x90, 'h', 'i', 0}; + const SerdStringView bad_view = {(const char*)bad_str, 4}; + + const SerdNode* s = + serd_nodes_uri(nodes, SERD_STRING("http://example.org/s")); + + const SerdNode* p = + serd_nodes_uri(nodes, SERD_STRING("http://example.org/s")); + + const SerdNode* bad_lit = serd_nodes_string(nodes, bad_view); + const SerdNode* bad_uri = serd_nodes_uri(nodes, bad_view); + + assert(serd_sink_write(sink, 0, s, p, bad_lit, 0) == SERD_ERR_BAD_TEXT); + assert(serd_sink_write(sink, 0, s, p, bad_uri, 0) == SERD_ERR_BAD_TEXT); + + serd_writer_free(writer); + serd_env_free(env); + fclose(fd); + serd_world_free(world); +} + +static size_t +faulty_sink(const void* const buf, + const size_t size, + const size_t nmemb, + void* const stream) +{ + (void)buf; + (void)size; + (void)nmemb; + + if (nmemb > 1) { + errno = stream ? 
ERANGE : 0; + return 0u; + } + + return size * nmemb; +} + +static void +test_write_error(void) +{ + SerdWorld* world = serd_world_new(); + SerdNodes* nodes = serd_world_nodes(world); + SerdEnv* env = serd_env_new(SERD_EMPTY_STRING()); + + const SerdNode* s = + serd_nodes_uri(nodes, SERD_STRING("http://example.org/s")); + + const SerdNode* p = + serd_nodes_uri(nodes, SERD_STRING("http://example.org/p")); + + const SerdNode* o = + serd_nodes_uri(nodes, SERD_STRING("http://example.org/o")); + + // Test without setting errno + + SerdWriter* writer = + serd_writer_new(world, SERD_TURTLE, 0u, env, faulty_sink, NULL); + + assert(writer); + + SerdStatus st = serd_sink_write(serd_writer_sink(writer), 0u, s, p, o, NULL); + assert(st == SERD_ERR_BAD_WRITE); + + serd_writer_free(writer); + + // Test with setting errno + writer = serd_writer_new(world, SERD_TURTLE, 0u, env, faulty_sink, world); + + assert(writer); + + assert(serd_sink_write(serd_writer_sink(writer), 0u, s, p, o, NULL) == + SERD_ERR_BAD_WRITE); + + serd_writer_free(writer); + + serd_env_free(env); + serd_world_free(world); +} + int main(void) { @@ -177,6 +276,8 @@ main(void) test_write_bad_prefix(); test_write_long_literal(); test_writer_stack_overflow(); + test_strict_write(); + test_write_error(); return 0; } -- cgit v1.2.1