From 2d5e6aa234faeb406911ed44f56038dc73f8ff8e Mon Sep 17 00:00:00 2001 From: David Robillard Date: Mon, 27 Feb 2023 21:19:15 -0500 Subject: [WIP] Simplify block writing interface and improve error handling --- src/block_dumper.h | 55 ++++++++++++++--------- src/byte_source.h | 1 + src/writer.c | 129 +++++++++++++++++++++++++---------------------------- 3 files changed, 95 insertions(+), 90 deletions(-) diff --git a/src/block_dumper.h b/src/block_dumper.h index 038f0a82..9820ed03 100644 --- a/src/block_dumper.h +++ b/src/block_dumper.h @@ -7,15 +7,16 @@ #include "serd/output_stream.h" #include "serd/status.h" #include "serd/world.h" +#include "serd/write_result.h" #include "zix/allocator.h" #include "zix/attributes.h" #include #include +/// The partially inlinable sink interface used by the writer typedef struct { - ZixAllocator* ZIX_NONNULL allocator; ///< Buffer allocator - + ZixAllocator* ZIX_NONNULL allocator; ///< Buffer allocator SerdOutputStream* ZIX_ALLOCATED out; ///< Output stream to write to char* ZIX_ALLOCATED buf; ///< Local buffer if needed size_t size; ///< Bytes pending for this block @@ -34,6 +35,12 @@ serd_block_dumper_open(const SerdWorld* ZIX_NONNULL world, SerdOutputStream* ZIX_NONNULL output, size_t block_size); +/** + Flush any pending writes. + + This is typically used before closing to ensure that all of the writes have + actually been written to disk. +*/ SerdStatus serd_block_dumper_flush(SerdBlockDumper* ZIX_NONNULL dumper); @@ -46,37 +53,43 @@ serd_block_dumper_close(SerdBlockDumper* ZIX_NONNULL dumper); This works like any other SerdWriteFunc, but will append to an internal buffer and only actually write to the output when a whole block is ready. */ -static inline size_t -serd_block_dumper_write(const void* ZIX_NONNULL buf, - const size_t size, - const size_t nmemb, - SerdBlockDumper* ZIX_NONNULL const dumper) +static inline SerdWriteResult +serd_block_dumper_write(SerdBlockDumper* ZIX_NONNULL const dumper, + const void* ZIX_NONNULL buf, + const size_t size) { - if (dumper->block_size == 1) { - return dumper->out->write(buf, size, nmemb, dumper->out->stream); + SerdWriteResult result = {SERD_SUCCESS, 0U}; + const size_t block_size = dumper->block_size; + const SerdOutputStream* const out = dumper->out; + + if (block_size == 1) { + result.count = out->write(buf, 1U, size, out->stream); + result.status = result.count == size ? SERD_SUCCESS : SERD_BAD_WRITE; + return result; } - size_t len = size * nmemb; - const size_t orig_len = len; - while (len) { - const size_t space = dumper->block_size - dumper->size; - const size_t n = space < len ? space : len; + while (result.count < size) { + const size_t unwritten = size - result.count; + const size_t space = block_size - dumper->size; + const size_t n = space < unwritten ? space : unwritten; // Write as much as possible into the remaining buffer space - memcpy(dumper->buf + dumper->size, buf, n); + memcpy(dumper->buf + dumper->size, (const char*)buf + result.count, n); dumper->size += n; - buf = (const char*)buf + n; - len -= n; + result.count += n; // Flush page if buffer is full - if (dumper->size == dumper->block_size) { - dumper->out->write( - dumper->buf, 1, dumper->block_size, dumper->out->stream); + if (dumper->size == block_size) { + if (out->write(dumper->buf, 1, block_size, out->stream) != block_size) { + result.status = SERD_BAD_WRITE; + return result; + } + dumper->size = 0; } } - return orig_len; + return result; } #endif // SERD_SRC_DUMPER_H diff --git a/src/byte_source.h b/src/byte_source.h index 9e65ef75..dae1c88e 100644 --- a/src/byte_source.h +++ b/src/byte_source.h @@ -18,6 +18,7 @@ #include #include +/// The partially inlinable source interface used by the reader typedef struct { SerdInputStream* in; ///< Input stream to read from size_t block_size; ///< Number of bytes to read at a time diff --git a/src/writer.c b/src/writer.c index ccb24e5f..faf5b7d9 100644 --- a/src/writer.c +++ b/src/writer.c @@ -9,7 +9,6 @@ #include "node.h" #include "sink.h" #include "string_utils.h" -#include "system.h" #include "try.h" #include "turtle.h" #include "uri_utils.h" @@ -26,11 +25,11 @@ #include "serd/syntax.h" #include "serd/uri.h" #include "serd/world.h" +#include "serd/write_result.h" #include "zix/allocator.h" #include "zix/string_view.h" #include -#include #include #include #include @@ -258,33 +257,23 @@ pop_context(SerdWriter* writer) writer->context = writer->anon_stack[--writer->anon_stack_size]; } -SERD_NODISCARD static size_t -sink(const void* buf, size_t len, SerdWriter* writer) +SERD_NODISCARD static inline SerdWriteResult +wsink(const void* buf, size_t len, SerdWriter* writer) { - const size_t written = serd_block_dumper_write(buf, 1, len, &writer->output); - - if (written != len) { - if (errno) { - char message[1024] = {0}; - serd_system_strerror(errno, message, sizeof(message)); - - w_err(writer, SERD_BAD_WRITE, "write error (%s)", message); - } else { - w_err(writer, - SERD_BAD_WRITE, - "unknown write error, %zu / %zu bytes written", - written, - len); - } - } + return serd_block_dumper_write(&writer->output, buf, len); +} - return written; +SERD_NODISCARD static inline size_t +sink(const void* buf, size_t len, SerdWriter* writer) +{ + const SerdWriteResult r = serd_block_dumper_write(&writer->output, buf, len); + return r.count; } SERD_NODISCARD static inline SerdStatus esink(const void* buf, size_t len, SerdWriter* writer) { - return sink(buf, len, writer) == len ? SERD_SUCCESS : SERD_BAD_WRITE; + return serd_block_dumper_write(&writer->output, buf, len).status; } static VariableResult @@ -295,20 +284,25 @@ write_UCHAR(SerdWriter* const writer, const uint8_t* const utf8) uint8_t c_size = 0U; const uint32_t c = parse_utf8_char(utf8, &c_size); + SerdWriteResult w = {SERD_SUCCESS, 0U}; result.read_count = c_size; + if (result.read_count == 0U) { + w = wsink(replacement_char, sizeof(replacement_char), writer); result.status = w_err(writer, SERD_BAD_TEXT, "invalid UTF-8 start: %X", utf8[0]); } else if (c <= 0xFFFF) { // Write short (4 digit) escape snprintf(escape, sizeof(escape), "\\u%04X", c); - result.write_count = sink(escape, 6, writer); + w = wsink(escape, 6, writer); } else { - // Write long (6 digit) escape + // Write long (8 digit) escape snprintf(escape, sizeof(escape), "\\U%08X", c); - result.write_count = sink(escape, 10, writer); + w = wsink(escape, 10, writer); } + result.status = w.status ? w.status : result.status; + result.write_count = w.count; return result; } @@ -542,7 +536,7 @@ write_lname(SerdWriter* writer, const char* utf8, const size_t n_bytes) return st; } -SERD_NODISCARD static size_t +SERD_NODISCARD static SerdStatus write_long_string_escape(SerdWriter* const writer, const size_t n_consecutive_quotes, const bool is_last, @@ -550,47 +544,47 @@ write_long_string_escape(SerdWriter* const writer, { switch (c) { case '\\': - return sink("\\\\", 2, writer); + return esink("\\\\", 2, writer); case '\b': - return sink("\\b", 2, writer); + return esink("\\b", 2, writer); case '\n': case '\r': case '\t': case '\f': - return sink(&c, 1, writer); // Write character as-is + return esink(&c, 1, writer); // Write character as-is case '\"': if (n_consecutive_quotes >= 3 || is_last) { // Two quotes in a row, or quote at string end, escape - return sink("\\\"", 2, writer); + return esink("\\\"", 2, writer); } - return sink(&c, 1, writer); + return esink(&c, 1, writer); default: break; } - return 0; + return SERD_FAILURE; } -SERD_NODISCARD static size_t +SERD_NODISCARD static SerdStatus write_short_string_escape(SerdWriter* const writer, const char c) { switch (c) { case '\\': - return sink("\\\\", 2, writer); + return esink("\\\\", 2, writer); case '\n': - return sink("\\n", 2, writer); + return esink("\\n", 2, writer); case '\r': - return sink("\\r", 2, writer); + return esink("\\r", 2, writer); case '\t': - return (writer->flags & SERD_WRITE_ESCAPES) ? sink("\\t", 2, writer) - : sink("\t", 1, writer); + return (writer->flags & SERD_WRITE_ESCAPES) ? esink("\\t", 2, writer) + : esink("\t", 1, writer); case '"': - return sink("\\\"", 2, writer); + return esink("\\\"", 2, writer); default: break; } @@ -599,15 +593,15 @@ write_short_string_escape(SerdWriter* const writer, const char c) // These are written with UCHAR in pre-NTriples test cases format switch (c) { case '\b': - return sink("\\b", 2, writer); + return esink("\\b", 2, writer); case '\f': - return sink("\\f", 2, writer); + return esink("\\f", 2, writer); default: break; } } - return 0; + return SERD_FAILURE; } SERD_NODISCARD static bool @@ -619,6 +613,7 @@ text_must_escape(const uint8_t c) SERD_NODISCARD static SerdStatus write_short_text(SerdWriter* writer, const char* utf8, size_t n_bytes) { + const bool lax = (writer->flags & SERD_WRITE_LAX); VariableResult result = {SERD_SUCCESS, 0U, 0U}; for (size_t i = 0; !result.status && i < n_bytes;) { // Write leading chunk as a single fast bulk write @@ -629,22 +624,20 @@ write_short_text(SerdWriter* writer, const char* utf8, size_t n_bytes) } // Try to write character as a special short escape (newline and friends) - const char in = utf8[i]; - const size_t escape_len = write_short_string_escape(writer, in); - - if (!escape_len) { + const char in = utf8[i]; + result.status = write_short_string_escape(writer, in); + if (!result.status) { + result.read_count = 1U; + } else if (result.status == SERD_FAILURE) { // No special escape for this character, write full Unicode escape result = write_text_character(writer, (const uint8_t*)utf8 + i); - i += result.read_count; + } - if (!result.read_count && (writer->flags & SERD_WRITE_LAX)) { - // Corrupt input, write replacement character and scan to the next start - result.status = - esink(replacement_char, sizeof(replacement_char), writer); - i += next_text_index(utf8, i, n_bytes, is_utf8_leading); - } + if (!result.read_count) { + i = next_text_index(utf8, i + 1U, n_bytes, is_utf8_leading); + result.status = lax ? SERD_SUCCESS : result.status; } else { - ++i; + i += result.read_count; } } @@ -654,6 +647,7 @@ write_short_text(SerdWriter* writer, const char* utf8, size_t n_bytes) SERD_NODISCARD static SerdStatus write_long_text(SerdWriter* writer, const char* utf8, size_t n_bytes) { + const bool lax = (writer->flags & SERD_WRITE_LAX); size_t n_quotes = 0; VariableResult result = {SERD_SUCCESS, 0U, 0U}; for (size_t i = 0; !result.status && i < n_bytes;) { @@ -668,26 +662,23 @@ write_long_text(SerdWriter* writer, const char* utf8, size_t n_bytes) break; // Reached end } - // Try to write character as a special long escape (newline and friends) - const char in = utf8[i]; - n_quotes = (in == '\"') ? (n_quotes + 1U) : 0; - const size_t escape_len = - write_long_string_escape(writer, n_quotes, i + 1U == n_bytes, in); + const bool last = i + 1U == n_bytes; + n_quotes = (utf8[i] == '\"') ? (n_quotes + 1U) : 0U; - if (!escape_len) { + // Try to write character as a special long escape (newline and friends) + result.status = write_long_string_escape(writer, n_quotes, last, utf8[i]); + if (!result.status) { + result.read_count = 1U; + } else if (result.status == SERD_FAILURE) { // No special escape for this character, write full Unicode escape result = write_UCHAR(writer, (const uint8_t*)utf8 + i); - i += result.read_count; + } - if (!result.read_count && (writer->flags & SERD_WRITE_LAX)) { - // Corrupt input, write replacement character and scan to the next - // start - result.status = - esink(replacement_char, sizeof(replacement_char), writer); - i += next_text_index(utf8, i, n_bytes, is_utf8_leading); - } + if (!result.read_count) { + i = next_text_index(utf8, i + 1U, n_bytes, is_utf8_leading); + result.status = lax ? SERD_SUCCESS : result.status; } else { - ++i; + i += result.read_count; } } -- cgit v1.2.1