From 02d56e83931e53e1cde57247c64d56fda3804f77 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Fri, 1 Dec 2023 20:39:44 -0500 Subject: [WIP] Tighten up reader node management [WIP] Broken on 32-bit This makes the reader stack manipulations stricter, to make the code more regular and avoid redundant work and bad cache activity. Now, functions that push node headers and their bodies are responsible for (more or less) immediately pushing any trailing null bytes required for termination and alignment. This makes the writes to the node in the stack more local, ensures nodes are terminated as early as possible (to reduce the risk of using non-terminated strings), and avoids the need to calculate aligned stack allocations. --- src/byte_source.h | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) (limited to 'src/byte_source.h') diff --git a/src/byte_source.h b/src/byte_source.h index 5ae40acb..9e65ef75 100644 --- a/src/byte_source.h +++ b/src/byte_source.h @@ -15,8 +15,8 @@ #include #include -#include #include +#include typedef struct { SerdInputStream* in; ///< Input stream to read from @@ -32,14 +32,15 @@ typedef struct { bool eof; ///< True iff end of file reached } SerdByteSource; -SerdByteSource* -serd_byte_source_new_input(ZixAllocator* allocator, - SerdInputStream* input, - const SerdNode* name, - size_t block_size); +SerdStatus +serd_byte_source_init(ZixAllocator* allocator, + SerdByteSource* source, + SerdInputStream* input, + const SerdNode* name, + size_t block_size); void -serd_byte_source_free(ZixAllocator* allocator, SerdByteSource* source); +serd_byte_source_destroy(ZixAllocator* allocator, SerdByteSource* source); SerdStatus serd_byte_source_prepare(SerdByteSource* source); @@ -50,35 +51,37 @@ serd_byte_source_page(SerdByteSource* source); SerdStatus serd_byte_source_skip_bom(SerdByteSource* source); -ZIX_PURE_FUNC static inline uint8_t -serd_byte_source_peek(SerdByteSource* source) +ZIX_PURE_FUNC static inline int +serd_byte_source_peek(const SerdByteSource* const source) { assert(source->prepared); - return source->read_buf[source->read_head]; + + return source->eof ? EOF : (int)source->read_buf[source->read_head]; } static inline SerdStatus -serd_byte_source_advance(SerdByteSource* source) +serd_byte_source_advance_past(SerdByteSource* const source, const int current) { - SerdStatus st = SERD_SUCCESS; - const bool was_eof = source->eof; + /* Reading the buffer here can be an expensive cache miss, so we only assert + that the passed current character is correct in debug builds. In release + builds, this function only accesses the `source` structure, unless a page + read needs to happen. */ + + assert(current == serd_byte_source_peek(source)); - switch (serd_byte_source_peek(source)) { - case '\0': - break; - case '\n': + if (current == '\n') { ++source->caret.line; source->caret.col = 0; - break; - default: + } else { ++source->caret.col; } + SerdStatus st = SERD_SUCCESS; if (++source->read_head >= source->buf_size) { st = serd_byte_source_page(source); } - return (was_eof && source->eof) ? SERD_FAILURE : st; + return st; } #endif // SERD_SRC_BYTE_SOURCE_H -- cgit v1.2.1