From e2d2c7b027ac9ac142ae306d248f2d687df211eb Mon Sep 17 00:00:00 2001 From: David Robillard Date: Tue, 9 Apr 2019 10:28:59 +0200 Subject: Align nodes with posix_memalign if possible --- src/byte_sink.c | 6 +++++- src/byte_source.c | 2 +- src/node.c | 31 ++++++++++++++++--------------- src/node.h | 8 ++++++++ src/reader.c | 18 +++++++++++++++--- src/stack.h | 6 +++--- src/system.c | 25 +++++++++++++++++++++++-- src/system.h | 10 ++++++++-- src/writer.c | 2 +- tests/overflow_test.c | 28 +++++++++++++++------------- 10 files changed, 95 insertions(+), 41 deletions(-) diff --git a/src/byte_sink.c b/src/byte_sink.c index 738290fa..965d9666 100644 --- a/src/byte_sink.c +++ b/src/byte_sink.c @@ -40,7 +40,11 @@ serd_byte_sink_new(SerdWriteFunc write_func, void* stream, size_t block_size) sink->sink = write_func; sink->stream = stream; sink->block_size = block_size; - sink->buf = ((block_size > 1) ? (char*)serd_bufalloc(block_size) : NULL); + + if (block_size > 1) { + sink->buf = (char*)serd_allocate_buffer(block_size); + } + return sink; } diff --git a/src/byte_source.c b/src/byte_source.c index 0cfdc0bd..eac74c63 100644 --- a/src/byte_source.c +++ b/src/byte_source.c @@ -66,7 +66,7 @@ serd_byte_source_open_source(SerdByteSource* source, source->cur = cur; if (page_size > 1) { - source->file_buf = (uint8_t*)serd_bufalloc(page_size); + source->file_buf = (uint8_t*)serd_allocate_buffer(page_size); source->read_buf = source->file_buf; memset(source->file_buf, '\0', page_size); } else { diff --git a/src/node.c b/src/node.c index cc0b0a30..a2b0c1c5 100644 --- a/src/node.c +++ b/src/node.c @@ -19,6 +19,7 @@ #include "serd_internal.h" #include "static_nodes.h" #include "string_utils.h" +#include "system.h" #include "serd/serd.h" @@ -31,37 +32,34 @@ #include #include -static const size_t serd_node_align = sizeof(SerdNode); - static SerdNode* serd_new_from_uri(const SerdURI* uri, const SerdURI* base); static size_t serd_node_pad_size(const size_t n_bytes) { - const size_t pad = serd_node_align - (n_bytes + 2) % serd_node_align; - const size_t size = n_bytes + 2 + pad; - assert(size % serd_node_align == 0); - return size; + const size_t pad = sizeof(SerdNode) - (n_bytes + 2) % sizeof(SerdNode); + + return n_bytes + 2 + pad; } static const SerdNode* serd_node_meta_c(const SerdNode* node) { - return node + 1 + (serd_node_pad_size(node->n_bytes) / serd_node_align); + return node + 1 + (serd_node_pad_size(node->n_bytes) / sizeof(SerdNode)); } static SerdNode* serd_node_meta(SerdNode* node) { - return node + 1 + (serd_node_pad_size(node->n_bytes) / serd_node_align); + return node + 1 + (serd_node_pad_size(node->n_bytes) / sizeof(SerdNode)); } static const SerdNode* serd_node_maybe_get_meta_c(const SerdNode* node) { return (node->flags & (SERD_HAS_LANGUAGE | SERD_HAS_DATATYPE)) - ? (node + 1 + (serd_node_pad_size(node->n_bytes) / serd_node_align)) + ? (node + 1 + (serd_node_pad_size(node->n_bytes) / sizeof(SerdNode))) : NULL; } @@ -94,11 +92,12 @@ SerdNode* serd_node_malloc(size_t n_bytes, SerdNodeFlags flags, SerdNodeType type) { const size_t size = sizeof(SerdNode) + serd_node_pad_size(n_bytes); - SerdNode* node = (SerdNode*)calloc(1, size); + SerdNode* node = (SerdNode*)serd_calloc_aligned(size, serd_node_align); + node->n_bytes = 0; node->flags = flags; node->type = type; - assert((uintptr_t)node % serd_node_align == 0u); + return node; } @@ -184,7 +183,7 @@ serd_new_plain_literal_i(const char* str, memcpy(serd_node_buffer(node), str, str_len); node->n_bytes = str_len; - SerdNode* lang_node = node + 1 + (len / serd_node_align); + SerdNode* lang_node = node + 1 + (len / sizeof(SerdNode)); lang_node->type = SERD_LITERAL; lang_node->n_bytes = lang_len; memcpy(serd_node_buffer(lang_node), lang, lang_len); @@ -215,7 +214,7 @@ serd_new_typed_literal_i(const char* str, memcpy(serd_node_buffer(node), str, str_len); node->n_bytes = str_len; - SerdNode* datatype_node = node + 1 + (len / serd_node_align); + SerdNode* datatype_node = node + 1 + (len / sizeof(SerdNode)); datatype_node->n_bytes = datatype_uri_len; datatype_node->type = SERD_URI; memcpy(serd_node_buffer(datatype_node), datatype_uri, datatype_uri_len); @@ -339,9 +338,11 @@ serd_node_copy(const SerdNode* node) return NULL; } - const size_t size = serd_node_total_size(node); serd_node_check_padding(node); - SerdNode* copy = (SerdNode*)calloc(1, size + 3); + + const size_t size = serd_node_total_size(node); + SerdNode* copy = (SerdNode*)serd_calloc_aligned(size + 3, serd_node_align); + memcpy(copy, node, size); return copy; } diff --git a/src/node.h b/src/node.h index db5d25e2..d69d6421 100644 --- a/src/node.h +++ b/src/node.h @@ -28,6 +28,14 @@ struct SerdNodeImpl { SerdNodeType type; /**< Node type */ }; +/* We need nodes aligned to at least size_t so that this is not an unaligned + access. Though it would be possible to make the node header fixed-size and + fit entirely in 64 bits, saving some memory in the process, using weird + types here needs a lot of sketchy casting, particularly since size_t is + universal for string lengths in C. So, we simply suffer the hassle (and + overhead) internally for now to prevent the API from being too weird. */ +static const size_t serd_node_align = sizeof(size_t); + static inline char* serd_node_buffer(SerdNode* node) { diff --git a/src/reader.c b/src/reader.c index 2a021e83..f4a874cb 100644 --- a/src/reader.c +++ b/src/reader.c @@ -83,8 +83,9 @@ push_node_padded(SerdReader* reader, size_t maxlen, } *terminator = 0; - void* mem = serd_stack_push_aligned( - &reader->stack, sizeof(SerdNode) + maxlen + 1, sizeof(SerdNode)); + void* mem = serd_stack_push_aligned(&reader->stack, + sizeof(SerdNode) + maxlen + 1, + sizeof(SerdNode)); if (!mem) { return NULL; @@ -160,15 +161,26 @@ serd_reader_new(SerdWorld* world, const SerdSink* sink, size_t stack_size) { + if (stack_size < 8 * sizeof(SerdNode)) { + return NULL; + } + SerdReader* me = (SerdReader*)calloc(1, sizeof(SerdReader)); me->world = world; me->sink = sink; - me->stack = serd_stack_new(stack_size); + me->stack = serd_stack_new(stack_size, serd_node_align); me->syntax = syntax; me->next_id = 1; me->strict = true; + /* Reserve a bit of space at the end of the stack to zero pad nodes. This + particular kind of overflow could be detected (in emit_statement), but + this is simpler and a bit more resilient to mistakes since the reader + generally pushes only a few bytes at a time, making it pretty unlikely + to overshoot the buffer by this much. */ + me->stack.buf_size -= 8 * sizeof(size_t); + me->rdf_first = push_node(me, SERD_URI, NS_RDF "first", 48); me->rdf_rest = push_node(me, SERD_URI, NS_RDF "rest", 47); me->rdf_nil = push_node(me, SERD_URI, NS_RDF "nil", 46); diff --git a/src/stack.h b/src/stack.h index dbb196ae..8764afad 100644 --- a/src/stack.h +++ b/src/stack.h @@ -17,7 +17,7 @@ #ifndef SERD_STACK_H #define SERD_STACK_H -#include "serd_internal.h" +#include "system.h" #include #include @@ -38,10 +38,10 @@ typedef struct { #define SERD_STACK_BOTTOM sizeof(void*) static inline SerdStack -serd_stack_new(size_t size) +serd_stack_new(size_t size, size_t align) { SerdStack stack; - stack.buf = (char*)calloc(size, 1); + stack.buf = (char*)serd_calloc_aligned(size, align); stack.buf_size = size; stack.size = SERD_STACK_BOTTOM; return stack; diff --git a/src/system.c b/src/system.c index e9ff3883..5141bf40 100644 --- a/src/system.c +++ b/src/system.c @@ -23,16 +23,37 @@ #include #include +#include void* -serd_bufalloc(size_t size) +serd_malloc_aligned(size_t size, size_t alignment) { #ifdef HAVE_POSIX_MEMALIGN void* ptr = NULL; - const int ret = posix_memalign(&ptr, SERD_PAGE_SIZE, size); + const int ret = posix_memalign(&ptr, alignment, size); return ret ? NULL : ptr; #else return malloc(size); #endif } +void* +serd_calloc_aligned(size_t size, size_t alignment) +{ +#ifdef HAVE_POSIX_MEMALIGN + void* ptr = serd_malloc_aligned(size, alignment); + if (ptr) { + memset(ptr, 0, size); + } + return ptr; +#else + (void)alignment; + return calloc(1, size); +#endif +} + +void* +serd_allocate_buffer(size_t size) +{ + return serd_malloc_aligned(size, SERD_PAGE_SIZE); +} diff --git a/src/system.h b/src/system.h index 6a2eae11..903fdb3d 100644 --- a/src/system.h +++ b/src/system.h @@ -36,7 +36,13 @@ serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream) return 1; } -void* -serd_bufalloc(size_t size); +/** Allocate a buffer aligned to `alignment` bytes. */ +void* serd_malloc_aligned(size_t size, size_t alignment); + +/** Allocate a zeroed buffer aligned to `alignment` bytes. */ +void* serd_calloc_aligned(size_t size, size_t alignment); + +/** Allocate an aligned buffer for I/O. */ +void* serd_allocate_buffer(size_t size); #endif // SERD_SYSTEM_H diff --git a/src/writer.c b/src/writer.c index 8353e873..a29d030a 100644 --- a/src/writer.c +++ b/src/writer.c @@ -975,7 +975,7 @@ serd_writer_new(SerdWorld* world, writer->env = env; writer->root_node = NULL; writer->root_uri = SERD_URI_NULL; - writer->anon_stack = serd_stack_new(SERD_PAGE_SIZE); + writer->anon_stack = serd_stack_new(SERD_PAGE_SIZE, SERD_PAGE_SIZE); writer->write_func = write_func; writer->stream = stream; writer->context = context; diff --git a/tests/overflow_test.c b/tests/overflow_test.c index 7e545793..917d02bf 100644 --- a/tests/overflow_test.c +++ b/tests/overflow_test.c @@ -40,19 +40,21 @@ main(void) size_t stack_size; } Test; - const Test tests[] = {{":s :p :%99 .", 338}, - {":s :p .", 239}, - {":s :p \"literal\"", 336}, - {":s :p \"verb\"", 275}, - {":s :p _:blank .", 307}, - {":s :p true .", 307}, - {":s :p true .", 341}, - {":s :p \"\"@en .", 339}, + const size_t sizes = 3 * sizeof(size_t); + + const Test tests[] = {{":s :p :%99 .", sizes + 280}, + {":s :p .", sizes + 224}, + {":s :p \"literal\"", sizes + 264}, + {":s :p \"verb\"", sizes + 263}, + {":s :p _:blank .", sizes + 276}, + {":s :p true .", sizes + 295}, + {":s :p true .", sizes + 329}, + {":s :p \"\"@en .", sizes + 302}, {NULL, 0}}; SerdWorld* world = serd_world_new(); -- cgit v1.2.1