From c02d28085a1f81b542df62fe97a530bb6cbce86d Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sun, 2 Jan 2022 18:22:35 -0500 Subject: Add support for xsd:hexBinary literals --- include/serd/serd.h | 54 +++++++++++++++++++--- src/node.c | 119 +++++++++++++++++++++++++++++++++++++----------- src/nodes.c | 31 +++++++++++-- test/test_node.c | 111 ++++++++++++++++++++++++++++++++++++-------- test/test_node_syntax.c | 10 +++- 5 files changed, 268 insertions(+), 57 deletions(-) diff --git a/include/serd/serd.h b/include/serd/serd.h index 52245077..e450f853 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -1014,6 +1014,17 @@ SerdWriteResult serd_node_construct_integer(size_t buf_size, void* SERD_NULLABLE buf, int64_t value); +/** + Construct a canonical xsd:hexBinary literal. + + The constructed node will be an xsd:hexBinary literal like "534D", with + datatype xsd:hexBinary. +*/ +SerdWriteResult +serd_node_construct_hex(size_t buf_size, + void* SERD_NULLABLE buf, + size_t value_size, + const void* SERD_NONNULL value); /** Construct a canonical xsd:base64Binary literal. @@ -1182,6 +1193,21 @@ SERD_API SerdNode* SERD_ALLOCATED serd_new_integer(SerdAllocator* SERD_NULLABLE allocator, int64_t i); +/** + Create a new canonical xsd:hexBinary literal. + + This is a wrapper for serd_node_construct_hex() that allocates a new + node on the heap. + + @return A newly allocated node that must be freed with serd_node_free(), or + null. +*/ +SERD_API +SerdNode* SERD_ALLOCATED +serd_new_hex(SerdAllocator* SERD_NULLABLE allocator, + const void* SERD_NONNULL buf, + size_t size); + /** Create a new canonical xsd:base64Binary literal. @@ -1237,16 +1263,20 @@ serd_get_value_as(const SerdNode* SERD_NONNULL node, bool lossy); /** - Return the maximum size of a decoded base64 node in bytes. + Return the maximum size of a decoded hex or base64 binary node in bytes. This returns an upper bound on the number of bytes that would be decoded by - serd_get_base64(). 
This is calculated as a simple constant-time arithmetic + serd_get_blob(). This is calculated as a simple constant-time arithmetic expression based on the length of the encoded string, so may be larger than the actual size of the data due to things like additional whitespace. + + @return The size of the decoded hex or base64 blob `node`, or zero if it + does not have datatype <http://www.w3.org/2001/XMLSchema#hexBinary> or + <http://www.w3.org/2001/XMLSchema#base64Binary>. */ SERD_PURE_API size_t -serd_get_base64_size(const SerdNode* SERD_NONNULL node); +serd_get_blob_size(const SerdNode* SERD_NONNULL node); /** Decode a base64 node. @@ -1265,9 +1295,9 @@ serd_get_base64_size(const SerdNode* SERD_NONNULL node); */ SERD_API SerdWriteResult -serd_get_base64(const SerdNode* SERD_NONNULL node, - size_t buf_size, - void* SERD_NONNULL buf); +serd_get_blob(const SerdNode* SERD_NONNULL node, + size_t buf_size, + void* SERD_NONNULL buf); /// Return a deep copy of `node` SERD_API @@ -1527,6 +1557,18 @@ SERD_API const SerdNode* SERD_ALLOCATED serd_nodes_integer(SerdNodes* SERD_NONNULL nodes, int64_t value); +/** + Make a canonical xsd:hexBinary node. + + A new node will be constructed with serd_node_construct_hex() if an + equivalent one is not already in the set. +*/ +SERD_API +const SerdNode* SERD_ALLOCATED +serd_nodes_hex(SerdNodes* SERD_NONNULL nodes, + const void* SERD_NONNULL value, + size_t value_size); + /** Make a canonical xsd:base64Binary node. 
diff --git a/src/node.c b/src/node.c index 7ccc039f..46552449 100644 --- a/src/node.c +++ b/src/node.c @@ -446,27 +446,26 @@ serd_node_construct_integer(const size_t buf_size, SERD_STRING(NS_XSD "integer")); } -SerdWriteResult -serd_node_construct_base64(const size_t buf_size, - void* const buf, - const size_t value_size, - const void* const value) +static SerdWriteResult +serd_node_construct_binary( + const size_t buf_size, + void* const buf, + const size_t value_size, + const void* const value, + const SerdStringView datatype_uri, + ExessResult (*write_func)(size_t, const void*, size_t, char*)) { - static const SerdStringView xsd_base64Binary = - SERD_STRING(NS_XSD "base64Binary"); - // Verify argument sanity if (!value || !value_size) { return result(SERD_BAD_ARG, 0); } - // Determine the type to use (default to xsd:base64Binary) - const SerdStringView type = xsd_base64Binary; - const size_t type_length = serd_node_pad_length(type.len); - const size_t type_size = sizeof(SerdNode) + type_length; + // Find the size required for the datatype + const size_t type_length = serd_node_pad_length(datatype_uri.len); + const size_t type_size = sizeof(SerdNode) + type_length; // Find the length of the encoded string (just an O(1) arithmetic expression) - ExessResult r = exess_write_base64(value_size, value, 0, NULL); + ExessResult r = write_func(value_size, value, 0, NULL); // Check that the provided buffer is large enough const size_t padded_length = serd_node_pad_length(r.count); @@ -480,8 +479,8 @@ serd_node_construct_base64(const size_t buf_size, node->flags = SERD_HAS_DATATYPE; node->type = SERD_LITERAL; - // Write the encoded base64 into the node body - r = exess_write_base64( + // Write the encoded string into the node body + r = write_func( value_size, value, total_size - sizeof(SerdNode), serd_node_buffer(node)); MUST_SUCCEED(r.status); @@ -489,14 +488,42 @@ serd_node_construct_base64(const size_t buf_size, // Append datatype SerdNode* meta_node = node + 1 + 
(padded_length / sizeof(SerdNode)); - meta_node->length = type.len; + meta_node->length = datatype_uri.len; meta_node->flags = 0u; meta_node->type = SERD_URI; - memcpy(serd_node_buffer(meta_node), type.buf, type.len); + memcpy(serd_node_buffer(meta_node), datatype_uri.buf, datatype_uri.len); return result(SERD_SUCCESS, total_size); } +SerdWriteResult +serd_node_construct_hex(const size_t buf_size, + void* const buf, + const size_t value_size, + const void* const value) +{ + return serd_node_construct_binary(buf_size, + buf, + value_size, + value, + SERD_STRING(NS_XSD "hexBinary"), + exess_write_hex); +} + +SerdWriteResult +serd_node_construct_base64(const size_t buf_size, + void* const buf, + const size_t value_size, + const void* const value) +{ + return serd_node_construct_binary(buf_size, + buf, + value_size, + value, + SERD_STRING(NS_XSD "base64Binary"), + exess_write_base64); +} + static size_t string_sink(const void* const buf, const size_t size, @@ -650,21 +677,45 @@ serd_get_value_as(const SerdNode* const node, } size_t -serd_get_base64_size(const SerdNode* const node) +serd_get_blob_size(const SerdNode* const node) { - return exess_base64_decoded_size(serd_node_length(node)); + const SerdNode* const datatype = serd_node_datatype(node); + if (!datatype) { + return 0u; + } + + if (!strcmp(serd_node_string(datatype), NS_XSD "hexBinary")) { + return exess_hex_decoded_size(serd_node_length(node)); + } + + if (!strcmp(serd_node_string(datatype), NS_XSD "base64Binary")) { + return exess_base64_decoded_size(serd_node_length(node)); + } + + return 0u; } SerdWriteResult -serd_get_base64(const SerdNode* const node, - const size_t buf_size, - void* const buf) +serd_get_blob(const SerdNode* const node, + const size_t buf_size, + void* const buf) { - const size_t max_size = serd_get_base64_size(node); - const ExessVariableResult r = - exess_read_base64(buf_size, buf, serd_node_string(node)); + const SerdNode* const datatype = serd_node_datatype(node); + if (!datatype) { 
+ return result(SERD_BAD_ARG, 0u); + } + + ExessVariableResult r = {EXESS_UNSUPPORTED, 0u, 0u}; - return r.status == EXESS_NO_SPACE ? result(SERD_OVERFLOW, max_size) + if (!strcmp(serd_node_string(datatype), NS_XSD "hexBinary")) { + r = exess_read_hex(buf_size, buf, serd_node_string(node)); + } else if (!strcmp(serd_node_string(datatype), NS_XSD "base64Binary")) { + r = exess_read_base64(buf_size, buf, serd_node_string(node)); + } else { + return result(SERD_BAD_ARG, 0u); + } + + return r.status == EXESS_NO_SPACE ? result(SERD_OVERFLOW, r.write_count) : r.status ? result(SERD_BAD_SYNTAX, 0u) : result(SERD_SUCCESS, r.write_count); } @@ -906,6 +957,22 @@ serd_new_base64(SerdAllocator* const allocator, const void* buf, size_t size) return node; } +SerdNode* +serd_new_hex(SerdAllocator* const allocator, const void* buf, size_t size) +{ + SerdWriteResult r = serd_node_construct_hex(0, NULL, size, buf); + SerdNode* const node = serd_node_try_malloc(allocator, r); + + if (node) { + r = serd_node_construct_hex(r.count, node, size, buf); + MUST_SUCCEED(r.status); + assert(serd_node_length(node) == strlen(serd_node_string(node))); + serd_node_check_padding(node); + } + + return node; +} + SerdNodeType serd_node_type(const SerdNode* const node) { diff --git a/src/nodes.c b/src/nodes.c index b989259d..84c6b696 100644 --- a/src/nodes.c +++ b/src/nodes.c @@ -442,9 +442,9 @@ serd_nodes_integer(SerdNodes* const nodes, const int64_t value) } const SerdNode* -serd_nodes_base64(SerdNodes* const nodes, - const void* const value, - const size_t value_size) +serd_nodes_hex(SerdNodes* const nodes, + const void* const value, + const size_t value_size) { assert(nodes); assert(value); @@ -458,6 +458,31 @@ serd_nodes_base64(SerdNodes* const nodes, a performance issue. More ambitiously, adding support for binary nodes like a Real Database(TM) would largely avoid this problem. 
*/ + // Determine how much space the node needs + SerdWriteResult r = serd_node_construct_hex(0, NULL, value_size, value); + + // Allocate a new entry and construct the node into it + NodesEntry* const entry = new_entry(nodes->allocator, r.count); + if (entry) { + r = serd_node_construct_hex(r.count, &entry->node, value_size, value); + + assert(!r.status); + (void)r; + } + + return serd_nodes_manage_entry(nodes, entry); +} + +const SerdNode* +serd_nodes_base64(SerdNodes* const nodes, + const void* const value, + const size_t value_size) +{ + assert(nodes); + assert(value); + + // Same situation as for hex above + // Determine how much space the node needs SerdWriteResult r = serd_node_construct_base64(0, NULL, value_size, value); diff --git a/test/test_node.c b/test/test_node.c index 416d975b..d4ac6a2c 100644 --- a/test/test_node.c +++ b/test/test_node.c @@ -449,6 +449,43 @@ test_get_integer(void) check_get_integer("!invalid", NS_XSD "long", true, SERD_NOTHING, 0); } +static void +test_hex(void) +{ + assert(!serd_new_hex(NULL, &SERD_URI_NULL, 0)); + + // Test valid hex blobs with a range of sizes + for (size_t size = 1; size < 256; ++size) { + uint8_t* const data = (uint8_t*)malloc(size); + for (size_t i = 0; i < size; ++i) { + data[i] = (uint8_t)((size + i) % 256); + } + + SerdNode* blob = serd_new_hex(NULL, data, size); + const char* blob_str = serd_node_string(blob); + const size_t max_size = serd_get_blob_size(blob); + uint8_t* out = (uint8_t*)calloc(1, max_size); + + const SerdWriteResult r = serd_get_blob(blob, max_size, out); + assert(r.status == SERD_SUCCESS); + assert(r.count == size); + assert(r.count <= max_size); + assert(serd_node_length(blob) == strlen(blob_str)); + + for (size_t i = 0; i < size; ++i) { + assert(out[i] == data[i]); + } + + const SerdNode* const datatype = serd_node_datatype(blob); + assert(datatype); + assert(!strcmp(serd_node_string(datatype), NS_XSD "hexBinary")); + + serd_node_free(NULL, blob); + free(out); + free(data); + } +} 
+ static void test_base64(void) { @@ -463,10 +500,10 @@ test_base64(void) SerdNode* blob = serd_new_base64(NULL, data, size); const char* blob_str = serd_node_string(blob); - const size_t max_size = serd_get_base64_size(blob); + const size_t max_size = serd_get_blob_size(blob); uint8_t* out = (uint8_t*)calloc(1, max_size); - const SerdWriteResult r = serd_get_base64(blob, max_size, out); + const SerdWriteResult r = serd_get_blob(blob, max_size, out); assert(r.status == SERD_SUCCESS); assert(r.count == size); assert(r.count <= max_size); @@ -487,19 +524,19 @@ test_base64(void) } static void -check_get_base64(const char* string, - const char* datatype_uri, - const char* expected) +check_get_blob(const char* string, + const char* datatype_uri, + const char* expected) { SerdNode* const node = serd_new_literal( NULL, SERD_STRING(string), SERD_HAS_DATATYPE, SERD_STRING(datatype_uri)); assert(node); - const size_t max_size = serd_get_base64_size(node); + const size_t max_size = serd_get_blob_size(node); char* const decoded = (char*)calloc(1, max_size + 1); - const SerdWriteResult r = serd_get_base64(node, max_size, decoded); + const SerdWriteResult r = serd_get_blob(node, max_size, decoded); assert(!r.status); assert(r.count <= max_size); @@ -511,22 +548,55 @@ check_get_base64(const char* string, } static void -test_get_base64(void) +test_get_blob(void) { - check_get_base64("Zm9vYmFy", NS_XSD "base64Binary", "foobar"); - check_get_base64("Zm9vYg==", NS_XSD "base64Binary", "foob"); - check_get_base64(" \f\n\r\t\vZm9v \f\n\r\t\v", NS_XSD "base64Binary", "foo"); + check_get_blob("666F6F626172", NS_XSD "hexBinary", "foobar"); + check_get_blob("666F6F62", NS_XSD "hexBinary", "foob"); - SerdNode* const node = serd_new_literal(NULL, - SERD_STRING("Zm9v"), - SERD_HAS_DATATYPE, - SERD_STRING(NS_XSD "base64Binary")); + check_get_blob("Zm9vYmFy", NS_XSD "base64Binary", "foobar"); + check_get_blob("Zm9vYg==", NS_XSD "base64Binary", "foob"); + check_get_blob(" \f\n\r\t\vZm9v 
\f\n\r\t\v", NS_XSD "base64Binary", "foo"); - char small[2] = {0}; - const SerdWriteResult r = serd_get_base64(node, sizeof(small), small); + char small[2] = {0}; - assert(r.status == SERD_OVERFLOW); - serd_node_free(NULL, node); + { + SerdNode* const node = serd_new_literal(NULL, + SERD_STRING("Zm9v"), + SERD_HAS_DATATYPE, + SERD_STRING(NS_XSD "base64Binary")); + + const SerdWriteResult r = serd_get_blob(node, sizeof(small), small); + + assert(r.status == SERD_OVERFLOW); + serd_node_free(NULL, node); + } + { + SerdNode* const string = + serd_new_token(NULL, SERD_LITERAL, SERD_STRING("string")); + + assert(serd_get_blob_size(string) == 0u); + + const SerdWriteResult r = serd_get_blob(string, sizeof(small), small); + + assert(r.status == SERD_BAD_ARG); + assert(r.count == 0u); + serd_node_free(NULL, string); + } + { + SerdNode* const unknown = + serd_new_literal(NULL, + SERD_STRING("secret"), + SERD_HAS_DATATYPE, + SERD_STRING("http://example.org/Datatype")); + + assert(serd_get_blob_size(unknown) == 0u); + + const SerdWriteResult r = serd_get_blob(unknown, sizeof(small), small); + + assert(r.status == SERD_BAD_ARG); + assert(r.count == 0u); + serd_node_free(NULL, unknown); + } } static void @@ -789,8 +859,9 @@ main(void) test_get_float(); test_integer(); test_get_integer(); + test_hex(); test_base64(); - test_get_base64(); + test_get_blob(); test_node_equals(); test_node_from_syntax(); test_node_from_substring(); diff --git a/test/test_node_syntax.c b/test/test_node_syntax.c index 47b67c9f..02787ebe 100644 --- a/test/test_node_syntax.c +++ b/test/test_node_syntax.c @@ -22,6 +22,7 @@ #include #include +#include #include static void @@ -87,7 +88,7 @@ check(SerdWorld* const world, static void test_common(SerdWorld* const world, const SerdSyntax syntax) { - static const int data[] = {4, 2}; + static const uint8_t data[] = {19u, 17u, 13u, 7u}; static const SerdStringView datatype = SERD_STRING("http://example.org/Datatype"); @@ -140,11 +141,16 @@ test_common(SerdWorld* 
const world, const SerdSyntax syntax) serd_nodes_value(nodes, serd_float(1.25f)), "\"1.25E0\"^^")); + assert(check(world, + syntax, + serd_nodes_hex(nodes, data, sizeof(data)), + "\"13110D07\"^^")); + assert( check(world, syntax, serd_nodes_base64(nodes, data, sizeof(data)), - "\"BAAAAAIAAAA=\"^^")); + "\"ExENBw==\"^^")); serd_nodes_free(nodes); } -- cgit v1.2.1