From 847e56d9e696b813d1cdf3da6d54df5e7b389eae Mon Sep 17 00:00:00 2001 From: David Robillard Date: Fri, 30 Dec 2011 08:10:23 +0000 Subject: Add serd_node_new_blob and serd_base64_decode for handling arbitrary binary data via base64 encoding. git-svn-id: http://svn.drobilla.net/serd/trunk@280 490d8e77-9747-427b-9fa3-0b8f29cee8a0 --- ChangeLog | 2 ++ serd/serd.h | 26 ++++++++++++++++++++++++++ src/node.c | 42 ++++++++++++++++++++++++++++++++++++++++++ src/serd_internal.h | 6 ++++++ src/string.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ tests/serd_test.c | 35 ++++++++++++++++++++++++++++++++++- wscript | 2 +- 7 files changed, 157 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5ecbd880..21f0b63d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -21,6 +21,8 @@ serd (UNRELEASED) unstable; urgency=low * Improve URI resolution to cover most of the abnormal cases from RFC3986 * Support file://localhost/foo URIs in serd_uri_to_path() * Support Windows file://c:/foo URIs in serd_uri_to_path() on all platforms + * Add serd_node_new_blob and serd_base64_decode for handling arbitrary + binary data via base64 encoding. -- David Robillard (UNRELEASED) diff --git a/serd/serd.h b/serd/serd.h index 3e84c302..2380c03e 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -277,6 +277,20 @@ SERD_API double serd_strtod(const char* str, char** endptr); +/** + Decode a base64 string. + This function can be used to deserialise a blob node created with + serd_node_new_blob(). + + @param str Base64 string to decode. + @param len The length of @c str. + @param size Set to the size of the returned blob in bytes. + @return A newly allocated blob which must be freed with free(). +*/ +SERD_API +void* +serd_base64_decode(const uint8_t* str, size_t len, size_t* size); + /** @} @name URI @@ -415,6 +429,18 @@ SERD_API SerdNode serd_node_new_integer(long i); +/** + Create a node by serialising @c buf into an xsd:base64Binary string. + This function can be used to make a serialisable node out of arbitrary + binary data, which can be decoded using serd_base64_decode(). + + @param wrap_lines If true lines are wrapped at 76 characters to conform + to RFC 2045. +*/ +SERD_API +SerdNode +serd_node_new_blob(const void* buf, size_t size, bool wrap_lines); + /** Free any data owned by @c node. diff --git a/src/node.c b/src/node.c index 4628be99..6a27beb7 100644 --- a/src/node.c +++ b/src/node.c @@ -218,6 +218,48 @@ serd_node_new_integer(long i) return node; } +/** + Base64 encoding table. + @see RFC3986 S3. +*/ +static const uint8_t b64_map[64] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +/** + Encode 3 raw bytes to 4 base64 characters. +*/ +static inline void +encode_chunk(uint8_t out[4], const uint8_t in[3], size_t n_in) +{ + out[0] = b64_map[in[0] >> 2]; + out[1] = b64_map[((in[0] & 0x03) << 4) | ((in[1] & 0xF0) >> 4)]; + out[2] = ((n_in > 1) + ? (b64_map[((in[1] & 0x0F) << 2) | ((in[2] & 0xC0) >> 6)]) + : (uint8_t)'='); + out[3] = ((n_in > 2) ? b64_map[in[2] & 0x3F] : (uint8_t)'='); +} + +SERD_API +SerdNode +serd_node_new_blob(const void* buf, size_t size, bool wrap_lines) +{ + const size_t len = ((size + 2) / 3) * 4 + (wrap_lines ? (size / 57) : 0); + SerdNode node = { calloc(1, len + 2), len, len, 0, SERD_LITERAL }; + for (size_t i = 0, j = 0; i < size; i += 3, j += 4) { + uint8_t in[4] = { 0, 0, 0, 0 }; + size_t n_in = MIN(3, size - i); + memcpy(in, (const uint8_t*)buf + i, n_in); + + if (wrap_lines && i > 0 && (i % 57) == 0) { + ((uint8_t*)node.buf)[j++] = '\n'; + node.flags |= SERD_HAS_NEWLINE; + } + + encode_chunk((uint8_t*)node.buf + j, in, n_in); + } + return node; +} + SERD_API void serd_node_free(SerdNode* node) diff --git a/src/serd_internal.h b/src/serd_internal.h index d81d0b34..9af3ae5f 100644 --- a/src/serd_internal.h +++ b/src/serd_internal.h @@ -219,4 +219,10 @@ is_space(const char c) } } +static inline bool +is_base64(const uint8_t c) +{ + return is_alpha(c) || is_digit(c) || c == '+' || c == '/' || c == '='; +} + #endif // SERD_INTERNAL_H diff --git a/src/string.c b/src/string.c index 968a20e1..a97c6f98 100644 --- a/src/string.c +++ b/src/string.c @@ -112,3 +112,49 @@ serd_strtod(const char* str, char** endptr) *endptr = (char*)s; return result * sign; } + +/** + Base64 decoding table. + This is indexed by encoded characters and returns the numeric value used + for decoding, shifted up by 47 to be in the range of printable ASCII. + A '$' is a placeholder for characters not in the base64 alphabet. +*/ +static const char b64_unmap[255] = + "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$m$$$ncdefghijkl$$$$$$" + "$/0123456789:;<=>?@ABCDEFGH$$$$$$IJKLMNOPQRSTUVWXYZ[\\]^_`ab$$$$" + "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$" + "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"; + +static inline uint8_t unmap(const uint8_t in) { return b64_unmap[in] - 47; } + +/** + Decode 4 base64 characters to 3 raw bytes. +*/ +static inline size_t +decode_chunk(const uint8_t in[4], uint8_t out[3]) +{ + out[0] = (uint8_t)(((unmap(in[0]) << 2)) | unmap(in[1]) >> 4); + out[1] = (uint8_t)(((unmap(in[1]) << 4) & 0xF0) | unmap(in[2]) >> 2); + out[2] = (uint8_t)(((unmap(in[2]) << 6) & 0xC0) | unmap(in[3])); + return 1 + (in[2] != '=') + ((in[2] != '=') && (in[3] != '=')); +} + +SERD_API +void* +serd_base64_decode(const uint8_t* str, size_t len, size_t* size) +{ + void* buf = malloc((len * 3) / 4 + 2); + *size = 0; + for (size_t i = 0, j = 0; i < len; j += 3) { + uint8_t in[4] = "===="; + size_t n_in = 0; + for (; i < len && n_in < 4; ++n_in) { + for (; i < len && !is_base64(str[i]); ++i) {} // Skip junk + in[n_in] = str[i++]; + } + if (n_in > 1) { + *size += decode_chunk(in, (uint8_t*)buf + j); + } + } + return buf; +} diff --git a/tests/serd_test.c b/tests/serd_test.c index 7eb9a67e..095437fb 100644 --- a/tests/serd_test.c +++ b/tests/serd_test.c @@ -133,7 +133,6 @@ main() }; for (unsigned i = 0; i < sizeof(int_test_nums) / sizeof(double); ++i) { - fprintf(stderr, "\n*** TEST %ld\n", int_test_nums[i]); SerdNode node = serd_node_new_integer(int_test_nums[i]); if (strcmp((const char*)node.buf, (const char*)int_test_strs[i])) { fprintf(stderr, "error: Serialised `%s' != %s\n", @@ -149,6 +148,40 @@ main() serd_node_free(&node); } + // Test serd_node_new_blob + for (size_t size = 0; size < 256; ++size) { + uint8_t* data = malloc(size); + for (size_t i = 0; i < size; ++i) { + data[i] = (uint8_t)(rand() % 256); + } + + SerdNode blob = serd_node_new_blob(data, size, size % 5); + + if (blob.n_bytes != blob.n_chars) { + fprintf(stderr, "error: Blob %zu bytes != %zu chars\n", + blob.n_bytes, blob.n_chars); + return 1; + } + + size_t out_size; + uint8_t* out = serd_base64_decode(blob.buf, blob.n_bytes, &out_size); + if (out_size != size) { + fprintf(stderr, "error: Blob size %zu != %zu\n", out_size, size); + return 1; + } + + for (size_t i = 0; i < size; ++i) { + if (out[i] != data[i]) { + fprintf(stderr, "error: Corrupt blob at byte %zu\n", i); + return 1; + } + } + + serd_node_free(&blob); + free(out); + free(data); + } + // Test serd_strlen const uint8_t str[] = { '"', '5', 0xE2, 0x82, 0xAC, '"', '\n', 0 }; diff --git a/wscript b/wscript index 783d66b5..6840d0d4 100644 --- a/wscript +++ b/wscript @@ -10,7 +10,7 @@ from waflib.extras import autowaf as autowaf import waflib.Logs as Logs, waflib.Options as Options # Version of this package (even if built as a child) -SERD_VERSION = '0.8.0' +SERD_VERSION = '0.9.0' SERD_MAJOR_VERSION = '0' # Library version (UNIX style major, minor, micro) -- cgit v1.2.1