aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2011-12-30 08:10:23 +0000
committerDavid Robillard <d@drobilla.net>2011-12-30 08:10:23 +0000
commit847e56d9e696b813d1cdf3da6d54df5e7b389eae (patch)
treec4492906739264f39a3cdd9a0cacc3c6a4d908c7
parente0f18e34021004a19709f0c627db51af1a27afcf (diff)
downloadserd-847e56d9e696b813d1cdf3da6d54df5e7b389eae.tar.gz
serd-847e56d9e696b813d1cdf3da6d54df5e7b389eae.tar.bz2
serd-847e56d9e696b813d1cdf3da6d54df5e7b389eae.zip
Add serd_node_new_blob and serd_base64_decode for handling arbitrary binary
data via base64 encoding. git-svn-id: http://svn.drobilla.net/serd/trunk@280 490d8e77-9747-427b-9fa3-0b8f29cee8a0
-rw-r--r--ChangeLog2
-rw-r--r--serd/serd.h26
-rw-r--r--src/node.c42
-rw-r--r--src/serd_internal.h6
-rw-r--r--src/string.c46
-rw-r--r--tests/serd_test.c35
-rw-r--r--wscript2
7 files changed, 157 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index 5ecbd880..21f0b63d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -21,6 +21,8 @@ serd (UNRELEASED) unstable; urgency=low
* Improve URI resolution to cover most of the abnormal cases from RFC3986
* Support file://localhost/foo URIs in serd_uri_to_path()
* Support Windows file://c:/foo URIs in serd_uri_to_path() on all platforms
+ * Add serd_node_new_blob and serd_base64_decode for handling arbitrary
+ binary data via base64 encoding.
-- David Robillard <d@drobilla.net> (UNRELEASED)
diff --git a/serd/serd.h b/serd/serd.h
index 3e84c302..2380c03e 100644
--- a/serd/serd.h
+++ b/serd/serd.h
@@ -278,6 +278,20 @@ double
serd_strtod(const char* str, char** endptr);
/**
+ Decode a base64 string.
+ This function can be used to deserialise a blob node created with
+ serd_node_new_blob().
+
+ Characters in @c str that are not part of the base64 alphabet (for
+ example the line breaks written by serd_node_new_blob() when wrapping)
+ are skipped while decoding.
+
+ @param str Base64 string to decode.
+ @param len The length of @c str in bytes.
+ @param size Set to the size of the returned blob in bytes.
+ @return A newly allocated blob which must be freed with free().
+*/
+SERD_API
+void*
+serd_base64_decode(const uint8_t* str, size_t len, size_t* size);
+
+/**
@}
@name URI
@{
@@ -416,6 +430,18 @@ SerdNode
serd_node_new_integer(long i);
/**
+ Create a node by serialising @c buf into an xsd:base64Binary string.
+ This function can be used to make a serialisable node out of arbitrary
+ binary data, which can be decoded using serd_base64_decode().
+
+ @param buf Raw binary data to serialise.
+ @param size The size of @c buf in bytes.
+ @param wrap_lines If true, lines are wrapped at 76 characters to conform
+ to RFC 2045.
+ @return A literal node that owns a newly allocated string holding the
+ base64 text.
+*/
+SERD_API
+SerdNode
+serd_node_new_blob(const void* buf, size_t size, bool wrap_lines);
+
+/**
Free any data owned by @c node.
Note that if @c node is itself dynamically allocated (which is not the case
diff --git a/src/node.c b/src/node.c
index 4628be99..6a27beb7 100644
--- a/src/node.c
+++ b/src/node.c
@@ -218,6 +218,48 @@ serd_node_new_integer(long i)
return node;
}
+/**
+ Base64 encoding table.
+ Indexed by a 6-bit value (0..63), yields the corresponding character of
+ the base64 alphabet. The array holds exactly the 64 alphabet characters,
+ so no terminating NUL is stored.
+ @see <a href="http://tools.ietf.org/html/rfc3548#section-3">RFC3548 S3</a>.
+*/
+static const uint8_t b64_map[64] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+/**
+   Write the 4 base64 characters for a group of up to 3 raw bytes.
+
+   @param out Receives exactly 4 characters.
+   @param in Input group; in[1] is always read, in[2] only if @c n_in > 1.
+   @param n_in Number of meaningful bytes in @c in (1 to 3).  Output
+   positions that have no corresponding input byte are filled with '='.
+*/
+static inline void
+encode_chunk(uint8_t out[4], const uint8_t in[3], size_t n_in)
+{
+    // The first two characters only depend on in[0] and the top of in[1]
+    out[0] = b64_map[in[0] >> 2];
+    out[1] = b64_map[((in[0] & 0x03) << 4) | (in[1] >> 4)];
+
+    // Start out fully padded, then overwrite whatever real input covers
+    out[2] = (uint8_t)'=';
+    out[3] = (uint8_t)'=';
+    if (n_in > 1) {
+        out[2] = b64_map[((in[1] & 0x0F) << 2) | (in[2] >> 6)];
+    }
+    if (n_in > 2) {
+        out[3] = b64_map[in[2] & 0x3F];
+    }
+}
+
+/**
+   Create a literal node holding the base64 encoding of a block of memory.
+
+   Every 3 input bytes become 4 output characters, and the final group is
+   padded with '='.  When @c wrap_lines is set, a newline is written before
+   each group of 57 input bytes (76 output characters) except the first,
+   and SERD_HAS_NEWLINE is set in the node flags.
+
+   @param buf Raw data to encode.
+   @param size Size of @c buf in bytes.
+   @param wrap_lines Whether to break the output into 76 character lines.
+   @return A node whose buffer is zero-initialised heap memory containing
+   the encoded text, with n_bytes and n_chars set to the text length.  If
+   the buffer can not be allocated, the node has a NULL buffer and a length
+   of zero.
+*/
+SERD_API
+SerdNode
+serd_node_new_blob(const void* buf, size_t size, bool wrap_lines)
+{
+    const size_t line_bytes = 57;  // 57 raw bytes encode to one 76 char line
+
+    // Newlines only separate lines, so a completely full last line is not
+    // followed by one.  Counting size / 57 would overcount by one whenever
+    // size is a non-zero multiple of 57 and leave a stray NUL in the length.
+    const size_t n_breaks = (wrap_lines && size > 0)
+        ? ((size - 1) / line_bytes)
+        : 0;
+    const size_t len = ((size + 2) / 3) * 4 + n_breaks;
+
+    SerdNode node = { calloc(1, len + 2), len, len, 0, SERD_LITERAL };
+    if (!node.buf) {
+        // Allocation failed: report an empty node rather than writing to NULL
+        node.n_bytes = 0;
+        node.n_chars = 0;
+        return node;
+    }
+
+    uint8_t* const str = (uint8_t*)node.buf;
+    for (size_t i = 0, j = 0; i < size; i += 3, j += 4) {
+        // Copy into a zero padded group so a short final group is safe to read
+        uint8_t      in[4] = { 0, 0, 0, 0 };
+        const size_t n_in  = MIN(3, size - i);
+        memcpy(in, (const uint8_t*)buf + i, n_in);
+
+        if (wrap_lines && i > 0 && (i % line_bytes) == 0) {
+            str[j++] = '\n';
+            node.flags |= SERD_HAS_NEWLINE;
+        }
+
+        encode_chunk(str + j, in, n_in);
+    }
+    return node;
+}
+
SERD_API
void
serd_node_free(SerdNode* node)
diff --git a/src/serd_internal.h b/src/serd_internal.h
index d81d0b34..9af3ae5f 100644
--- a/src/serd_internal.h
+++ b/src/serd_internal.h
@@ -219,4 +219,10 @@ is_space(const char c)
}
}
+/**
+   Return true iff @c c may appear in base64 text: a letter, a digit,
+   '+', '/', or the padding character '='.
+*/
+static inline bool
+is_base64(const uint8_t c)
+{
+    if (is_alpha(c) || is_digit(c)) {
+        return true;
+    }
+
+    switch (c) {
+    case '+':
+    case '/':
+    case '=':
+        return true;
+    default:
+        return false;
+    }
+}
+
#endif // SERD_INTERNAL_H
diff --git a/src/string.c b/src/string.c
index 968a20e1..a97c6f98 100644
--- a/src/string.c
+++ b/src/string.c
@@ -112,3 +112,49 @@ serd_strtod(const char* str, char** endptr)
*endptr = (char*)s;
return result * sign;
}
+
+/**
+ Base64 decoding table.
+ This is indexed by encoded characters and returns the numeric value used
+ for decoding, shifted up by 47 to be in the range of printable ASCII.
+ A '$' is a placeholder for characters not in the base64 alphabet; the
+ padding character '=' also maps to this placeholder.
+
+ The array is declared with 255 entries, so valid indices are 0..254 and
+ it must not be indexed with the byte value 255.
+*/
+static const char b64_unmap[255] =
+ "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$m$$$ncdefghijkl$$$$$$"
+ "$/0123456789:;<=>?@ABCDEFGH$$$$$$IJKLMNOPQRSTUVWXYZ[\\]^_`ab$$$$"
+ "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"
+ "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$";
+
+/**
+   Return the decoded value of a single base64 character.
+
+   The table entry is shifted back down by 47 to undo the offset used to
+   keep the table printable.  Characters outside the alphabet yield the
+   placeholder value, which is not a valid 6-bit number.
+*/
+static inline uint8_t
+unmap(const uint8_t in)
+{
+    // The table has fewer entries than a uint8_t has values, so guard the
+    // index and treat anything past the end like any other invalid input.
+    const char mapped = (in < sizeof(b64_unmap)) ? b64_unmap[in] : '$';
+    return (uint8_t)(mapped - 47);
+}
+
+/**
+   Decode a group of 4 base64 characters into raw bytes.
+
+   All 3 bytes of @c out are always written, even when the group is padded.
+
+   @param in Group of 4 characters, padded with '=' if it is short.
+   @param out Receives 3 bytes.
+   @return The number of meaningful bytes in @c out: 1 if in[2] is padding,
+   2 if only in[3] is padding, otherwise 3.
+*/
+static inline size_t
+decode_chunk(const uint8_t in[4], uint8_t out[3])
+{
+    const uint8_t v0 = unmap(in[0]);
+    const uint8_t v1 = unmap(in[1]);
+    const uint8_t v2 = unmap(in[2]);
+    const uint8_t v3 = unmap(in[3]);
+
+    out[0] = (uint8_t)((v0 << 2) | (v1 >> 4));
+    out[1] = (uint8_t)(((v1 << 4) & 0xF0) | (v2 >> 2));
+    out[2] = (uint8_t)(((v2 << 6) & 0xC0) | v3);
+
+    if (in[2] == '=') {
+        return 1;
+    }
+    return (in[3] == '=') ? 2 : 3;
+}
+
+/**
+   Decode base64 text into a newly allocated block of memory.
+
+   Characters for which is_base64() is false are skipped wherever they
+   occur.  The remaining characters are decoded in groups of 4; a trailing
+   group of 2 or 3 characters is treated as if it were padded with '=', and
+   a single leftover character is ignored.
+
+   @param str Text to decode (need not be NUL terminated).
+   @param len Number of bytes of @c str to read.
+   @param size Set to the number of decoded bytes (0 on failure).
+   @return A buffer allocated with malloc() which the caller must free(),
+   or NULL if the buffer could not be allocated.
+*/
+SERD_API
+void*
+serd_base64_decode(const uint8_t* str, size_t len, size_t* size)
+{
+    *size = 0;
+
+    // Every chunk writes 3 bytes even if padded, hence the slack at the end
+    uint8_t* const buf = malloc((len * 3) / 4 + 2);
+    if (!buf) {
+        return NULL;
+    }
+
+    for (size_t i = 0, j = 0; i < len; j += 3) {
+        uint8_t in[4] = "====";
+        size_t  n_in  = 0;
+        while (n_in < 4) {
+            for (; i < len && !is_base64(str[i]); ++i) {}  // Skip junk
+            if (i >= len) {
+                break;  // Input ended in junk: do not read str[len]
+            }
+            in[n_in++] = str[i++];
+        }
+        if (n_in > 1) {
+            *size += decode_chunk(in, buf + j);
+        }
+    }
+    return buf;
+}
diff --git a/tests/serd_test.c b/tests/serd_test.c
index 7eb9a67e..095437fb 100644
--- a/tests/serd_test.c
+++ b/tests/serd_test.c
@@ -133,7 +133,6 @@ main()
};
for (unsigned i = 0; i < sizeof(int_test_nums) / sizeof(double); ++i) {
- fprintf(stderr, "\n*** TEST %ld\n", int_test_nums[i]);
SerdNode node = serd_node_new_integer(int_test_nums[i]);
if (strcmp((const char*)node.buf, (const char*)int_test_strs[i])) {
fprintf(stderr, "error: Serialised `%s' != %s\n",
@@ -149,6 +148,40 @@ main()
serd_node_free(&node);
}
+ // Test serd_node_new_blob
+ for (size_t size = 0; size < 256; ++size) {
+ uint8_t* data = malloc(size);
+ for (size_t i = 0; i < size; ++i) {
+ data[i] = (uint8_t)(rand() % 256);
+ }
+
+ SerdNode blob = serd_node_new_blob(data, size, size % 5);
+
+ if (blob.n_bytes != blob.n_chars) {
+ fprintf(stderr, "error: Blob %zu bytes != %zu chars\n",
+ blob.n_bytes, blob.n_chars);
+ return 1;
+ }
+
+ size_t out_size;
+ uint8_t* out = serd_base64_decode(blob.buf, blob.n_bytes, &out_size);
+ if (out_size != size) {
+ fprintf(stderr, "error: Blob size %zu != %zu\n", out_size, size);
+ return 1;
+ }
+
+ for (size_t i = 0; i < size; ++i) {
+ if (out[i] != data[i]) {
+ fprintf(stderr, "error: Corrupt blob at byte %zu\n", i);
+ return 1;
+ }
+ }
+
+ serd_node_free(&blob);
+ free(out);
+ free(data);
+ }
+
// Test serd_strlen
const uint8_t str[] = { '"', '5', 0xE2, 0x82, 0xAC, '"', '\n', 0 };
diff --git a/wscript b/wscript
index 783d66b5..6840d0d4 100644
--- a/wscript
+++ b/wscript
@@ -10,7 +10,7 @@ from waflib.extras import autowaf as autowaf
import waflib.Logs as Logs, waflib.Options as Options
# Version of this package (even if built as a child)
-SERD_VERSION = '0.8.0'
+SERD_VERSION = '0.9.0'
SERD_MAJOR_VERSION = '0'
# Library version (UNIX style major, minor, micro)