diff options
author | David Robillard <d@drobilla.net> | 2020-08-14 15:51:14 +0200 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2020-08-14 19:07:52 +0200 |
commit | dee13339e45305ff0a15b3d50d7cbccdc4722b86 (patch) | |
tree | b0ee56c36cdebeffcd4d274b3e3b77b0f0de026e | |
parent | 8827c4a88998b12cbfd2076c207d2d46e57b0b51 (diff) | |
download | serd-dee13339e45305ff0a15b3d50d7cbccdc4722b86.tar.gz serd-dee13339e45305ff0a15b3d50d7cbccdc4722b86.tar.bz2 serd-dee13339e45305ff0a15b3d50d7cbccdc4722b86.zip |
Separate base64 implementation
-rw-r--r-- | src/base64.c | 124 | ||||
-rw-r--r-- | src/base64.h | 46 | ||||
-rw-r--r-- | src/node.c | 37 | ||||
-rw-r--r-- | src/string.c | 49 | ||||
-rw-r--r-- | wscript | 3 |
5 files changed, 176 insertions, 83 deletions
diff --git a/src/base64.c b/src/base64.c new file mode 100644 index 00000000..763c2d2e --- /dev/null +++ b/src/base64.c @@ -0,0 +1,124 @@ +/* + Copyright 2011-2020 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "base64.h" + +#include "serd_internal.h" +#include "string_utils.h" + +#include "serd/serd.h" + +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +/** + Base64 encoding table. + + @see <a href="http://tools.ietf.org/html/rfc3548#section-3">RFC3548 S3</a>. +*/ +static const uint8_t b64_map[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +/** + Base64 decoding table. + + This is indexed by encoded characters and returns the numeric value used + for decoding, shifted up by 47 to be in the range of printable ASCII. + A '$' is a placeholder for characters not in the base64 alphabet. +*/ +static const char b64_unmap[] = + "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$m$$$ncdefghijkl$$$$$$" + "$/0123456789:;<=>?@ABCDEFGH$$$$$$IJKLMNOPQRSTUVWXYZ[\\]^_`ab$$$$" + "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$" + "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"; + +/** Encode 3 raw bytes to 4 base64 characters. */ +static inline void +encode_chunk(uint8_t out[4], const uint8_t in[3], size_t n_in) +{ + out[0] = b64_map[in[0] >> 2]; + out[1] = b64_map[((in[0] & 0x03) << 4) | ((in[1] & 0xF0) >> 4)]; + out[2] = ((n_in > 1) + ? (b64_map[((in[1] & 0x0F) << 2) | ((in[2] & 0xC0) >> 6)]) + : (uint8_t)'='); + out[3] = ((n_in > 2) ? b64_map[in[2] & 0x3F] : (uint8_t)'='); +} + +size_t +serd_base64_get_length(const size_t size, const bool wrap_lines) +{ + return (size + 2) / 3 * 4 + (wrap_lines * ((size - 1) / 57)); +} + +bool +serd_base64_encode(uint8_t* const str, + const void* const buf, + const size_t size, + const bool wrap_lines) +{ + bool has_newline = false; + for (size_t i = 0, j = 0; i < size; i += 3, j += 4) { + uint8_t in[4] = { 0, 0, 0, 0 }; + size_t n_in = MIN(3, size - i); + memcpy(in, (const uint8_t*)buf + i, n_in); + + if (wrap_lines && i > 0 && (i % 57) == 0) { + str[j++] = '\n'; + has_newline = true; + } + + encode_chunk(str + j, in, n_in); + } + + return has_newline; +} + +static inline uint8_t +unmap(const uint8_t in) +{ + return (uint8_t)(b64_unmap[in] - 47); +} + +/** Decode 4 base64 characters to 3 raw bytes. */ +static inline size_t +decode_chunk(const uint8_t in[4], uint8_t out[3]) +{ + out[0] = (uint8_t)(((unmap(in[0]) << 2)) | unmap(in[1]) >> 4); + out[1] = (uint8_t)(((unmap(in[1]) << 4) & 0xF0) | unmap(in[2]) >> 2); + out[2] = (uint8_t)(((unmap(in[2]) << 6) & 0xC0) | unmap(in[3])); + return 1 + (in[2] != '=') + ((in[2] != '=') && (in[3] != '=')); +} + +void* +serd_base64_decode(const uint8_t* str, size_t len, size_t* size) +{ + void* buf = malloc((len * 3) / 4 + 2); + *size = 0; + for (size_t i = 0, j = 0; i < len; j += 3) { + uint8_t in[] = "===="; + size_t n_in = 0; + for (; i < len && n_in < 4; ++n_in) { + for (; i < len && !is_base64(str[i]); ++i) {} // Skip junk + in[n_in] = str[i++]; + } + if (n_in > 1) { + *size += decode_chunk(in, (uint8_t*)buf + j); + } + } + return buf; +} diff --git a/src/base64.h b/src/base64.h new file mode 100644 index 00000000..ab3dd0d9 --- /dev/null +++ b/src/base64.h @@ -0,0 +1,46 @@ +/* + Copyright 2011-2018 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_BASE64_H +#define SERD_BASE64_H + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +/** + Return the number of bytes required to encode `size` bytes in base64. + + @param size The number of input (binary) bytes to encode. + @param wrap_lines Wrap lines at 76 characters to conform to RFC 2045. + @return The length of the base64 encoding, excluding null terminator. +*/ +size_t +serd_base64_get_length(size_t size, bool wrap_lines); + +/** + Encode `size` bytes of `buf` into `str`, which must be large enough. + + @param str Output string buffer. + @param buf Input binary data. + @param size Number of bytes to encode from `buf`. + @param wrap_lines Wrap lines at 76 characters to conform to RFC 2045. + @return True iff `str` contains newlines. +*/ +bool +serd_base64_encode(uint8_t* str, const void* buf, size_t size, bool wrap_lines); + +#endif // SERD_BASE64_H @@ -16,7 +16,7 @@ #include "node.h" -#include "serd_internal.h" +#include "base64.h" #include "string_utils.h" #include "serd/serd.h" @@ -346,44 +346,15 @@ serd_node_new_integer(int64_t i) return node; } -/** - Base64 encoding table. - @see <a href="http://tools.ietf.org/html/rfc3548#section-3">RFC3986 S3</a>. -*/ -static const uint8_t b64_map[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - -/** - Encode 3 raw bytes to 4 base64 characters. -*/ -static inline void -encode_chunk(uint8_t out[4], const uint8_t in[3], size_t n_in) -{ - out[0] = b64_map[in[0] >> 2]; - out[1] = b64_map[((in[0] & 0x03) << 4) | ((in[1] & 0xF0) >> 4)]; - out[2] = ((n_in > 1) - ? (b64_map[((in[1] & 0x0F) << 2) | ((in[2] & 0xC0) >> 6)]) - : (uint8_t)'='); - out[3] = ((n_in > 2) ? b64_map[in[2] & 0x3F] : (uint8_t)'='); -} - SerdNode serd_node_new_blob(const void* buf, size_t size, bool wrap_lines) { - const size_t len = (size + 2) / 3 * 4 + (wrap_lines * ((size - 1) / 57)); + const size_t len = serd_base64_get_length(size, wrap_lines); uint8_t* str = (uint8_t*)calloc(len + 2, 1); SerdNode node = { str, len, len, 0, SERD_LITERAL }; - for (size_t i = 0, j = 0; i < size; i += 3, j += 4) { - uint8_t in[4] = { 0, 0, 0, 0 }; - size_t n_in = MIN(3, size - i); - memcpy(in, (const uint8_t*)buf + i, n_in); - - if (wrap_lines && i > 0 && (i % 57) == 0) { - str[j++] = '\n'; - node.flags |= SERD_HAS_NEWLINE; - } - encode_chunk(str + j, in, n_in); + if (serd_base64_encode(str, buf, size, wrap_lines)) { + node.flags |= SERD_HAS_NEWLINE; } return node; } diff --git a/src/string.c b/src/string.c index 86dc739e..e1e5dbda 100644 --- a/src/string.c +++ b/src/string.c @@ -164,52 +164,3 @@ serd_strtod(const char* str, char** endptr) return result * sign; } - -/** - Base64 decoding table. - This is indexed by encoded characters and returns the numeric value used - for decoding, shifted up by 47 to be in the range of printable ASCII. - A '$' is a placeholder for characters not in the base64 alphabet. -*/ -static const char b64_unmap[] = - "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$m$$$ncdefghijkl$$$$$$" - "$/0123456789:;<=>?@ABCDEFGH$$$$$$IJKLMNOPQRSTUVWXYZ[\\]^_`ab$$$$" - "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$" - "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"; - -static inline uint8_t -unmap(const uint8_t in) -{ - return (uint8_t)(b64_unmap[in] - 47); -} - -/** - Decode 4 base64 characters to 3 raw bytes. -*/ -static inline size_t -decode_chunk(const uint8_t in[4], uint8_t out[3]) -{ - out[0] = (uint8_t)(((unmap(in[0]) << 2)) | unmap(in[1]) >> 4); - out[1] = (uint8_t)(((unmap(in[1]) << 4) & 0xF0) | unmap(in[2]) >> 2); - out[2] = (uint8_t)(((unmap(in[2]) << 6) & 0xC0) | unmap(in[3])); - return 1u + (in[2] != '=') + ((in[2] != '=') && (in[3] != '=')); -} - -void* -serd_base64_decode(const uint8_t* str, size_t len, size_t* size) -{ - void* buf = malloc((len * 3) / 4 + 2); - *size = 0; - for (size_t i = 0, j = 0; i < len; j += 3) { - uint8_t in[] = "===="; - size_t n_in = 0; - for (; i < len && n_in < 4; ++n_in) { - for (; i < len && !is_base64(str[i]); ++i) {} // Skip junk - in[n_in] = str[i++]; - } - if (n_in > 1) { - *size += decode_chunk(in, (uint8_t*)buf + j); - } - } - return buf; -} @@ -142,7 +142,8 @@ def configure(conf): lib_headers = ['src/reader.h'] -lib_source = ['src/byte_source.c', +lib_source = ['src/base64.c', + 'src/byte_source.c', 'src/env.c', 'src/n3.c', 'src/node.c', |