diff options
author | David Robillard <d@drobilla.net> | 2021-02-25 16:16:54 -0500 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2021-03-08 23:23:05 -0500 |
commit | fc2e2b667d82f6114e339e542edd8e2ca708dc1b (patch) | |
tree | b70f70fbb932e2f035c02a6ef0b7a1aeceeca36e /src | |
parent | c4821c8e6bf1f81c6ea31e11ebc0fc1666e9337b (diff) | |
download | serd-fc2e2b667d82f6114e339e542edd8e2ca708dc1b.tar.gz serd-fc2e2b667d82f6114e339e542edd8e2ca708dc1b.tar.bz2 serd-fc2e2b667d82f6114e339e542edd8e2ca708dc1b.zip |
WIP: Use exess for reading and writing numeric and binary literals
Diffstat (limited to 'src')
-rw-r--r-- | src/base64.c | 110 | ||||
-rw-r--r-- | src/node.c | 190 | ||||
-rw-r--r-- | src/string.c | 63 |
3 files changed, 102 insertions, 261 deletions
diff --git a/src/base64.c b/src/base64.c index 0edf8a0f..0238afbb 100644 --- a/src/base64.c +++ b/src/base64.c @@ -14,11 +14,9 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include "base64.h" - -#include "serd_internal.h" #include "string_utils.h" +#include "exess/exess.h" #include "serd/serd.h" #include <stdbool.h> @@ -26,103 +24,21 @@ #include <stdlib.h> #include <string.h> -/** - Base64 encoding table. - - @see <a href="http://tools.ietf.org/html/rfc3548#section-3">RFC3548 S3</a>. -*/ -static const uint8_t b64_map[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - -/** - Base64 decoding table. - - This is indexed by encoded characters and returns the numeric value used - for decoding, shifted up by 47 to be in the range of printable ASCII. - A '$' is a placeholder for characters not in the base64 alphabet. -*/ -static const char b64_unmap[] = - "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$m$$$ncdefghijkl$$$$$$" - "$/0123456789:;<=>?@ABCDEFGH$$$$$$IJKLMNOPQRSTUVWXYZ[\\]^_`ab$$$$" - "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$" - "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"; - -/** Encode 3 raw bytes to 4 base64 characters. */ -static inline void -encode_chunk(uint8_t out[4], const uint8_t in[3], size_t n_in) -{ - out[0] = b64_map[in[0] >> 2]; - out[1] = b64_map[((in[0] & 0x03) << 4) | ((in[1] & 0xF0) >> 4)]; - - out[2] = (n_in > 1) ? (b64_map[((in[1] & 0x0F) << 2) | ((in[2] & 0xC0) >> 6)]) - : (uint8_t)'='; - - out[3] = ((n_in > 2) ? b64_map[in[2] & 0x3F] : (uint8_t)'='); -} - -size_t -serd_base64_get_length(const size_t size, const bool wrap_lines) -{ - return (size + 2) / 3 * 4 + (wrap_lines * ((size - 1) / 57)); -} - -bool -serd_base64_encode(uint8_t* const str, - const void* const buf, - const size_t size, - const bool wrap_lines) -{ - bool has_newline = false; - for (size_t i = 0, j = 0; i < size; i += 3, j += 4) { - uint8_t in[4] = {0, 0, 0, 0}; - size_t n_in = MIN(3, size - i); - memcpy(in, (const uint8_t*)buf + i, n_in); - - if (wrap_lines && i > 0 && (i % 57) == 0) { - str[j++] = '\n'; - has_newline = true; - } - - encode_chunk(str + j, in, n_in); - } - - return has_newline; -} - -static inline uint8_t -unmap(const uint8_t in) -{ - return (uint8_t)(b64_unmap[in] - 47); -} - -/** Decode 4 base64 characters to 3 raw bytes. */ -static inline size_t -decode_chunk(const uint8_t in[4], uint8_t out[3]) -{ - out[0] = (uint8_t)(((unmap(in[0]) << 2)) | unmap(in[1]) >> 4); - out[1] = (uint8_t)(((unmap(in[1]) << 4) & 0xF0) | unmap(in[2]) >> 2); - out[2] = (uint8_t)(((unmap(in[2]) << 6) & 0xC0) | unmap(in[3])); - return 1 + (in[2] != '=') + ((in[2] != '=') && (in[3] != '=')); -} - void* serd_base64_decode(const char* str, size_t len, size_t* size) { - const uint8_t* const ustr = (const uint8_t*)str; - - void* buf = malloc((len * 3) / 4 + 2); - *size = 0; - for (size_t i = 0, j = 0; i < len; j += 3) { - uint8_t in[] = "===="; - size_t n_in = 0; - for (; i < len && n_in < 4; ++n_in) { - for (; i < len && !is_base64(ustr[i]); ++i) { - } // Skip junk - in[n_in] = ustr[i++]; - } - if (n_in > 1) { - *size += decode_chunk(in, (uint8_t*)buf + j); - } + const size_t max_size = exess_base64_decoded_size(len); + + void* const buf = malloc(max_size); + ExessBlob blob = {max_size, buf}; + const ExessResult r = exess_read_base64(&blob, str); + if (r.status) { + *size = 0; + free(buf); + return NULL; } + + *size = blob.size; + return buf; } @@ -16,11 +16,10 @@ #include "node.h" -#include "base64.h" -#include "serd_internal.h" #include "static_nodes.h" #include "string_utils.h" +#include "exess/exess.h" #include "serd/serd.h" #include <assert.h> @@ -32,14 +31,10 @@ #include <stdlib.h> #include <string.h> -#ifdef _WIN32 -# ifndef isnan -# define isnan(x) _isnan(x) -# endif -# ifndef isinf -# define isinf(x) (!_finite(x)) -# endif -#endif +typedef struct { + const void* SERD_NULLABLE buf; + size_t len; +} SerdConstBuffer; static const size_t serd_node_align = sizeof(SerdNode); @@ -480,134 +475,113 @@ serd_new_file_uri(const SerdStringView path, const SerdStringView hostname) return node; } -static inline unsigned -serd_digits(double abs) -{ - const double lg = ceil(log10(floor(abs) + 1.0)); - return lg < 1.0 ? 1U : (unsigned)lg; -} +typedef size_t (*SerdWriteLiteralFunc)(const void* const user_data, + const size_t buf_size, + char* const buf); -SerdNode* -serd_new_decimal(double d, unsigned frac_digits, const SerdNode* datatype) +static SerdNode* +serd_new_custom_literal(const void* const user_data, + const size_t len, + SerdWriteLiteralFunc write, + const SerdNode* const datatype) { - if (isnan(d) || isinf(d)) { + if (len == 0 || !write) { return NULL; } - const SerdNode* type = datatype ? datatype : &serd_xsd_decimal.node; - const double abs_d = fabs(d); - const unsigned int_digits = serd_digits(abs_d); - const size_t len = int_digits + frac_digits + 3; - const size_t type_len = serd_node_total_size(type); - const size_t total_len = len + type_len; + const size_t datatype_size = serd_node_total_size(datatype); + const size_t total_size = serd_node_pad_size(len + 1) + datatype_size; - SerdNode* const node = - serd_node_malloc(total_len, SERD_HAS_DATATYPE, SERD_LITERAL); - - // Point s to decimal point location - char* const buf = serd_node_buffer(node); - const double int_part = floor(abs_d); - char* s = buf + int_digits; - if (d < 0.0) { - *buf = '-'; - ++s; - } + SerdNode* const node = serd_node_malloc( + total_size, datatype ? SERD_HAS_DATATYPE : 0, SERD_LITERAL); - // Write integer part (right to left) - char* t = s - 1; - uint64_t dec = (uint64_t)int_part; - do { - *t-- = (char)('0' + dec % 10); - } while ((dec /= 10) > 0); - - *s++ = '.'; - - // Write fractional part (right to left) - double frac_part = fabs(d - int_part); - if (frac_part < DBL_EPSILON) { - *s++ = '0'; - node->n_bytes = (size_t)(s - buf); - } else { - uint64_t frac = (uint64_t)llround(frac_part * pow(10.0, (int)frac_digits)); - s += frac_digits - 1; - unsigned i = 0; - - // Skip trailing zeros - for (; i < frac_digits - 1 && !(frac % 10); ++i, --s, frac /= 10) { - } - - node->n_bytes = (size_t)(s - buf) + 1u; + node->n_bytes = write(user_data, len + 1, serd_node_buffer(node)); + if (node->n_bytes == 0 || node->n_bytes > len) { + serd_node_free(node); + return NULL; + } - // Write digits from last trailing zero to decimal point - for (; i < frac_digits; ++i) { - *s-- = (char)('0' + (frac % 10)); - frac /= 10; - } + if (datatype) { + memcpy(serd_node_meta(node), datatype, datatype_size); } - memcpy(serd_node_meta(node), type, type_len); serd_node_check_padding(node); return node; } -SerdNode* -serd_new_integer(int64_t i, const SerdNode* datatype) +static size_t +write_variant_literal(const void* const user_data, + const size_t buf_size, + char* const buf) { - const SerdNode* type = datatype ? datatype : &serd_xsd_integer.node; - uint64_t abs_i = (uint64_t)((i < 0) ? -i : i); - const unsigned digits = serd_digits((double)abs_i); - const size_t type_len = serd_node_total_size(type); - const size_t total_len = digits + 2 + type_len; + const ExessVariant value = *(const ExessVariant*)user_data; + const ExessResult r = exess_write_variant(value, buf_size, buf); - SerdNode* node = serd_node_malloc(total_len, SERD_HAS_DATATYPE, SERD_LITERAL); + return r.status ? 0 : r.count; +} - // Point s to the end - char* buf = serd_node_buffer(node); - char* s = buf + digits - 1; - if (i < 0) { - *buf = '-'; - ++s; - } +SerdNode* +serd_new_decimal(const double d, const SerdNode* const datatype) +{ + // Use given datatype, or xsd:decimal as a default if it is null + const SerdNode* type = datatype ? datatype : &serd_xsd_decimal.node; + const size_t type_size = serd_node_total_size(type); - node->n_bytes = (size_t)(s - buf) + 1u; + // Measure integer string to know how much space the node will need + ExessResult r = exess_write_decimal(d, 0, NULL); + if (r.status) { + return NULL; + } - // Write integer part (right to left) - do { - *s-- = (char)('0' + (abs_i % 10)); - } while ((abs_i /= 10) > 0); + // Allocate node with enough space for value and datatype URI + SerdNode* const node = + serd_node_malloc(serd_node_pad_size(r.count + 1) + type_size, + SERD_HAS_DATATYPE, + SERD_LITERAL); + + // Write string directly into node + r = exess_write_decimal(d, r.count + 1, serd_node_buffer(node)); + if (r.status) { + free(node); + return NULL; + } - memcpy(serd_node_meta(node), type, type_len); + node->n_bytes = r.count; + memcpy(serd_node_meta(node), type, type_size); serd_node_check_padding(node); return node; } SerdNode* -serd_new_blob(const void* buf, - size_t size, - bool wrap_lines, - const SerdNode* datatype) +serd_new_integer(int64_t i, const SerdNode* datatype) { - if (!buf || !size) { - return NULL; - } + const ExessVariant variant = exess_make_long(i); + const size_t len = exess_write_variant(variant, 0, NULL).count; + const SerdNode* type = datatype ? datatype : &serd_xsd_integer.node; - const SerdNode* type = datatype ? datatype : &serd_xsd_base64Binary.node; - const size_t len = serd_base64_get_length(size, wrap_lines); - const size_t type_len = serd_node_total_size(type); - const size_t total_len = len + 1 + type_len; + return serd_new_custom_literal(&variant, len, write_variant_literal, type); +} - SerdNode* const node = - serd_node_malloc(total_len, SERD_HAS_DATATYPE, SERD_LITERAL); +static size_t +write_base64_literal(const void* const user_data, + const size_t buf_size, + char* const buf) +{ + const SerdConstBuffer blob = *(const SerdConstBuffer*)user_data; - uint8_t* str = (uint8_t*)serd_node_buffer(node); - if (serd_base64_encode(str, buf, size, wrap_lines)) { - node->flags |= SERD_HAS_NEWLINE; - } + const ExessResult r = exess_write_base64(blob.len, blob.buf, buf_size, buf); - node->n_bytes = len; - memcpy(serd_node_meta(node), type, type_len); - serd_node_check_padding(node); - return node; + return r.status ? 0 : r.count; +} + +SerdNode* +serd_new_blob(const void* buf, size_t size, const SerdNode* datatype) +{ + const size_t len = exess_write_base64(size, buf, 0, NULL).count; + const SerdNode* type = datatype ? datatype : &serd_xsd_base64Binary.node; + SerdConstBuffer blob = {buf, size}; + + return serd_new_custom_literal(&blob, len, write_base64_literal, type); } SerdNodeType diff --git a/src/string.c b/src/string.c index d2ccea14..00b1e91a 100644 --- a/src/string.c +++ b/src/string.c @@ -16,6 +16,7 @@ #include "string_utils.h" +#include "exess/exess.h" #include "serd/serd.h" #include <assert.h> @@ -105,64 +106,14 @@ serd_strlen(const char* str, SerdNodeFlags* flags) return strlen(str); } -static inline double -read_sign(const char** sptr) -{ - double sign = 1.0; - switch (**sptr) { - case '-': - sign = -1.0; - // fallthru - case '+': - ++(*sptr); - // fallthru - default: - return sign; - } -} - double -serd_strtod(const char* str, char** endptr) +serd_strtod(const char* const str, const char** end) { - double result = 0.0; - - // Point s at the first non-whitespace character - const char* s = str; - while (is_space(*s)) { - ++s; - } - - // Read leading sign if necessary - const double sign = read_sign(&s); - - // Parse integer part - for (; is_digit(*s); ++s) { - result = (result * 10.0) + (*s - '0'); - } - - // Parse fractional part - if (*s == '.') { - double denom = 10.0; - for (++s; is_digit(*s); ++s) { - result += (*s - '0') / denom; - denom *= 10.0; - } - } - - // Parse exponent - if (*s == 'e' || *s == 'E') { - ++s; - double expt = 0.0; - double expt_sign = read_sign(&s); - for (; is_digit(*s); ++s) { - expt = (expt * 10.0) + (*s - '0'); - } - result *= pow(10, expt * expt_sign); - } - - if (endptr) { - *endptr = (char*)s; + double value = (double)NAN; + const ExessResult r = exess_read_double(&value, str); + if (end) { + *end = str + r.count; } - return result * sign; + return r.status ? (double)NAN : value; } |