diff options
-rw-r--r-- | serd/serd.h | 16 | ||||
-rw-r--r-- | src/decimal.c | 244 | ||||
-rw-r--r-- | src/decimal.h | 20 | ||||
-rw-r--r-- | src/node.c | 131 | ||||
-rw-r--r-- | tests/decimal_test.c | 50 | ||||
-rw-r--r-- | tests/serd_test.c | 4 |
6 files changed, 400 insertions, 65 deletions
diff --git a/serd/serd.h b/serd/serd.h index e76aaa44..9dbbffa0 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -705,16 +705,24 @@ serd_new_relative_uri(const char* str, will be written after the decimal point, but trailing zeros will automatically be omitted (except one if `d` is a round integer). - Note that about 16 and 8 fractional digits are required to precisely - represent a double and float, respectively. + Note that 17 and 9 significant digits are required to precisely represent + any double and float, respectively. This function may be lossy if + `precision` is too low, or if `frac_digits` is not 0 (unlimited). + + This function may produce very long strings, serd_new_float() or + serd_new_double() may be a better choice for very large or small numbers. @param d The value for the new node. - @param frac_digits The maximum number of digits after the decimal place. + @param max_precision Maximum number of significant decimal digits. + @param max_frac_digits Maximum number of digits after decimal point, or 0. @param datatype Datatype of node, or NULL for xsd:decimal. */ SERD_API SerdNode* -serd_new_decimal(double d, unsigned frac_digits, const SerdNode* datatype); +serd_new_decimal(double d, + unsigned max_precision, + unsigned max_frac_digits, + const SerdNode* datatype); /** Create a new node by serialising `i` into an xsd:integer string diff --git a/src/decimal.c b/src/decimal.c index 6c0bad59..bdd111a7 100644 --- a/src/decimal.c +++ b/src/decimal.c @@ -16,11 +16,15 @@ #include "decimal.h" +#include "bigint.h" +#include "ieee_float.h" #include "int_math.h" +#include "soft_float.h" #include <assert.h> -#include <float.h> #include <math.h> +#include <stdbool.h> +#include <stddef.h> int serd_count_digits(const uint64_t i) @@ -28,60 +32,216 @@ serd_count_digits(const uint64_t i) return i == 0 ? 1 : (int)serd_ilog10(i) + 1; } -unsigned -serd_double_int_digits(const double abs) +/* + This is more or less just an implementation of the classic rational number + based floating point print routine ("Dragon4"). See "How to Print + Floating-Point Numbers Accurately" by Guy L. Steele Jr. and Jon L White for + the canonical source. The basic idea is to find a big rational between 1 and + 10 where value = (numer / denom) * 10^e, then continuously divide it to + generate decimal digits. + + Unfortunately, this algorithm requires pretty massive bigints to work + correctly for all doubles, and isn't particularly fast. Something like + Grisu3 could be added to improve performance, but that has the annoying + property of needing a more precise fallback in some cases, meaning it would + only add more code, not replace any. Since this is already a pretty + ridiculous amount of code, I'll hold off on this until it becomes a problem, + or somebody comes up with a better algorithm. +*/ + +/// Return true if the number is within the lower boundary +static bool +within_lower(const SerdBigint* const numer, + const SerdBigint* const d_lower, + const bool is_even) +{ + return is_even ? serd_bigint_compare(numer, d_lower) <= 0 + : serd_bigint_compare(numer, d_lower) < 0; +} + +/// Return true if the number is within the upper boundary +static bool +within_upper(const SerdBigint* const numer, + const SerdBigint* const denom, + const SerdBigint* const d_upper, + const bool is_even) { - const double lg = ceil(log10(floor(abs) + 1.0)); - return lg < 1.0 ? 1U : (unsigned)lg; + return is_even ? serd_bigint_plus_compare(numer, d_upper, denom) >= 0 + : serd_bigint_plus_compare(numer, d_upper, denom) > 0; } -unsigned -serd_decimals(const double d, char* const buf, const unsigned frac_digits) +/** + Find values so that 0.1 <= numer/denom < 1 or 1 <= numer/denom < 10. + + @param significand Double significand. + @param exponent Double exponent (base 2). + @param decimal_power Decimal exponent (log10 of the double). + @param[out] numer Numerator of rational number. + @param[out] denom Denominator of rational number. + @param[out] delta Distance to the lower and upper boundaries. +*/ +static void +calculate_initial_values(const uint64_t significand, + const int exponent, + const int decimal_power, + const bool lower_is_closer, + SerdBigint* const numer, + SerdBigint* const denom, + SerdBigint* const delta) { - assert(isfinite(d)); - - // Point s to decimal point location - const double abs_d = fabs(d); - const double int_part = floor(abs_d); - const unsigned int_digits = serd_double_int_digits(abs_d); - unsigned n_bytes = 0; - char* s = buf + int_digits; - if (d < 0.0) { - *buf = '-'; - ++s; + /* Use a common denominator of 2^1 so that boundary distance is an integer. + If the lower boundary is closer, we need to scale everything but the + lower boundary to compensate, so add another factor of two here (this is + faster than shifting them again later as in the paper). */ + const unsigned lg_denom = 1u + lower_is_closer; + + if (exponent >= 0) { + // delta = 2^e + serd_bigint_set_u32(delta, 1); + serd_bigint_shift_left(delta, (unsigned)exponent); + + // numer = f * 2^e + serd_bigint_set_u64(numer, significand); + serd_bigint_shift_left(numer, (unsigned)exponent + lg_denom); + + // denom = 10^d + serd_bigint_set_pow10(denom, (unsigned)decimal_power); + serd_bigint_shift_left(denom, lg_denom); + } else if (decimal_power >= 0) { + // delta = 2^e, which is just 1 here since 2^-e is in the denominator + serd_bigint_set_u32(delta, 1); + + // numer = f + serd_bigint_set_u64(numer, significand); + serd_bigint_shift_left(numer, lg_denom); + + // denom = 10^d * 2^-e + serd_bigint_set_pow10(denom, (unsigned)decimal_power); + serd_bigint_shift_left(denom, (unsigned)-exponent + lg_denom); + } else { + // delta = 10^d + serd_bigint_set_pow10(delta, (unsigned)-decimal_power); + + // numer = f * 10^-d + serd_bigint_set(numer, delta); + serd_bigint_multiply_u64(numer, significand); + serd_bigint_shift_left(numer, lg_denom); + + // denom = 2^-exponent + serd_bigint_set_u32(denom, 1); + serd_bigint_shift_left(denom, (unsigned)-exponent + lg_denom); } +} - // Write integer part (right to left) - char* t = s - 1; - uint64_t dec = (uint64_t)int_part; - do { - *t-- = (char)('0' + dec % 10); - } while ((dec /= 10) > 0); +#ifndef NDEBUG +static bool +check_initial_values(const SerdBigint* const numer, + const SerdBigint* const denom, + const SerdBigint* const d_upper) +{ + SerdBigint upper = *numer; + serd_bigint_add(&upper, d_upper); + assert(serd_bigint_compare(&upper, denom) >= 0); + const uint32_t div = serd_bigint_divmod(&upper, denom); + assert(div >= 1 && div < 10); + return true; +} +#endif + +static unsigned +emit_digits(SerdBigint* const numer, + const SerdBigint* const denom, + SerdBigint* const d_lower, + SerdBigint* const d_upper, + const bool is_even, + char* const buffer, + const size_t max_digits) +{ + unsigned length = 0; + for (size_t i = 0; i < max_digits; ++i) { + // Emit the next digit + const uint32_t digit = serd_bigint_divmod(numer, denom); + assert(digit <= 9); + buffer[length++] = (char)('0' + digit); + + // Check for termination + const bool within_low = within_lower(numer, d_lower, is_even); + const bool within_high = within_upper(numer, denom, d_upper, is_even); + if (!within_low && !within_high) { + serd_bigint_multiply_u32(numer, 10); + serd_bigint_multiply_u32(d_lower, 10); + if (d_lower != d_upper) { + serd_bigint_multiply_u32(d_upper, 10); + } + } else { + if (!within_low || + (within_high && + serd_bigint_plus_compare(numer, numer, denom) >= 0)) { + // In high only, or halfway and the next digit is > 5, round up + assert(buffer[length - 1] != '9'); + buffer[length - 1]++; + } + + break; + } + } - *s++ = '.'; + return length; +} - // Write fractional part (right to left) - double frac_part = fabs(d - int_part); - if (frac_part < DBL_EPSILON) { - *s++ = '0'; - n_bytes = (unsigned)(s - buf); +SerdDecimalCount +serd_decimals(const double d, char* const buf, const unsigned max_digits) +{ + assert(isfinite(d) && fpclassify(d) != FP_ZERO); + + const SerdSoftFloat value = soft_float_from_double(d); + const int power = (int)(log10(d)); + const bool is_even = !(value.f & 1); + const bool lower_is_closer = double_lower_boundary_is_closer(d); + + // Calculate initial values so that v = (numer / denom) * 10^power + SerdBigint numer; + SerdBigint denom; + SerdBigint d_lower; + calculate_initial_values( + value.f, value.e, power, lower_is_closer, &numer, &denom, &d_lower); + + SerdBigint d_upper_storage; + SerdBigint* d_upper = NULL; + if (lower_is_closer) { + // Scale upper boundary to account for the closer lower boundary + // (the numerator and denominator were already scaled above) + d_upper_storage = d_lower; + d_upper = &d_upper_storage; + serd_bigint_shift_left(d_upper, 1); } else { - uint64_t frac = (uint64_t)llround(frac_part * pow(10.0, (int)frac_digits)); - s += frac_digits - 1; - unsigned i = 0; + d_upper = &d_lower; // Boundaries are the same, reuse the lower + } - // Skip trailing zeros - for (; i < frac_digits - 1 && !(frac % 10); ++i, --s, frac /= 10) {} + // Scale if necessary to make 1 <= (numer + delta) / denom < 10 + SerdDecimalCount count = {0, 0}; + if (within_upper(&numer, &denom, d_upper, is_even)) { + count.expt = power; + } else { + count.expt = power - 1; + serd_bigint_multiply_u32(&numer, 10); + serd_bigint_multiply_u32(&d_lower, 10); + if (d_upper != &d_lower) { + serd_bigint_multiply_u32(d_upper, 10); + } + } - n_bytes = (unsigned)(s - buf) + 1u; + // Write digits to output + assert(check_initial_values(&numer, &denom, d_upper)); + count.count = emit_digits( + &numer, &denom, &d_lower, d_upper, is_even, buf, max_digits); - // Write digits from last trailing zero to decimal point - for (; i < frac_digits; ++i) { - *s-- = (char)('0' + (frac % 10)); - frac /= 10; - } + // Trim trailing zeros + while (count.count > 1 && buf[count.count - 1] == '0') { + buf[--count.count] = 0; } - return n_bytes; + buf[count.count] = '\0'; + return count; } diff --git a/src/decimal.h b/src/decimal.h index eefc07ef..8575cd85 100644 --- a/src/decimal.h +++ b/src/decimal.h @@ -19,14 +19,26 @@ #include <stdint.h> +typedef struct { + unsigned count; ///< Number of digits + int expt; ///< Power of 10 exponent +} SerdDecimalCount; + /// Return the number of decimal digits required to represent `n` int serd_count_digits(uint64_t i); -unsigned -serd_double_int_digits(double abs); +/** + Write significant decimal digits for `d` into `buf`. + + Writes only significant digits, without any leading or trailing zeros. The + actual number is given by the exponent in the return value. -unsigned -serd_decimals(double d, char* buf, unsigned frac_digits); + @param d The number to convert to decimal, must be finite and non-zero. + @param buf The output buffer at least `max_digits` long. + @param max_digits The maximum number of digits to write. +*/ +SerdDecimalCount +serd_decimals(double d, char* buf, unsigned max_digits); #endif // SERD_DECIMAL_H @@ -17,6 +17,7 @@ #include "node.h" #include "decimal.h" +#include "int_math.h" #include "namespaces.h" #include "static_nodes.h" #include "string_utils.h" @@ -32,6 +33,11 @@ #include <stdlib.h> #include <string.h> +// Define C11 numeric constants if the compiler hasn't already +#ifndef DBL_DECIMAL_DIG +# define DBL_DECIMAL_DIG 17 +#endif + static SerdNode* serd_new_from_uri(const SerdURI* uri, const SerdURI* base); @@ -565,25 +571,126 @@ serd_new_relative_uri(const char* str, return node; } -SerdNode* -serd_new_decimal(double d, unsigned frac_digits, const SerdNode* datatype) +static size_t +copy_digits(char* const dest, const char* const src, const size_t n) +{ + memcpy(dest, src, n); + return n; +} + +static size_t +set_zeros(char* const dest, const size_t n) { - if (!isfinite(d)) { + memset(dest, '0', n); + return n; +} + +typedef enum { POINT_AFTER, POINT_BEFORE, POINT_BETWEEN } PointLocation; + +typedef struct { + PointLocation point_loc; ///< Location of decimal point + unsigned n_zeros_before; ///< Number of additional zeros before point + unsigned n_zeros_after; ///< Number of additional zeros after point + unsigned n_digits; ///< Number of significant digits +} SerdDecimalMetrics; + +static SerdDecimalMetrics +get_decimal_metrics(const SerdDecimalCount count) +{ + const int expt = + count.expt >= 0 ? (count.expt - (int)count.count + 1) : count.expt; + + const unsigned n_digits = count.count; + SerdDecimalMetrics metrics = {POINT_AFTER, 0, 0, n_digits}; + if (count.expt >= (int)n_digits - 1) { + metrics.point_loc = POINT_AFTER; + metrics.n_zeros_before = (unsigned)count.expt - (count.count - 1u); + metrics.n_zeros_after = 1u; + } else if (count.expt < 0) { + metrics.point_loc = POINT_BEFORE; + metrics.n_zeros_before = 1u; + metrics.n_zeros_after = (unsigned)(-expt - 1); + } else { + metrics.point_loc = POINT_BETWEEN; + } + + return metrics; +} + +SerdNode* +serd_new_decimal(const double d, + const unsigned max_precision, + const unsigned max_frac_digits, + const SerdNode* datatype) +{ + const SerdNode* const type = datatype ? datatype : &serd_xsd_decimal.node; + const int fpclass = fpclassify(d); + + if (fpclass == FP_ZERO) { + return signbit(d) ? serd_new_typed_literal("-0.0", type) + : serd_new_typed_literal("0.0", type); + } else if (fpclass != FP_NORMAL && fpclass != FP_SUBNORMAL) { return NULL; } - const SerdNode* type = datatype ? datatype : &serd_xsd_decimal.node; - const double abs_d = fabs(d); - const unsigned int_digits = serd_double_int_digits(abs_d); - const size_t len = int_digits + frac_digits + 3; - const size_t type_len = serd_node_total_size(type); - const size_t total_len = len + type_len; + // Adjust precision to get the right number of fractional digits + unsigned precision = max_precision; + if (max_frac_digits) { + const int order = (int)(log10(fabs(d)) + 1); + const int required_precision = (int)max_frac_digits + order; - SerdNode* const node = - serd_node_malloc(total_len, SERD_HAS_DATATYPE, SERD_LITERAL); + precision = (unsigned)MIN((int)max_precision, + MAX(0, required_precision)); + } + + if (precision == 0) { + return serd_new_typed_literal("0.0", type); + } + + // Get decimal digits and measure + char digits[DBL_DECIMAL_DIG + 1] = {0}; + const SerdDecimalCount count = serd_decimals(fabs(d), digits, precision); + SerdDecimalMetrics m = get_decimal_metrics(count); + + // Calculate string length and allocate node + const unsigned n_zeros = m.n_zeros_before + m.n_zeros_after; + const size_t len = (d < 0) + m.n_digits + 1 + n_zeros; + const size_t type_len = serd_node_total_size(type); + SerdNode* const node = + serd_node_malloc(len + type_len, SERD_HAS_DATATYPE, SERD_LITERAL); + + char* ptr = serd_node_buffer(node); + if (d < 0) { + *ptr++ = '-'; + } + + if (m.point_loc == POINT_AFTER) { + ptr += copy_digits(ptr, digits, m.n_digits); + ptr += set_zeros(ptr, m.n_zeros_before); + *ptr++ = '.'; + *ptr++ = '0'; + } else if (m.point_loc == POINT_BEFORE) { + *ptr++ = '0'; + *ptr++ = '.'; + ptr += set_zeros(ptr, m.n_zeros_after); + copy_digits(ptr, digits, m.n_digits); + } else { + assert(m.point_loc == POINT_BETWEEN); + assert(count.expt >= -1); + + const size_t n_before = (size_t)count.expt + 1u; + const size_t n_after = + (max_frac_digits ? MIN(max_frac_digits, m.n_digits - n_before) + : m.n_digits - n_before); - node->n_bytes = serd_decimals(d, serd_node_buffer(node), frac_digits); + ptr += copy_digits(ptr, digits, n_before); + *ptr++ = '.'; + memcpy(ptr, digits + n_before, n_after); + } + assert(strlen(serd_node_buffer(node)) == len); + node->n_bytes = len; + assert(serd_node_meta(node)->type == 0); memcpy(serd_node_meta(node), type, type_len); serd_node_check_padding(node); return node; diff --git a/tests/decimal_test.c b/tests/decimal_test.c index b0361e2c..674ce5cb 100644 --- a/tests/decimal_test.c +++ b/tests/decimal_test.c @@ -1,5 +1,5 @@ /* - Copyright 2011-2019 David Robillard <http://drobilla.net> + Copyright 2011-2020 David Robillard <http://drobilla.net> Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above @@ -18,7 +18,13 @@ #include "../src/decimal.h" +#include "serd/serd.h" + #include <assert.h> +#include <math.h> +#include <stdbool.h> +#include <stdio.h> +#include <string.h> static void test_count_digits(void) @@ -48,8 +54,50 @@ test_count_digits(void) assert(20 == serd_count_digits(18446744073709551615ull)); } +static void +check_precision(const double d, + const unsigned precision, + const unsigned frac_digits, + const char* expected) +{ + SerdNode* const node = serd_new_decimal(d, precision, frac_digits, NULL); + const char* str = serd_node_string(node); + + if (strcmp(str, expected)) { + fprintf(stderr, "error: string is \"%s\"\n", str); + fprintf(stderr, "note: expected \"%s\"\n", expected); + assert(false); + } + + serd_node_free(node); +} + +static void +test_precision(void) +{ + assert(serd_new_decimal((double)INFINITY, 17, 0, NULL) == NULL); + assert(serd_new_decimal((double)-INFINITY, 17, 0, NULL) == NULL); + assert(serd_new_decimal((double)NAN, 17, 0, NULL) == NULL); + + check_precision(1.0000000001, 17, 8, "1.0"); + check_precision(0.0000000001, 17, 10, "0.0000000001"); + check_precision(0.0000000001, 17, 8, "0.0"); + + check_precision(12345.678900, 9, 5, "12345.6789"); + check_precision(12345.678900, 8, 5, "12345.678"); + check_precision(12345.678900, 5, 5, "12345.0"); + check_precision(12345.678900, 3, 5, "12300.0"); + + check_precision(12345.678900, 9, 0, "12345.6789"); + check_precision(12345.678900, 9, 5, "12345.6789"); + check_precision(12345.678900, 9, 3, "12345.678"); + check_precision(12345.678900, 9, 1, "12345.6"); +} + int main(void) { test_count_digits(); + test_precision(); + return 0; } diff --git a/tests/serd_test.c b/tests/serd_test.c index 599e948a..81fb0322 100644 --- a/tests/serd_test.c +++ b/tests/serd_test.c @@ -310,13 +310,13 @@ test_double_to_node(void) "0.01", "2.05", "-16.00001", - "5.00000001", + "5.0", "0.0", NULL, NULL }; for (size_t i = 0; i < sizeof(dbl_test_nums) / sizeof(double); ++i) { - SerdNode* node = serd_new_decimal(dbl_test_nums[i], 8, NULL); + SerdNode* node = serd_new_decimal(dbl_test_nums[i], 17, 8, NULL); const char* node_str = serd_node_string(node); const bool pass = (node_str && dbl_test_strs[i]) ? !strcmp(node_str, dbl_test_strs[i]) |