aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2019-10-06 20:40:56 +0200
committerDavid Robillard <d@drobilla.net>2020-06-21 18:12:04 +0200
commita204962e35263d0f0aca8e9b34e5091669c7e057 (patch)
treef31fa1e5fe01a45011db5a6e88f47500cce4f06e
parente7cacc2a5ec010f6cc0bc12bd0bd4657c04236ee (diff)
downloadserd-a204962e35263d0f0aca8e9b34e5091669c7e057.tar.gz
serd-a204962e35263d0f0aca8e9b34e5091669c7e057.tar.bz2
serd-a204962e35263d0f0aca8e9b34e5091669c7e057.zip
Add precise decimal digit generation
-rw-r--r--serd/serd.h18
-rw-r--r--src/decimal.c243
-rw-r--r--src/decimal.h20
-rw-r--r--src/node.c131
-rw-r--r--tests/decimal_test.c50
-rw-r--r--tests/serd_test.c4
6 files changed, 400 insertions, 66 deletions
diff --git a/serd/serd.h b/serd/serd.h
index 8526a9ab..cde9a3c4 100644
--- a/serd/serd.h
+++ b/serd/serd.h
@@ -1,5 +1,5 @@
/*
- Copyright 2011-2017 David Robillard <http://drobilla.net>
+ Copyright 2011-2020 David Robillard <http://drobilla.net>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
@@ -702,16 +702,24 @@ serd_new_relative_uri(const char* str,
will be written after the decimal point, but trailing zeros will
automatically be omitted (except one if `d` is a round integer).
- Note that about 16 and 8 fractional digits are required to precisely
- represent a double and float, respectively.
+ Note that 17 and 9 significant digits are required to precisely represent
+ any double and float, respectively. This function may be lossy if
+ `precision` is too low, or if `frac_digits` is not 0 (unlimited).
+
+ This function may produce very long strings, serd_new_float() or
+ serd_new_double() may be a better choice for very large or small numbers.
@param d The value for the new node.
- @param frac_digits The maximum number of digits after the decimal place.
+ @param max_precision Maximum number of significant decimal digits.
+ @param max_frac_digits Maximum number of digits after decimal point, or 0.
@param datatype Datatype of node, or NULL for xsd:decimal.
*/
SERD_API
SerdNode*
-serd_new_decimal(double d, unsigned frac_digits, const SerdNode* datatype);
+serd_new_decimal(double d,
+ unsigned max_precision,
+ unsigned max_frac_digits,
+ const SerdNode* datatype);
/**
Create a new node by serialising `i` into an xsd:integer string
diff --git a/src/decimal.c b/src/decimal.c
index f60eafa7..bdd111a7 100644
--- a/src/decimal.c
+++ b/src/decimal.c
@@ -16,11 +16,14 @@
#include "decimal.h"
+#include "bigint.h"
+#include "ieee_float.h"
#include "int_math.h"
+#include "soft_float.h"
#include <assert.h>
-#include <float.h>
#include <math.h>
+#include <stdbool.h>
#include <stddef.h>
int
@@ -29,60 +32,216 @@ serd_count_digits(const uint64_t i)
return i == 0 ? 1 : (int)serd_ilog10(i) + 1;
}
-unsigned
-serd_double_int_digits(const double abs)
+/*
+ This is more or less just an implementation of the classic rational number
+ based floating point print routine ("Dragon4"). See "How to Print
+ Floating-Point Numbers Accurately" by Guy L. Steele Jr. and Jon L White for
+ the canonical source. The basic idea is to find a big rational between 1 and
+ 10 where value = (numer / denom) * 10^e, then continuously divide it to
+ generate decimal digits.
+
+ Unfortunately, this algorithm requires pretty massive bigints to work
+ correctly for all doubles, and isn't particularly fast. Something like
+ Grisu3 could be added to improve performance, but that has the annoying
+ property of needing a more precise fallback in some cases, meaning it would
+ only add more code, not replace any. Since this is already a pretty
+ ridiculous amount of code, I'll hold off on this until it becomes a problem,
+ or somebody comes up with a better algorithm.
+*/
+
+/// Return true if the number is within the lower boundary
+static bool
+within_lower(const SerdBigint* const numer,
+ const SerdBigint* const d_lower,
+ const bool is_even)
+{
+ return is_even ? serd_bigint_compare(numer, d_lower) <= 0
+ : serd_bigint_compare(numer, d_lower) < 0;
+}
+
+/// Return true if the number is within the upper boundary
+static bool
+within_upper(const SerdBigint* const numer,
+ const SerdBigint* const denom,
+ const SerdBigint* const d_upper,
+ const bool is_even)
{
- const double lg = ceil(log10(floor(abs) + 1.0));
- return lg < 1.0 ? 1U : (unsigned)lg;
+ return is_even ? serd_bigint_plus_compare(numer, d_upper, denom) >= 0
+ : serd_bigint_plus_compare(numer, d_upper, denom) > 0;
}
-unsigned
-serd_decimals(const double d, char* const buf, const unsigned frac_digits)
+/**
+ Find values so that 0.1 <= numer/denom < 1 or 1 <= numer/denom < 10.
+
+ @param significand Double significand.
+ @param exponent Double exponent (base 2).
+ @param decimal_power Decimal exponent (log10 of the double).
+ @param[out] numer Numerator of rational number.
+ @param[out] denom Denominator of rational number.
+ @param[out] delta Distance to the lower and upper boundaries.
+*/
+static void
+calculate_initial_values(const uint64_t significand,
+ const int exponent,
+ const int decimal_power,
+ const bool lower_is_closer,
+ SerdBigint* const numer,
+ SerdBigint* const denom,
+ SerdBigint* const delta)
{
- assert(isfinite(d));
-
- // Point s to decimal point location
- const double abs_d = fabs(d);
- const double int_part = floor(abs_d);
- const unsigned int_digits = serd_double_int_digits(abs_d);
- unsigned n_bytes = 0;
- char* s = buf + int_digits;
- if (d < 0.0) {
- *buf = '-';
- ++s;
+ /* Use a common denominator of 2^1 so that boundary distance is an integer.
+ If the lower boundary is closer, we need to scale everything but the
+ lower boundary to compensate, so add another factor of two here (this is
+ faster than shifting them again later as in the paper). */
+ const unsigned lg_denom = 1u + lower_is_closer;
+
+ if (exponent >= 0) {
+ // delta = 2^e
+ serd_bigint_set_u32(delta, 1);
+ serd_bigint_shift_left(delta, (unsigned)exponent);
+
+ // numer = f * 2^e
+ serd_bigint_set_u64(numer, significand);
+ serd_bigint_shift_left(numer, (unsigned)exponent + lg_denom);
+
+ // denom = 10^d
+ serd_bigint_set_pow10(denom, (unsigned)decimal_power);
+ serd_bigint_shift_left(denom, lg_denom);
+ } else if (decimal_power >= 0) {
+ // delta = 2^e, which is just 1 here since 2^-e is in the denominator
+ serd_bigint_set_u32(delta, 1);
+
+ // numer = f
+ serd_bigint_set_u64(numer, significand);
+ serd_bigint_shift_left(numer, lg_denom);
+
+ // denom = 10^d * 2^-e
+ serd_bigint_set_pow10(denom, (unsigned)decimal_power);
+ serd_bigint_shift_left(denom, (unsigned)-exponent + lg_denom);
+ } else {
+ // delta = 10^d
+ serd_bigint_set_pow10(delta, (unsigned)-decimal_power);
+
+ // numer = f * 10^-d
+ serd_bigint_set(numer, delta);
+ serd_bigint_multiply_u64(numer, significand);
+ serd_bigint_shift_left(numer, lg_denom);
+
+ // denom = 2^-exponent
+ serd_bigint_set_u32(denom, 1);
+ serd_bigint_shift_left(denom, (unsigned)-exponent + lg_denom);
}
+}
- // Write integer part (right to left)
- char* t = s - 1;
- uint64_t dec = (uint64_t)int_part;
- do {
- *t-- = (char)('0' + dec % 10);
- } while ((dec /= 10) > 0);
+#ifndef NDEBUG
+static bool
+check_initial_values(const SerdBigint* const numer,
+ const SerdBigint* const denom,
+ const SerdBigint* const d_upper)
+{
+ SerdBigint upper = *numer;
+ serd_bigint_add(&upper, d_upper);
+ assert(serd_bigint_compare(&upper, denom) >= 0);
+ const uint32_t div = serd_bigint_divmod(&upper, denom);
+ assert(div >= 1 && div < 10);
+ return true;
+}
+#endif
+
+static unsigned
+emit_digits(SerdBigint* const numer,
+ const SerdBigint* const denom,
+ SerdBigint* const d_lower,
+ SerdBigint* const d_upper,
+ const bool is_even,
+ char* const buffer,
+ const size_t max_digits)
+{
+ unsigned length = 0;
+ for (size_t i = 0; i < max_digits; ++i) {
+ // Emit the next digit
+ const uint32_t digit = serd_bigint_divmod(numer, denom);
+ assert(digit <= 9);
+ buffer[length++] = (char)('0' + digit);
+
+ // Check for termination
+ const bool within_low = within_lower(numer, d_lower, is_even);
+ const bool within_high = within_upper(numer, denom, d_upper, is_even);
+ if (!within_low && !within_high) {
+ serd_bigint_multiply_u32(numer, 10);
+ serd_bigint_multiply_u32(d_lower, 10);
+ if (d_lower != d_upper) {
+ serd_bigint_multiply_u32(d_upper, 10);
+ }
+ } else {
+ if (!within_low ||
+ (within_high &&
+ serd_bigint_plus_compare(numer, numer, denom) >= 0)) {
+ // In high only, or halfway and the next digit is > 5, round up
+ assert(buffer[length - 1] != '9');
+ buffer[length - 1]++;
+ }
+
+ break;
+ }
+ }
- *s++ = '.';
+ return length;
+}
- // Write fractional part (right to left)
- double frac_part = fabs(d - int_part);
- if (frac_part < DBL_EPSILON) {
- *s++ = '0';
- n_bytes = (size_t)(s - buf);
+SerdDecimalCount
+serd_decimals(const double d, char* const buf, const unsigned max_digits)
+{
+ assert(isfinite(d) && fpclassify(d) != FP_ZERO);
+
+ const SerdSoftFloat value = soft_float_from_double(d);
+ const int power = (int)(log10(d));
+ const bool is_even = !(value.f & 1);
+ const bool lower_is_closer = double_lower_boundary_is_closer(d);
+
+ // Calculate initial values so that v = (numer / denom) * 10^power
+ SerdBigint numer;
+ SerdBigint denom;
+ SerdBigint d_lower;
+ calculate_initial_values(
+ value.f, value.e, power, lower_is_closer, &numer, &denom, &d_lower);
+
+ SerdBigint d_upper_storage;
+ SerdBigint* d_upper = NULL;
+ if (lower_is_closer) {
+ // Scale upper boundary to account for the closer lower boundary
+ // (the numerator and denominator were already scaled above)
+ d_upper_storage = d_lower;
+ d_upper = &d_upper_storage;
+ serd_bigint_shift_left(d_upper, 1);
} else {
- uint64_t frac = (uint64_t)llround(frac_part * pow(10.0, (int)frac_digits));
- s += frac_digits - 1;
- unsigned i = 0;
+ d_upper = &d_lower; // Boundaries are the same, reuse the lower
+ }
- // Skip trailing zeros
- for (; i < frac_digits - 1 && !(frac % 10); ++i, --s, frac /= 10) {}
+ // Scale if necessary to make 1 <= (numer + delta) / denom < 10
+ SerdDecimalCount count = {0, 0};
+ if (within_upper(&numer, &denom, d_upper, is_even)) {
+ count.expt = power;
+ } else {
+ count.expt = power - 1;
+ serd_bigint_multiply_u32(&numer, 10);
+ serd_bigint_multiply_u32(&d_lower, 10);
+ if (d_upper != &d_lower) {
+ serd_bigint_multiply_u32(d_upper, 10);
+ }
+ }
- n_bytes = (size_t)(s - buf) + 1u;
+ // Write digits to output
+ assert(check_initial_values(&numer, &denom, d_upper));
+ count.count = emit_digits(
+ &numer, &denom, &d_lower, d_upper, is_even, buf, max_digits);
- // Write digits from last trailing zero to decimal point
- for (; i < frac_digits; ++i) {
- *s-- = (char)('0' + (frac % 10));
- frac /= 10;
- }
+ // Trim trailing zeros
+ while (count.count > 1 && buf[count.count - 1] == '0') {
+ buf[--count.count] = 0;
}
- return n_bytes;
+ buf[count.count] = '\0';
+ return count;
}
diff --git a/src/decimal.h b/src/decimal.h
index eefc07ef..8575cd85 100644
--- a/src/decimal.h
+++ b/src/decimal.h
@@ -19,14 +19,26 @@
#include <stdint.h>
+typedef struct {
+ unsigned count; ///< Number of digits
+ int expt; ///< Power of 10 exponent
+} SerdDecimalCount;
+
/// Return the number of decimal digits required to represent `n`
int
serd_count_digits(uint64_t i);
-unsigned
-serd_double_int_digits(double abs);
+/**
+ Write significant decimal digits for `d` into `buf`.
+
+ Writes only significant digits, without any leading or trailing zeros. The
+ actual number is given by the exponent in the return value.
-unsigned
-serd_decimals(double d, char* buf, unsigned frac_digits);
+ @param d The number to convert to decimal, must be finite and non-zero.
+ @param buf The output buffer at least `max_digits` long.
+ @param max_digits The maximum number of digits to write.
+*/
+SerdDecimalCount
+serd_decimals(double d, char* buf, unsigned max_digits);
#endif // SERD_DECIMAL_H
diff --git a/src/node.c b/src/node.c
index 2bd47038..9a0ad3b9 100644
--- a/src/node.c
+++ b/src/node.c
@@ -17,6 +17,7 @@
#include "node.h"
#include "decimal.h"
+#include "int_math.h"
#include "namespaces.h"
#include "static_nodes.h"
#include "string_utils.h"
@@ -30,6 +31,11 @@
#include <stdlib.h>
#include <string.h>
+// Define C11 numeric constants if the compiler hasn't already
+#ifndef DBL_DECIMAL_DIG
+# define DBL_DECIMAL_DIG 17
+#endif
+
static const size_t serd_node_align = sizeof(SerdNode);
static SerdNode*
@@ -566,25 +572,126 @@ serd_new_relative_uri(const char* str,
return node;
}
-SerdNode*
-serd_new_decimal(double d, unsigned frac_digits, const SerdNode* datatype)
+static size_t
+copy_digits(char* const dest, const char* const src, const size_t n)
+{
+ memcpy(dest, src, n);
+ return n;
+}
+
+static size_t
+set_zeros(char* const dest, const size_t n)
{
- if (!isfinite(d)) {
+ memset(dest, '0', n);
+ return n;
+}
+
+typedef enum { POINT_AFTER, POINT_BEFORE, POINT_BETWEEN } PointLocation;
+
+typedef struct {
+ PointLocation point_loc; ///< Location of decimal point
+ unsigned n_zeros_before; ///< Number of additional zeros before point
+ unsigned n_zeros_after; ///< Number of additional zeros after point
+ unsigned n_digits; ///< Number of significant digits
+} SerdDecimalMetrics;
+
+static SerdDecimalMetrics
+get_decimal_metrics(const SerdDecimalCount count)
+{
+ const int expt =
+ count.expt >= 0 ? (count.expt - (int)count.count + 1) : count.expt;
+
+ const unsigned n_digits = count.count;
+ SerdDecimalMetrics metrics = {POINT_AFTER, 0, 0, n_digits};
+ if (count.expt >= (int)n_digits - 1) {
+ metrics.point_loc = POINT_AFTER;
+ metrics.n_zeros_before = (unsigned)count.expt - (count.count - 1u);
+ metrics.n_zeros_after = 1u;
+ } else if (count.expt < 0) {
+ metrics.point_loc = POINT_BEFORE;
+ metrics.n_zeros_before = 1u;
+ metrics.n_zeros_after = (unsigned)(-expt - 1);
+ } else {
+ metrics.point_loc = POINT_BETWEEN;
+ }
+
+ return metrics;
+}
+
+SerdNode*
+serd_new_decimal(const double d,
+ const unsigned max_precision,
+ const unsigned max_frac_digits,
+ const SerdNode* datatype)
+{
+ const SerdNode* const type = datatype ? datatype : &serd_xsd_decimal.node;
+ const int fpclass = fpclassify(d);
+
+ if (fpclass == FP_ZERO) {
+ return signbit(d) ? serd_new_typed_literal("-0.0", type)
+ : serd_new_typed_literal("0.0", type);
+ } else if (fpclass != FP_NORMAL && fpclass != FP_SUBNORMAL) {
return NULL;
}
- const SerdNode* type = datatype ? datatype : &serd_xsd_decimal.node;
- const double abs_d = fabs(d);
- const unsigned int_digits = serd_double_int_digits(abs_d);
- const size_t len = int_digits + frac_digits + 3;
- const size_t type_len = serd_node_total_size(type);
- const size_t total_len = len + type_len;
+ // Adjust precision to get the right number of fractional digits
+ unsigned precision = max_precision;
+ if (max_frac_digits) {
+ const int order = (int)(log10(fabs(d)) + 1);
+ const int required_precision = (int)max_frac_digits + order;
- SerdNode* const node =
- serd_node_malloc(total_len, SERD_HAS_DATATYPE, SERD_LITERAL);
+ precision = (unsigned)MIN((int)max_precision,
+ MAX(0, required_precision));
+ }
+
+ if (precision == 0) {
+ return serd_new_typed_literal("0.0", type);
+ }
+
+ // Get decimal digits and measure
+ char digits[DBL_DECIMAL_DIG + 1] = {0};
+ const SerdDecimalCount count = serd_decimals(fabs(d), digits, precision);
+ SerdDecimalMetrics m = get_decimal_metrics(count);
+
+ // Calculate string length and allocate node
+ const unsigned n_zeros = m.n_zeros_before + m.n_zeros_after;
+ const size_t len = (d < 0) + m.n_digits + 1 + n_zeros;
+ const size_t type_len = serd_node_total_size(type);
+ SerdNode* const node =
+ serd_node_malloc(len + type_len, SERD_HAS_DATATYPE, SERD_LITERAL);
+
+ char* ptr = serd_node_buffer(node);
+ if (d < 0) {
+ *ptr++ = '-';
+ }
+
+ if (m.point_loc == POINT_AFTER) {
+ ptr += copy_digits(ptr, digits, m.n_digits);
+ ptr += set_zeros(ptr, m.n_zeros_before);
+ *ptr++ = '.';
+ *ptr++ = '0';
+ } else if (m.point_loc == POINT_BEFORE) {
+ *ptr++ = '0';
+ *ptr++ = '.';
+ ptr += set_zeros(ptr, m.n_zeros_after);
+ copy_digits(ptr, digits, m.n_digits);
+ } else {
+ assert(m.point_loc == POINT_BETWEEN);
+ assert(count.expt >= -1);
+
+ const size_t n_before = (size_t)count.expt + 1u;
+ const size_t n_after =
+ (max_frac_digits ? MIN(max_frac_digits, m.n_digits - n_before)
+ : m.n_digits - n_before);
- node->n_bytes = serd_decimals(d, serd_node_buffer(node), frac_digits);
+ ptr += copy_digits(ptr, digits, n_before);
+ *ptr++ = '.';
+ memcpy(ptr, digits + n_before, n_after);
+ }
+ assert(strlen(serd_node_buffer(node)) == len);
+ node->n_bytes = len;
+ assert(serd_node_get_meta(node)->type == 0);
memcpy(serd_node_get_meta(node), type, type_len);
serd_node_check_padding(node);
return node;
diff --git a/tests/decimal_test.c b/tests/decimal_test.c
index b0361e2c..4fb521a5 100644
--- a/tests/decimal_test.c
+++ b/tests/decimal_test.c
@@ -1,5 +1,5 @@
/*
- Copyright 2011-2019 David Robillard <http://drobilla.net>
+ Copyright 2011-2020 David Robillard <http://drobilla.net>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
@@ -18,7 +18,13 @@
#include "../src/decimal.h"
+#include "serd/serd.h"
+
#include <assert.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
static void
test_count_digits(void)
@@ -48,8 +54,50 @@ test_count_digits(void)
assert(20 == serd_count_digits(18446744073709551615ull));
}
+static void
+check_precision(const double d,
+ const unsigned precision,
+ const unsigned frac_digits,
+ const char* expected)
+{
+ SerdNode* const node = serd_new_decimal(d, precision, frac_digits, NULL);
+ const char* str = serd_node_get_string(node);
+
+ if (strcmp(str, expected)) {
+ fprintf(stderr, "error: string is \"%s\"\n", str);
+ fprintf(stderr, "note: expected \"%s\"\n", expected);
+ assert(false);
+ }
+
+ serd_node_free(node);
+}
+
+static void
+test_precision(void)
+{
+ assert(serd_new_decimal((double)INFINITY, 17, 0, NULL) == NULL);
+ assert(serd_new_decimal((double)-INFINITY, 17, 0, NULL) == NULL);
+ assert(serd_new_decimal((double)NAN, 17, 0, NULL) == NULL);
+
+ check_precision(1.0000000001, 17, 8, "1.0");
+ check_precision(0.0000000001, 17, 10, "0.0000000001");
+ check_precision(0.0000000001, 17, 8, "0.0");
+
+ check_precision(12345.678900, 9, 5, "12345.6789");
+ check_precision(12345.678900, 8, 5, "12345.678");
+ check_precision(12345.678900, 5, 5, "12345.0");
+ check_precision(12345.678900, 3, 5, "12300.0");
+
+ check_precision(12345.678900, 9, 0, "12345.6789");
+ check_precision(12345.678900, 9, 5, "12345.6789");
+ check_precision(12345.678900, 9, 3, "12345.678");
+ check_precision(12345.678900, 9, 1, "12345.6");
+}
+
int
main(void)
{
test_count_digits();
+ test_precision();
+ return 0;
}
diff --git a/tests/serd_test.c b/tests/serd_test.c
index 02527a51..64c4530b 100644
--- a/tests/serd_test.c
+++ b/tests/serd_test.c
@@ -300,13 +300,13 @@ test_double_to_node(void)
"0.01",
"2.05",
"-16.00001",
- "5.00000001",
+ "5.0",
"0.0",
NULL,
NULL };
for (size_t i = 0; i < sizeof(dbl_test_nums) / sizeof(double); ++i) {
- SerdNode* node = serd_new_decimal(dbl_test_nums[i], 8, NULL);
+ SerdNode* node = serd_new_decimal(dbl_test_nums[i], 17, 8, NULL);
const char* node_str = serd_node_get_string(node);
const bool pass = (node_str && dbl_test_strs[i])
? !strcmp(node_str, dbl_test_strs[i])