aboutsummaryrefslogtreecommitdiffstats
path: root/subprojects/exess/src/strtod.c
diff options
context:
space:
mode:
Diffstat (limited to 'subprojects/exess/src/strtod.c')
-rw-r--r--subprojects/exess/src/strtod.c405
1 files changed, 405 insertions, 0 deletions
diff --git a/subprojects/exess/src/strtod.c b/subprojects/exess/src/strtod.c
new file mode 100644
index 00000000..55a0b082
--- /dev/null
+++ b/subprojects/exess/src/strtod.c
@@ -0,0 +1,405 @@
+/*
+ Copyright 2019-2021 David Robillard <d@drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include "strtod.h"
+#include "bigint.h"
+#include "decimal.h"
+#include "ieee_float.h"
+#include "int_math.h"
+#include "macros.h"
+#include "read_utils.h"
+#include "soft_float.h"
+#include "string_utils.h"
+
+#include <assert.h>
+#include <float.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+
+static inline int
+read_sign(const char** const sptr)
+{
+ if (**sptr == '-') {
+ ++(*sptr);
+ return -1;
+ }
+
+ if (**sptr == '+') {
+ ++(*sptr);
+ }
+
+ return 1;
+}
+
+ExessResult
+parse_decimal(ExessDecimalDouble* const out, const char* const str)
+{
+ memset(out, 0, sizeof(*out));
+
+ // Read leading sign if necessary
+ const char* s = str;
+ const int sign = read_sign(&s);
+ int n_leading_before = 0;
+
+ out->kind = (sign < 0) ? EXESS_NEGATIVE : EXESS_POSITIVE;
+
+ // Check that the first character is valid
+ if (*s != '.' && !is_digit(*s)) {
+ return result(EXESS_EXPECTED_DIGIT, (size_t)(s - str));
+ }
+
+ // Skip leading zeros before decimal point
+ while (*s == '0') {
+ ++s;
+ ++n_leading_before;
+ }
+
+ // Skip leading zeros after decimal point
+ int n_leading_after = 0; // Zeros skipped after decimal point
+ bool after_point = false; // True if we are after the decimal point
+ if (*s == '.') {
+ after_point = true;
+ for (++s; *s == '0'; ++s) {
+ ++n_leading_after;
+ }
+ }
+
+ // Read significant digits of the mantissa into a 64-bit integer
+ uint64_t frac = 0; // Fraction value (ignoring decimal point)
+ int n_before = 0; // Number of digits before decimal point
+ int n_after = 0; // Number of digits after decimal point
+ for (; out->n_digits < DBL_DECIMAL_DIG + 1; ++s) {
+ if (is_digit(*s)) {
+ frac = (frac * 10) + (unsigned)(*s - '0');
+ n_before += !after_point;
+ n_after += after_point;
+ out->digits[out->n_digits++] = *s;
+ } else if (*s == '.' && !after_point) {
+ after_point = true;
+ } else {
+ break;
+ }
+ }
+
+ // Skip extra digits
+ int n_extra_before = 0;
+ int n_extra_after = 0;
+ for (;; ++s) {
+ if (*s == '.' && !after_point) {
+ after_point = true;
+ } else if (is_digit(*s)) {
+ n_extra_before += !after_point;
+ n_extra_after += after_point;
+ } else {
+ break;
+ }
+ }
+
+ // Calculate final output exponent
+ out->expt = n_extra_before - n_after - n_leading_after;
+
+ if (out->n_digits == 0) {
+ out->kind =
+ out->kind == EXESS_NEGATIVE ? EXESS_NEGATIVE_ZERO : EXESS_POSITIVE_ZERO;
+ }
+
+ return result(EXESS_SUCCESS, (size_t)(s - str));
+}
+
+ExessResult
+parse_double(ExessDecimalDouble* const out, const char* const str)
+{
+ memset(out, 0, sizeof(*out));
+
+ // Handle non-numeric special cases
+
+ if (!strcmp(str, "NaN")) {
+ out->kind = EXESS_NAN;
+ return result(EXESS_SUCCESS, 3u);
+ }
+
+ if (!strcmp(str, "-INF")) {
+ out->kind = EXESS_NEGATIVE_INFINITY;
+ return result(EXESS_SUCCESS, 4u);
+ }
+
+ if (!strcmp(str, "INF")) {
+ out->kind = EXESS_POSITIVE_INFINITY;
+ return result(EXESS_SUCCESS, 3u);
+ }
+
+ if (!strcmp(str, "+INF")) {
+ out->kind = EXESS_POSITIVE_INFINITY;
+ return result(EXESS_SUCCESS, 4u);
+ }
+
+ // Read mantissa as a decimal
+ const ExessResult r = parse_decimal(out, str);
+ if (r.status) {
+ return r;
+ }
+
+ const char* s = str + r.count;
+
+ // Read exponent
+ int abs_expt = 0;
+ int expt_sign = 1;
+ if (*s == 'e' || *s == 'E') {
+ ++s;
+
+ if (*s != '-' && *s != '+' && !is_digit(*s)) {
+ return result(EXESS_EXPECTED_DIGIT, (size_t)(s - str));
+ }
+
+ expt_sign = read_sign(&s);
+ while (is_digit(*s)) {
+ abs_expt = (abs_expt * 10) + (*s++ - '0');
+ }
+ }
+
+ // Calculate final output exponent
+ out->expt += expt_sign * abs_expt;
+
+ if (out->n_digits == 0) {
+ out->kind = out->kind < EXESS_POSITIVE_ZERO ? EXESS_NEGATIVE_ZERO
+ : EXESS_POSITIVE_ZERO;
+ }
+
+ return result(EXESS_SUCCESS, (size_t)(s - str));
+}
+
+static uint64_t
+normalize(ExessSoftFloat* value, const uint64_t error)
+{
+ const int original_e = value->e;
+
+ *value = soft_float_normalize(*value);
+
+ assert(value->e <= original_e);
+ return error << (unsigned)(original_e - value->e);
+}
+
+/**
+ Return the error added by floating point multiplication.
+
+ Should be l + r + l*r/(2^64) + 0.5, but we short the denominator to 63 due
+ to lack of precision, which effectively rounds up.
+*/
+static inline uint64_t
+product_error(const uint64_t lerror,
+ const uint64_t rerror,
+ const uint64_t half_ulp)
+{
+ return lerror + rerror + ((lerror * rerror) >> 63u) + half_ulp;
+}
+
+/**
+ Guess the binary floating point value for decimal input.
+
+ @param significand Significand from the input.
+ @param expt10 Decimal exponent from the input.
+ @param n_digits Number of decimal digits in the significand.
+ @param[out] guess Either the exact number, or its predecessor.
+ @return True if `guess` is correct.
+*/
+static bool
+sftod(const uint64_t significand,
+ const int expt10,
+ const int n_digits,
+ ExessSoftFloat* const guess)
+{
+ assert(expt10 <= max_dec_expt);
+ assert(expt10 >= min_dec_expt);
+
+ /* The general idea here is to try and find a power of 10 that we can
+ multiply by the significand to get the number. We get one from the
+ cache which is possibly too small, then multiply by another power of 10
+ to make up the difference if necessary. For example, with a target
+ power of 10^70, if we get 10^68 from the cache, then we multiply again
+ by 10^2. This, as well as normalization, accumulates error, which is
+ tracked throughout to know if we got the precise number. */
+
+ // Use a common denominator of 2^3 to avoid fractions
+ static const unsigned lg_denom = 3;
+ static const uint64_t denom = 1u << 3u;
+ static const uint64_t half_ulp = 4u;
+
+ // Start out with just the significand, and no error
+ ExessSoftFloat input = {significand, 0};
+ uint64_t error = normalize(&input, 0);
+
+ // Get a power of 10 that takes us most of the way without overshooting
+ int cached_expt10 = 0;
+ ExessSoftFloat pow10 = soft_float_pow10_under(expt10, &cached_expt10);
+
+ // Get an exact fixup power if necessary
+ const int d_expt10 = expt10 - cached_expt10;
+ if (d_expt10) {
+ input = soft_float_multiply(input, soft_float_exact_pow10(d_expt10));
+ if (d_expt10 > uint64_digits10 - n_digits) {
+ error += half_ulp; // Product does not fit in an integer
+ }
+ }
+
+ // Multiply the significand by the power, normalize, and update the error
+ input = soft_float_multiply(input, pow10);
+ error = normalize(&input, product_error(error, half_ulp, half_ulp));
+
+ // Get the effective number of significant bits from the order of magnitude
+ const int magnitude = 64 + input.e;
+ const int real_magnitude = magnitude - dbl_subnormal_expt;
+ const unsigned n_significant_bits =
+ (unsigned)MAX(0, MIN(real_magnitude, DBL_MANT_DIG));
+
+ // Calculate the number of "extra" bits of precision we have
+ assert(n_significant_bits <= 64);
+ unsigned n_extra_bits = 64u - n_significant_bits;
+ if (n_extra_bits + lg_denom >= 64u) {
+ // Very small subnormal where extra * denom does not fit in an integer
+ // Shift right (and accumulate some more error) to compensate
+ const unsigned amount = (n_extra_bits + lg_denom) - 63;
+
+ input.f >>= amount;
+ input.e += (int)amount;
+ error = product_error((error >> amount) + 1u, half_ulp, half_ulp);
+ n_extra_bits -= amount;
+ }
+
+ // Calculate boundaries for the extra bits (with the common denominator)
+ assert(n_extra_bits < 64);
+ const uint64_t extra_mask = (1ull << n_extra_bits) - 1u;
+ const uint64_t extra_bits = (input.f & extra_mask) * denom;
+ const uint64_t middle = (1ull << (n_extra_bits - 1u)) * denom;
+ const uint64_t low = middle - error;
+ const uint64_t high = middle + error;
+
+ // Round to nearest representable double
+ guess->f = (input.f >> n_extra_bits) + (extra_bits >= high);
+ guess->e = input.e + (int)n_extra_bits;
+
+ // Too inaccurate if the extra bits are within the error around the middle
+ return extra_bits <= low || extra_bits >= high;
+}
+
+static int
+compare_buffer(const char* buf, const int expt, const ExessSoftFloat upper)
+{
+ ExessBigint buf_bigint;
+ exess_bigint_set_decimal_string(&buf_bigint, buf);
+
+ ExessBigint upper_bigint;
+ exess_bigint_set_u64(&upper_bigint, upper.f);
+
+ if (expt >= 0) {
+ exess_bigint_multiply_pow10(&buf_bigint, (unsigned)expt);
+ } else {
+ exess_bigint_multiply_pow10(&upper_bigint, (unsigned)-expt);
+ }
+
+ if (upper.e > 0) {
+ exess_bigint_shift_left(&upper_bigint, (unsigned)upper.e);
+ } else {
+ exess_bigint_shift_left(&buf_bigint, (unsigned)-upper.e);
+ }
+
+ return exess_bigint_compare(&buf_bigint, &upper_bigint);
+}
+
+double
+parsed_double_to_double(const ExessDecimalDouble in)
+{
+ static const int n_exact_pow10 = sizeof(POW10) / sizeof(POW10[0]);
+ static const unsigned max_exact_int_digits = 15; // Digits that fit exactly
+ static const int max_decimal_power = 309; // Max finite power
+ static const int min_decimal_power = -324; // Min non-zero power
+
+ switch (in.kind) {
+ case EXESS_NEGATIVE:
+ break;
+ case EXESS_NEGATIVE_INFINITY:
+ return (double)-INFINITY;
+ case EXESS_NEGATIVE_ZERO:
+ return -0.0;
+ case EXESS_POSITIVE_ZERO:
+ return 0.0;
+ case EXESS_POSITIVE:
+ break;
+ case EXESS_POSITIVE_INFINITY:
+ return (double)INFINITY;
+ case EXESS_NAN:
+ return (double)NAN;
+ }
+
+ uint64_t frac = 0;
+ for (unsigned i = 0u; i < in.n_digits; ++i) {
+ if (is_digit(in.digits[i])) {
+ frac = (frac * 10) + (unsigned)(in.digits[i] - '0');
+ }
+ }
+
+ const int expt = in.expt;
+ const int result_power = (int)in.n_digits + expt;
+
+ // Return early for simple exact cases
+
+ const int sign = in.kind >= EXESS_POSITIVE_ZERO ? 1 : -1;
+
+ if (result_power > max_decimal_power) {
+ return sign * (double)INFINITY;
+ }
+
+ if (result_power < min_decimal_power) {
+ return sign * 0.0;
+ }
+
+ if (in.n_digits < max_exact_int_digits) {
+ if (expt < 0 && -expt < n_exact_pow10) {
+ return sign * ((double)frac / (double)POW10[-expt]);
+ }
+
+ if (expt >= 0 && expt < n_exact_pow10) {
+ return sign * ((double)frac * (double)POW10[expt]);
+ }
+ }
+
+ // Try to guess the number using only soft floating point (fast path)
+ ExessSoftFloat guess = {0, 0};
+ const bool exact = sftod(frac, expt, (int)in.n_digits, &guess);
+ const double g = soft_float_to_double(guess);
+ if (exact) {
+ return sign * g;
+ }
+
+ // Not sure, guess is either the number or its predecessor (rare slow path)
+ // Compare it with the buffer using bigints to find out which
+ const ExessSoftFloat upper = {guess.f * 2 + 1, guess.e - 1};
+ const int cmp = compare_buffer(in.digits, in.expt, upper);
+ if (cmp < 0) {
+ return sign * g;
+ }
+
+ if (cmp > 0) {
+ return sign * nextafter(g, (double)INFINITY);
+ }
+
+ if ((guess.f & 1u) == 0) {
+ return sign * g; // Round towards even
+ }
+
+ return sign * nextafter(g, (double)INFINITY); // Round odd up
+}