/* Copyright 2019-2021 David Robillard Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include "strtod.h" #include "bigint.h" #include "decimal.h" #include "ieee_float.h" #include "int_math.h" #include "macros.h" #include "read_utils.h" #include "soft_float.h" #include "string_utils.h" #include #include #include #include #include #include static inline int read_sign(const char** const sptr) { if (**sptr == '-') { ++(*sptr); return -1; } if (**sptr == '+') { ++(*sptr); } return 1; } ExessResult parse_decimal(ExessDecimalDouble* const out, const char* const str) { memset(out, 0, sizeof(*out)); // Read leading sign if necessary const char* s = str; const int sign = read_sign(&s); int n_leading_before = 0; out->kind = (sign < 0) ? EXESS_NEGATIVE : EXESS_POSITIVE; // Check that the first character is valid if (*s != '.' && !is_digit(*s)) { return result(EXESS_EXPECTED_DIGIT, (size_t)(s - str)); } // Skip leading zeros before decimal point while (*s == '0') { ++s; ++n_leading_before; } // Skip leading zeros after decimal point int n_leading_after = 0; // Zeros skipped after decimal point bool after_point = false; // True if we are after the decimal point if (*s == '.') { after_point = true; for (++s; *s == '0'; ++s) { ++n_leading_after; } } // Read significant digits of the mantissa into a 64-bit integer uint64_t frac = 0; // Fraction value (ignoring decimal point) int n_before = 0; // Number of digits before decimal point int n_after = 0; // Number of digits after decimal point for (; out->n_digits < DBL_DECIMAL_DIG + 1; ++s) { if (is_digit(*s)) { frac = (frac * 10) + (unsigned)(*s - '0'); n_before += !after_point; n_after += after_point; out->digits[out->n_digits++] = *s; } else if (*s == '.' && !after_point) { after_point = true; } else { break; } } // Skip extra digits int n_extra_before = 0; int n_extra_after = 0; for (;; ++s) { if (*s == '.' && !after_point) { after_point = true; } else if (is_digit(*s)) { n_extra_before += !after_point; n_extra_after += after_point; } else { break; } } // Calculate final output exponent out->expt = n_extra_before - n_after - n_leading_after; if (out->n_digits == 0) { out->kind = out->kind == EXESS_NEGATIVE ? EXESS_NEGATIVE_ZERO : EXESS_POSITIVE_ZERO; } return result(EXESS_SUCCESS, (size_t)(s - str)); } ExessResult parse_double(ExessDecimalDouble* const out, const char* const str) { memset(out, 0, sizeof(*out)); // Handle non-numeric special cases if (!strcmp(str, "NaN")) { out->kind = EXESS_NAN; return result(EXESS_SUCCESS, 3u); } if (!strcmp(str, "-INF")) { out->kind = EXESS_NEGATIVE_INFINITY; return result(EXESS_SUCCESS, 4u); } if (!strcmp(str, "INF")) { out->kind = EXESS_POSITIVE_INFINITY; return result(EXESS_SUCCESS, 3u); } if (!strcmp(str, "+INF")) { out->kind = EXESS_POSITIVE_INFINITY; return result(EXESS_SUCCESS, 4u); } // Read mantissa as a decimal const ExessResult r = parse_decimal(out, str); if (r.status) { return r; } const char* s = str + r.count; // Read exponent int abs_expt = 0; int expt_sign = 1; if (*s == 'e' || *s == 'E') { ++s; if (*s != '-' && *s != '+' && !is_digit(*s)) { return result(EXESS_EXPECTED_DIGIT, (size_t)(s - str)); } expt_sign = read_sign(&s); while (is_digit(*s)) { abs_expt = (abs_expt * 10) + (*s++ - '0'); } } // Calculate final output exponent out->expt += expt_sign * abs_expt; if (out->n_digits == 0) { out->kind = out->kind < EXESS_POSITIVE_ZERO ? EXESS_NEGATIVE_ZERO : EXESS_POSITIVE_ZERO; } return result(EXESS_SUCCESS, (size_t)(s - str)); } static uint64_t normalize(ExessSoftFloat* value, const uint64_t error) { const int original_e = value->e; *value = soft_float_normalize(*value); assert(value->e <= original_e); return error << (unsigned)(original_e - value->e); } /** Return the error added by floating point multiplication. Should be l + r + l*r/(2^64) + 0.5, but we short the denominator to 63 due to lack of precision, which effectively rounds up. */ static inline uint64_t product_error(const uint64_t lerror, const uint64_t rerror, const uint64_t half_ulp) { return lerror + rerror + ((lerror * rerror) >> 63u) + half_ulp; } /** Guess the binary floating point value for decimal input. @param significand Significand from the input. @param expt10 Decimal exponent from the input. @param n_digits Number of decimal digits in the significand. @param[out] guess Either the exact number, or its predecessor. @return True if `guess` is correct. */ static bool sftod(const uint64_t significand, const int expt10, const int n_digits, ExessSoftFloat* const guess) { assert(expt10 <= max_dec_expt); assert(expt10 >= min_dec_expt); /* The general idea here is to try and find a power of 10 that we can multiply by the significand to get the number. We get one from the cache which is possibly too small, then multiply by another power of 10 to make up the difference if necessary. For example, with a target power of 10^70, if we get 10^68 from the cache, then we multiply again by 10^2. This, as well as normalization, accumulates error, which is tracked throughout to know if we got the precise number. */ // Use a common denominator of 2^3 to avoid fractions static const unsigned lg_denom = 3; static const uint64_t denom = 1u << 3u; static const uint64_t half_ulp = 4u; // Start out with just the significand, and no error ExessSoftFloat input = {significand, 0}; uint64_t error = normalize(&input, 0); // Get a power of 10 that takes us most of the way without overshooting int cached_expt10 = 0; ExessSoftFloat pow10 = soft_float_pow10_under(expt10, &cached_expt10); // Get an exact fixup power if necessary const int d_expt10 = expt10 - cached_expt10; if (d_expt10) { input = soft_float_multiply(input, soft_float_exact_pow10(d_expt10)); if (d_expt10 > uint64_digits10 - n_digits) { error += half_ulp; // Product does not fit in an integer } } // Multiply the significand by the power, normalize, and update the error input = soft_float_multiply(input, pow10); error = normalize(&input, product_error(error, half_ulp, half_ulp)); // Get the effective number of significant bits from the order of magnitude const int magnitude = 64 + input.e; const int real_magnitude = magnitude - dbl_subnormal_expt; const unsigned n_significant_bits = (unsigned)MAX(0, MIN(real_magnitude, DBL_MANT_DIG)); // Calculate the number of "extra" bits of precision we have assert(n_significant_bits <= 64); unsigned n_extra_bits = 64u - n_significant_bits; if (n_extra_bits + lg_denom >= 64u) { // Very small subnormal where extra * denom does not fit in an integer // Shift right (and accumulate some more error) to compensate const unsigned amount = (n_extra_bits + lg_denom) - 63; input.f >>= amount; input.e += (int)amount; error = product_error((error >> amount) + 1u, half_ulp, half_ulp); n_extra_bits -= amount; } // Calculate boundaries for the extra bits (with the common denominator) assert(n_extra_bits < 64); const uint64_t extra_mask = (1ull << n_extra_bits) - 1u; const uint64_t extra_bits = (input.f & extra_mask) * denom; const uint64_t middle = (1ull << (n_extra_bits - 1u)) * denom; const uint64_t low = middle - error; const uint64_t high = middle + error; // Round to nearest representable double guess->f = (input.f >> n_extra_bits) + (extra_bits >= high); guess->e = input.e + (int)n_extra_bits; // Too inaccurate if the extra bits are within the error around the middle return extra_bits <= low || extra_bits >= high; } static int compare_buffer(const char* buf, const int expt, const ExessSoftFloat upper) { ExessBigint buf_bigint; exess_bigint_set_decimal_string(&buf_bigint, buf); ExessBigint upper_bigint; exess_bigint_set_u64(&upper_bigint, upper.f); if (expt >= 0) { exess_bigint_multiply_pow10(&buf_bigint, (unsigned)expt); } else { exess_bigint_multiply_pow10(&upper_bigint, (unsigned)-expt); } if (upper.e > 0) { exess_bigint_shift_left(&upper_bigint, (unsigned)upper.e); } else { exess_bigint_shift_left(&buf_bigint, (unsigned)-upper.e); } return exess_bigint_compare(&buf_bigint, &upper_bigint); } double parsed_double_to_double(const ExessDecimalDouble in) { static const int n_exact_pow10 = sizeof(POW10) / sizeof(POW10[0]); static const unsigned max_exact_int_digits = 15; // Digits that fit exactly static const int max_decimal_power = 309; // Max finite power static const int min_decimal_power = -324; // Min non-zero power switch (in.kind) { case EXESS_NEGATIVE: break; case EXESS_NEGATIVE_INFINITY: return (double)-INFINITY; case EXESS_NEGATIVE_ZERO: return -0.0; case EXESS_POSITIVE_ZERO: return 0.0; case EXESS_POSITIVE: break; case EXESS_POSITIVE_INFINITY: return (double)INFINITY; case EXESS_NAN: return (double)NAN; } uint64_t frac = 0; for (unsigned i = 0u; i < in.n_digits; ++i) { if (is_digit(in.digits[i])) { frac = (frac * 10) + (unsigned)(in.digits[i] - '0'); } } const int expt = in.expt; const int result_power = (int)in.n_digits + expt; // Return early for simple exact cases const int sign = in.kind >= EXESS_POSITIVE_ZERO ? 1 : -1; if (result_power > max_decimal_power) { return sign * (double)INFINITY; } if (result_power < min_decimal_power) { return sign * 0.0; } if (in.n_digits < max_exact_int_digits) { if (expt < 0 && -expt < n_exact_pow10) { return sign * ((double)frac / (double)POW10[-expt]); } if (expt >= 0 && expt < n_exact_pow10) { return sign * ((double)frac * (double)POW10[expt]); } } // Try to guess the number using only soft floating point (fast path) ExessSoftFloat guess = {0, 0}; const bool exact = sftod(frac, expt, (int)in.n_digits, &guess); const double g = soft_float_to_double(guess); if (exact) { return sign * g; } // Not sure, guess is either the number or its predecessor (rare slow path) // Compare it with the buffer using bigints to find out which const ExessSoftFloat upper = {guess.f * 2 + 1, guess.e - 1}; const int cmp = compare_buffer(in.digits, in.expt, upper); if (cmp < 0) { return sign * g; } if (cmp > 0) { return sign * nextafter(g, (double)INFINITY); } if ((guess.f & 1u) == 0) { return sign * g; // Round towards even } return sign * nextafter(g, (double)INFINITY); // Round odd up }