about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2019-10-06 21:25:40 +0200
committer David Robillard <d@drobilla.net> 2019-12-20 10:26:55 -0500
commit 908e60d9a92b225d0f11407d930421a986154a4f (patch)
tree 9275a36abdab7c0cbf6b93051b06d7571143d6a8
parent f2e9541d6a047237c25bc90bf63920de7165e1d4 (diff)
download serd-908e60d9a92b225d0f11407d930421a986154a4f.tar.gz
serd-908e60d9a92b225d0f11407d930421a986154a4f.tar.bz2
serd-908e60d9a92b225d0f11407d930421a986154a4f.zip
Separate decimal parsing from floating point conversion
-rw-r--r-- src/string.c 167
1 files changed, 126 insertions, 41 deletions
diff --git a/src/string.c b/src/string.c
index 65ba1252..2185b476 100644
--- a/src/string.c
+++ b/src/string.c
@@ -16,13 +16,17 @@
#include "string.h"
-#include "serd/serd.h"
+#include "int_math.h"
#include "string_utils.h"
+#include "serd/serd.h"
+
#include <math.h>
#include <stddef.h>
#include <stdlib.h>
+static const int uint64_digits10 = 19;
+
void
serd_free(void* ptr)
{
@@ -66,13 +70,13 @@ serd_strlen(const char* str, SerdNodeFlags* flags)
return strlen(str);
}
-static inline double
+static inline int
read_sign(const char** sptr)
{
- double sign = 1.0;
+ int sign = 1;
switch (**sptr) {
case '-':
- sign = -1.0;
+ sign = -1;
// fallthru
case '+':
++(*sptr);
@@ -82,56 +86,137 @@ read_sign(const char** sptr)
}
}
-double
-serd_strtod(const char* str, size_t* end)
+typedef struct // Decimal string split into a sign, an integer significand, and exponents
{
- double result = 0.0;
-
-#define SET_END(index) if (end) { *end = (size_t)(index); }
+ int sign; ///< Sign (+1 or -1)
+ int digits_expt; ///< Power-of-ten exponent for the whole digit string at `digits`
+ const char* digits; ///< Pointer to the first digit in the significand (after skipped zeros)
+ uint64_t frac; ///< Significand: the leading digits accumulated as an integer
+ int frac_expt; ///< Power-of-ten exponent to apply to `frac`
+ int n_digits; ///< Number of digits accumulated in `frac` (1 if only zeros followed the point)
+ size_t end; ///< Offset from the parsed string of the first unread character
+} SerdParsedDouble;
+/**
+   Split the decimal number at `str` into sign, integer significand, and
+   decimal exponents, using integer arithmetic only.
+
+   Leading whitespace is not skipped here and NaN/INF are not recognized.
+   At most `uint64_digits10` significand characters are accumulated into
+   `frac`; any further digits are skipped and only counted so that the
+   exponents stay correct.
+
+   NOTE(review): zeros before the decimal point are skipped without being
+   counted, so input such as "0" or "0e5" yields `n_digits == 0`.
+
+   @param str Start of the number (optional sign first).
+   @return The parsed fields; `end` is an offset relative to `str`.
+*/
+static SerdParsedDouble
+serd_parse_double(const char* const str)
+{
+ // Read leading sign if necessary
+ const char* s = str;
+ const int sign = read_sign(&s);
- if (!strcmp(str, "NaN")) {
- SET_END(3);
- return NAN;
- } else if (!strcmp(str, "-INF")) {
- SET_END(4);
- return -INFINITY;
- } else if (!strcmp(str, "INF")) {
- SET_END(3);
- return INFINITY;
+ // Skip leading zeros before decimal point (these are not counted)
+ while (*s == '0') {
+ ++s;
}
- // Point s at the first non-whitespace character
- const char* s = str;
- while (is_space(*s)) { ++s; }
-
- // Read leading sign if necessary
- const double sign = read_sign(&s);
+ // Skip leading zeros after decimal point
+ int n_leading = 0; // Zeros skipped after decimal point
+ bool after_point = false; // True if we are after the decimal point
+ if (*s == '.') {
+ after_point = true;
+ for (++s; *s == '0'; ++s) {
+ ++n_leading;
+ }
+ }
- // Parse integer part
- for (; is_digit(*s); ++s) {
- result = (result * 10.0) + (*s - '0');
+ // Read significant digits of the mantissa into a 64-bit integer
+ const char* const digits = s; // Store pointer to start of digits
+ uint64_t frac = 0; // Fraction value (ignoring decimal point)
+ int n_total = 0; // Number of decimal digits in fraction
+ int n_before = 0; // Number of digits before decimal point
+ int n_after = 0; // Number of digits after decimal point
+ for (int i = 0; i < uint64_digits10; ++i, ++s) { // i counts characters, so a '.' uses up one iteration
+ if (is_digit(*s)) {
+ frac = (frac * 10) + (unsigned)(*s - '0');
+ ++n_total;
+ n_before += !after_point;
+ n_after += after_point;
+ } else if (*s == '.' && !after_point) {
+ after_point = true;
+ } else {
+ break;
+ }
}
- // Parse fractional part
- if (*s == '.') {
- double denom = 10.0;
- for (++s; is_digit(*s); ++s) {
- result += (*s - '0') / denom;
- denom *= 10.0;
+ // Skip extra digits that did not fit in frac, counting them for the exponents
+ const int n_used = MAX(n_total, n_leading ? 1 : 0); // Report one digit when only zeros followed the point
+ int n_extra_before = 0;
+ int n_extra_after = 0;
+ for (;; ++s, ++n_total) { // n_total is not read again after n_used above
+ if (*s == '.' && !after_point) {
+ after_point = true;
+ } else if (is_digit(*s)) {
+ n_extra_before += !after_point;
+ n_extra_after += after_point;
+ } else {
+ break;
}
}
- // Parse exponent
+ // Read exponent from input
+ int abs_in_expt = 0;
+ int in_expt_sign = 1;
if (*s == 'e' || *s == 'E') {
++s;
- double expt = 0.0;
- double expt_sign = read_sign(&s);
- for (; is_digit(*s); ++s) {
- expt = (expt * 10.0) + (*s - '0');
+ in_expt_sign = read_sign(&s);
+ while (is_digit(*s)) {
+ abs_in_expt = (abs_in_expt * 10) + (*s++ - '0'); // NOTE(review): unbounded, a very long exponent overflows int
}
- result *= pow(10, expt * expt_sign);
}
- SET_END(s - str);
- return result * sign;
+ // Calculate output exponents
+ const int in_expt = in_expt_sign * abs_in_expt;
+ const int frac_expt = n_extra_before - n_after - n_leading + in_expt; // Dropped integer digits raise the exponent of frac
+ const int digits_expt = in_expt - n_after - n_extra_after - n_leading; // Accounts for every digit after the point
+
+ const SerdParsedDouble result = {sign,
+ digits_expt,
+ digits,
+ frac,
+ frac_expt,
+ n_used,
+ (size_t)(s - str)}; // s points at the first unread character
+
+ return result;
+}
+
+
+/**
+   Parse a floating point number from the start of `str`.
+
+   Leading whitespace is skipped.  The exact strings "NaN", "INF", "+INF"
+   and "-INF" (after whitespace) are recognized; otherwise a decimal number
+   with optional sign, fraction, and exponent is read.
+
+   @param str String to parse.
+   @param end If non-NULL, set to the index in `str` (including any skipped
+   leading whitespace) of the first character that was not read.
+   @return The parsed value, or NaN if no number could be read.
+*/
+double
+serd_strtod(const char* str, size_t* end)
+{
+#define SET_END(index) if (end) { *end = (size_t)(index); }
+
+  // Point s at the first non-whitespace character
+  const char* s = str;
+  while (is_space(*s)) {
+    ++s;
+  }
+
+  // Every index reported through `end` is relative to str, not s
+  const size_t n_skipped = (size_t)(s - str);
+
+  // Handle non-numeric special cases
+  if (!strcmp(s, "NaN")) {
+    SET_END(n_skipped + 3);
+    return (double)NAN;
+  }
+
+  if (!strcmp(s, "-INF")) {
+    SET_END(n_skipped + 4);
+    return (double)-INFINITY;
+  }
+
+  if (!strcmp(s, "INF")) {
+    SET_END(n_skipped + 3);
+    return (double)INFINITY;
+  }
+
+  if (!strcmp(s, "+INF")) {
+    SET_END(n_skipped + 4);
+    return (double)INFINITY;
+  }
+
+  if (*s != '+' && *s != '-' && *s != '.' && !is_digit(*s)) {
+    // Nothing that could start a number: consume only the whitespace
+    SET_END(n_skipped);
+    return (double)NAN;
+  }
+
+  // The parser reports its end offset relative to s, so add the whitespace
+  // that was skipped to keep `end` an index into str
+  const SerdParsedDouble in = serd_parse_double(s);
+  SET_END(n_skipped + in.end);
+#undef SET_END
+
+  if (in.n_digits == 0) {
+    // No digits are reported when the significand is written only as zeros
+    // before the point ("0", "-0", "0.", "0e5"), which is a valid zero.
+    // Anything else with no digits ("+", ".", "-e3") is not a number.
+    const char* first = s;
+    if (*first == '+' || *first == '-') {
+      ++first;
+    }
+
+    return (*first == '0') ? (in.sign * 0.0) : (double)NAN;
+  }
+
+  return in.sign * (in.frac * pow(10, in.frac_expt));
 }