Separate decimal parsing from floating point conversion

author: David Robillard <d@drobilla.net> 2019-10-06 21:25:40 +0200
committer: David Robillard <d@drobilla.net> 2019-12-20 10:26:55 -0500
commit: 908e60d9a92b225d0f11407d930421a986154a4f (patch)
tree: 9275a36abdab7c0cbf6b93051b06d7571143d6a8
parent: f2e9541d6a047237c25bc90bf63920de7165e1d4 (diff)
download: serd-908e60d9a92b225d0f11407d930421a986154a4f.tar.gz
serd-908e60d9a92b225d0f11407d930421a986154a4f.tar.bz2
serd-908e60d9a92b225d0f11407d930421a986154a4f.zip
1 files changed, 126 insertions, 41 deletions
diff --git a/src/string.c b/src/string.c
index 65ba1252..2185b476 100644
--- a/src/string.c
+++ b/src/string.c
@@ -16,13 +16,17 @@
 
 #include "string.h"
 
-#include "serd/serd.h"
+#include "int_math.h"
 #include "string_utils.h"
 
+#include "serd/serd.h"
+
 #include <math.h>
 #include <stddef.h>
 #include <stdlib.h>
 
+static const int uint64_digits10 = 19;
+
 void
 serd_free(void* ptr)
 {
@@ -66,13 +70,13 @@ serd_strlen(const char* str, SerdNodeFlags* flags)
 	return strlen(str);
 }
 
-static inline double
+static inline int
 read_sign(const char** sptr)
 {
-	double sign = 1.0;
+	int sign = 1;
 	switch (**sptr) {
 	case '-':
-		sign = -1.0;
+		sign = -1;
 		// fallthru
 	case '+':
 		++(*sptr);
@@ -82,56 +86,137 @@ read_sign(const char** sptr)
 	}
 }
 
-double
-serd_strtod(const char* str, size_t* end)
+typedef struct
 {
-	double result = 0.0;
-
-#define SET_END(index) if (end) { *end = (size_t)(index); }
+	int         sign;        ///< Sign (+1 or -1)
+	int         digits_expt; ///< Exponent for digits
+	const char* digits;      ///< Pointer to the first digit in the significand
+	uint64_t    frac;        ///< Significand
+	int         frac_expt;   ///< Exponent for frac
+	int         n_digits;    ///< Number of digits in the significand
+	size_t      end;         ///< Offset of the first unread character (one past the last read)
+} SerdParsedDouble;
+
+static SerdParsedDouble
+serd_parse_double(const char* const str)
+{
+	// Read leading sign if necessary
+	const char* s    = str;
+	const int   sign = read_sign(&s);
 
-	if (!strcmp(str, "NaN")) {
-		SET_END(3);
-		return NAN;
-	} else if (!strcmp(str, "-INF")) {
-		SET_END(4);
-		return -INFINITY;
-	} else if (!strcmp(str, "INF")) {
-		SET_END(3);
-		return INFINITY;
+	// Skip leading zeros before decimal point
+	while (*s == '0') {
+		++s;
 	}
 
-	// Point s at the first non-whitespace character
-	const char* s = str;
-	while (is_space(*s)) { ++s; }
-
-	// Read leading sign if necessary
-	const double sign = read_sign(&s);
+	// Skip leading zeros after decimal point
+	int  n_leading   = 0;     // Zeros skipped after decimal point
+	bool after_point = false; // True if we are after the decimal point
+	if (*s == '.') {
+		after_point = true;
+		for (++s; *s == '0'; ++s) {
+			++n_leading;
+		}
+	}
 
-	// Parse integer part
-	for (; is_digit(*s); ++s) {
-		result = (result * 10.0) + (*s - '0');
+	// Read significant digits of the mantissa into a 64-bit integer
+	const char* const digits   = s; // Store pointer to start of digits
+	uint64_t          frac     = 0; // Fraction value (ignoring decimal point)
+	int               n_total  = 0; // Number of decimal digits in fraction
+	int               n_before = 0; // Number of digits before decimal point
+	int               n_after  = 0; // Number of digits after decimal point
+	for (int i = 0; i < uint64_digits10; ++i, ++s) {
+		if (is_digit(*s)) {
+			frac = (frac * 10) + (unsigned)(*s - '0');
+			++n_total;
+			n_before += !after_point;
+			n_after  += after_point;
+		} else if (*s == '.' && !after_point) {
+			after_point = true;
+		} else {
+			break;
+		}
 	}
 
-	// Parse fractional part
-	if (*s == '.') {
-		double denom = 10.0;
-		for (++s; is_digit(*s); ++s) {
-			result += (*s - '0') / denom;
-			denom *= 10.0;
+	// Skip extra digits
+	const int n_used         = MAX(n_total, n_leading ? 1 : 0);
+	int       n_extra_before = 0;
+	int       n_extra_after  = 0;
+	for (;; ++s, ++n_total) {
+		if (*s == '.' && !after_point) {
+			after_point = true;
+		} else if (is_digit(*s)) {
+			n_extra_before += !after_point;
+			n_extra_after  += after_point;
+		} else {
+			break;
 		}
 	}
 
-	// Parse exponent
+	// Read exponent from input
+	int abs_in_expt  = 0;
+	int in_expt_sign = 1;
 	if (*s == 'e' || *s == 'E') {
 		++s;
-		double expt      = 0.0;
-		double expt_sign = read_sign(&s);
-		for (; is_digit(*s); ++s) {
-			expt = (expt * 10.0) + (*s - '0');
+		in_expt_sign = read_sign(&s);
+		while (is_digit(*s)) {
+			abs_in_expt = (abs_in_expt * 10) + (*s++ - '0');
 		}
-		result *= pow(10, expt * expt_sign);
 	}
 
-	SET_END(s - str);
-	return result * sign;
+	// Calculate output exponents
+	const int in_expt     = in_expt_sign * abs_in_expt;
+	const int frac_expt   = n_extra_before - n_after - n_leading + in_expt;
+	const int digits_expt = in_expt - n_after - n_extra_after - n_leading;
+
+	const SerdParsedDouble result = {sign,
+	                                 digits_expt,
+	                                 digits,
+	                                 frac,
+	                                 frac_expt,
+	                                 n_used,
+	                                 (size_t)(s - str)};
+
+	return result;
+}
+
+
+double
+serd_strtod(const char* str, size_t* end)
+{
+#define SET_END(index) if (end) { *end = (size_t)(index); }
+
+	// Point s at the first non-whitespace character
+	const char* s = str;
+	while (is_space(*s)) {
+		++s;
+	}
+
+	// Handle non-numeric special cases
+	if (!strcmp(s, "NaN")) {
+		SET_END(s - str + 3);
+		return (double)NAN;
+	} else if (!strcmp(s, "-INF")) {
+		SET_END(s - str + 4);
+		return (double)-INFINITY;
+	} else if (!strcmp(s, "INF")) {
+		SET_END(s - str + 3);
+		return (double)INFINITY;
+	} else if (!strcmp(s, "+INF")) {
+		SET_END(s - str + 4);
+		return (double)INFINITY;
+	} else if (*s != '+' && *s != '-' && *s != '.' && !is_digit(*s)) {
+		SET_END(s - str);
+		return (double)NAN;
+	}
+
+	const SerdParsedDouble in = serd_parse_double(s);
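+	SET_END(in.end); // NOTE(review): in.end is relative to s, not str, so skipped leading whitespace is not counted (the branches above use s - str + N) -- confirm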
+#undef SET_END
+
+	if (in.n_digits == 0) {
+		return (double)NAN;
+	}
+
+	return in.sign * (in.frac * pow(10, in.frac_expt));
 }
author	David Robillard <d@drobilla.net>	2019-10-06 21:25:40 +0200
committer	David Robillard <d@drobilla.net>	2019-12-20 10:26:55 -0500
commit	908e60d9a92b225d0f11407d930421a986154a4f (patch)
tree	9275a36abdab7c0cbf6b93051b06d7571143d6a8
parent	f2e9541d6a047237c25bc90bf63920de7165e1d4 (diff)
download	serd-908e60d9a92b225d0f11407d930421a986154a4f.tar.gz serd-908e60d9a92b225d0f11407d930421a986154a4f.tar.bz2 serd-908e60d9a92b225d0f11407d930421a986154a4f.zip