From 2469c739d87d6a2bb0c9b9c1e2b2c69b0e981b97 Mon Sep 17 00:00:00 2001
From: David Robillard <d@drobilla.net>
Date: Mon, 19 Dec 2011 02:59:31 +0000
Subject: Add serd_strtod(), serd_node_new_decimal(), and
 serd_node_new_integer() for locale-independent numeric node
 parsing/serialising.

git-svn-id: http://svn.drobilla.net/serd/trunk@260 490d8e77-9747-427b-9fa3-0b8f29cee8a0
---
 ChangeLog           |   2 +
 serd/serd.h         |  36 +++++++++++++++
 src/node.c          |  81 ++++++++++++++++++++++++++++++++
 src/serd_internal.h |  11 +++++
 src/serd_test.c     | 130 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/string.c        |  55 ++++++++++++++++++++++
 wscript             |  21 +++++++--
 7 files changed, 333 insertions(+), 3 deletions(-)
 create mode 100644 src/serd_test.c

diff --git a/ChangeLog b/ChangeLog
index 51203067..5b489eae 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -13,6 +13,8 @@ serd (UNRELEASED) unstable; urgency=low
   * Handle a quote as the last character of a long string literal in the
     writer (by escaping it) rather than the reader, to avoid writing Turtle
     other tools fail to parse.
+  * Add serd_strtod(), serd_node_new_decimal(), and serd_node_new_integer()
+    for locale-independent numeric node parsing/serialising.
 
  -- David Robillard <d@drobilla.net>  (UNRELEASED)
 
diff --git a/serd/serd.h b/serd/serd.h
index b20dacf2..eebfa113 100644
--- a/serd/serd.h
+++ b/serd/serd.h
@@ -266,6 +266,17 @@ SERD_API
 size_t
 serd_strlen(const uint8_t* str, size_t* n_bytes, SerdNodeFlags* flags);
 
+/**
+   Parse a string to a double.
+
+   The API of this function is identical to the standard C strtod function,
+   except this function is locale-independent and always matches the lexical
+   format used in the Turtle grammar (the decimal point is always ".").
+*/
+SERD_API
+double
+serd_strtod(const char* str, char** endptr);
+
 /**
    @}
    @name URI
@@ -379,6 +390,31 @@ SERD_API
 SerdNode
 serd_node_new_uri(const SerdURI* uri, const SerdURI* base, SerdURI* out);
 
+/**
+   Create a new node by serialising @c d into an xsd:decimal string.
+
+   The resulting node will always contain a `.', start with a digit, and end
+   with a digit (i.e. will have a leading and/or trailing `0' if necessary).
+   It will never be in scientific notation.  A maximum of @c frac_digits digits
+   will be written after the decimal point, but trailing zeros will
+   automatically be omitted (except one if @c d is a round integer).
+
+   Note that about 16 and 8 fractional digits are required to precisely
+   represent a double and float, respectively.
+
+   @param frac_digits The maximum number of digits after the decimal place.
+*/
+SERD_API
+SerdNode
+serd_node_new_decimal(double d, unsigned frac_digits);
+
+/**
+   Create a new node by serialising @c i into an xsd:integer string.
+*/
+SERD_API
+SerdNode
+serd_node_new_integer(long i);
+
 /**
    Free any data owned by @c node.
 
diff --git a/src/node.c b/src/node.c
index 09660f74..0fabd09f 100644
--- a/src/node.c
+++ b/src/node.c
@@ -19,6 +19,9 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include <math.h>
+#include <float.h>
+
 SERD_API
 SerdNode
 serd_node_from_string(SerdType type, const uint8_t* buf)
@@ -140,6 +143,84 @@ serd_node_new_uri(const SerdURI* uri, const SerdURI* base, SerdURI* out)
 	return node;
 }
 
+SERD_API
+SerdNode
+serd_node_new_decimal(double d, unsigned frac_digits)
+{
+	// Split |d| (not d, so negatives work) into whole part and scaled fraction
+	const double abs_d = fabs(d);
+	const double scale = pow(10, frac_digits);
+	long long    dec   = (long long)floor(abs_d);
+	long long    frac  = llrint((abs_d - floor(abs_d)) * scale);
+	if (frac >= (long long)scale) {  // Rounded up to 1.0: carry into whole part
+		++dec;
+		frac = 0;
+	}
+
+	// Count whole digits exactly; ceil(log10()) is one short at powers of ten
+	long int_digits = 1;
+	for (long long n = dec; n >= 10; n /= 10) { ++int_digits; }
+
+	// Room for sign, whole digits, point, at least one fraction digit, and NUL
+	char*    buf  = calloc(int_digits + frac_digits + 4, 1);
+	SerdNode node = { (const uint8_t*)buf, 0, 0, 0, SERD_LITERAL };
+	if (!buf) {
+		return node;  // Allocation failed: node has a NULL buf and zero length
+	}
+
+	// Point s to decimal point location
+	char* s = buf + int_digits;
+	if (d < 0.0) {
+		*buf = '-';
+		++s;
+	}
+
+	// Write integer part (right to left)
+	char* t = s - 1;
+	do {
+		*t-- = (char)('0' + (dec % 10));
+	} while ((dec /= 10) > 0);
+	*s++ = '.';
+
+	// Drop trailing zeros, but always keep one digit after the point
+	unsigned n_frac = (frac_digits > 0) ? frac_digits : 1;
+	for (; n_frac > 1 && (frac % 10 == 0); --n_frac, frac /= 10) {}
+	node.n_bytes = node.n_chars = (size_t)(s - buf) + n_frac;
+
+	// Write remaining fractional digits (right to left)
+	for (t = s + n_frac - 1; t >= s; --t, frac /= 10) {
+		*t = (char)('0' + (frac % 10));
+	}
+
+	return node;
+}
+
+SERD_API
+SerdNode
+serd_node_new_integer(long i)
+{
+	// Take the magnitude as unsigned so negating LONG_MIN cannot overflow
+	unsigned long abs_i  = (i < 0) ? (0UL - (unsigned long)i) : (unsigned long)i;
+	long          digits = 1;
+	for (unsigned long n = abs_i; n >= 10; n /= 10) { ++digits; }
+
+	// One byte each beyond the digits for a possible sign and the terminator
+	char*    buf  = calloc(digits + 2, 1);
+	SerdNode node = { (const uint8_t*)buf, 0, 0, 0, SERD_LITERAL };
+	if (!buf) { return node; }  // Allocation failed: NULL buf, zero length
+
+	// Point s to the last digit, which is one further along after a sign
+	char* s = buf + digits - 1;
+	if (i < 0) { *buf = '-'; ++s; }
+	node.n_bytes = node.n_chars = (size_t)(s - buf) + 1;
+
+	// Write integer part (right to left)
+	do {
+		*s-- = (char)('0' + (abs_i % 10));
+	} while ((abs_i /= 10) > 0);
+	return node;
+}
+
 SERD_API
 void
 serd_node_free(SerdNode* node)
diff --git a/src/serd_internal.h b/src/serd_internal.h
index 5fb9f0ef..39f8d503 100644
--- a/src/serd_internal.h
+++ b/src/serd_internal.h
@@ -209,4 +209,15 @@ is_digit(const uint8_t c)
 	return in_range(c, '0', '9');
 }
 
+/** Return true iff @c c is whitespace as isspace() sees it in the C locale. */
+static inline bool
+is_space(const char c)
+{
+	const bool is_blank    = (c == ' ' || c == '\t');
+	const bool is_vertical = (c == '\n' || c == '\v' || c == '\f' || c == '\r');
+
+	// No locale lookup, so the result is the same under any setlocale()
+	return is_blank || is_vertical;
+}
+
 #endif  // SERD_INTERNAL_H
diff --git a/src/serd_test.c b/src/serd_test.c
new file mode 100644
index 00000000..1607373d
--- /dev/null
+++ b/src/serd_test.c
@@ -0,0 +1,130 @@
+/*
+  Copyright 2011 David Robillard <http://drobilla.net>
+
+  Permission to use, copy, modify, and/or distribute this software for any
+  purpose with or without fee is hereby granted, provided that the above
+  copyright notice and this permission notice appear in all copies.
+
+  THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include <float.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "serd/serd.h"
+
+/** Print @c dbl with printf, parse it back, and check the round trip. */
+static bool
+test_strtod(double dbl, double max_delta)
+{
+	// Render the reference text with the C library
+	char text[1024];
+	snprintf(text, sizeof(text), "%lf", dbl);
+
+	char*        end    = NULL;
+	const double parsed = serd_strtod(text, &end);
+	const double error  = fabs(parsed - dbl);
+	const bool   failed = (error > max_delta);
+	if (failed) {
+		fprintf(stderr, "error: Parsed %lf != %lf (delta %lf)\n", dbl, parsed, error);
+	}
+	return !failed;
+}
+
+int
+main(void)
+{
+	#define MAX       1000000  // Bound on both whole part and fraction steps
+	#define NUM_TESTS 1000     // Number of pseudo-random round trips
+	for (int i = 0; i < NUM_TESTS; ++i) {
+		double dbl = rand() % MAX;
+		dbl += (rand() % MAX) / (double)MAX;
+
+		if (!test_strtod(dbl, 1 / (double)MAX)) {
+			return 1;
+		}
+	}
+
+	const double expt_test_nums[] = {
+		2.0E18, -5e19, +8e20, 2e+34, -5e-5, 8e0, 9e-0, 2e+0
+	};
+
+	const char* expt_test_strs[] = {
+		"02e18", "-5e019", "+8e20", "2E+34", "-5E-5", "8E0", "9e-0", "2e+0"
+	};
+
+	for (unsigned i = 0; i < sizeof(expt_test_nums) / sizeof(expt_test_nums[0]); ++i) {
+		char* endptr;
+		const double num   = serd_strtod(expt_test_strs[i], &endptr);
+		const double delta = fabs(num - expt_test_nums[i]);
+		if (delta > DBL_EPSILON) {
+			fprintf(stderr, "error: Parsed `%s' %lf != %lf (delta %lf)\n",
+			        expt_test_strs[i], num, expt_test_nums[i], delta);
+			return 1;
+		}
+	}
+
+	// Test serd_node_new_decimal (every case uses 8 fractional digits)
+
+	const double dbl_test_nums[] = {
+		0.0, 42.0, .01, 8.0, 2.05, -16.00001, 5.000000005
+	};
+
+	const char* dbl_test_strs[] = {
+		"0.0", "42.0", "0.01", "8.0", "2.05", "-16.00001", "5.00000001"
+	};
+
+	for (unsigned i = 0; i < sizeof(dbl_test_nums) / sizeof(dbl_test_nums[0]); ++i) {
+		SerdNode node = serd_node_new_decimal(dbl_test_nums[i], 8);
+		if (strcmp((const char*)node.buf, (const char*)dbl_test_strs[i])) {
+			fprintf(stderr, "error: Serialised `%s' != %s\n",
+			        node.buf, dbl_test_strs[i]);
+			return 1;
+		}
+		const size_t len = strlen((const char*)node.buf);
+		if (node.n_bytes != len || node.n_chars != len) {
+			fprintf(stderr, "error: Length %zu,%zu != %zu\n",
+			        node.n_bytes, node.n_chars, len);
+			return 1;
+		}
+		serd_node_free(&node);
+	}
+
+	// Test serd_node_new_integer
+
+	const long int_test_nums[] = {
+		0, -0, -23, 23, -12340, 1000, -1000
+	};
+
+	const char* int_test_strs[] = {
+		"0", "0", "-23", "23", "-12340", "1000", "-1000"
+	};
+
+	for (unsigned i = 0; i < sizeof(int_test_nums) / sizeof(int_test_nums[0]); ++i) {
+		fprintf(stderr, "\n*** TEST %ld\n", int_test_nums[i]);
+		SerdNode node = serd_node_new_integer(int_test_nums[i]);
+		if (strcmp((const char*)node.buf, (const char*)int_test_strs[i])) {
+			fprintf(stderr, "error: Serialised `%s' != %s\n",
+			        node.buf, int_test_strs[i]);
+			return 1;
+		}
+		const size_t len = strlen((const char*)node.buf);
+		if (node.n_bytes != len || node.n_chars != len) {
+			fprintf(stderr, "error: Length %zu,%zu != %zu\n",
+			        node.n_bytes, node.n_chars, len);
+			return 1;
+		}
+		serd_node_free(&node);
+	}
+
+	printf("Success\n");
+	return 0;
+}
diff --git a/src/string.c b/src/string.c
index f747c530..35a3bbc8 100644
--- a/src/string.c
+++ b/src/string.c
@@ -16,6 +16,8 @@
 
 #include "serd_internal.h"
 
+#include <math.h>
+
 SERD_API
 const uint8_t*
 serd_strerror(SerdStatus st)
@@ -56,3 +58,56 @@ serd_strlen(const uint8_t* str, size_t* n_bytes, SerdNodeFlags* flags)
 	}
 	return n_chars;
 }
+
+/** Consume an optional '+' or '-' at *sptr and return +1.0 or -1.0. */
+static inline double
+read_sign(const char** sptr)
+{
+	const char c = **sptr;
+	if (c == '-' || c == '+') {
+		++(*sptr);  // Only advance when a sign character is present
+	}
+	return (c == '-') ? -1.0 : 1.0;
+}
+
+SERD_API
+double
+serd_strtod(const char* str, char** endptr)
+{
+	double result = 0.0;  // Accumulates the magnitude; sign is applied last
+
+	// Point s at the first non-whitespace character
+	const char* s = str;
+	while (is_space(*s)) { ++s; }
+
+	// Read leading sign if necessary
+	const double sign = read_sign(&s);
+
+	// Parse integer part
+	for (; is_digit(*s); ++s) {
+		result = (result * 10.0) + (*s - '0');
+	}
+
+	// Parse fractional part (the point is always '.', whatever the locale)
+	if (*s == '.') {
+		double denom = 10.0;
+		for (++s; is_digit(*s); ++s) {
+			result += (*s - '0') / denom;
+			denom *= 10.0;
+		}
+	}
+
+	// Parse exponent
+	if (*s == 'e' || *s == 'E') {
+		++s;
+		double expt      = 0.0;
+		double expt_sign = read_sign(&s);
+		for (; is_digit(*s); ++s) {
+			expt = (expt * 10.0) + (*s - '0');
+		}
+		result *= pow(10, expt * expt_sign);
+	}
+
+	if (endptr) { *endptr = (char*)s; }  // NULL is allowed, as with strtod
+	return result * sign;
+}
diff --git a/wscript b/wscript
index 2d71f4c6..1ee9ec83 100644
--- a/wscript
+++ b/wscript
@@ -10,7 +10,7 @@ from waflib.extras import autowaf as autowaf
 import waflib.Logs as Logs, waflib.Options as Options
 
 # Version of this package (even if built as a child)
-SERD_VERSION       = '0.6.0'
+SERD_VERSION       = '0.7.0'
 SERD_MAJOR_VERSION = '0'
 
 # Library version (UNIX style major, minor, micro)
@@ -119,6 +119,7 @@ def build(bld):
               export_includes = ['.'],
               source          = lib_source,
               includes        = ['.', './src'],
+              lib             = ['m'],
               name            = 'libserd',
               target          = 'serd-%s' % SERD_MAJOR_VERSION,
               vnum            = SERD_LIB_VERSION,
@@ -132,6 +133,7 @@ def build(bld):
                   export_includes = ['.'],
                   source          = lib_source,
                   includes        = ['.', './src'],
+                  lib             = ['m'],
                   name            = 'libserd_static',
                   target          = 'serd-%s' % SERD_MAJOR_VERSION,
                   vnum            = SERD_LIB_VERSION,
@@ -143,22 +145,33 @@ def build(bld):
         obj = bld(features     = 'c cstlib',
                   source       = lib_source,
                   includes     = ['.', './src'],
+                  lib          = ['m'],
                   name         = 'libserd_profiled',
                   target       = 'serd_profiled',
                   install_path = '',
                   cflags       = [ '-fprofile-arcs', '-ftest-coverage',
                                    '-DSERD_INTERNAL' ])
 
-        # Unit test program
+        # Unit test serdi
         obj = bld(features     = 'c cprogram',
                   source       = 'src/serdi.c',
                   includes     = ['.', './src'],
                   use          = 'libserd_profiled',
-                  lib          = ['gcov'],
+                  lib          = ['m', 'gcov'],
                   target       = 'serdi_static',
                   install_path = '',
                   cflags       = [ '-fprofile-arcs',  '-ftest-coverage' ])
 
+        # Unit test program
+        obj = bld(features     = 'c cprogram',
+                  source       = 'src/serd_test.c',
+                  includes     = ['.', './src'],
+                  use          = 'libserd_profiled',
+                  lib          = ['m', 'gcov'],
+                  target       = 'serd_test',
+                  install_path = '',
+                  cflags       = [ '-fprofile-arcs',  '-ftest-coverage' ])
+
     # Utilities
     if bld.env['BUILD_UTILS']:
         obj = bld(features     = 'c cprogram',
@@ -258,6 +271,8 @@ def test(ctx):
 
     autowaf.pre_test(ctx, APPNAME)
 
+    autowaf.run_tests(ctx, APPNAME, ['./serd_test'], dirs=['.'])
+
     os.environ['PATH'] = '.' + os.pathsep + os.getenv('PATH')
     nul = os.devnull
     autowaf.run_tests(ctx, APPNAME, [
-- 
cgit v1.2.1