From 2469c739d87d6a2bb0c9b9c1e2b2c69b0e981b97 Mon Sep 17 00:00:00 2001
From: David Robillard
Date: Mon, 19 Dec 2011 02:59:31 +0000
Subject: Add serd_strtod(), serd_node_new_decimal(), and serd_node_new_integer() for locale-independent numeric node parsing/serialising.

git-svn-id: http://svn.drobilla.net/serd/trunk@260 490d8e77-9747-427b-9fa3-0b8f29cee8a0
---
Review notes (not part of the commit message):

This patch was re-wrapped from a whitespace-collapsed export.  The targets of
the angle-bracket #include lines and all indentation were restored by
inference and must be checked against the repository before applying.  The
"index" blob hashes are those of the original export and no longer match the
files changed below.

Changes relative to the exported patch:
 * serd_node_new_decimal(): count integer digits by division (10.0 used to
   write before the start of the buffer), take the fraction from the absolute
   value, carry rounding into the integer part, handle frac_digits == 0, and
   check the allocation.
 * serd_node_new_integer(): allocate room for the sign (negative numbers were
   not null terminated), avoid labs(LONG_MIN), and check the allocation.
 * serd_strtod(): accept a NULL endptr, as strtod() does.
 * serd_test.c: include <string.h>, size the long array with sizeof(long),
   drop leftover debug output, and cover 10.0 and a rounding carry.
 * serd.h: the serd_node_new_integer() documentation named the wrong parameter.

 ChangeLog           |   2 +
 serd/serd.h         |  36 ++++++++++++++
 src/node.c          | 115 ++++++++++++++++++++++++++++++++++++++++++++
 src/serd_internal.h |  12 +++++
 src/serd_test.c     | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/string.c        |  58 +++++++++++++++++++++++
 wscript             |  21 +++++++--
 7 files changed, 372 insertions(+), 3 deletions(-)
 create mode 100644 src/serd_test.c

diff --git a/ChangeLog b/ChangeLog
index 51203067..5b489eae 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -13,6 +13,8 @@ serd (UNRELEASED) unstable; urgency=low
   * Handle a quote as the last character of a long string literal in the
     writer (by escaping it) rather than the reader, to avoid writing Turtle
     other tools fail to parse.
+  * Add serd_strtod(), serd_node_new_decimal(), and serd_node_new_integer()
+    for locale-independent numeric node parsing/serialising.
 
  -- David Robillard (UNRELEASED)
 
diff --git a/serd/serd.h b/serd/serd.h
index b20dacf2..eebfa113 100644
--- a/serd/serd.h
+++ b/serd/serd.h
@@ -266,6 +266,17 @@ SERD_API
 size_t
 serd_strlen(const uint8_t* str, size_t* n_bytes, SerdNodeFlags* flags);
 
+/**
+   Parse a string to a double.
+
+   The API of this function is identical to the standard C strtod function,
+   except this function is locale-independent and always matches the lexical
+   format used in the Turtle grammar (the decimal point is always ".").
+*/
+SERD_API
+double
+serd_strtod(const char* str, char** endptr);
+
 /**
    @}
    @name URI
@@ -379,6 +390,31 @@ SERD_API
 SerdNode
 serd_node_new_uri(const SerdURI* uri, const SerdURI* base, SerdURI* out);
 
+/**
+   Create a new node by serialising @c d into an xsd:decimal string.
+
+   The resulting node will always contain a `.', start with a digit, and end
+   with a digit (i.e. will have a leading and/or trailing `0' if necessary).
+   It will never be in scientific notation.  A maximum of @c frac_digits digits
+   will be written after the decimal point, but trailing zeros will
+   automatically be omitted (except one if @c d is a round integer).
+
+   Note that about 16 and 8 fractional digits are required to precisely
+   represent a double and float, respectively.
+
+   @param frac_digits The maximum number of digits after the decimal place.
+*/
+SERD_API
+SerdNode
+serd_node_new_decimal(double d, unsigned frac_digits);
+
+/**
+   Create a new node by serialising @c i into an xsd:integer string.
+*/
+SERD_API
+SerdNode
+serd_node_new_integer(long i);
+
 /**
    Free any data owned by @c node.
 
diff --git a/src/node.c b/src/node.c
index 09660f74..0fabd09f 100644
--- a/src/node.c
+++ b/src/node.c
@@ -19,6 +19,9 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include <math.h>
+#include <float.h>
+
 SERD_API
 SerdNode
 serd_node_from_string(SerdType type, const uint8_t* buf)
@@ -140,6 +143,118 @@ serd_node_new_uri(const SerdURI* uri, const SerdURI* base, SerdURI* out)
 	return node;
 }
 
+SERD_API
+SerdNode
+serd_node_new_decimal(double d, unsigned frac_digits)
+{
+	const double abs_d    = fabs(d);
+	double       int_part = floor(abs_d);
+
+	// Round the fraction first, since rounding may carry into the integer
+	// part (e.g. 0.999 to 2 places is 1.0)
+	long frac = 0;
+	if (frac_digits > 0 && (abs_d - int_part) >= DBL_EPSILON) {
+		const double scale = pow(10, frac_digits);
+		frac = lrint((abs_d - int_part) * scale);
+		if (frac >= scale) {
+			int_part += 1.0;
+			frac = 0;
+		}
+	}
+
+	// Count integer digits by division, since ceil(log10(x)) is one short
+	// when x is an exact power of ten (e.g. 10.0 has 2 digits, not 1)
+	long dec        = (long)int_part;
+	long int_digits = 1;
+	for (long n = dec / 10; n > 0; n /= 10) {
+		++int_digits;
+	}
+
+	// Room for sign, integer digits, point, fraction (at least "0"), and null
+	const size_t n_frac = (frac_digits > 0) ? frac_digits : 1;
+	char*        buf    = calloc(int_digits + n_frac + 3, 1);
+	SerdNode     node   = { (const uint8_t*)buf, 0, 0, 0, SERD_LITERAL };
+	if (!buf) {
+		return node;  // Out of memory: buf is NULL and lengths are zero
+	}
+
+	// Point s to decimal point location
+	char* s = buf + int_digits;
+	if (d < 0.0) {
+		*buf = '-';
+		++s;
+	}
+
+	// Write integer part (right to left)
+	char* t = s - 1;
+	do {
+		*t-- = '0' + (dec % 10);
+	} while ((dec /= 10) > 0);
+
+	*s++ = '.';
+
+	if (frac == 0) {
+		// Nothing after the point, but the string must end with a digit
+		*s++ = '0';
+		node.n_bytes = node.n_chars = (s - buf);
+	} else {
+		// Write fractional part (right to left)
+		s += frac_digits - 1;
+		unsigned i = 0;
+
+		// Skip trailing zeros
+		for (; i < frac_digits && (frac % 10 == 0); ++i, --s, frac /= 10) {}
+
+		node.n_bytes = node.n_chars = (s - buf) + 1;
+
+		// Write digits from last trailing zero to decimal point
+		for (; i < frac_digits; ++i) {
+			*s-- = '0' + (frac % 10);
+			frac /= 10;
+		}
+	}
+
+	return node;
+}
+
+SERD_API
+SerdNode
+serd_node_new_integer(long i)
+{
+	// Negate in unsigned arithmetic so that LONG_MIN does not overflow
+	const unsigned long u     = (unsigned long)i;
+	unsigned long       abs_i = (i < 0) ? (0UL - u) : u;
+
+	// Count digits by division to avoid floating point error in log10
+	size_t digits = 1;
+	for (unsigned long n = abs_i / 10; n > 0; n /= 10) {
+		++digits;
+	}
+
+	// Room for sign, digits, and null terminator
+	char*    buf  = calloc(digits + 2, 1);
+	SerdNode node = { (const uint8_t*)buf, 0, 0, 0, SERD_LITERAL };
+	if (!buf) {
+		return node;  // Out of memory: buf is NULL and lengths are zero
+	}
+
+	// Point s to the end
+	char* s = buf + digits - 1;
+	if (i < 0) {
+		*buf = '-';
+		++s;
+	}
+
+	node.n_bytes = node.n_chars = (s - buf) + 1;
+
+	// Write integer part (right to left)
+	do {
+		*s-- = '0' + (abs_i % 10);
+	} while ((abs_i /= 10) > 0);
+
+	return node;
+}
+
 SERD_API
 void
 serd_node_free(SerdNode* node)
diff --git a/src/serd_internal.h b/src/serd_internal.h
index 5fb9f0ef..39f8d503 100644
--- a/src/serd_internal.h
+++ b/src/serd_internal.h
@@ -209,4 +209,16 @@ is_digit(const uint8_t c)
 	return in_range(c, '0', '9');
 }
 
+/** Return true iff c is a whitespace character (as in the C locale). */
+static inline bool
+is_space(const char c)
+{
+	switch (c) {
+	case ' ': case '\f': case '\n': case '\r': case '\t': case '\v':
+		return true;
+	default:
+		return false;
+	}
+}
+
 #endif  // SERD_INTERNAL_H
diff --git a/src/serd_test.c b/src/serd_test.c
new file mode 100644
index 00000000..1607373d
--- /dev/null
+++ b/src/serd_test.c
@@ -0,0 +1,131 @@
+/*
+  Copyright 2011 David Robillard
+
+  Permission to use, copy, modify, and/or distribute this software for any
+  purpose with or without fee is hereby granted, provided that the above
+  copyright notice and this permission notice appear in all copies.
+
+  THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include <float.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "serd/serd.h"
+
+// Return true iff serd_strtod() reads dbl back (as printed) within max_delta
+static bool
+test_strtod(double dbl, double max_delta)
+{
+	char buf[1024];
+	snprintf(buf, sizeof(buf), "%lf", dbl);
+
+	char* endptr = NULL;
+	const double out = serd_strtod(buf, &endptr);
+
+	const double diff = fabs(out - dbl);
+	if (diff > max_delta) {
+		fprintf(stderr, "error: Parsed %lf != %lf (delta %lf)\n",
+		        dbl, out, diff);
+		return false;
+	}
+	return true;
+}
+
+int
+main(void)
+{
+	#define MAX       1000000
+	#define NUM_TESTS 1000
+	for (int i = 0; i < NUM_TESTS; ++i) {
+		double dbl = rand() % MAX;
+		dbl += (rand() % MAX) / (double)MAX;
+
+		if (!test_strtod(dbl, 1 / (double)MAX)) {
+			return 1;
+		}
+	}
+
+	const double expt_test_nums[] = {
+		2.0E18, -5e19, +8e20, 2e+34, -5e-5, 8e0, 9e-0, 2e+0
+	};
+
+	const char* expt_test_strs[] = {
+		"02e18", "-5e019", "+8e20", "2E+34", "-5E-5", "8E0", "9e-0", "2e+0"
+	};
+
+	for (unsigned i = 0; i < sizeof(expt_test_nums) / sizeof(double); ++i) {
+		char* endptr;
+		const double num   = serd_strtod(expt_test_strs[i], &endptr);
+		const double delta = fabs(num - expt_test_nums[i]);
+		if (delta > DBL_EPSILON) {
+			fprintf(stderr, "error: Parsed `%s' %lf != %lf (delta %lf)\n",
+			        expt_test_strs[i], num, expt_test_nums[i], delta);
+			return 1;
+		}
+	}
+
+	// Test serd_node_new_decimal
+
+	const double dbl_test_nums[] = {
+		0.0, 42.0, .01, 8.0, 2.05, -16.00001, 5.000000005, 10.0, 0.999999999
+	};
+
+	const char* dbl_test_strs[] = {
+		"0.0", "42.0", "0.01", "8.0", "2.05", "-16.00001", "5.00000001", "10.0", "1.0"
+	};
+
+	for (unsigned i = 0; i < sizeof(dbl_test_nums) / sizeof(double); ++i) {
+		SerdNode node = serd_node_new_decimal(dbl_test_nums[i], 8);
+		if (strcmp((const char*)node.buf, (const char*)dbl_test_strs[i])) {
+			fprintf(stderr, "error: Serialised `%s' != %s\n",
+			        node.buf, dbl_test_strs[i]);
+			return 1;
+		}
+		const size_t len = strlen((const char*)node.buf);
+		if (node.n_bytes != len || node.n_chars != len) {
+			fprintf(stderr, "error: Length %zu,%zu != %zu\n",
+			        node.n_bytes, node.n_chars, len);
+			return 1;
+		}
+		serd_node_free(&node);
+	}
+
+	// Test serd_node_new_integer
+
+	const long int_test_nums[] = {
+		0, -0, -23, 23, -12340, 1000, -1000
+	};
+
+	const char* int_test_strs[] = {
+		"0", "0", "-23", "23", "-12340", "1000", "-1000"
+	};
+
+	for (unsigned i = 0; i < sizeof(int_test_nums) / sizeof(long); ++i) {
+		SerdNode node = serd_node_new_integer(int_test_nums[i]);
+		if (strcmp((const char*)node.buf, (const char*)int_test_strs[i])) {
+			fprintf(stderr, "error: Serialised `%s' != %s\n",
+			        node.buf, int_test_strs[i]);
+			return 1;
+		}
+		const size_t len = strlen((const char*)node.buf);
+		if (node.n_bytes != len || node.n_chars != len) {
+			fprintf(stderr, "error: Length %zu,%zu != %zu\n",
+			        node.n_bytes, node.n_chars, len);
+			return 1;
+		}
+		serd_node_free(&node);
+	}
+
+	printf("Success\n");
+	return 0;
+}
diff --git a/src/string.c b/src/string.c
index f747c530..35a3bbc8 100644
--- a/src/string.c
+++ b/src/string.c
@@ -16,6 +16,8 @@
 
 #include "serd_internal.h"
 
+#include <math.h>
+
 SERD_API
 const uint8_t*
 serd_strerror(SerdStatus st)
@@ -56,3 +58,59 @@ serd_strlen(const uint8_t* str, size_t* n_bytes, SerdNodeFlags* flags)
 	}
 	return n_chars;
 }
+
+/** Skip an optional sign at *sptr and return 1.0, or -1.0 if it was '-'. */
+static inline double
+read_sign(const char** sptr)
+{
+	double sign = 1.0;
+	switch (**sptr) {
+	case '-': sign = -1.0;  // fallthrough (to skip the sign character)
+	case '+': ++(*sptr);    // fallthrough
+	default:  return sign;
+	}
+}
+
+SERD_API
+double
+serd_strtod(const char* str, char** endptr)
+{
+	double result = 0.0;
+
+	// Point s at the first non-whitespace character
+	const char* s = str;
+	while (is_space(*s)) { ++s; }
+
+	// Read leading sign if necessary
+	const double sign = read_sign(&s);
+
+	// Parse integer part
+	for (; is_digit(*s); ++s) {
+		result = (result * 10.0) + (*s - '0');
+	}
+
+	// Parse fractional part
+	if (*s == '.') {
+		double denom = 10.0;
+		for (++s; is_digit(*s); ++s) {
+			result += (*s - '0') / denom;
+			denom *= 10.0;
+		}
+	}
+
+	// Parse exponent
+	if (*s == 'e' || *s == 'E') {
+		++s;
+		double expt      = 0.0;
+		double expt_sign = read_sign(&s);
+		for (; is_digit(*s); ++s) {
+			expt = (expt * 10.0) + (*s - '0');
+		}
+		result *= pow(10, expt * expt_sign);
+	}
+
+	if (endptr) {
+		*endptr = (char*)s;
+	}
+	return result * sign;
+}
diff --git a/wscript b/wscript
index 2d71f4c6..1ee9ec83 100644
--- a/wscript
+++ b/wscript
@@ -10,7 +10,7 @@ from waflib.extras import autowaf as autowaf
 import waflib.Logs as Logs, waflib.Options as Options
 
 # Version of this package (even if built as a child)
-SERD_VERSION       = '0.6.0'
+SERD_VERSION       = '0.7.0'
 SERD_MAJOR_VERSION = '0'
 
 # Library version (UNIX style major, minor, micro)
@@ -119,6 +119,7 @@ def build(bld):
               export_includes = ['.'],
               source          = lib_source,
               includes        = ['.', './src'],
+              lib             = ['m'],
               name            = 'libserd',
               target          = 'serd-%s' % SERD_MAJOR_VERSION,
               vnum            = SERD_LIB_VERSION,
@@ -132,6 +133,7 @@ def build(bld):
                   export_includes = ['.'],
                   source          = lib_source,
                   includes        = ['.', './src'],
+                  lib             = ['m'],
                   name            = 'libserd_static',
                   target          = 'serd-%s' % SERD_MAJOR_VERSION,
                   vnum            = SERD_LIB_VERSION,
@@ -143,22 +145,33 @@ def build(bld):
         obj = bld(features     = 'c cstlib',
                   source       = lib_source,
                   includes     = ['.', './src'],
+                  lib          = ['m'],
                   name         = 'libserd_profiled',
                   target       = 'serd_profiled',
                   install_path = '',
                   cflags       = [ '-fprofile-arcs',  '-ftest-coverage',
                                    '-DSERD_INTERNAL' ])
 
-        # Unit test program
+        # Unit test serdi
         obj = bld(features     = 'c cprogram',
                   source       = 'src/serdi.c',
                   includes     = ['.', './src'],
                   use          = 'libserd_profiled',
-                  lib          = ['gcov'],
+                  lib          = ['m', 'gcov'],
                   target       = 'serdi_static',
                   install_path = '',
                   cflags       = [ '-fprofile-arcs',  '-ftest-coverage' ])
 
+        # Unit test program
+        obj = bld(features     = 'c cprogram',
+                  source       = 'src/serd_test.c',
+                  includes     = ['.', './src'],
+                  use          = 'libserd_profiled',
+                  lib          = ['m', 'gcov'],
+                  target       = 'serd_test',
+                  install_path = '',
+                  cflags       = [ '-fprofile-arcs',  '-ftest-coverage' ])
+
     # Utilities
     if bld.env['BUILD_UTILS']:
         obj = bld(features     = 'c cprogram',
@@ -258,6 +271,8 @@ def test(ctx):
 
     autowaf.pre_test(ctx, APPNAME)
 
+    autowaf.run_tests(ctx, APPNAME, ['./serd_test'], dirs=['.'])
+
     os.environ['PATH'] = '.' + os.pathsep + os.getenv('PATH')
     nul = os.devnull
     autowaf.run_tests(ctx, APPNAME, [
-- 
cgit v1.2.1