From cd89a74a2f7bf8c3efc3ecf1597cf39d6295db00 Mon Sep 17 00:00:00 2001
From: David Robillard <d@drobilla.net>
Date: Sat, 12 May 2018 18:03:13 +0200
Subject: Set datatypes on integer, decimal, and base64 nodes

---
 include/serd/node.h | 20 ++++++++---
 src/node.c          | 98 +++++++++++++++++++++++++++++++++++++----------------
 test/test_node.c    | 34 +++++++++++++++----
 3 files changed, 111 insertions(+), 41 deletions(-)

diff --git a/include/serd/node.h b/include/serd/node.h
index cbf5efcc..2f4384bd 100644
--- a/include/serd/node.h
+++ b/include/serd/node.h
@@ -172,13 +172,21 @@ serd_new_file_uri(SerdStringView path, SerdStringView hostname);
 
    @param d The value for the new node.
    @param frac_digits The maximum number of digits after the decimal place.
+   @param datatype Datatype of node, or NULL for xsd:decimal.
 */
 SERD_API SerdNode* SERD_ALLOCATED
-serd_new_decimal(double d, unsigned frac_digits);
+serd_new_decimal(double                        d,
+                 unsigned                      frac_digits,
+                 const SerdNode* SERD_NULLABLE datatype);
 
-/// Create a new node by serialising `i` into an xsd:integer string
+/**
+   Create a new node by serialising `i` into an xsd:integer string.
+
+   @param i Integer value to serialise.
+   @param datatype Datatype of node, or NULL for xsd:integer.
+*/
 SERD_API SerdNode* SERD_ALLOCATED
-serd_new_integer(int64_t i);
+serd_new_integer(int64_t i, const SerdNode* SERD_NULLABLE datatype);
 
 /**
    Create a node by serialising `buf` into an xsd:base64Binary string.
@@ -189,9 +197,13 @@ serd_new_integer(int64_t i);
    @param buf Raw binary input data.
    @param size Size of `buf`.
    @param wrap_lines Wrap lines at 76 characters to conform to RFC 2045.
+   @param datatype Datatype of node, or NULL for xsd:base64Binary.
 */
 SERD_API SerdNode* SERD_ALLOCATED
-serd_new_blob(const void* SERD_NONNULL buf, size_t size, bool wrap_lines);
+serd_new_blob(const void* SERD_NONNULL      buf,
+              size_t                        size,
+              bool                          wrap_lines,
+              const SerdNode* SERD_NULLABLE datatype);
 
 /// Return a deep copy of `node`
 SERD_API SerdNode* SERD_ALLOCATED
diff --git a/src/node.c b/src/node.c
index 43a4b502..96a9bcaa 100644
--- a/src/node.c
+++ b/src/node.c
@@ -31,6 +31,21 @@
 #  endif
 #endif
 
+#define NS_XSD "http://www.w3.org/2001/XMLSchema#"
+
+typedef struct StaticNode {
+  SerdNode node;
+  char     buf[sizeof(NS_XSD "base64Binary")];
+} StaticNode;
+
+#define DEFINE_XSD_NODE(name)                 \
+  static const StaticNode serd_xsd_##name = { \
+    {sizeof(NS_XSD #name) - 1, 0, SERD_URI}, NS_XSD #name};
+
+DEFINE_XSD_NODE(base64Binary)
+DEFINE_XSD_NODE(decimal)
+DEFINE_XSD_NODE(integer)
+
 static const size_t serd_node_align = 2 * sizeof(uint64_t);
 
 static const SerdNodeFlags meta_mask = (SERD_HAS_DATATYPE | SERD_HAS_LANGUAGE);
@@ -38,7 +53,7 @@ static const SerdNodeFlags meta_mask = (SERD_HAS_DATATYPE | SERD_HAS_LANGUAGE);
 static SerdNode*
 serd_new_from_uri(SerdURIView uri, SerdURIView base);
 
-static size_t
+SERD_PURE_FUNC static size_t
 serd_uri_string_length(const SerdURIView* const uri)
 {
   size_t len = uri->path_prefix.length;
@@ -66,20 +81,28 @@ string_sink(const void* const buf, const size_t len, void* const stream)
   return len;
 }
 
-static size_t
+SERD_PURE_FUNC static size_t
 serd_node_pad_size(const size_t n_bytes)
 {
-  const size_t pad = sizeof(SerdNode) - (n_bytes + 2) % sizeof(SerdNode);
-  return n_bytes + 2 + pad;
+  const size_t pad  = sizeof(SerdNode) - (n_bytes + 2) % sizeof(SerdNode);
+  const size_t size = n_bytes + 2 + pad;
+  assert(size % sizeof(SerdNode) == 0);
+  return size;
+}
+
+SERD_PURE_FUNC static SerdNode*
+serd_node_meta(SerdNode* const node)
+{
+  return node + 1 + (serd_node_pad_size(node->length) / sizeof(SerdNode));
 }
 
-static const SerdNode*
+SERD_PURE_FUNC static const SerdNode*
 serd_node_meta_c(const SerdNode* const node)
 {
   return node + 1 + (serd_node_pad_size(node->length) / sizeof(SerdNode));
 }
 
-static const SerdNode*
+SERD_PURE_FUNC static const SerdNode*
 serd_node_maybe_get_meta_c(const SerdNode* const node)
 {
   return (node->flags & meta_mask) ? serd_node_meta_c(node) : NULL;
@@ -405,21 +428,28 @@ serd_digits(const double abs)
 }
 
 SerdNode*
-serd_new_decimal(const double d, const unsigned frac_digits)
+serd_new_decimal(const double          d,
+                 const unsigned        frac_digits,
+                 const SerdNode* const datatype)
 {
   if (isnan(d) || isinf(d)) {
     return NULL;
   }
 
+  const SerdNode* type       = datatype ? datatype : &serd_xsd_decimal.node;
   const double    abs_d      = fabs(d);
   const unsigned  int_digits = serd_digits(abs_d);
   const size_t    len        = int_digits + frac_digits + 3;
-  SerdNode* const node       = serd_node_malloc(len, 0, SERD_LITERAL);
-  char* const     buf        = serd_node_buffer(node);
-  const double    int_part   = floor(abs_d);
+  const size_t    type_len   = serd_node_total_size(type);
+  const size_t    total_len  = len + type_len;
+
+  SerdNode* const node =
+    serd_node_malloc(total_len, SERD_HAS_DATATYPE, SERD_LITERAL);
 
   // Point s to decimal point location
-  char* s = buf + int_digits;
+  char* const  buf      = serd_node_buffer(node);
+  const double int_part = floor(abs_d);
+  char*        s        = buf + int_digits;
   if (d < 0.0) {
     *buf = '-';
     ++s;
@@ -457,19 +487,24 @@ serd_new_decimal(const double d, const unsigned frac_digits)
     }
   }
 
+  memcpy(serd_node_meta(node), type, type_len);
   return node;
 }
 
 SerdNode*
-serd_new_integer(const int64_t i)
+serd_new_integer(const int64_t i, const SerdNode* const datatype)
 {
-  uint64_t       abs_i  = (uint64_t)((i < 0) ? -i : i);
-  const unsigned digits = serd_digits((double)abs_i);
-  SerdNode*      node   = serd_node_malloc(digits + 2, 0, SERD_LITERAL);
-  char*          buf    = serd_node_buffer(node);
+  const SerdNode* type      = datatype ? datatype : &serd_xsd_integer.node;
+  uint64_t        abs_i     = (uint64_t)((i < 0) ? -i : i);
+  const unsigned  digits    = serd_digits((double)abs_i);
+  const size_t    type_len  = serd_node_total_size(type);
+  const size_t    total_len = digits + 2 + type_len;
+
+  SerdNode* node = serd_node_malloc(total_len, SERD_HAS_DATATYPE, SERD_LITERAL);
 
   // Point s to the end
-  char* s = buf + digits - 1;
+  char* buf = serd_node_buffer(node);
+  char* s   = buf + digits - 1;
   if (i < 0) {
     *buf = '-';
     ++s;
@@ -482,25 +517,35 @@ serd_new_integer(const int64_t i)
     *s-- = (char)('0' + (abs_i % 10));
   } while ((abs_i /= 10) > 0);
 
+  memcpy(serd_node_meta(node), type, type_len);
   return node;
 }
 
 SerdNode*
-serd_new_blob(const void* const buf, const size_t size, const bool wrap_lines)
+serd_new_blob(const void* const     buf,
+              const size_t          size,
+              const bool            wrap_lines,
+              const SerdNode* const datatype)
 {
   if (!buf || !size) {
     return NULL;
   }
 
-  const size_t    len  = serd_base64_get_length(size, wrap_lines);
-  SerdNode* const node = serd_node_malloc(len + 1, 0, SERD_LITERAL);
-  uint8_t* const  str  = (uint8_t*)serd_node_buffer(node);
+  const SerdNode* type      = datatype ? datatype : &serd_xsd_base64Binary.node;
+  const size_t    len       = serd_base64_get_length(size, wrap_lines);
+  const size_t    type_len  = serd_node_total_size(type);
+  const size_t    total_len = len + 1 + type_len;
+
+  SerdNode* const node =
+    serd_node_malloc(total_len, SERD_HAS_DATATYPE, SERD_LITERAL);
 
+  uint8_t* str = (uint8_t*)serd_node_buffer(node);
   if (serd_base64_encode(str, buf, size, wrap_lines)) {
     node->flags |= SERD_HAS_NEWLINE;
   }
 
   node->length = len;
+  memcpy(serd_node_meta(node), type, type_len);
   return node;
 }
 
@@ -537,13 +582,6 @@ serd_node_uri_view(const SerdNode* const node)
                                   : SERD_URI_NULL;
 }
 
-SERD_PURE_FUNC static const SerdNode*
-serd_node_meta_node(const SerdNode* node)
-{
-  const size_t len = serd_node_pad_size(node->length);
-  return node + 1 + (len / sizeof(SerdNode));
-}
-
 const SerdNode*
 serd_node_datatype(const SerdNode* const node)
 {
@@ -551,7 +589,7 @@ serd_node_datatype(const SerdNode* const node)
     return NULL;
   }
 
-  const SerdNode* const datatype = serd_node_meta_node(node);
+  const SerdNode* const datatype = serd_node_meta_c(node);
   assert(datatype->type == SERD_URI || datatype->type == SERD_CURIE);
   return datatype;
 }
@@ -563,7 +601,7 @@ serd_node_language(const SerdNode* const node)
     return NULL;
   }
 
-  const SerdNode* const lang = serd_node_meta_node(node);
+  const SerdNode* const lang = serd_node_meta_c(node);
   assert(lang->type == SERD_LITERAL);
   return lang;
 }
diff --git a/test/test_node.c b/test/test_node.c
index 68bdf784..3cdff51a 100644
--- a/test/test_node.c
+++ b/test/test_node.c
@@ -25,6 +25,8 @@
 #  define NAN (INFINITY - INFINITY)
 #endif
 
+#define NS_XSD "http://www.w3.org/2001/XMLSchema#"
+
 static void
 test_strtod(double dbl, double max_delta)
 {
@@ -82,14 +84,23 @@ test_double_to_node(void)
                                  NULL};
 
   for (size_t i = 0; i < sizeof(dbl_test_nums) / sizeof(double); ++i) {
-    SerdNode*   node     = serd_new_decimal(dbl_test_nums[i], 8);
+    SerdNode*   node     = serd_new_decimal(dbl_test_nums[i], 8, NULL);
     const char* node_str = node ? serd_node_string(node) : NULL;
     const bool  pass     = (node_str && dbl_test_strs[i])
                              ? !strcmp(node_str, dbl_test_strs[i])
                              : (node_str == dbl_test_strs[i]);
     assert(pass);
-    assert(!node || serd_node_length(node) == strlen(node_str));
-    serd_node_free(node);
+
+    const size_t len = node_str ? strlen(node_str) : 0;
+    assert((!node && len == 0) || serd_node_length(node) == len);
+
+    if (node) {
+      const SerdNode* const datatype = serd_node_datatype(node);
+      assert(datatype);
+      assert(!dbl_test_strs[i] ||
+             !strcmp(serd_node_string(datatype), NS_XSD "decimal"));
+      serd_node_free(node);
+    }
   }
 }
 
@@ -104,10 +115,15 @@ test_integer_to_node(void)
     "0", "0", "-23", "23", "-12340", "1000", "-1000"};
 
   for (size_t i = 0; i < N_TEST_NUMS; ++i) {
-    SerdNode*   node     = serd_new_integer(int_test_nums[i]);
+    SerdNode*   node     = serd_new_integer(int_test_nums[i], NULL);
     const char* node_str = serd_node_string(node);
     assert(!strcmp(node_str, int_test_strs[i]));
-    assert(serd_node_length(node) == strlen(node_str));
+    const size_t len = strlen(node_str);
+    assert(serd_node_length(node) == len);
+
+    const SerdNode* const datatype = serd_node_datatype(node);
+    assert(datatype);
+    assert(!strcmp(serd_node_string(datatype), NS_XSD "integer"));
     serd_node_free(node);
   }
 
@@ -117,7 +133,7 @@ test_integer_to_node(void)
 static void
 test_blob_to_node(void)
 {
-  assert(!serd_new_blob(&SERD_URI_NULL, 0, false));
+  assert(!serd_new_blob(&SERD_URI_NULL, 0, false, NULL));
 
   for (size_t size = 1; size < 256; ++size) {
     uint8_t* const data = (uint8_t*)malloc(size);
@@ -126,7 +142,7 @@ test_blob_to_node(void)
     }
 
     size_t      out_size = 0;
-    SerdNode*   blob     = serd_new_blob(data, size, size % 5);
+    SerdNode*   blob     = serd_new_blob(data, size, size % 5, NULL);
     const char* blob_str = serd_node_string(blob);
     uint8_t*    out =
       (uint8_t*)serd_base64_decode(blob_str, serd_node_length(blob), &out_size);
@@ -138,6 +154,10 @@ test_blob_to_node(void)
       assert(out[i] == data[i]);
     }
 
+    const SerdNode* const datatype = serd_node_datatype(blob);
+    assert(datatype);
+    assert(!strcmp(serd_node_string(datatype), NS_XSD "base64Binary"));
+
     serd_node_free(blob);
     serd_free(out);
     free(data);
-- 
cgit v1.2.1