From 4711fdf527f416faee8ff19e15f050d4b48dcfb2 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Fri, 5 May 2023 09:43:57 -0400 Subject: [WIP] Generalize node construction API --- include/serd/node.h | 432 ++++++++++++++++++++++++++++++++++---------------- include/serd/serd.h | 1 + include/serd/string.h | 18 +-- include/serd/value.h | 118 ++++++++++++++ 4 files changed, 419 insertions(+), 150 deletions(-) create mode 100644 include/serd/value.h (limited to 'include') diff --git a/include/serd/node.h b/include/serd/node.h index f42f07f1..d140e4c0 100644 --- a/include/serd/node.h +++ b/include/serd/node.h @@ -8,6 +8,7 @@ #include "serd/memory.h" #include "serd/string_view.h" #include "serd/uri.h" +#include "serd/value.h" #include "serd/write_result.h" #include "zix/attributes.h" @@ -107,168 +108,324 @@ typedef uint32_t SerdNodeFlags; /** @} - @defgroup serd_node_dynamic_allocation Dynamic Allocation - @{ -*/ + @defgroup serd_node_construction_arguments Arguments -/** - Create a new simple "token" node. + A unified representation of the arguments needed to specify any node. + + Since there are several types of node, and a node can be constructed in + memory in several ways, the API for specifying node arguments is separate + from the APIs for actually creating nodes. This prevents a combinatorial + API explosion by allowing functions that create or access nodes to have a + single parameter that describes the node. - A "token" is a node that isn't a typed or tagged literal. This can be used - to create URIs, blank nodes, CURIEs, and simple string literals. + Arguments constructors like #serd_a_file_uri return a temporary view of + their arguments, which can be passed (usually inline) to node construction + functions like #serd_node_new, or #serd_node_construct. + + @{ */ -SERD_API SerdNode* ZIX_ALLOCATED -serd_new_token(SerdAllocator* ZIX_NULLABLE allocator, - SerdNodeType type, - SerdStringView string); -/** - Create a new string literal node. 
+/// The type of a #SerdNodeArgs +typedef enum { + SERD_NODE_ARGS_TOKEN, ///< A token @see #serd_a_token + SERD_NODE_ARGS_PARSED_URI, ///< A parsed URI @see #serd_a_parsed_uri + SERD_NODE_ARGS_FILE_URI, ///< A file URI @see #serd_a_file_uri + SERD_NODE_ARGS_LITERAL, ///< A literal @see #serd_a_literal + SERD_NODE_ARGS_PRIMITIVE, ///< A "native" primitive @see #serd_a_primitive + SERD_NODE_ARGS_DECIMAL, ///< A decimal number @see #serd_a_decimal + SERD_NODE_ARGS_INTEGER, ///< An integer number @see #serd_a_integer + SERD_NODE_ARGS_HEX, ///< A hex-encoded blob @see #serd_a_hex + SERD_NODE_ARGS_BASE64, ///< A base64-encoded blob @see #serd_a_base64 +} SerdNodeArgsType; + +/// The data for #SERD_NODE_ARGS_TOKEN +typedef struct { + SerdNodeType type; + SerdStringView string; +} SerdNodeTokenArgs; + +/// The data for #SERD_NODE_ARGS_PARSED_URI +typedef struct { + SerdURIView uri; +} SerdNodeParsedURIArgs; + +/// The data for #SERD_NODE_ARGS_FILE_URI +typedef struct { + SerdStringView path; + SerdStringView hostname; +} SerdNodeFileURIArgs; + +/// The data for #SERD_NODE_ARGS_LITERAL +typedef struct { + SerdStringView string; + SerdNodeFlags flags; + SerdStringView meta; +} SerdNodeLiteralArgs; + +/// The data for #SERD_NODE_ARGS_PRIMITIVE +typedef struct { + SerdValue value; +} SerdNodePrimitiveArgs; + +/// The data for #SERD_NODE_ARGS_DECIMAL +typedef struct { + double value; +} SerdNodeDecimalArgs; + +/// The data for #SERD_NODE_ARGS_INTEGER +typedef struct { + int64_t value; +} SerdNodeIntegerArgs; + +/// The data for #SERD_NODE_ARGS_HEX or #SERD_NODE_ARGS_BASE64 +typedef struct { + size_t size; + const void* ZIX_NONNULL data; +} SerdNodeBlobArgs; + +/// The data of a #SerdNodeArgs +typedef union { + SerdNodeTokenArgs as_token; + SerdNodeParsedURIArgs as_parsed_uri; + SerdNodeFileURIArgs as_file_uri; + SerdNodeLiteralArgs as_literal; + SerdNodePrimitiveArgs as_primitive; + SerdNodeDecimalArgs as_decimal; + SerdNodeIntegerArgs as_integer; + SerdNodeBlobArgs as_blob; +} 
SerdNodeArgsData; + +/// Arguments for constructing a node +typedef struct { + SerdNodeArgsType type; ///< Type of node described and valid field of `data` + SerdNodeArgsData data; ///< Data union +} SerdNodeArgs; + +/** + A simple "token" node. + + "Token" is just a shorthand used in this API to refer to a node that is not + a typed or tagged literal, that is, a node that is just one string. This + can be used to create URIs, blank nodes, variables, and simple string + literals. + + Note that string literals constructed with this function will have no flags + set, and so will be written as "short" literals (not triple-quoted). To + construct long literals, use the more advanced serd_a_literal() with the + #SERD_IS_LONG flag. +*/ +SERD_CONST_API SerdNodeArgs +serd_a_token(SerdNodeType type, SerdStringView string); + +/// A URI node from a parsed URI +SERD_CONST_API SerdNodeArgs +serd_a_parsed_uri(SerdURIView uri); + +/// A file URI node from a path and optional hostname +SERD_CONST_API SerdNodeArgs +serd_a_file_uri(SerdStringView path, SerdStringView hostname); + +/** + A literal node with an optional datatype or language. + + Either a datatype (which must be an absolute URI) or a language (which must + be an RFC5646 language tag) may be given, but not both. + + This is the most general literal constructor, which can be used to construct + any literal node. + + @param string The string body of the node. + + @param flags Flags that describe the details of the node. + + @param meta The string value of the literal's metadata. If + #SERD_HAS_DATATYPE is set, then this must be an absolute datatype URI. If + #SERD_HAS_LANGUAGE is set, then this must be a language tag like "en-ca". + Otherwise, it is ignored. 
*/ -SERD_API SerdNode* ZIX_ALLOCATED -serd_new_string(SerdAllocator* ZIX_NULLABLE allocator, SerdStringView string); +SERD_CONST_API SerdNodeArgs +serd_a_literal(SerdStringView string, SerdNodeFlags flags, SerdStringView meta); -/** - Create a new literal node with optional datatype or language. +/// A simple string literal node from a string view +ZIX_CONST_FUNC static inline SerdNodeArgs +serd_a_string_view(SerdStringView string) +{ + return serd_a_token(SERD_LITERAL, string); +} - This can create more complex literals than serd_new_string() with an - associated datatype URI or language tag, as well as control whether a - literal should be written as a short or long (triple-quoted) string. +/// A simple string literal node from a C string +ZIX_CONST_FUNC static inline SerdNodeArgs +serd_a_string(const char* ZIX_NONNULL string) +{ + return serd_a_string_view(serd_string(string)); +} - @param allocator Allocator for the returned node. +/// A blank node from a string view +ZIX_CONST_FUNC static inline SerdNodeArgs +serd_a_blank(SerdStringView name) +{ + return serd_a_token(SERD_BLANK, name); +} - @param string The string value of the literal. +/// A blank node from a string +ZIX_CONST_FUNC static inline SerdNodeArgs +serd_a_blank_string(const char* ZIX_NONNULL name) +{ + return serd_a_blank(serd_string(name)); +} - @param flags Flags to describe the literal and its metadata. This must be a - valid combination of flags, in particular, at most one of #SERD_HAS_DATATYPE - and #SERD_HAS_LANGUAGE may be set. +/// A URI node from a string view +ZIX_CONST_FUNC static inline SerdNodeArgs +serd_a_uri(SerdStringView uri) +{ + return serd_a_token(SERD_URI, uri); +} - @param meta The string value of the literal's metadata. If - #SERD_HAS_DATATYPE is set, then this must be an absolute datatype URI. If - #SERD_HAS_LANGUAGE is set, then this must be a language tag like "en-ca". - Otherwise, it is ignored. +/** + A URI node from a string. 
- @return A newly allocated literal node that must be freed with - serd_node_free(), or null if the arguments are invalid or allocation failed. + @param uri The URI string. */ -SERD_API SerdNode* ZIX_ALLOCATED -serd_new_literal(SerdAllocator* ZIX_NULLABLE allocator, - SerdStringView string, - SerdNodeFlags flags, - SerdStringView meta); +ZIX_CONST_FUNC static inline SerdNodeArgs +serd_a_uri_string(const char* ZIX_NONNULL uri) +{ + return serd_a_uri(serd_string(uri)); +} /** - Create a new node from a blank node label. + A literal node with a datatype. + + @param string The string body of the node. + @param datatype The absolute URI of the datatype. */ -SERD_API SerdNode* ZIX_ALLOCATED -serd_new_blank(SerdAllocator* ZIX_NULLABLE allocator, SerdStringView string); +ZIX_CONST_FUNC static inline SerdNodeArgs +serd_a_typed_literal(const SerdStringView string, const SerdStringView datatype) +{ + return serd_a_literal(string, SERD_HAS_DATATYPE, datatype); +} /** - Create a new URI node from a parsed URI. + A literal node with a language. + + @param string The string body of the node. + @param language A language tag like "en-ca". */ -SERD_API SerdNode* ZIX_ALLOCATED -serd_new_parsed_uri(SerdAllocator* ZIX_NULLABLE allocator, SerdURIView uri); +ZIX_CONST_FUNC static inline SerdNodeArgs +serd_a_plain_literal(const SerdStringView string, const SerdStringView language) +{ + return serd_a_literal(string, SERD_HAS_LANGUAGE, language); +} /** - Create a new URI node from a string. + A canonical literal for a primitive value. + + The node will be a typed literal in canonical form for the xsd datatype + corresponding to the value. */ -SERD_API SerdNode* ZIX_ALLOCATED -serd_new_uri(SerdAllocator* ZIX_NULLABLE allocator, SerdStringView string); +SERD_CONST_API SerdNodeArgs +serd_a_primitive(SerdValue value); /** - Create a new file URI node from a file system path and optional hostname. + A canonical xsd:decimal literal. 
- Backslashes in Windows paths will be converted, and other characters will be - percent encoded as necessary. + The node will be an xsd:decimal literal, like "12.34", with datatype + xsd:decimal. - If `path` is relative, `hostname` is ignored. + The node will always contain a '.', start with a digit, and end with a digit + (a leading and/or trailing '0' will be added if necessary), for example, + "1.0". It will never be in scientific notation. */ -SERD_API SerdNode* ZIX_ALLOCATED -serd_new_file_uri(SerdAllocator* ZIX_NULLABLE allocator, - SerdStringView path, - SerdStringView hostname); +SERD_CONST_API SerdNodeArgs +serd_a_decimal(double value); /** - Create a new canonical xsd:boolean node. + A canonical xsd:integer literal. + + The node will be an xsd:integer literal like "1234", with datatype + xsd:integer. */ -SERD_API SerdNode* ZIX_ALLOCATED -serd_new_boolean(SerdAllocator* ZIX_NULLABLE allocator, bool b); +SERD_CONST_API SerdNodeArgs +serd_a_integer(int64_t value); /** - Create a new canonical xsd:decimal literal. + A canonical xsd:hexBinary literal. - The node will be an xsd:decimal literal, like "12.34", with - datatype xsd:decimal by default, or a custom datatype. + The node will be an xsd:hexBinary literal like "534D", with datatype + xsd:hexBinary. +*/ +SERD_CONST_API SerdNodeArgs +serd_a_hex(size_t size, const void* ZIX_NONNULL data); - The node will always contain a '.', start with a digit, and end with a digit - (a leading and/or trailing '0' will be added if necessary), for example, - "1.0". It will never be in scientific notation. +/** + A canonical xsd:base64Binary literal. - @param allocator Allocator for the returned node. - @param d The value for the new node. - @param datatype Datatype of node, or NULL for xsd:decimal. + The node will be an xsd:base64Binary literal like "Zm9vYmFy", with datatype + xsd:base64Binary. 
*/ -SERD_API SerdNode* ZIX_ALLOCATED -serd_new_decimal(SerdAllocator* ZIX_NULLABLE allocator, - double d, - const SerdNode* ZIX_NULLABLE datatype); +SERD_CONST_API SerdNodeArgs +serd_a_base64(size_t size, const void* ZIX_NONNULL data); /** - Create a new canonical xsd:double literal. + @} + @defgroup serd_node_construction Construction - The node will be in scientific notation, like "1.23E4", except for NaN and - negative/positive infinity, which are "NaN", "-INF", and "INF", - respectively. + This is the low-level node construction API, which can be used to construct + nodes into existing buffers. Advanced applications can use this to + specially manage node memory, for example by allocating nodes on the stack, + or with a special allocator. - Uses the shortest possible representation that precisely describes the - value, which has at most 17 significant digits (under 24 characters total). + Note that nodes are "plain old data", so there is no need to destroy a + constructed node, and nodes may be trivially copied, for example with + memcpy(). - @param allocator Allocator for the returned node. - @param d Double value to write. - @return A literal node with datatype xsd:double. + @{ */ -SERD_API SerdNode* ZIX_ALLOCATED -serd_new_double(SerdAllocator* ZIX_NULLABLE allocator, double d); /** - Create a new canonical xsd:float literal. + Construct a node into an existing buffer. - Uses identical formatting to serd_new_double(), except with at most 9 - significant digits (under 14 characters total). + This is the universal node constructor which can construct any node. The + type of node is specified in a #SerdNodeArgs tagged union, to avoid API + bloat and allow this function to be used with data-based dispatch. - @param allocator Allocator for the returned node. - @param f Float value of literal. - @return A literal node with datatype xsd:float. + This function may also be used to determine the size of buffer required by + passing a null buffer with zero size. 
+ + @param buf_size The size of `buf` in bytes, or zero to only measure. + + @param buf Buffer where the node will be written, or null to only measure. + + @param args Arguments describing the node to construct. + + @return A result with a `status` and a `count` of bytes written. If the + buffer is too small for the node, then `status` will be #SERD_OVERFLOW, and + `count` will be set to the number of bytes required to successfully + construct the node. */ -SERD_API SerdNode* ZIX_ALLOCATED -serd_new_float(SerdAllocator* ZIX_NULLABLE allocator, float f); +SERD_API SerdWriteResult +serd_node_construct(size_t buf_size, void* ZIX_NULLABLE buf, SerdNodeArgs args); /** - Create a new canonical xsd:integer literal. + @} + @defgroup serd_node_dynamic_allocation Dynamic Allocation - The node will be an xsd:integer literal like "1234", with datatype - xsd:integer. + This is a convenient higher-level node construction API which allocates + nodes with an allocator. The returned nodes must be freed with + serd_node_free() using the same allocator. - @param allocator Allocator for the returned node. - @param i Integer value of literal. + @{ */ -SERD_API SerdNode* ZIX_ALLOCATED -serd_new_integer(SerdAllocator* ZIX_NULLABLE allocator, int64_t i); /** - Create a new canonical xsd:base64Binary literal. + Create a new node. - This function can be used to make a node out of arbitrary binary data, which - can be decoded using serd_base64_decode(). + This allocates and constructs a new node of any type. - @param allocator Allocator for the returned node. - @param buf Raw binary data to encode in node. - @param size Size of `buf` in bytes. + @return A newly allocated node that must be freed with serd_node_free(), or + null. */ SERD_API SerdNode* ZIX_ALLOCATED -serd_new_base64(SerdAllocator* ZIX_NULLABLE allocator, - const void* ZIX_NONNULL buf, - size_t size); +serd_node_new(SerdAllocator* ZIX_NULLABLE allocator, SerdNodeArgs args); /** Return a deep copy of `node`. 
@@ -353,45 +510,37 @@ SERD_PURE_API const SerdNode* ZIX_NULLABLE serd_node_language(const SerdNode* ZIX_NONNULL node); /** - Return the value of `node` as a boolean. + Return the primitive value of a literal node. - This will work for booleans, and numbers of any datatype if they are 0 or - 1. + This will return a typed numeric value if the node can be read as one, or + nothing otherwise. - @return The value of `node` as a `bool`, or `false` on error. + @return The primitive value of `node`, if possible and supported. */ -SERD_API bool -serd_get_boolean(const SerdNode* ZIX_NONNULL node); +SERD_API SerdValue +serd_node_value(const SerdNode* ZIX_NONNULL node); /** - Return the value of `node` as a double. + Return the primitive value of a node as a specific type of number. - This will coerce numbers of any datatype to double, if the value fits. + This is like serd_node_value(), but will coerce the value of the node to the + requested type if possible. - @return The value of `node` as a `double`, or NaN on error. -*/ -SERD_API double -serd_get_double(const SerdNode* ZIX_NONNULL node); + @param node The node to interpret as a number. -/** - Return the value of `node` as a float. + @param type The desired numeric datatype of the result. - This will coerce numbers of any datatype to float, if the value fits. + @param lossy Whether lossy conversions can be used. If this is false, then + this function only succeeds if the value could be converted back to the + original datatype of the node without loss. Otherwise, precision may be + reduced or values may be truncated to fit the result. - @return The value of `node` as a `float`, or NaN on error. + @return The value of `node` as a #SerdValue, or nothing. */ -SERD_API float -serd_get_float(const SerdNode* ZIX_NONNULL node); - -/** - Return the value of `node` as a long (signed 64-bit integer). - - This will coerce numbers of any datatype to long, if the value fits. 
- - @return The value of `node` as a `int64_t`, or 0 on error. -*/ -SERD_API int64_t -serd_get_integer(const SerdNode* ZIX_NONNULL node); +SERD_API SerdValue +serd_node_value_as(const SerdNode* ZIX_NONNULL node, + SerdValueType type, + bool lossy); /** Return the maximum size of a decoded binary node in bytes. @@ -400,16 +549,21 @@ serd_get_integer(const SerdNode* ZIX_NONNULL node); decode to. This is calculated as a simple constant-time arithmetic expression based on the length of the encoded string, so may be larger than the actual size of the data due to things like additional whitespace. + + @return The size of the decoded hex or base64 blob `node`, or zero if it + does not have datatype <http://www.w3.org/2001/XMLSchema#hexBinary> or + <http://www.w3.org/2001/XMLSchema#base64Binary>. */ SERD_PURE_API size_t -serd_get_base64_size(const SerdNode* ZIX_NONNULL node); +serd_node_decoded_size(const SerdNode* ZIX_NONNULL node); /** - Decode a base64 node. + Decode a binary (base64 or hex) node. - This function can be used to decode a node created with serd_new_base64(). + This function can be used to decode a node created with serd_a_base64() or + serd_a_hex() and retrieve the original unencoded binary data. - @param node A literal node which is an encoded base64 string. + @param node A literal node which is an encoded base64 or hex string. @param buf_size The size of `buf` in bytes. @@ -420,9 +574,9 @@ serd_get_base64_size(const SerdNode* ZIX_NONNULL node); along with the number of bytes required for successful decoding.
*/ SERD_API SerdWriteResult -serd_get_base64(const SerdNode* ZIX_NONNULL node, - size_t buf_size, - void* ZIX_NONNULL buf); +serd_node_decode(const SerdNode* ZIX_NONNULL node, + size_t buf_size, + void* ZIX_NONNULL buf); /** @} diff --git a/include/serd/serd.h b/include/serd/serd.h index 6103c543..88be5daa 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -61,6 +61,7 @@ #include "serd/node.h" #include "serd/statement.h" #include "serd/uri.h" +#include "serd/value.h" /** @} diff --git a/include/serd/string.h b/include/serd/string.h index 11e55e6e..028a1a29 100644 --- a/include/serd/string.h +++ b/include/serd/string.h @@ -18,19 +18,15 @@ SERD_BEGIN_DECLS */ /** - Decode a base64 string. + Compare two strings ignoring case. - This function can be used to decode a node created with serd_new_base64(). - - @param str Base64 string to decode. - @param len The length of `str`. - @param size Set to the size of the returned blob in bytes. - @return A newly allocated blob which must be freed with serd_free(). + @return Less than, equal to, or greater than zero if `s1` is less than, + equal to, or greater than `s2`, respectively. */ -SERD_API void* ZIX_ALLOCATED -serd_base64_decode(const char* ZIX_NONNULL str, - size_t len, - size_t* ZIX_NONNULL size); +SERD_PURE_API int +serd_strncasecmp(const char* ZIX_NONNULL s1, + const char* ZIX_NONNULL s2, + size_t n); /** @} diff --git a/include/serd/value.h b/include/serd/value.h new file mode 100644 index 00000000..a31268e0 --- /dev/null +++ b/include/serd/value.h @@ -0,0 +1,118 @@ +// Copyright 2011-2022 David Robillard +// SPDX-License-Identifier: ISC + +#ifndef SERD_VALUE_H +#define SERD_VALUE_H + +#include "serd/attributes.h" + +#include <stdbool.h> +#include <stdint.h> + +SERD_BEGIN_DECLS + +/** + @defgroup serd_node_value Values + @ingroup serd_data + + Serd supports reading and writing machine-native numbers, called "values", + in a standards-conformant and portable way.
The value structure is used in + the API to allow passing and returning a primitive value of any supported + type. Note that this is just an API convenience, literal nodes themselves + always store their values as strings. + + @{ +*/ + +/// The type of a #SerdValue +typedef enum { + SERD_NOTHING, ///< Sentinel for unknown datatypes or errors + SERD_BOOL, ///< xsd:boolean (bool) + SERD_DOUBLE, ///< xsd:double (double) + SERD_FLOAT, ///< xsd:float (float) + SERD_LONG, ///< xsd:long (int64_t) + SERD_INT, ///< xsd:integer (int32_t) + SERD_SHORT, ///< xsd:short (int16_t) + SERD_BYTE, ///< xsd:byte (int8_t) + SERD_ULONG, ///< xsd:unsignedLong (uint64_t) + SERD_UINT, ///< xsd:unsignedInt (uint32_t) + SERD_USHORT, ///< xsd:unsignedShort (uint16_t) + SERD_UBYTE, ///< xsd:unsignedByte (uint8_t) +} SerdValueType; + +/// The data of a #SerdValue (the actual machine-native primitive) +typedef union { + bool as_bool; + double as_double; + float as_float; + int64_t as_long; + int32_t as_int; + int16_t as_short; + int8_t as_byte; + uint64_t as_ulong; + uint32_t as_uint; + uint16_t as_ushort; + uint8_t as_ubyte; +} SerdValueData; + +/// A primitive value with a type tag +typedef struct { + SerdValueType type; + SerdValueData data; +} SerdValue; + +/// Convenience constructor to make a #SERD_NOTHING (non-)value +SERD_CONST_API SerdValue +serd_nothing(void); + +/// Convenience constructor to make a #SERD_BOOL value +SERD_CONST_API SerdValue +serd_bool(bool v); + +/// Convenience constructor to make a #SERD_DOUBLE value +SERD_CONST_API SerdValue +serd_double(double v); + +/// Convenience constructor to make a #SERD_FLOAT value +SERD_CONST_API SerdValue +serd_float(float v); + +/// Convenience constructor to make a #SERD_LONG value +SERD_CONST_API SerdValue +serd_long(int64_t v); + +/// Convenience constructor to make a #SERD_INT value +SERD_CONST_API SerdValue +serd_int(int32_t v); + +/// Convenience constructor to make a #SERD_SHORT value +SERD_CONST_API SerdValue +serd_short(int16_t 
v); + +/// Convenience constructor to make a #SERD_BYTE value +SERD_CONST_API SerdValue +serd_byte(int8_t v); + +/// Convenience constructor to make a #SERD_ULONG value +SERD_CONST_API SerdValue +serd_ulong(uint64_t v); + +/// Convenience constructor to make a #SERD_UINT value +SERD_CONST_API SerdValue +serd_uint(uint32_t v); + +/// Convenience constructor to make a #SERD_USHORT value +SERD_CONST_API SerdValue +serd_ushort(uint16_t v); + +/// Convenience constructor to make a #SERD_UBYTE value +SERD_CONST_API SerdValue +serd_ubyte(uint8_t v); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_VALUE_H -- cgit v1.2.1