diff options
47 files changed, 1554 insertions, 977 deletions
diff --git a/.clang-format b/.clang-format index 26b69415..0a3ac765 100644 --- a/.clang-format +++ b/.clang-format @@ -23,6 +23,7 @@ KeepEmptyLinesAtTheStartOfBlocks: false SpacesInContainerLiterals: false AttributeMacros: - SERD_ALLOCATED + - SERD_ALWAYS_INLINE_FUNC - SERD_API - SERD_CONST_FINC - SERD_FALLTHROUGH diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index 1fd32769..f035564f 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -24,11 +24,14 @@ SHOW_FILES = NO MACRO_EXPANSION = YES PREDEFINED = SERD_ALLOCATED \ + SERD_ALWAYS_INLINE_FUNC= \ SERD_API \ + SERD_CONST_API= \ SERD_CONST_FUNC= \ SERD_NONNULL= \ SERD_NULLABLE= \ - SERD_PURE_FUNC= + SERD_PURE_API= \ + SERD_PURE_FUNC= \ RECURSIVE = YES STRIP_FROM_PATH = @SERD_SRCDIR@ diff --git a/include/serd/attributes.h b/include/serd/attributes.h new file mode 100644 index 00000000..4e0d68e2 --- /dev/null +++ b/include/serd/attributes.h @@ -0,0 +1,83 @@ +// Copyright 2011-2022 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_ATTRIBUTES_H +#define SERD_ATTRIBUTES_H + +/** + @defgroup serd_attributes Attributes + @ingroup serd_library + @{ +*/ + +#ifdef __cplusplus +# ifdef __GNUC__ +# define SERD_BEGIN_DECLS \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wzero-as-null-pointer-constant\"") \ + extern "C" { +# define SERD_END_DECLS \ + } \ + _Pragma("GCC diagnostic pop") +# else +# define SERD_BEGIN_DECLS extern "C" { +# define SERD_END_DECLS } +# endif +#else +# define SERD_BEGIN_DECLS +# define SERD_END_DECLS +#endif + +// SERD_API must be used to decorate things in the public API +#ifndef SERD_API +# if defined(_WIN32) && !defined(SERD_STATIC) && defined(SERD_INTERNAL) +# define SERD_API __declspec(dllexport) +# elif defined(_WIN32) && !defined(SERD_STATIC) +# define SERD_API __declspec(dllimport) +# elif defined(__GNUC__) +# define SERD_API __attribute__((visibility("default"))) +# else +# define SERD_API +# endif +#endif + +// GCC function attributes 
+#ifdef __GNUC__ +# define SERD_ALWAYS_INLINE_FUNC __attribute__((always_inline)) +# define SERD_CONST_FUNC __attribute__((const)) +# define SERD_LOG_FUNC(fmt, arg1) __attribute__((format(printf, fmt, arg1))) +# define SERD_MALLOC_FUNC __attribute__((malloc)) +# define SERD_NODISCARD __attribute__((warn_unused_result)) +# define SERD_PURE_FUNC __attribute__((pure)) +#else +# define SERD_ALWAYS_INLINE_FUNC ///< Should absolutely always be inlined +# define SERD_CONST_FUNC ///< Only reads its parameters +# define SERD_LOG_FUNC(fmt, arg1) ///< Has printf-like parameters +# define SERD_MALLOC_FUNC ///< Allocates memory +# define SERD_NODISCARD ///< Returns a value that must be used +# define SERD_PURE_FUNC ///< Only reads memory +#endif + +// Clang nullability annotations +#if defined(__clang__) && __clang_major__ >= 7 +# define SERD_NONNULL _Nonnull +# define SERD_NULLABLE _Nullable +# define SERD_ALLOCATED _Null_unspecified +#else +# define SERD_NONNULL ///< A non-null pointer +# define SERD_NULLABLE ///< A nullable pointer +# define SERD_ALLOCATED ///< An allocated (possibly null) pointer +# define SERD_UNSPECIFIED ///< A pointer with unspecified nullability +#endif + +/// A pure function in the public API that only reads memory +#define SERD_PURE_API SERD_API SERD_PURE_FUNC + +/// A const function in the public API that is pure and only reads parameters +#define SERD_CONST_API SERD_API SERD_CONST_FUNC + +/** + @} +*/ + +#endif // SERD_ATTRIBUTES_H diff --git a/include/serd/buffer.h b/include/serd/buffer.h new file mode 100644 index 00000000..90b51a48 --- /dev/null +++ b/include/serd/buffer.h @@ -0,0 +1,56 @@ +// Copyright 2011-2022 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_BUFFER_H +#define SERD_BUFFER_H + +#include "serd/attributes.h" + +#include <stddef.h> + +SERD_BEGIN_DECLS + +/** + @defgroup serd_buffer Dynamic Memory Buffers + @ingroup serd_memory + + The #SerdBuffer type represents a writable area of memory with a 
known size. + + @{ +*/ + +/// A mutable buffer in memory +typedef struct { + void* SERD_NULLABLE buf; ///< Buffer + size_t len; ///< Size of buffer in bytes +} SerdBuffer; + +/** + A convenience sink function for writing to a string. + + This function can be used as a #SerdSink to write to a SerdBuffer which is + resized as necessary with realloc(). The `stream` parameter must point to + an initialized #SerdBuffer. When the write is finished, the string should be + retrieved with serd_buffer_sink_finish(). +*/ +SERD_API size_t +serd_buffer_sink(const void* SERD_NONNULL buf, + size_t len, + void* SERD_NONNULL stream); + +/** + Finish writing to a buffer with serd_buffer_sink(). + + The returned string is the result of the serialisation, which is null + terminated (by this function) and owned by the caller. +*/ +SERD_API char* SERD_NONNULL +serd_buffer_sink_finish(SerdBuffer* SERD_NONNULL stream); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_BUFFER_H diff --git a/include/serd/env.h b/include/serd/env.h new file mode 100644 index 00000000..906ec2f7 --- /dev/null +++ b/include/serd/env.h @@ -0,0 +1,103 @@ +// Copyright 2011-2022 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_ENV_H +#define SERD_ENV_H + +#include "serd/attributes.h" +#include "serd/node.h" +#include "serd/sink.h" +#include "serd/status.h" +#include "serd/string_view.h" +#include "serd/uri.h" + +#include <stdbool.h> + +SERD_BEGIN_DECLS + +/** + @defgroup serd_env Environment + @ingroup serd_streaming + @{ +*/ + +/// Lexical environment for relative URIs or CURIEs (base URI and namespaces) +typedef struct SerdEnvImpl SerdEnv; + +/// Create a new environment +SERD_API SerdEnv* SERD_ALLOCATED +serd_env_new(const SerdNode* SERD_NULLABLE base_uri); + +/// Free `env` +SERD_API void +serd_env_free(SerdEnv* SERD_NULLABLE env); + +/// Get the current base URI +SERD_API const SerdNode* SERD_NONNULL +serd_env_base_uri(const SerdEnv* SERD_NONNULL env, + SerdURIView* 
SERD_NULLABLE out); + +/// Set the current base URI +SERD_API SerdStatus +serd_env_set_base_uri(SerdEnv* SERD_NONNULL env, + const SerdNode* SERD_NULLABLE uri); + +/** + Set a namespace prefix. + + A namespace prefix is used to expand CURIE nodes, for example, with the + prefix "xsd" set to "http://www.w3.org/2001/XMLSchema#", "xsd:decimal" will + expand to "http://www.w3.org/2001/XMLSchema#decimal". +*/ +SERD_API SerdStatus +serd_env_set_prefix(SerdEnv* SERD_NONNULL env, + const SerdNode* SERD_NONNULL name, + const SerdNode* SERD_NONNULL uri); + +/// Set a namespace prefix +SERD_API SerdStatus +serd_env_set_prefix_from_strings(SerdEnv* SERD_NONNULL env, + const char* SERD_NONNULL name, + const char* SERD_NONNULL uri); + +/// Qualify `uri` into a CURIE if possible +SERD_API bool +serd_env_qualify(const SerdEnv* SERD_NULLABLE env, + const SerdNode* SERD_NONNULL uri, + SerdNode* SERD_NONNULL prefix, + SerdStringView* SERD_NONNULL suffix); + +/** + Expand `curie`. + + Errors: SERD_ERR_BAD_ARG if `curie` is not valid, or SERD_ERR_BAD_CURIE if + prefix is not defined in `env`. +*/ +SERD_API SerdStatus +serd_env_expand(const SerdEnv* SERD_NULLABLE env, + const SerdNode* SERD_NONNULL curie, + SerdStringView* SERD_NONNULL uri_prefix, + SerdStringView* SERD_NONNULL uri_suffix); + +/** + Expand `node`, which must be a CURIE or URI, to a full URI. + + Returns null if `node` can not be expanded. 
+*/ +SERD_API SerdNode +serd_env_expand_node(const SerdEnv* SERD_NULLABLE env, + const SerdNode* SERD_NONNULL node); + +/// Call `func` for each prefix defined in `env` +SERD_API void +serd_env_foreach(const SerdEnv* SERD_NONNULL env, + SerdPrefixFunc SERD_NONNULL func, + void* SERD_NULLABLE handle); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_ENV_H diff --git a/include/serd/error.h b/include/serd/error.h new file mode 100644 index 00000000..7be1d372 --- /dev/null +++ b/include/serd/error.h @@ -0,0 +1,45 @@ +// Copyright 2011-2022 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_ERROR_H +#define SERD_ERROR_H + +#include "serd/attributes.h" +#include "serd/status.h" + +#include <stdarg.h> + +SERD_BEGIN_DECLS + +/** + @defgroup serd_error Error reporting + @ingroup serd_errors + @{ +*/ + +/// An error description +typedef struct { + SerdStatus status; ///< Error code + const char* SERD_NULLABLE filename; ///< File with error + unsigned line; ///< Line in file with error or 0 + unsigned col; ///< Column in file with error + const char* SERD_NONNULL fmt; ///< Printf-style format string + va_list* SERD_NONNULL args; ///< Arguments for fmt +} SerdError; + +/** + Callback function for errors. + + @param handle Handle for user data. + @param error Error description. +*/ +typedef SerdStatus (*SerdErrorFunc)(void* SERD_NULLABLE handle, + const SerdError* SERD_NONNULL error); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_ERROR_H diff --git a/include/serd/memory.h b/include/serd/memory.h new file mode 100644 index 00000000..1bbd649c --- /dev/null +++ b/include/serd/memory.h @@ -0,0 +1,33 @@ +// Copyright 2011-2022 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_MEMORY_H +#define SERD_MEMORY_H + +#include "serd/attributes.h" + +SERD_BEGIN_DECLS + +/** + @defgroup serd_allocator Allocator + @ingroup serd_memory + @{ +*/ + +/** + Free memory allocated by Serd. 
+ + This function exists because some systems require memory allocated by a + library to be freed by code in the same library. It is otherwise equivalent + to the standard C free() function. +*/ +SERD_API void +serd_free(void* SERD_NULLABLE ptr); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_MEMORY_H diff --git a/include/serd/node.h b/include/serd/node.h new file mode 100644 index 00000000..8f85a8d7 --- /dev/null +++ b/include/serd/node.h @@ -0,0 +1,243 @@ +// Copyright 2011-2022 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_NODE_H +#define SERD_NODE_H + +#include "serd/attributes.h" +#include "serd/uri.h" + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +SERD_BEGIN_DECLS + +/** + @defgroup serd_node Node + @ingroup serd_data + @{ +*/ + +/** + Type of a node. + + An RDF node, in the abstract sense, can be either a resource, literal, or a + blank. This type is more precise, because syntactically there are two ways + to refer to a resource (by URI or CURIE). + + There are also two ways to refer to a blank node in syntax (by ID or + anonymously), but this is handled by statement flags rather than distinct + node types. +*/ +typedef enum { + /** + The type of a nonexistent node. + + This type is useful as a sentinel, but is never emitted by the reader. + */ + SERD_NOTHING = 0, + + /** + Literal value. + + A literal is a string that optionally has either a language, or a datatype + (but never both). Literals can only occur as the object of a statement, + never the subject or predicate. + */ + SERD_LITERAL = 1, + + /** + Uniform Resource Identifier (URI). + + A URI (more pedantically, a URI reference) is either a relative reference + with respect to some base URI, like "foo/bar", or an absolute URI with a + scheme, like "http://example.org/foo". + + @see [RFC3986](http://tools.ietf.org/html/rfc3986) + */ + SERD_URI = 2, + + /** + CURIE, a shortened URI. 
+ + Value is an unquoted CURIE string relative to the current environment, + e.g. "rdf:type". @see [CURIE Syntax 1.0](http://www.w3.org/TR/curie) + */ + SERD_CURIE = 3, + + /** + A blank node. + + A blank node is a resource that has no URI. The identifier of a blank + node is local to its context (a document, for example), and so unlike + URIs, blank nodes can't be used to link data across sources. + + @see [RDF 1.1 + Turtle](http://www.w3.org/TR/turtle/#grammar-production-BLANK_NODE_LABEL) + */ + SERD_BLANK = 4, +} SerdNodeType; + +/// Node flags, which ORed together make a #SerdNodeFlags +typedef enum { + SERD_HAS_NEWLINE = 1U << 0U, ///< Contains line breaks ('\\n' or '\\r') + SERD_HAS_QUOTE = 1U << 1U, ///< Contains quotes ('"') +} SerdNodeFlag; + +/// Bitwise OR of #SerdNodeFlag values +typedef uint32_t SerdNodeFlags; + +/// A syntactic RDF node +typedef struct { + const char* SERD_NULLABLE buf; ///< Value string + size_t n_bytes; ///< Size in bytes (excluding null) + SerdNodeFlags flags; ///< Node flags (string properties) + SerdNodeType type; ///< Node type +} SerdNode; + +static const SerdNode SERD_NODE_NULL = {NULL, 0, 0, SERD_NOTHING}; + +/** + Make a (shallow) node from `str`. + + This measures, but does not copy, `str`. No memory is allocated. +*/ +SERD_API SerdNode +serd_node_from_string(SerdNodeType type, const char* SERD_NULLABLE str); + +/** + Make a (shallow) node from a prefix of `str`. + + This measures, but does not copy, `str`. No memory is allocated. + Note that the returned node may not be null terminated. 
+*/ +SERD_API SerdNode +serd_node_from_substring(SerdNodeType type, + const char* SERD_NULLABLE str, + size_t len); + +/// Simple wrapper for serd_node_new_uri() to resolve a URI node +SERD_API SerdNode +serd_node_new_uri_from_node(const SerdNode* SERD_NONNULL uri_node, + const SerdURIView* SERD_NULLABLE base, + SerdURIView* SERD_NULLABLE out); + +/// Simple wrapper for serd_node_new_uri() to resolve a URI string +SERD_API SerdNode +serd_node_new_uri_from_string(const char* SERD_NULLABLE str, + const SerdURIView* SERD_NULLABLE base, + SerdURIView* SERD_NULLABLE out); + +/** + Create a new file URI node from a file system path and optional hostname. + + Backslashes in Windows paths will be converted, and other characters will be + percent encoded as necessary. + + If `path` is relative, `hostname` is ignored. + If `out` is not NULL, it will be set to the parsed URI. +*/ +SERD_API SerdNode +serd_node_new_file_uri(const char* SERD_NONNULL path, + const char* SERD_NULLABLE hostname, + SerdURIView* SERD_NULLABLE out); + +/** + Create a new node by serialising `uri` into a new string. + + @param uri The URI to serialise. + + @param base Base URI to resolve `uri` against (or NULL for no resolution). + + @param out Set to the parsing of the new URI (i.e. points only to + memory owned by the new returned node). +*/ +SERD_API SerdNode +serd_node_new_uri(const SerdURIView* SERD_NONNULL uri, + const SerdURIView* SERD_NULLABLE base, + SerdURIView* SERD_NULLABLE out); + +/** + Create a new node by serialising `uri` into a new relative URI. + + @param uri The URI to serialise. + + @param base Base URI to make `uri` relative to, if possible. + + @param root Root URI for resolution (see serd_uri_serialise_relative()). + + @param out Set to the parsing of the new URI (i.e. points only to + memory owned by the new returned node). 
+*/ +SERD_API SerdNode +serd_node_new_relative_uri(const SerdURIView* SERD_NONNULL uri, + const SerdURIView* SERD_NULLABLE base, + const SerdURIView* SERD_NULLABLE root, + SerdURIView* SERD_NULLABLE out); + +/** + Create a new node by serialising `d` into an xsd:decimal string. + + The resulting node will always contain a '.', start with a digit, and end + with a digit (i.e. will have a leading and/or trailing '0' if necessary). + It will never be in scientific notation. A maximum of `frac_digits` digits + will be written after the decimal point, but trailing zeros will + automatically be omitted (except one if `d` is a round integer). + + Note that about 16 and 8 fractional digits are required to precisely + represent a double and float, respectively. + + @param d The value for the new node. + @param frac_digits The maximum number of digits after the decimal place. +*/ +SERD_API SerdNode +serd_node_new_decimal(double d, unsigned frac_digits); + +/// Create a new node by serialising `i` into an xsd:integer string +SERD_API SerdNode +serd_node_new_integer(int64_t i); + +/** + Create a node by serialising `buf` into an xsd:base64Binary string. + + This function can be used to make a serialisable node out of arbitrary + binary data, which can be decoded using serd_base64_decode(). + + @param buf Raw binary input data. + @param size Size of `buf`. + @param wrap_lines Wrap lines at 76 characters to conform to RFC 2045. +*/ +SERD_API SerdNode +serd_node_new_blob(const void* SERD_NONNULL buf, size_t size, bool wrap_lines); + +/** + Make a deep copy of `node`. + + @return a node that the caller must free with serd_node_free(). +*/ +SERD_API SerdNode +serd_node_copy(const SerdNode* SERD_NULLABLE node); + +/// Return true iff `a` is equal to `b` +SERD_PURE_API +bool +serd_node_equals(const SerdNode* SERD_NONNULL a, + const SerdNode* SERD_NONNULL b); + +/** + Free any data owned by `node`. 
+ + Note that if `node` is itself dynamically allocated (which is not the case + for nodes created internally by serd), it will not be freed. +*/ +SERD_API void +serd_node_free(SerdNode* SERD_NULLABLE node); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_NODE_H diff --git a/include/serd/reader.h b/include/serd/reader.h new file mode 100644 index 00000000..1d9e9a6b --- /dev/null +++ b/include/serd/reader.h @@ -0,0 +1,180 @@ +// Copyright 2011-2022 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_READER_H +#define SERD_READER_H + +#include "serd/attributes.h" +#include "serd/error.h" +#include "serd/node.h" +#include "serd/sink.h" +#include "serd/status.h" +#include "serd/stream.h" +#include "serd/syntax.h" + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> + +SERD_BEGIN_DECLS + +/** + @defgroup serd_reader Reader + @ingroup serd_reading_writing + @{ +*/ + +/// Streaming parser that reads a text stream and writes to a statement sink +typedef struct SerdReaderImpl SerdReader; + +/// Create a new RDF reader +SERD_API SerdReader* SERD_ALLOCATED +serd_reader_new(SerdSyntax syntax, + void* SERD_NULLABLE handle, + void (*SERD_NULLABLE free_handle)(void* SERD_NULLABLE), + SerdBaseFunc SERD_NULLABLE base_func, + SerdPrefixFunc SERD_NULLABLE prefix_func, + SerdStatementFunc SERD_NULLABLE statement_func, + SerdEndFunc SERD_NULLABLE end_func); + +/** + Enable or disable strict parsing. + + The reader is non-strict (lax) by default, which will tolerate URIs with + invalid characters. Setting strict will fail when parsing such files. An + error is printed for invalid input in either case. +*/ +SERD_API void +serd_reader_set_strict(SerdReader* SERD_NONNULL reader, bool strict); + +/** + Set a function to be called when errors occur during reading. + + The `error_func` will be called with `handle` as its first argument. If + no error function is set, errors are printed to stderr in GCC style. 
+*/ +SERD_API void +serd_reader_set_error_sink(SerdReader* SERD_NONNULL reader, + SerdErrorFunc SERD_NULLABLE error_func, + void* SERD_NULLABLE error_handle); + +/// Return the `handle` passed to serd_reader_new() +SERD_PURE_API +void* SERD_NULLABLE +serd_reader_handle(const SerdReader* SERD_NONNULL reader); + +/** + Set a prefix to be added to all blank node identifiers. + + This is useful when multiple files are to be parsed into the same output (a + model or a file). Since Serd preserves blank node IDs, this could cause + conflicts where two non-equivalent blank nodes are merged, resulting in + corrupt data. By setting a unique blank node prefix for each parsed file, + this can be avoided, while preserving blank node names. +*/ +SERD_API void +serd_reader_add_blank_prefix(SerdReader* SERD_NONNULL reader, + const char* SERD_NULLABLE prefix); + +/** + Set the URI of the default graph. + + If this is set, the reader will emit quads with the graph set to the given + node for any statements that are not in a named graph (which is currently + all of them since Serd currently does not support any graph syntaxes). +*/ +SERD_API void +serd_reader_set_default_graph(SerdReader* SERD_NONNULL reader, + const SerdNode* SERD_NULLABLE graph); + +/// Read a file at a given `uri` +SERD_API SerdStatus +serd_reader_read_file(SerdReader* SERD_NONNULL reader, + const char* SERD_NONNULL uri); + +/** + Start an incremental read from a file handle. + + Iff `bulk` is true, `file` will be read a page at a time. This is more + efficient, but uses a page of memory and means that an entire page of input + must be ready before any callbacks will fire. To react as soon as input + arrives, set `bulk` to false. +*/ +SERD_API SerdStatus +serd_reader_start_stream(SerdReader* SERD_NONNULL reader, + FILE* SERD_NONNULL file, + const char* SERD_NULLABLE name, + bool bulk); + +/** + Start an incremental read from a user-specified source. 
+ + The `read_func` is guaranteed to only be called for `page_size` elements + with size 1 (i.e. `page_size` bytes). +*/ +SERD_API SerdStatus +serd_reader_start_source_stream(SerdReader* SERD_NONNULL reader, + SerdSource SERD_NONNULL read_func, + SerdStreamErrorFunc SERD_NONNULL error_func, + void* SERD_NONNULL stream, + const char* SERD_NULLABLE name, + size_t page_size); + +/** + Read a single "chunk" of data during an incremental read. + + This function will read a single top level description, and return. This + may be a directive, statement, or several statements; essentially it reads + until a '.' is encountered. This is particularly useful for reading + directly from a pipe or socket. +*/ +SERD_API SerdStatus +serd_reader_read_chunk(SerdReader* SERD_NONNULL reader); + +/// Finish an incremental read from a file handle +SERD_API SerdStatus +serd_reader_end_stream(SerdReader* SERD_NONNULL reader); + +/// Read `file` +SERD_API SerdStatus +serd_reader_read_file_handle(SerdReader* SERD_NONNULL reader, + FILE* SERD_NONNULL file, + const char* SERD_NULLABLE name); + +/// Read a user-specified byte source +SERD_API SerdStatus +serd_reader_read_source(SerdReader* SERD_NONNULL reader, + SerdSource SERD_NONNULL source, + SerdStreamErrorFunc SERD_NONNULL error, + void* SERD_NONNULL stream, + const char* SERD_NULLABLE name, + size_t page_size); + +/// Read `utf8` +SERD_API SerdStatus +serd_reader_read_string(SerdReader* SERD_NONNULL reader, + const char* SERD_NONNULL utf8); + +/** + Skip over bytes in the input until a specific byte is encountered. + + Typically used for recovering from errors in a line-based syntax by skipping + ahead to the next newline. + + @return #SERD_SUCCESS if the given byte was reached, or #SERD_FAILURE if the + end of input is reached. 
+*/ +SERD_API SerdStatus +serd_reader_skip_until_byte(SerdReader* SERD_NONNULL reader, uint8_t byte); + +/// Free `reader` +SERD_API void +serd_reader_free(SerdReader* SERD_NULLABLE reader); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_READER_H diff --git a/include/serd/serd.h b/include/serd/serd.h index ba78b7e9..bf02bc96 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -6,981 +6,85 @@ #ifndef SERD_SERD_H #define SERD_SERD_H -#include <stdarg.h> -#include <stdbool.h> -#include <stddef.h> -#include <stdint.h> -#include <stdio.h> - -// SERD_API must be used to decorate things in the public API -#ifndef SERD_API -# if defined(_WIN32) && !defined(SERD_STATIC) && defined(SERD_INTERNAL) -# define SERD_API __declspec(dllexport) -# elif defined(_WIN32) && !defined(SERD_STATIC) -# define SERD_API __declspec(dllimport) -# elif defined(__GNUC__) -# define SERD_API __attribute__((visibility("default"))) -# else -# define SERD_API -# endif -#endif - -#ifdef __GNUC__ -# define SERD_PURE_FUNC __attribute__((pure)) -# define SERD_CONST_FUNC __attribute__((const)) -#else -# define SERD_PURE_FUNC -# define SERD_CONST_FUNC -#endif - -#if defined(__clang__) && __clang_major__ >= 7 -# define SERD_NONNULL _Nonnull -# define SERD_NULLABLE _Nullable -# define SERD_ALLOCATED _Null_unspecified -#else -# define SERD_NONNULL -# define SERD_NULLABLE -# define SERD_ALLOCATED -#endif - -#define SERD_PURE_API SERD_API SERD_PURE_FUNC -#define SERD_CONST_API SERD_API SERD_CONST_FUNC - -#ifdef __cplusplus -extern "C" { -# if defined(__GNUC__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant" -# endif -#endif - /** @defgroup serd Serd C API @{ */ -/// RDF syntax type -typedef enum { - SERD_TURTLE = 1, ///< Terse triples http://www.w3.org/TR/turtle - SERD_NTRIPLES = 2, ///< Line-based triples http://www.w3.org/TR/n-triples/ - SERD_NQUADS = 3, ///< Line-based quads http://www.w3.org/TR/n-quads/ - SERD_TRIG = 4, ///< Terse quads 
http://www.w3.org/TR/trig/ -} SerdSyntax; - -/// Flags indicating certain string properties relevant to serialisation -typedef enum { - SERD_HAS_NEWLINE = 1U << 0U, ///< Contains line breaks ('\\n' or '\\r') - SERD_HAS_QUOTE = 1U << 1U, ///< Contains quotes ('"') -} SerdNodeFlag; - -/// Bitwise OR of SerdNodeFlag values -typedef uint32_t SerdNodeFlags; - -/** - An immutable slice of a string. - - This type is used for many string parameters, to allow referring to slices - of strings in-place and to avoid redundant string measurement. -*/ -typedef struct { - const char* SERD_NULLABLE buf; ///< Start of string - size_t len; ///< Length of string in bytes -} SerdStringView; - -/// A mutable buffer in memory -typedef struct { - void* SERD_NULLABLE buf; ///< Buffer - size_t len; ///< Size of buffer in bytes -} SerdBuffer; - -/** - Free memory allocated by Serd. - - This function exists because some systems require memory allocated by a - library to be freed by code in the same library. It is otherwise equivalent - to the standard C free() function. -*/ -SERD_API void -serd_free(void* SERD_NULLABLE ptr); +// IWYU pragma: begin_exports /** - @defgroup serd_status Status Codes + @defgroup serd_library Library @{ */ -/// Return status code -typedef enum { - SERD_SUCCESS, ///< No error - SERD_FAILURE, ///< Non-fatal failure - SERD_ERR_UNKNOWN, ///< Unknown error - SERD_ERR_BAD_SYNTAX, ///< Invalid syntax - SERD_ERR_BAD_ARG, ///< Invalid argument - SERD_ERR_NOT_FOUND, ///< Not found - SERD_ERR_ID_CLASH, ///< Encountered clashing blank node IDs - SERD_ERR_BAD_CURIE, ///< Invalid CURIE (e.g. 
prefix does not exist) - SERD_ERR_INTERNAL, ///< Unexpected internal error (should not happen) - SERD_ERR_BAD_WRITE, ///< Error writing to file/stream - SERD_ERR_BAD_TEXT, ///< Invalid text encoding -} SerdStatus; - -/// Return a string describing a status code -SERD_CONST_API -const char* SERD_NONNULL -serd_strerror(SerdStatus status); +#include "serd/attributes.h" /** @} - @defgroup serd_string String Utilities + @defgroup serd_errors Error Handling @{ */ -/** - Measure a UTF-8 string. - - @return Length of `str` in bytes. - @param str A null-terminated UTF-8 string. - @param flags (Output) Set to the applicable flags. -*/ -SERD_API size_t -serd_strlen(const char* SERD_NONNULL str, SerdNodeFlags* SERD_NULLABLE flags); - -/** - Parse a string to a double. - - The API of this function is identical to the standard C strtod function, - except this function is locale-independent and always matches the lexical - format used in the Turtle grammar (the decimal point is always "."). -*/ -SERD_API double -serd_strtod(const char* SERD_NONNULL str, - char* SERD_NONNULL* SERD_NULLABLE endptr); - -/** - Decode a base64 string. - - This function can be used to deserialise a blob node created with - serd_node_new_blob(). - - @param str Base64 string to decode. - @param len The length of `str`. - @param size Set to the size of the returned blob in bytes. - @return A newly allocated blob which must be freed with serd_free(). -*/ -SERD_API void* SERD_ALLOCATED -serd_base64_decode(const char* SERD_NONNULL str, - size_t len, - size_t* SERD_NONNULL size); +#include "serd/error.h" +#include "serd/status.h" /** @} - @defgroup serd_streams Byte Streams + @defgroup serd_memory Memory Management @{ */ -/** - Function to detect I/O stream errors. - - Identical semantics to `ferror`. - - @return Non-zero if `stream` has encountered an error. -*/ -typedef int (*SerdStreamErrorFunc)(void* SERD_NONNULL stream); - -/** - Source function for raw string input. 
- - Identical semantics to `fread`, but may set errno for more informative error - reporting than supported by SerdStreamErrorFunc. - - @param buf Output buffer. - @param size Size of a single element of data in bytes (always 1). - @param nmemb Number of elements to read. - @param stream Stream to read from (FILE* for fread). - @return Number of elements (bytes) read. -*/ -typedef size_t (*SerdSource)(void* SERD_NONNULL buf, - size_t size, - size_t nmemb, - void* SERD_NONNULL stream); - -/// Sink function for raw string output -typedef size_t (*SerdSink)(const void* SERD_NONNULL buf, - size_t len, - void* SERD_NONNULL stream); +#include "serd/buffer.h" +#include "serd/memory.h" /** @} - @defgroup serd_uri URI + @defgroup serd_utilities Utilities @{ */ -/** - A parsed URI. - - This struct directly refers to slices in other strings, it does not own any - memory itself. This allows some URI operations like resolution to be done - in-place without allocating memory. -*/ -typedef struct { - SerdStringView scheme; ///< Scheme - SerdStringView authority; ///< Authority - SerdStringView path_base; ///< Path prefix if relative - SerdStringView path; ///< Path suffix - SerdStringView query; ///< Query - SerdStringView fragment; ///< Fragment -} SerdURIView; - -static const SerdURIView SERD_URI_NULL = - {{NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}}; - -/** - Get the unescaped path and hostname from a file URI. - - The returned path and `*hostname` must be freed with serd_free(). - - @param uri A file URI. - @param hostname If non-NULL, set to the hostname, if present. - @return The path component of the URI. 
-*/ -SERD_API char* SERD_NULLABLE -serd_file_uri_parse(const char* SERD_NONNULL uri, - char* SERD_NONNULL* SERD_NULLABLE hostname); - -/// Return true iff `utf8` starts with a valid URI scheme -SERD_PURE_API -bool -serd_uri_string_has_scheme(const char* SERD_NULLABLE utf8); - -/// Parse `utf8`, writing result to `out` -SERD_API SerdStatus -serd_uri_parse(const char* SERD_NONNULL utf8, SerdURIView* SERD_NONNULL out); - -/** - Set target `t` to reference `r` resolved against `base`. - - @see [RFC3986 5.2.2](http://tools.ietf.org/html/rfc3986#section-5.2.2) -*/ -SERD_API void -serd_uri_resolve(const SerdURIView* SERD_NONNULL r, - const SerdURIView* SERD_NONNULL base, - SerdURIView* SERD_NONNULL t); - -/// Serialise `uri` with a series of calls to `sink` -SERD_API size_t -serd_uri_serialise(const SerdURIView* SERD_NONNULL uri, - SerdSink SERD_NONNULL sink, - void* SERD_NONNULL stream); - -/** - Serialise `uri` relative to `base` with a series of calls to `sink` - - The `uri` is written as a relative URI iff if it a child of `base` and - `root`. The optional `root` parameter must be a prefix of `base` and can be - used keep up-references ("../") within a certain namespace. -*/ -SERD_API size_t -serd_uri_serialise_relative(const SerdURIView* SERD_NONNULL uri, - const SerdURIView* SERD_NULLABLE base, - const SerdURIView* SERD_NULLABLE root, - SerdSink SERD_NONNULL sink, - void* SERD_NONNULL stream); +#include "serd/string.h" +#include "serd/string_view.h" +#include "serd/syntax.h" /** @} - @defgroup serd_node Node + @defgroup serd_data Data @{ */ -/** - Type of a node. - - An RDF node, in the abstract sense, can be either a resource, literal, or a - blank. This type is more precise, because syntactically there are two ways - to refer to a resource (by URI or CURIE). - - There are also two ways to refer to a blank node in syntax (by ID or - anonymously), but this is handled by statement flags rather than distinct - node types. 
-*/ -typedef enum { - /** - The type of a nonexistent node. - - This type is useful as a sentinel, but is never emitted by the reader. - */ - SERD_NOTHING = 0, - - /** - Literal value. - - A literal optionally has either a language, or a datatype (not both). - */ - SERD_LITERAL = 1, - - /** - URI (absolute or relative). - - Value is an unquoted URI string, which is either a relative reference - with respect to the current base URI (e.g. "foo/bar"), or an absolute - URI (e.g. "http://example.org/foo"). - @see [RFC3986](http://tools.ietf.org/html/rfc3986) - */ - SERD_URI = 2, - - /** - CURIE, a shortened URI. - - Value is an unquoted CURIE string relative to the current environment, - e.g. "rdf:type". @see [CURIE Syntax 1.0](http://www.w3.org/TR/curie) - */ - SERD_CURIE = 3, - - /** - A blank node. - - Value is a blank node ID without any syntactic prefix, like "id3", which - is meaningful only within this serialisation. @see [RDF 1.1 - Turtle](http://www.w3.org/TR/turtle/#grammar-production-BLANK_NODE_LABEL) - */ - SERD_BLANK = 4, -} SerdNodeType; - -/// A syntactic RDF node -typedef struct { - const char* SERD_NULLABLE buf; ///< Value string - size_t n_bytes; ///< Size in bytes (excluding null) - SerdNodeFlags flags; ///< Node flags (string properties) - SerdNodeType type; ///< Node type -} SerdNode; - -static const SerdNode SERD_NODE_NULL = {NULL, 0, 0, SERD_NOTHING}; - -/** - Make a (shallow) node from `str`. - - This measures, but does not copy, `str`. No memory is allocated. -*/ -SERD_API SerdNode -serd_node_from_string(SerdNodeType type, const char* SERD_NULLABLE str); - -/** - Make a (shallow) node from a prefix of `str`. - - This measures, but does not copy, `str`. No memory is allocated. - Note that the returned node may not be null terminated. 
-*/ -SERD_API SerdNode -serd_node_from_substring(SerdNodeType type, - const char* SERD_NULLABLE str, - size_t len); - -/// Simple wrapper for serd_node_new_uri() to resolve a URI node -SERD_API SerdNode -serd_node_new_uri_from_node(const SerdNode* SERD_NONNULL uri_node, - const SerdURIView* SERD_NULLABLE base, - SerdURIView* SERD_NULLABLE out); - -/// Simple wrapper for serd_node_new_uri() to resolve a URI string -SERD_API SerdNode -serd_node_new_uri_from_string(const char* SERD_NULLABLE str, - const SerdURIView* SERD_NULLABLE base, - SerdURIView* SERD_NULLABLE out); - -/** - Create a new file URI node from a file system path and optional hostname. - - Backslashes in Windows paths will be converted, and other characters will be - percent encoded as necessary. - - If `path` is relative, `hostname` is ignored. - If `out` is not NULL, it will be set to the parsed URI. -*/ -SERD_API SerdNode -serd_node_new_file_uri(const char* SERD_NONNULL path, - const char* SERD_NULLABLE hostname, - SerdURIView* SERD_NULLABLE out); - -/** - Create a new node by serialising `uri` into a new string. - - @param uri The URI to serialise. - - @param base Base URI to resolve `uri` against (or NULL for no resolution). - - @param out Set to the parsing of the new URI (i.e. points only to - memory owned by the new returned node). -*/ -SERD_API SerdNode -serd_node_new_uri(const SerdURIView* SERD_NONNULL uri, - const SerdURIView* SERD_NULLABLE base, - SerdURIView* SERD_NULLABLE out); - -/** - Create a new node by serialising `uri` into a new relative URI. - - @param uri The URI to serialise. - - @param base Base URI to make `uri` relative to, if possible. - - @param root Root URI for resolution (see serd_uri_serialise_relative()). - - @param out Set to the parsing of the new URI (i.e. points only to - memory owned by the new returned node). 
-*/ -SERD_API SerdNode -serd_node_new_relative_uri(const SerdURIView* SERD_NONNULL uri, - const SerdURIView* SERD_NULLABLE base, - const SerdURIView* SERD_NULLABLE root, - SerdURIView* SERD_NULLABLE out); - -/** - Create a new node by serialising `d` into an xsd:decimal string. - - The resulting node will always contain a '.', start with a digit, and end - with a digit (i.e. will have a leading and/or trailing '0' if necessary). - It will never be in scientific notation. A maximum of `frac_digits` digits - will be written after the decimal point, but trailing zeros will - automatically be omitted (except one if `d` is a round integer). - - Note that about 16 and 8 fractional digits are required to precisely - represent a double and float, respectively. - - @param d The value for the new node. - @param frac_digits The maximum number of digits after the decimal place. -*/ -SERD_API SerdNode -serd_node_new_decimal(double d, unsigned frac_digits); - -/// Create a new node by serialising `i` into an xsd:integer string -SERD_API SerdNode -serd_node_new_integer(int64_t i); - -/** - Create a node by serialising `buf` into an xsd:base64Binary string. - - This function can be used to make a serialisable node out of arbitrary - binary data, which can be decoded using serd_base64_decode(). - - @param buf Raw binary input data. - @param size Size of `buf`. - @param wrap_lines Wrap lines at 76 characters to conform to RFC 2045. -*/ -SERD_API SerdNode -serd_node_new_blob(const void* SERD_NONNULL buf, size_t size, bool wrap_lines); - -/** - Make a deep copy of `node`. - - @return a node that the caller must free with serd_node_free(). -*/ -SERD_API SerdNode -serd_node_copy(const SerdNode* SERD_NULLABLE node); - -/// Return true iff `a` is equal to `b` -SERD_PURE_API -bool -serd_node_equals(const SerdNode* SERD_NONNULL a, - const SerdNode* SERD_NONNULL b); - -/** - Free any data owned by `node`. 
- - Note that if `node` is itself dynamically allocated (which is not the case - for nodes created internally by serd), it will not be freed. -*/ -SERD_API void -serd_node_free(SerdNode* SERD_NULLABLE node); +#include "serd/node.h" +#include "serd/statement.h" +#include "serd/uri.h" /** @} - @defgroup serd_event Event Handlers + @defgroup serd_streaming Streaming @{ */ -/// Flags indicating inline abbreviation information for a statement -typedef enum { - SERD_EMPTY_S = 1U << 1U, ///< Empty blank node subject - SERD_EMPTY_O = 1U << 2U, ///< Empty blank node object - SERD_ANON_S_BEGIN = 1U << 3U, ///< Start of anonymous subject - SERD_ANON_O_BEGIN = 1U << 4U, ///< Start of anonymous object - SERD_ANON_CONT = 1U << 5U, ///< Continuation of anonymous node - SERD_LIST_S_BEGIN = 1U << 6U, ///< Start of list subject - SERD_LIST_O_BEGIN = 1U << 7U, ///< Start of list object - SERD_LIST_CONT = 1U << 8U, ///< Continuation of list -} SerdStatementFlag; - -/// Bitwise OR of SerdStatementFlag values -typedef uint32_t SerdStatementFlags; - -/// An error description -typedef struct { - SerdStatus status; ///< Error code - const char* SERD_NULLABLE filename; ///< File with error - unsigned line; ///< Line in file with error or 0 - unsigned col; ///< Column in file with error - const char* SERD_NONNULL fmt; ///< Printf-style format string - va_list* SERD_NONNULL args; ///< Arguments for fmt -} SerdError; - -/** - Callback function for errors. - - @param handle Handle for user data. - @param error Error description. -*/ -typedef SerdStatus (*SerdErrorFunc)(void* SERD_NULLABLE handle, - const SerdError* SERD_NONNULL error); - -/** - Sink function for base URI changes. - - Called whenever the base URI of the serialisation changes. -*/ -typedef SerdStatus (*SerdBaseFunc)(void* SERD_NULLABLE handle, - const SerdNode* SERD_NONNULL uri); - -/** - Sink function for namespace definitions. - - Called whenever a prefix is defined in the serialisation. 
-*/ -typedef SerdStatus (*SerdPrefixFunc)(void* SERD_NULLABLE handle, - const SerdNode* SERD_NONNULL name, - const SerdNode* SERD_NONNULL uri); - -/** - Sink function for statements. - - Called for every RDF statement in the serialisation. -*/ -typedef SerdStatus (*SerdStatementFunc)( - void* SERD_NULLABLE handle, - SerdStatementFlags flags, - const SerdNode* SERD_NULLABLE graph, - const SerdNode* SERD_NONNULL subject, - const SerdNode* SERD_NONNULL predicate, - const SerdNode* SERD_NONNULL object, - const SerdNode* SERD_NULLABLE object_datatype, - const SerdNode* SERD_NULLABLE object_lang); - -/** - Sink function for anonymous node end markers. - - This is called to indicate that the anonymous node with the given `value` - will no longer be referred to by any future statements (so the anonymous - node is finished). -*/ -typedef SerdStatus (*SerdEndFunc)(void* SERD_NULLABLE handle, - const SerdNode* SERD_NONNULL node); +#include "serd/env.h" +#include "serd/sink.h" /** @} - @defgroup serd_env Environment + @defgroup serd_reading_writing Reading and Writing @{ */ -/// Lexical environment for relative URIs or CURIEs (base URI and namespaces) -typedef struct SerdEnvImpl SerdEnv; - -/// Create a new environment -SERD_API SerdEnv* SERD_ALLOCATED -serd_env_new(const SerdNode* SERD_NULLABLE base_uri); - -/// Free `env` -SERD_API void -serd_env_free(SerdEnv* SERD_NULLABLE env); - -/// Get the current base URI -SERD_API const SerdNode* SERD_NONNULL -serd_env_base_uri(const SerdEnv* SERD_NONNULL env, - SerdURIView* SERD_NULLABLE out); - -/// Set the current base URI -SERD_API SerdStatus -serd_env_set_base_uri(SerdEnv* SERD_NONNULL env, - const SerdNode* SERD_NULLABLE uri); - -/** - Set a namespace prefix. - - A namespace prefix is used to expand CURIE nodes, for example, with the - prefix "xsd" set to "http://www.w3.org/2001/XMLSchema#", "xsd:decimal" will - expand to "http://www.w3.org/2001/XMLSchema#decimal". 
-*/ -SERD_API SerdStatus -serd_env_set_prefix(SerdEnv* SERD_NONNULL env, - const SerdNode* SERD_NONNULL name, - const SerdNode* SERD_NONNULL uri); - -/// Set a namespace prefix -SERD_API SerdStatus -serd_env_set_prefix_from_strings(SerdEnv* SERD_NONNULL env, - const char* SERD_NONNULL name, - const char* SERD_NONNULL uri); - -/// Qualify `uri` into a CURIE if possible -SERD_API bool -serd_env_qualify(const SerdEnv* SERD_NULLABLE env, - const SerdNode* SERD_NONNULL uri, - SerdNode* SERD_NONNULL prefix, - SerdStringView* SERD_NONNULL suffix); - -/** - Expand `curie`. - - Errors: SERD_ERR_BAD_ARG if `curie` is not valid, or SERD_ERR_BAD_CURIE if - prefix is not defined in `env`. -*/ -SERD_API SerdStatus -serd_env_expand(const SerdEnv* SERD_NULLABLE env, - const SerdNode* SERD_NONNULL curie, - SerdStringView* SERD_NONNULL uri_prefix, - SerdStringView* SERD_NONNULL uri_suffix); - -/** - Expand `node`, which must be a CURIE or URI, to a full URI. - - Returns null if `node` can not be expanded. -*/ -SERD_API SerdNode -serd_env_expand_node(const SerdEnv* SERD_NULLABLE env, - const SerdNode* SERD_NONNULL node); - -/// Call `func` for each prefix defined in `env` -SERD_API void -serd_env_foreach(const SerdEnv* SERD_NONNULL env, - SerdPrefixFunc SERD_NONNULL func, - void* SERD_NULLABLE handle); +#include "serd/reader.h" +#include "serd/stream.h" +#include "serd/writer.h" /** @} - @defgroup serd_reader Reader - @{ */ -/// Streaming parser that reads a text stream and writes to a statement sink -typedef struct SerdReaderImpl SerdReader; - -/// Create a new RDF reader -SERD_API SerdReader* SERD_ALLOCATED -serd_reader_new(SerdSyntax syntax, - void* SERD_NULLABLE handle, - void (*SERD_NULLABLE free_handle)(void* SERD_NULLABLE), - SerdBaseFunc SERD_NULLABLE base_func, - SerdPrefixFunc SERD_NULLABLE prefix_func, - SerdStatementFunc SERD_NULLABLE statement_func, - SerdEndFunc SERD_NULLABLE end_func); - -/** - Enable or disable strict parsing. 
- - The reader is non-strict (lax) by default, which will tolerate URIs with - invalid characters. Setting strict will fail when parsing such files. An - error is printed for invalid input in either case. -*/ -SERD_API void -serd_reader_set_strict(SerdReader* SERD_NONNULL reader, bool strict); - -/** - Set a function to be called when errors occur during reading. - - The `error_func` will be called with `handle` as its first argument. If - no error function is set, errors are printed to stderr in GCC style. -*/ -SERD_API void -serd_reader_set_error_sink(SerdReader* SERD_NONNULL reader, - SerdErrorFunc SERD_NULLABLE error_func, - void* SERD_NULLABLE error_handle); - -/// Return the `handle` passed to serd_reader_new() -SERD_PURE_API -void* SERD_NULLABLE -serd_reader_handle(const SerdReader* SERD_NONNULL reader); - -/** - Set a prefix to be added to all blank node identifiers. - - This is useful when multiple files are to be parsed into the same output (a - model or a file). Since Serd preserves blank node IDs, this could cause - conflicts where two non-equivalent blank nodes are merged, resulting in - corrupt data. By setting a unique blank node prefix for each parsed file, - this can be avoided, while preserving blank node names. -*/ -SERD_API void -serd_reader_add_blank_prefix(SerdReader* SERD_NONNULL reader, - const char* SERD_NULLABLE prefix); - -/** - Set the URI of the default graph. - - If this is set, the reader will emit quads with the graph set to the given - node for any statements that are not in a named graph (which is currently - all of them since Serd currently does not support any graph syntaxes). -*/ -SERD_API void -serd_reader_set_default_graph(SerdReader* SERD_NONNULL reader, - const SerdNode* SERD_NULLABLE graph); - -/// Read a file at a given `uri` -SERD_API SerdStatus -serd_reader_read_file(SerdReader* SERD_NONNULL reader, - const char* SERD_NONNULL uri); - -/** - Start an incremental read from a file handle. 
- - Iff `bulk` is true, `file` will be read a page at a time. This is more - efficient, but uses a page of memory and means that an entire page of input - must be ready before any callbacks will fire. To react as soon as input - arrives, set `bulk` to false. -*/ -SERD_API SerdStatus -serd_reader_start_stream(SerdReader* SERD_NONNULL reader, - FILE* SERD_NONNULL file, - const char* SERD_NULLABLE name, - bool bulk); - -/** - Start an incremental read from a user-specified source. - - The `read_func` is guaranteed to only be called for `page_size` elements - with size 1 (i.e. `page_size` bytes). -*/ -SERD_API SerdStatus -serd_reader_start_source_stream(SerdReader* SERD_NONNULL reader, - SerdSource SERD_NONNULL read_func, - SerdStreamErrorFunc SERD_NONNULL error_func, - void* SERD_NONNULL stream, - const char* SERD_NULLABLE name, - size_t page_size); - -/** - Read a single "chunk" of data during an incremental read. - - This function will read a single top level description, and return. This - may be a directive, statement, or several statements; essentially it reads - until a '.' is encountered. This is particularly useful for reading - directly from a pipe or socket. 
-*/ -SERD_API SerdStatus -serd_reader_read_chunk(SerdReader* SERD_NONNULL reader); - -/// Finish an incremental read from a file handle -SERD_API SerdStatus -serd_reader_end_stream(SerdReader* SERD_NONNULL reader); - -/// Read `file` -SERD_API SerdStatus -serd_reader_read_file_handle(SerdReader* SERD_NONNULL reader, - FILE* SERD_NONNULL file, - const char* SERD_NULLABLE name); - -/// Read a user-specified byte source -SERD_API SerdStatus -serd_reader_read_source(SerdReader* SERD_NONNULL reader, - SerdSource SERD_NONNULL source, - SerdStreamErrorFunc SERD_NONNULL error, - void* SERD_NONNULL stream, - const char* SERD_NULLABLE name, - size_t page_size); - -/// Read `utf8` -SERD_API SerdStatus -serd_reader_read_string(SerdReader* SERD_NONNULL reader, - const char* SERD_NONNULL utf8); - -/** - Skip over bytes in the input until a specific byte is encountered. - - Typically used for recording from errors in a line-based syntax by skipping - ahead to the next newline. - - @return #SERD_SUCCESS if the given byte was reached, or #SERD_FAILURE if the - end of input is reached. -*/ -SERD_API SerdStatus -serd_reader_skip_until_byte(SerdReader* SERD_NONNULL reader, uint8_t byte); - -/// Free `reader` -SERD_API void -serd_reader_free(SerdReader* SERD_NULLABLE reader); - -/** - @} - @defgroup serd_writer Writer - @{ -*/ - -/// Streaming serialiser that writes a text stream as statements are pushed -typedef struct SerdWriterImpl SerdWriter; - -/** - Writer style options. - - These flags allow more precise control of writer output style. Note that - some options are only supported for some syntaxes, for example, NTriples - does not support abbreviation and is always ASCII. 
-*/ -typedef enum { - SERD_WRITE_ABBREVIATED = 1U << 0U, ///< Abbreviate triples when possible - SERD_WRITE_ASCII = 1U << 1U, ///< Escape all non-ASCII characters - SERD_WRITE_RESOLVED = 1U << 2U, ///< Resolve URIs against base URI - SERD_WRITE_CURIED = 1U << 3U, ///< Shorten URIs into CURIEs - SERD_WRITE_BULK = 1U << 4U, ///< Write output in pages - SERD_WRITE_STRICT = 1U << 5U, ///< Abort with error on lossy output -} SerdWriterFlag; - -/// Bitwise OR of SerdWriterFlag values -typedef uint32_t SerdWriterFlags; - -/// Create a new RDF writer -SERD_API SerdWriter* SERD_ALLOCATED -serd_writer_new(SerdSyntax syntax, - SerdWriterFlags flags, - SerdEnv* SERD_NONNULL env, - const SerdURIView* SERD_NULLABLE base_uri, - SerdSink SERD_NONNULL ssink, - void* SERD_NULLABLE stream); - -/// Free `writer` -SERD_API void -serd_writer_free(SerdWriter* SERD_NULLABLE writer); - -/// Return the env used by `writer` -SERD_PURE_API -SerdEnv* SERD_NONNULL -serd_writer_env(SerdWriter* SERD_NONNULL writer); - -/** - A convenience sink function for writing to a FILE*. - - This function can be used as a SerdSink when writing to a FILE*. The - `stream` parameter must be a FILE* opened for writing. -*/ -SERD_API size_t -serd_file_sink(const void* SERD_NONNULL buf, - size_t len, - void* SERD_NONNULL stream); - -/** - A convenience sink function for writing to a string. - - This function can be used as a SerdSink to write to a SerdBuffer which is - resized as necessary with realloc(). The `stream` parameter must point to - an initialized SerdBuffer. When the write is finished, the string should be - retrieved with serd_buffer_sink_finish(). -*/ -SERD_API size_t -serd_buffer_sink(const void* SERD_NONNULL buf, - size_t len, - void* SERD_NONNULL stream); - -/** - Finish writing to a buffer with serd_buffer_sink(). - - The returned string is the result of the serialisation, which is null - terminated (by this function) and owned by the caller. 
-*/ -SERD_API char* SERD_NONNULL -serd_buffer_sink_finish(SerdBuffer* SERD_NONNULL stream); - -/** - Set a function to be called when errors occur during writing. - - The `error_func` will be called with `handle` as its first argument. If - no error function is set, errors are printed to stderr. -*/ -SERD_API void -serd_writer_set_error_sink(SerdWriter* SERD_NONNULL writer, - SerdErrorFunc SERD_NONNULL error_func, - void* SERD_NULLABLE error_handle); - -/** - Set a prefix to be removed from matching blank node identifiers. - - This is the counterpart to serd_reader_add_blank_prefix() which can be used - to "undo" added prefixes. -*/ -SERD_API void -serd_writer_chop_blank_prefix(SerdWriter* SERD_NONNULL writer, - const char* SERD_NULLABLE prefix); - -/** - Set the current output base URI, and emit a directive if applicable. - - Note this function can be safely casted to SerdBaseSink. -*/ -SERD_API SerdStatus -serd_writer_set_base_uri(SerdWriter* SERD_NONNULL writer, - const SerdNode* SERD_NULLABLE uri); - -/** - Set the current root URI. - - The root URI should be a prefix of the base URI. The path of the root URI - is the highest path any relative up-reference can refer to. For example, - with root <file:///foo/root> and base <file:///foo/root/base>, - <file:///foo/root> will be written as <../>, but <file:///foo> will be - written non-relatively as <file:///foo>. If the root is not explicitly set, - it defaults to the base URI, so no up-references will be created at all. -*/ -SERD_API SerdStatus -serd_writer_set_root_uri(SerdWriter* SERD_NONNULL writer, - const SerdNode* SERD_NULLABLE uri); - -/** - Set a namespace prefix (and emit directive if applicable). - - Note this function can be safely casted to SerdPrefixSink. -*/ -SERD_API SerdStatus -serd_writer_set_prefix(SerdWriter* SERD_NONNULL writer, - const SerdNode* SERD_NONNULL name, - const SerdNode* SERD_NONNULL uri); - -/** - Write a statement. - - Note this function can be safely casted to SerdStatementSink. 
-*/ -SERD_API SerdStatus -serd_writer_write_statement(SerdWriter* SERD_NONNULL writer, - SerdStatementFlags flags, - const SerdNode* SERD_NULLABLE graph, - const SerdNode* SERD_NONNULL subject, - const SerdNode* SERD_NONNULL predicate, - const SerdNode* SERD_NONNULL object, - const SerdNode* SERD_NULLABLE datatype, - const SerdNode* SERD_NULLABLE lang); - -/** - Mark the end of an anonymous node's description. - - Note this function can be safely casted to SerdEndSink. -*/ -SERD_API SerdStatus -serd_writer_end_anon(SerdWriter* SERD_NONNULL writer, - const SerdNode* SERD_NULLABLE node); - -/** - Finish a write. - - This flushes any pending output, for example terminating punctuation, so - that the output is a complete document. -*/ -SERD_API SerdStatus -serd_writer_finish(SerdWriter* SERD_NONNULL writer); +// IWYU pragma: end_exports /** @} - @} */ -#ifdef __cplusplus -# if defined(__GNUC__) -# pragma GCC diagnostic pop -# endif -} /* extern "C" */ -#endif - #endif /* SERD_SERD_H */ diff --git a/include/serd/sink.h b/include/serd/sink.h new file mode 100644 index 00000000..97b47a2f --- /dev/null +++ b/include/serd/sink.h @@ -0,0 +1,68 @@ +// Copyright 2011-2021 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_SINK_H +#define SERD_SINK_H + +#include "serd/attributes.h" +#include "serd/node.h" +#include "serd/statement.h" +#include "serd/status.h" + +SERD_BEGIN_DECLS + +/** + @defgroup serd_sink Sink + @ingroup serd_streaming + @{ +*/ + +/** + Sink function for base URI changes. + + Called whenever the base URI of the serialisation changes. +*/ +typedef SerdStatus (*SerdBaseFunc)(void* SERD_NULLABLE handle, + const SerdNode* SERD_NONNULL uri); + +/** + Sink function for namespace definitions. + + Called whenever a prefix is defined in the serialisation. +*/ +typedef SerdStatus (*SerdPrefixFunc)(void* SERD_NULLABLE handle, + const SerdNode* SERD_NONNULL name, + const SerdNode* SERD_NONNULL uri); + +/** + Sink function for statements. 
+ + Called for every RDF statement in the serialisation. +*/ +typedef SerdStatus (*SerdStatementFunc)( + void* SERD_NULLABLE handle, + SerdStatementFlags flags, + const SerdNode* SERD_NULLABLE graph, + const SerdNode* SERD_NONNULL subject, + const SerdNode* SERD_NONNULL predicate, + const SerdNode* SERD_NONNULL object, + const SerdNode* SERD_NULLABLE object_datatype, + const SerdNode* SERD_NULLABLE object_lang); + +/** + Sink function for anonymous node end markers. + + This is called to indicate that the anonymous node with the given `node` + will no longer be referred to by any future statements (so the anonymous + node is finished). +*/ +typedef SerdStatus (*SerdEndFunc)(void* SERD_NULLABLE handle, + const SerdNode* SERD_NONNULL node); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_SINK_H diff --git a/include/serd/statement.h b/include/serd/statement.h new file mode 100644 index 00000000..321aace4 --- /dev/null +++ b/include/serd/statement.h @@ -0,0 +1,40 @@ +// Copyright 2011-2022 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_STATEMENT_H +#define SERD_STATEMENT_H + +#include "serd/attributes.h" + +#include <stdint.h> + +SERD_BEGIN_DECLS + +/** + @defgroup serd_statement Statements + @ingroup serd_data + @{ +*/ + +/// Flags indicating inline abbreviation information for a statement +typedef enum { + SERD_EMPTY_S = 1U << 1U, ///< Empty blank node subject + SERD_EMPTY_O = 1U << 2U, ///< Empty blank node object + SERD_ANON_S_BEGIN = 1U << 3U, ///< Start of anonymous subject + SERD_ANON_O_BEGIN = 1U << 4U, ///< Start of anonymous object + SERD_ANON_CONT = 1U << 5U, ///< Continuation of anonymous node + SERD_LIST_S_BEGIN = 1U << 6U, ///< Start of list subject + SERD_LIST_O_BEGIN = 1U << 7U, ///< Start of list object + SERD_LIST_CONT = 1U << 8U, ///< Continuation of list +} SerdStatementFlag; + +/// Bitwise OR of SerdStatementFlag values +typedef uint32_t SerdStatementFlags; + +/** + @} +*/ + +SERD_END_DECLS + +#endif //
SERD_STATEMENT_H diff --git a/include/serd/status.h b/include/serd/status.h new file mode 100644 index 00000000..c640b427 --- /dev/null +++ b/include/serd/status.h @@ -0,0 +1,43 @@ +// Copyright 2011-2022 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_STATUS_H +#define SERD_STATUS_H + +#include "serd/attributes.h" + +SERD_BEGIN_DECLS + +/** + @defgroup serd_status Status Codes + @ingroup serd_errors + @{ +*/ + +/// Return status code +typedef enum { + SERD_SUCCESS, ///< No error + SERD_FAILURE, ///< Non-fatal failure + SERD_ERR_UNKNOWN, ///< Unknown error + SERD_ERR_BAD_SYNTAX, ///< Invalid syntax + SERD_ERR_BAD_ARG, ///< Invalid argument + SERD_ERR_NOT_FOUND, ///< Not found + SERD_ERR_ID_CLASH, ///< Encountered clashing blank node IDs + SERD_ERR_BAD_CURIE, ///< Invalid CURIE (e.g. prefix does not exist) + SERD_ERR_INTERNAL, ///< Unexpected internal error (should not happen) + SERD_ERR_BAD_WRITE, ///< Error writing to file/stream + SERD_ERR_BAD_TEXT, ///< Invalid text encoding +} SerdStatus; + +/// Return a string describing a status code +SERD_CONST_API +const char* SERD_NONNULL +serd_strerror(SerdStatus status); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_STATUS_H diff --git a/include/serd/stream.h b/include/serd/stream.h new file mode 100644 index 00000000..8b023add --- /dev/null +++ b/include/serd/stream.h @@ -0,0 +1,62 @@ +// Copyright 2011-2022 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_STREAM_H +#define SERD_STREAM_H + +#include "serd/attributes.h" + +#include <stddef.h> + +SERD_BEGIN_DECLS + +/** + @defgroup serd_stream Byte Stream Interface + @ingroup serd_reading_writing + + These types define the interface for byte streams (generalized files) which + can be provided to read/write from/to any custom source/sink. It is + directly compatible with the standard C `FILE` API, so the standard library + functions may be used directly. 
+ + @{ +*/ + +/** + Function to detect I/O stream errors. + + Identical semantics to `ferror`. + + @return Non-zero if `stream` has encountered an error. +*/ +typedef int (*SerdStreamErrorFunc)(void* SERD_NONNULL stream); + +/** + Source function for raw string input. + + Identical semantics to `fread`, but may set errno for more informative error + reporting than supported by SerdStreamErrorFunc. + + @param buf Output buffer. + @param size Size of a single element of data in bytes (always 1). + @param nmemb Number of elements to read. + @param stream Stream to read from (FILE* for fread). + @return Number of elements (bytes) read. +*/ +typedef size_t (*SerdSource)(void* SERD_NONNULL buf, + size_t size, + size_t nmemb, + void* SERD_NONNULL stream); + +/// Sink function for raw string output +typedef size_t (*SerdSink)(const void* SERD_NONNULL buf, + size_t len, + void* SERD_NONNULL stream); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_STREAM_H diff --git a/include/serd/string.h b/include/serd/string.h new file mode 100644 index 00000000..373dfdf0 --- /dev/null +++ b/include/serd/string.h @@ -0,0 +1,63 @@ +// Copyright 2011-2022 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_STRING_H +#define SERD_STRING_H + +#include "serd/attributes.h" +#include "serd/node.h" + +#include <stddef.h> + +SERD_BEGIN_DECLS + +/** + @defgroup serd_string String Utilities + @ingroup serd_utilities + @{ +*/ + +/** + Measure a UTF-8 string. + + @return Length of `str` in bytes. + @param str A null-terminated UTF-8 string. + @param flags (Output) Set to the applicable flags. +*/ +SERD_API size_t +serd_strlen(const char* SERD_NONNULL str, SerdNodeFlags* SERD_NULLABLE flags); + +/** + Parse a string to a double. + + The API of this function is identical to the standard C strtod function, + except this function is locale-independent and always matches the lexical + format used in the Turtle grammar (the decimal point is always "."). 
+*/ +SERD_API double +serd_strtod(const char* SERD_NONNULL str, + char* SERD_NONNULL* SERD_NULLABLE endptr); + +/** + Decode a base64 string. + + This function can be used to deserialise a blob node created with + serd_node_new_blob(). + + @param str Base64 string to decode. + @param len The length of `str`. + @param size Set to the size of the returned blob in bytes. + @return A newly allocated blob which must be freed with serd_free(). +*/ +SERD_API void* SERD_ALLOCATED +serd_base64_decode(const char* SERD_NONNULL str, + size_t len, + size_t* SERD_NONNULL size); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_STRING_H diff --git a/include/serd/string_view.h b/include/serd/string_view.h new file mode 100644 index 00000000..767cc6b1 --- /dev/null +++ b/include/serd/string_view.h @@ -0,0 +1,79 @@ +// Copyright 2011-2021 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_STRING_VIEW_H +#define SERD_STRING_VIEW_H + +#include "serd/attributes.h" + +#include <stddef.h> +#include <string.h> + +SERD_BEGIN_DECLS + +/** + @defgroup serd_string_view String View + @ingroup serd_utilities + @{ +*/ + +/** + An immutable slice of a string. + + This type is used for many string parameters, to allow referring to slices + of strings in-place and to avoid redundant string measurement. +*/ +typedef struct { + const char* SERD_NONNULL buf; ///< Start of string + size_t len; ///< Length of string in bytes +} SerdStringView; + +/// Return a view of an empty string +SERD_ALWAYS_INLINE_FUNC SERD_CONST_FUNC static inline SerdStringView +serd_empty_string(void) +{ + const SerdStringView view = {"", 0U}; + return view; +} + +/** + Return a view of a substring, or a premeasured string. + + This makes either a view of a slice of a string (which may not be null + terminated), or a view of a string that has already been measured. 
This is + faster than serd_string() for dynamic strings since it does not call + `strlen`, so should be used when the length of the string is already known. + + @param str Pointer to the start of the substring. + + @param len Length of the substring in bytes, not including the trailing null + terminator if present. +*/ +SERD_ALWAYS_INLINE_FUNC SERD_CONST_FUNC static inline SerdStringView +serd_substring(const char* const SERD_NONNULL str, const size_t len) +{ + const SerdStringView view = {str, len}; + return view; +} + +/** + Return a view of an entire string by measuring it. + + This makes a view of the given string by measuring it with `strlen`. + + @param str Pointer to the start of a null-terminated C string, or null. +*/ +SERD_ALWAYS_INLINE_FUNC SERD_PURE_FUNC static inline SerdStringView +// NOLINTNEXTLINE(clang-diagnostic-unused-function) +serd_string(const char* const SERD_NULLABLE str) +{ + return str ? serd_substring(str, strlen(str)) : serd_empty_string(); +} + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_STRING_VIEW_H diff --git a/include/serd/syntax.h b/include/serd/syntax.h new file mode 100644 index 00000000..21847438 --- /dev/null +++ b/include/serd/syntax.h @@ -0,0 +1,31 @@ +// Copyright 2011-2021 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_SYNTAX_H +#define SERD_SYNTAX_H + +#include "serd/attributes.h" + +SERD_BEGIN_DECLS + +/** + @defgroup serd_syntax Syntax Utilities + @ingroup serd_utilities + @{ +*/ + +/// RDF syntax type +typedef enum { + SERD_TURTLE = 1, ///< Terse triples http://www.w3.org/TR/turtle + SERD_NTRIPLES = 2, ///< Line-based triples http://www.w3.org/TR/n-triples/ + SERD_NQUADS = 3, ///< Line-based quads http://www.w3.org/TR/n-quads/ + SERD_TRIG = 4, ///< Terse quads http://www.w3.org/TR/trig/ +} SerdSyntax; + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_SYNTAX_H diff --git a/include/serd/uri.h b/include/serd/uri.h new file mode 100644 index 00000000..fc41c0c2 --- /dev/null +++ 
b/include/serd/uri.h @@ -0,0 +1,100 @@ +// Copyright 2011-2022 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_URI_H +#define SERD_URI_H + +#include "serd/attributes.h" +#include "serd/status.h" +#include "serd/stream.h" +#include "serd/string_view.h" + +#include <stdbool.h> +#include <stddef.h> + +SERD_BEGIN_DECLS + +/** + @defgroup serd_uri URI + @ingroup serd_data + @{ +*/ + +/** + A parsed URI. + + This struct directly refers to slices in other strings, it does not own any + memory itself. This allows some URI operations like resolution to be done + in-place without allocating memory. +*/ +typedef struct { + SerdStringView scheme; ///< Scheme + SerdStringView authority; ///< Authority + SerdStringView path_base; ///< Path prefix if relative + SerdStringView path; ///< Path suffix + SerdStringView query; ///< Query + SerdStringView fragment; ///< Fragment +} SerdURIView; + +static const SerdURIView SERD_URI_NULL = + {{NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}}; + +/** + Get the unescaped path and hostname from a file URI. + + The returned path and `*hostname` must be freed with serd_free(). + + @param uri A file URI. + @param hostname If non-NULL, set to the hostname, if present. + @return The path component of the URI. +*/ +SERD_API char* SERD_NULLABLE +serd_file_uri_parse(const char* SERD_NONNULL uri, + char* SERD_NONNULL* SERD_NULLABLE hostname); + +/// Return true iff `utf8` starts with a valid URI scheme +SERD_PURE_API +bool +serd_uri_string_has_scheme(const char* SERD_NULLABLE utf8); + +/// Parse `utf8`, writing result to `out` +SERD_API SerdStatus +serd_uri_parse(const char* SERD_NONNULL utf8, SerdURIView* SERD_NONNULL out); + +/** + Set target `t` to reference `r` resolved against `base`. 
+ + @see [RFC3986 5.2.2](http://tools.ietf.org/html/rfc3986#section-5.2.2) +*/ +SERD_API void +serd_uri_resolve(const SerdURIView* SERD_NONNULL r, + const SerdURIView* SERD_NONNULL base, + SerdURIView* SERD_NONNULL t); + +/// Serialise `uri` with a series of calls to `sink` +SERD_API size_t +serd_uri_serialise(const SerdURIView* SERD_NONNULL uri, + SerdSink SERD_NONNULL sink, + void* SERD_NONNULL stream); + +/** + Serialise `uri` relative to `base` with a series of calls to `sink` + + The `uri` is written as a relative URI iff it is a child of `base` and + `root`. The optional `root` parameter must be a prefix of `base` and can be + used to keep up-references ("../") within a certain namespace. +*/ +SERD_API size_t +serd_uri_serialise_relative(const SerdURIView* SERD_NONNULL uri, + const SerdURIView* SERD_NULLABLE base, + const SerdURIView* SERD_NULLABLE root, + SerdSink SERD_NONNULL sink, + void* SERD_NONNULL stream); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_URI_H diff --git a/include/serd/writer.h b/include/serd/writer.h new file mode 100644 index 00000000..35f44940 --- /dev/null +++ b/include/serd/writer.h @@ -0,0 +1,172 @@ +// Copyright 2011-2022 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_WRITER_H +#define SERD_WRITER_H + +#include "serd/attributes.h" +#include "serd/env.h" +#include "serd/error.h" +#include "serd/node.h" +#include "serd/statement.h" +#include "serd/status.h" +#include "serd/stream.h" +#include "serd/syntax.h" +#include "serd/uri.h" + +#include <stddef.h> +#include <stdint.h> + +SERD_BEGIN_DECLS + +/** + @defgroup serd_writer Writer + @ingroup serd_reading_writing + @{ +*/ + +/// Streaming serialiser that writes a text stream as statements are pushed +typedef struct SerdWriterImpl SerdWriter; + +/** + Writer style options. + + These flags allow more precise control of writer output style.
Note that + some options are only supported for some syntaxes, for example, NTriples + does not support abbreviation and is always ASCII. +*/ +typedef enum { + SERD_WRITE_ABBREVIATED = 1U << 0U, ///< Abbreviate triples when possible + SERD_WRITE_ASCII = 1U << 1U, ///< Escape all non-ASCII characters + SERD_WRITE_RESOLVED = 1U << 2U, ///< Resolve URIs against base URI + SERD_WRITE_CURIED = 1U << 3U, ///< Shorten URIs into CURIEs + SERD_WRITE_BULK = 1U << 4U, ///< Write output in pages + SERD_WRITE_STRICT = 1U << 5U, ///< Abort with error on lossy output +} SerdWriterFlag; + +/// Bitwise OR of #SerdWriterFlag values +typedef uint32_t SerdWriterFlags; + +/// Create a new RDF writer +SERD_API SerdWriter* SERD_ALLOCATED +serd_writer_new(SerdSyntax syntax, + SerdWriterFlags flags, + SerdEnv* SERD_NONNULL env, + const SerdURIView* SERD_NULLABLE base_uri, + SerdSink SERD_NONNULL ssink, + void* SERD_NULLABLE stream); + +/// Free `writer` +SERD_API void +serd_writer_free(SerdWriter* SERD_NULLABLE writer); + +/// Return the env used by `writer` +SERD_PURE_API +SerdEnv* SERD_NONNULL +serd_writer_env(SerdWriter* SERD_NONNULL writer); + +/** + A convenience sink function for writing to a FILE*. + + This function can be used as a #SerdSink when writing to a FILE*. The + `stream` parameter must be a FILE* opened for writing. +*/ +SERD_API size_t +serd_file_sink(const void* SERD_NONNULL buf, + size_t len, + void* SERD_NONNULL stream); + +/** + Set a function to be called when errors occur during writing. + + The `error_func` will be called with `error_handle` as its first argument. If + no error function is set, errors are printed to stderr. +*/ +SERD_API void +serd_writer_set_error_sink(SerdWriter* SERD_NONNULL writer, + SerdErrorFunc SERD_NONNULL error_func, + void* SERD_NULLABLE error_handle); + +/** + Set a prefix to be removed from matching blank node identifiers. + + This is the counterpart to serd_reader_add_blank_prefix() which can be used + to "undo" added prefixes. 
+*/ +SERD_API void +serd_writer_chop_blank_prefix(SerdWriter* SERD_NONNULL writer, + const char* SERD_NULLABLE prefix); + +/** + Set the current output base URI, and emit a directive if applicable. + + Note that this function can be safely cast to #SerdBaseFunc. +*/ +SERD_API SerdStatus +serd_writer_set_base_uri(SerdWriter* SERD_NONNULL writer, + const SerdNode* SERD_NULLABLE uri); + +/** + Set the current root URI. + + The root URI should be a prefix of the base URI. The path of the root URI + is the highest path any relative up-reference can refer to. For example, + with root <file:///foo/root> and base <file:///foo/root/base>, + <file:///foo/root> will be written as <../>, but <file:///foo> will be + written non-relatively as <file:///foo>. If the root is not explicitly set, + it defaults to the base URI, so no up-references will be created at all. +*/ +SERD_API SerdStatus +serd_writer_set_root_uri(SerdWriter* SERD_NONNULL writer, + const SerdNode* SERD_NULLABLE uri); + +/** + Set a namespace prefix (and emit a directive if applicable). + + Note that this function can be safely cast to #SerdPrefixFunc. +*/ +SERD_API SerdStatus +serd_writer_set_prefix(SerdWriter* SERD_NONNULL writer, + const SerdNode* SERD_NONNULL name, + const SerdNode* SERD_NONNULL uri); + +/** + Write a statement. + + Note that this function can be safely cast to #SerdStatementFunc. +*/ +SERD_API SerdStatus +serd_writer_write_statement(SerdWriter* SERD_NONNULL writer, + SerdStatementFlags flags, + const SerdNode* SERD_NULLABLE graph, + const SerdNode* SERD_NONNULL subject, + const SerdNode* SERD_NONNULL predicate, + const SerdNode* SERD_NONNULL object, + const SerdNode* SERD_NULLABLE datatype, + const SerdNode* SERD_NULLABLE lang); + +/** + Mark the end of an anonymous node's description. + + Note that this function can be safely cast to #SerdEndFunc. +*/ +SERD_API SerdStatus +serd_writer_end_anon(SerdWriter* SERD_NONNULL writer, + const SerdNode* SERD_NULLABLE node); + +/** + Finish a write. 
+ + This flushes any pending output, for example terminating punctuation, so + that the output is a complete document. +*/ +SERD_API SerdStatus +serd_writer_finish(SerdWriter* SERD_NONNULL writer); + +/** + @} +*/ + +SERD_END_DECLS + +#endif // SERD_WRITER_H diff --git a/meson.build b/meson.build index e4157a7d..14abaa0b 100644 --- a/meson.build +++ b/meson.build @@ -103,7 +103,23 @@ endif include_dirs = include_directories('include') c_headers = files( + 'include/serd/attributes.h', + 'include/serd/buffer.h', + 'include/serd/env.h', + 'include/serd/error.h', + 'include/serd/memory.h', + 'include/serd/node.h', + 'include/serd/reader.h', 'include/serd/serd.h', + 'include/serd/sink.h', + 'include/serd/statement.h', + 'include/serd/status.h', + 'include/serd/stream.h', + 'include/serd/string.h', + 'include/serd/string_view.h', + 'include/serd/syntax.h', + 'include/serd/uri.h', + 'include/serd/writer.h', ) sources = files( diff --git a/src/attributes.h b/src/attributes.h deleted file mode 100644 index 11c699f3..00000000 --- a/src/attributes.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2019-2023 David Robillard <d@drobilla.net> -// SPDX-License-Identifier: ISC - -#ifndef SERD_SRC_ATTRIBUTES_H -#define SERD_SRC_ATTRIBUTES_H - -#if defined(__GNUC__) -# define SERD_LOG_FUNC(fmt, arg1) __attribute__((format(printf, fmt, arg1))) -#else -# define SERD_LOG_FUNC(fmt, arg1) -#endif - -#ifdef __GNUC__ -# define SERD_MALLOC_FUNC __attribute__((malloc)) -#else -# define SERD_MALLOC_FUNC -#endif - -#ifdef __GNUC__ -# define SERD_NODISCARD __attribute__((warn_unused_result)) -#else -# define SERD_NODISCARD -#endif - -#endif // SERD_SRC_ATTRIBUTES_H diff --git a/src/base64.c b/src/base64.c index 13f08ab0..14f45b71 100644 --- a/src/base64.c +++ b/src/base64.c @@ -6,7 +6,7 @@ #include "serd_internal.h" #include "string_utils.h" -#include "serd/serd.h" +#include "serd/string.h" #include <stdbool.h> #include <stdint.h> diff --git a/src/base64.h b/src/base64.h index 21a6878c..d3e2b6e1 
100644 --- a/src/base64.h +++ b/src/base64.h @@ -4,7 +4,7 @@ #ifndef SERD_SRC_BASE64_H #define SERD_SRC_BASE64_H -#include "serd/serd.h" +#include "serd/attributes.h" #include <stdbool.h> #include <stddef.h> diff --git a/src/byte_sink.h b/src/byte_sink.h index 39248c8c..0b4a83b1 100644 --- a/src/byte_sink.h +++ b/src/byte_sink.h @@ -7,7 +7,7 @@ #include "serd_internal.h" #include "system.h" -#include "serd/serd.h" +#include "serd/sink.h" #include <stddef.h> #include <string.h> diff --git a/src/byte_source.c b/src/byte_source.c index 72a2366e..b56e0102 100644 --- a/src/byte_source.c +++ b/src/byte_source.c @@ -5,8 +5,6 @@ #include "system.h" -#include "serd/serd.h" - #include <stdbool.h> #include <stdint.h> #include <string.h> diff --git a/src/byte_source.h b/src/byte_source.h index 218e47fa..b6cba5d4 100644 --- a/src/byte_source.h +++ b/src/byte_source.h @@ -4,7 +4,9 @@ #ifndef SERD_SRC_BYTE_SOURCE_H #define SERD_SRC_BYTE_SOURCE_H -#include "serd/serd.h" +#include "serd/attributes.h" +#include "serd/status.h" +#include "serd/stream.h" #include <assert.h> #include <stdbool.h> @@ -1,7 +1,7 @@ // Copyright 2011-2023 David Robillard <d@drobilla.net> // SPDX-License-Identifier: ISC -#include "serd/serd.h" +#include "serd/env.h" #include <stdbool.h> #include <stdio.h> @@ -8,7 +8,11 @@ #include "try.h" #include "uri_utils.h" -#include "serd/serd.h" +#include "serd/node.h" +#include "serd/reader.h" +#include "serd/statement.h" +#include "serd/status.h" +#include "serd/syntax.h" #include <assert.h> #include <stdbool.h> @@ -6,7 +6,10 @@ #include "base64.h" #include "string_utils.h" -#include "serd/serd.h" +#include "serd/buffer.h" +#include "serd/node.h" +#include "serd/string.h" +#include "serd/uri.h" #include <float.h> #include <math.h> @@ -4,7 +4,8 @@ #ifndef SERD_SRC_NODE_H #define SERD_SRC_NODE_H -#include "serd/serd.h" +#include "serd/attributes.h" +#include "serd/node.h" #include <stddef.h> diff --git a/src/reader.c b/src/reader.c index f34de787..00f5e73f 100644 --- 
a/src/reader.c +++ b/src/reader.c @@ -9,6 +9,10 @@ #include "serd_internal.h" +#include "serd/memory.h" +#include "serd/stream.h" +#include "serd/uri.h" + #include <errno.h> #include <stdarg.h> #include <stdint.h> diff --git a/src/reader.h b/src/reader.h index c0bf3765..e24bb31a 100644 --- a/src/reader.h +++ b/src/reader.h @@ -4,11 +4,17 @@ #ifndef SERD_SRC_READER_H #define SERD_SRC_READER_H -#include "attributes.h" #include "byte_source.h" #include "stack.h" -#include "serd/serd.h" +#include "serd/attributes.h" +#include "serd/error.h" +#include "serd/node.h" +#include "serd/reader.h" +#include "serd/sink.h" +#include "serd/statement.h" +#include "serd/status.h" +#include "serd/syntax.h" #include <assert.h> #include <stdbool.h> diff --git a/src/serd_internal.h b/src/serd_internal.h index 7fcae691..b3221ae7 100644 --- a/src/serd_internal.h +++ b/src/serd_internal.h @@ -4,7 +4,7 @@ #ifndef SERD_SRC_SERD_INTERNAL_H #define SERD_SRC_SERD_INTERNAL_H -#include "serd/serd.h" +#include "serd/error.h" #include <stdio.h> diff --git a/src/serdi.c b/src/serdi.c index 421e5df8..27e6d782 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -4,7 +4,16 @@ #include "serd_config.h" #include "string_utils.h" -#include "serd/serd.h" +#include "serd/attributes.h" +#include "serd/env.h" +#include "serd/error.h" +#include "serd/node.h" +#include "serd/reader.h" +#include "serd/sink.h" +#include "serd/status.h" +#include "serd/syntax.h" +#include "serd/uri.h" +#include "serd/writer.h" #ifdef _WIN32 # ifdef _MSC_VER diff --git a/src/string.c b/src/string.c index b0ffe51a..4d8f4f09 100644 --- a/src/string.c +++ b/src/string.c @@ -3,7 +3,10 @@ #include "string_utils.h" -#include "serd/serd.h" +#include "serd/memory.h" +#include "serd/node.h" +#include "serd/status.h" +#include "serd/string.h" #include <assert.h> #include <math.h> diff --git a/src/string_utils.h b/src/string_utils.h index 723df710..86795dcb 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -4,7 +4,7 @@ #ifndef 
SERD_SRC_STRING_UTILS_H #define SERD_SRC_STRING_UTILS_H -#include "serd/serd.h" +#include "serd/node.h" #include <stdbool.h> #include <stddef.h> diff --git a/src/system.h b/src/system.h index 1a749bae..081b60c4 100644 --- a/src/system.h +++ b/src/system.h @@ -4,7 +4,7 @@ #ifndef SERD_SRC_SYSTEM_H #define SERD_SRC_SYSTEM_H -#include "attributes.h" +#include "serd/attributes.h" #include <stdio.h> @@ -4,7 +4,11 @@ #include "string_utils.h" #include "uri_utils.h" -#include "serd/serd.h" +#include "serd/buffer.h" +#include "serd/status.h" +#include "serd/stream.h" +#include "serd/string_view.h" +#include "serd/uri.h" #include <stdbool.h> #include <stdint.h> @@ -268,7 +272,7 @@ serd_uri_resolve(const SerdURIView* const r, return; } - t->path_base.buf = NULL; + t->path_base.buf = ""; t->path_base.len = 0; if (r->scheme.len) { *t = *r; diff --git a/src/uri_utils.h b/src/uri_utils.h index b275cf5f..3974b5ce 100644 --- a/src/uri_utils.h +++ b/src/uri_utils.h @@ -4,7 +4,7 @@ #ifndef SERD_SRC_URI_UTILS_H #define SERD_SRC_URI_UTILS_H -#include "serd/serd.h" +#include "serd/attributes.h" #include "string_utils.h" diff --git a/src/writer.c b/src/writer.c index eea85ddf..612c2620 100644 --- a/src/writer.c +++ b/src/writer.c @@ -1,7 +1,6 @@ // Copyright 2011-2023 David Robillard <d@drobilla.net> // SPDX-License-Identifier: ISC -#include "attributes.h" #include "byte_sink.h" #include "serd_internal.h" #include "stack.h" @@ -9,7 +8,18 @@ #include "try.h" #include "uri_utils.h" -#include "serd/serd.h" +#include "serd/attributes.h" +#include "serd/buffer.h" +#include "serd/env.h" +#include "serd/error.h" +#include "serd/node.h" +#include "serd/statement.h" +#include "serd/status.h" +#include "serd/stream.h" +#include "serd/string_view.h" +#include "serd/syntax.h" +#include "serd/uri.h" +#include "serd/writer.h" #include <errno.h> #include <stdarg.h> diff --git a/test/test_env.c b/test/test_env.c index 2db28f2d..8ef95f1a 100644 --- a/test/test_env.c +++ b/test/test_env.c @@ -3,7 +3,10 
@@ #undef NDEBUG -#include "serd/serd.h" +#include "serd/env.h" +#include "serd/node.h" +#include "serd/status.h" +#include "serd/string_view.h" #include <assert.h> #include <string.h> diff --git a/test/test_free_null.c b/test/test_free_null.c index 96153c8d..10ff4d10 100644 --- a/test/test_free_null.c +++ b/test/test_free_null.c @@ -3,7 +3,11 @@ #undef NDEBUG -#include "serd/serd.h" +#include "serd/env.h" +#include "serd/memory.h" +#include "serd/node.h" +#include "serd/reader.h" +#include "serd/writer.h" #include <stddef.h> diff --git a/test/test_node.c b/test/test_node.c index 6343be5b..a1473e7a 100644 --- a/test/test_node.c +++ b/test/test_node.c @@ -3,7 +3,9 @@ #undef NDEBUG -#include "serd/serd.h" +#include "serd/memory.h" +#include "serd/node.h" +#include "serd/string.h" #include <assert.h> #include <float.h> diff --git a/test/test_reader_writer.c b/test/test_reader_writer.c index 4acfb976..0fcc5ba3 100644 --- a/test/test_reader_writer.c +++ b/test/test_reader_writer.c @@ -3,7 +3,18 @@ #undef NDEBUG -#include "serd/serd.h" +#include "serd/buffer.h" +#include "serd/env.h" +#include "serd/error.h" +#include "serd/memory.h" +#include "serd/node.h" +#include "serd/reader.h" +#include "serd/sink.h" +#include "serd/statement.h" +#include "serd/status.h" +#include "serd/stream.h" +#include "serd/syntax.h" +#include "serd/writer.h" #ifdef _WIN32 # include <windows.h> diff --git a/test/test_string.c b/test/test_string.c index 99db03f4..3e6e77e5 100644 --- a/test/test_string.c +++ b/test/test_string.c @@ -3,7 +3,10 @@ #undef NDEBUG -#include "serd/serd.h" +#include "serd/attributes.h" +#include "serd/node.h" +#include "serd/status.h" +#include "serd/string.h" #include <assert.h> #include <stdint.h> @@ -35,7 +38,7 @@ test_strerror(void) assert(!strcmp(msg, "Unknown error")); } -int +SERD_PURE_FUNC int main(void) { test_strlen(); diff --git a/test/test_uri.c b/test/test_uri.c index 5f1a7f16..61f432fd 100644 --- a/test/test_uri.c +++ b/test/test_uri.c @@ -3,7 +3,10 @@ 
#undef NDEBUG -#include "serd/serd.h" +#include "serd/memory.h" +#include "serd/node.h" +#include "serd/string_view.h" +#include "serd/uri.h" #include <assert.h> #include <stdbool.h> diff --git a/test/test_writer.c b/test/test_writer.c index 783bf3b0..0a823d2b 100644 --- a/test/test_writer.c +++ b/test/test_writer.c @@ -3,7 +3,14 @@ #undef NDEBUG -#include "serd/serd.h" +#include "serd/buffer.h" +#include "serd/env.h" +#include "serd/memory.h" +#include "serd/node.h" +#include "serd/statement.h" +#include "serd/status.h" +#include "serd/syntax.h" +#include "serd/writer.h" #include <assert.h> #include <stdint.h> |