diff options
-rw-r--r-- | serd/serd.h | 96 |
1 files changed, 58 insertions, 38 deletions
diff --git a/serd/serd.h b/serd/serd.h index a43acbca..04decb82 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -73,19 +73,19 @@ typedef struct SerdStatementImpl SerdStatement; /// The origin of a statement in a document typedef struct SerdCursorImpl SerdCursor; -/// Lexical environment for relative URIs or CURIEs (base URI and namespaces) +/// Lexical environment for abbreviating and expanding URIs typedef struct SerdEnvImpl SerdEnv; /// An indexed set of statements typedef struct SerdModelImpl SerdModel; -/// Model Iterator +/// An iterator that points to a statement in a model typedef struct SerdIterImpl SerdIter; -/// Model Range +/// A range of statements in a model typedef struct SerdRangeImpl SerdRange; -/// Streaming parser that reads a text stream and writes to a statement sink +/// Streaming parser that reads a text stream and writes to a sink typedef struct SerdReaderImpl SerdReader; /// Streaming serialiser that writes a text stream as statements are pushed @@ -136,7 +136,7 @@ typedef enum { /// Bitwise OR of SerdStatementFlag values typedef uint32_t SerdStatementFlags; -/// Flags that control style for a model serialisation +/// Flags that control the style of a model serialisation typedef enum { SERD_NO_INLINE_OBJECTS = 1 << 0 ///< Disable object inlining } SerdSerialisationFlag; @@ -145,13 +145,16 @@ typedef enum { typedef uint32_t SerdSerialisationFlags; /** - Type of a syntactic RDF node + Type of a node - This is more precise than the type of an abstract RDF node. An abstract - node is either a resource, literal, or blank. In syntax there are two ways - to refer to a resource (by URI or CURIE) and two ways to refer to a blank - (by ID or anonymously). Anonymous (inline) blank nodes are expressed using - SerdStatementFlags rather than this type. + An RDF node, in the abstract sense, can be either a resource, literal, or a + blank. This type is more precise, because syntactically there are two ways + to refer to a resource (by URI or CURIE). 
Serd also has support for + variable nodes to support some features, which are not RDF nodes. + + There are also two ways to refer to a blank node in syntax (by ID or + anonymously), but this is handled by statement flags rather than distinct + node types. */ typedef enum { /** @@ -209,7 +212,7 @@ typedef enum { /// Bitwise OR of SerdNodeFlag values typedef uint32_t SerdNodeFlags; -/// Field in a statement +/// Index of a field in a statement typedef enum { SERD_SUBJECT = 0, ///< Subject SERD_PREDICATE = 1, ///< Predicate ("key") @@ -217,7 +220,7 @@ typedef enum { SERD_GRAPH = 3 ///< Graph ("context") } SerdField; -/// Indexing option +/// Flags that control model storage and indexing typedef enum { SERD_INDEX_SPO = 1, ///< Subject, Predicate, Object SERD_INDEX_SOP = 1 << 1, ///< Subject, Object, Predicate @@ -263,7 +266,7 @@ typedef struct { SerdStringView fragment; ///< Fragment } SerdURI; -/// Reader options +/// Reader support options typedef enum { SERD_READ_LAX = 1 << 0, ///< Tolerate invalid input where possible SERD_READ_VARIABLES = 1 << 1, ///< Support variable nodes @@ -276,10 +279,9 @@ typedef uint32_t SerdReaderFlags; /** Writer style options - The style of the writer output can be controlled by ORing together - values from this enumeration. Note that some options are only supported - for some syntaxes (e.g. NTriples does not support abbreviation and is - always ASCII). + These flags allow more precise control of writer output style. Note that + some options are only supported for some syntaxes, for example, NTriples + does not support abbreviation and is always ASCII. */ typedef enum { SERD_WRITE_ASCII = 1 << 0, ///< Escape all non-ASCII characters @@ -1049,11 +1051,11 @@ typedef struct { /** An event in a data stream - Streams of data are represented as an ordered series of events. Events - represent everything that can occur in an RDF document, and are used to plumb - together different components. 
For example, when parsing a document, a - reader emits a stream of events which can be sent to a writer to serialise a - document, or to an inserter to build a model in memory. + Streams of data are represented as a series of events. Events represent + everything that can occur in an RDF document, and are used to plumb together + different components. For example, when parsing a document, a reader emits + a stream of events which can be sent to a writer to serialise a document, or + to an inserter to build a model in memory. */ typedef union { SerdEventType type; ///< Event type (always set) @@ -1246,7 +1248,13 @@ SERD_API SerdStatus serd_env_set_base_uri(SerdEnv* env, const SerdNode* uri); -/// Set a namespace prefix +/** + Set a namespace prefix + + A namespace prefix is used to expand CURIE nodes, for example, with the + prefix "xsd" set to "http://www.w3.org/2001/XMLSchema#", "xsd:decimal" will + expand to "http://www.w3.org/2001/XMLSchema#decimal". +*/ SERD_API SerdStatus serd_env_set_prefix(SerdEnv* env, const SerdNode* name, const SerdNode* uri); @@ -1261,7 +1269,8 @@ serd_env_set_prefix_from_strings(SerdEnv* env, /** Qualify `uri` into a CURIE if possible - Returns null if `node` can not be qualified. + Returns null if `uri` can not be qualified (usually because no corresponding + prefix is defined). */ SERD_API SerdNode* @@ -1270,7 +1279,9 @@ serd_env_qualify(const SerdEnv* env, const SerdNode* uri); /** Expand `node`, transforming CURIEs and URI references into absolute URIs. - If `node` is a literal, its datatype is expanded if necessary. + If `node` is a relative URI reference, it is expanded to a full URI if + possible. If `node` is a literal, its datatype is expanded if necessary. + If `node` is a CURIE, it is expanded to a full URI if possible. For simple nodes that do not require expansion, a copy is returned. 
Null is returned if `node` is/contains a CURIE or relative URI that can not be @@ -1406,11 +1417,11 @@ serd_reader_new(SerdWorld* world, /** Set a prefix to be added to all blank node identifiers - This is useful when multiple files are to be parsed into the same output - (e.g. a store, or other files). Since Serd preserves blank node IDs, this - could cause conflicts where two non-equivalent blank nodes are merged, - resulting in corrupt data. By setting a unique blank node prefix for each - parsed file, this can be avoided, while preserving blank node names. + This is useful when multiple files are to be parsed into the same output (a + model or a file). Since Serd preserves blank node IDs, this could cause + conflicts where two non-equivalent blank nodes are merged, resulting in + corrupt data. By setting a unique blank node prefix for each parsed file, + this can be avoided, while preserving blank node names. */ SERD_API void @@ -1447,8 +1458,7 @@ serd_reader_read_document(SerdReader* reader); /** Finish reading from the source - This will close the open file, if applicable, and ensure the reader has - processed all input. + This should be called before starting to read from another source. */ SERD_API SerdStatus @@ -1515,13 +1525,18 @@ SERD_API char* serd_buffer_sink_finish(SerdBuffer* stream); -/// Set a prefix to be removed from matching blank node identifiers +/** + Set a prefix to be removed from matching blank node identifiers + + This is the counterpart to serd_reader_add_blank_prefix() which can be used + to "undo" added prefixes. +*/ SERD_API void serd_writer_chop_blank_prefix(SerdWriter* writer, const char* prefix); /** - Set the current output base URI (and emit directive if applicable) + Set the current output base URI, and emit a directive if applicable Note this function can be safely cast to SerdBaseSink. 
*/ @@ -1543,7 +1558,12 @@ SERD_API SerdStatus serd_writer_set_root_uri(SerdWriter* writer, const SerdNode* uri); -/// Finish a write +/** + Finish a write + + This flushes any pending output, for example terminating punctuation, so + that the output is a complete document. +*/ SERD_API SerdStatus serd_writer_finish(SerdWriter* writer); @@ -1912,8 +1932,8 @@ serd_statement_equals(const SerdStatement* a, const SerdStatement* b); /** Return true iff `statement` matches the given pattern - The matching rules are the same used for querying: nodes match if they are - equivalent, and NULL acts as a wildcard that matches any node. + Nodes match if they are equivalent, or if one of them is NULL. The + statement matches if every node matches. */ SERD_API bool |