aboutsummaryrefslogtreecommitdiffstats
path: root/doc
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2021-03-28 13:42:35 -0400
committerDavid Robillard <d@drobilla.net>2022-01-28 21:57:29 -0500
commitf8a59da9c492b7df38f53ba96505313e931d76cc (patch)
tree5bf1e44e67f8662894a37fbc84d770585f5957dd /doc
parentac0ac05ccf96dee4406db8bdd4d098d3de61c01f (diff)
downloadserd-f8a59da9c492b7df38f53ba96505313e931d76cc.tar.gz
serd-f8a59da9c492b7df38f53ba96505313e931d76cc.tar.bz2
serd-f8a59da9c492b7df38f53ba96505313e931d76cc.zip
Add high-level documentation
Diffstat (limited to 'doc')
-rw-r--r--doc/_static/meson.build11
-rw-r--r--doc/c/.clang-tidy12
-rw-r--r--doc/c/index.rst6
-rw-r--r--doc/c/meson.build53
-rw-r--r--doc/c/model.rst237
-rw-r--r--doc/c/nodes.rst66
-rw-r--r--doc/c/overview.rst87
-rw-r--r--doc/c/overview_code.c459
-rw-r--r--doc/c/reading_and_writing.rst149
-rw-r--r--doc/c/statements.rst123
-rw-r--r--doc/c/stream_processing.rst47
-rw-r--r--doc/c/string_views.rst58
-rw-r--r--doc/c/using_serd.rst15
-rw-r--r--doc/c/world.rst48
-rw-r--r--doc/command_line_tools.rst.in14
-rw-r--r--doc/conf.py.in13
-rw-r--r--doc/data_model.rst107
-rw-r--r--doc/getting_started.rst88
-rw-r--r--doc/summary.rst2
19 files changed, 1573 insertions, 22 deletions
diff --git a/doc/_static/meson.build b/doc/_static/meson.build
index 7d30dbac..cb212b1e 100644
--- a/doc/_static/meson.build
+++ b/doc/_static/meson.build
@@ -1 +1,10 @@
-configure_file(copy: true, input: '../../resources/serd.svg', output: 'serd.svg')
+resource_filenames = [
+ 'epubstyle.css',
+ 'model_pipeline.svg',
+ 'serd.svg',
+ 'writer_pipeline.svg',
+]
+
+foreach filename: resource_filenames
+ configure_file(copy: true, input: '../../resources/' + filename, output: filename)
+endforeach
diff --git a/doc/c/.clang-tidy b/doc/c/.clang-tidy
new file mode 100644
index 00000000..1772d682
--- /dev/null
+++ b/doc/c/.clang-tidy
@@ -0,0 +1,12 @@
+Checks: >
+ *,
+ -*-magic-numbers,
+ -*-uppercase-literal-suffix,
+ -altera-struct-pack-align,
+ -clang-analyzer-deadcode.DeadStores,
+ -clang-analyzer-nullability.NullablePassedToNonnull,
+ -hicpp-signed-bitwise,
+ -llvmlibc-*,
+WarningsAsErrors: '*'
+HeaderFilterRegex: '.*'
+FormatStyle: file
diff --git a/doc/c/index.rst b/doc/c/index.rst
index fe14fc3b..1df161cf 100644
--- a/doc/c/index.rst
+++ b/doc/c/index.rst
@@ -5,6 +5,10 @@ Serd
.. include:: summary.rst
.. toctree::
+ :numbered:
- overview
+ getting_started
+ data_model
+ command_line_tools
+ using_serd
api/serd
diff --git a/doc/c/meson.build b/doc/c/meson.build
index 4e044f97..02f5afc9 100644
--- a/doc/c/meson.build
+++ b/doc/c/meson.build
@@ -1,6 +1,20 @@
config = configuration_data()
config.set('SERD_VERSION', meson.project_version())
+if mandoc.found()
+ config.set('SERD_COMMAND_LINE_INDEX_ENTRY', '\n command_line_tools\n')
+ config.set('SERD_PIPE_LINK', '`serd-pipe <../../man/serd-pipe.html>`_')
+ config.set('SERD_SORT_LINK', '`serd-sort <../../man/serd-sort.html>`_')
+ config.set('SERD_FILTER_LINK', '`serd-filter <../../man/serd-filter.html>`_')
+ config.set('SERD_VALIDATE_LINK', '`serd-validate <../../man/serd-validate.html>`_')
+else
+ config.set('SERD_COMMAND_LINE_INDEX_ENTRY', '')
+ config.set('SERD_PIPE_LINK', '``serd-pipe``')
+ config.set('SERD_SORT_LINK', '``serd-sort``')
+ config.set('SERD_FILTER_LINK', '``serd-filter``')
+ config.set('SERD_VALIDATE_LINK', '``serd-validate``')
+endif
+
conf_py = configure_file(configuration: config,
input: files('../conf.py.in'),
output: 'conf.py')
@@ -9,11 +23,31 @@ configure_file(copy: true,
input: files('../summary.rst'),
output: 'summary.rst')
+configure_file(copy: true,
+ input: files('overview_code.c'),
+ output: 'overview_code.c')
+
+executable('overview_code', files('overview_code.c'), dependencies: [serd_dep])
+
c_rst_files = files(
+ '../data_model.rst',
+ '../getting_started.rst',
'index.rst',
+ 'model.rst',
+ 'nodes.rst',
'overview.rst',
+ 'reading_and_writing.rst',
+ 'statements.rst',
+ 'stream_processing.rst',
+ 'string_views.rst',
+ 'using_serd.rst',
+ 'world.rst',
)
+configure_file(configuration: config,
+ input: files('../command_line_tools.rst.in'),
+ output: 'command_line_tools.rst')
+
foreach f : c_rst_files
configure_file(copy: true, input: f, output: '@PLAINNAME@')
endforeach
@@ -22,10 +56,10 @@ subdir('xml')
subdir('api')
docs = custom_target(
- 'singlehtml documentation for serd',
+ 'singlehtml C documentation for serd',
command: [sphinx_build, '-M', 'singlehtml',
meson.current_build_dir(), meson.current_build_dir(),
- '-E', '-q', '-t', 'singlehtml'],
+ '-W', '-E', '-a', '-q', '-t', 'singlehtml'],
input: [c_rst_files, c_serd_rst, c_index_xml],
output: 'singlehtml',
build_by_default: true,
@@ -33,12 +67,23 @@ docs = custom_target(
install_dir: docdir / versioned_name)
docs = custom_target(
- 'html documentation for serd',
+ 'html C documentation for serd',
command: [sphinx_build, '-M', 'html',
meson.current_build_dir(), meson.current_build_dir(),
- '-E', '-q', '-t', 'html'],
+ '-W', '-E', '-a', '-q', '-t', 'html'],
input: [c_rst_files, c_serd_rst, c_index_xml],
output: 'html',
build_by_default: true,
install: true,
install_dir: docdir / versioned_name)
+
+docs = custom_target(
+ 'epub C documentation for serd',
+ command: [sphinx_build, '-M', 'epub',
+ meson.current_build_dir(), meson.current_build_dir(),
+ '-W', '-E', '-a', '-q', '-t', 'epub'],
+ input: [c_rst_files, c_serd_rst, c_index_xml],
+ output: 'epub',
+ build_by_default: true,
+ install: true,
+ install_dir: docdir / versioned_name)
diff --git a/doc/c/model.rst b/doc/c/model.rst
new file mode 100644
index 00000000..399f370b
--- /dev/null
+++ b/doc/c/model.rst
@@ -0,0 +1,237 @@
+Model
+=====
+
+.. default-domain:: c
+.. highlight:: c
+
+A :struct:`SerdModel` is an indexed set of statements.
+A model can be used to store any data set,
+from a few statements (for example, a protocol message),
+to an entire document,
+to a database with millions of statements.
+
+A new model can be created with :func:`serd_model_new`:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin model-new
+ :end-before: end model-new
+ :dedent: 2
+
+The information to store for each statement can be controlled by passing flags.
+Additional indices can also be enabled with :func:`serd_model_add_index`.
+For example, to be able to quickly search by predicate,
+and store a cursor for each statement,
+the model can be constructed with the :enumerator:`SERD_STORE_CARETS` flag,
+and an additional :enumerator:`SERD_ORDER_PSO` index can be added like so:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin fancy-model-new
+ :end-before: end fancy-model-new
+ :dedent: 2
+
+Accessors
+---------
+
+The flags set for a model can be accessed with :func:`serd_model_flags`.
+
+The number of statements can be accessed with :func:`serd_model_size` and :func:`serd_model_empty`:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin model-size
+ :end-before: end model-size
+ :dedent: 2
+
+Adding Statements
+-----------------
+
+Statements can be added to a model with :func:`serd_model_add`:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin model-add
+ :end-before: end model-add
+ :dedent: 2
+
+Alternatively, :func:`serd_model_insert` can be used if you already have a statement.
+For example, the first statement in one model could be added to another like so:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin model-insert
+ :end-before: end model-insert
+ :dedent: 2
+
+An entire range of statements can be inserted at once with :func:`serd_model_insert_statements`.
+For example, all statements in one model could be copied into another like so:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin model-add-range
+ :end-before: end model-add-range
+ :dedent: 2
+
+Iteration
+---------
+
+An iterator is a reference to a particular statement in a model.
+:func:`serd_model_begin` returns an iterator to the first statement in the model,
+and :func:`serd_model_end` returns a sentinel that is one past the last statement in the model:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin model-begin-end
+ :end-before: end model-begin-end
+ :dedent: 2
+
+A cursor can be advanced to the next statement with :func:`serd_cursor_advance`,
+which returns :enumerator:`SERD_FAILURE` if the iterator reached the end:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin iter-next
+ :end-before: end iter-next
+ :dedent: 2
+
+Iterators are dynamically allocated,
+and must eventually be destroyed with :func:`serd_cursor_free`:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin iter-free
+ :end-before: end iter-free
+ :dedent: 2
+
+Pattern Matching
+----------------
+
+There are several functions that can be used to quickly find statements in the model that match a pattern.
+The simplest is :func:`serd_model_ask` which checks if there is any matching statement:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin model-ask
+ :end-before: end model-ask
+ :dedent: 2
+
+To access the unknown fields,
+an iterator to the matching statement can be found with :func:`serd_model_find` instead:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin model-find
+ :end-before: end model-find
+ :dedent: 2
+
+To iterate over the matching statements,
+the iterator returned by :func:`serd_model_find` can be advanced.
+It will reach its end when it reaches the last matching statement:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin model-range
+ :end-before: end model-range
+ :dedent: 2
+
+
+Similar to :func:`serd_model_ask`,
+:func:`serd_model_count` can be used to count the number of matching statements:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin model-count
+ :end-before: end model-count
+ :dedent: 2
+
+Indexing
+--------
+
+A model can contain several indices that use different orderings to support different kinds of queries.
+For good performance,
+there should be an index where the least significant fields in the ordering correspond to wildcards in the pattern
+(or, in other words, one where the most significant fields in the ordering correspond to nodes given in the pattern).
+The table below lists the indices that best support a kind of pattern,
+where a "?" represents a wildcard in the pattern.
+
++---------+--------------+
+| Pattern | Good Indices |
++=========+==============+
+| s p o | Any |
++---------+--------------+
+| s p ? | SPO, PSO |
++---------+--------------+
+| s ? o | SOP, OSP |
++---------+--------------+
+| s ? ? | SPO, SOP |
++---------+--------------+
+| ? p o | POS, OPS |
++---------+--------------+
+| ? p ? | POS, PSO |
++---------+--------------+
+| ? ? o | OSP, OPS |
++---------+--------------+
+| ? ? ? | Any |
++---------+--------------+
+
+If graphs are enabled,
+then statements are indexed both with and without the graph fields,
+so queries with and without a graph wildcard will have similar performance.
+
+Since indices take up space and slow down insertion,
+it is best to enable the fewest indices possible that cover the queries that will be performed.
+For example,
+an applications might enable just SPO and OPS order,
+because they always search for specific subjects or objects,
+but never for just a predicate without specifying any other field.
+
+Getting Values
+--------------
+
+Sometimes you are only interested in a single node,
+and it is cumbersome to first search for a statement and then get the node from it.
+A more convenient way is to use :func:`serd_model_get`.
+To get a value, specify a triple pattern where exactly one of the subject, predicate, and object is a wildcard.
+If a statement matches, then the node that "fills" the wildcard will be returned:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin model-get
+ :end-before: end model-get
+ :dedent: 2
+
+If multiple statements match the pattern,
+then the matching node from an arbitrary statement is returned.
+It is an error to specify more than one wildcard, excluding the graph.
+
+The similar :func:`serd_model_get_statement` instead returns the matching statement:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin model-get-statement
+ :end-before: end model-get-statement
+ :dedent: 2
+
+Erasing Statements
+------------------
+
+Individual statements can be erased with :func:`serd_model_erase`,
+which takes a cursor:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin model-erase
+ :end-before: end model-erase
+ :dedent: 2
+
+The similar :func:`serd_model_erase_statements` will erase all statements in the cursor's range:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin model-erase-range
+ :end-before: end model-erase-range
+ :dedent: 2
+
+Lifetime
+--------
+
+Models are value-like and can be copied with :func:`serd_model_copy` and compared with :func:`serd_model_equals`:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin model-copy
+ :end-before: end model-copy
+ :dedent: 2
+
+When a model is no longer needed, it can be destroyed with :func:`serd_model_free`:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin model-free
+ :end-before: end model-free
+ :dedent: 2
+
+Destroying a model invalidates all nodes and statements within that model,
+so care should be taken to ensure that no dangling pointers are created.
diff --git a/doc/c/nodes.rst b/doc/c/nodes.rst
new file mode 100644
index 00000000..c55dcedb
--- /dev/null
+++ b/doc/c/nodes.rst
@@ -0,0 +1,66 @@
+Nodes
+=====
+
+.. default-domain:: c
+.. highlight:: c
+
+Nodes are the basic building blocks of data.
+Nodes are essentially strings,
+but also have a :enum:`type <SerdNodeType>`,
+and optionally either a datatype or a language.
+
+In the abstract, a node is either a literal, a URI, or blank.
+Literals are essentially strings,
+but may have a datatype or a language tag.
+URIs are used to identify resources,
+as are blank nodes,
+except blank nodes only have labels with a limited scope and may be written anonymously.
+
+Serd also has a type for variable nodes,
+which are used for some features but not present in RDF data.
+
+Fundamental Constructors
+------------------------
+
+To allow the application to manage node memory,
+node constructors are provided that construct nodes in existing memory buffers.
+The universal constructor :func:`serd_node_construct` can construct any type of node,
+but is somewhat verbose and tricky to use.
+
+Several constructors for more specific types of node are also available:
+
+- :func:`serd_node_construct_token`
+- :func:`serd_node_construct_uri`
+- :func:`serd_node_construct_file_uri`
+- :func:`serd_node_construct_literal`
+- :func:`serd_node_construct_value`
+- :func:`serd_node_construct_decimal`
+- :func:`serd_node_construct_integer`
+- :func:`serd_node_construct_base64`
+
+If explicit memory management is not required,
+high-level constructors that allocate nodes on the heap can be used instead:
+
+- :func:`serd_new_token`
+- :func:`serd_new_uri`
+- :func:`serd_new_file_uri`
+- :func:`serd_new_literal`
+- :func:`serd_new_value`
+- :func:`serd_new_decimal`
+- :func:`serd_new_integer`
+- :func:`serd_new_base64`
+
+Accessors
+---------
+
+The basic attributes of a node can be accessed with :func:`serd_node_type`,
+:func:`serd_node_string`,
+and :func:`serd_node_length`.
+
+A measured view of the string can be accessed with :func:`serd_node_string_view`.
+This can be passed to functions that take a string view,
+to avoid redundant measurement of the node string.
+
+The datatype or language can be retrieved with :func:`serd_node_datatype` or :func:`serd_node_language`, respectively.
+Note that only literals can have a datatype or language,
+but never both at once.
diff --git a/doc/c/overview.rst b/doc/c/overview.rst
index 2b204155..296c9042 100644
--- a/doc/c/overview.rst
+++ b/doc/c/overview.rst
@@ -1,22 +1,83 @@
-########
Overview
-########
+========
.. default-domain:: c
.. highlight:: c
-The API revolves around two main types: the :doc:`api/serd_reader`,
-which reads text and fires callbacks,
-and the :doc:`api/serd_writer`,
-which writes text when driven by corresponding functions.
-Both work in a streaming fashion but still support pretty-printing,
-so the pair can be used to pretty-print, translate,
-or otherwise process arbitrarily large documents very quickly.
-The context of a stream is tracked by the :doc:`api/serd_env`,
-which stores the current base URI and set of namespace prefixes.
-
-The complete API is declared in ``serd.h``:
+The serd API is declared in ``serd.h``:
.. code-block:: c
#include <serd/serd.h>
+
+An instance of serd is represented by a :doc:`api/serd_world`,
+which manages "global" facilities like memory allocation and logging.
+The rest of the API can be broadly grouped into four categories:
+
+Data
+ A :doc:`api/serd_node` is the basic building block of data,
+ 3 or 4 nodes together make a :doc:`api/serd_statement`.
+ All data is expressed in statements.
+
+Streams
+ Components communicate by sending and receiving streams of data.
+ Data is streamed via :doc:`api/serd_sink`,
+ which is an abstract interface that receives :doc:`api/serd_event`.
+ The fundamental event is a statement event,
+ but there are a few additional event types that describe context which is useful for things like pretty-printing.
+
+ Some components both send and receive data,
+ which allow them to be inserted in a `pipeline` to process the data as it streams through.
+ For example,
+ a :doc:`api/serd_canon` converts literals to canonical form,
+ and a :doc:`api/serd_filter` filters statements that match (or do not match) some pattern.
+
+ An event stream describes changes to data and its context,
+ but does not store the context.
+ For that, an associated :doc:`api/serd_env` is maintained.
+ This stores the active base URI and namespace prefixes which can,
+ for example,
+ be used to write output with the same abbreviations used in the source.
+
+Reading and Writing
+ Reading and writing data is performed using a :doc:`api/serd_reader`,
+ which reads text and emits data to a sink,
+ and a :doc:`api/serd_writer`,
+ which is a sink that writes the incoming data as text.
+ Both work in a streaming fashion so that large documents can be pretty-printed,
+ translated,
+ or otherwise processed quickly using only a small amount of memory.
+
+Storage
+ A set of statements can be stored in memory as a :doc:`api/serd_model`.
+ This supports quickly searching and scanning statements,
+ provided an appropriate index is enabled.
+
+ Data can be loaded into a model via an :doc:`api/serd_inserter`,
+ which is a sink that inserts incoming statements into a model.
+ Data in a model can be written out by calling :func:`serd_describe_range` on the desired range of statements.
+
+The sink interface acts as a generic connection which can be used to build custom data processing pipelines.
+For example,
+a simple pipeline to read a document, filter out some statements, and write the result to a new file,
+would look something like:
+
+.. image:: ../_static/writer_pipeline.svg
+
+Here, dotted arrows represent event streams,
+and solid arrows represent explicit use of a component.
+In other words, dotted arrows represent connections via the abstract :doc:`api/serd_sink` interface.
+In this case both reader and writer are using the same environment,
+so the output document will have the same abbreviations as the input.
+It is also possible to use different environments,
+for example to set additional namespace prefixes to further abbreviate the document.
+
+Similarly, a document could be loaded into a model with canonical literals using a pipeline like:
+
+.. image:: ../_static/model_pipeline.svg
+
+Many other useful pipelines can be built using the components in serd,
+and applications can implement custom ones to add additional functionality.
+
+The following documentation gives a more detailed bottom-up introduction to the API,
+with links to the complete reference where further detail can be found.
diff --git a/doc/c/overview_code.c b/doc/c/overview_code.c
new file mode 100644
index 00000000..0b7b5600
--- /dev/null
+++ b/doc/c/overview_code.c
@@ -0,0 +1,459 @@
+/*
+ Copyright 2021 David Robillard <d@drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+/*
+ Example code that is included in the documentation. Code in the
+ documentation is included from here rather than written inline so that it can
+ be tested and avoid rotting. The code here doesn't make much sense, but is
+ written such that it at least compiles and will run without crashing.
+*/
+
+#include "serd/serd.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+
+#if defined(__GNUC__)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+static void
+string_views(void)
+{
+ static const char* const string_pointer = "some string";
+
+ // begin make-empty-string
+ SerdStringView empty = SERD_EMPTY_STRING();
+ // end make-empty-string
+
+ // begin make-static-string
+ SerdStringView hello = SERD_STRING("hello");
+ // end make-static-string
+
+ // begin measure-string
+ SerdStringView view = SERD_STRING(string_pointer);
+ // end measure-string
+
+ // begin make-string-view
+ SerdStringView slice = SERD_SUBSTRING(string_pointer, 4);
+ // end make-string-view
+}
+
+static void
+statements(void)
+{
+ SerdNodes* nodes = serd_nodes_new(NULL);
+
+ // begin statement-new
+ SerdStatement* statement = serd_statement_new(
+ NULL,
+ serd_nodes_uri(nodes, SERD_STRING("http://example.org/drobilla")),
+ serd_nodes_uri(nodes, SERD_STRING("http://example.org/firstName")),
+ serd_nodes_string(nodes, SERD_STRING("David")),
+ NULL,
+ NULL);
+ // end statement-new
+
+ serd_statement_free(NULL, statement);
+ serd_nodes_free(nodes);
+}
+
+static void
+statements_accessing_fields(void)
+{
+ SerdNode* ss = serd_new_uri(NULL, SERD_STRING("http://example.org/s"));
+ SerdNode* sp = serd_new_uri(NULL, SERD_STRING("http://example.org/p"));
+ SerdNode* so = serd_new_uri(NULL, SERD_STRING("http://example.org/o"));
+
+ SerdStatement* statement = serd_statement_new(NULL, ss, sp, so, NULL, NULL);
+
+ // begin get-subject
+ const SerdNode* s = serd_statement_node(statement, SERD_SUBJECT);
+ // end get-subject
+
+ // begin get-pog
+ const SerdNode* p = serd_statement_predicate(statement);
+ const SerdNode* o = serd_statement_object(statement);
+ const SerdNode* g = serd_statement_graph(statement);
+ // end get-pog
+
+ // begin get-caret
+ const SerdCaret* c = serd_statement_caret(statement);
+ // end get-caret
+}
+
+static void
+statements_comparison(void)
+{
+ SerdNode* ss = serd_new_uri(NULL, SERD_STRING("http://example.org/s"));
+ SerdNode* sp = serd_new_uri(NULL, SERD_STRING("http://example.org/p"));
+ SerdNode* so = serd_new_uri(NULL, SERD_STRING("http://example.org/o"));
+
+ SerdStatement* statement1 = serd_statement_new(NULL, ss, sp, so, NULL, NULL);
+ SerdStatement* statement2 = serd_statement_new(NULL, ss, sp, so, NULL, NULL);
+
+ // begin statement-equals
+ if (serd_statement_equals(statement1, statement2)) {
+ printf("Match\n");
+ }
+ // end statement-equals
+
+ SerdStatement* statement = statement1;
+
+ // begin statement-matches
+ SerdNode* eg_name =
+ serd_new_uri(NULL, SERD_STRING("http://example.org/name"));
+
+ if (serd_statement_matches(statement, NULL, eg_name, NULL, NULL)) {
+ printf("%s has name %s\n",
+ serd_node_string(serd_statement_subject(statement)),
+ serd_node_string(serd_statement_object(statement)));
+ }
+ // end statement-matches
+}
+
+static void
+statements_lifetime(void)
+{
+ SerdStatement* statement = NULL;
+
+ // begin statement-copy
+ SerdStatement* copy = serd_statement_copy(NULL, statement);
+ // end statement-copy
+
+ // begin statement-free
+ serd_statement_free(NULL, copy);
+ // end statement-free
+}
+
+static void
+world(void)
+{
+ // begin world-new
+ SerdWorld* world = serd_world_new(NULL);
+ // end world-new
+
+ // begin get-blank
+ const SerdNode* world_blank = serd_world_get_blank(world);
+ SerdNode* my_blank = serd_node_copy(NULL, world_blank);
+ // end get-blank
+}
+
+static void
+model(void)
+{
+ SerdWorld* world = serd_world_new(NULL);
+
+ // begin model-new
+ SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0u);
+ // end model-new
+
+ // begin fancy-model-new
+ SerdModel* fancy_model =
+ serd_model_new(world, SERD_ORDER_SPO, SERD_STORE_CARETS);
+
+ serd_model_add_index(fancy_model, SERD_ORDER_PSO);
+ // end fancy-model-new
+
+ // begin model-copy
+ SerdModel* copy = serd_model_copy(NULL, model);
+
+ assert(serd_model_equals(copy, model));
+ // end model-copy
+
+ // begin model-size
+ if (serd_model_empty(model)) {
+ printf("Model is empty\n");
+ } else if (serd_model_size(model) > 1000) {
+ printf("Model has over 1000 statements\n");
+ }
+ // end model-size
+
+ // begin model-free
+ serd_model_free(copy);
+ // end model-free
+
+ // begin model-add
+ SerdNodes* nodes = serd_nodes_new(NULL);
+
+ serd_model_add(
+ model,
+ serd_nodes_uri(nodes, SERD_STRING("http://example.org/thing")), // S
+ serd_nodes_uri(nodes, SERD_STRING("http://example.org/name")), // P
+ serd_nodes_string(nodes, SERD_STRING("Thing")), // O
+ NULL); // G
+ // end model-add
+
+ SerdModel* other_model = model;
+
+ // begin model-insert
+ const SerdCursor* cursor = serd_model_begin(other_model);
+
+ serd_model_insert(model, serd_cursor_get(cursor));
+ // end model-insert
+
+ // begin model-add-range
+ SerdCursor* other_range = serd_model_begin(other_model);
+
+ serd_model_insert_statements(model, other_range);
+
+ serd_cursor_free(other_range);
+ // end model-add-range
+
+ // begin model-begin-end
+ SerdCursor* i = serd_model_begin(model);
+ if (serd_cursor_equals(i, serd_model_end(model))) {
+ printf("Model is empty\n");
+ } else {
+ const SerdStatement* s = serd_cursor_get(i);
+
+ printf("First statement subject: %s\n",
+ serd_node_string(serd_statement_subject(s)));
+ }
+ // end model-begin-end
+
+ // begin iter-next
+ if (!serd_cursor_advance(i)) {
+ const SerdStatement* s = serd_cursor_get(i);
+
+ printf("Second statement subject: %s\n",
+ serd_node_string(serd_statement_subject(s)));
+ }
+ // end iter-next
+
+ // begin iter-free
+ serd_cursor_free(i);
+ // end iter-free
+
+ // begin model-all
+ SerdCursor* all = serd_model_begin(model);
+ // end model-all
+
+ // begin range-next
+ if (serd_cursor_is_end(all)) {
+ printf("Model is empty\n");
+ } else {
+ const SerdStatement* s = serd_cursor_get(all);
+
+ printf("First statement subject: %s\n",
+ serd_node_string(serd_statement_subject(s)));
+ }
+
+ if (!serd_cursor_advance(all)) {
+ const SerdStatement* s = serd_cursor_get(all);
+
+ printf("Second statement subject: %s\n",
+ serd_node_string(serd_statement_subject(s)));
+ }
+ // end range-next
+
+ // begin model-ask
+ const SerdNode* rdf_type = serd_nodes_uri(
+ nodes, SERD_STRING("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"));
+
+ if (serd_model_ask(model, NULL, rdf_type, NULL, NULL)) {
+ printf("Model contains a type statement\n");
+ }
+ // end model-ask
+
+ // Add a statement so that the searching examples below work
+ SerdNode* inst = serd_new_uri(NULL, SERD_STRING("http://example.org/i"));
+ SerdNode* type = serd_new_uri(NULL, SERD_STRING("http://example.org/T"));
+ serd_model_add(model, inst, rdf_type, type, NULL);
+
+ // begin model-find
+ SerdCursor* it = serd_model_find(model, NULL, rdf_type, NULL, NULL);
+
+ const SerdStatement* statement = serd_cursor_get(it);
+ const SerdNode* instance =
+ statement ? serd_statement_subject(statement) : NULL;
+ // end model-find
+
+ // begin model-count
+ size_t n = serd_model_count(model, instance, rdf_type, NULL, NULL);
+ printf("Instance has %zu types\n", n);
+ // end model-count
+
+ // begin model-range
+ SerdCursor* range = serd_model_find(model,
+ instance, // Subject = instance
+ rdf_type, // Predicate = rdf:type
+ NULL, // Object = anything
+ NULL); // Graph = anything
+
+ for (; !serd_cursor_is_end(range); serd_cursor_advance(range)) {
+ const SerdStatement* s = serd_cursor_get(range);
+
+ printf("Instance has type %s\n",
+ serd_node_string(serd_statement_object(s)));
+ }
+
+ serd_cursor_free(range);
+ // end model-range
+
+ // begin model-get
+ const SerdNode* t = serd_model_get(model,
+ instance, // Subject
+ rdf_type, // Predicate
+ NULL, // Object
+ NULL); // Graph
+ if (t) {
+ printf("Instance has type %s\n", serd_node_string(t));
+ }
+ // end model-get
+
+ // begin model-get-statement
+ const SerdStatement* ts =
+ serd_model_get_statement(model, instance, rdf_type, NULL, NULL);
+
+ if (ts) {
+ printf("Instance %s has type %s\n",
+ serd_node_string(serd_statement_subject(ts)),
+ serd_node_string(serd_statement_object(ts)));
+ }
+ // end model-get-statement
+
+ // begin model-erase
+ SerdCursor* some_type = serd_model_find(model, NULL, rdf_type, NULL, NULL);
+ serd_model_erase(model, some_type);
+ serd_cursor_free(some_type);
+ // end model-erase
+
+ // begin model-erase-range
+ SerdCursor* all_types = serd_model_find(model, NULL, rdf_type, NULL, NULL);
+ serd_model_erase_statements(model, all_types);
+ serd_cursor_free(all_types);
+ // end model-erase-range
+}
+
+static void
+reading_writing(void)
+{
+ SerdWorld* world = serd_world_new(NULL);
+
+ // begin env-new
+ SerdStringView host = SERD_EMPTY_STRING();
+ SerdStringView path = SERD_STRING("/some/file.ttl");
+ SerdNode* base = serd_new_file_uri(NULL, path, host);
+ SerdEnv* env = serd_env_new(world, serd_node_string_view(base));
+ // end env-new
+
+ // begin env-set-prefix
+ serd_env_set_prefix(
+ env,
+ SERD_STRING("rdf"),
+ SERD_STRING("http://www.w3.org/1999/02/22-rdf-syntax-ns#"));
+ // end env-set-prefix
+
+ // begin byte-sink-new
+ SerdOutputStream out = serd_open_output_file("/tmp/eg.ttl");
+ // end byte-sink-new
+
+ // clang-format off
+ // begin writer-new
+ SerdWriter* writer = serd_writer_new(
+ world, // World
+ SERD_TURTLE, // Syntax
+ 0, // Writer flags
+ env, // Environment
+ &out, // Output stream
+ 4096); // Block size
+ // end writer-new
+
+ // begin reader-new
+ SerdReader* reader = serd_reader_new(
+ world, // World
+ SERD_TURTLE, // Syntax
+ 0, // Reader flags
+ env, // Environment
+ serd_writer_sink(writer), // Target sink
+ 4096); // Block size
+ // end reader-new
+
+ // clang-format on
+
+ // begin read-document
+ SerdStatus st = serd_reader_read_document(reader);
+ if (st) {
+ printf("Error reading document: %s\n", serd_strerror(st));
+ }
+ // end read-document
+
+ // begin reader-writer-free
+ serd_reader_free(reader);
+ serd_writer_free(writer);
+ // end reader-writer-free
+
+ // begin byte-sink-free
+ serd_close_output(&out);
+ // end byte-sink-free
+
+ // begin inserter-new
+ SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0u);
+ SerdSink* inserter = serd_inserter_new(model, NULL);
+ // end inserter-new
+
+ // begin model-reader-new
+ SerdReader* const model_reader =
+ serd_reader_new(world, SERD_TURTLE, 0, env, inserter, 4096);
+
+ st = serd_reader_read_document(model_reader);
+ if (st) {
+ printf("Error loading model: %s\n", serd_strerror(st));
+ }
+ // end model-reader-new
+
+ // begin write-range
+ serd_describe_range(serd_model_begin(model), serd_writer_sink(writer), 0);
+ // end write-range
+
+ // begin canon-new
+ SerdSink* canon = serd_canon_new(world, inserter, 0);
+ // end canon-new
+
+ SerdNode* rdf_type = NULL;
+
+ // begin filter-new
+ SerdSink* filter = serd_filter_new(world, // World
+ inserter, // Target
+ NULL, // Subject
+ rdf_type, // Predicate
+ NULL, // Object
+ NULL, // Graph
+ true); // Inclusive
+ // end filter-new
+}
+
+int
+main(void)
+{
+ string_views();
+ statements();
+ statements_accessing_fields();
+ statements_comparison();
+ statements_lifetime();
+ world();
+ model();
+ reading_writing();
+
+ return 0;
+}
+
+#if defined(__GNUC__)
+# pragma GCC diagnostic pop
+#endif
diff --git a/doc/c/reading_and_writing.rst b/doc/c/reading_and_writing.rst
new file mode 100644
index 00000000..1180d03d
--- /dev/null
+++ b/doc/c/reading_and_writing.rst
@@ -0,0 +1,149 @@
+Reading and Writing
+===================
+
+.. default-domain:: c
+.. highlight:: c
+
+Reading and writing documents in a textual syntax is handled by the :struct:`SerdReader` and :struct:`SerdWriter`, respectively.
+Serd is designed around a concept of event streams,
+so the reader or writer can be at the beginning or end of a "pipeline" of stream processors.
+This allows large documents to be processed quickly in an "online" fashion,
+while requiring only a small constant amount of memory.
+If you are familiar with XML,
+this is roughly analogous to SAX.
+
+A common simple setup is to simply connect a reader directly to a writer.
+This can be used for things like pretty-printing,
+or converting a document from one syntax to another.
+This can be done by passing the sink returned by :func:`serd_writer_sink` to the reader constructor, :func:`serd_reader_new`.
+
+First,
+in order to write a document,
+an environment needs to be created.
+This defines the base URI and any namespace prefixes,
+which is used to resolve any relative URIs or prefixed names,
+and may be used to abbreviate the output.
+In most cases, the base URI should simply be the URI of the file being written.
+For example:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin env-new
+ :end-before: end env-new
+ :dedent: 2
+
+Namespace prefixes can also be defined for any vocabularies used:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin env-set-prefix
+ :end-before: end env-set-prefix
+ :dedent: 2
+
+We now have an environment set up for our document,
+but still need to specify where to write it.
+This is done by creating a :struct:`SerdOutputStream`,
+which is a generic interface that can be set up to write to a file,
+a buffer in memory,
+or a custom function that can be used to write output anywhere.
+In this case, we will write to the file we set up as the base URI:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin byte-sink-new
+ :end-before: end byte-sink-new
+ :dedent: 2
+
+The second argument is the page size in bytes,
+so I/O will be performed in chunks for better performance.
+The value used here, 4096, is a typical filesystem block size that should perform well on most machines.
+
+With an environment and byte sink ready,
+the writer can now be created:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin writer-new
+ :end-before: end writer-new
+ :dedent: 2
+
+Output is written by feeding statements and other events to the sink returned by :func:`serd_writer_sink`.
+:struct:`SerdSink` is the generic interface for anything that can consume data streams.
+Many objects provide the same interface to do various things with the data,
+but in this case we will send data directly to the writer:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin reader-new
+ :end-before: end reader-new
+ :dedent: 2
+
+The third argument of :func:`serd_reader_new` takes a bitwise ``OR`` of :enum:`SerdReaderFlag` flags that can be used to configure the reader.
+In this case only :enumerator:`SERD_READ_LAX` is given,
+which tolerates some invalid input without halting on an error,
+but others can be included.
+For example, passing ``SERD_READ_LAX | SERD_READ_RELATIVE`` would enable lax mode and preserve relative URIs in the input.
+
+Now that we have a reader that is set up to directly push its output to a writer,
+we can finally process the document:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin read-document
+ :end-before: end read-document
+ :dedent: 2
+
+Alternatively, one "chunk" of input can be read at a time with :func:`serd_reader_read_chunk`.
+A "chunk" is generally one top-level description of a resource,
+including any anonymous blank nodes in its description,
+but this depends on the syntax and the structure of the document being read.
+
+The reader pushes events to its sink as input is read,
+so in this scenario the data should now have been re-written by the writer
+(assuming no error occurred).
+To finish and ensure that a complete document has been read and written,
+:func:`serd_reader_finish` can be called followed by :func:`serd_writer_finish`.
+However these will be automatically called on destruction if necessary,
+so if the reader and writer are no longer required they can simply be destroyed:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin reader-writer-free
+ :end-before: end reader-writer-free
+ :dedent: 2
+
+Note that it is important to free the reader first in this case,
+since finishing the read may push events to the writer.
+Finally, closing the output with :func:`serd_close_output` will flush and close the output file,
+so it is ready to be read again later.
+
+.. literalinclude:: overview_code.c
+ :start-after: begin byte-sink-free
+ :end-before: end byte-sink-free
+ :dedent: 2
+
+Reading into a Model
+--------------------
+
+A document can be loaded into a model by setting up a reader that pushes data to a model "inserter" rather than a writer:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin inserter-new
+ :end-before: end inserter-new
+ :dedent: 2
+
+The process of reading the document is the same as above,
+only the sink is different:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin model-reader-new
+ :end-before: end model-reader-new
+ :dedent: 2
+
+Writing a Model
+---------------
+
+A model, or parts of a model, can be written by writing the desired range with :func:`serd_describe_range`:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin write-range
+ :end-before: end write-range
+ :dedent: 2
+
+By default,
+this writes the range in chunks suited to pretty-printing with anonymous blank nodes (like "[ ... ]" in Turtle or TriG).
+Any rdf:type properties (written "a" in Turtle or TriG) will be written before any other properties of their subject.
+This can be disabled by passing the flag :enumerator:`SERD_NO_TYPE_FIRST`.
diff --git a/doc/c/statements.rst b/doc/c/statements.rst
new file mode 100644
index 00000000..da7b8a03
--- /dev/null
+++ b/doc/c/statements.rst
@@ -0,0 +1,123 @@
+Statements
+==========
+
+.. default-domain:: c
+.. highlight:: c
+
+A :struct:`SerdStatement` is a tuple of either 3 or 4 nodes:
+the `subject`, `predicate`, `object`, and optional `graph`.
+Statements declare that a subject has some property.
+The predicate identifies the property,
+and the object is its value.
+
+A statement can be thought of as a very simple machine-readable sentence.
+The subject and object are as in natural language,
+and the predicate is something like a verb, but more general.
+For example, we could make a statement in English about your intrepid author:
+
+ drobilla has the first name "David"
+
+We can break this statement into 3 pieces like so:
+
+.. list-table::
+ :header-rows: 1
+
+ * - Subject
+ - Predicate
+ - Object
+ * - drobilla
+ - has the first name
+ - "David"
+
+To make a :struct:`SerdStatement` out of this, we need to define some URIs.
+In RDF, the subject and predicate must be *resources* with an identifier
+(for example, neither can be a string).
+Conventionally, predicate names do not start with "has" or similar words,
+since that would be redundant in this context.
+So, we assume that ``http://example.org/drobilla`` is the URI for drobilla,
+and that ``http://example.org/firstName`` has been defined somewhere to be
+a property with the appropriate meaning,
+and can make an equivalent :struct:`SerdStatement`:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin statement-new
+ :end-before: end statement-new
+ :dedent: 2
+
+The last two fields are the graph and the cursor.
+The graph is another node that can be used to group statements,
+for example by the URI of the document they were loaded from.
+The cursor represents the location in a document where the statement was loaded from, if applicable.
+
+Accessing Fields
+----------------
+
+Statement fields can be accessed with
+:func:`serd_statement_node`, for example:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin get-subject
+ :end-before: end get-subject
+ :dedent: 2
+
+Alternatively, an accessor function is provided for each field:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin get-pog
+ :end-before: end get-pog
+ :dedent: 2
+
+Every statement has a subject, predicate, and object,
+but the graph may be null.
+The cursor may also be null (as it would be in this case),
+but if available it can be accessed with :func:`serd_statement_caret`:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin get-caret
+ :end-before: end get-caret
+ :dedent: 2
+
+Comparison
+----------
+
+Two statements can be compared with :func:`serd_statement_equals`:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin statement-equals
+ :end-before: end statement-equals
+ :dedent: 2
+
+Statements are equal if all four corresponding pairs of nodes are equal.
+The cursor is considered metadata, and is ignored for comparison.
+
+It is also possible to match statements against a pattern using ``NULL`` as a wildcard,
+with :func:`serd_statement_matches`:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin statement-matches
+ :end-before: end statement-matches
+ :dedent: 2
+
+Lifetime
+--------
+
+A statement only contains const references to nodes,
+it does not own nodes or manage their lifetimes internally.
+The cursor, however, is owned by the statement.
+A statement can be copied with :func:`serd_statement_copy`:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin statement-copy
+ :end-before: end statement-copy
+ :dedent: 2
+
+The copied statement will refer to exactly the same nodes,
+though the cursor will be deep copied.
+
+In most cases, statements come from a reader or model which manages them internally,
+but a statement owned by the application must be freed with :func:`serd_statement_free`:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin statement-free
+ :end-before: end statement-free
+ :dedent: 2
diff --git a/doc/c/stream_processing.rst b/doc/c/stream_processing.rst
new file mode 100644
index 00000000..0b3f126f
--- /dev/null
+++ b/doc/c/stream_processing.rst
@@ -0,0 +1,47 @@
+Stream Processing
+=================
+
+.. default-domain:: c
+.. highlight:: c
+
+The above examples show how a document can be either written to a file or loaded into a model,
+simply by changing the sink that the data is written to.
+There are also sinks that filter or transform the data before passing it on to another sink,
+which can be used to build more advanced pipelines with several processing stages.
+
+Canonical Literals
+------------------
+
+A `canon` is a stream processor that converts literals with supported XSD datatypes into canonical form.
+For example, this will rewrite an xsd:decimal literal like ".10" as "0.1".
+A canon is created with :func:`serd_canon_new`,
+which needs to be passed the "target" sink that the transformed statements should be written to,
+for example:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin canon-new
+ :end-before: end canon-new
+ :dedent: 2
+
+The last argument is a bitwise ``OR`` of :enum:`SerdCanonFlag` flags.
+For example, :enumerator:`SERD_CANON_LAX` will tolerate and pass through invalid literals,
+which can be useful for cleaning up questionabe data as much as possible without losing any information.
+
+Filtering Statements
+--------------------
+
+A `filter` is a stream processor that filters statements based on a pattern.
+It can be configured in either inclusive or exclusive mode,
+which passes through only statements that match or don't match the pattern,
+respectively.
+A filter is created with :func:`serd_filter_new`,
+which takes a target, pattern, and inclusive flag.
+For example, all statements with predicate ``rdf:type`` could be filtered out when loading a model:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin filter-new
+ :end-before: end filter-new
+ :dedent: 2
+
+If ``false`` is passed for the last parameter instead,
+then the filter operates in exclusive mode and will instead insert only statements with predicate ``rdf:type``.
diff --git a/doc/c/string_views.rst b/doc/c/string_views.rst
new file mode 100644
index 00000000..2ae7d29b
--- /dev/null
+++ b/doc/c/string_views.rst
@@ -0,0 +1,58 @@
+String Views
+============
+
+.. default-domain:: c
+.. highlight:: c
+
+For performance reasons,
+most functions in serd that take a string take a :struct:`SerdStringView`,
+rather than a bare pointer.
+This forces code to be explicit about string measurement,
+which discourages common patterns of repeated measurement of the same string.
+For convenience, several macros are provided for constructing string views:
+
+:macro:`SERD_EMPTY_STRING`
+
+ Constructs a view of an empty string, for example:
+
+ .. literalinclude:: overview_code.c
+ :start-after: begin make-empty-string
+ :end-before: end make-empty-string
+ :dedent: 2
+
+:macro:`SERD_STRING`
+
+ Constructs a view of an entire string or string literal, for example:
+
+ .. literalinclude:: overview_code.c
+ :start-after: begin make-static-string
+ :end-before: end make-static-string
+ :dedent: 2
+
+ or:
+
+ .. literalinclude:: overview_code.c
+ :start-after: begin measure-string
+ :end-before: end measure-string
+ :dedent: 2
+
+ This macro calls ``strlen`` to measure the string.
+ Modern compilers will optimise this away if the parameter is a string literal.
+
+:macro:`SERD_SUBSTRING`
+
+ Constructs a view of a slice of a string with an explicit length,
+ for example:
+
+ .. literalinclude:: overview_code.c
+ :start-after: begin make-string-view
+ :end-before: end make-string-view
+ :dedent: 2
+
+ This macro can also be used to create a view of a pre-measured string.
+ If the length a dynamic string is already known,
+ it is faster to use this than :macro:`SERD_STRING`.
+
+These macros can be used inline when passing parameters,
+but if the same dynamic string is used several times,
+it is better to make a string view variable to avoid redundant measurement.
diff --git a/doc/c/using_serd.rst b/doc/c/using_serd.rst
new file mode 100644
index 00000000..cfe57c4c
--- /dev/null
+++ b/doc/c/using_serd.rst
@@ -0,0 +1,15 @@
+##########
+Using Serd
+##########
+
+.. toctree::
+
+ overview
+ string_views
+ nodes
+ statements
+ world
+ model
+ reading_and_writing
+ stream_processing
+
diff --git a/doc/c/world.rst b/doc/c/world.rst
new file mode 100644
index 00000000..31cbe16b
--- /dev/null
+++ b/doc/c/world.rst
@@ -0,0 +1,48 @@
+World
+=====
+
+.. default-domain:: c
+.. highlight:: c
+
+So far, we have only used nodes and statements,
+which are simple independent objects.
+Higher-level facilities in Serd require a :struct:`SerdWorld`,
+which represents the global library state.
+
+A program typically uses just one world,
+which can be constructed using :func:`serd_world_new`:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin world-new
+ :end-before: end world-new
+ :dedent: 2
+
+All "global" library state is handled explicitly via the world.
+Serd does not contain any static mutable data,
+allowing it to be used concurrently in several parts of a program,
+for example in plugins.
+
+If multiple worlds *are* used in a single program,
+they must never be mixed:
+objects "inside" one world can not be used with objects inside another.
+
+Note that the world is not a database,
+it only manages a small amount of library state for things like configuration and logging.
+
+Generating Blanks
+-----------------
+
+Blank nodes, or simply "blanks",
+are used for resources that do not have URIs.
+Unlike URIs, they are not global identifiers,
+and only have meaning within their local context (for example, a document).
+The world provides a method for automatically generating unique blank identifiers:
+
+.. literalinclude:: overview_code.c
+ :start-after: begin get-blank
+ :end-before: end get-blank
+ :dedent: 2
+
+Note that the returned pointer is to a node that will be updated on the next call to :func:`serd_world_get_blank`,
+so it is usually best to copy the node,
+like in the example above.
diff --git a/doc/command_line_tools.rst.in b/doc/command_line_tools.rst.in
new file mode 100644
index 00000000..078e9c4d
--- /dev/null
+++ b/doc/command_line_tools.rst.in
@@ -0,0 +1,14 @@
+##################
+Command-Line Tools
+##################
+
+Serd includes several tools that can be used to process data on the command-line.
+Each is documented by their own man page:
+
+ * @SERD_PIPE_LINK@ is a streaming tool for reading and writing documents.
+
+ * @SERD_SORT_LINK@ is similar to serd-pipe, but loads data into an in-memory model instead of streaming.
+
+ * @SERD_FILTER_LINK@ is a ``grep``-like statement filtering tool.
+
+ * @SERD_VALIDATE_LINK@ validates data and prints warnings where data is invalid according to the schemas it uses.
diff --git a/doc/conf.py.in b/doc/conf.py.in
index bc64c9f3..cfb67183 100644
--- a/doc/conf.py.in
+++ b/doc/conf.py.in
@@ -4,6 +4,7 @@ project = "Serd"
copyright = "2021, David Robillard"
author = "David Robillard"
release = "@SERD_VERSION@"
+version = "@SERD_VERSION@"
# General configuration
@@ -25,11 +26,9 @@ _opaque = [
"SerdCaretImpl",
"SerdCursorImpl",
"SerdEnvImpl",
- "SerdIterImpl",
"SerdModelImpl",
"SerdNodeImpl",
"SerdNodesImpl",
- "SerdRangeImpl",
"SerdReaderImpl",
"SerdSinkImpl",
"SerdStatementImpl",
@@ -54,6 +53,7 @@ nitpick_ignore = list(map(lambda x: ("c:identifier", x), _opaque))
# HTML output
html_copy_source = False
+html_secnumber_suffix = " "
html_short_title = "Serd"
html_static_path = ["../_static"]
html_theme = "sphinx_lv2_theme"
@@ -116,3 +116,12 @@ else:
"globaltoc_maxdepth": 1,
"globaltoc_collapse": True,
}
+
+# EPub output
+
+epub_show_urls = "no"
+epub_cover = ("../_static/serd.svg", "")
+epub_description = "Serd, a lightweight library for working with RDF"
+epub_title = "Serd @SERD_VERSION@ Documentation"
+epub_basename = "Serd-@SERD_VERSION@"
+epub_css_files = ["epubstyle.css"]
diff --git a/doc/data_model.rst b/doc/data_model.rst
new file mode 100644
index 00000000..a0ee2e46
--- /dev/null
+++ b/doc/data_model.rst
@@ -0,0 +1,107 @@
+##########
+Data Model
+##########
+
+*********
+Structure
+*********
+
+Serd is based on RDF, a model for Linked Data.
+A deep understanding of what this means isn't necessary,
+but it is important to have a basic understanding of how this data is structured.
+
+The basic building block of data is the *node*,
+which is essentially a string with some extra type information.
+A *statement* is a tuple of 3 or 4 nodes.
+All information is represented by a set of statements,
+which makes this model structurally very simple:
+any document or database is essentially a single table with 3 or 4 columns.
+This is easiest to see in NTriples or NQuads documents,
+which are simple flat files with a single statement per line.
+
+There are, however, some restrictions.
+Each node in a statement has a specific role:
+subject, predicate, object, and (optionally) graph, in that order.
+A statement declares that a subject has some property.
+The predicate identifies the property,
+and the object is its value.
+
+A statement is a bit like a very simple machine-readable sentence.
+The "subject" and "object" are as in natural language,
+and the predicate is something like a verb (but much more general).
+For example, we could make a statement in English
+about your intrepid author:
+
+ drobilla has the first name David
+
+We can break this statement into 3 pieces like so:
+
+.. list-table::
+ :header-rows: 1
+
+ * - Subject
+ - Predicate
+ - Object
+ * - drobilla
+ - has the first name
+ - David
+
+The subject and predicate must be *resources* with an identifier,
+so we will need to define some URIs to represent this statement.
+Conventionally, predicate names do not start with "has" or similar words,
+since that would be redundant in this context.
+So,
+we assume that ``http://example.org/drobilla`` is the URI for drobilla,
+and that ``http://example.org/firstName`` has been defined as the appropriate property ("has the first name"),
+and can represent the statement in a machine-readable way:
+
+.. list-table::
+ :header-rows: 1
+
+ * - Subject
+ - Predicate
+ - Object
+ * - ``http://example.org/drobilla``
+ - ``http://example.org/firstName``
+ - David
+
+Which can be written in NTriples like so::
+
+ <http://example.org/drobilla> <http://example.org/firstName> "David" .
+
+*****************
+Working with Data
+*****************
+
+The power of this data model lies in its uniform "physical" structure,
+and the use of URIs as a decentralized namespace mechanism.
+In particular, it makes filtering, merging, and otherwise "mixing" data from various sources easy.
+
+For example, we could add some statements to the above example to better describe the same subject::
+
+ <http://example.org/drobilla> <http://example.org/firstName> "David" .
+ <http://example.org/drobilla> <http://example.org/lastName> "Robillard" .
+
+We could also add information about other subjects::
+
+ <http://drobilla.net/sw/serd> <http://example.org/programmingLanguage> "C" .
+
+Including statements that relate them to each other::
+
+ <http://example.org/drobilla> <http://example.org/wrote> <http://drobilla.net/sw/serd> .
+
+Note that there is no "physical" tree structure here,
+which is an important distinction from structured document formats like XML or JSON.
+Since all information is just a set of statements,
+the information in two documents,
+for example,
+can be combined by simply concatenating the documents.
+Similarly,
+any arbitrary subset of statements in a document can be separated into a new document.
+The use of URIs enables such things even with data from many independent sources,
+without any need to agree on a common schema.
+
+In practice, sharing URI "vocabulary" is encouraged since this is how different parties can have a shared understanding of what data *means*.
+That, however, is a higher-level application concern.
+Only the "physical" structure of data described here is important for understanding how Serd works,
+and what its tools and APIs can do.
diff --git a/doc/getting_started.rst b/doc/getting_started.rst
new file mode 100644
index 00000000..370f0c32
--- /dev/null
+++ b/doc/getting_started.rst
@@ -0,0 +1,88 @@
+###############
+Getting Started
+###############
+
+***********
+Downloading
+***********
+
+Serd is distributed in several ways.
+There are no "official" binaries, only source code releases which must be compiled.
+However, many operating system distributions do package binaries.
+Check if your package manager has a reasonably recent package,
+if so,
+that is the easiest and most reliable installation method.
+
+Release announcements with links to source code archives can be found at `<https://drobilla.net/category/serd/>`_.
+All release archives and their signatures are available in the directory `<http://download.drobilla.net/>`_.
+
+The code can also be checked out of `git <https://gitlab.com/drobilla/serd>`_::
+
+ git clone https://gitlab.com/drobilla/serd.git
+
+*********
+Compiling
+*********
+
+Serd uses the `meson <https://mesonbuild.com/>`_ build system.
+From within an extracted release archive or repository checkout,
+the library can be built and tested with default options like so::
+
+ meson setup build
+ cd build
+ ninja test
+
+There are many configuration options,
+which can be displayed by running ``meson configure``.
+
+See the `meson documentation <https://mesonbuild.com/Quick-guide.html>`_ for more details on using meson.
+
+**********
+Installing
+**********
+
+If the library compiled successfully,
+then ``ninja install`` can be used to install it.
+Note that you may need superuser privileges, for example::
+
+ sudo ninja install
+
+The installation prefix can be changed by setting the ``prefix`` option, for example::
+
+ meson configure -Dprefix=/opt/serd
+
+If you do not want to install anything,
+you can also "vendor" the code in your project
+(provided, of course, that you adhere to the terms of the license).
+If you are using meson,
+then it should simply work as a subproject without modification.
+Otherwise,
+you will need to set up the build yourself.
+
+*********
+Including
+*********
+
+Serd installs a `pkg-config <https://www.freedesktop.org/wiki/Software/pkg-config/>`_ file,
+which can be used to set the appropriate compiler and linker flags for projects to use it.
+If installed to a standard prefix,
+then it should show up in ``pkg-config`` automatically::
+
+ pkg-config --list-all | grep serd
+
+If not, you may need to adjust the ``PKG_CONFIG_PATH`` environment variable to include the installation prefix, for example::
+
+ export PKG_CONFIG_PATH=/opt/serd/lib/pkgconfig
+ pkg-config --list-all | grep serd
+
+Most popular build systems natively support pkg-config.
+For example, in meson::
+
+ serd_dep = dependency('serd-1')
+
+On systems where pkg-config is not available,
+you will need to set up compiler and linker flags manually,
+by adding something like ``-I/opt/serd/include/serd-1``,
+and ``-lserd-1``, respectively.
+
+Once things are set up, you should be able to include the API header and start using Serd in your code.
diff --git a/doc/summary.rst b/doc/summary.rst
index 4b8d6e4e..8ce04ca8 100644
--- a/doc/summary.rst
+++ b/doc/summary.rst
@@ -1,4 +1,4 @@
-Serd is a lightweight C library for reading and writing RDF in Turtle_, NTriples_, NQuads_, and TriG_.
+Serd is a lightweight C library and set of command-line utilities for working with RDF data in Turtle_, NTriples_, NQuads_, and TriG_ formats.
.. _Turtle: http://www.w3.org/TR/turtle/
.. _NTriples: http://www.w3.org/TR/n-triples/