diff options
author | David Robillard <d@drobilla.net> | 2021-03-28 13:42:35 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2022-01-28 21:57:29 -0500 |
commit | f8a59da9c492b7df38f53ba96505313e931d76cc (patch) | |
tree | 5bf1e44e67f8662894a37fbc84d770585f5957dd /doc/c | |
parent | ac0ac05ccf96dee4406db8bdd4d098d3de61c01f (diff) | |
download | serd-f8a59da9c492b7df38f53ba96505313e931d76cc.tar.gz serd-f8a59da9c492b7df38f53ba96505313e931d76cc.tar.bz2 serd-f8a59da9c492b7df38f53ba96505313e931d76cc.zip |
Add high-level documentation
Diffstat (limited to 'doc/c')
-rw-r--r-- | doc/c/.clang-tidy | 12 | ||||
-rw-r--r-- | doc/c/index.rst | 6 | ||||
-rw-r--r-- | doc/c/meson.build | 53 | ||||
-rw-r--r-- | doc/c/model.rst | 237 | ||||
-rw-r--r-- | doc/c/nodes.rst | 66 | ||||
-rw-r--r-- | doc/c/overview.rst | 87 | ||||
-rw-r--r-- | doc/c/overview_code.c | 459 | ||||
-rw-r--r-- | doc/c/reading_and_writing.rst | 149 | ||||
-rw-r--r-- | doc/c/statements.rst | 123 | ||||
-rw-r--r-- | doc/c/stream_processing.rst | 47 | ||||
-rw-r--r-- | doc/c/string_views.rst | 58 | ||||
-rw-r--r-- | doc/c/using_serd.rst | 15 | ||||
-rw-r--r-- | doc/c/world.rst | 48 |
13 files changed, 1342 insertions, 18 deletions
diff --git a/doc/c/.clang-tidy b/doc/c/.clang-tidy new file mode 100644 index 00000000..1772d682 --- /dev/null +++ b/doc/c/.clang-tidy @@ -0,0 +1,12 @@ +Checks: > + *, + -*-magic-numbers, + -*-uppercase-literal-suffix, + -altera-struct-pack-align, + -clang-analyzer-deadcode.DeadStores, + -clang-analyzer-nullability.NullablePassedToNonnull, + -hicpp-signed-bitwise, + -llvmlibc-*, +WarningsAsErrors: '*' +HeaderFilterRegex: '.*' +FormatStyle: file diff --git a/doc/c/index.rst b/doc/c/index.rst index fe14fc3b..1df161cf 100644 --- a/doc/c/index.rst +++ b/doc/c/index.rst @@ -5,6 +5,10 @@ Serd .. include:: summary.rst .. toctree:: + :numbered: - overview + getting_started + data_model + command_line_tools + using_serd api/serd diff --git a/doc/c/meson.build b/doc/c/meson.build index 4e044f97..02f5afc9 100644 --- a/doc/c/meson.build +++ b/doc/c/meson.build @@ -1,6 +1,20 @@ config = configuration_data() config.set('SERD_VERSION', meson.project_version()) +if mandoc.found() + config.set('SERD_COMMAND_LINE_INDEX_ENTRY', '\n command_line_tools\n') + config.set('SERD_PIPE_LINK', '`serd-pipe <../../man/serd-pipe.html>`_') + config.set('SERD_SORT_LINK', '`serd-sort <../../man/serd-sort.html>`_') + config.set('SERD_FILTER_LINK', '`serd-filter <../../man/serd-filter.html>`_') + config.set('SERD_VALIDATE_LINK', '`serd-validate <../../man/serd-validate.html>`_') +else + config.set('SERD_COMMAND_LINE_INDEX_ENTRY', '') + config.set('SERD_PIPE_LINK', '``serd-pipe``') + config.set('SERD_SORT_LINK', '``serd-sort``') + config.set('SERD_FILTER_LINK', '``serd-filter``') + config.set('SERD_VALIDATE_LINK', '``serd-validate``') +endif + conf_py = configure_file(configuration: config, input: files('../conf.py.in'), output: 'conf.py') @@ -9,11 +23,31 @@ configure_file(copy: true, input: files('../summary.rst'), output: 'summary.rst') +configure_file(copy: true, + input: files('overview_code.c'), + output: 'overview_code.c') + +executable('overview_code', files('overview_code.c'), dependencies: 
[serd_dep]) + c_rst_files = files( + '../data_model.rst', + '../getting_started.rst', 'index.rst', + 'model.rst', + 'nodes.rst', 'overview.rst', + 'reading_and_writing.rst', + 'statements.rst', + 'stream_processing.rst', + 'string_views.rst', + 'using_serd.rst', + 'world.rst', ) +configure_file(configuration: config, + input: files('../command_line_tools.rst.in'), + output: 'command_line_tools.rst') + foreach f : c_rst_files configure_file(copy: true, input: f, output: '@PLAINNAME@') endforeach @@ -22,10 +56,10 @@ subdir('xml') subdir('api') docs = custom_target( - 'singlehtml documentation for serd', + 'singlehtml C documentation for serd', command: [sphinx_build, '-M', 'singlehtml', meson.current_build_dir(), meson.current_build_dir(), - '-E', '-q', '-t', 'singlehtml'], + '-W', '-E', '-a', '-q', '-t', 'singlehtml'], input: [c_rst_files, c_serd_rst, c_index_xml], output: 'singlehtml', build_by_default: true, @@ -33,12 +67,23 @@ docs = custom_target( install_dir: docdir / versioned_name) docs = custom_target( - 'html documentation for serd', + 'html C documentation for serd', command: [sphinx_build, '-M', 'html', meson.current_build_dir(), meson.current_build_dir(), - '-E', '-q', '-t', 'html'], + '-W', '-E', '-a', '-q', '-t', 'html'], input: [c_rst_files, c_serd_rst, c_index_xml], output: 'html', build_by_default: true, install: true, install_dir: docdir / versioned_name) + +docs = custom_target( + 'epub C documentation for serd', + command: [sphinx_build, '-M', 'epub', + meson.current_build_dir(), meson.current_build_dir(), + '-W', '-E', '-a', '-q', '-t', 'epub'], + input: [c_rst_files, c_serd_rst, c_index_xml], + output: 'epub', + build_by_default: true, + install: true, + install_dir: docdir / versioned_name) diff --git a/doc/c/model.rst b/doc/c/model.rst new file mode 100644 index 00000000..399f370b --- /dev/null +++ b/doc/c/model.rst @@ -0,0 +1,237 @@ +Model +===== + +.. default-domain:: c +.. 
highlight:: c + +A :struct:`SerdModel` is an indexed set of statements. +A model can be used to store any data set, +from a few statements (for example, a protocol message), +to an entire document, +to a database with millions of statements. + +A new model can be created with :func:`serd_model_new`: + +.. literalinclude:: overview_code.c + :start-after: begin model-new + :end-before: end model-new + :dedent: 2 + +The information to store for each statement can be controlled by passing flags. +Additional indices can also be enabled with :func:`serd_model_add_index`. +For example, to be able to quickly search by predicate, +and store a caret for each statement, +the model can be constructed with the :enumerator:`SERD_STORE_CARETS` flag, +and an additional :enumerator:`SERD_ORDER_PSO` index can be added like so: + +.. literalinclude:: overview_code.c + :start-after: begin fancy-model-new + :end-before: end fancy-model-new + :dedent: 2 + +Accessors +--------- + +The flags set for a model can be accessed with :func:`serd_model_flags`. + +The number of statements can be accessed with :func:`serd_model_size` and :func:`serd_model_empty`: + +.. literalinclude:: overview_code.c + :start-after: begin model-size + :end-before: end model-size + :dedent: 2 + +Adding Statements +----------------- + +Statements can be added to a model with :func:`serd_model_add`: + +.. literalinclude:: overview_code.c + :start-after: begin model-add + :end-before: end model-add + :dedent: 2 + +Alternatively, :func:`serd_model_insert` can be used if you already have a statement. +For example, the first statement in one model could be added to another like so: + +.. literalinclude:: overview_code.c + :start-after: begin model-insert + :end-before: end model-insert + :dedent: 2 + +An entire range of statements can be inserted at once with :func:`serd_model_insert_statements`. +For example, all statements in one model could be copied into another like so: + +.. 
literalinclude:: overview_code.c + :start-after: begin model-add-range + :end-before: end model-add-range + :dedent: 2 + +Iteration +--------- + +An iterator is a reference to a particular statement in a model. +:func:`serd_model_begin` returns an iterator to the first statement in the model, +and :func:`serd_model_end` returns a sentinel that is one past the last statement in the model: + +.. literalinclude:: overview_code.c + :start-after: begin model-begin-end + :end-before: end model-begin-end + :dedent: 2 + +A cursor can be advanced to the next statement with :func:`serd_cursor_advance`, +which returns :enumerator:`SERD_FAILURE` if the iterator reached the end: + +.. literalinclude:: overview_code.c + :start-after: begin iter-next + :end-before: end iter-next + :dedent: 2 + +Iterators are dynamically allocated, +and must eventually be destroyed with :func:`serd_cursor_free`: + +.. literalinclude:: overview_code.c + :start-after: begin iter-free + :end-before: end iter-free + :dedent: 2 + +Pattern Matching +---------------- + +There are several functions that can be used to quickly find statements in the model that match a pattern. +The simplest is :func:`serd_model_ask` which checks if there is any matching statement: + +.. literalinclude:: overview_code.c + :start-after: begin model-ask + :end-before: end model-ask + :dedent: 2 + +To access the unknown fields, +an iterator to the matching statement can be found with :func:`serd_model_find` instead: + +.. literalinclude:: overview_code.c + :start-after: begin model-find + :end-before: end model-find + :dedent: 2 + +To iterate over the matching statements, +the iterator returned by :func:`serd_model_find` can be advanced. +It will reach its end when it reaches the last matching statement: + +.. 
literalinclude:: overview_code.c + :start-after: begin model-range + :end-before: end model-range + :dedent: 2 + + +Similar to :func:`serd_model_ask`, +:func:`serd_model_count` can be used to count the number of matching statements: + +.. literalinclude:: overview_code.c + :start-after: begin model-count + :end-before: end model-count + :dedent: 2 + +Indexing +-------- + +A model can contain several indices that use different orderings to support different kinds of queries. +For good performance, +there should be an index where the least significant fields in the ordering correspond to wildcards in the pattern +(or, in other words, one where the most significant fields in the ordering correspond to nodes given in the pattern). +The table below lists the indices that best support a kind of pattern, +where a "?" represents a wildcard in the pattern. + ++---------+--------------+ +| Pattern | Good Indices | ++=========+==============+ +| s p o | Any | ++---------+--------------+ +| s p ? | SPO, PSO | ++---------+--------------+ +| s ? o | SOP, OSP | ++---------+--------------+ +| s ? ? | SPO, SOP | ++---------+--------------+ +| ? p o | POS, OPS | ++---------+--------------+ +| ? p ? | POS, PSO | ++---------+--------------+ +| ? ? o | OSP, OPS | ++---------+--------------+ +| ? ? ? | Any | ++---------+--------------+ + +If graphs are enabled, +then statements are indexed both with and without the graph fields, +so queries with and without a graph wildcard will have similar performance. + +Since indices take up space and slow down insertion, +it is best to enable the fewest indices possible that cover the queries that will be performed. +For example, +an application might enable just SPO and OPS order, +because it always searches for specific subjects or objects, +but never for just a predicate without specifying any other field. 
+ +Getting Values +-------------- + +Sometimes you are only interested in a single node, +and it is cumbersome to first search for a statement and then get the node from it. +A more convenient way is to use :func:`serd_model_get`. +To get a value, specify a triple pattern where exactly one of the subject, predicate, and object is a wildcard. +If a statement matches, then the node that "fills" the wildcard will be returned: + +.. literalinclude:: overview_code.c + :start-after: begin model-get + :end-before: end model-get + :dedent: 2 + +If multiple statements match the pattern, +then the matching node from an arbitrary statement is returned. +It is an error to specify more than one wildcard, excluding the graph. + +The similar :func:`serd_model_get_statement` instead returns the matching statement: + +.. literalinclude:: overview_code.c + :start-after: begin model-get-statement + :end-before: end model-get-statement + :dedent: 2 + +Erasing Statements +------------------ + +Individual statements can be erased with :func:`serd_model_erase`, +which takes a cursor: + +.. literalinclude:: overview_code.c + :start-after: begin model-erase + :end-before: end model-erase + :dedent: 2 + +The similar :func:`serd_model_erase_statements` will erase all statements in the cursor's range: + +.. literalinclude:: overview_code.c + :start-after: begin model-erase-range + :end-before: end model-erase-range + :dedent: 2 + +Lifetime +-------- + +Models are value-like and can be copied with :func:`serd_model_copy` and compared with :func:`serd_model_equals`: + +.. literalinclude:: overview_code.c + :start-after: begin model-copy + :end-before: end model-copy + :dedent: 2 + +When a model is no longer needed, it can be destroyed with :func:`serd_model_free`: + +.. 
literalinclude:: overview_code.c + :start-after: begin model-free + :end-before: end model-free + :dedent: 2 + +Destroying a model invalidates all nodes and statements within that model, +so care should be taken to ensure that no dangling pointers are created. diff --git a/doc/c/nodes.rst b/doc/c/nodes.rst new file mode 100644 index 00000000..c55dcedb --- /dev/null +++ b/doc/c/nodes.rst @@ -0,0 +1,66 @@ +Nodes +===== + +.. default-domain:: c +.. highlight:: c + +Nodes are the basic building blocks of data. +Nodes are essentially strings, +but also have a :enum:`type <SerdNodeType>`, +and optionally either a datatype or a language. + +In the abstract, a node is either a literal, a URI, or blank. +Literals are essentially strings, +but may have a datatype or a language tag. +URIs are used to identify resources, +as are blank nodes, +except blank nodes only have labels with a limited scope and may be written anonymously. + +Serd also has a type for variable nodes, +which are used for some features but not present in RDF data. + +Fundamental Constructors +------------------------ + +To allow the application to manage node memory, +node constructors are provided that construct nodes in existing memory buffers. +The universal constructor :func:`serd_node_construct` can construct any type of node, +but is somewhat verbose and tricky to use. 
+ +Several constructors for more specific types of node are also available: + +- :func:`serd_node_construct_token` +- :func:`serd_node_construct_uri` +- :func:`serd_node_construct_file_uri` +- :func:`serd_node_construct_literal` +- :func:`serd_node_construct_value` +- :func:`serd_node_construct_decimal` +- :func:`serd_node_construct_integer` +- :func:`serd_node_construct_base64` + +If explicit memory management is not required, +high-level constructors that allocate nodes on the heap can be used instead: + +- :func:`serd_new_token` +- :func:`serd_new_uri` +- :func:`serd_new_file_uri` +- :func:`serd_new_literal` +- :func:`serd_new_value` +- :func:`serd_new_decimal` +- :func:`serd_new_integer` +- :func:`serd_new_base64` + +Accessors +--------- + +The basic attributes of a node can be accessed with :func:`serd_node_type`, +:func:`serd_node_string`, +and :func:`serd_node_length`. + +A measured view of the string can be accessed with :func:`serd_node_string_view`. +This can be passed to functions that take a string view, +to avoid redundant measurement of the node string. + +The datatype or language can be retrieved with :func:`serd_node_datatype` or :func:`serd_node_language`, respectively. +Note that only literals can have a datatype or language, +but never both at once. diff --git a/doc/c/overview.rst b/doc/c/overview.rst index 2b204155..296c9042 100644 --- a/doc/c/overview.rst +++ b/doc/c/overview.rst @@ -1,22 +1,83 @@ -######## Overview -######## +======== .. default-domain:: c .. highlight:: c -The API revolves around two main types: the :doc:`api/serd_reader`, -which reads text and fires callbacks, -and the :doc:`api/serd_writer`, -which writes text when driven by corresponding functions. -Both work in a streaming fashion but still support pretty-printing, -so the pair can be used to pretty-print, translate, -or otherwise process arbitrarily large documents very quickly. 
-The context of a stream is tracked by the :doc:`api/serd_env`, -which stores the current base URI and set of namespace prefixes. - -The complete API is declared in ``serd.h``: +The serd API is declared in ``serd.h``: .. code-block:: c #include <serd/serd.h> + +An instance of serd is represented by a :doc:`api/serd_world`, +which manages "global" facilities like memory allocation and logging. +The rest of the API can be broadly grouped into four categories: + +Data + A :doc:`api/serd_node` is the basic building block of data, + and 3 or 4 nodes together make a :doc:`api/serd_statement`. + All data is expressed in statements. + +Streams + Components communicate by sending and receiving streams of data. + Data is streamed via :doc:`api/serd_sink`, + which is an abstract interface that receives :doc:`api/serd_event`. + The fundamental event is a statement event, + but there are a few additional event types that describe context which is useful for things like pretty-printing. + + Some components both send and receive data, + which allows them to be inserted in a `pipeline` to process the data as it streams through. + For example, + a :doc:`api/serd_canon` converts literals to canonical form, + and a :doc:`api/serd_filter` filters statements that match (or do not match) some pattern. + + An event stream describes changes to data and its context, + but does not store the context. + For that, an associated :doc:`api/serd_env` is maintained. + This stores the active base URI and namespace prefixes which can, + for example, + be used to write output with the same abbreviations used in the source. + +Reading and Writing + Reading and writing data is performed using a :doc:`api/serd_reader`, + which reads text and emits data to a sink, + and a :doc:`api/serd_writer`, + which is a sink that writes the incoming data as text. 
+ Both work in a streaming fashion so that large documents can be pretty-printed, + translated, + or otherwise processed quickly using only a small amount of memory. + +Storage + A set of statements can be stored in memory as a :doc:`api/serd_model`. + This supports quickly searching and scanning statements, + provided an appropriate index is enabled. + + Data can be loaded into a model via an :doc:`api/serd_inserter`, + which is a sink that inserts incoming statements into a model. + Data in a model can be written out by calling :func:`serd_describe_range` on the desired range of statements. + +The sink interface acts as a generic connection which can be used to build custom data processing pipelines. +For example, +a simple pipeline to read a document, filter out some statements, and write the result to a new file, +would look something like: + +.. image:: ../_static/writer_pipeline.svg + +Here, dotted arrows represent event streams, +and solid arrows represent explicit use of a component. +In other words, dotted arrows represent connections via the abstract :doc:`api/serd_sink` interface. +In this case both reader and writer are using the same environment, +so the output document will have the same abbreviations as the input. +It is also possible to use different environments, +for example to set additional namespace prefixes to further abbreviate the document. + +Similarly, a document could be loaded into a model with canonical literals using a pipeline like: + +.. image:: ../_static/model_pipeline.svg + +Many other useful pipelines can be built using the components in serd, +and applications can implement custom ones to add additional functionality. + +The following documentation gives a more detailed bottom-up introduction to the API, +with links to the complete reference where further detail can be found. 
diff --git a/doc/c/overview_code.c b/doc/c/overview_code.c new file mode 100644 index 00000000..0b7b5600 --- /dev/null +++ b/doc/c/overview_code.c @@ -0,0 +1,459 @@ +/* + Copyright 2021 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +/* + Example code that is included in the documentation. Code in the + documentation is included from here rather than written inline so that it can + be tested and avoid rotting. The code here doesn't make much sense, but is + written such that it at least compiles and will run without crashing. 
+*/ + +#include "serd/serd.h" + +#include <assert.h> +#include <stdbool.h> +#include <stdio.h> + +#if defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +static void +string_views(void) +{ + static const char* const string_pointer = "some string"; + + // begin make-empty-string + SerdStringView empty = SERD_EMPTY_STRING(); + // end make-empty-string + + // begin make-static-string + SerdStringView hello = SERD_STRING("hello"); + // end make-static-string + + // begin measure-string + SerdStringView view = SERD_STRING(string_pointer); + // end measure-string + + // begin make-string-view + SerdStringView slice = SERD_SUBSTRING(string_pointer, 4); + // end make-string-view +} + +static void +statements(void) +{ + SerdNodes* nodes = serd_nodes_new(NULL); + + // begin statement-new + SerdStatement* statement = serd_statement_new( + NULL, + serd_nodes_uri(nodes, SERD_STRING("http://example.org/drobilla")), + serd_nodes_uri(nodes, SERD_STRING("http://example.org/firstName")), + serd_nodes_string(nodes, SERD_STRING("David")), + NULL, + NULL); + // end statement-new + + serd_statement_free(NULL, statement); + serd_nodes_free(nodes); +} + +static void +statements_accessing_fields(void) +{ + SerdNode* ss = serd_new_uri(NULL, SERD_STRING("http://example.org/s")); + SerdNode* sp = serd_new_uri(NULL, SERD_STRING("http://example.org/p")); + SerdNode* so = serd_new_uri(NULL, SERD_STRING("http://example.org/o")); + + SerdStatement* statement = serd_statement_new(NULL, ss, sp, so, NULL, NULL); + + // begin get-subject + const SerdNode* s = serd_statement_node(statement, SERD_SUBJECT); + // end get-subject + + // begin get-pog + const SerdNode* p = serd_statement_predicate(statement); + const SerdNode* o = serd_statement_object(statement); + const SerdNode* g = serd_statement_graph(statement); + // end get-pog + + // begin get-caret + const SerdCaret* c = serd_statement_caret(statement); + // end get-caret +} + +static void 
+statements_comparison(void) +{ + SerdNode* ss = serd_new_uri(NULL, SERD_STRING("http://example.org/s")); + SerdNode* sp = serd_new_uri(NULL, SERD_STRING("http://example.org/p")); + SerdNode* so = serd_new_uri(NULL, SERD_STRING("http://example.org/o")); + + SerdStatement* statement1 = serd_statement_new(NULL, ss, sp, so, NULL, NULL); + SerdStatement* statement2 = serd_statement_new(NULL, ss, sp, so, NULL, NULL); + + // begin statement-equals + if (serd_statement_equals(statement1, statement2)) { + printf("Match\n"); + } + // end statement-equals + + SerdStatement* statement = statement1; + + // begin statement-matches + SerdNode* eg_name = + serd_new_uri(NULL, SERD_STRING("http://example.org/name")); + + if (serd_statement_matches(statement, NULL, eg_name, NULL, NULL)) { + printf("%s has name %s\n", + serd_node_string(serd_statement_subject(statement)), + serd_node_string(serd_statement_object(statement))); + } + // end statement-matches +} + +static void +statements_lifetime(void) +{ + SerdStatement* statement = NULL; + + // begin statement-copy + SerdStatement* copy = serd_statement_copy(NULL, statement); + // end statement-copy + + // begin statement-free + serd_statement_free(NULL, copy); + // end statement-free +} + +static void +world(void) +{ + // begin world-new + SerdWorld* world = serd_world_new(NULL); + // end world-new + + // begin get-blank + const SerdNode* world_blank = serd_world_get_blank(world); + SerdNode* my_blank = serd_node_copy(NULL, world_blank); + // end get-blank +} + +static void +model(void) +{ + SerdWorld* world = serd_world_new(NULL); + + // begin model-new + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0u); + // end model-new + + // begin fancy-model-new + SerdModel* fancy_model = + serd_model_new(world, SERD_ORDER_SPO, SERD_STORE_CARETS); + + serd_model_add_index(fancy_model, SERD_ORDER_PSO); + // end fancy-model-new + + // begin model-copy + SerdModel* copy = serd_model_copy(NULL, model); + + 
assert(serd_model_equals(copy, model)); + // end model-copy + + // begin model-size + if (serd_model_empty(model)) { + printf("Model is empty\n"); + } else if (serd_model_size(model) > 1000) { + printf("Model has over 1000 statements\n"); + } + // end model-size + + // begin model-free + serd_model_free(copy); + // end model-free + + // begin model-add + SerdNodes* nodes = serd_nodes_new(NULL); + + serd_model_add( + model, + serd_nodes_uri(nodes, SERD_STRING("http://example.org/thing")), // S + serd_nodes_uri(nodes, SERD_STRING("http://example.org/name")), // P + serd_nodes_string(nodes, SERD_STRING("Thing")), // O + NULL); // G + // end model-add + + SerdModel* other_model = model; + + // begin model-insert + const SerdCursor* cursor = serd_model_begin(other_model); + + serd_model_insert(model, serd_cursor_get(cursor)); + // end model-insert + + // begin model-add-range + SerdCursor* other_range = serd_model_begin(other_model); + + serd_model_insert_statements(model, other_range); + + serd_cursor_free(other_range); + // end model-add-range + + // begin model-begin-end + SerdCursor* i = serd_model_begin(model); + if (serd_cursor_equals(i, serd_model_end(model))) { + printf("Model is empty\n"); + } else { + const SerdStatement* s = serd_cursor_get(i); + + printf("First statement subject: %s\n", + serd_node_string(serd_statement_subject(s))); + } + // end model-begin-end + + // begin iter-next + if (!serd_cursor_advance(i)) { + const SerdStatement* s = serd_cursor_get(i); + + printf("Second statement subject: %s\n", + serd_node_string(serd_statement_subject(s))); + } + // end iter-next + + // begin iter-free + serd_cursor_free(i); + // end iter-free + + // begin model-all + SerdCursor* all = serd_model_begin(model); + // end model-all + + // begin range-next + if (serd_cursor_is_end(all)) { + printf("Model is empty\n"); + } else { + const SerdStatement* s = serd_cursor_get(all); + + printf("First statement subject: %s\n", + 
serd_node_string(serd_statement_subject(s))); + } + + if (!serd_cursor_advance(all)) { + const SerdStatement* s = serd_cursor_get(all); + + printf("Second statement subject: %s\n", + serd_node_string(serd_statement_subject(s))); + } + // end range-next + + // begin model-ask + const SerdNode* rdf_type = serd_nodes_uri( + nodes, SERD_STRING("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")); + + if (serd_model_ask(model, NULL, rdf_type, NULL, NULL)) { + printf("Model contains a type statement\n"); + } + // end model-ask + + // Add a statement so that the searching examples below work + SerdNode* inst = serd_new_uri(NULL, SERD_STRING("http://example.org/i")); + SerdNode* type = serd_new_uri(NULL, SERD_STRING("http://example.org/T")); + serd_model_add(model, inst, rdf_type, type, NULL); + + // begin model-find + SerdCursor* it = serd_model_find(model, NULL, rdf_type, NULL, NULL); + + const SerdStatement* statement = serd_cursor_get(it); + const SerdNode* instance = + statement ? serd_statement_subject(statement) : NULL; + // end model-find + + // begin model-count + size_t n = serd_model_count(model, instance, rdf_type, NULL, NULL); + printf("Instance has %zu types\n", n); + // end model-count + + // begin model-range + SerdCursor* range = serd_model_find(model, + instance, // Subject = instance + rdf_type, // Predicate = rdf:type + NULL, // Object = anything + NULL); // Graph = anything + + for (; !serd_cursor_is_end(range); serd_cursor_advance(range)) { + const SerdStatement* s = serd_cursor_get(range); + + printf("Instance has type %s\n", + serd_node_string(serd_statement_object(s))); + } + + serd_cursor_free(range); + // end model-range + + // begin model-get + const SerdNode* t = serd_model_get(model, + instance, // Subject + rdf_type, // Predicate + NULL, // Object + NULL); // Graph + if (t) { + printf("Instance has type %s\n", serd_node_string(t)); + } + // end model-get + + // begin model-get-statement + const SerdStatement* ts = + 
serd_model_get_statement(model, instance, rdf_type, NULL, NULL); + + if (ts) { + printf("Instance %s has type %s\n", + serd_node_string(serd_statement_subject(ts)), + serd_node_string(serd_statement_object(ts))); + } + // end model-get-statement + + // begin model-erase + SerdCursor* some_type = serd_model_find(model, NULL, rdf_type, NULL, NULL); + serd_model_erase(model, some_type); + serd_cursor_free(some_type); + // end model-erase + + // begin model-erase-range + SerdCursor* all_types = serd_model_find(model, NULL, rdf_type, NULL, NULL); + serd_model_erase_statements(model, all_types); + serd_cursor_free(all_types); + // end model-erase-range +} + +static void +reading_writing(void) +{ + SerdWorld* world = serd_world_new(NULL); + + // begin env-new + SerdStringView host = SERD_EMPTY_STRING(); + SerdStringView path = SERD_STRING("/some/file.ttl"); + SerdNode* base = serd_new_file_uri(NULL, path, host); + SerdEnv* env = serd_env_new(world, serd_node_string_view(base)); + // end env-new + + // begin env-set-prefix + serd_env_set_prefix( + env, + SERD_STRING("rdf"), + SERD_STRING("http://www.w3.org/1999/02/22-rdf-syntax-ns#")); + // end env-set-prefix + + // begin byte-sink-new + SerdOutputStream out = serd_open_output_file("/tmp/eg.ttl"); + // end byte-sink-new + + // clang-format off + // begin writer-new + SerdWriter* writer = serd_writer_new( + world, // World + SERD_TURTLE, // Syntax + 0, // Writer flags + env, // Environment + &out, // Output stream + 4096); // Block size + // end writer-new + + // begin reader-new + SerdReader* reader = serd_reader_new( + world, // World + SERD_TURTLE, // Syntax + 0, // Reader flags + env, // Environment + serd_writer_sink(writer), // Target sink + 4096); // Block size + // end reader-new + + // clang-format on + + // begin read-document + SerdStatus st = serd_reader_read_document(reader); + if (st) { + printf("Error reading document: %s\n", serd_strerror(st)); + } + // end read-document + + // begin reader-writer-free + 
serd_reader_free(reader); + serd_writer_free(writer); + // end reader-writer-free + + // begin byte-sink-free + serd_close_output(&out); + // end byte-sink-free + + // begin inserter-new + SerdModel* model = serd_model_new(world, SERD_ORDER_SPO, 0u); + SerdSink* inserter = serd_inserter_new(model, NULL); + // end inserter-new + + // begin model-reader-new + SerdReader* const model_reader = + serd_reader_new(world, SERD_TURTLE, 0, env, inserter, 4096); + + st = serd_reader_read_document(model_reader); + if (st) { + printf("Error loading model: %s\n", serd_strerror(st)); + } + // end model-reader-new + + // begin write-range + serd_describe_range(serd_model_begin(model), serd_writer_sink(writer), 0); + // end write-range + + // begin canon-new + SerdSink* canon = serd_canon_new(world, inserter, 0); + // end canon-new + + SerdNode* rdf_type = NULL; + + // begin filter-new + SerdSink* filter = serd_filter_new(world, // World + inserter, // Target + NULL, // Subject + rdf_type, // Predicate + NULL, // Object + NULL, // Graph + true); // Inclusive + // end filter-new +} + +int +main(void) +{ + string_views(); + statements(); + statements_accessing_fields(); + statements_comparison(); + statements_lifetime(); + world(); + model(); + reading_writing(); + + return 0; +} + +#if defined(__GNUC__) +# pragma GCC diagnostic pop +#endif diff --git a/doc/c/reading_and_writing.rst b/doc/c/reading_and_writing.rst new file mode 100644 index 00000000..1180d03d --- /dev/null +++ b/doc/c/reading_and_writing.rst @@ -0,0 +1,149 @@ +Reading and Writing +=================== + +.. default-domain:: c +.. highlight:: c + +Reading and writing documents in a textual syntax is handled by the :struct:`SerdReader` and :struct:`SerdWriter`, respectively. +Serd is designed around a concept of event streams, +so the reader or writer can be at the beginning or end of a "pipeline" of stream processors. 
+This allows large documents to be processed quickly in an "online" fashion, +while requiring only a small constant amount of memory. +If you are familiar with XML, +this is roughly analogous to SAX. + +A common simple setup is to connect a reader directly to a writer. +This can be used for things like pretty-printing, +or converting a document from one syntax to another. +This can be done by passing the sink returned by :func:`serd_writer_sink` to the reader constructor, :func:`serd_reader_new`. + +First, +in order to write a document, +an environment needs to be created. +This defines the base URI and any namespace prefixes, +which are used to resolve any relative URIs or prefixed names, +and may be used to abbreviate the output. +In most cases, the base URI should simply be the URI of the file being written. +For example: + +.. literalinclude:: overview_code.c + :start-after: begin env-new + :end-before: end env-new + :dedent: 2 + +Namespace prefixes can also be defined for any vocabularies used: + +.. literalinclude:: overview_code.c + :start-after: begin env-set-prefix + :end-before: end env-set-prefix + :dedent: 2 + +We now have an environment set up for our document, +but still need to specify where to write it. +This is done by creating a :struct:`SerdOutputStream`, +which is a generic interface that can be set up to write to a file, +a buffer in memory, +or a custom function that can be used to write output anywhere. +In this case, we will write to the file we set up as the base URI: + +.. literalinclude:: overview_code.c + :start-after: begin byte-sink-new + :end-before: end byte-sink-new + :dedent: 2 + +The output is written in blocks of a fixed size in bytes, +so I/O will be performed in chunks for better performance. +The block size is given when the writer is created below; the value used there, 4096, is a typical filesystem block size that should perform well on most machines. + +With an environment and output stream ready, +the writer can now be created: + +..
literalinclude:: overview_code.c + :start-after: begin writer-new + :end-before: end writer-new + :dedent: 2 + +Output is written by feeding statements and other events to the sink returned by :func:`serd_writer_sink`. +:struct:`SerdSink` is the generic interface for anything that can consume data streams. +Many objects provide the same interface to do various things with the data, +but in this case we will send data directly to the writer: + +.. literalinclude:: overview_code.c + :start-after: begin reader-new + :end-before: end reader-new + :dedent: 2 + +The third argument of :func:`serd_reader_new` takes a bitwise ``OR`` of :enum:`SerdReaderFlag` flags that can be used to configure the reader. +In this case no flags are given (``0``), +but one or more can be included. +For example, :enumerator:`SERD_READ_LAX` tolerates some invalid input without halting on an error, +and passing ``SERD_READ_LAX | SERD_READ_RELATIVE`` would enable lax mode and preserve relative URIs in the input. + +Now that we have a reader that is set up to directly push its output to a writer, +we can finally process the document: + +.. literalinclude:: overview_code.c + :start-after: begin read-document + :end-before: end read-document + :dedent: 2 + +Alternatively, one "chunk" of input can be read at a time with :func:`serd_reader_read_chunk`. +A "chunk" is generally one top-level description of a resource, +including any anonymous blank nodes in its description, +but this depends on the syntax and the structure of the document being read. + +The reader pushes events to its sink as input is read, +so in this scenario the data should now have been re-written by the writer +(assuming no error occurred). +To finish and ensure that a complete document has been read and written, +:func:`serd_reader_finish` can be called followed by :func:`serd_writer_finish`.
+However these will be automatically called on destruction if necessary, +so if the reader and writer are no longer required they can simply be destroyed: + +.. literalinclude:: overview_code.c + :start-after: begin reader-writer-free + :end-before: end reader-writer-free + :dedent: 2 + +Note that it is important to free the reader first in this case, +since finishing the read may push events to the writer. +Finally, closing the output with :func:`serd_close_output` will flush and close the output file, +so it is ready to be read again later. + +.. literalinclude:: overview_code.c + :start-after: begin byte-sink-free + :end-before: end byte-sink-free + :dedent: 2 + +Reading into a Model +-------------------- + +A document can be loaded into a model by setting up a reader that pushes data to a model "inserter" rather than a writer: + +.. literalinclude:: overview_code.c + :start-after: begin inserter-new + :end-before: end inserter-new + :dedent: 2 + +The process of reading the document is the same as above, +only the sink is different: + +.. literalinclude:: overview_code.c + :start-after: begin model-reader-new + :end-before: end model-reader-new + :dedent: 2 + +Writing a Model +--------------- + +A model, or parts of a model, can be written by writing the desired range with :func:`serd_describe_range`: + +.. literalinclude:: overview_code.c + :start-after: begin write-range + :end-before: end write-range + :dedent: 2 + +By default, +this writes the range in chunks suited to pretty-printing with anonymous blank nodes (like "[ ... ]" in Turtle or TriG). +Any rdf:type properties (written "a" in Turtle or TriG) will be written before any other properties of their subject. +This can be disabled by passing the flag :enumerator:`SERD_NO_TYPE_FIRST`. diff --git a/doc/c/statements.rst b/doc/c/statements.rst new file mode 100644 index 00000000..da7b8a03 --- /dev/null +++ b/doc/c/statements.rst @@ -0,0 +1,123 @@ +Statements +========== + +.. default-domain:: c +.. 
highlight:: c + +A :struct:`SerdStatement` is a tuple of either 3 or 4 nodes: +the `subject`, `predicate`, `object`, and optional `graph`. +Statements declare that a subject has some property. +The predicate identifies the property, +and the object is its value. + +A statement can be thought of as a very simple machine-readable sentence. +The subject and object are as in natural language, +and the predicate is something like a verb, but more general. +For example, we could make a statement in English about your intrepid author: + + drobilla has the first name "David" + +We can break this statement into 3 pieces like so: + +.. list-table:: + :header-rows: 1 + + * - Subject + - Predicate + - Object + * - drobilla + - has the first name + - "David" + +To make a :struct:`SerdStatement` out of this, we need to define some URIs. +In RDF, the subject and predicate must be *resources* with an identifier +(for example, neither can be a string). +Conventionally, predicate names do not start with "has" or similar words, +since that would be redundant in this context. +So, we assume that ``http://example.org/drobilla`` is the URI for drobilla, +and that ``http://example.org/firstName`` has been defined somewhere to be +a property with the appropriate meaning, +and can make an equivalent :struct:`SerdStatement`: + +.. literalinclude:: overview_code.c + :start-after: begin statement-new + :end-before: end statement-new + :dedent: 2 + +The last two fields are the graph and the caret. +The graph is another node that can be used to group statements, +for example by the URI of the document they were loaded from. +The caret represents the location in a document where the statement was loaded from, if applicable. + +Accessing Fields +---------------- + +Statement fields can be accessed with +:func:`serd_statement_node`, for example: + +..
literalinclude:: overview_code.c + :start-after: begin get-subject + :end-before: end get-subject + :dedent: 2 + +Alternatively, an accessor function is provided for each field: + +.. literalinclude:: overview_code.c + :start-after: begin get-pog + :end-before: end get-pog + :dedent: 2 + +Every statement has a subject, predicate, and object, +but the graph may be null. +The caret, which records the document location, may also be null (as it would be in this case), +but if available it can be accessed with :func:`serd_statement_caret`: + +.. literalinclude:: overview_code.c + :start-after: begin get-caret + :end-before: end get-caret + :dedent: 2 + +Comparison +---------- + +Two statements can be compared with :func:`serd_statement_equals`: + +.. literalinclude:: overview_code.c + :start-after: begin statement-equals + :end-before: end statement-equals + :dedent: 2 + +Statements are equal if all four corresponding pairs of nodes are equal. +The caret is considered metadata, and is ignored for comparison. + +It is also possible to match statements against a pattern using ``NULL`` as a wildcard, +with :func:`serd_statement_matches`: + +.. literalinclude:: overview_code.c + :start-after: begin statement-matches + :end-before: end statement-matches + :dedent: 2 + +Lifetime +-------- + +A statement only contains const references to nodes; +it does not own nodes or manage their lifetimes internally. +The caret, however, is owned by the statement. +A statement can be copied with :func:`serd_statement_copy`: + +.. literalinclude:: overview_code.c + :start-after: begin statement-copy + :end-before: end statement-copy + :dedent: 2 + +The copied statement will refer to exactly the same nodes, +though the caret will be deep copied. + +In most cases, statements come from a reader or model which manages them internally, +but a statement owned by the application must be freed with :func:`serd_statement_free`: + +..
literalinclude:: overview_code.c + :start-after: begin statement-free + :end-before: end statement-free + :dedent: 2 diff --git a/doc/c/stream_processing.rst b/doc/c/stream_processing.rst new file mode 100644 index 00000000..0b3f126f --- /dev/null +++ b/doc/c/stream_processing.rst @@ -0,0 +1,47 @@ +Stream Processing +================= + +.. default-domain:: c +.. highlight:: c + +The above examples show how a document can be either written to a file or loaded into a model, +simply by changing the sink that the data is written to. +There are also sinks that filter or transform the data before passing it on to another sink, +which can be used to build more advanced pipelines with several processing stages. + +Canonical Literals +------------------ + +A `canon` is a stream processor that converts literals with supported XSD datatypes into canonical form. +For example, this will rewrite an xsd:decimal literal like ".10" as "0.1". +A canon is created with :func:`serd_canon_new`, +which needs to be passed the "target" sink that the transformed statements should be written to, +for example: + +.. literalinclude:: overview_code.c + :start-after: begin canon-new + :end-before: end canon-new + :dedent: 2 + +The last argument is a bitwise ``OR`` of :enum:`SerdCanonFlag` flags. +For example, :enumerator:`SERD_CANON_LAX` will tolerate and pass through invalid literals, +which can be useful for cleaning up questionable data as much as possible without losing any information. + +Filtering Statements +-------------------- + +A `filter` is a stream processor that filters statements based on a pattern. +It can be configured in either inclusive or exclusive mode, +which passes through only statements that match or don't match the pattern, +respectively. +A filter is created with :func:`serd_filter_new`, +which takes a target, pattern, and inclusive flag. +For example, only statements with predicate ``rdf:type`` could be passed through when loading a model: + +..
literalinclude:: overview_code.c + :start-after: begin filter-new + :end-before: end filter-new + :dedent: 2 + +If ``false`` is passed for the last parameter instead, +then the filter operates in exclusive mode and will instead insert only statements whose predicate is not ``rdf:type``. diff --git a/doc/c/string_views.rst b/doc/c/string_views.rst new file mode 100644 index 00000000..2ae7d29b --- /dev/null +++ b/doc/c/string_views.rst @@ -0,0 +1,58 @@ +String Views +============ + +.. default-domain:: c +.. highlight:: c + +For performance reasons, +most functions in serd that take a string take a :struct:`SerdStringView`, +rather than a bare pointer. +This forces code to be explicit about string measurement, +which discourages common patterns of repeated measurement of the same string. +For convenience, several macros are provided for constructing string views: + +:macro:`SERD_EMPTY_STRING` + + Constructs a view of an empty string, for example: + + .. literalinclude:: overview_code.c + :start-after: begin make-empty-string + :end-before: end make-empty-string + :dedent: 2 + +:macro:`SERD_STRING` + + Constructs a view of an entire string or string literal, for example: + + .. literalinclude:: overview_code.c + :start-after: begin make-static-string + :end-before: end make-static-string + :dedent: 2 + + or: + + .. literalinclude:: overview_code.c + :start-after: begin measure-string + :end-before: end measure-string + :dedent: 2 + + This macro calls ``strlen`` to measure the string. + Modern compilers will optimise this away if the parameter is a string literal. + +:macro:`SERD_SUBSTRING` + + Constructs a view of a slice of a string with an explicit length, + for example: + + .. literalinclude:: overview_code.c + :start-after: begin make-string-view + :end-before: end make-string-view + :dedent: 2 + + This macro can also be used to create a view of a pre-measured string. + If the length of a dynamic string is already known, + it is faster to use this than :macro:`SERD_STRING`.
+ +These macros can be used inline when passing parameters, +but if the same dynamic string is used several times, +it is better to make a string view variable to avoid redundant measurement. diff --git a/doc/c/using_serd.rst b/doc/c/using_serd.rst new file mode 100644 index 00000000..cfe57c4c --- /dev/null +++ b/doc/c/using_serd.rst @@ -0,0 +1,15 @@ +########## +Using Serd +########## + +.. toctree:: + + overview + string_views + nodes + statements + world + model + reading_and_writing + stream_processing + diff --git a/doc/c/world.rst b/doc/c/world.rst new file mode 100644 index 00000000..31cbe16b --- /dev/null +++ b/doc/c/world.rst @@ -0,0 +1,48 @@ +World +===== + +.. default-domain:: c +.. highlight:: c + +So far, we have only used nodes and statements, +which are simple independent objects. +Higher-level facilities in Serd require a :struct:`SerdWorld`, +which represents the global library state. + +A program typically uses just one world, +which can be constructed using :func:`serd_world_new`: + +.. literalinclude:: overview_code.c + :start-after: begin world-new + :end-before: end world-new + :dedent: 2 + +All "global" library state is handled explicitly via the world. +Serd does not contain any static mutable data, +allowing it to be used concurrently in several parts of a program, +for example in plugins. + +If multiple worlds *are* used in a single program, +they must never be mixed: +objects "inside" one world can not be used with objects inside another. + +Note that the world is not a database, +it only manages a small amount of library state for things like configuration and logging. + +Generating Blanks +----------------- + +Blank nodes, or simply "blanks", +are used for resources that do not have URIs. +Unlike URIs, they are not global identifiers, +and only have meaning within their local context (for example, a document). +The world provides a method for automatically generating unique blank identifiers: + +.. 
literalinclude:: overview_code.c + :start-after: begin get-blank + :end-before: end get-blank + :dedent: 2 + +Note that the returned pointer is to a node that will be updated on the next call to :func:`serd_world_get_blank`, +so it is usually best to copy the node, +like in the example above. |