diff options
Diffstat (limited to 'doc/cpp')
-rw-r--r-- | doc/cpp/.clang-tidy | 25 | ||||
-rw-r--r-- | doc/cpp/Doxyfile.in | 45 | ||||
-rw-r--r-- | doc/cpp/api/meson.build | 8 | ||||
-rw-r--r-- | doc/cpp/cpp_facilities.rst | 53 | ||||
-rw-r--r-- | doc/cpp/index.rst | 16 | ||||
-rw-r--r-- | doc/cpp/meson.build | 57 | ||||
-rw-r--r-- | doc/cpp/model.rst | 266 | ||||
-rw-r--r-- | doc/cpp/nodes.rst | 38 | ||||
-rw-r--r-- | doc/cpp/overview.cpp | 370 | ||||
-rw-r--r-- | doc/cpp/overview.rst | 83 | ||||
-rw-r--r-- | doc/cpp/reading_and_writing.rst | 147 | ||||
-rw-r--r-- | doc/cpp/statements.rst | 124 | ||||
-rw-r--r-- | doc/cpp/stream_processing.rst | 48 | ||||
-rw-r--r-- | doc/cpp/using_serd.rst | 14 | ||||
-rw-r--r-- | doc/cpp/world.rst | 45 | ||||
-rw-r--r-- | doc/cpp/xml/meson.build | 19 |
16 files changed, 1358 insertions, 0 deletions
diff --git a/doc/cpp/.clang-tidy b/doc/cpp/.clang-tidy new file mode 100644 index 00000000..df126af1 --- /dev/null +++ b/doc/cpp/.clang-tidy @@ -0,0 +1,25 @@ +Checks: > + *, + -*-magic-numbers, + -*-named-parameter, + -*-non-private-member-variables-in-classes, + -*-uppercase-literal-suffix, + -altera-struct-pack-align, + -clang-analyzer-deadcode.DeadStores, + -clang-analyzer-nullability.NullablePassedToNonnull, + -cppcoreguidelines-pro-bounds-array-to-pointer-decay, + -cppcoreguidelines-pro-bounds-pointer-arithmetic, + -cppcoreguidelines-pro-type-vararg, + -fuchsia-default-argument*, + -fuchsia-multiple-inheritance, + -fuchsia-overloaded-operator, + -google-runtime-references, + -hicpp-no-array-decay, + -hicpp-signed-bitwise, + -hicpp-vararg, + -llvmlibc-*, + -modernize-use-trailing-return-type, + -readability-implicit-bool-conversion, +WarningsAsErrors: '*' +HeaderFilterRegex: '.*' +FormatStyle: file diff --git a/doc/cpp/Doxyfile.in b/doc/cpp/Doxyfile.in new file mode 100644 index 00000000..de0a8766 --- /dev/null +++ b/doc/cpp/Doxyfile.in @@ -0,0 +1,45 @@ +PROJECT_NAME = Serd +PROJECT_BRIEF = "A lightweight library for RDF storage and serialisation" + +QUIET = YES +WARN_AS_ERROR = NO +WARN_IF_UNDOCUMENTED = YES +WARN_NO_PARAMDOC = NO + +CASE_SENSE_NAMES = YES +EXTRACT_ALL = NO +EXTRACT_LOCAL_CLASSES = YES +EXTRACT_PRIVATE = NO +EXTRACT_STATIC = YES +HIDE_FRIEND_COMPOUNDS = YES +HIDE_IN_BODY_DOCS = YES +HIDE_UNDOC_CLASSES = YES +HIDE_UNDOC_MEMBERS = YES +INHERIT_DOCS = NO +INLINE_INHERITED_MEMB = NO +REFERENCES_LINK_SOURCE = NO + +AUTOLINK_SUPPORT = NO +GENERATE_HTML = NO +GENERATE_LATEX = NO +GENERATE_XML = YES +JAVADOC_AUTOBRIEF = YES +SHOW_FILES = NO +XML_PROGRAMLISTING = NO + +MACRO_EXPANSION = YES +PREDEFINED = SERD_ALLOCATED \ + SERD_API \ + SERD_CONST_FUNC= \ + SERD_DEPRECATED_BY(x)= \ + SERD_DISABLE_DEPRECATED \ + SERD_NONNULL= \ + SERD_NULLABLE= \ + SERD_PURE_FUNC= + +RECURSIVE = YES +STRIP_FROM_PATH = @SERD_SRCDIR@ +INPUT = @SERD_SRCDIR@/include \ + 
@SERD_SRCDIR@/bindings/cpp/include + +OUTPUT_DIRECTORY = @DOX_OUTPUT@ diff --git a/doc/cpp/api/meson.build b/doc/cpp/api/meson.build new file mode 100644 index 00000000..33171d5f --- /dev/null +++ b/doc/cpp/api/meson.build @@ -0,0 +1,8 @@ +cpp_serd_rst = custom_target( + 'Serd C++ API ReST Documentation', + command: [dox_to_sphinx, + '-l', 'cpp', + '-f', '@INPUT0@', + meson.current_build_dir()], + input: [cpp_index_xml] + cpp_rst_files, + output: 'serd.rst') diff --git a/doc/cpp/cpp_facilities.rst b/doc/cpp/cpp_facilities.rst new file mode 100644 index 00000000..53b8704d --- /dev/null +++ b/doc/cpp/cpp_facilities.rst @@ -0,0 +1,53 @@ +C++ Facilities +============== + +.. default-domain:: cpp +.. highlight:: cpp +.. namespace:: serd + +String Views +------------ + +For performance reasons, +most functions that take a string take a :type:`StringView`. +This allows many types of string to be passed as an argument, +and redundant string measurement to be avoided. + +:type:`StringView` works similarly to ``std::string_view`` (and will likely be removed when C++17 support is more widespread). +A :type:`StringView` parameter will accept a string literal, +dynamic C string, +or a ``std::string`` as an argument. +Note, however, that the constructor that takes only a ``const char*`` calls ``strlen`` to measure the string, +so care should be taken to avoid this in performance-critical code if the string length is already known. + +Optionals +--------- + +Several places in the C API take or return a pointer that may be null. +This is wrapped more safely in the C++ API as an :class:`Optional`. + +From a user perspective, :class:`Optional` works similarly to ``std::optional``, +with pointer-like access operators and explicit boolean conversion enabling code like: + +.. code-block:: cpp + + if (optional_value) { + use_value(*optional_value); + } + +or: + +.. 
code-block:: cpp + + if (optional_object) { + optional_object->do_something(); + } + +The :class:`Optional` implementation is serd-specific, +and takes advantage of the fact that the contained object is really just a "fancy pointer". +This means that null can be used to represent an unset value, +avoiding the space overhead of more general types like ``std::optional``. + +A pointer to the underlying C object can be retrieved with the :func:`~Optional::cobj` method, +which will return null if the optional is unset. + diff --git a/doc/cpp/index.rst b/doc/cpp/index.rst new file mode 100644 index 00000000..32ad97c9 --- /dev/null +++ b/doc/cpp/index.rst @@ -0,0 +1,16 @@ +#### +Serd +#### + +.. include:: summary.rst + +This is the documentation for its C++ bindings, +a thin header-only wrapper that provides more convenience and safety, +with minimal overhead compared to using the C API directly. + +.. toctree:: + + using_serd + api/serdpp + api/serdpp_detail + api/serd diff --git a/doc/cpp/meson.build b/doc/cpp/meson.build new file mode 100644 index 00000000..4f0d7b4f --- /dev/null +++ b/doc/cpp/meson.build @@ -0,0 +1,57 @@ +config = configuration_data() +config.set('SERD_VERSION', meson.project_version()) + +conf_py = configure_file(configuration: config, + input: files('../conf.py.in'), + output: 'conf.py') + +configure_file(copy: true, + input: files('../summary.rst'), + output: 'summary.rst') + +configure_file(copy: true, input: files('overview.cpp'), output: 'overview.cpp') + +executable('overview', files('overview.cpp'), + dependencies: [serd_dep, serdpp_dep]) + +cpp_rst_files = files( + 'cpp_facilities.rst', + 'index.rst', + 'model.rst', + 'nodes.rst', + 'overview.rst', + 'reading_and_writing.rst', + 'statements.rst', + 'stream_processing.rst', + 'using_serd.rst', + 'world.rst', +) + +foreach f : cpp_rst_files + configure_file(copy: true, input: f, output: '@PLAINNAME@') +endforeach + +subdir('xml') +subdir('api') + +docs = custom_target( + 'singlehtml C++ 
documentation for serd', + command: [sphinx_build, '-M', 'singlehtml', + meson.current_build_dir(), meson.current_build_dir(), + '-E', '-q', '-t', 'singlehtml'], + input: [cpp_rst_files, cpp_serd_rst, cpp_index_xml], + output: 'singlehtml', + build_by_default: true, + install: true, + install_dir: docdir / 'serd-0') + +docs = custom_target( + 'html C++ documentation for serd', + command: [sphinx_build, '-M', 'html', + meson.current_build_dir(), meson.current_build_dir(), + '-E', '-q', '-t', 'html'], + input: [cpp_rst_files, cpp_serd_rst, cpp_index_xml], + output: 'html', + build_by_default: true, + install: true, + install_dir: docdir / 'serd-0') diff --git a/doc/cpp/model.rst b/doc/cpp/model.rst new file mode 100644 index 00000000..22e47160 --- /dev/null +++ b/doc/cpp/model.rst @@ -0,0 +1,266 @@ +Model +===== + +.. default-domain:: cpp +.. highlight:: cpp +.. namespace:: serd + +A :struct:`Model` is an indexed set of statements. +A model can be used to store any set of data, +from a few statements (for example, a protocol message), +to an entire document, +to a database with millions of statements. + +Constructing a model requires a world, +and :type:`flags <ModelFlags>` which can be used to configure the model: + +.. literalinclude:: overview.cpp + :start-after: begin model-new + :end-before: end model-new + :dedent: 2 + +Combinations of flags can be used to enable different indices, +or the storage of graphs and cursors. +For example, to be able to quickly search by predicate, +and store a cursor for each statement, +the flag :enumerator:`ModelFlag::store_carets` and a :enumerator:`StatementOrder::PSO` index can be added like so: + +.. literalinclude:: overview.cpp + :start-after: begin fancy-model-new + :end-before: end fancy-model-new + :dedent: 2 + +Model Operations +---------------- + +Models are value-like and can be copied and compared for equality: + +.. 
literalinclude:: overview.cpp + :start-after: begin model-copy + :end-before: end model-copy + :dedent: 2 + +The number of statements in a model can be accessed with the :func:`~Model::size` and :func:`~Model::empty` methods: + +.. literalinclude:: overview.cpp + :start-after: begin model-size + :end-before: end model-size + :dedent: 2 + +Destroying a model invalidates all nodes and statements within that model, +so care should be taken to ensure that no dangling pointers are created. + +Adding Statements +----------------- + +Statements can be added to the model by passing the nodes of the statement to :func:`~Model::insert`: + +.. literalinclude:: overview.cpp + :start-after: begin model-add + :end-before: end model-add + :dedent: 2 + +Alternatively, if you already have a statement (for example from another model), +the overload that takes a :type:`StatementView` can be used instead. +For example, the first statement in one model could be added to another like so: + +.. literalinclude:: overview.cpp + :start-after: begin model-insert + :end-before: end model-insert + :dedent: 2 + +An entire range of statements can be inserted at once by passing a range. +For example, all statements in one model could be copied into another like so: + +.. literalinclude:: overview.cpp + :start-after: begin model-add-range + :end-before: end model-add-range + :dedent: 2 + +Note that this overload consumes its argument, +so a copy must be made to insert a range without modifying the original. + +Iteration +--------- + +An iterator is a reference to a particular statement in a model. +The :func:`~Model::begin` method returns an iterator to the first statement in the model, +and :func:`~Model::end` returns a sentinel that is one past the last statement in the model: + +.. literalinclude:: overview.cpp + :start-after: begin model-begin-end + :end-before: end model-begin-end + :dedent: 2 + +Iterators can be advanced and compared manually: + +.. 
literalinclude:: overview.cpp + :start-after: begin iter-next + :end-before: end iter-next + :dedent: 2 + +However, they are more typically used automatically when iterating over a model: + +.. literalinclude:: overview.cpp + :start-after: begin model-iteration + :end-before: end model-iteration + :dedent: 2 + +Ranges +------ + +It is often more convenient to work with ranges of statements, +rather than iterators to individual statements. + +The simplest range, +the range of all statements in the model, +is returned by the :func:`~Model::begin()` method: + +.. literalinclude:: overview.cpp + :start-after: begin model-all + :end-before: end model-all + :dedent: 2 + +Iterating over this range will produce statements in GSPO or SPO order, +which is the natural order for writing nicely abbreviated documents. + +In some more advanced cases, +it is useful to iterate over a model in a specific order, +for example to optimise an algorithm that performs several iterations at once. +A range of all statements with a specific ordering can be had via the :func:`~Model::begin_ordered()` method: + +.. literalinclude:: overview.cpp + :start-after: begin model-ordered + :end-before: end model-ordered + :dedent: 2 + +Ranges work like immutable collections, +and can be iterated over in the typical way: + +.. literalinclude:: overview.cpp + :start-after: begin range-iteration + :end-before: end range-iteration + :dedent: 2 + + +Pattern Matching +---------------- + +There are several model methods that can be used to quickly find statements in the model that match a pattern. +The simplest is :func:`~Model::ask` which checks if there is any matching statement: + +.. literalinclude:: overview.cpp + :start-after: begin model-ask + :end-before: end model-ask + :dedent: 2 + +To access the unknown fields, +an iterator to the matching statement can be found with :func:`~Model::find` instead: + +.. 
literalinclude:: overview.cpp + :start-after: begin model-find + :end-before: end model-find + :dedent: 2 + +Similar to :func:`~Model::ask`, +:func:`~Model::count` can be used to count the number of matching statements: + +.. literalinclude:: overview.cpp + :start-after: begin model-count + :end-before: end model-count + :dedent: 2 + +To iterate over matching statements, +:func:`~Model::find` can be used, +which returns a cursor that will visit only statements that match the pattern: + +.. literalinclude:: overview.cpp + :start-after: begin model-range + :end-before: end model-range + :dedent: 2 + +Indexing +-------- + +A model can contain several indices that use different orderings to support different kinds of queries. +For good performance, +there should be an index where the least significant fields in the ordering correspond to wildcards in the pattern +(or, in other words, one where the most significant fields in the ordering correspond to nodes given in the pattern). +The table below lists the indices that best support a kind of pattern, +where a "?" represents a wildcard. + ++---------+--------------+ +| Pattern | Good Indices | ++=========+==============+ +| s p o | Any | ++---------+--------------+ +| s p ? | SPO, PSO | ++---------+--------------+ +| s ? o | SOP, OSP | ++---------+--------------+ +| s ? ? | SPO, SOP | ++---------+--------------+ +| ? p o | POS, OPS | ++---------+--------------+ +| ? p ? | POS, PSO | ++---------+--------------+ +| ? ? o | OSP, OPS | ++---------+--------------+ +| ? ? ? | Any | ++---------+--------------+ + +If graphs are enabled, +then statements are indexed both with and without the graph fields, +so queries with and without a graph wildcard will have similar performance. + +Since indices take up space and slow down insertion, +it is best to enable the fewest indices possible that cover the queries that will be performed. 
+For example, +an application might enable just SPO and OPS order, +because it always searches for specific subjects or objects, +but never for just a predicate without specifying any other field. + +Getting Values +-------------- + +Sometimes you are only interested in a single node, +and it is cumbersome to first search for a statement and then get the node from it. +A more convenient way is to use the :func:`~Model::get` method. +To get a value, specify a pattern where exactly one of the subject, predicate, and object is a wildcard. +If a statement matches, then the node that "fills" the wildcard will be returned: + +.. literalinclude:: overview.cpp + :start-after: begin model-get + :end-before: end model-get + :dedent: 2 + +If multiple statements match the pattern, +then the matching node from an arbitrary statement is returned. +It is an error to specify more than one wildcard, excluding the graph. + +The similar :func:`~Model::get_statement` instead returns the matching statement: + +.. literalinclude:: overview.cpp + :start-after: begin model-get-statement + :end-before: end model-get-statement + :dedent: 2 + +Erasing Statements +------------------ + +Individual statements can be erased with :func:`~Model::erase`, +which takes an iterator: + +.. literalinclude:: overview.cpp + :start-after: begin model-erase + :end-before: end model-erase + :dedent: 2 + +There is also an overload that takes a range and erases all statements in that range: + +.. literalinclude:: overview.cpp + :start-after: begin model-erase-range + :end-before: end model-erase-range + :dedent: 2 + +Erasing statements from a model invalidates all iterators to that model. diff --git a/doc/cpp/nodes.rst b/doc/cpp/nodes.rst new file mode 100644 index 00000000..c59bbd0d --- /dev/null +++ b/doc/cpp/nodes.rst @@ -0,0 +1,38 @@ +Nodes +===== + +.. default-domain:: cpp +.. highlight:: cpp +.. namespace:: serd + +Nodes are the basic building blocks of data. 
+Nodes are essentially strings, +but also have a :enum:`type <NodeType>`, +and optionally either a datatype or a language. + +In the abstract, a node is either a literal, a URI, or blank. +Serd also has a type for variable nodes, +which are used for some features but not present in RDF data. + +Construction +------------ + +Several convenient construction functions are provided that return nodes: + +- :func:`make_token` +- :func:`make_uri` +- :func:`make_file_uri` +- :func:`make_literal` +- :func:`make_boolean` +- :func:`make_decimal` +- :func:`make_double` +- :func:`make_float` +- :func:`make_integer` +- :func:`make_base64` + +Accessors +--------- + +The datatype or language of a node can be retrieved with :func:`~NodeWrapper::datatype` or :func:`~NodeWrapper::language`, respectively. +Note that only literals can have a datatype or language, +but never both at once. diff --git a/doc/cpp/overview.cpp b/doc/cpp/overview.cpp new file mode 100644 index 00000000..63c4f690 --- /dev/null +++ b/doc/cpp/overview.cpp @@ -0,0 +1,370 @@ +/* + Copyright 2021 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +/* + Example code that is included in the documentation. 
Code in the + documentation is included from here rather than written inline so that it can + be tested and avoid rotting. The code here doesn't make much sense, but is + written such that it at least compiles and will run without crashing. +*/ + +#include "serd/serd.hpp" + +#include <cassert> +#include <cstddef> +#include <iostream> + +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wunused-variable" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wunused-variable" +# pragma GCC diagnostic ignored "-Wunused-but-set-variable" +#endif + +using namespace serd; // NOLINT(google-build-using-namespace) + +static void +statements() +{ + // begin statement-new + Statement triple{make_uri("http://example.org/drobilla"), // Subject + make_uri("http://example.org/firstName"), // Predicate + make_string("David")}; // Object + // end statement-new + + // begin statement-new-graph + Statement quad{make_uri("http://example.org/drobilla"), // Subject + make_uri("http://example.org/firstName"), // Predicate + make_string("David"), // Object + make_uri("http://example.org/userData")}; // Graph + // end statement-new-graph + + // begin statement-new-cursor + Node file{make_uri("file:///tmp/userdata.ttl")}; + Statement triple2{make_uri("http://example.org/drobilla"), // Subject + make_uri("http://example.org/firstName"), // Predicate + make_string("David"), // Object + Caret{file, 4, 27}}; // Caret + // end statement-new-cursor + + // begin statement-new-graph-cursor + Statement quad2{make_uri("http://example.org/drobilla"), // Subject + make_uri("http://example.org/firstName"), // Predicate + make_string("David"), // Object + make_uri("http://example.org/userData"), // Graph + Caret{file, 4, 27}}; // Caret + // end statement-new-graph-cursor +} + +static void +statements_accessing_fields() +{ + Node ss{make_uri("http://example.org/s")}; + Node sp{make_uri("http://example.org/p")}; + Node 
so{make_uri("http://example.org/o")}; + + Statement statement{ss, sp, so}; + + // begin get-subject + NodeView s = statement.node(Field::subject); + // end get-subject + + // begin get-pog + NodeView p = statement.predicate(); + NodeView o = statement.object(); + Optional<NodeView> g = statement.graph(); + // end get-pog + + // begin get-caret + Optional<CaretView> c = statement.caret(); + // end get-caret +} + +static void +statements_comparison() +{ + Node ss{make_uri("http://example.org/s")}; + Node sp{make_uri("http://example.org/p")}; + Node so{make_uri("http://example.org/o")}; + + Statement statement1{ss, sp, so}; + Statement statement2{ss, sp, so}; + + // begin statement-equals + if (statement1 == statement2) { + std::cout << "Match" << std::endl; + } + // end statement-equals + + const Statement& statement = statement1; + + // begin statement-matches + if (statement.matches({}, make_uri("http://example.org/name"), {})) { + std::cout << statement.subject() << " has name " << statement.object() + << std::endl; + } + // end statement-matches +} + +static void +world() +{ + // begin world-new + World world; + // end world-new + + // begin get-blank + Node blank = world.get_blank(); + // end get-blank +} + +static void +model() +{ + World world; + + // begin model-new + Model model{world, StatementOrder::SPO, {}}; + // end model-new + + // begin fancy-model-new + Model fancy_model{world, StatementOrder::SPO, ModelFlag::store_carets}; + fancy_model.add_index(StatementOrder::PSO); + // end fancy-model-new + + // begin model-copy + Model copy{model}; + assert(copy == model); + + copy = fancy_model; + assert(copy == fancy_model); + // end model-copy + + // begin model-size + if (model.empty()) { + std::cout << "Model is empty" << std::endl; + } else if (model.size() > 9000) { + std::cout << "Model has over 9000 statements" << std::endl; + } + // end model-size + + // begin model-add + Node s{make_uri("http://example.org/thing")}; + Node 
p{make_uri("http://example.org/name")}; + Node o{make_string("Thing")}; + + model.insert(s, p, o); + // end model-add + + Model other_model{model}; + + // begin model-insert + model.insert(*other_model.begin()); + // end model-insert + + // begin model-add-range + model.insert_statements(other_model.begin()); + // end model-add-range + + // begin model-begin-end + Cursor i = model.begin(); + if (i == model.end()) { + std::cout << "Model is empty" << std::endl; + } else { + std::cout << "First statement subject: " << i->subject() << std::endl; + } + // end model-begin-end + + // begin iter-next + if (++i != model.end()) { + std::cout << "Second statement subject: " << i->subject() << std::endl; + } + // end iter-next + + // begin model-iteration + for (const StatementView& statement : model) { + std::cout << "Model statement subject: " << statement.subject() + << std::endl; + } + // end model-iteration + + // begin model-all + Cursor all = model.begin(); + // end model-all + + // begin model-ordered + Cursor ordered = model.begin_ordered(StatementOrder::OPS); + // end model-ordered + + // begin range-iteration + // FIXME + // for (const StatementView& statement : all) { + // std::cout << "Cursor statement subject: " << statement.subject() + // << std::endl; + // } + // end range-iteration + + // begin model-ask + Node rdf_type = make_uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"); + + if (model.ask({}, rdf_type, {}, {})) { + std::cout << "Model contains a type statement" << std::endl; + } + // end model-ask + + // Add a statement so that the searching examples below work + Node inst{make_uri("http://example.org/i")}; + Node type{make_uri("http://example.org/T")}; + model.insert(inst, rdf_type, type); + + // begin model-find + Model::Range it = model.find({}, rdf_type, {}); + + NodeView instance = it.begin()->subject(); + // end model-find + + // begin model-count + size_t n = model.count(instance, rdf_type, {}); + std::cout << "Instance has " << n << " 
types" << std::endl; + // end model-count + + // begin model-range + for (const StatementView& statement : model.find(instance, rdf_type, {})) { + std::cout << "Instance has type " << statement.object() << std::endl; + } + // end model-range + + // begin model-get + Optional<NodeView> t = model.get(instance, rdf_type, {}); + if (t) { + std::cout << "Instance has type " << *t << std::endl; + } + // end model-get + + // begin model-get-statement + Optional<StatementView> ts = model.get_statement(instance, rdf_type, {}); + if (ts) { + std::cout << "Instance " << ts->subject() << " has type " << ts->object() + << std::endl; + } + // end model-get-statement + + // begin model-erase + Model::Range itype = model.find({}, rdf_type, {}); + model.erase(itype.begin()); + // end model-erase + + // begin model-erase-range + // FIXME + // Model::Range all_types = model.find({}, rdf_type, {}); + // model.erase_statements(all_types); + // end model-erase-range +} + +static void +reading_writing() +{ + World world; + + // begin env-new + Node base = make_file_uri("/some/file.ttl"); + + Env env{world, base}; + // end env-new + + // begin env-set-prefix + env.set_prefix("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"); + // end env-set-prefix + + // begin byte-sink-new + OutputStream out = serd::open_output_file("/tmp/eg.ttl"); + // end byte-sink-new + + // begin writer-new + Writer writer{world, serd::Syntax::Turtle, {}, env, out}; + // end writer-new + + // begin reader-new + Reader reader{world, Syntax::Turtle, {}, env, writer.sink(), 4096}; + // end reader-new + + // begin read-document + Status st = reader.read_document(); + if (st != Status::success) { + std::cout << "Error reading document: " << strerror(st) << std::endl; + } + // end read-document + + // begin byte-sink-close + // out.close(); + // end byte-sink-close + + // begin inserter-new + Model model{world, StatementOrder::SPO, {}}; + SinkWrapper<SerdSink> inserter = make_inserter(model); + // end inserter-new + + 
// begin model-reader-new + Reader model_reader{world, Syntax::Turtle, {}, env, inserter, 4096}; + + st = model_reader.read_document(); + if (st != Status::success) { + std::cout << "Error loading model: " << strerror(st) << std::endl; + } + // end model-reader-new + + // begin write-range + // FIXME + // model.all().write(writer.sink(), {}); + // end write-range + + // begin canon-new + SinkWrapper<SerdSink> canon = make_canon(world, inserter, {}); + // end canon-new + + Node rdf_type = make_uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"); + + // begin filter-new + SinkWrapper<SerdSink> filter = make_filter(world, + inserter, // Target + {}, // Subject + rdf_type, // Predicate + {}, // Object + {}, // Graph + true); // Inclusive + // end filter-new +} + +int +main() +{ + statements(); + statements_accessing_fields(); + statements_comparison(); + world(); + model(); + reading_writing(); + + return 0; +} + +#if defined(__clang__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif diff --git a/doc/cpp/overview.rst b/doc/cpp/overview.rst new file mode 100644 index 00000000..763620e8 --- /dev/null +++ b/doc/cpp/overview.rst @@ -0,0 +1,83 @@ +######## +Overview +######## + +.. default-domain:: cpp +.. highlight:: cpp +.. namespace:: serd + +The serd C++ API is declared in ``serd.hpp``: + +.. code-block:: cpp + + #include <serd/serd.hpp> + +An application using serd first creates a :doc:`api/serd_world`, +which represents an instance of serd and is used to manage "global" facilities like logging. + +The rest of the API declares objects that can be used together in different ways. +They can be broadly placed into four categories: + +Data + A :doc:`api/serd_node` is the basic building block of data, + 3 or 4 nodes together make a :doc:`api/serd_statement`. + All data is expressed in this form. 
+ +Streams + Objects stream data to each other via :doc:`api/serd_sink`, + which is an abstract interface that receives :doc:`api/serd_event`. + An event is essentially a statement, + but there are a few additional event types that reflect context changes and support pretty-printing. + + Some objects both act as a sink and send data to another sink, + which allows them to be inserted in a data `pipeline` to process the data as it streams through. + For example, + a :doc:`api/serd_canon` converts literals to canonical form, + and a :doc:`api/serd_filter` filters statements that match (or do not match) some pattern. + + The syntactic context at a particular point is represented by an :doc:`api/serd_env`. + This stores the base URI and set of namespace prefixes, + which are used to expand relative and abbreviated URIs. + +Reading and Writing + Reading and writing data is performed using a :doc:`api/serd_reader`, + which reads text and emits data to a sink, + and a :doc:`api/serd_writer`, + which is a sink that writes the incoming data as text. + Both work in a streaming fashion so that large documents can be pretty-printed, + translated, + or otherwise processed quickly using only a small amount of memory. + +Storage + A set of statements can be stored in memory as a :doc:`api/serd_model`. + A model acts as a collection of statements, + and provides most of the interface expected for a standard C++ collection. + There are also several query methods which search for statements quickly, + provided an appropriate index is enabled. + + Data can be loaded into a model via an :doc:`api/serd_inserter`, + which is a sink that inserts incoming statements into a model. + +The sink interface acts as a generic connection which can be used to build custom data processing pipelines. +For example, +a simple pipeline to read a document, filter out some statements, and write the result to a new file, +would look something like: + +.. 
image:: ../_static/writer_pipeline.svg + +Here, event streams are shown as a dashed line, and a solid line represents explicit use of an object. +In other words, dashed lines represent connections via the abstract :doc:`api/serd_sink` interface. +In this case both reader and writer are using the same environment, +so the output document will have the same abbreviations as the input. +It is also possible to use different environments, +for example to set additional namespace prefixes to further abbreviate the document. + +Similarly, a document could be loaded into a model with canonical literals using a pipeline like: + +.. image:: ../_static/model_pipeline.svg + +Many other useful pipelines can be built from the objects included in serd, +and applications can implement custom sinks if those are not sufficient. + +The remainder of this overview gives a bottom-up introduction to the API, +with links to the complete reference where further detail can be found. diff --git a/doc/cpp/reading_and_writing.rst b/doc/cpp/reading_and_writing.rst new file mode 100644 index 00000000..893e6f7b --- /dev/null +++ b/doc/cpp/reading_and_writing.rst @@ -0,0 +1,147 @@ +Reading and Writing +=================== + +.. default-domain:: cpp +.. highlight:: cpp +.. namespace:: serd + +Reading and writing documents in a textual syntax is handled by the :struct:`Reader` and :struct:`Writer`, respectively. +Serd is designed around a concept of event streams, +so the reader or writer can be at the beginning or end of a "pipeline" of stream processors. +This allows large documents to be processed quickly in an "online" fashion, +while requiring only a small constant amount of memory. +If you are familiar with XML, +this is roughly analogous to SAX. + +A common setup is to simply connect a reader directly to a writer. +This can be used for things like pretty-printing, +or converting a document from one syntax to another. 
+This can be done by passing the sink returned by the writer's :func:`~Writer::sink` method to the :class:`~Reader` constructor. + +First though, +an environment needs to be set up in order to write a document. +This defines the base URI and any namespace prefixes, +which are used to resolve any relative URIs or prefixed names by the reader, +and to abbreviate the output by the writer. +In most cases, the base URI should simply be the URI of the file being written. +For example: + +.. literalinclude:: overview.cpp + :start-after: begin env-new + :end-before: end env-new + :dedent: 2 + +Namespace prefixes can also be defined for any vocabularies used: + +.. literalinclude:: overview.cpp + :start-after: begin env-set-prefix + :end-before: end env-set-prefix + :dedent: 2 + +The reader will set any additional prefixes from the document as they are encountered. + +We now have an environment set up for the contents of our document, +but still need to specify where to write it. +This is done by creating an :struct:`OutputStream`, +which is a generic interface that can be set up to write to a file, +a buffer in memory, +or a custom function that can be used to write output anywhere. +In this case, we will write to the file we set up as the base URI: + +.. literalinclude:: overview.cpp + :start-after: begin byte-sink-new + :end-before: end byte-sink-new + :dedent: 2 + +The second argument is the page size in bytes, +so I/O will be performed in chunks for better performance. +The value used here, 4096, is a typical filesystem block size that should perform well on most machines. + +With an environment and byte sink ready, +the writer can now be created: + +.. literalinclude:: overview.cpp + :start-after: begin writer-new + :end-before: end writer-new + :dedent: 2 + +Output is written by feeding statements and other events to the sink returned by the writer's :func:`~Writer::sink` method. +:struct:`Sink` is the generic interface for anything that can consume data streams. 
+Many objects provide the same interface to do various things with the data, +but in this case we will send data directly to the writer: + +.. literalinclude:: overview.cpp + :start-after: begin reader-new + :end-before: end reader-new + :dedent: 2 + +The third argument of the reader constructor takes a bitwise ``OR`` of :enum:`ReaderFlag` flags that can be used to configure the reader. +In this case no flags are given, +but for example, +passing ``ReaderFlag::lax | ReaderFlag::relative`` would enable lax mode and preserve relative URIs in the input. + +Now that we have a reader that is set up to directly push its output to a writer, +we can finally process the document: + +.. literalinclude:: overview.cpp + :start-after: begin read-document + :end-before: end read-document + :dedent: 2 + +Alternatively, one "chunk" of input can be read at a time with :func:`~Reader::read_chunk`. +A "chunk" is generally one top-level description of a resource, +including any anonymous blank nodes in its description, +but this depends on the syntax and the structure of the document being read. + +The reader pushes events to its sink as input is read, +so in this scenario the data should now have been re-written by the writer +(assuming no error occurred). +To finish and ensure that a complete document has been read and written, +:func:`~Reader::finish` can be called followed by :func:`~Writer::finish`. +However these will be automatically called on destruction if necessary, +so if the reader and writer are no longer required they can simply be destroyed. + +Finally, closing the byte sink will flush and close the output file, +so it is ready to be read again later. +Similar to the reader and writer, +this can be done explicitly by calling its :func:`~OutputStream::close` method, +or implicitly by destroying the byte sink if it is no longer needed: + +.. 
literalinclude:: overview.cpp + :start-after: begin byte-sink-close + :end-before: end byte-sink-close + :dedent: 2 + +Reading into a Model +-------------------- + +A document can be loaded into a model by setting up a reader that pushes data to a model `inserter` rather than a writer: + +.. literalinclude:: overview.cpp + :start-after: begin inserter-new + :end-before: end inserter-new + :dedent: 2 + +The process of reading the document is the same as above, +only the sink is different: + +.. literalinclude:: overview.cpp + :start-after: begin model-reader-new + :end-before: end model-reader-new + :dedent: 2 + +.. + Writing a Model + --------------- + + A model, or parts of a model, can be written by writing the desired range using its :func:`Range::write` method: + + .. literalinclude:: overview.cpp + :start-after: begin write-range + :end-before: end write-range + :dedent: 2 + + By default, + this writes the range in chunks suited to pretty-printing with anonymous blank nodes (like "[ ... ]" in Turtle or TriG). + The flag :enumerator:`SerialisationFlag::no_inline_objects` can be given to instead write the range in a simple SPO order, + which can be useful in other situations because it is faster and emits statements in strictly increasing order. diff --git a/doc/cpp/statements.rst b/doc/cpp/statements.rst new file mode 100644 index 00000000..a77c8050 --- /dev/null +++ b/doc/cpp/statements.rst @@ -0,0 +1,124 @@ +Statements +========== + +.. default-domain:: cpp +.. highlight:: cpp +.. namespace:: serd + +A :struct:`Statement` is a tuple of either 3 or 4 nodes: +the `subject`, `predicate`, `object`, and optional `graph`. +Statements declare that a subject has some property. +The predicate identifies the property, +and the object is its value on the subject. + +A statement can be thought of as a very simple machine-readable sentence. +The subject and object are as in natural language, +and the predicate is something like a verb, but more general. 
+For example, we could make a statement in English about your intrepid author: + + drobilla has the first name "David" + +We can break this statement into 3 pieces like so: + +.. list-table:: + :header-rows: 1 + + * - Subject + - Predicate + - Object + * - drobilla + - has the first name + - "David" + +To make a :class:`Statement` out of this, we need to define some URIs. +In RDF, the subject and predicate must be *resources* with an identifier +(for example, neither can be a string). +Conventionally, predicate names do not start with "has" or similar words, +since that would be redundant in this context. +So, we assume that ``http://example.org/drobilla`` is the URI for drobilla, +and that ``http://example.org/firstName`` has been defined somewhere to be +a property with the appropriate meaning, +and can make an equivalent :class:`Statement`: + +.. literalinclude:: overview.cpp + :start-after: begin statement-new + :end-before: end statement-new + :dedent: 2 + +Statements also have an additional field, the graph, +which is used to group statements together. +For example, this can be used to store the document where statements originated, +or to keep schema data separate from application data. +A statement with a graph can be constructed by passing the graph as the fourth parameter: + +.. literalinclude:: overview.cpp + :start-after: begin statement-new-graph + :end-before: end statement-new-graph + :dedent: 2 + +Finally, a :class:`Caret` may also be passed which records a position in the file that the statement was loaded from. +This is typically used for printing useful error messages. +The caret is considered metadata and not part of the statement itself, +for example, +it is not considered in equality comparison. +Typically, the caret will be automatically set by a reader, +but a statement with a caret can be constructed manually by passing the caret as the last parameter: + +.. 
literalinclude:: overview.cpp + :start-after: begin statement-new-cursor + :end-before: end statement-new-cursor + :dedent: 2 + +.. literalinclude:: overview.cpp + :start-after: begin statement-new-graph-cursor + :end-before: end statement-new-graph-cursor + :dedent: 2 + + +Accessing Fields +---------------- + +Statement fields can be accessed with the :func:`~StatementWrapper::node` method, for example: + +.. literalinclude:: overview.cpp + :start-after: begin get-subject + :end-before: end get-subject + :dedent: 2 + +Alternatively, an accessor function is provided for each field: + +.. literalinclude:: overview.cpp + :start-after: begin get-pog + :end-before: end get-pog + :dedent: 2 + +Every statement has a subject, predicate, and object, +but the graph is optional. +The caret is also optional, +and can be accessed with the :func:`~StatementWrapper::caret` method: + +.. literalinclude:: overview.cpp + :start-after: begin get-caret + :end-before: end get-caret + :dedent: 2 + +Comparison +---------- + +Two statements can be compared with the equals operator: + +.. literalinclude:: overview.cpp + :start-after: begin statement-equals + :end-before: end statement-equals + :dedent: 2 + +Statements are equal if all four corresponding pairs of nodes are equal. +The caret is considered metadata, and is ignored for comparison. + +It is also possible to match statements against a pattern with the :func:`~StatementWrapper::matches` method, +where empty parameters act as wildcards: + +.. literalinclude:: overview.cpp + :start-after: begin statement-matches + :end-before: end statement-matches + :dedent: 2 diff --git a/doc/cpp/stream_processing.rst b/doc/cpp/stream_processing.rst new file mode 100644 index 00000000..39265287 --- /dev/null +++ b/doc/cpp/stream_processing.rst @@ -0,0 +1,48 @@ +Stream Processing +================= + +.. default-domain:: cpp +.. highlight:: cpp +.. 
namespace:: serd + +The above examples show how a document can be either written to a file or loaded into a model, +simply by changing the sink that the data is written to. +There are also sinks that filter or transform the data before passing it on to another sink, +which can be used to build more advanced pipelines with several processing stages. + +Canonical Literals +------------------ + +A `canon` is a stream processor that converts literals with supported XSD datatypes into canonical form. +For example, this will rewrite an xsd:decimal literal like ".10" as "0.1". +A canon can be constructed by passing the "target" sink that the transformed statements should be written to, +for example: + +.. literalinclude:: overview.cpp + :start-after: begin canon-new + :end-before: end canon-new + :dedent: 2 + +The last argument is a bitwise ``OR`` of :enum:`CanonFlag` flags. +For example, :enumerator:`CanonFlag::lax` will tolerate and pass through invalid literals, +which can be useful for cleaning up questionable data as much as possible without losing any information. + +Filtering Statements +-------------------- + +A `filter` is a stream processor that filters statements based on a pattern. +It can be configured in either inclusive or exclusive mode, +which passes through only statements that match or don't match the pattern, +respectively. +A filter can be constructed by passing the target sink, +the statement pattern as individual nodes, +and an inclusive flag. +For example, a model could be loaded with only the statements that have predicate ``rdf:type``: + +.. literalinclude:: overview.cpp + :start-after: begin filter-new + :end-before: end filter-new + :dedent: 2 + +If ``false`` is passed for the last parameter instead, +then the filter operates in exclusive mode and will instead insert only statements whose predicate is not ``rdf:type``. 
diff --git a/doc/cpp/using_serd.rst b/doc/cpp/using_serd.rst new file mode 100644 index 00000000..ee9112ef --- /dev/null +++ b/doc/cpp/using_serd.rst @@ -0,0 +1,14 @@ +########## +Using Serd +########## + +.. toctree:: + + overview + cpp_facilities + nodes + statements + world + model + reading_and_writing + stream_processing diff --git a/doc/cpp/world.rst b/doc/cpp/world.rst new file mode 100644 index 00000000..d6736485 --- /dev/null +++ b/doc/cpp/world.rst @@ -0,0 +1,45 @@ +World +===== + +.. default-domain:: cpp +.. highlight:: cpp +.. namespace:: serd + +So far, we have only used nodes and statements, +which are simple independent objects. +Higher-level facilities in Serd require a :struct:`World`, +which represents the global library state. + +A program typically uses just one world, +which can be constructed with no arguments: + +.. literalinclude:: overview.cpp + :start-after: begin world-new + :end-before: end world-new + :dedent: 2 + +All "global" library state is handled explicitly via the world. +Serd does not contain any static mutable data, +allowing it to be used concurrently in several parts of a program, +for example in plugins. + +If multiple worlds *are* used in a single program, +they must never be mixed: +objects "inside" one world can not be used with objects inside another. + +Note that the world is not a database, +it only manages a small amount of library state for things like configuration and logging. + +Generating Blanks +----------------- + +Blank nodes, or simply "blanks", +are used for resources that do not have URIs. +Unlike URIs, they are not global identifiers, +and only have meaning within their local context (for example, a document). +The world provides a method for automatically generating unique blank identifiers: + +.. 
literalinclude:: overview.cpp + :start-after: begin get-blank + :end-before: end get-blank + :dedent: 2 diff --git a/doc/cpp/xml/meson.build b/doc/cpp/xml/meson.build new file mode 100644 index 00000000..afc187d0 --- /dev/null +++ b/doc/cpp/xml/meson.build @@ -0,0 +1,19 @@ +doxygen = find_program('doxygen') + +cpp_doxygen_input = [] +foreach h : cpp_headers + cpp_doxygen_input += ['..' / h] +endforeach + +config = configuration_data() +config.set('SERD_SRCDIR', serd_src_root) +config.set('DOX_OUTPUT', meson.current_build_dir() / '..') + +cpp_doxyfile = configure_file(configuration: config, + input: files('../Doxyfile.in'), + output: 'Doxyfile') + +cpp_index_xml = custom_target('serd-cpp-index.xml', + command: [doxygen, '@INPUT0@'], + input: [cpp_doxyfile] + cpp_header_files, + output: 'index.xml') |