diff options
author | David Robillard <d@drobilla.net> | 2020-06-21 18:50:55 +0200 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2022-01-28 22:21:07 -0500 |
commit | ccbbd538d001ae4e17c86839b0583716e0dd3527 (patch) | |
tree | 5d55f82f617171bb34199484c21f7ec6c50f28d9 /bindings/python | |
parent | cb9bc60bfd95173ead26540714dc682842cad80b (diff) | |
download | serd-ccbbd538d001ae4e17c86839b0583716e0dd3527.tar.gz serd-ccbbd538d001ae4e17c86839b0583716e0dd3527.tar.bz2 serd-ccbbd538d001ae4e17c86839b0583716e0dd3527.zip |
[WIP] Add Python bindings
Diffstat (limited to 'bindings/python')
-rw-r--r-- | bindings/python/_static/custom.css | 35 | ||||
-rw-r--r-- | bindings/python/_static/meson.build | 3 | ||||
-rw-r--r-- | bindings/python/_static/serd.svg | 135 | ||||
-rw-r--r-- | bindings/python/conf.py | 120 | ||||
-rw-r--r-- | bindings/python/index.rst | 11 | ||||
-rw-r--r-- | bindings/python/meson.build | 71 | ||||
-rw-r--r-- | bindings/python/overview.rst | 644 | ||||
-rw-r--r-- | bindings/python/reference.rst | 8 | ||||
-rw-r--r-- | bindings/python/serd.pyx | 2470 | ||||
-rw-r--r-- | bindings/python/test_serd.py | 1036 |
10 files changed, 4533 insertions, 0 deletions
diff --git a/bindings/python/_static/custom.css b/bindings/python/_static/custom.css new file mode 100644 index 00000000..9542e3be --- /dev/null +++ b/bindings/python/_static/custom.css @@ -0,0 +1,35 @@ +div.document { + margin : 0 +} + +div.body { + margin-top : 2em +} + +div.sphinxsidebarwrapper { + background : #EEE +} + +div.sphinxsidebarwrapper p.blurb { + text-align : center +} + +img.logo { + width : 6em +} + +.class { + padding-top : 1.5em +} + +.exception { + padding-top : 1.5em +} + +.function { + padding-top : 1.5em +} + +.method { + padding-top : 0.75em +} diff --git a/bindings/python/_static/meson.build b/bindings/python/_static/meson.build new file mode 100644 index 00000000..54cb21ff --- /dev/null +++ b/bindings/python/_static/meson.build @@ -0,0 +1,3 @@ +configure_file(copy: true, + input: '../../../resources/serd.svg', + output: 'serd.svg') diff --git a/bindings/python/_static/serd.svg b/bindings/python/_static/serd.svg new file mode 100644 index 00000000..6682c2e2 --- /dev/null +++ b/bindings/python/_static/serd.svg @@ -0,0 +1,135 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<svg + xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns:cc="http://creativecommons.org/ns#" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns="http://www.w3.org/2000/svg" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + inkscape:version="1.0 (4035a4fb49, 2020-05-01)" + sodipodi:docname="serd.svg" + width="33.866669mm" + height="33.866669mm" + viewBox="0 0 33.866668 33.866668" + version="1.1" + id="svg8"> + <sodipodi:namedview + inkscape:current-layer="svg8" + inkscape:window-maximized="0" + inkscape:window-y="48" + inkscape:window-x="12" + inkscape:cy="62.700126" + inkscape:cx="-7.2291926" + inkscape:zoom="4.2953355" + fit-margin-bottom="0" + fit-margin-right="0" + fit-margin-left="0" + fit-margin-top="0" + 
showgrid="false" + id="namedview26" + inkscape:window-height="2100" + inkscape:window-width="3816" + inkscape:pageshadow="2" + inkscape:pageopacity="0" + guidetolerance="10" + gridtolerance="10" + objecttolerance="10" + borderopacity="1" + bordercolor="#666666" + pagecolor="#ffffff" /> + <defs + id="defs2" /> + <metadata + id="metadata5"> + <rdf:RDF> + <cc:Work + rdf:about=""> + <dc:format>image/svg+xml</dc:format> + <dc:type + rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> + <dc:title></dc:title> + </cc:Work> + </rdf:RDF> + </metadata> + <path + d="M 26.726105,7.1405637 H 33.25462 V 0.61204813 h -6.528515 z" + style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:#444444;stroke-width:1.05833325;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + id="path889" /> + <path + d="M 26.726105,7.1405637 H 33.25462 V 0.61204813 h -6.528515 z" + style="fill:none;stroke:#444444;stroke-width:1.05833325;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + id="path891" /> + <path + d="m 13.669077,7.1405637 h 6.528516 V 0.61204813 h -6.528516 z" + style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:#444444;stroke-width:1.05833325;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + id="path893" /> + <path + d="m 13.669077,7.1405637 h 6.528516 V 0.61204813 h -6.528516 z" + style="fill:none;stroke:#444444;stroke-width:1.05833325;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + id="path895" /> + <path + d="M 0.61204754,7.1405637 H 7.1405623 V 0.61204813 H 0.61204754 Z" + style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:#444444;stroke-width:1.05833325;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + id="path897" /> + <path + d="M 0.61204754,7.1405637 H 7.1405623 V 0.61204813 H 0.61204754 Z" + 
style="fill:none;stroke:#444444;stroke-width:1.05833325;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + id="path899" /> + <path + d="M 26.726105,33.254621 H 33.25462 V 26.726109 H 26.726105 Z" + style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:#444444;stroke-width:1.05833325;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + id="path901" /> + <path + d="M 26.726105,33.254621 H 33.25462 V 26.726109 H 26.726105 Z" + style="fill:none;stroke:#444444;stroke-width:1.05833325;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + id="path903" /> + <path + d="m 13.669077,33.254621 h 6.528516 v -6.528512 h -6.528516 z" + style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:#444444;stroke-width:1.05833325;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + id="path905" /> + <path + d="m 13.669077,33.254621 h 6.528516 v -6.528512 h -6.528516 z" + style="fill:none;stroke:#444444;stroke-width:1.05833325;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + id="path907" /> + <path + d="M 0.61204754,33.254621 H 7.1405623 V 26.726109 H 0.61204754 Z" + style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:#444444;stroke-width:1.05833325;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + id="path909" /> + <path + d="M 0.61204754,33.254621 H 7.1405623 V 26.726109 H 0.61204754 Z" + style="fill:none;stroke:#444444;stroke-width:1.05833325;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + id="path911" /> + <path + d="m 13.669077,20.197594 h 6.528516 v -6.528515 h -6.528516 z" + style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:#444444;stroke-width:1.05833325;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + id="path913" 
/> + <path + d="m 13.669077,20.197594 h 6.528516 v -6.528515 h -6.528516 z" + style="fill:none;stroke:#444444;stroke-width:1.05833325;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + id="path915" /> + <path + d="m 20.197593,3.8763056 h 6.528512" + style="fill:none;stroke:#444444;stroke-width:1.05833325;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + id="path917" /> + <path + d="M 7.1405623,3.8763056 H 13.669077" + style="fill:none;stroke:#444444;stroke-width:1.05833325;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + id="path919" /> + <path + d="M 7.1405623,7.1405637 13.669077,13.669079" + style="fill:none;stroke:#444444;stroke-width:1.05833325;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + id="path921" /> + <path + d="m 20.197593,20.197594 6.528512,6.528515" + style="fill:none;stroke:#444444;stroke-width:1.05833325;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + id="path923" /> + <path + d="M 7.1405623,29.990363 H 13.669077" + style="fill:none;stroke:#444444;stroke-width:1.05833325;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + id="path925" /> + <path + d="m 20.197593,29.990363 h 6.528512" + style="fill:none;stroke:#444444;stroke-width:1.05833325;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + id="path927" /> +</svg> diff --git a/bindings/python/conf.py b/bindings/python/conf.py new file mode 100644 index 00000000..f145d217 --- /dev/null +++ b/bindings/python/conf.py @@ -0,0 +1,120 @@ +import os +import sys + + +from unittest.mock import Mock as MagicMock + +sys.path.insert(0, os.path.abspath("../../build/bindings/python")) + + +class Mock(MagicMock): + @classmethod + def 
__getattr__(cls, name): + return MagicMock() + + +MOCK_MODULES = ["cython", "libc.stdint"] +sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES) + + +# Project information + +project = "Serd" +copyright = "2021, David Robillard" +author = "David Robillard" +release = "0.0.0" # FIXME + + +# General configuration + +exclude_patterns = ["xml", "_build", "Thumbs.db", ".DS_Store"] +language = "en" +nitpicky = True +pygments_style = "friendly" + +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.doctest", +] + +# Ignore everything opaque or external for nitpicky mode +_opaque = [ + "serd._SinkBase", + "serd.__ByteSource", + "unicode", +] + +nitpick_ignore = list(map(lambda x: ("py:class", x), _opaque)) + +# HTML output + +try: + import sphinx_lv2_theme + + have_lv2_theme = True +except ModuleNotFoundError: + have_lv2_theme = False + +html_copy_source = False +html_short_title = "Serd" +html_static_path = ["_static"] +html_theme = "sphinx_lv2_theme" + +if have_lv2_theme: + html_theme = "sphinx_lv2_theme" + + if tags.has("singlehtml"): + html_sidebars = { + "**": [ + "globaltoc.html", + ] + } + + html_theme_options = { + "body_max_width": "48em", + "body_min_width": "48em", + "description": "A lightweight library for working with RDF", + "show_footer_version": True, + "show_logo_version": False, + "logo": "serd.svg", + "logo_name": True, + "logo_width": "8em", + "nosidebar": False, + "page_width": "80em", + "sidebar_width": "18em", + "globaltoc_maxdepth": 3, + "globaltoc_collapse": False, + } + + else: + html_theme_options = { + "body_max_width": "60em", + "body_min_width": "40em", + "description": "A lightweight library for working with RDF", + "show_footer_version": True, + "show_logo_version": False, + "logo": "serd.svg", + "logo_name": True, + "logo_width": "8em", + "nosidebar": True, + "page_width": "60em", + "sidebar_width": "14em", + "globaltoc_maxdepth": 1, + "globaltoc_collapse": True, + } + +else: + + html_theme = "alabaster" + + 
html_theme_options = { + "body_max_width": "60em", + "body_min_width": "40em", + "description": "A lightweight library for working with RDF", + "logo": "serd.svg", + "logo_name": True, + "page_width": "60em", + "sidebar_width": "14em", + "globaltoc_maxdepth": 1, + "globaltoc_collapse": True, + } diff --git a/bindings/python/index.rst b/bindings/python/index.rst new file mode 100644 index 00000000..d939ed49 --- /dev/null +++ b/bindings/python/index.rst @@ -0,0 +1,11 @@ +######################### +Serd Python Documentation +######################### + +.. toctree:: + + overview + reference + +:ref:`genindex` +:ref:`modindex` diff --git a/bindings/python/meson.build b/bindings/python/meson.build new file mode 100644 index 00000000..3a181870 --- /dev/null +++ b/bindings/python/meson.build @@ -0,0 +1,71 @@ +srcdir = meson.current_source_dir() +blddir = meson.current_build_dir() + +if cc.get_id() == 'clang' or cc.get_id() == 'gcc' + cython_c_args = [ + '-Wno-deprecated-declarations', + '-Wno-unused-variable', + ] +endif + +# Generate extension module C source code with cython +pyserd_c = custom_target( + 'serd.cpython.so', + command: [cython, '-3', '--fast-fail', '-Wextra', '@INPUT0@', '-o', '@OUTPUT@'], + input: ['serd.pyx'], + output: 'pyserd.c', + install: true, + install_dir: py.get_install_dir()) + +# Compile extension module +pyserd = py.extension_module('serd', + pyserd_c, + c_args: cython_c_args, + dependencies: [py_dep, serd_dep]) + +# Set up an environment for loading the module from the build directory +python_env = environment() +python_env.set('PYTHONPATH', meson.current_build_dir()) + +# Run API unit tests +test('serd.pyx', + py, + args: ['-m', 'unittest', 'discover', '-b', '-v', srcdir], + env: python_env, + suite: ['bindings', 'python']) + +if sphinx_build.found() + # Test all code/output snippets in the documentation + test('doctest', + sphinx_build, + args: ['-W', '-b', 'doctest', srcdir, blddir], + env: python_env, + suite: ['bindings', 'python']) + + 
if not get_option('docs').disabled() + py_html_docs = custom_target( + 'html documentation for serd python bindings', + command: [sphinx_build, + '-b', 'html', '-W', '-E', '-a', '-q', '-t', 'html', + srcdir, blddir], + input: [pyserd], + output: 'html', + build_by_default: true, + install: true, + install_dir: docdir / 'serd-0') + + py_singlehtml_docs = custom_target( + 'singlehtml documentation for serd python bindings', + command: [sphinx_build, + '-b', 'singlehtml', '-W', '-E', '-a', '-q', '-t', 'singlehtml', + srcdir, blddir], + input: [pyserd], + output: 'singlehtml', + build_by_default: true, + install: true, + install_dir: docdir / 'serd-0') + + # Copy static resources + subdir('_static') + endif +endif diff --git a/bindings/python/overview.rst b/bindings/python/overview.rst new file mode 100644 index 00000000..bbeece4f --- /dev/null +++ b/bindings/python/overview.rst @@ -0,0 +1,644 @@ +.. testsetup:: * + + import serd + +======== +Overview +======== + +Serd is a lightweight C library for working with RDF data. This is the +documentation for its Python bindings, which also serves as a gentle +introduction to the basics of RDF. + +Serd is designed for high-performance or resource-constrained applications, and +makes it possible to work with very large documents quickly and/or using +minimal memory. In particular, it is dramatically faster than `rdflib +<https://rdflib.readthedocs.io/en/stable/>`_, though it is less fully-featured +and not pure Python. + +Nodes +===== + +Nodes are the basic building blocks of data. Nodes are essentially strings: + +>>> print(serd.uri("http://example.org/something")) +http://example.org/something + +>>> print(serd.string("hello")) +hello + +>>> print(serd.decimal(1234)) +1234.0 + +>>> len(serd.string("hello")) +5 + +However, nodes also have a :meth:`~serd.Node.type`, and optionally either a +:meth:`~serd.Node.datatype` or :meth:`~serd.Node.language`. 
+ +Representation +-------------- + +The string content of a node as shown above can be ambiguous. For example, it +is impossible to tell a URI from a string literal using only their string +contents. The :meth:`~serd.Node.to_syntax` method returns a complete +representation of a node, in the `Turtle <https://www.w3.org/TR/turtle/>`_ +syntax by default: + +>>> print(serd.uri("http://example.org/something").to_syntax()) +<http://example.org/something> + +>>> print(serd.string("hello").to_syntax()) +"hello" + +>>> print(serd.decimal(1234).to_syntax()) +1234.0 + +Note that the representation of a node in some syntax *may* be the same as the +``str()`` contents which are printed, but this is usually not the case. For +example, as shown above, URIs and strings are quoted differently in Turtle. + +A different syntax can be used by specifying one explicitly: + +>>> print(serd.decimal(1234).to_syntax(serd.Syntax.NTRIPLES)) +"1234.0"^^<http://www.w3.org/2001/XMLSchema#decimal> + +An identical node can be recreated from such a string using the +:meth:`~serd.Node.from_syntax` method: + +>>> node = serd.decimal(1234) +>>> copy = serd.Node.from_syntax(node.to_syntax()) # Don't actually do this +>>> print(copy) +1234.0 + +Alternatively, the ``repr()`` builtin will return the Python construction +representation: + +>>> repr(serd.decimal(1234)) +'serd.typed_literal("1234.0", "http://www.w3.org/2001/XMLSchema#decimal")' + +Any node can be round-tripped to and from a string using these methods. That +is, for any node ``n``, both:: + + serd.Node.from_syntax(n.to_syntax()) + +and:: + + eval(repr(n)) + +produce an equivalent node. Using the ``to_syntax()`` method is generally +recommended, since it uses standard syntax. 
+ +Primitives +---------- + +For convenience, nodes can be constructed from Python primitives by simply +passing a value to the constructor: + +>>> repr(serd.Node(True)) +'serd.boolean(True)' +>>> repr(serd.Node("hello")) +'serd.string("hello")' +>>> repr(serd.Node(1234)) +'serd.typed_literal("1234", "http://www.w3.org/2001/XMLSchema#integer")' +>>> repr(serd.Node(12.34)) +'serd.typed_literal("1.234E1", "http://www.w3.org/2001/XMLSchema#double")' + +Note that it is not possible to construct every type of node this way, and care +should be taken to not accidentally construct a string literal where a URI is +desired. + +Fundamental Constructors +------------------------ + +As the above examples suggest, several node constructors are just convenience +wrappers for more fundamental ones. All node constructors reduce to one of the +following: + +:func:`serd.plain_literal` + A string with optional language, like ``"hallo"@de`` in Turtle. + +:func:`serd.typed_literal` + A string with optional datatype, like ``"1.2E9"^^xsd:float`` in Turtle. + +:func:`serd.blank` + A blank node ID, like "b42", or ``_:b42`` in Turtle. + +:func:`serd.uri` + A URI, like "http://example.org", or ``<http://example.org>`` in Turtle. + +Convenience Constructors +------------------------ + +:func:`serd.string` + A string literal with no language or datatype. + +:func:`serd.decimal` + An `xsd:decimal <https://www.w3.org/TR/xmlschema-2/#decimal>`_, + like "123.45". + +:func:`serd.double` + An `xsd:double <https://www.w3.org/TR/xmlschema-2/#double>`_, + like "1.2345E2". + +:func:`serd.float` + An `xsd:float <https://www.w3.org/TR/xmlschema-2/#float>`_, + like "1.2345E2". + +:func:`serd.integer` + An `xsd:integer <https://www.w3.org/TR/xmlschema-2/#integer>`_, + like "1234567". + +:func:`serd.boolean` + An `xsd:boolean <https://www.w3.org/TR/xmlschema-2/#boolean>`_, + like "true" or "false". 
+ +:func:`serd.base64` + An `xsd:base64Binary <https://www.w3.org/TR/xmlschema-2/#base64Binary>`_, + like "aGVsbG8=". + +:func:`serd.file_uri` + A file URI, like "file:///doc.ttl". + +Namespaces +========== + +It is common to use many URIs that share a common prefix. The +:class:`~serd.Namespace` utility class can be used to make code more readable +and make mistakes less likely: + +>>> eg = serd.Namespace("http://example.org/") +>>> print(eg.thing) +http://example.org/thing + +.. testsetup:: * + + eg = serd.Namespace("http://example.org/") + +Dictionary syntax can also be used: + +>>> print(eg["thing"]) +http://example.org/thing + +For convenience, namespaces also act like strings in many cases: + +>>> print(eg) +http://example.org/ +>>> print(eg + "stringeyName") +http://example.org/stringeyName + +Note that this class is just a simple syntactic convenience; it does not +"remember" names and there is no corresponding C API. + +Statements +========== + +A :class:`~serd.Statement` is a tuple of either 3 or 4 nodes: the subject, +predicate, object, and optional graph. Statements declare that a subject has +some property. The predicate identifies the property, and the object is its +value. + +A statement is a bit like a very simple machine-readable sentence. The +"subject" and "object" are as in natural language, and the predicate is like +the verb, but more general. For example, we could make a statement in English +about your intrepid author: + + drobilla has the first name "David" + +We can break this statement into 3 pieces like so: + +.. list-table:: + :header-rows: 1 + + * - Subject + - Predicate + - Object + * - drobilla + - has the first name + - "David" + +To make a :class:`~serd.Statement` out of this, we need to define some URIs. In +RDF, the subject and predicate must be *resources* with an identifier (for +example, neither can be a string). 
Conventionally, predicate names do not +start with "has" or similar words, since that would be redundant in this +context. So, we assume that ``http://example.org/drobilla`` is the URI for +drobilla, and ``http://example.org/firstName`` has been defined somewhere to be +a property with the appropriate meaning, and can make an equivalent +:class:`~serd.Statement`: + +>>> print(serd.Statement(eg.drobilla, eg.firstName, serd.string("David"))) +<http://example.org/drobilla> <http://example.org/firstName> "David" + +If you find this terminology confusing, it may help to think in terms of +dictionaries instead. For example, the above can be thought of as equivalent +to:: + + drobilla[firstName] = "David" + +or:: + + drobilla.firstName = "David" + +Accessing Fields +---------------- + +Statement fields can be accessed via named methods or array indexing: + +>>> statement = serd.Statement(eg.s, eg.p, eg.o, eg.g) +>>> print(statement.subject()) +http://example.org/s +>>> print(statement[serd.Field.SUBJECT]) +http://example.org/s +>>> print(statement[0]) +http://example.org/s + +Graph +----- + +The graph field can be used as a context to distinguish otherwise identical +statements. For example, it is often set to the URI of the document that the +statement was loaded from: + +>>> print(serd.Statement(eg.s, eg.p, eg.o, serd.uri("file:///doc.ttl"))) +<http://example.org/s> <http://example.org/p> <http://example.org/o> <file:///doc.ttl> + +The graph field is always accessible, but may be ``None``: + + >>> triple = serd.Statement(eg.s, eg.p, eg.o) + >>> print(triple.graph()) + None + >>> quad = serd.Statement(eg.s, eg.p, eg.o, eg.g) + >>> print(quad.graph()) + http://example.org/g + +World +===== + +So far, we have only used nodes and statements, which are simple independent +objects. Higher-level facilities in serd require a :class:`~serd.World` which +represents the global library state. 
+ +A program typically uses just one world, which can be constructed with no +arguments:: + + world = serd.World() + +.. testsetup:: * + + world = serd.World() + +All "global" library state is handled explicitly via the world. +Serd does not contain any static mutable data, +allowing it to be used concurrently in several parts of a program, +for example in plugins. + +If multiple worlds *are* used in a single program, +they must never be mixed: +objects "inside" one world cannot be used with objects inside another. + +Note that the world is not a database; +it only manages a small amount of library state for things like configuration and logging. + +Generating Blanks +----------------- + +Blank nodes, or simply "blanks", are used for resources that do not have URIs. +Unlike URIs, they are not global identifiers, and only have meaning within +their local context (for example, a document). The world provides a method for +automatically generating unique blank identifiers: + +>>> print(repr(world.get_blank())) +serd.blank("b1") +>>> print(repr(world.get_blank())) +serd.blank("b2") + +Model +===== + +A :class:`~serd.Model` is an indexed set of statements. A model can be used to +store any set of data, from a few statements (for example, a protocol message), +to an entire document, to a database with millions of statements. + +A model can be constructed and statements inserted manually using the +:meth:`~serd.Model.insert` method. Tuple syntax is supported as a shorthand +for creating statements: + +>>> model = serd.Model(world) +>>> model.insert((eg.s, eg.p, eg.o1)) +>>> model.insert((eg.s, eg.p, eg.o2)) +>>> model.insert((eg.t, eg.p, eg.o3)) + +.. 
testsetup:: model_manual + + import serd + eg = serd.Namespace("http://example.org/") + world = serd.World() + model = serd.Model(world) + model.insert((eg.s, eg.p, eg.o1)) + model.insert((eg.s, eg.p, eg.o2)) + model.insert((eg.t, eg.p, eg.o3)) + +Iterating over the model yields every statement: + +>>> for s in model: print(s) +<http://example.org/s> <http://example.org/p> <http://example.org/o1> +<http://example.org/s> <http://example.org/p> <http://example.org/o2> +<http://example.org/t> <http://example.org/p> <http://example.org/o3> + +Familiar Pythonic collection operations work as you would expect: + +>>> print(len(model)) +3 +>>> print((eg.s, eg.p, eg.o4) in model) +False +>>> model += (eg.s, eg.p, eg.o4) +>>> print((eg.s, eg.p, eg.o4) in model) +True + +Pattern Matching +---------------- + +The :meth:`~serd.Model.ask` method can be used to check if a statement is in a +model: + +>>> print(model.ask(eg.s, eg.p, eg.o1)) +True +>>> print(model.ask(eg.s, eg.p, eg.s)) +False + +This method is more powerful than the ``in`` operator because it also does +pattern matching. To check for a pattern, use ``None`` as a wildcard: + +>>> print(model.ask(eg.s, None, None)) +True +>>> print(model.ask(eg.unknown, None, None)) +False + +The :meth:`~serd.Model.count` method works similarly, but instead returns the +number of statements that match the pattern: + +>>> print(model.count(eg.s, None, None)) +3 +>>> print(model.count(eg.unknown, None, None)) +0 + +Getting Values +-------------- + +Sometimes you are only interested in a single node, and it is cumbersome to +first search for a statement and then get the node from it. The +:meth:`~serd.Model.get` method provides a more convenient way to do this. To +get a value, specify a triple pattern where exactly one field is ``None``. 
If +a statement matches, then the node that "fills" the wildcard will be returned: + +>>> print(model.get(eg.t, eg.p, None)) +http://example.org/o3 + +If multiple statements match the pattern, then the matching node from an +arbitrary statement is returned. It is an error to specify more than one +wildcard, excluding the graph. + +Erasing Statements +------------------ + +>>> model2 = model.copy() +>>> for s in model2: print(s) +<http://example.org/s> <http://example.org/p> <http://example.org/o1> +<http://example.org/s> <http://example.org/p> <http://example.org/o2> +<http://example.org/s> <http://example.org/p> <http://example.org/o4> +<http://example.org/t> <http://example.org/p> <http://example.org/o3> + +Individual statements can be erased by value, again with tuple syntax supported +for convenience: + +>>> model2.erase((eg.s, eg.p, eg.o1)) +>>> for s in model2: print(s) +<http://example.org/s> <http://example.org/p> <http://example.org/o2> +<http://example.org/s> <http://example.org/p> <http://example.org/o4> +<http://example.org/t> <http://example.org/p> <http://example.org/o3> + +Many statements can be erased at once by erasing a range: + +>>> model2.erase_statements(model2.find(eg.s, None, None)) +>>> for s in model2: print(s) +<http://example.org/t> <http://example.org/p> <http://example.org/o3> + +Saving Documents +---------------- + +Serd provides simple methods to save an entire model to a file or string, which +are similar to functions in the standard Python ``json`` module. + +A model can be saved to a file with the :meth:`~serd.World.dump` method: + +.. doctest:: + :options: +NORMALIZE_WHITESPACE + + >>> world.dump(model, "out.ttl") + >>> print(open("out.ttl", "r").read()) + <http://example.org/s> + <http://example.org/p> <http://example.org/o1> , + <http://example.org/o2> , + <http://example.org/o4> . + <BLANKLINE> + <http://example.org/t> + <http://example.org/p> <http://example.org/o3> . 
+ <BLANKLINE> + +Similarly, a model can be written as a string with the :meth:`serd.World.dumps` +method: + +.. doctest:: + :options: +ELLIPSIS + + >>> print(world.dumps(model)) + <http://example.org/s> + ... + +Loading Documents +----------------- + +There are also simple methods to load an entire model, again loosely following +the standard Python ``json`` module. + +A model can be loaded from a file with the :meth:`~serd.World.load` method: + +>>> model3 = world.load("out.ttl") +>>> print(model3 == model) +True + +By default, the syntax type is determined by the file extension, +and statements are stored in (S, P, O) order, +so only ``(s p ?)`` and ``(s ? ?)`` queries will be fast. +See the method documentation for how to control things more precisely. + +Similarly, a model can be loaded from a string with the +:meth:`~serd.World.loads` method: + +>>> ttl = "<{}> <{}> <{}> .".format(eg.s, eg.p, eg.o) +>>> model4 = world.loads(ttl) +>>> for s in model4: print(s) +<http://example.org/s> <http://example.org/p> <http://example.org/o> + +File Caret +---------- + +When data is loaded from a file into a model with the flag +:data:`~serd.ModelFlags.STORE_CARETS`, each statement will have a *caret* +which describes the file name, line, and column where the statement originated. +The caret points to the start of the object node in the statement: + +>>> model5 = world.load("out.ttl", model_flags=serd.ModelFlags.STORE_CARETS) +>>> for s in model5: print(s.caret()) +out.ttl:2:24 +out.ttl:3:2 +out.ttl:4:2 +out.ttl:7:24 + +Streaming Data +============== + +More advanced input and output can be performed by using the +:class:`~serd.Reader` and :class:`~serd.Writer` classes directly. The Reader +produces an :class:`~serd.Event` stream which describes the content of the +file, and the Writer consumes such a stream and writes syntax. + +Reading Files +------------- + +The reader reads from a source, which should be a :class:`~serd.FileInput` +to read from a file. 
Parsed input is sent to a sink, which is +called for each event: + +.. testcode:: + + def sink(event): + print(event) + + env = serd.Env(world) + reader = serd.Reader(world, serd.Syntax.TURTLE, 0, env, sink, 4096) + input = serd.FileInput("out.ttl") + with reader.open(input) as context: + context.read_document() + + # FIXME: caret + +.. testoutput:: + :options: +ELLIPSIS + + serd.Event.statement(serd.Statement(serd.uri("http://example.org/s"), serd.uri("http://example.org/p"), serd.uri("http://example.org/o1"), serd.Caret(serd.string("input"), 2, 24))) + ... + +For more advanced use cases that keep track of state, the sink can be a custom +:class:`~serd.Sink` with a call operator: + +.. testcode:: + + class MySink(serd.Sink): + def __init__(self, world): + super().__init__(world) + self.events = [] + + def __call__(self, event: serd.Event) -> serd.Status: + self.events += [event] + return serd.Status.SUCCESS + + env = serd.Env(world) + sink = MySink(world) + reader = serd.Reader(world, serd.Syntax.TURTLE, 0, env, sink, 4096) + input_stream = serd.FileInput("out.ttl") + with reader.open(input_stream) as context: + context.read_document() + + print(sink.events[0]) + + # FIXME: caret + +.. testoutput:: + + serd.Event.statement(serd.Statement(serd.uri("http://example.org/s"), serd.uri("http://example.org/p"), serd.uri("http://example.org/o1"), serd.Caret(serd.string("input"), 2, 24))) + +Reading Strings +--------------- + +To read from a string, use a :class:`~serd.StringInput` with the reader: + +.. testcode:: + + ttl = """ + @base <http://drobilla.net/> . + @prefix eg: <http://example.org/> . + <sw/serd> eg:name "Serd" . + """ + + def sink(event): + print(event) + + env = serd.Env(world) + reader = serd.Reader(world, serd.Syntax.TURTLE, 0, env, sink, 4096) + with reader.open(serd.StringInput(ttl)) as context: + context.read_document() + + # FIXME: caret + +.. 
testoutput:: + + serd.Event.base("http://drobilla.net/") + serd.Event.prefix("eg", "http://example.org/") + serd.Event.statement(serd.Statement(serd.uri("http://drobilla.net/sw/serd"), serd.uri("http://example.org/name"), serd.string("Serd"), serd.Caret(serd.string("input"), 4, 19))) + +Reading into a Model +-------------------- + +To read new data into an existing model, +send it to the sink returned by :meth:`~serd.Model.inserter`: + +.. testcode:: + + ttl = """ + @prefix eg: <http://example.org/> . + eg:newSubject eg:p eg:o . + """ + + env = serd.Env(world) + sink = model.inserter(env) + reader = serd.Reader(world, serd.Syntax.TURTLE, 0, env, sink, 4096) + with reader.open(serd.StringInput(ttl)) as context: + context.read_document() + + for s in model: print(s) + +.. testoutput:: + + <http://example.org/newSubject> <http://example.org/p> <http://example.org/o> + <http://example.org/s> <http://example.org/p> <http://example.org/o1> + <http://example.org/s> <http://example.org/p> <http://example.org/o2> + <http://example.org/s> <http://example.org/p> <http://example.org/o4> + <http://example.org/t> <http://example.org/p> <http://example.org/o3> + +Writing Files +------------- + +.. testcode:: + + env = serd.Env(world) + output = serd.FileOutput("written.ttl") + writer = serd.Writer(world, serd.Syntax.TURTLE, 0, env, output) + st = model.all().write(writer.sink(), 0) + writer.finish() + output.close() + print(open("written.ttl", "r").read()) + +.. testoutput:: + :options: +NORMALIZE_WHITESPACE + + <http://example.org/newSubject> + <http://example.org/p> <http://example.org/o> . + + <http://example.org/s> + <http://example.org/p> <http://example.org/o1> , + <http://example.org/o2> , + <http://example.org/o4> . + + <http://example.org/t> + <http://example.org/p> <http://example.org/o3> . 
diff --git a/bindings/python/reference.rst b/bindings/python/reference.rst new file mode 100644 index 00000000..0b64037e --- /dev/null +++ b/bindings/python/reference.rst @@ -0,0 +1,8 @@ +============= +API Reference +============= + +.. automodule:: serd + :members: + :undoc-members: + :inherited-members: diff --git a/bindings/python/serd.pyx b/bindings/python/serd.pyx new file mode 100644 index 00000000..143705a3 --- /dev/null +++ b/bindings/python/serd.pyx @@ -0,0 +1,2470 @@ +# cython: binding=True +# cython: language_level=3 +# cython: warn.maybe_uninitialized=True +# cython: warn.multiple_declarators=True +# cython: warn.unused=True + +"""A lightweight library for working with RDF data.""" + +import enum +import errno +import logging + +import cython + +from libc.stdint cimport int64_t, int32_t, int16_t, int8_t +from libc.stdint cimport uint64_t, uint32_t, uint16_t, uint8_t + +logger = logging.getLogger(__name__) + +cdef extern from "stdarg.h": + ctypedef struct va_list: + pass + +cdef extern from "serd/serd.h": + ctypedef struct SerdWorld + ctypedef struct SerdNodes + ctypedef struct SerdStatement + ctypedef struct SerdCaret + ctypedef struct SerdEnv + ctypedef struct SerdModel + ctypedef struct SerdCursor + ctypedef struct SerdReader + ctypedef struct SerdWriter + ctypedef struct SerdSink + + ctypedef enum SerdStatus: pass + ctypedef enum SerdSyntax: pass + ctypedef enum SerdStatementFlag: pass + ctypedef enum SerdDescribeFlag: pass + ctypedef enum SerdNodeType: pass + ctypedef enum SerdNodeFlags: pass + ctypedef enum SerdValueType: pass + ctypedef enum SerdField: pass + ctypedef enum SerdStatementOrder: pass + ctypedef enum SerdModelFlag: pass + + ctypedef uint32_t SerdStatementFlags + ctypedef uint32_t SerdDescribeFlags + ctypedef uint32_t SerdModelFlags + + ctypedef struct SerdNode + + ctypedef struct SerdAllocator + + ctypedef struct SerdStringView: + const char* buf; + size_t len; + + ctypedef struct SerdBuffer: + SerdAllocator* allocator; + void* buf; 
+ size_t len; + + ctypedef struct SerdURIView: + SerdStringView scheme + SerdStringView authority + SerdStringView path_prefix + SerdStringView path + SerdStringView query + SerdStringView fragment + + ctypedef union SerdValueData: + bint as_bool + double as_double + float as_float + int64_t as_long + int32_t as_int + int16_t as_short + int8_t as_byte + uint64_t as_ulong + uint32_t as_uint + uint16_t as_ushort + uint8_t as_ubyte + + ctypedef struct SerdValue: + SerdValueType type; + SerdValueData data; + + ctypedef enum SerdReaderFlag : pass + ctypedef uint32_t SerdReaderFlags + + ctypedef enum SerdWriterFlag : pass + ctypedef uint32_t SerdWriterFlags + + void serd_free(SerdAllocator* allocator, void* ptr); + + # String Utilities + + const char* serd_strerror(SerdStatus status); + + # Base64 + + size_t serd_base64_encoded_length(size_t size, bint wrap_lines); + size_t serd_base64_decoded_size(size_t len); + + bint serd_base64_encode(char* str, + const void* buf, + size_t size, + bint wrap_lines); + + SerdStatus serd_base64_decode(void* buf, + size_t* size, + const char* str, + size_t len); + + # Buffer + + size_t serd_buffer_write(const void* buf, + size_t size, + size_t nmemb, + void* stream); + + int serd_buffer_error(void* const stream); + int serd_buffer_close(void* const stream); + + # I/O Function Types + + ctypedef size_t (*SerdReadFunc)(void* buf, + size_t size, + size_t nmemb, + void* stream); + + ctypedef size_t (*SerdWriteFunc)(const void* buf, + size_t size, + size_t nmemb, + void* stream); + + ctypedef int (*SerdStreamErrorFunc)(void* stream); + + ctypedef int (*SerdStreamCloseFunc)(void* stream); + + # Syntax Utilities + + SerdSyntax serd_syntax_by_name(const char* name); + SerdSyntax serd_guess_syntax(const char* filename); + bint serd_syntax_has_graphs(SerdSyntax syntax); + + # URI + + char* serd_parse_file_uri(const char* uri, char** hostname); + bint serd_uri_string_has_scheme(const char* utf8); + SerdStatus serd_parse_uri(const char* utf8, 
SerdURIView* out); + + SerdURIView serd_resolve_uri(SerdURIView r, SerdURIView base); + + size_t serd_write_uri(SerdURIView uri, SerdWriteFunc sink, void* stream); + + # Node + + SerdNode* serd_node_from_syntax(SerdAllocator* allocator, + const char* str, + SerdSyntax syntax, + SerdEnv* env); + + char* serd_node_to_syntax(SerdAllocator* allocator, + const SerdNode* node, + SerdSyntax syntax, + const SerdEnv* env); + + SerdNode* serd_new_token(SerdAllocator* allocator, SerdNodeType type, SerdStringView string); + SerdNode* serd_new_string(SerdAllocator* allocator, SerdStringView string); + SerdNode* serd_new_uri(SerdAllocator* allocator, SerdURIView uri); + SerdNode* serd_new_file_uri(SerdAllocator* allocator, SerdStringView path, SerdStringView hostname); + + SerdNode* serd_new_literal(SerdAllocator* allocator, + SerdStringView string, + SerdNodeFlags flags, + SerdStringView meta); + + SerdNode* serd_new_value(SerdAllocator* allocator, SerdValue value); + SerdNode* serd_new_decimal(SerdAllocator* allocator, double d); + SerdNode* serd_new_integer(SerdAllocator* allocator, int64_t i); + SerdNode* serd_new_base64(SerdAllocator* allocator, const void* buf, size_t size); + + bint serd_get_boolean(const SerdNode* node); + double serd_get_double(const SerdNode* node); + float serd_get_float(const SerdNode* node); + int64_t serd_get_integer(const SerdNode* node); + SerdNode* serd_node_copy(SerdAllocator* allocator, const SerdNode* node); + void serd_node_free(SerdAllocator* allocator, SerdNode* node); + SerdNodeType serd_node_type(const SerdNode* node); + const char* serd_node_string(const SerdNode* node); + size_t serd_node_length(const SerdNode* node); + SerdStringView serd_node_string_view(const SerdNode* node); + SerdURIView serd_node_uri_view(const SerdNode* node); + const SerdNode* serd_node_datatype(const SerdNode* node); + const SerdNode* serd_node_language(const SerdNode* node); + bint serd_node_equals(const SerdNode* a, const SerdNode* b); + int 
serd_node_compare(const SerdNode* a, const SerdNode* b); + + # Event + + ctypedef enum SerdEventType: pass + + ctypedef struct SerdBaseEvent: + SerdEventType type; + const SerdNode* uri; + + ctypedef struct SerdPrefixEvent: + SerdEventType type; + const SerdNode* name; + const SerdNode* uri; + + ctypedef struct SerdStatementEvent: + SerdEventType type; + SerdStatementFlags flags; + const SerdStatement* statement; + + ctypedef struct SerdEndEvent: + SerdEventType type; + const SerdNode* node; + + ctypedef union SerdEvent: + SerdEventType type; + SerdBaseEvent base; + SerdPrefixEvent prefix; + SerdStatementEvent statement; + SerdEndEvent end; + + ctypedef SerdStatus (*SerdEventFunc)(void* handle, const SerdEvent* event); + + # World + + SerdWorld* serd_world_new(SerdAllocator* allocator); + void serd_world_free(SerdWorld* world); + SerdNodes* serd_world_nodes(SerdWorld* world); + const SerdNode* serd_world_get_blank(SerdWorld* world); + + SerdAllocator* serd_world_allocator(const SerdWorld* world); + + # TODO: logging + + ctypedef enum SerdLogLevel: pass + + cdef struct SerdLogField: + const char* key; + const char* value; + + cdef struct SerdLogEntry: + const char* domain; + const SerdLogField* fields; + const char* fmt; + va_list* args; + SerdLogLevel level; + size_t n_fields; + + ctypedef SerdStatus (*SerdLogFunc)(void* handle, const SerdLogEntry* entry); + + # SerdStatus serd_quiet_error_func(void* handle, const SerdLogEntry* entry); + + # const char* serd_log_entry_get_field(const SerdLogEntry* entry, + # const char* key); + + void serd_world_set_log_func(SerdWorld* world, + SerdLogFunc log_func, + void* handle); + + # SerdStatus serd_world_vlogf(const SerdWorld* world, + # const char* domain, + # SerdLogLevel level, + # size_t n_fields, + # const SerdLogField* fields, + # const char* fmt, + # va_list args); + + SerdStatus serd_world_logf(const SerdWorld* world, + const char* domain, + SerdLogLevel level, + size_t n_fields, + const SerdLogField* fields, + const 
char* fmt, + ...); + + # Environment + + SerdEnv* serd_env_new(const SerdWorld* world, const SerdStringView base_uri); + SerdEnv* serd_env_copy(SerdAllocator* allocator, const SerdEnv* env); + bint serd_env_equals(const SerdEnv* a, const SerdEnv* b); + void serd_env_free(SerdEnv* env); + const SerdNode* serd_env_base_uri(SerdEnv* env) + SerdStatus serd_env_set_base_uri(SerdEnv* env, SerdStringView uri); + + SerdStatus serd_env_set_prefix(SerdEnv* env, + SerdStringView name, + SerdStringView uri); + + SerdNode* serd_env_expand_node(const SerdEnv* env, const SerdNode* node); + + void serd_env_write_prefixes(const SerdEnv* env, const SerdSink* sink); + + # Inserter + + SerdSink* serd_inserter_new(SerdModel* model, + const SerdNode* default_graph); + + # Statement + + SerdStatement* serd_statement_new(SerdAllocator* allocator, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g, + const SerdCaret* caret); + + SerdStatement* serd_statement_copy(SerdAllocator* allocator, + const SerdStatement* statement); + + void serd_statement_free(SerdAllocator* allocator, + SerdStatement* statement); + + const SerdNode* serd_statement_node(const SerdStatement* statement, + SerdField field); + + const SerdNode* serd_statement_subject(const SerdStatement* statement); + const SerdNode* serd_statement_predicate(const SerdStatement* statement); + const SerdNode* serd_statement_object(const SerdStatement* statement); + const SerdNode* serd_statement_graph(const SerdStatement* statement); + + const SerdCaret* serd_statement_caret(const SerdStatement* statement); + + bint serd_statement_equals(const SerdStatement* a, const SerdStatement* b); + + bint serd_statement_matches(const SerdStatement* statement, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* graph); + + # Iter + + SerdCursor* serd_cursor_copy(SerdAllocator* allocator, + const SerdCursor* cursor); + + const SerdStatement* serd_cursor_get(const 
SerdCursor* cursor); + + SerdStatus serd_cursor_advance(SerdCursor* cursor); + bint serd_cursor_is_end(const SerdCursor* lhs); + bint serd_cursor_equals(const SerdCursor* lhs, const SerdCursor* rhs); + void serd_cursor_free(SerdCursor* cursor); + + # Range + + SerdStatus serd_describe_range(const SerdCursor* range, + const SerdSink* sink, + SerdDescribeFlags flags); + + + # Sink + + ctypedef void (*SerdFreeFunc)(void* ptr); + + SerdSink* serd_sink_new(const SerdWorld* world, + void* handle, + SerdEventFunc event_func, + SerdFreeFunc free_handle); + + void serd_sink_free(SerdSink* sink); + + SerdStatus serd_sink_set_event_func(SerdSink* sink, + SerdEventFunc event_func); + + SerdStatus serd_sink_write_event(const SerdSink* sink, + const SerdEvent* event); + + SerdStatus serd_sink_write_base(const SerdSink* sink, + const SerdNode* uri); + + SerdStatus serd_sink_write_prefix(const SerdSink* sink, + const SerdNode* name, + const SerdNode* uri); + + SerdStatus serd_sink_write_statement(const SerdSink* sink, + SerdStatementFlags flags, + const SerdStatement* statement); + + SerdStatus serd_sink_write(const SerdSink* sink, + SerdStatementFlags flags, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* graph); + + SerdStatus serd_sink_write_end(const SerdSink* sink, const SerdNode* node); + + # Stream Processing + + SerdSink* serd_canon_new(const SerdSink* target); + + SerdSink* serd_filter_new(const SerdSink* target, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* graph); + + # Input Streams + + ctypedef struct SerdInputStream: + void* stream; + SerdReadFunc read; + SerdStreamErrorFunc error; + SerdStreamCloseFunc close; + + SerdInputStream serd_open_input_stream(SerdReadFunc read_func, + SerdStreamErrorFunc error_func, + SerdStreamCloseFunc close_func, + void* stream); + + SerdInputStream serd_open_input_string(const char** position); + + SerdInputStream 
serd_open_input_file(const char* path); + + SerdStatus serd_close_input(SerdInputStream* input); + + # Reader + + SerdReader* serd_reader_new(SerdWorld* world, + SerdSyntax syntax, + SerdReaderFlags flags, + SerdEnv* env, + const SerdSink* sink, + size_t stack_size); + + SerdStatus serd_reader_start(SerdReader* reader, + SerdInputStream* input, + const SerdNode* input_name, + size_t block_size); + + SerdStatus serd_reader_read_chunk(SerdReader* reader); + SerdStatus serd_reader_read_document(SerdReader* reader); + SerdStatus serd_reader_finish(SerdReader* reader); + + void serd_reader_free(SerdReader* reader); + + # Output Streams + + ctypedef struct SerdOutputStream: + void* stream; + SerdWriteFunc write; + SerdStreamCloseFunc close; + + SerdOutputStream serd_open_output_stream(SerdWriteFunc write_func, + SerdStreamCloseFunc close_func, + void* stream); + + SerdOutputStream serd_open_output_buffer(SerdBuffer* buffer); + + SerdOutputStream serd_open_output_file(const char* path); + + SerdStatus serd_close_output(SerdOutputStream* output); + + # Writer + + SerdWriter* serd_writer_new(SerdWorld* world, + SerdSyntax syntax, + SerdWriterFlags flags, + SerdEnv* env, + SerdOutputStream* output, + size_t block_size); + + void serd_writer_free(SerdWriter* writer); + const SerdSink* serd_writer_sink(SerdWriter* writer); + + SerdStatus serd_writer_set_base_uri(SerdWriter* writer, + const SerdNode* uri); + + SerdStatus serd_writer_set_root_uri(SerdWriter* writer, + SerdStringView uri); + + SerdStatus serd_writer_finish(SerdWriter* writer); + + # Model + + SerdModel* serd_model_new(SerdWorld* world, + SerdStatementOrder default_order, + SerdModelFlags flags); + + SerdModel* serd_model_copy(SerdAllocator* allocator, const SerdModel* model); + bint serd_model_equals(const SerdModel* a, const SerdModel* b); + void serd_model_free(SerdModel* model); + SerdWorld* serd_model_world(SerdModel* model); + SerdStatementOrder serd_model_default_order(const SerdModel* model); + 
SerdModelFlags serd_model_flags(const SerdModel* model); + size_t serd_model_size(const SerdModel* model); + bint serd_model_empty(const SerdModel* model); + SerdCursor* serd_model_begin(const SerdModel* model); + const SerdCursor* serd_model_end(const SerdModel* model); + SerdCursor* serd_model_begin_ordered(const SerdModel* model, + SerdStatementOrder order); + + SerdStatus serd_model_add_index(SerdModel* model, SerdStatementOrder order); + + SerdStatus serd_model_drop_index(SerdModel* model, SerdStatementOrder order); + + SerdCursor* serd_model_ordered(const SerdModel* model, + const SerdStatementOrder order); + + SerdCursor* serd_model_find(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + + const SerdNode* serd_model_get(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + + const SerdStatement* serd_model_get_statement(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + + bint serd_model_ask(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + + size_t serd_model_count(const SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + + SerdStatus serd_model_add(SerdModel* model, + const SerdNode* s, + const SerdNode* p, + const SerdNode* o, + const SerdNode* g); + + SerdStatus serd_model_insert(SerdModel* model, + const SerdStatement* statement); + + SerdStatus serd_model_insert_statements(SerdModel* model, SerdCursor* range); + + SerdStatus serd_model_insert_statements(SerdModel* model, SerdCursor* cursor); + SerdStatus serd_model_erase(SerdModel* model, SerdCursor* cursor); + SerdStatus serd_model_erase_statements(SerdModel* model, SerdCursor* cursor); + SerdStatus serd_model_clear(SerdModel* model); + SerdStatus serd_validate(const SerdModel* model); + + + # Caret + + SerdCaret* 
serd_caret_new(SerdAllocator* allocator, + const SerdNode* name, + unsigned line, + unsigned col); + + SerdCaret* serd_caret_copy(SerdAllocator* allocator, + const SerdCaret* caret); + + void serd_caret_free(SerdAllocator* allocator, SerdCaret* caret); + + bint serd_caret_equals(const SerdCaret* lhs, const SerdCaret* rhs); + + const SerdNode* serd_caret_name(const SerdCaret* caret); + unsigned serd_caret_line(const SerdCaret* caret); + unsigned serd_caret_column(const SerdCaret* caret); + + +cdef SerdValue _value(v): + cdef SerdValue value + + if isinstance(v, bool): + value.type = <SerdValueType>1 # SERD_BOOL + value.data.as_bool = <bint>v + return value + + if isinstance(v, float): + value.type = <SerdValueType>2 # SERD_DOUBLE + value.data.as_double = v + return value + + if isinstance(v, int): + if v < -9223372036854775808 or v > 9223372036854775807: + raise ValueError("Integer out of range for xsd:long: %s" % v) + + value.type = <SerdValueType>4 # SERD_LONG + value.data.as_long = v + return value + + raise ValueError("Unsupported value type %s" % type(v)) + + +cdef SerdStringView _empty_string = SerdStringView("", 0) + + +class Status(enum.IntEnum): + """Return status code.""" + + SUCCESS = 0, # Success + FAILURE = 1, # Non-fatal failure + UNKNOWN_ERROR = 2, # Unknown error + NO_DATA = 3, # Missing input + OVERFLOW = 4, # Insufficient space + + BAD_ALLOC = 5, # Memory allocation failed + BAD_ARG = 6, # Invalid argument + BAD_CALL = 7, # Invalid call + BAD_CURIE = 8, # Invalid CURIE or unknown namespace prefix + BAD_CURSOR = 9, # Use of invalidated cursor + BAD_EVENT = 10, # Invalid event in stream + BAD_INDEX = 11, # No optimal model index available + BAD_LABEL = 12, # Encountered clashing blank node label + BAD_LITERAL = 13, # Invalid literal + BAD_PATTERN = 14, # Invalid statement pattern + BAD_READ = 15, # Error reading from file + BAD_STACK = 16, # Stack overflow + BAD_SYNTAX = 17, # Invalid syntax + BAD_TEXT = 18, # Invalid text encoding + BAD_URI = 19, # 
Invalid or unresolved URI + BAD_WRITE = 20, # Error writing to file + BAD_DATA = 21, # Invalid data + + +class Syntax(enum.IntEnum): + """RDF syntax type.""" + + EMPTY = 0 # Empty syntax (suppress input or output) + TURTLE = 1 # Terse triples http://www.w3.org/TR/turtle + NTRIPLES = 2 # Flat triples http://www.w3.org/TR/n-triples/ + NQUADS = 3 # Flat quads http://www.w3.org/TR/n-quads/ + TRIG = 4 # Terse quads http://www.w3.org/TR/trig/ + + +class StatementFlags(enum.IntFlag): + """Flags indicating inline abbreviation information for a statement.""" + + EMPTY_S = 1 << 0 # Empty blank node subject + ANON_S = 1 << 1 # Start of anonymous subject + ANON_O = 1 << 2 # Start of anonymous object + LIST_S = 1 << 3 # Start of list subject + LIST_O = 1 << 4 # Start of list object + TERSE_S = 1 << 5 # Terse serialisation of new subject + TERSE_O = 1 << 6 # Terse serialisation of new object + + +class DescribeFlags(enum.IntFlag): + """Flags that control the style of a model serialisation.""" + + NO_INLINE_OBJECTS = 1 << 0 # Disable object inlining + + +class NodeType(enum.IntEnum): + """Type of a node + + An RDF node, in the abstract sense, can be either a resource, literal, or a + blank. This type is more precise, because syntactically there are two ways + to refer to a resource (by URI or CURIE). Serd also has support for + variable nodes to support some features, which are not RDF nodes. + + There are also two ways to refer to a blank node in syntax (by ID or + anonymously), but this is handled by statement flags rather than distinct + node types. 
+ """ + + LITERAL = 1 # Literal value + URI = 2 # URI (absolute or relative) + BLANK = 3 # Blank node + VARIABLE = 4 # Variable node + + +class NodeFlag(enum.IntEnum): + """Flags that describe the details of a node.""" + + IS_LONG = 1 << 0 # Literal node should be triple-quoted + HAS_DATATYPE = 1 << 1 # Literal node has datatype + HAS_LANGUAGE = 1 << 2 # Literal node has language + + +class Field(enum.IntEnum): + """Index of a field in a statement.""" + + SUBJECT = 0 # Subject + PREDICATE = 1 # Predicate ("key") + OBJECT = 2 # Object ("value") + GRAPH = 3 # Graph ("context") + + +class StatementOrder(enum.IntEnum): + """Statement ordering.""" + + SPO = 0 # Subject, Predicate, Object + SOP = 1 # Subject, Object, Predicate + OPS = 2 # Object, Predicate, Subject + OSP = 3 # Object, Subject, Predicate + PSO = 4 # Predicate, Subject, Object + POS = 5 # Predicate, Object, Subject + GSPO = 6 # Graph, Subject, Predicate, Object + GSOP = 7 # Graph, Subject, Object, Predicate + GOPS = 8 # Graph, Object, Predicate, Subject + GOSP = 9 # Graph, Object, Subject, Predicate + GPSO = 10 # Graph, Predicate, Subject, Object + GPOS = 11 # Graph, Predicate, Object, Subject + + +class ModelFlags(enum.IntFlag): + """Flags that control model storage and indexing.""" + + STORE_GRAPHS = 1 << 0 # Support multiple graphs in model + STORE_CARETS = 1 << 1 # Store original caret of statements + +# TODO: URI + + +class ReaderFlags(enum.IntFlag): + """Reader support options.""" + + READ_LAX = 1 << 0 # Tolerate invalid input where possible + READ_VARIABLES = 1 << 1 # Support variable nodes + + +class WriterFlags(enum.IntFlag): + """Writer style options. + + These flags allow more precise control of writer output style. Note that + some options are only supported for some syntaxes, for example, NTriples + does not support abbreviation and is always ASCII. 
+ """ + + WRITE_ASCII = 1 << 0 # Escape all non-ASCII characters + WRITE_TERSE = 1 << 1 # Write terser output without newlines + WRITE_LAX = 1 << 2 # Tolerate lossy output + + +class EventType(enum.IntEnum): + """The type of a :class:`serd.Event`.""" + + BASE = 1 + PREFIX = 2 + STATEMENT = 3 + END = 4 + + +# Private Python Bindings Utilities + +cdef SerdNode* _unwrap_node(node: Node): + if node is None: + return NULL + elif type(node) == Node: + return (<Node>node)._ptr + + raise TypeError("Expected Node, got %s" % type(node)) + + +def _uri_from_param(param) -> Node: + if isinstance(param, type("")): + return uri(param) + elif isinstance(param, Node) and (<Node>param).type() == NodeType.URI: + return param + + raise TypeError("Expected string or URI node, got %s" % type(param)) + + +def _blank_from_param(param) -> Node: + if isinstance(param, type("")): + return blank(param) + elif isinstance(param, Node) and (<Node>param).type() == NodeType.BLANK: + return param + + raise TypeError("Expected string or blank node, got %s" % type(param)) + + +def _tocstr(s: str): + return s.encode('utf-8') + + +def _string_view(s: str): + encoded = s.encode('utf-8') + return SerdStringView(encoded, len(encoded)) # FIXME: len? + + +def _fromcstr(const char* s): + return s.decode('utf-8') + + +# Public Python API Utilities + +class Namespace: + """Namespace prefix. 
+ + Use attribute syntax to easily create URIs within this namespace, for + example:: + + >>> ns = serd.Namespace("http://example.org/") + >>> print(ns.foo) + http://example.org/foo + + Item syntax (``ns["foo"]``) and concatenation (``ns + "foo"``) build the + same URI, which is useful for names that are not valid identifiers. + """ + + def __init__(self, prefix): + self.prefix = str(_uri_from_param(prefix)) + + def __add__(self, suffix: str): + return uri(self.prefix + suffix) + + def __eq__(self, other): + if type(other) == Namespace: + return self.prefix == other.prefix + elif type(other) == str: + return self.prefix == other + elif type(other) == Node: + return other.type() == NodeType.URI and other == self.prefix + + # Unsupported operand type: let Python fall back to its default + return NotImplemented + + def __str__(self): + return self.prefix + + def __getattr__(self, suffix: str): + # Only reached for missing attributes: never turn special names, or + # "prefix" itself on a half-built instance, into URIs (the latter + # would recurse forever, for example during copy or unpickling) + if suffix == "prefix" or suffix.startswith("__"): + raise AttributeError(suffix) + + return uri(self.prefix + suffix) + + def __getitem__(self, suffix: str): + return uri(self.prefix + suffix) + + def name(self, uri): + uri = _uri_from_param(uri) + if uri is not None and str(uri).startswith(self.prefix): + return str(uri)[len(self.prefix):] + + return None + + +# String Utilities + + +def strerror(status: Status) -> str: + """Return a string describing a status code.""" + return _fromcstr(serd_strerror(status)) + + +# Base64 + + +# def base64_encode(const unsigned char[:] data, wrap_lines=False) -> str: +# """Encode `data` to base64. + +# Args: +# data: Array of arbitrary bytes to encode. +# wrap_lines: Wrap lines at 76 characters to conform to RFC 2045. + +# Returns: +# A string encoded in base64 format. 
+# """ +# size = len(data) +# length = serd_base64_encoded_length(size, wrap_lines) +# result = bytes(length) +# serd_base64_encode(result, &data[0], size, wrap_lines) + +# return result.decode("utf-8") + + +# def base64_decode(string: str) -> bytes: +# """Decode `string` from base64.""" +# length = len(string) +# size = serd_base64_decoded_size(length) +# result = cython.view.array(shape=(size,), itemsize=1, format="c") +# actual_size = <size_t>0 + +# cdef unsigned char[::1] result_view = result + +# serd_base64_decode(&result_view[0], &actual_size, _tocstr(string), length) +# assert actual_size <= size + +# return bytes(result[0 : actual_size]) + + +# Syntax Utilities + + +def syntax_by_name(name: str) -> Syntax: + """Get a syntax by name. + + Case-insensitive, supports "Turtle", "NTriples", "NQuads", and "TriG". + + Returns: + A syntax, or Syntax.EMPTY if the name is not recognized. + """ + return Syntax(serd_syntax_by_name(_tocstr(name))) + + +def guess_syntax(filename: str) -> Syntax: + """Guess a syntax from a filename. + + This uses the file extension to guess the syntax of a file. + + Returns: + A syntax, or Syntax.EMPTY if the name is not recognized. + """ + return Syntax(serd_guess_syntax(_tocstr(filename))) + + +def syntax_has_graphs(syntax: Syntax) -> bool: + """Return whether a syntax can represent multiple graphs. + + Returns: + True for Syntax.NQUADS and Syntax.TRIG, False otherwise. 
+ """ + return serd_syntax_has_graphs(syntax) + + +# World + + +@cython.no_gc +cdef class World: + """Global library state.""" + + cdef SerdWorld* _ptr + + def __cinit__(self): + self._ptr = serd_world_new(NULL) + + def __dealloc__(self): + serd_world_free(self._ptr) + self._ptr = NULL + + def get_blank(self) -> Node: + """Return a unique blank node.""" + return Node._wrap(serd_world_get_blank(self._ptr)) + + def load( + self, + path: str, + syntax: Syntax = Syntax.TURTLE, + reader_flags: ReaderFlags = ReaderFlags(0), + model_flags: ModelFlags = ModelFlags(0), + stack_size: int = 4096, + ) -> Model: + """Load a model from a file and return it.""" + base_uri = file_uri(path) + env = Env(self, base_uri) + model = Model(self, StatementOrder.SPO, model_flags) + inserter = model.inserter(env) + input_stream = FileInput(path) + reader = Reader(self, syntax, reader_flags, env, inserter, stack_size) + + st = reader.start(input_stream, base_uri) + _ensure_success(st, "Failed to open file {}".format(path)) + + st = reader.read_document() + _ensure_success(st, "Failed to read file {}".format(path)) + + st = reader.finish() + _ensure_success(st, "Failed to finish reading file {}".format(path)) + + return model + + def loads( + self, + s: str, + base_uri: Node = None, + syntax: Syntax = Syntax.TURTLE, + reader_flags: ReaderFlags = ReaderFlags(0), + model_flags: ModelFlags = ModelFlags(0), + stack_size: int = 4096, + ) -> Model: + """Load a model from a string and return it.""" + env = Env(self, base_uri) + model = Model(self, StatementOrder.SPO, model_flags) + inserter = model.inserter(env) + input_stream = StringInput(s) + reader = Reader(self, syntax, reader_flags, env, inserter, stack_size) + + st = reader.start(input_stream) + _ensure_success(st, "Failed to start reading string") + + st = reader.read_document() + _ensure_success(st, "Failed to read string") + + st = reader.finish() + _ensure_success(st, "Failed to finish reading string") + + return model + + def dump( + 
self, + model: Model, + path: str, + syntax: Syntax = Syntax.TURTLE, + writer_flags: WriterFlags = WriterFlags(0), + serialisation_flags: DescribeFlags = DescribeFlags(0), + env: Env = None, + ) -> None: + """Write a model to a file.""" + + if env is None: + env = Env(self, file_uri(path)) + + output_stream = FileOutput(filename=path) + writer = Writer(self, syntax, writer_flags, env, output_stream) + st = model.all().write(writer.sink(), serialisation_flags) + writer.finish() + output_stream.close() + _ensure_success(st, "Failed to write model") + + def dumps( + self, + model: Model, + syntax: Syntax = Syntax.TURTLE, + writer_flags: WriterFlags = WriterFlags(0), + serialisation_flags: DescribeFlags = DescribeFlags(0), + env: Env = None, + ) -> str: + """Write a model to a string and return it.""" + + if env is None: + env = Env(self) + + output_stream = StringOutput() + writer = Writer(self, syntax, writer_flags, env, output_stream) + st = model.all().write(writer.sink(), serialisation_flags) + writer.finish() + + _ensure_success(st, "Failed to write model") + + output_stream.close() + return output_stream.output() + + +cdef class Node: + """An RDF node.""" + + cdef SerdNode* _ptr + + @staticmethod + cdef Node _manage(SerdNode* ptr): + if ptr is NULL: + return None + + cdef Node wrapper = Node.__new__(Node) + wrapper._ptr = ptr + return wrapper + + @staticmethod + cdef Node _wrap(const SerdNode* ptr): + if ptr is NULL: + return None + + cdef SerdNode* copy = serd_node_copy(NULL, ptr) + if not copy: + return None + + cdef Node wrapper = Node.__new__(Node) + wrapper._ptr = copy + return wrapper + + @staticmethod + def from_syntax(string: str, + syntax: Syntax = Syntax.TURTLE, + env: Env = None): + """Return a new node created from a string. + + The string must be a single node in the given syntax, as returned by + :meth:`serd.Node.to_syntax`. 
+ """ + + cenv = env._ptr if env is not None else NULL + # Parse using the syntax requested by the caller + return Node._manage(serd_node_from_syntax(NULL, + _tocstr(string), + syntax, + cenv)) + + def __init__(self, v): + if isinstance(v, str): + value_view = _string_view(v) + self._ptr = serd_new_string(NULL, value_view) + elif isinstance(v, bool): + self._ptr = serd_new_value(NULL, _value(v)) + elif isinstance(v, int): + if v < -9223372036854775808 or v > 9223372036854775807: + int_string = str(v) + self._ptr = serd_new_literal(NULL, + _string_view(int_string), + <SerdNodeFlags>2, # SERD_HAS_DATATYPE + _string_view("http://www.w3.org/2001/XMLSchema#integer")) + else: + self._ptr = serd_new_integer(NULL, v); + else: + self._ptr = serd_new_value(NULL, _value(v)) + + assert self._ptr + + def __dealloc__(self): + if self._ptr is not NULL: + serd_node_free(NULL, self._ptr) + self._ptr = NULL + + def __hash__(self): + return (hash(self.type()) ^ + hash(str(self)) ^ + hash(self.datatype()) ^ + hash(self.language())) + + def __str__(self): + return _fromcstr(serd_node_string(self._ptr)) + + def __repr__(self): + assert self._ptr is not NULL + if self.type() == NodeType.LITERAL: + datatype = self.datatype() + language = self.language() + if datatype is None and language is None: + return 'serd.string("{}")'.format(self) + elif language is not None: + return 'serd.plain_literal("{}", "{}")'.format( + self, self.language()) + elif datatype == "http://www.w3.org/2001/XMLSchema#boolean": + return 'serd.boolean({})'.format( + "True" if self == "true" else "False") + + return 'serd.typed_literal("{}", "{}")'.format( + self, self.datatype()) + if self.type() == NodeType.URI: + return 'serd.uri("{}")'.format(self) + if self.type() == NodeType.BLANK: + return 'serd.blank("{}")'.format(self) + if self.type() == NodeType.VARIABLE: + return 'serd.variable("{}")'.format(self) + + raise NotImplementedError("Unknown node type {}".format(self.type())) + + def __len__(self): + if self._ptr: + return serd_node_length(self._ptr) + + 
return 0 + + def __eq__(self, rhs): + if rhs is None: + return False + elif type(rhs) == Node: + return serd_node_equals(self._ptr, (<Node>rhs)._ptr) + else: + return str(self) == rhs + + def __lt__(self, rhs: Node): + return serd_node_compare(self._ptr, rhs._ptr) < 0 + + def __le__(self, rhs: Node): + return serd_node_compare(self._ptr, rhs._ptr) <= 0 + + def string_view(self): + return serd_node_string_view(self._ptr) + + def type(self) -> NodeType: + """Return the type of this node. + + This returns the fundamental "kind" of the node, for example + NodeType.URI or NodeType.LITERAL. Note that this is different than the + optional datatype URI of a literal node, which, for example, states + that a literal is an integer or a double. + """ + assert self._ptr + return NodeType(serd_node_type(self._ptr)) + + def datatype(self) -> Node: + """Return the datatype of this literal, or None. + + The returned node is always a URI, typically something like + `serd.uri("http://www.w3.org/2001/XMLSchema#decimal")`. + """ + return Node._wrap(serd_node_datatype(self._ptr)) + + def language(self) -> Node: + """Return the language of this literal, or None. + + The returned node is always a string, typically something like + `serd.string("en")`. + """ + return Node._wrap(serd_node_language(self._ptr)) + + def to_syntax(self, + syntax: Syntax = Syntax.TURTLE, + env: Env = None) -> str: + """Return a string representation of this node in a syntax. + + The returned string represents that node as if written as an object in + the given syntax, without any extra quoting or punctuation. The syntax + should be either TURTLE or NTRIPLES (the others are redundant). Note + that namespaced (CURIE) nodes and relative URIs can not be expressed in + NTriples. + + Passing the returned string to Node.from_syntax() will produce a node + equivalent to this one. 
"""

        cenv = env._ptr if env is not None else NULL
        cstr = serd_node_to_syntax(NULL, self._ptr, syntax, cenv)

        # Convert to a Python string first, then release the C string that
        # serd_node_to_syntax() returned.
        result = _fromcstr(cstr)
        serd_free(NULL, cstr)
        return result


# Node constructors


def string(s: str) -> Node:
    """Return a new node created from `s` with serd_new_string()."""
    s_view = _string_view(s)
    return Node._manage(serd_new_string(NULL, s_view))


def plain_literal(s: str, lang: str = None) -> Node:
    """Return a new literal for `s`, tagged with `lang` if one is given.

    Without a language this makes the same call as :func:`string`.
    """
    # The view of `s` is needed on both paths, so it is built exactly once.
    s_view = _string_view(s)
    if lang is None:
        return Node._manage(serd_new_string(NULL, s_view))

    lang_view = _string_view(lang)
    return Node._manage(
        serd_new_literal(NULL, s_view, NodeFlag.HAS_LANGUAGE, lang_view))


def typed_literal(s: str, datatype) -> Node:
    """Return a new literal for `s` with the given datatype.

    `datatype` is converted with _uri_from_param().  If the result is not a
    Node, no literal is created and None is returned.
    """
    s_view = _string_view(s)
    datatype_node = _uri_from_param(datatype)
    if type(datatype_node) == Node:
        datatype_uri_view = datatype_node.string_view()
        return Node._manage(
            serd_new_literal(NULL,
                             s_view,
                             NodeFlag.HAS_DATATYPE,
                             datatype_uri_view))

    return None


def blank(s: str) -> Node:
    """Return a new token node of type NodeType.BLANK with the string `s`."""
    s_view = _string_view(s)
    return Node._manage(serd_new_token(NULL, NodeType.BLANK, s_view))


def uri(s: str) -> Node:
    """Return a new token node of type NodeType.URI with the string `s`."""
    s_view = _string_view(s)
    return Node._manage(serd_new_token(NULL, NodeType.URI, s_view))


def file_uri(path: str, hostname: str = "") -> Node:
    """Return a new node created from `path` and `hostname`.

    Both strings are passed on to serd_new_file_uri().
    """
    path_view = _string_view(path)
    hostname_view = _string_view(hostname)
    return Node._manage(serd_new_file_uri(NULL, path_view, hostname_view))


def decimal(
    d: float,
) -> Node:
    """Return a new node created from `d` with serd_new_decimal()."""
    return Node._manage(serd_new_decimal(NULL, d))


def double(d: double) -> Node:
    """Return a new node created from `d` with serd_new_value()."""
    return Node._manage(serd_new_value(NULL, _value(d)))


def integer(i: int) -> Node:
    """Return a new node created from `i` with serd_new_integer()."""
    return Node._manage(serd_new_integer(NULL, i))


def boolean(b: bool) -> Node:
    """Return a new node created from `b` with serd_new_value()."""
    return Node._manage(serd_new_value(NULL, _value(b)))


def base64(const unsigned char[:] buf) -> Node:
    """Return a new node created from the bytes of `buf`.

    The address of the first element and the length of `buf` are passed to
    serd_new_base64().
    """
    # NOTE(review): `&buf[0]` indexes the first element, so an empty buffer
    # is presumably rejected here rather than encoded -- confirm.
    return Node._manage(serd_new_base64(NULL, &buf[0], len(buf)))


def variable(s: str) -> Node:
    """Return a new token node of type NodeType.VARIABLE with the string `s`."""
    s_view = _string_view(s)
    return Node._manage(serd_new_token(NULL, NodeType.VARIABLE, s_view))


cdef class Env:
    """Lexical environment for abbreviating and expanding URIs."""

    cdef SerdEnv* _ptr

    @staticmethod
    cdef Env _manage(SerdEnv* ptr):
        """Return a new Env that owns `ptr`, or None if `ptr` is NULL.

        __init__ is not run for the returned object.  The pointer is released
        with serd_env_free() when the wrapper is deallocated.
        """
        if ptr is NULL:
            return None

        # The instance has to be created from Env itself: asking Env.__new__
        # for a Node fails at runtime, since Node is not a subtype of Env.
        cdef Env wrapper = Env.__new__(Env)
        wrapper._ptr = ptr
        return wrapper

    def __init__(self, world: World, arg=None):
        """Create an environment in `world`.

        `arg` selects how the environment is created:

        - None: serd_env_new() is called with an empty string.
        - Env: the given environment is copied with serd_env_copy().
        - Node: serd_env_new() is called with the string of the node
          (presumably the base URI -- confirm against the C API).

        Raises TypeError for any other type of `arg`.
        """
        assert world is not None
        assert type(world) == World

        if arg is None:
            self._ptr = serd_env_new(world._ptr, _empty_string)
        elif type(arg) == Env:
            self._ptr = serd_env_copy(serd_world_allocator(world._ptr),
                                      (<Env>arg)._ptr)
        elif type(arg) == Node:
            arg_view = arg.string_view()
            self._ptr = serd_env_new(world._ptr, arg_view)
        else:
            raise TypeError("Bad argument type for Env(): %s" % type(arg))

    def __dealloc__(self):
        serd_env_free(self._ptr)
        self._ptr = NULL

    def __eq__(self, rhs):
        """Return true iff `rhs` is an Env that serd_env_equals() this one."""
        return type(rhs) == Env and serd_env_equals(self._ptr, (<Env>rhs)._ptr)

    def base_uri(self) -> Node:
        """Return the current base URI."""
        return Node._wrap(serd_env_base_uri(self._ptr))

    def set_base_uri(self, uri) -> Status:
        """Set the current base URI.

        Passing None sets the base URI to the empty string.  Any other value
        is converted with _uri_from_param() first.
        """
        if uri is None:
            return Status(serd_env_set_base_uri(self._ptr, _empty_string))

        uri_view = _uri_from_param(uri).string_view()
        return Status(serd_env_set_base_uri(self._ptr, uri_view))

    def set_prefix(self, name, uri: Node) -> Status:
        """Set a namespace prefix.

        A namespace prefix is used to expand CURIE nodes, for example, with the
        prefix "xsd" set to "http://www.w3.org/2001/XMLSchema#", "xsd:decimal"
        will expand to "http://www.w3.org/2001/XMLSchema#decimal".

        `name` may be a str or a Node; a TypeError is raised for anything
        else.
        """
        name_node = string(name) if type(name) == str else name

        # Validate before the node is used, so that a bad argument is
        # reported as such instead of failing inside string_view().
        if type(name_node) != Node:
            raise TypeError(
                "Bad name argument type for Env.set_prefix(): %s" % type(name))

        name_view = name_node.string_view()
        uri_view = uri.string_view()
        return Status(serd_env_set_prefix(self._ptr, name_view, uri_view))

    def expand(self, node: Node) -> Node:
        """Expand `node`, transforming CURIEs into URIs

        If `node` is a relative URI reference, it is expanded to a full URI if
        possible. If `node` is a literal, its datatype is expanded if
        necessary. If `node` is a CURIE, it is expanded to a full URI if
        possible.

        Returns None if `node` can not be expanded.
        """
        return Node._manage(serd_env_expand_node(self._ptr, node._ptr))


class ReadContext(object):
    """Context manager for a scoped read.

    Entering the context starts `reader` on `source`, and leaving it finishes
    the read.  A SerdError is raised if either step does not succeed.
    """

    def __init__(self, reader, source):
        self.reader = reader
        self.source = source

    def __enter__(self):
        _ensure_success(self.reader.start(self.source),
                        "Failed to start reading")
        return self

    def __exit__(self, type, value, traceback) -> None:
        _ensure_success(self.reader.finish(), "Failed to finish reading")

    def read_chunk(self) -> None:
        """Read a single "chunk" of data during an incremental read.

        This function will read a single top level description, and return.
        This may be a directive, statement, or several statements; essentially
        it reads until a '.' is encountered. This is particularly useful for
        reading directly from a pipe or socket.

        Raises SerdError if the read does not succeed.
        """

        _ensure_success(self.reader.read_chunk(), "Failed to read chunk")

    def read_document(self) -> None:
        """Read a complete document from the source.

        This function will continue pulling from the source until a complete
        document has been read. Note that this may block when used with
        streams, for incremental reading use read_chunk().

        Raises SerdError if the read does not succeed.
        """

        _ensure_success(self.reader.read_document(), "Failed to read document")


cdef class Reader:
    """Streaming parser that reads a text stream and writes to a sink.

    ..
py:function:: serd.Reader(world: serd.World, syntax: serd.Syntax, flags: serd.ReaderFlags, env: serd.Env, sink, stack_size: int = 4096) + + Construct a new reader. + + The `sink` can be either a :class:`serd.Sink`, a built-in sink (for + example, from :meth:`serd.Writer.sink()` or :meth:`serd.Model.inserter`), + or a function that takes a :class:`serd.Event` and returns a + :class:`serd.Status`. + """ + + cdef SerdReader* _ptr + # cdef __ByteSource _byte_source + cdef _SinkBase _sink + cdef object _callback + + @staticmethod + cdef Reader _manage(SerdReader* ptr): + if ptr is NULL: + return None + + cdef Reader wrapper = Reader.__new__(Reader) + wrapper._ptr = ptr + return wrapper + + def __init__(self, + world: World, + syntax: Syntax, + flags: ReaderFlags, + env: Env, + sink, + stack_size: int = 4096): + if isinstance(sink, _SinkBase): + self._sink = sink + else: + self._callback = sink + self._sink = Sink(world, func=self._callback) + + assert isinstance(self._sink, _SinkBase) + assert self._sink._cptr is not NULL + + self._ptr = serd_reader_new(world._ptr, + syntax, + flags, + env._ptr, + (<_SinkBase>self._sink)._cptr, + stack_size) + + def __dealloc__(self): + serd_reader_free(self._ptr) + self._ptr = NULL + + def start(self, + input_stream: InputStream, + input_name: Node = None, + block_size: int = 1) -> Status: + """Prepare to read from an input stream.""" + return Status(serd_reader_start( + self._ptr, + &input_stream._stream, + input_name._ptr if input_name is not None else NULL, + block_size)) + + def read_chunk(self) -> Status: + """Read a single "chunk" of data during an incremental read. + + This function will read a single top level description, and return. + This may be a directive, statement, or several statements; essentially + it reads until a '.' is encountered. This is particularly useful for + reading directly from a pipe or socket. 
+ """ + return Status(serd_reader_read_chunk(self._ptr)) + + def read_document(self) -> Status: + """Read a complete document from the source. + + This function will continue pulling from the source until a complete + document has been read. Note that this may block when used with + streams, for incremental reading use serd_reader_read_chunk(). + """ + return Status(serd_reader_read_document(self._ptr)) + + def finish(self) -> Status: + """Finish reading from the source. + + This should be called before starting to read from another source. + Finish reading from the source. + """ + return Status(serd_reader_finish(self._ptr)) + + def open(self, input_stream) -> ReadContext: + """Return a scoped read context.""" + + return ReadContext(self, input_stream) + + +cdef class InputStream: + """A source for bytes that provides text input. + + This is only a base class, use StringInput or FileInput instead. + """ + + cdef SerdInputStream _stream + + def __dealloc__(self): + serd_close_input(&self._stream) + + +cdef class StringInput(InputStream): + """A byte source for text input that reads from a string.""" + + cdef const char* _position + cdef bytes _bytes + + def __init__(self, string: str): + super().__init__() + + self._bytes = string.encode('utf-8') + self._position = self._bytes + self._stream = serd_open_input_string(&self._position) + + +cdef class FileInput(InputStream): + """A byte source for text input that reads from a file.""" + + def __init__(self, filename: str): + super().__init__() + self._stream = serd_open_input_file(_tocstr(filename)) + + +# cdef class ByteSink: +# """A sink for bytes that receives text output.""" +# cdef SerdByteSink* _ptr + +# def __dealloc__(self): +# serd_byte_sink_free(self._ptr) +# self._ptr = NULL + +# def flush(self) -> None: +# """Flush any pending output to the underlying stream.""" +# serd_byte_sink_flush(self._ptr) + +# def close(self) -> None: +# """Close sink, including the underlying file if necessary.""" +# 
serd_byte_sink_close(self._ptr) + + +# cdef class FileSink(ByteSink): +# """A sink for bytes that writes text output to a file.""" + +# def __init__(self, +# filename: str, +# block_size: int = 4096): +# super().__init__() + +# self._ptr = serd_byte_sink_new_filename(_tocstr(filename), +# block_size) + +# if self._ptr is NULL: +# raise OSError(errno, strerror(errno), filename) + + +# cdef class StringSink(ByteSink): +# cdef SerdBuffer _buffer + +# def __dealloc__(self): +# serd_free(NULL, self._buffer.buf) +# self._buffer.buf = NULL +# self._buffer.len = 0 +# # super().__dealloc__(self) + +# def __init__(self): +# super().__init__() + +# self._buffer.buf = NULL +# self._buffer.len = 0 +# self._ptr = serd_byte_sink_new_buffer(&self._buffer) + +# def output(self) -> str: +# """Finish writing to this string sink and return the output.""" +# self.flush() +# self.close() +# return _fromcstr(<char*>self._buffer.buf) + + +cdef class OutputStream: + """An output stream that receives bytes. + + This is only a base class, use StringOutput or FileOutput instead. 
"""

    cdef SerdOutputStream _stream

    def close(self):
        """Close the underlying stream with serd_close_output()."""
        serd_close_output(&self._stream)

    def __dealloc__(self):
        # Close the stream with the C call directly instead of going through
        # self.close(): Cython advises against calling methods of the object
        # from __dealloc__, because the object may already be partially
        # destroyed by the time it runs.
        serd_close_output(&self._stream)


cdef class StringOutput(OutputStream):
    """An output stream that writes to a string."""

    cdef SerdBuffer _buffer

    def __init__(self):
        super().__init__()

        self._stream = serd_open_output_buffer(&self._buffer)

    def output(self) -> str:
        """Return the contents of the buffer converted to a string.

        NOTE(review): presumably the stream has to be closed (or the writer
        finished) before the buffer holds complete, terminated text -- confirm.
        """
        return _fromcstr(<const char*>self._buffer.buf)


cdef class FileOutput(OutputStream):
    """An output stream that writes to a file."""

    def __init__(self, filename: str):
        super().__init__()
        self._stream = serd_open_output_file(_tocstr(filename))


cdef class Writer:
    """Streaming writer that emits text as it receives events."""

    cdef SerdWriter* _ptr

    def __init__(self,
                 world: World,
                 syntax: Syntax,
                 flags: WriterFlags,
                 env: Env,
                 output_stream: OutputStream,
                 block_size: int = 1):
        """Create a writer with serd_writer_new().

        `env` may be None, in which case NULL is passed for the environment.
        """
        # NOTE(review): only the address of output_stream._stream is passed
        # on and no reference to `output_stream` is stored here, so the caller
        # presumably has to keep it alive as long as the writer -- confirm.
        self._ptr = serd_writer_new(
            world._ptr,
            syntax,
            flags,
            env._ptr if env is not None else NULL,
            &output_stream._stream,
            block_size
        )

    def __dealloc__(self):
        serd_writer_free(self._ptr)
        self._ptr = NULL

    def sink(self) -> SinkView:
        """Return a sink interface that emits statements via this writer."""
        return SinkView._wrap(serd_writer_sink(self._ptr))

    def set_base_uri(self, uri: Node) -> Status:
        """Set the current output base URI, and emit a directive if applicable.
        """
        return Status(serd_writer_set_base_uri(self._ptr, uri._ptr))

    def set_root_uri(self, uri: str) -> Status:
        """Set the current root URI.

        The root URI should be a prefix of the base URI. The path of the root
        URI is the highest path any relative up-reference can refer to. For
        example, with root <file:///foo/root> and base <file:///foo/root/base>,
        <file:///foo/root> will be written as <../>, but <file:///foo> will be
        written non-relatively as <file:///foo>.
If the root is not explicitly + set, it defaults to the base URI, so no up-references will be created + at all. + """ + return Status(serd_writer_set_root_uri(self._ptr, _string_view(uri))) + + def finish(self) -> Status: + """Finish a write. + + This flushes any pending output, for example terminating punctuation, + so that the output is a complete document. + """ + return Status(serd_writer_finish(self._ptr)) + + +class SerdError(RuntimeError): + """An exception thrown by serd.""" + + def __init__(self, status: Status, message: str): + super().__init__("%s (%s)" % (message, strerror(status))) + + self.status = status + + +def _ensure_success(status: Status, message: str): + if status != Status.SUCCESS: + raise SerdError(status, message) + + +@cython.no_gc +cdef class Model: + """An indexed set of statements.""" + cdef SerdModel* _ptr + cdef World _world + + def __cinit__(self, + world: World, + default_order: StatementOrder = StatementOrder.SPO, + flags: ModelFlags = ModelFlags(0), + model: Model = None): + if model is not None: + self._world = world + self._ptr = serd_model_copy(NULL, model._ptr) + else: + assert type(default_order) == StatementOrder + assert type(flags) == ModelFlags or type(flags) == int + self._world = world + self._ptr = serd_model_new(world._ptr, default_order, flags) + # FIXME: ? 
+ serd_model_add_index(self._ptr, StatementOrder.OPS) + serd_model_add_index(self._ptr, StatementOrder.GSPO) + serd_model_add_index(self._ptr, StatementOrder.GOPS) + # elif type(flags) == ModelFlags: + # self._world = world + # self._ptr = serd_model_new(world._ptr, default_order, flags) + # else: + # raise TypeError("Bad arguments for Model()") + + def __dealloc__(self): + serd_model_free(self._ptr) + self._world = None + self._ptr = NULL + + def __eq__(self, rhs): + return (type(rhs) == Model and + serd_model_equals(self._ptr, (<Model>rhs)._ptr)) + + def __len__(self): + return self.size() + + def __iter__(self): + if self.size() == 0: + return self._end() + + return Cursor._manage(serd_model_begin(self._ptr)) + + def __contains__(self, statement): + return self._find(Statement._from_param(statement)) != self._end() + + def __delitem__(self, statement): + i = self._find(statement) + if i is not None: + self.erase(i) + + def __add__(self: Model, statement_param): + statement = Statement._from_param(statement_param) + status = serd_model_insert(self._ptr, (<Statement>statement)._ptr) + _ensure_success(status, "Failed to insert statement") + return self + + def world(self) -> World: + """Get the world associated with this model.""" + return self._world + + def clear(self) -> None: + """Remove everything from this model.""" + return serd_model_clear(self._ptr) + + def copy(self) -> Model: + """Return a deep copy of this model.""" + return Model(self._world, self.default_order(), self.flags(), self) + + def default_order(self) -> StatementOrder: + """Get the default statement order of this model.""" + return StatementOrder(serd_model_default_order(self._ptr)) + + def flags(self) -> ModelFlags: + """Get the flags enabled on this model.""" + return ModelFlags(serd_model_flags(self._ptr)) + + def size(self) -> int: + """Return the number of statements stored in this model.""" + return serd_model_size(self._ptr) + + def empty(self) -> bool: + """Return true iff there are 
no statements in this model.""" + return serd_model_empty(self._ptr) + + def inserter(self, env: Env, default_graph: Node = None) -> Sink: + """Return a sink that will insert into this model when written to.""" + return Sink._manage(serd_inserter_new( + self._ptr, _unwrap_node(default_graph) + )) + + def insert(self, arg) -> None: + """Insert a statement into this model.""" + if type(arg) == Cursor: + return Status(serd_model_insert( + self._ptr, serd_cursor_get((<Cursor>arg)._ptr))) + + statement = Statement._from_param(arg) + st = serd_model_insert(self._ptr, (<Statement>statement)._ptr) + _ensure_success(st, "Failed to insert statement") + + def insert_statements(self, range: Cursor) -> None: + """Insert a range of statements into this model.""" + st = serd_model_insert_statements(self._ptr, range._ptr) + if st != Status.SUCCESS and st != Status.FAILURE: + raise SerdError(st, "Failed to insert statement") + + def erase(self, arg) -> Status: + """Erase a statement from the model. + + The argument can be a statement, tuple of nodes, or a cursor. 
"""

        # Every failure below raises, so reaching a return means the erase
        # succeeded; Status.SUCCESS is returned to honour the `-> Status`
        # annotation instead of falling off the end with None.
        if type(arg) == Cursor:
            # TODO: Check for end
            _ensure_success(
                serd_model_erase(self._ptr, (<Cursor>arg)._ptr),
                "Failed to erase range")
            return Status.SUCCESS
        elif type(arg) == Statement:
            i = self._find(arg)
            if i == self._end():
                raise ValueError("serd.Model.erase(): statement not in model")

            # Erase through the cursor branch above.
            return self.erase(i)
        elif type(arg) == tuple:
            # Build a statement from the tuple and erase that.
            return self.erase(Statement._from_param(arg))
        else:
            raise TypeError("Bad argument type for Model.erase: %s" % type(arg))

    def erase_statements(self, cursor: Cursor) -> Status:
        """Erase a range of statements from the model.

        Raises SerdError on failure, otherwise returns Status.SUCCESS.
        """
        _ensure_success(
            serd_model_erase_statements(self._ptr, cursor._ptr),
            "Failed to erase range")
        return Status.SUCCESS

    # def begin(self) -> _Iter:
    #     return _Iter._manage(serd_model_begin(self._ptr))

    def _end(self) -> Cursor:
        """Return a cursor wrapping the result of serd_model_end()."""
        return Cursor._wrap(serd_model_end(self._ptr))

    def all(self) -> Cursor:
        """Return a range of all statements in the model in SPO order."""
        return Cursor._manage(serd_model_begin(self._ptr))

    def ordered(self, order: StatementOrder) -> Cursor:
        """Return a range of all statements in the model in a given order."""
        return Cursor._manage(serd_model_begin_ordered(self._ptr, order))

    # FIXME: ?
    def _find(self, statement) -> Cursor:
        """Return a cursor at `statement` in this model, or the end cursor.

        `statement` may be anything Statement._from_param() accepts.
        """
        statement = Statement._from_param(statement)
        s = statement.subject()
        p = statement.predicate()
        o = statement.object()
        g = statement.graph()

        c_iter = serd_model_find(
            self._ptr,
            _unwrap_node(s),
            _unwrap_node(p),
            _unwrap_node(o),
            _unwrap_node(g)
        )

        # A NULL result is mapped to the end cursor rather than to None.
        return Cursor._manage(c_iter) if c_iter else self._end()

    def find(self,
             subject: Node = None,
             predicate: Node = None,
             object: Node = None,
             graph: Node = None) -> Cursor:
        """Search for statements that match a pattern.

        Returns a cursor that points to the first match, or the end if no
        matches were found.
"""

        # The cursor returned by serd_model_find() is handed to
        # Cursor._manage(), the same way _find() treats it.  Cursor._wrap()
        # would manage a *copy* and never free the cursor returned here.
        c_iter = serd_model_find(
            self._ptr,
            _unwrap_node(subject),
            _unwrap_node(predicate),
            _unwrap_node(object),
            _unwrap_node(graph)
        )

        # Fall back to the end cursor so that the result is always a Cursor.
        return Cursor._manage(c_iter) if c_iter else self._end()

    def get(self,
            subject: Node = None,
            predicate: Node = None,
            object: Node = None,
            graph: Node = None) -> Node:
        """Search for a single node that matches a pattern.

        Exactly one of ``subject``, ``predicate``, or ``object`` must be
        ``None``. This function is mainly useful for predicates that only have
        one value.

        Returns the first matching node, or ``None`` if no matches are found.
        """

        return Node._wrap(
            serd_model_get(
                self._ptr,
                _unwrap_node(subject),
                _unwrap_node(predicate),
                _unwrap_node(object),
                _unwrap_node(graph)
            )
        )

    def ask(self, s: Node, p: Node, o: Node, g: Node = None) -> bool:
        """Return true iff the model contains a statement matching a pattern.

        None can be used as a wildcard which matches any node.
        """
        return serd_model_ask(
            self._ptr,
            _unwrap_node(s),
            _unwrap_node(p),
            _unwrap_node(o),
            _unwrap_node(g)
        )

    def count(self, s: Node, p: Node, o: Node, g: Node = None) -> int:
        """Return the number of statements in the model that match a pattern.

        None can be used as a wildcard which matches any node.
+ """ + return serd_model_count( + self._ptr, + _unwrap_node(s), + _unwrap_node(p), + _unwrap_node(o), + _unwrap_node(g) + ) + + +# cdef class Inserter: +# """A statement sink that inserts into a model.""" +# cdef SerdInserter* _ptr + +# @staticmethod +# cdef Inserter _manage(SerdInserter* ptr): +# if ptr is NULL: +# return None + +# cdef Inserter wrapper = Inserter.__new__(Inserter) +# wrapper._ptr = ptr +# return wrapper + +# def __init__(self, model: Model, env: Env, default_graph: Node = None): +# self._ptr = serd_inserter_new( +# model._ptr, env._ptr, _unwrap_node(default_graph) +# ) + +# def __dealloc__(self): +# serd_inserter_free(self._ptr) +# self._ptr = NULL + +# def sink(self) -> SinkView: +# return SinkView._wrap(serd_inserter_get_sink(self._ptr)) + + +cdef class Statement: + """An RDF statement. + + .. py:function:: serd.serd.Statement(subject: serd.Node, predicate: serd.Node, object: serd.Node, graph: serd.Node = None, caret: serd.Caret = None) + + Construct a new statement. 
+ """ + + cdef SerdStatement* _ptr + cdef Node _subject + cdef Node _predicate + cdef Node _object + cdef Node _graph + cdef Caret _caret + + @staticmethod + cdef Statement _manage(SerdStatement* ptr): + if ptr is NULL: + return None + + cdef Statement wrapper = Statement.__new__(Statement) + wrapper._ptr = ptr + return wrapper + + @staticmethod + cdef Statement _wrap(const SerdStatement* ptr): + if ptr is NULL: + return None + + cdef Statement wrapper = Statement.__new__(Statement) + wrapper._subject = Node._wrap(serd_statement_subject(ptr)) + wrapper._predicate = Node._wrap(serd_statement_predicate(ptr)) + wrapper._object = Node._wrap(serd_statement_object(ptr)) + wrapper._graph = Node._wrap(serd_statement_graph(ptr)) + wrapper._caret = Caret._wrap(serd_statement_caret(ptr)) + wrapper._ptr = serd_statement_new( + NULL, + _unwrap_node(wrapper._subject), + _unwrap_node(wrapper._predicate), + _unwrap_node(wrapper._object), + _unwrap_node(wrapper._graph), + (<Caret>wrapper._caret)._ptr if wrapper._caret is not None else NULL) + + return wrapper + + @staticmethod + def _from_param(obj): + if type(obj) == Statement: + return obj + + if type(obj) == tuple: + if len(obj) != 3 and len(obj) != 4: + raise ValueError("Bad number of statement fields") + + for i in range(len(obj)): + if type(obj[i]) != Node: + raise TypeError("Bad type for statement field %d" % i) + + g = obj[3] if len(obj) == 4 else None + return Statement(obj[0], obj[1], obj[2], g) + + raise TypeError("Bad argument type for Statement: %s" % type(obj)) + + def __init__( + self, + subject: Node, + predicate: Node, + object: Node, + graph: Node = None, + caret: Caret = None, + ): + self._subject = <Node>subject + self._predicate = <Node>predicate + self._object = <Node>object + self._graph = <Node>graph + self._caret = <Caret>caret + + self._ptr = serd_statement_new( + NULL, + _unwrap_node(self._subject), + _unwrap_node(self._predicate), + _unwrap_node(self._object), + _unwrap_node(self._graph), + 
(<Caret>self._caret)._ptr if self._caret is not None else NULL, + ) + + def __dealloc__(self): + serd_statement_free(NULL, self._ptr) + self._ptr = NULL + + def __getitem__(self, field): + if field < 0 or field > 3 or (field == 3 and self.graph() is None): + raise IndexError(field) + + return self.node(field) + + def __eq__(self, rhs): + return type(rhs) == Statement and serd_statement_equals( + self._ptr, (<Statement>rhs)._ptr + ) + + def __str__(self): + result = " ".join( + [ + self.subject().to_syntax(), + self.predicate().to_syntax(), + self.object().to_syntax(), + ] + ) + + if serd_statement_graph(self._ptr) is not NULL: + result += " " + self.graph().to_syntax() + + return result + + def __repr__(self): + args = [repr(self.subject()), + repr(self.predicate()), + repr(self.object())] + + if self.graph() is not None: + args += [repr(self.graph())] + + if serd_statement_caret(self._ptr): + args += [repr(self.caret())] + + return "serd.Statement({})".format(", ".join(args)) + + def matches(self, s: Node, p: Node, o: Node, g: Node = None): + """Return true iff this statement matches the given pattern. + + Nodes match if they are equivalent, or if one of them is NULL. The + statement matches if every node matches. 
+ """ + return serd_statement_matches( + self._ptr, + s._ptr if s is not None else NULL, + p._ptr if p is not None else NULL, + o._ptr if o is not None else NULL, + g._ptr if g is not None else NULL, + ) + + def node(self, field: Field) -> Node: + """Return a node in this statement.""" + assert field >= Field.SUBJECT and field <= Field.GRAPH + return Node._wrap(serd_statement_node(self._ptr, field)) + + def subject(self) -> Node: + """Return the subject node of this statement.""" + return Node._wrap(serd_statement_subject(self._ptr)) + + def predicate(self) -> Node: + """Return the predicate node of this statement.""" + return Node._wrap(serd_statement_predicate(self._ptr)) + + def object(self) -> Node: + """Return the object node in this statement.""" + return Node._wrap(serd_statement_object(self._ptr)) + + def graph(self) -> Node: + """Return the graph node in this statement.""" + return Node._wrap(serd_statement_graph(self._ptr)) + + def caret(self) -> Caret: + """Return the file location this statement came from, or None.""" + return Caret._wrap(serd_statement_caret(self._ptr)) + + +cdef class Cursor: + """A range of statements in a model. + + This class is iterable so it can be used like a collection. For example, + :meth:`serd.Model.all()` returns a range, so all the statements in a model + can be printed like so:: + + for statement in model.all(): + print(statement) + + A range is "truthy" if it is non-empty. 
+ """ + cdef SerdCursor* _ptr + + @staticmethod + cdef _manage(SerdCursor* ptr): + if ptr is NULL: + return None + + cdef Cursor wrapper = Cursor.__new__(Cursor) + wrapper._ptr = ptr + return wrapper + + @staticmethod + cdef _wrap(const SerdCursor* ptr): + return Cursor._manage(serd_cursor_copy(NULL, ptr)) + + def __init__(self, range: Cursor): + assert type(range) == Cursor + self._ptr = serd_cursor_copy(NULL, (<Cursor>range)._ptr) + + def __dealloc__(self): + serd_cursor_free(self._ptr) + self._ptr = NULL + + def __bool__(self): + return not self.empty() + + def __eq__(self, rhs): + return type(rhs) == Cursor and serd_cursor_equals(self._ptr, (<Cursor>rhs)._ptr) + + def __iter__(self): + return self + # if self.empty(): + # return Cursor._end() + + # return _Iter._wrap(serd_cursor_begin(self._ptr)) + + def __next__(self): + """Move to and return the next item.""" + + if serd_cursor_is_end(self._ptr): + raise StopIteration + + item = serd_cursor_get(self._ptr) + + # status = serd_cursor_advance(self._ptr) + # if status > 1: + # raise StopIteration + + # # if status != 0: + # # print("STOP ITERATION: {}".format(status)) + # # raise StopIteration + # # else: + # # print("GOOD ITERATION") + + # item = serd_cursor_get(self._ptr) + serd_cursor_advance(self._ptr) + + return Statement._wrap(item) + + # def front(self) -> Statement: + # """Return the first statement in this range, or None.""" + # return Statement._wrap(serd_cursor_front(self._ptr)) + + def empty(self) -> bool: + """Return true iff there are no statements in this range.""" + return serd_cursor_is_end(self._ptr) + + def write(self, sink: _SinkBase, flags: DescribeFlags) -> Status: + """Write this range to `sink`. + + The serialisation style can be controlled with `flags`. The default is + to write statements in an order suited for pretty-printing with Turtle + or TriG with as many objects written inline as possible. 
If + `DescribeFlags.NO_INLINE_OBJECTS` is given, a simple sorted stream is + written instead, which is significantly faster since no searching is + required, but can result in ugly output for Turtle or Trig. + """ + return Status(serd_describe_range(self._ptr, sink._cptr, flags)) + + +cdef class Caret: + """The origin of a statement in a document.""" + cdef SerdCaret* _ptr + cdef Node _name_node + + @staticmethod + cdef Caret _wrap(const SerdCaret* ptr): + if ptr is NULL: + return None + + name_node = Node._wrap(serd_caret_name(ptr)) + + cdef SerdCaret* copy = serd_caret_new(NULL, + name_node._ptr, + serd_caret_line(ptr), + serd_caret_column(ptr)); + + cdef Caret wrapper = Caret.__new__(Caret) + wrapper._ptr = copy + wrapper._name_node = name_node + return wrapper + + def __init__(self, name, line: uint = 1, col: uint = 0): + if type(name) == Node: + self._name_node = name + self._ptr = serd_caret_new(NULL, self._name_node._ptr, line, col) + elif type(name) == str: + self._name_node = string(name) + self._ptr = serd_caret_new(NULL, self._name_node._ptr, line, col) + else: + raise TypeError("Bad name argument type for Caret(): %s" % type(name)) + + def __dealloc__(self): + serd_caret_free(NULL, self._ptr) + self._ptr = NULL + self._name_node = None + + def __eq__(self, rhs): + return (type(rhs) == Caret and + serd_caret_equals(self._ptr, (<Caret>rhs)._ptr)) + + def __str__(self): + return "{}:{}:{}".format(self._name_node, self.line(), self.column()) + + def __repr__(self): + return "serd.Caret({})".format( + ", ".join([repr(self._name_node), str(self.line()), str(self.column())])) + + def name(self) -> Node: + """Return the document name. + + This is typically a file URI, but may be a descriptive string node for + statements that originate from strings or streams. 
+ """ + + return self._name_node + + def line(self) -> int: + """Return the one-relative line number in the document.""" + return serd_caret_line(self._ptr) + + def column(self) -> int: + """Return the zero-relative column number in the line.""" + return serd_caret_column(self._ptr) + + +cdef class Event: + """An event in a data stream. + + Streams of data are represented as a series of events. Events represent + everything that can occur in an RDF document, and are used to plumb + together different components. For example, when parsing a document, a + reader emits a stream of events which can be sent to a writer to rewrite + the document, or to an inserter to build a model in memory. + """ + + cdef SerdEvent* _ptr + cdef SerdEventType _type + cdef Node _name + cdef Node _uri + cdef SerdStatementFlags _flags + cdef Statement _statement + cdef Node _node + + @staticmethod + cdef Event _wrap(const SerdEvent* ptr): + event = Event() + event._type = ptr.type + + if event._type == EventType.BASE: + base_event = <const SerdBaseEvent*>ptr + event._uri = Node._wrap(base_event.uri) + elif event._type == EventType.PREFIX: + prefix_event = <const SerdPrefixEvent*>ptr + event._name = Node._wrap(prefix_event.name) + event._uri = Node._wrap(prefix_event.uri) + elif event._type == EventType.STATEMENT: + statement_event = <const SerdStatementEvent*>ptr + event._flags = StatementFlags(statement_event.flags) + event._statement = Statement._wrap(statement_event.statement) + elif event._type == EventType.END: + end_event = <const SerdEndEvent*>ptr + event._node = Node._wrap(end_event.node) + else: + return None + + return event + + @staticmethod + def base(base_uri): + """Return an event that sets the base URI.""" + event = Event() + event._type = EventType.BASE + event._uri = _uri_from_param(base_uri) + return event + + @staticmethod + def prefix(name, namespace_uri): + """Return an event that sets a namespace prefix.""" + event = Event() + event._type = EventType.PREFIX + 
event._name = string(name) + event._uri = _uri_from_param(namespace_uri) + return event + + @staticmethod + def statement(statement, flags: StatementFlags = StatementFlags(0)): + """Return an event that represents a statement.""" + assert type(statement) == Statement + + event = Event() + event._type = EventType.STATEMENT + event._statement = Statement._from_param(statement) + event._flags = StatementFlags(flags) + return event + + @staticmethod + def end(node): + """Return an event that ends an anonymous node description.""" + event = Event() + event._type = EventType.END + event._node = _blank_from_param(node) + return event + + def __eq__(self, rhs): + if type(rhs) != Event: + return False + + other = <Event>rhs + if self._type != other._type: + return False + elif self._type == EventType.BASE: + return self._uri == other._uri + elif self._type == EventType.PREFIX: + return (self._name, self._uri) == (other._name, other._uri) + elif self._type == EventType.STATEMENT: + return (self._statement, self._flags) == (other._statement, other._flags) + elif self._type == EventType.END: + return self._node == other._node + + return False + + def __repr__(self): + def flags_repr(flags): + active = [] + for f in [StatementFlags.EMPTY_S, + StatementFlags.ANON_S, + StatementFlags.ANON_O, + StatementFlags.LIST_S, + StatementFlags.LIST_O, + StatementFlags.TERSE_S, + StatementFlags.TERSE_O]: + if flags & f: + active += ['serd.' 
+ str(f)] + + return ' | '.join(active) + + if self._type == EventType.BASE: + return 'serd.Event.base("%s")' % self._uri + elif self._type == EventType.PREFIX: + return 'serd.Event.prefix("%s", "%s")' % (self._name, self._uri) + elif self._type == EventType.STATEMENT: + result = 'serd.Event.statement(%s' % repr(self._statement) + if self._flags: + result += ', %s' % flags_repr(self._flags) + + return result + ')' + elif self._type == EventType.END: + return 'serd.Event.end(%s)' % repr(self._node) + + return "None" + + +cdef class _SinkBase: + cdef const SerdSink* _cptr + + +cdef class SinkView(_SinkBase): + @staticmethod + cdef SinkView _wrap(const SerdSink* cptr): + if cptr is NULL: + return None + + cdef SinkView wrapper = SinkView.__new__(SinkView) + wrapper._cptr = cptr + return wrapper + + +cdef class Sink(_SinkBase): + cdef SerdSink* _ptr + cdef Env _env + cdef object _func + + @staticmethod + cdef Sink _manage(SerdSink* ptr): + if ptr is NULL: + return None + + cdef Sink wrapper = Sink.__new__(Sink) + wrapper._cptr = ptr + wrapper._ptr = ptr + return wrapper + + def __init__(self: Sink, world: World, func: callable = None): + if func is not None: + self._env = Env(world) + self._func = func + self._ptr = serd_sink_new(world._ptr, <void*>self, Sink._c_on_event, NULL) + self._cptr = self._ptr + # TODO: get_env? + else: + self._env = Env(world) + self._func = None + self._ptr = serd_sink_new(world._ptr, <void*>self, Sink._c_on_event, NULL) + self._cptr = self._ptr + # TODO: get_env? 
+ + def __dealloc__(self): + serd_sink_free(self._ptr) + self._ptr = NULL + self._cptr = NULL + + def on_event(self, event: Event) -> Status: + return Status.SUCCESS + + def __call__(self, event: Event) -> Status: + return self._func(event) if self._func is not None else Status.SUCCESS + + @staticmethod + cdef SerdStatus _c_on_event(void* handle, const SerdEvent* event): + self = <Sink>handle + result = self.__call__(Event._wrap(event)) + assert result is None or type(result) == Status + return result if result is not None else Status.SUCCESS diff --git a/bindings/python/test_serd.py b/bindings/python/test_serd.py new file mode 100644 index 00000000..33f90e02 --- /dev/null +++ b/bindings/python/test_serd.py @@ -0,0 +1,1036 @@ +# Copyright 2020 David Robillard <d@drobilla.net> +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 

"""Unit tests for the serd Python bindings."""

import base64
import math
import os
import serd
import shutil
import tempfile
import textwrap
import unittest
import itertools


class NamespaceTests(unittest.TestCase):
    """Tests for serd.Namespace construction, comparison and name lookup."""

    def testConstruction(self):
        with self.assertRaises(TypeError):
            serd.Namespace()

        with self.assertRaises(TypeError):
            serd.Namespace(4)

        with self.assertRaises(TypeError):
            serd.Namespace(None)

        ns1 = serd.Namespace(serd.uri("http://example.org"))
        ns2 = serd.Namespace("http://example.org")

        self.assertEqual(ns1, ns2)

    def testComparison(self):
        ns1 = serd.Namespace("http://example.org/a#")
        ns2 = serd.Namespace("http://example.org/b#")

        self.assertNotEqual(ns1, ns2)

        self.assertEqual(ns1, serd.Namespace("http://example.org/a#"))
        self.assertEqual(ns1, serd.uri("http://example.org/a#"))
        self.assertEqual(ns1, "http://example.org/a#")

        self.assertNotEqual(ns1, serd.uri("http://drobilla.net/a#"))
        self.assertNotEqual(ns1, "http://drobilla.net/a#")

    def testAdd(self):
        ns = serd.Namespace("http://example.org/")

        self.assertEqual(ns + "foo", "http://example.org/foo")
        self.assertEqual(ns.foo, "http://example.org/foo")
        self.assertEqual(ns["foo"], "http://example.org/foo")

    def testName(self):
        ns = serd.Namespace("http://example.org/")

        self.assertEqual(ns.name("http://example.org/foo"), "foo")
        self.assertEqual(ns.name(serd.uri("http://example.org/foo")), "foo")

        self.assertIsNone(ns.name("http://drobilla.net/foo"))


class StringTests(unittest.TestCase):
    """Tests for serd.strerror."""

    def testStrerror(self):
        self.assertEqual(serd.strerror(serd.Status.SUCCESS), "Success")
        self.assertEqual(serd.strerror(99999), "Unknown error")

        with self.assertRaises(OverflowError):
            serd.strerror(-1)

        self.assertEqual(
            serd.strerror(serd.Status.BAD_WRITE), "Error writing to file"
        )


# class Base64Tests(unittest.TestCase):
#     def testShortBase64(self):
#         data = "foobar".encode("utf-8")
#         encoded = "Zm9vYmFy"

#         self.assertEqual(serd.base64_encode(data), encoded)
#         self.assertEqual(serd.base64_encode(data, True), encoded)
#         self.assertEqual(serd.base64_decode(encoded), data)

#     def testLongBase64(self):
#         data = ("foobar" * 20).encode("utf-8")
#         oneline_encoded = "Zm9vYmFy" * 20
#         multiline_encoded = "\n".join(textwrap.wrap("Zm9vYmFy" * 20, width=76))

#         self.assertEqual(serd.base64_encode(data), oneline_encoded)
#         self.assertEqual(serd.base64_encode(data, True), multiline_encoded)
#         self.assertEqual(serd.base64_decode(oneline_encoded), data)
#         self.assertEqual(serd.base64_decode(multiline_encoded), data)


class SyntaxTests(unittest.TestCase):
    """Tests for syntax lookup, guessing and graph support queries."""

    def testSyntaxByName(self):
        self.assertEqual(serd.syntax_by_name("TuRtLe"), serd.Syntax.TURTLE)
        self.assertEqual(serd.syntax_by_name("wat"), serd.Syntax.EMPTY)

    def testGuessSyntax(self):
        self.assertEqual(serd.guess_syntax("foo.nq"), serd.Syntax.NQUADS)
        self.assertEqual(serd.guess_syntax("foo.txt"), serd.Syntax.EMPTY)

    def testSyntaxHasGraphs(self):
        self.assertFalse(serd.syntax_has_graphs(serd.Syntax.EMPTY))
        self.assertFalse(serd.syntax_has_graphs(serd.Syntax.TURTLE))
        self.assertFalse(serd.syntax_has_graphs(serd.Syntax.NTRIPLES))
        self.assertTrue(serd.syntax_has_graphs(serd.Syntax.NQUADS))
        self.assertTrue(serd.syntax_has_graphs(serd.Syntax.TRIG))


class WorldTests(unittest.TestCase):
    """Tests for serd.World."""

    def setUp(self):
        self.world = serd.World()

    def testGetBlank(self):
        self.assertEqual(self.world.get_blank(), serd.blank("b1"))
        self.assertEqual(self.world.get_blank(), serd.blank("b2"))


class NodeTests(unittest.TestCase):
    """Tests for node constructors, accessors, repr and comparison."""

    def _throughSyntax(self, n):
        """Return `n` after a round trip through its syntax form."""
        return serd.Node.from_syntax(n.to_syntax())

    def setUp(self):
        self.world = serd.World()

    def testConstruction(self):
        self.assertEqual(serd.Node("hello"), serd.plain_literal("hello"))
        self.assertEqual(serd.Node(False), serd.boolean(False))
        self.assertEqual(serd.Node(True), serd.boolean(True))
        self.assertEqual(serd.Node(42), serd.integer(42))
        self.assertEqual(serd.Node(42.34), serd.double(42.34))
        self.assertEqual(str(serd.Node(-9223372036854775809)), "-9223372036854775809")
        self.assertEqual(str(serd.Node(9223372036854775808)), "9223372036854775808")

    def testString(self):
        n = serd.string("hello")
        self.assertEqual(n.type(), serd.NodeType.LITERAL)
        self.assertEqual(n, "hello")
        self.assertEqual(len(n), 5)
        self.assertEqual(repr(n), 'serd.string("hello")')
        self.assertEqual(n, eval(repr(n)))
        self.assertEqual(n, self._throughSyntax(n))
        self.assertIsNone(n.datatype())
        self.assertIsNone(n.language())

    def testPlainLiteral(self):
        n = serd.plain_literal("hallo", "de")
        self.assertEqual(n.type(), serd.NodeType.LITERAL)
        self.assertEqual(n, "hallo")
        self.assertEqual(len(n), 5)
        self.assertEqual(repr(n), 'serd.plain_literal("hallo", "de")')
        self.assertEqual(n, eval(repr(n)))
        self.assertEqual(n, self._throughSyntax(n))
        self.assertIsNone(n.datatype())
        self.assertEqual(n.language(), serd.string("de"))

    def testTypedLiteral(self):
        datatype = serd.uri("http://example.org/ns#Hex")
        n = serd.typed_literal("ABCD", datatype)
        self.assertEqual(n.type(), serd.NodeType.LITERAL)
        self.assertEqual(n, "ABCD")
        self.assertEqual(len(n), 4)
        self.assertEqual(
            repr(n), 'serd.typed_literal("ABCD", "http://example.org/ns#Hex")'
        )
        self.assertEqual(n, eval(repr(n)))
        self.assertEqual(n, self._throughSyntax(n))
        self.assertEqual(n.datatype(), datatype)
        self.assertIsNone(n.language())

    def testBlank(self):
        n = serd.blank("b0")
        self.assertEqual(n.type(), serd.NodeType.BLANK)
        self.assertEqual(n, "b0")
        self.assertEqual(len(n), 2)
        self.assertEqual(repr(n), 'serd.blank("b0")')
        self.assertEqual(n, eval(repr(n)))
        self.assertEqual(n, self._throughSyntax(n))
        self.assertIsNone(n.datatype())
        self.assertIsNone(n.language())

    def testUri(self):
        n = serd.uri("http://example.org/")
        self.assertEqual(n.type(), serd.NodeType.URI)
        self.assertEqual(n, "http://example.org/")
        self.assertEqual(len(n), 19)
        self.assertEqual(repr(n), 'serd.uri("http://example.org/")')
        self.assertEqual(n, eval(repr(n)))
        self.assertEqual(n, self._throughSyntax(n))
        self.assertIsNone(n.datatype())
        self.assertIsNone(n.language())

    def testRelativeUri(self):
        n = serd.uri("rel/uri")
        self.assertEqual(n.type(), serd.NodeType.URI)
        self.assertEqual(n, "rel/uri")
        self.assertEqual(len(n), 7)
        self.assertEqual(repr(n), 'serd.uri("rel/uri")')
        self.assertEqual(n, eval(repr(n)))
        self.assertEqual(n, self._throughSyntax(n))
        self.assertIsNone(n.datatype())
        self.assertIsNone(n.language())

    # def testResolvedUri(self):
    #     base = serd.uri("http://example.org/")
    #     n = serd.resolved_uri("name", base)
    #     self.assertEqual(n.type(), serd.NodeType.URI)
    #     self.assertEqual(n, "http://example.org/name")
    #     self.assertEqual(len(n), 23)
    #     self.assertEqual(repr(n), 'serd.uri("http://example.org/name")')
    #     self.assertEqual(n, eval(repr(n)))
    #     self.assertEqual(n, self._throughSyntax(n))
    #     self.assertIsNone(n.datatype())
    #     self.assertIsNone(n.language())

    def testLocalFileUri(self):
        n = serd.file_uri("/foo/bar")
        self.assertEqual(n.type(), serd.NodeType.URI)
        self.assertEqual(n, "file:///foo/bar")
        self.assertEqual(len(n), 15)
        self.assertEqual(repr(n), 'serd.uri("file:///foo/bar")')
        self.assertEqual(n, eval(repr(n)))
        self.assertEqual(n, self._throughSyntax(n))
        self.assertIsNone(n.datatype())
        self.assertIsNone(n.language())

    def testFileUriWithHostname(self):
        n = serd.file_uri("/foo/bar", "host")
        self.assertEqual(n.type(), serd.NodeType.URI)
        self.assertEqual(n, "file://host/foo/bar")
        self.assertEqual(len(n), 19)
        self.assertEqual(repr(n), 'serd.uri("file://host/foo/bar")')
        self.assertEqual(n, eval(repr(n)))
        self.assertEqual(n, self._throughSyntax(n))
        self.assertIsNone(n.datatype())
        self.assertIsNone(n.language())

    def testDecimal(self):
        xsd_decimal = "http://www.w3.org/2001/XMLSchema#decimal"

        n = serd.decimal(12.34)
        self.assertEqual(n.type(), serd.NodeType.LITERAL)
        self.assertEqual(n, "12.34")
        self.assertEqual(len(n), 5)
        self.assertEqual(
            repr(n), 'serd.typed_literal("12.34", "{}")'.format(xsd_decimal)
        )
        self.assertEqual(n, eval(repr(n)))
        self.assertEqual(n, self._throughSyntax(n))
        self.assertEqual(n.datatype(), serd.uri(xsd_decimal))
        self.assertIsNone(n.language())

    def testDouble(self):
        xsd_double = "http://www.w3.org/2001/XMLSchema#double"
        n = serd.double(12.34)
        self.assertEqual(n.type(), serd.NodeType.LITERAL)
        self.assertEqual(n, "1.234E1")
        self.assertEqual(len(n), 7)
        self.assertEqual(
            repr(n), 'serd.typed_literal("1.234E1", "{}")'.format(xsd_double)
        )
        self.assertEqual(n, eval(repr(n)))
        self.assertEqual(n, self._throughSyntax(n))
        self.assertEqual(n.datatype(), serd.uri(xsd_double))
        self.assertIsNone(n.language())

    def testInteger(self):
        xsd_integer = "http://www.w3.org/2001/XMLSchema#integer"
        n = serd.integer(42)
        self.assertEqual(n.type(), serd.NodeType.LITERAL)
        self.assertEqual(n, "42")
        self.assertEqual(len(n), 2)
        self.assertEqual(
            repr(n), 'serd.typed_literal("42", "{}")'.format(xsd_integer)
        )
        self.assertEqual(n, eval(repr(n)))
        self.assertEqual(n, self._throughSyntax(n))
        self.assertEqual(n.datatype(), serd.uri(xsd_integer))
        self.assertIsNone(n.language())

    def testBoolean(self):
        xsd_boolean = "http://www.w3.org/2001/XMLSchema#boolean"
        t = serd.boolean(True)
        self.assertEqual(t.type(), serd.NodeType.LITERAL)
        self.assertEqual(t, "true")
        self.assertEqual(len(t), 4)
        self.assertEqual(repr(t), "serd.boolean(True)")
        self.assertEqual(t, eval(repr(t)))
        self.assertEqual(t, self._throughSyntax(t))
        self.assertEqual(t.datatype(), serd.uri(xsd_boolean))
        self.assertIsNone(t.language())

        f = serd.boolean(False)
        self.assertEqual(f.type(), serd.NodeType.LITERAL)
        self.assertEqual(f, "false")
        self.assertEqual(len(f), 5)
        self.assertEqual(repr(f), "serd.boolean(False)")
        self.assertEqual(f, eval(repr(f)))
        self.assertEqual(f, self._throughSyntax(f))
        self.assertEqual(f.datatype(), serd.uri(xsd_boolean))
        self.assertIsNone(f.language())

    def testBlob(self):
        xsd_base64Binary = "http://www.w3.org/2001/XMLSchema#base64Binary"
        n = serd.base64(b"DEAD")
        n_bytes = base64.b64encode(b"DEAD")
        self.assertEqual(n.type(), serd.NodeType.LITERAL)
        self.assertEqual(bytes(str(n), "utf-8"), n_bytes)
        self.assertEqual(len(n), 8)
        self.assertEqual(
            repr(n),
            'serd.typed_literal("{}", "{}")'.format(
                n_bytes.decode("utf-8"), xsd_base64Binary
            ),
        )
        self.assertEqual(n, eval(repr(n)))
        self.assertEqual(n, self._throughSyntax(n))
        self.assertEqual(n.datatype(), serd.uri(xsd_base64Binary))
        self.assertIsNone(n.language())

    def testVariable(self):
        n = serd.variable("foo")
        self.assertEqual(n.type(), serd.NodeType.VARIABLE)
        self.assertEqual(n, "foo")
        self.assertEqual(len(n), 3)
        self.assertEqual(repr(n), 'serd.variable("foo")')
        self.assertEqual(n, eval(repr(n)))
        # self.assertEqual(n, self._throughSyntax(n))
        self.assertIsNone(n.datatype())
        self.assertIsNone(n.language())

    def testComparison(self):
        a = serd.string("Aardvark")
        b = serd.string("Banana")

        self.assertEqual(a, a)
        self.assertNotEqual(a, b)
        self.assertLess(a, b)
        self.assertLessEqual(a, b)
        self.assertLessEqual(a, a)
        self.assertGreater(b, a)
        self.assertGreaterEqual(b, a)
        self.assertGreaterEqual(b, b)

    def testHash(self):
        nodes = [
            serd.plain_literal("hello"),
            serd.plain_literal("hello", "en"),
            serd.typed_literal("hello", "http://example.org/hex"),
            serd.blank("hello"),
            serd.uri("http://example.org"),
        ]

        # Check that all node types have a distinct hash
        for lhs, rhs in itertools.combinations(nodes, r=2):
            self.assertNotEqual(hash(lhs), hash(rhs))

        # Check that nodes work in a set
        self.assertEqual(len(set(nodes)), len(nodes))


class EnvTests(unittest.TestCase):
    """Tests for serd.Env equality, base URI handling and expansion."""

    def setUp(self):
        self.world = serd.World()

    def testEquality(self):
        uri = serd.uri("http://example.org/")
        env1 = serd.Env(self.world)
        env2 = serd.Env(self.world)
        self.assertEqual(env1, env2)

        env2.set_base_uri(uri)
        self.assertNotEqual(env1, env2)

        env2.set_base_uri(None)
        self.assertEqual(env1, env2)

        env2.set_prefix("eg", uri)
        self.assertNotEqual(env1, env2)

        env1.set_prefix(serd.string("eg"), uri)
        self.assertEqual(env1, env2)

    def testBaseUri(self):
        env = serd.Env(self.world)
        self.assertIsNone(env.base_uri())

        base = serd.uri("http://example.org/")
        env.set_base_uri(base)
        self.assertEqual(env.base_uri(), base)

    def testInitialBaseUri(self):
        base = serd.uri("http://example.org/")
        env = serd.Env(self.world, base)
        self.assertEqual(env.base_uri(), base)

    def testExpand(self):
        env = serd.Env(self.world)
        rel = serd.uri("rel")

        self.assertIsNone(env.expand(rel))

        env.set_base_uri("http://example.org/")

        self.assertEqual(env.expand(rel), serd.uri("http://example.org/rel"))


class ModelTests(unittest.TestCase):
    """Tests for serd.Model insertion, erasure, size and queries."""

    def setUp(self):
        self.world = serd.World()
        self.s = serd.uri("http://example.org/s")
        self.p = serd.uri("http://example.org/p")
        self.o = serd.uri("http://example.org/o")
        self.o1 = serd.uri("http://example.org/o1")
        self.o2 = serd.uri("http://example.org/o2")
        self.g = serd.uri("http://example.org/g")
        self.x = serd.uri("http://example.org/x")

    def testConstruction(self):
        model = serd.Model(
            self.world, serd.StatementOrder.SPO, serd.ModelFlags.STORE_GRAPHS
        )

        self.assertEqual(model.flags(), serd.ModelFlags.STORE_GRAPHS)
        self.assertEqual(model.world(), self.world)

    def testInsertErase(self):
        model = serd.Model(self.world)

        model.insert((self.s, self.p, self.o))
        self.assertEqual(len(model), 1)
        model.erase(iter(model))
        self.assertEqual(len(model), 0)

        statement = serd.Statement(self.s, self.p, self.o)
        model += statement
        self.assertEqual(len(model), 1)
        del model[statement]
        self.assertEqual(len(model), 0)

    def testSize(self):
        model = serd.Model(self.world)
        self.assertEqual(model.size(), 0)
        self.assertEqual(len(model), 0)
        self.assertTrue(model.empty())

        model.insert((self.s, self.p, self.o))
        self.assertEqual(model.size(), 1)
        self.assertEqual(len(model), 1)
        self.assertFalse(model.empty())

        model.erase(iter(model))
        self.assertEqual(model.size(), 0)
        self.assertEqual(len(model), 0)
        self.assertTrue(model.empty())

    # def testBeginEnd(self):
    #     s, p, o, g = self.s, self.p, self.o, self.g
    #     model = serd.Model(self.world)

    #     self.assertEqual(model.begin(), model.end())

    #     model.insert((s, p, o, g))
    #     self.assertNotEqual(model.begin(), model.end())

    # def testFind(self):
    #     s, p, o, g, x = self.s, self.p, self.o, self.g, self.x
    #     flags = serd.ModelFlags.INDEX_SPO | serd.ModelFlags.INDEX_GRAPHS
    #     model = serd.Model(self.world, flags)
    #     in_statement = serd.Statement(s, p, o, g)
    #     out_statement = serd.Statement(x, p, o, g)

    #     model += in_statement
    #     self.assertEqual(model.find(out_statement), model.end())
    #     self.assertNotEqual(model.find(in_statement), model.end())

    def testGet(self):
        s, p, o, g = self.s, self.p, self.o, self.g
        model = serd.Model(self.world, flags=serd.ModelFlags.STORE_GRAPHS)

        model.insert((s, p, o, g))
        self.assertEqual(model.get(None, p, o, g), s)
        self.assertEqual(model.get(s, None, o, g), p)
        self.assertEqual(model.get(s, p, None, g), o)
        self.assertEqual(model.get(s, p, o, None), g)

    def testAsk(self):
        s, p, o, g, x = self.s, self.p, self.o, self.g, self.x
        model = serd.Model(self.world, flags=serd.ModelFlags.STORE_GRAPHS)
        model.insert((s, p, o, g))

        self.assertTrue(model.ask(s, p, o, g))
        self.assertIn(serd.Statement(s, p, o, g), model)
        self.assertIn((s, p, o, g), model)

        self.assertFalse(model.ask(x, p, o, g))
        self.assertNotIn(serd.Statement(x, p, o, g), model)
        self.assertNotIn((x, p, o, g), model)

        self.assertTrue(model.ask(None, p, o, g))
        self.assertTrue(model.ask(s, None, o, g))
        self.assertTrue(model.ask(s, p, None, g))
        self.assertTrue(model.ask(s, p, o, None))

        self.assertFalse(model.ask(None, x, o, g))
        self.assertFalse(model.ask(s, None, x, g))
        self.assertFalse(model.ask(s, p, None, x))
        self.assertFalse(model.ask(x, p, o, None))

    def testCount(self):
        s, p, o1, o2, g, x = self.s, self.p, self.o1, self.o2, self.g, self.x
        model = serd.Model(self.world, flags=serd.ModelFlags.STORE_GRAPHS)
        model.insert((s, p, o1, g))
        model.insert((s, p, o2, g))

        self.assertEqual(model.count(s, p, o1, g), 1)
        self.assertEqual(model.count(s, p, None, g), 2)
        self.assertEqual(model.count(s, p, x, g), 0)


class StatementTests(unittest.TestCase):
    """Tests for serd.Statement field access, matching and string forms."""

    def setUp(self):
        self.s = serd.uri("http://example.org/s")
        self.p = serd.uri("http://example.org/p")
        self.o = serd.uri("http://example.org/o")
        self.g = serd.uri("http://example.org/g")
        self.caret = serd.Caret("foo.ttl", 1, 0)

    def testGet(self):
        s, p, o, g = self.s, self.p, self.o, self.g
        statement = serd.Statement(s, p, o, g, self.caret)

        self.assertEqual(statement[serd.Field.SUBJECT], s)
        self.assertEqual(statement[serd.Field.PREDICATE], p)
        self.assertEqual(statement[serd.Field.OBJECT], o)
        self.assertEqual(statement[serd.Field.GRAPH], g)

        self.assertEqual(statement[0], s)
        self.assertEqual(statement[1], p)
        self.assertEqual(statement[2], o)
        self.assertEqual(statement[3], g)

        # Out-of-range indices raise; the looked-up value is not needed
        with self.assertRaises(IndexError):
            statement[-1]

        with self.assertRaises(IndexError):
            statement[4]

    def testAllFields(self):
        s, p, o, g = self.s, self.p, self.o, self.g
        statement = serd.Statement(s, p, o, g, self.caret)

        self.assertEqual(statement.node(serd.Field.SUBJECT), s)
        self.assertEqual(statement.node(serd.Field.PREDICATE), p)
        self.assertEqual(statement.node(serd.Field.OBJECT), o)
        self.assertEqual(statement.node(serd.Field.GRAPH), g)

        self.assertEqual(statement.subject(), s)
        self.assertEqual(statement.predicate(), p)
        self.assertEqual(statement.object(), o)
        self.assertEqual(statement.graph(), g)

        self.assertEqual(statement.caret(), self.caret)

    def testNoGraph(self):
        s, p, o = self.s, self.p, self.o
        statement = serd.Statement(s, p, o, None, self.caret)

        self.assertEqual(statement.node(serd.Field.SUBJECT), s)
        self.assertEqual(statement.node(serd.Field.PREDICATE), p)
        self.assertEqual(statement.node(serd.Field.OBJECT), o)
        self.assertIsNone(statement.node(serd.Field.GRAPH))

        self.assertEqual(statement.subject(), s)
        self.assertEqual(statement.predicate(), p)
        self.assertEqual(statement.object(), o)
        self.assertIsNone(statement.graph())

        self.assertEqual(statement.caret(), self.caret)

    def testNoCaret(self):
        s, p, o, g = self.s, self.p, self.o, self.g
        statement = serd.Statement(s, p, o, g)

        self.assertEqual(statement.node(serd.Field.SUBJECT), s)
        self.assertEqual(statement.node(serd.Field.PREDICATE), p)
        self.assertEqual(statement.node(serd.Field.OBJECT), o)
        self.assertEqual(statement.node(serd.Field.GRAPH), g)

        self.assertEqual(statement.subject(), s)
        self.assertEqual(statement.predicate(), p)
        self.assertEqual(statement.object(), o)
        self.assertEqual(statement.graph(), g)

        self.assertIsNone(statement.caret())

    def testNoGraphOrCaret(self):
        s, p, o = self.s, self.p, self.o
        statement = serd.Statement(s, p, o)

        self.assertEqual(statement.node(serd.Field.SUBJECT), s)
        self.assertEqual(statement.node(serd.Field.PREDICATE), p)
        self.assertEqual(statement.node(serd.Field.OBJECT), o)
        self.assertIsNone(statement.node(serd.Field.GRAPH))

        self.assertEqual(statement.subject(), s)
        self.assertEqual(statement.predicate(), p)
        self.assertEqual(statement.object(), o)
        self.assertIsNone(statement.graph())

        self.assertIsNone(statement.caret())

    def testComparison(self):
        s, p, o, g = self.s, self.p, self.o, self.g
        statement1 = serd.Statement(s, p, o, g)
        statement2 = serd.Statement(o, p, s, g)

        self.assertEqual(statement1, statement1)
        self.assertNotEqual(statement1, statement2)

    def testMatches(self):
        s, p, o, g = self.s, self.p, self.o, self.g
        x = serd.uri("http://example.org/x")
        statement = serd.Statement(s, p, o, g)

        self.assertTrue(statement.matches(s, p, o, g))
        self.assertTrue(statement.matches(None, p, o, g))
        self.assertTrue(statement.matches(s, None, o, g))
        self.assertTrue(statement.matches(s, p, None, g))
        self.assertTrue(statement.matches(s, p, o, None))

        self.assertFalse(statement.matches(x, p, o, g))
        self.assertFalse(statement.matches(s, x, o, g))
        self.assertFalse(statement.matches(s, p, x, g))
        self.assertFalse(statement.matches(s, p, o, x))

    def testIteration(self):
        triple = serd.Statement(self.s, self.p, self.o)
        quad = serd.Statement(self.s, self.p, self.o, self.g)

        self.assertEqual([n for n in triple], [self.s, self.p, self.o])
        self.assertEqual([n for n in quad], [self.s, self.p, self.o, self.g])

    def testStr(self):
        self.assertEqual(
            str(serd.Statement(self.s, self.p, self.o)),
            "<http://example.org/s> <http://example.org/p> <http://example.org/o>",
        )

        self.assertEqual(
            str(serd.Statement(self.s, self.p, self.o, self.g)),
            "<http://example.org/s> <http://example.org/p> <http://example.org/o> <http://example.org/g>",
        )

    def testRepr(self):
        self.assertEqual(
            repr(serd.Statement(self.s, self.p, self.o)),
            'serd.Statement(serd.uri("http://example.org/s"), serd.uri("http://example.org/p"), serd.uri("http://example.org/o"))',
        )

        self.assertEqual(
            repr(serd.Statement(self.s, self.p, self.o, self.g)),
            'serd.Statement(serd.uri("http://example.org/s"), serd.uri("http://example.org/p"), serd.uri("http://example.org/o"), serd.uri("http://example.org/g"))',
        )


class RangeTests(unittest.TestCase):
    """Tests for iterating models and copying or erasing statement ranges."""

    def setUp(self):
        self.world = serd.World()
        self.s = serd.uri("http://example.org/s")
        self.p = serd.uri("http://example.org/p")
        self.p1 = serd.uri("http://example.org/p1")
        self.p2 = serd.uri("http://example.org/p2")
        self.o1 = serd.uri("http://example.org/o1")
        self.o2 = serd.uri("http://example.org/o2")
        self.g = serd.uri("http://example.org/g")

    # def testFront(self):
    #     model = serd.Model(self.world)

    #     model.insert((self.s, self.p, self.o1))
    #     self.assertEqual(
    #         model.all().front(), serd.Statement(self.s, self.p, self.o1)
    #     )

    # def testEmpty(self):
    #     model = serd.Model(self.world)

    #     self.assertTrue(model.all().empty())
    #     self.assertFalse(model.all())

    #     model.insert((self.s, self.p, self.o1))
    #     self.assertFalse(model.all().empty())
    #     self.assertTrue(model.all())

    # def testIteration(self):
    #     model = serd.Model(self.world)

    #     model.insert((self.s, self.p, self.o1))
    #     model.insert((self.s, self.p, self.o2))

    #     i = iter(model.all())
    #     self.assertEqual(next(i), serd.Statement(self.s, self.p, self.o1))
    #     self.assertEqual(next(i), serd.Statement(self.s, self.p, self.o2))
    #     with self.assertRaises(StopIteration):
    #         next(i)

    def testEmptyIteration(self):
        model = serd.Model(self.world)
        count = 0

        for _ in model:
            count += 1

        self.assertEqual(count, 0)

        # for s in model.all():
        #     count += 1

        # self.assertEqual(count, 0)

    def testInsertErase(self):
        model1 = serd.Model(self.world)
        model2 = serd.Model(self.world)

        model1.insert((self.s, self.p1, self.o1))
        model1.insert((self.s, self.p1, self.o2))
        model1.insert((self.s, self.p2, self.o1))
        model1.insert((self.s, self.p2, self.o2))

        model2.insert_statements(model1.find(self.s, self.p1, None))

        self.assertEqual(
            [s for s in model2],
            [
                serd.Statement(self.s, self.p1, self.o1),
                serd.Statement(self.s, self.p1, self.o2),
            ],
        )

        model1.erase_statements(model1.find(self.s, self.p2, None))
        self.assertEqual(model1, model2)


class CaretTests(unittest.TestCase):
    """Tests for serd.Caret construction and comparison."""

    def testStringConstruction(self):
        cur = serd.Caret("foo.ttl", 3, 4)
        self.assertEqual(cur.name(), "foo.ttl")
        self.assertEqual(cur.line(), 3)
        self.assertEqual(cur.column(), 4)

    def testNodeConstruction(self):
        name = serd.string("foo.ttl")
        cur = serd.Caret(name, 5, 6)
        self.assertEqual(cur.name(), name)
        self.assertEqual(cur.line(), 5)
        self.assertEqual(cur.column(), 6)

    def testComparison(self):
        self.assertEqual(
            serd.Caret("foo.ttl", 1, 2), serd.Caret("foo.ttl", 1, 2)
        )
        self.assertNotEqual(
            serd.Caret("foo.ttl", 9, 2), serd.Caret("foo.ttl", 1, 2)
        )
        self.assertNotEqual(
            serd.Caret("foo.ttl", 1, 9), serd.Caret("foo.ttl", 1, 2)
        )
        self.assertNotEqual(
            serd.Caret("bar.ttl", 1, 2), serd.Caret("foo.ttl", 1, 2)
        )


class EventTests(unittest.TestCase):
    """Tests for the repr of each kind of serd.Event."""

    def testRepr(self):
        base = serd.uri("http://example.org/base")
        ns = serd.uri("http://example.org/ns")

        self.assertEqual(
            repr(serd.Event.base(base)),
            'serd.Event.base("http://example.org/base")',
        )

        self.assertEqual(
            repr(serd.Event.prefix("eg", ns)),
            'serd.Event.prefix("eg", "http://example.org/ns")',
        )

        s = serd.blank("s")
        p = serd.uri("http://example.org/p")
        o = serd.uri("http://example.org/o")
        g = serd.uri("http://example.org/g")
        statement = serd.Statement(s, p, o, g)

        self.assertEqual(
            repr(serd.Event.statement(statement)),
            'serd.Event.statement(serd.Statement(serd.blank("s"), serd.uri("http://example.org/p"), serd.uri("http://example.org/o"), serd.uri("http://example.org/g")))',
        )

        self.assertEqual(
            repr(
                serd.Event.statement(
                    statement,
                    serd.StatementFlags.EMPTY_S | serd.StatementFlags.ANON_O,
                )
            ),
            'serd.Event.statement(serd.Statement(serd.blank("s"), serd.uri("http://example.org/p"), serd.uri("http://example.org/o"), serd.uri("http://example.org/g")), serd.StatementFlags.EMPTY_S | serd.StatementFlags.ANON_O)',
        )

        self.assertEqual(
            repr(serd.Event.end(s)), 'serd.Event.end(serd.blank("s"))'
        )


class ReaderTests(unittest.TestCase):
    """Tests for reading a Turtle document into a sink or a function."""

    def setUp(self):
        self.world = serd.World()
        self.temp_dir = tempfile.mkdtemp()
        self.ttl_path = os.path.join(self.temp_dir, "input.ttl")
        self.s = serd.uri("http://example.org/s")
        self.p1 = serd.uri("http://example.org/p1")
        self.p2 = serd.uri("http://example.org/p2")
        self.o1 = serd.uri("http://example.org/o1")
        self.o2 = serd.uri("http://example.org/o2")

        self.ttl_document = """@prefix eg: <http://example.org/> .
@base <http://example.org/base> .
eg:s eg:p1 eg:o1 ;
eg:p2 eg:o2 .
"""
        # The events a reader is expected to emit for the document above
        self.events = [
            serd.Event.prefix("eg", "http://example.org/"),
            serd.Event.base("http://example.org/base"),
            serd.Event.statement(serd.Statement(self.s, self.p1, self.o1)),
            serd.Event.statement(serd.Statement(self.s, self.p2, self.o2)),
        ]

        with open(self.ttl_path, "w", encoding="utf-8") as f:
            f.write(self.ttl_document)

    def tearDown(self):
        shutil.rmtree(self.temp_dir)

    def testReadFileToSink(self):
        class TestSink(serd.Sink):
            """A sink that records every event it receives."""

            def __init__(self, world):
                super().__init__(world)
                self.events = []

            def __call__(self, event):
                self.events += [event]
                return serd.Status.SUCCESS

        env = serd.Env(self.world)
        source = serd.FileInput(self.ttl_path)
        sink = TestSink(self.world)
        reader = serd.Reader(
            self.world, serd.Syntax.TURTLE, 0, env, sink, 4096
        )

        self.assertEqual(reader.start(source), serd.Status.SUCCESS)
        self.assertEqual(sink.events, [])
        self.assertEqual(reader.read_document(), serd.Status.SUCCESS)
        self.assertEqual(reader.finish(), serd.Status.SUCCESS)
        self.assertEqual(sink.events, self.events)

    def testReadFileToFunction(self):
        captured_events = []

        def sink(event):
            captured_events.append(event)

        env = serd.Env(self.world)
        source = serd.FileInput(self.ttl_path)
        reader = serd.Reader(
            self.world, serd.Syntax.TURTLE, 0, env, sink, 4096
        )

        self.assertEqual(reader.start(source), serd.Status.SUCCESS)
        self.assertEqual(captured_events, [])
        self.assertEqual(reader.read_document(), serd.Status.SUCCESS)
        self.assertEqual(reader.finish(), serd.Status.SUCCESS)
        self.assertEqual(captured_events, self.events)

    def testReadStringToFunction(self):
        captured_events = []

        def sink(event):
            captured_events.append(event)

        env = serd.Env(self.world)
        source = serd.StringInput(self.ttl_document)
        reader = serd.Reader(
            self.world, serd.Syntax.TURTLE, 0, env, sink, 4096
        )

        self.assertEqual(reader.start(source), serd.Status.SUCCESS)
        self.assertEqual(captured_events, [])
        self.assertEqual(reader.read_document(), serd.Status.SUCCESS)
        self.assertEqual(reader.finish(), serd.Status.SUCCESS)
        self.assertEqual(captured_events, self.events)


# class LoadTests(unittest.TestCase):
#     def setUp(self):
#         self.world = serd.World()
#         self.temp_dir = tempfile.mkdtemp()
#         self.ttl_path = os.path.join(self.temp_dir, "input.ttl")
#         self.ttl_document = r"""@prefix eg: <http://example.org/> .
# @base <http://example.org/base> .
# eg:s eg:p1 eg:o1 ;
# eg:p2 eg:o2 .
# """

#         with open(self.ttl_path, "w") as f:
#             f.write(self.ttl_document)

#     def tearDown(self):
#         shutil.rmtree(self.temp_dir)

#     def testLoad(self):
#         s = serd.uri("http://example.org/s")
#         p1 = serd.uri("http://example.org/p1")
#         p2 = serd.uri("http://example.org/p2")
#         o1 = serd.uri("http://example.org/o1")
#         o2 = serd.uri("http://example.org/o2")

#         model = self.world.load(self.ttl_path)

#         print([statement for statement in model])
#         self.assertEqual(
#             [statement for statement in model],
#             [
#                 serd.Statement(s, p1, o1),
#                 serd.Statement(s, p2, o2),
#             ],
#         )

#     def testLoadString(self):
#         s = serd.uri("http://example.org/s")
#         p1 = serd.uri("http://example.org/p1")
#         p2 = serd.uri("http://example.org/p2")
#         o1 = serd.uri("http://example.org/o1")
#         o2 = serd.uri("http://example.org/o2")

#         print(self.ttl_document)
#         model = self.world.loads(self.ttl_document)

#         assert len(model) == 2

#         self.assertEqual(
#             [statement for statement in model],
#             [
#                 serd.Statement(s, p1, o1),
#                 serd.Statement(s, p2, o2),
#             ],
#         )


# class DumpTests(unittest.TestCase):
#     def setUp(self):
#         self.world = serd.World()
#         self.temp_dir = tempfile.mkdtemp()
#         self.ttl_path = os.path.join(self.temp_dir, "output.ttl")

#         self.ttl_document = r"""<http://example.org/s>
# <http://example.org/p> <http://example.org/o1> ,
# <http://example.org/o2> .
# """

#         self.s = serd.uri("http://example.org/s")
#         self.p = serd.uri("http://example.org/p")
#         self.o = serd.uri("http://example.org/o")
#         self.o1 = serd.uri("http://example.org/o1")
#         self.o2 = serd.uri("http://example.org/o2")
#         self.g = serd.uri("http://example.org/g")
#         self.x = serd.uri("http://example.org/x")

#     def tearDown(self):
#         shutil.rmtree(self.temp_dir)

#     def testDumpFile(self):
#         s, p, o, o1, o2 = self.s, self.p, self.o, self.o1, self.o2
#         g, x = self.g, self.x

#         model = serd.Model(self.world)

#         model.insert((s, p, o1))
#         model.insert((s, p, o2))

#         self.world.dump(model, self.ttl_path)

#         with open(self.ttl_path, "r") as output:
#             self.assertEqual(output.read(), self.ttl_document)

#     def testDumpString(self):
#         s, p, o, o1, o2 = self.s, self.p, self.o, self.o1, self.o2
#         g, x = self.g, self.x

#         model = serd.Model(self.world)

#         model.insert((s, p, o1))
#         model.insert((s, p, o2))

#         self.assertEqual(self.world.dumps(model), self.ttl_document)