about summary refs log tree commit diff stats
path: root/include/serd
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2020-10-10 12:31:29 +0200
committer David Robillard <d@drobilla.net> 2023-12-02 18:49:08 -0500
commit 155f5e2f24c24f5b5ffbf13fcea5cf1a355ec372 (patch)
tree 5f5f7009e96379aa0e31cb1db3757f8d6589669f /include/serd
parent e8f392d57bf6eba9b62509a32e4073e8b34b18e2 (diff)
download serd-155f5e2f24c24f5b5ffbf13fcea5cf1a355ec372.tar.gz
serd-155f5e2f24c24f5b5ffbf13fcea5cf1a355ec372.tar.bz2
serd-155f5e2f24c24f5b5ffbf13fcea5cf1a355ec372.zip
[WIP] Simplify reader and writer flags
[WIP] Testing?
Diffstat (limited to 'include/serd')
-rw-r--r-- include/serd/reader.h 41
-rw-r--r-- include/serd/writer.h 49
2 files changed, 81 insertions, 9 deletions
diff --git a/include/serd/reader.h b/include/serd/reader.h
index b6b9cac3..e25565cb 100644
--- a/include/serd/reader.h
+++ b/include/serd/reader.h
@@ -30,10 +30,43 @@ typedef struct SerdReaderImpl SerdReader;
/// Reader options
typedef enum {
- SERD_READ_LAX = 1U << 0U, ///< Tolerate invalid input where possible
- SERD_READ_VARIABLES = 1U << 1U, ///< Support variable nodes
- SERD_READ_PREFIXED = 1U << 2U, ///< Do not expand prefixed names
- SERD_READ_RELATIVE = 1U << 3U, ///< Do not expand relative URI references
+ /**
+ Tolerate invalid input where possible.
+
+ This will attempt to ignore invalid input and continue reading. Invalid
+ Unicode characters will be replaced with the replacement character, and
+ various other syntactic problems will be ignored. If there are more
+ severe problems, the reader will try to skip the statement and continue
+ parsing. This should work reasonably well for line-based syntaxes like
+ NTriples and NQuads, but abbreviated Turtle or TriG may not recover.
+
+ Note that this flag should be used carefully, since it can result in data
+ loss.
+ */
+ SERD_READ_LAX = 1U << 0U,
+
+ /**
+ Support reading variable nodes.
+
+ As an extension, serd supports reading variable nodes with SPARQL-like
+ syntax, for example "?foo" or "$bar". This can be used for storing
+ graph patterns and templates.
+ */
+ SERD_READ_VARIABLES = 1U << 1U,
+
+ /**
+ Read URIs and blank node labels exactly.
+
+ Normally, the reader expands all relative URIs, and may adjust blank node
+ labels to avoid clashing with generated ones. This flag disables all of
+ this processing, so that URI references and blank nodes are passed to the
+ sink exactly as they are in the input.
+
+ Note that this does not apply to CURIEs, since serd deliberately does not
+ have a way to represent CURIE nodes. A bad namespace prefix is considered
+ a syntax error.
+ */
+ SERD_READ_VERBATIM = 1U << 2U,
} SerdReaderFlag;
/// Bitwise OR of SerdReaderFlag values
diff --git a/include/serd/writer.h b/include/serd/writer.h
index b85c7b82..812b1851 100644
--- a/include/serd/writer.h
+++ b/include/serd/writer.h
@@ -36,11 +36,50 @@ typedef struct SerdWriterImpl SerdWriter;
does not support abbreviation and is always ASCII.
*/
typedef enum {
- SERD_WRITE_ASCII = 1U << 0U, ///< Escape all non-ASCII characters
- SERD_WRITE_UNQUALIFIED = 1U << 1U, ///< Do not shorten URIs into CURIEs
- SERD_WRITE_UNRESOLVED = 1U << 2U, ///< Do not make URIs relative
- SERD_WRITE_LAX = 1U << 3U, ///< Tolerate lossy output
- SERD_WRITE_TERSE = 1U << 4U, ///< Write terser output without newlines
+ /**
+ Escape all non-ASCII characters.
+
+ Although all the supported syntaxes are UTF-8 by definition, this can be
+ used to escape all non-ASCII characters so that data will survive
+ transmission through ASCII-only channels.
+ */
+ SERD_WRITE_ASCII = 1U << 0U,
+
+ /**
+ Write expanded URIs instead of prefixed names.
+
+ This will avoid shortening URIs into CURIEs entirely, even if the output
+ syntax supports prefixed names. This can be useful for making chunks of
+ syntax context-free.
+ */
+ SERD_WRITE_EXPANDED = 1U << 1U,
+
+ /**
+ Write URI references exactly as they are received.
+
+ Normally, the writer resolves URIs against the base URI, so it can
+ potentially write them as relative URI references. This flag disables
+ that, so URI nodes are written exactly as they are received.
+ */
+ SERD_WRITE_VERBATIM = 1U << 2U,
+
+ /**
+ Write terser output without newlines.
+
+ For Turtle and TriG, this enables a terser form of output which only has
+ newlines at the top level. This can result in very long lines, but is
+ more compact and useful for making these abbreviated syntaxes line-based.
+ */
+ SERD_WRITE_TERSE = 1U << 3U,
+
+ /**
+ Tolerate lossy output.
+
+ This will tolerate input that cannot be written without loss, in
+ particular invalid UTF-8 text. Note that this flag should be used
+ carefully, since it can result in data loss.
+ */
+ SERD_WRITE_LAX = 1U << 4U,
} SerdWriterFlag;
/// Bitwise OR of #SerdWriterFlag values