aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2021-08-01 16:54:32 -0400
committerDavid Robillard <d@drobilla.net>2022-01-28 21:57:07 -0500
commit633300d4f09f9c6000923dce35bb4a7302d6d92c (patch)
tree386fc75a321d548a900ad4e6d5f13c25b5368487
parent8e30b617725a50a5b27d400247095d0577e5874e (diff)
downloadserd-633300d4f09f9c6000923dce35bb4a7302d6d92c.tar.gz
serd-633300d4f09f9c6000923dce35bb4a7302d6d92c.tar.bz2
serd-633300d4f09f9c6000923dce35bb4a7302d6d92c.zip
Simplify reader and writer flags
-rw-r--r--include/serd/serd.h93
-rw-r--r--src/n3.c2
-rw-r--r--src/node_syntax.c2
-rw-r--r--src/read_ntriples.c2
-rw-r--r--src/serdi.c2
-rw-r--r--src/writer.c8
6 files changed, 92 insertions, 17 deletions
diff --git a/include/serd/serd.h b/include/serd/serd.h
index 5edb4b2f..f9a60354 100644
--- a/include/serd/serd.h
+++ b/include/serd/serd.h
@@ -2226,10 +2226,43 @@ typedef struct SerdReaderImpl SerdReader;
/// Reader options
typedef enum {
- SERD_READ_LAX = 1u << 0u, ///< Tolerate invalid input where possible
- SERD_READ_VARIABLES = 1u << 1u, ///< Support variable nodes
- SERD_READ_EXACT_BLANKS = 1u << 2u, ///< Allow clashes with generated blanks
- SERD_READ_RELATIVE = 1u << 3u, ///< Do not expand relative URI references
+ /**
+ Tolerate invalid input where possible.
+
+ This will attempt to ignore invalid input and continue reading. Invalid
+ Unicode characters will be replaced with the replacement character, and
+ various other syntactic problems will be ignored. If there are more
+ severe problems, the reader will try to skip the statement and continue
+ parsing. This should work reasonably well for line-based syntaxes like
+ NTriples and NQuads, but abbreviated Turtle or TriG may not recover.
+
+ Note that this flag should be used carefully, since it can result in data
+ loss.
+ */
+ SERD_READ_LAX = 1u << 0u,
+
+ /**
+ Support reading variable nodes.
+
+ As an extension, serd supports reading variable nodes with SPARQL-like
+ syntax, for example "?foo" or "$bar". This can be used for storing
+ graph patterns and templates.
+ */
+ SERD_READ_VARIABLES = 1u << 1u,
+
+ /**
+ Read URIs and blank node labels exactly.
+
+ Normally, the reader expands all relative URIs, and may adjust blank node
+ labels to avoid clashing with generated ones. This flag disables all of
+ this processing, so that URI references and blank nodes are passed to the
+ sink exactly as they are in the input.
+
+ Note that this does not apply to CURIEs, since serd deliberately does not
+ have a way to represent CURIE nodes. A bad namespace prefix is considered
+ a syntax error.
+ */
+ SERD_READ_VERBATIM = 1u << 2u,
} SerdReaderFlag;
/// Bitwise OR of SerdReaderFlag values
@@ -2394,11 +2427,53 @@ typedef struct SerdWriterImpl SerdWriter;
does not support abbreviation and is always ASCII.
*/
typedef enum {
- SERD_WRITE_ASCII = 1u << 0u, ///< Escape all non-ASCII characters
- SERD_WRITE_UNQUALIFIED = 1u << 1u, ///< Do not shorten URIs into CURIEs
- SERD_WRITE_UNRESOLVED = 1u << 2u, ///< Do not make URIs relative
- SERD_WRITE_TERSE = 1u << 3u, ///< Write terser output without newlines
- SERD_WRITE_LAX = 1u << 4u ///< Tolerate lossy output
+ /**
+ Escape all non-ASCII characters.
+
+ Although all the supported syntaxes are UTF-8 by definition, this can be
+ used to escape all non-ASCII characters so that data will survive
+ transmission through ASCII-only channels.
+ */
+ SERD_WRITE_ASCII = 1u << 0u,
+
+ /**
+ Write expanded URIs instead of prefixed names.
+
+ This will avoid shortening URIs into CURIEs entirely, even if the output
+ syntax supports prefixed names. This can be useful for making chunks of
+ syntax context-free.
+ */
+ SERD_WRITE_EXPANDED = 1u << 1u,
+
+ /**
+ Write URI references exactly as they are received.
+
+ Normally, the writer resolves URIs against the base URI, so it can
+ potentially write them as relative URI references. This flag disables
+ that, so URI nodes are written exactly as they are received.
+
+ When fed by a reader with #SERD_READ_VERBATIM enabled, this will write URI
+ references exactly as they are in the input.
+ */
+ SERD_WRITE_VERBATIM = 1u << 2u,
+
+ /**
+ Write terser output without newlines.
+
+ For Turtle and TriG, this enables a terser form of output which only has
+ newlines at the top level. This can result in very long lines, but is
+ more compact and useful for making these abbreviated syntaxes line-based.
+ */
+ SERD_WRITE_TERSE = 1u << 3u,
+
+ /**
+ Tolerate lossy output.
+
+ This will tolerate input that cannot be written without loss, in
+ particular invalid UTF-8 text. Note that this flag should be used
+ carefully, since it can result in data loss.
+ */
+ SERD_WRITE_LAX = 1u << 4u
} SerdWriterFlag;
/// Bitwise OR of SerdWriterFlag values
diff --git a/src/n3.c b/src/n3.c
index 00d5bc1d..7e913d8b 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -374,7 +374,7 @@ read_IRIREF(SerdReader* const reader, SerdNode** const dest)
return st;
}
- return (reader->flags & SERD_READ_RELATIVE)
+ return (reader->flags & SERD_READ_VERBATIM)
? SERD_SUCCESS
: resolve_IRIREF(reader, *dest, string_start_offset);
}
diff --git a/src/node_syntax.c b/src/node_syntax.c
index ff8c32d4..b1d10a5d 100644
--- a/src/node_syntax.c
+++ b/src/node_syntax.c
@@ -56,7 +56,7 @@ serd_node_from_syntax_in(const char* const str,
SerdByteSource* const source = serd_byte_source_new_string(doc, NULL);
SerdReader* const reader = serd_reader_new(
- world, syntax, SERD_READ_EXACT_BLANKS, env, sink, 1024 + doc_len);
+ world, syntax, SERD_READ_VERBATIM, env, sink, 1024 + doc_len);
serd_reader_start(reader, source);
serd_reader_read_document(reader);
diff --git a/src/read_ntriples.c b/src/read_ntriples.c
index 0d5dba2a..aa8f5468 100644
--- a/src/read_ntriples.c
+++ b/src/read_ntriples.c
@@ -256,7 +256,7 @@ read_STRING_LITERAL(SerdReader* const reader,
static SerdStatus
adjust_blank_id(SerdReader* const reader, char* const buf)
{
- if (!(reader->flags & SERD_READ_EXACT_BLANKS) &&
+ if (!(reader->flags & SERD_READ_VERBATIM) &&
is_digit(buf[reader->bprefix_len + 1])) {
const char tag = buf[reader->bprefix_len];
if (tag == 'b') {
diff --git a/src/serdi.c b/src/serdi.c
index 7c664b72..ef5a7633 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -248,7 +248,7 @@ main(int argc, char** argv)
bulk_read = false;
} else if (opt == 'f') {
no_inline = true;
- writer_flags |= (SERD_WRITE_UNQUALIFIED | SERD_WRITE_UNRESOLVED);
+ writer_flags |= (SERD_WRITE_EXPANDED | SERD_WRITE_VERBATIM);
} else if (opt == 'h') {
return print_usage(prog, false);
} else if (opt == 'l') {
diff --git a/src/writer.c b/src/writer.c
index d139ef9d..e56f0b75 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -793,10 +793,10 @@ write_literal(SerdWriter* const writer,
SERD_WARN_UNUSED_RESULT static SerdStatus
write_full_uri_node(SerdWriter* const writer, const SerdNode* const node)
{
- SerdStatus st = SERD_SUCCESS;
- const bool resolve_disabled = writer->flags & SERD_WRITE_UNRESOLVED;
+ SerdStatus st = SERD_SUCCESS;
+ const bool verbatim = (writer->flags & SERD_WRITE_VERBATIM);
- if (resolve_disabled || !serd_env_base_uri(writer->env)) {
+ if (verbatim || !serd_env_base_uri(writer->env)) {
// Resolution disabled or we have no base URI, simply write the node
TRY(st, esink("<", 1, writer));
TRY(st, write_uri_from_node(writer, node));
@@ -848,7 +848,7 @@ write_uri_node(SerdWriter* const writer,
return esink("()", 2, writer);
}
- if (has_scheme && !(writer->flags & SERD_WRITE_UNQUALIFIED) &&
+ if (has_scheme && !(writer->flags & SERD_WRITE_EXPANDED) &&
!serd_env_qualify(writer->env, node_view, &prefix, &suffix)) {
TRY(st, write_lname(writer, prefix.buf, prefix.len));
TRY(st, esink(":", 1, writer));