about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2018-07-05 21:01:12 +0200
committer David Robillard <d@drobilla.net> 2023-12-02 18:49:07 -0500
commit ab1aa43256fac3e017212abe6f9d845bf74c024c (patch)
tree cda9cce165fb795ac5ca21332f66957ee7cc8afc
parent 40534e6c42daabb6cc8ec2d49785bec088d1d3a8 (diff)
download serd-ab1aa43256fac3e017212abe6f9d845bf74c024c.tar.gz
serd-ab1aa43256fac3e017212abe6f9d845bf74c024c.tar.bz2
serd-ab1aa43256fac3e017212abe6f9d845bf74c024c.zip
Simplify writer style options and write UTF-8 by default
-rw-r--r-- NEWS 1
-rw-r--r-- include/serd/writer.h 11
-rw-r--r-- src/serdi.c 73
-rw-r--r-- src/writer.c 7
-rw-r--r-- test/extra/perfect/test-long-whitespace.nt 4
-rw-r--r-- test/extra/perfect/test-utf8-uri.nt 2
-rw-r--r-- test/extra/perfect/test-utf8.nt 6
-rw-r--r-- test/meson.build 8
-rw-r--r-- test/test_reader_writer.c 3
9 files changed, 40 insertions, 75 deletions
diff --git a/NEWS b/NEWS
index 8d4e3ed8..255f94dc 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,7 @@ serd (1.1.1) unstable; urgency=medium
* Remove support for reading Turtle named inline nodes extension
* Remove useless character counting from API
* Rename SerdChunk to SerdStringView
+ * Simplify writer style options and write UTF-8 by default
* Use a fixed-size reader stack
* Use char* for strings in public API
diff --git a/include/serd/writer.h b/include/serd/writer.h
index 77ecac80..96059932 100644
--- a/include/serd/writer.h
+++ b/include/serd/writer.h
@@ -34,12 +34,11 @@ typedef struct SerdWriterImpl SerdWriter;
does not support abbreviation and is always ASCII.
*/
typedef enum {
- SERD_WRITE_ABBREVIATED = 1U << 0U, ///< Abbreviate triples when possible
- SERD_WRITE_ASCII = 1U << 1U, ///< Escape all non-ASCII characters
- SERD_WRITE_RESOLVED = 1U << 2U, ///< Resolve URIs against base URI
- SERD_WRITE_CURIED = 1U << 3U, ///< Shorten URIs into CURIEs
- SERD_WRITE_BULK = 1U << 4U, ///< Write output in pages
- SERD_WRITE_STRICT = 1U << 5U, ///< Abort with error on lossy output
+ SERD_WRITE_ASCII = 1U << 0U, ///< Escape all non-ASCII characters
+ SERD_WRITE_UNQUALIFIED = 1U << 1U, ///< Do not shorten URIs into CURIEs
+ SERD_WRITE_UNRESOLVED = 1U << 2U, ///< Do not make URIs relative
+ SERD_WRITE_BULK = 1U << 3U, ///< Write output in pages
+ SERD_WRITE_STRICT = 1U << 4U, ///< Abort with error on lossy output
} SerdWriterFlag;
/// Bitwise OR of #SerdWriterFlag values
diff --git a/src/serdi.c b/src/serdi.c
index 0932348c..01e2e764 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -92,61 +92,24 @@ quiet_error_func(void* const handle, const SerdError* const e)
return SERD_SUCCESS;
}
-static SerdWriterFlags
-choose_style(const SerdSyntax input_syntax,
- const SerdSyntax output_syntax,
- const bool ascii,
- const bool bulk_write,
- const bool full_uris,
- const bool lax)
-{
- SerdWriterFlags writer_flags = 0U;
- if (output_syntax == SERD_NTRIPLES || ascii) {
- writer_flags |= SERD_WRITE_ASCII;
- } else if (output_syntax == SERD_TURTLE) {
- writer_flags |= SERD_WRITE_ABBREVIATED;
- if (!full_uris) {
- writer_flags |= SERD_WRITE_CURIED;
- }
- }
-
- if ((input_syntax == SERD_TURTLE || input_syntax == SERD_TRIG) ||
- (writer_flags & SERD_WRITE_CURIED)) {
- // Base URI may change and/or we're abbreviating URIs, so must resolve
- writer_flags |= SERD_WRITE_RESOLVED;
- }
-
- if (bulk_write) {
- writer_flags |= SERD_WRITE_BULK;
- }
-
- if (!lax) {
- writer_flags |= SERD_WRITE_STRICT;
- }
-
- return writer_flags;
-}
-
int
main(int argc, char** argv)
{
const char* const prog = argv[0];
- SerdSyntax input_syntax = (SerdSyntax)0;
- SerdSyntax output_syntax = (SerdSyntax)0;
- bool from_string = false;
- bool from_stdin = false;
- bool ascii = false;
- bool bulk_read = true;
- bool bulk_write = false;
- bool full_uris = false;
- bool lax = false;
- bool quiet = false;
- size_t stack_size = 1048576U;
- const char* add_prefix = NULL;
- const char* chop_prefix = NULL;
- const char* root_uri = NULL;
- int a = 1;
+ SerdSyntax input_syntax = (SerdSyntax)0;
+ SerdSyntax output_syntax = (SerdSyntax)0;
+ SerdWriterFlags writer_flags = SERD_WRITE_STRICT;
+ bool from_string = false;
+ bool from_stdin = false;
+ bool bulk_read = true;
+ bool lax = false;
+ bool quiet = false;
+ size_t stack_size = 1048576U;
+ const char* add_prefix = NULL;
+ const char* chop_prefix = NULL;
+ const char* root_uri = NULL;
+ int a = 1;
for (; a < argc && !from_string && argv[a][0] == '-'; ++a) {
if (argv[a][1] == '\0') {
from_stdin = true;
@@ -165,17 +128,18 @@ main(int argc, char** argv)
const char opt = argv[a][o];
if (opt == 'a') {
- ascii = true;
+ writer_flags |= SERD_WRITE_ASCII;
} else if (opt == 'b') {
- bulk_write = true;
+ writer_flags |= SERD_WRITE_BULK;
} else if (opt == 'e') {
bulk_read = false;
} else if (opt == 'f') {
- full_uris = true;
+ writer_flags |= (SERD_WRITE_UNQUALIFIED | SERD_WRITE_UNRESOLVED);
} else if (opt == 'h') {
return print_usage(prog, false);
} else if (opt == 'l') {
lax = true;
+ writer_flags &= ~(SerdWriterFlags)SERD_WRITE_STRICT;
} else if (opt == 'q') {
quiet = true;
} else if (opt == 'v') {
@@ -263,9 +227,6 @@ main(int argc, char** argv)
output_syntax = input_has_graphs ? SERD_NQUADS : SERD_NTRIPLES;
}
- const SerdWriterFlags writer_flags = choose_style(
- input_syntax, output_syntax, ascii, bulk_write, full_uris, lax);
-
SerdNode* base = NULL;
if (a < argc) { // Base URI given on command line
base = serd_new_uri(serd_string(argv[a]));
diff --git a/src/writer.c b/src/writer.c
index 376becf1..37263010 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -762,7 +762,7 @@ write_uri_node(SerdWriter* const writer,
return esink("()", 2, writer);
}
- if (has_scheme && (writer->flags & SERD_WRITE_CURIED) &&
+ if (has_scheme && !(writer->flags & SERD_WRITE_UNQUALIFIED) &&
serd_env_qualify(writer->env, node, &prefix, &suffix) &&
is_name(serd_node_string(prefix), serd_node_length(prefix)) &&
is_name(suffix.data, suffix.length)) {
@@ -782,7 +782,8 @@ write_uri_node(SerdWriter* const writer,
TRY(st, esink("<", 1, writer));
- if ((writer->flags & SERD_WRITE_RESOLVED) && serd_env_base_uri(writer->env)) {
+ if (!(writer->flags & SERD_WRITE_UNRESOLVED) &&
+ serd_env_base_uri(writer->env)) {
const SerdURIView base_uri = serd_env_base_uri_view(writer->env);
SerdURIView uri = serd_parse_uri(node_str);
SerdURIView abs_uri = serd_resolve_uri(uri, base_uri);
@@ -812,7 +813,7 @@ write_curie(SerdWriter* const writer, const SerdNode* const node)
// In fast-and-loose Turtle/TriG mode CURIEs are simply passed through
const bool fast =
- !(writer->flags & (SERD_WRITE_CURIED | SERD_WRITE_RESOLVED));
+ (writer->flags & (SERD_WRITE_UNQUALIFIED | SERD_WRITE_UNRESOLVED));
if (!supports_abbrev(writer) || !fast) {
if ((st = serd_env_expand(writer->env, node, &prefix, &suffix))) {
diff --git a/test/extra/perfect/test-long-whitespace.nt b/test/extra/perfect/test-long-whitespace.nt
index 09664b37..fca880d1 100644
--- a/test/extra/perfect/test-long-whitespace.nt
+++ b/test/extra/perfect/test-long-whitespace.nt
@@ -1,2 +1,2 @@
-<http://example.org/eg#a> <http://example.org/eg#b> "\nthis \ris a \U00015678long\t\nliteral\uABCD\n" .
-<http://example.org/eg#d> <http://example.org/eg#e> "\tThis \uABCDis\r \U00015678another\n\none\n" .
+<http://example.org/eg#a> <http://example.org/eg#b> "\nthis \ris a 𕙸long\t\nliteralꯍ\n" .
+<http://example.org/eg#d> <http://example.org/eg#e> "\tThis ꯍis\r 𕙸another\n\none\n" .
diff --git a/test/extra/perfect/test-utf8-uri.nt b/test/extra/perfect/test-utf8-uri.nt
index b8a73a88..3338039b 100644
--- a/test/extra/perfect/test-utf8-uri.nt
+++ b/test/extra/perfect/test-utf8-uri.nt
@@ -1 +1 @@
-<http://example.org/math/\u2200x\u2208\u211D> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Thing> .
+<http://example.org/math/∀x∈ℝ> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Thing> .
diff --git a/test/extra/perfect/test-utf8.nt b/test/extra/perfect/test-utf8.nt
index c02cb375..a9de7bbe 100644
--- a/test/extra/perfect/test-utf8.nt
+++ b/test/extra/perfect/test-utf8.nt
@@ -1,3 +1,3 @@
-<http://example.org/eg#s> <http://example.org/eg#p> "1: Two byte Unicode escape: \u00E0" .
-<http://example.org/eg#s> <http://example.org/eg#p> "2: Largest Unicode escape in Turtle: \U0010FFFF" .
-<http://example.org/eg#s> <http://example.org/eg#p> "3: \u222E E\u22C5da = Q, n \u2192 \u221E, \u2211 f(i) = \u220F g(i)" .
+<http://example.org/eg#s> <http://example.org/eg#p> "1: Two byte Unicode escape: à" .
+<http://example.org/eg#s> <http://example.org/eg#p> "2: Largest Unicode escape in Turtle: 􏿿" .
+<http://example.org/eg#s> <http://example.org/eg#p> "3: ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i)" .
diff --git a/test/meson.build b/test/meson.build
index 3e24fea0..937b9038 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -336,11 +336,12 @@ test_suites = {
files('extra/good/manifest.ttl'),
ns_serdtest + 'good/',
'--',
+ '-a',
'-b',
],
'fast': [
- files('extra/good/manifest.ttl'),
- ns_serdtest + 'good/',
+ files('extra/perfect/manifest.ttl'),
+ ns_serdtest + 'perfect/',
'--',
'-f',
],
@@ -353,12 +354,15 @@ test_suites = {
'good': [
files('extra/good/manifest.ttl'),
ns_serdtest + 'good/',
+ '--',
+ '-a',
],
'lax_lax': [
'--lax',
files('extra/lax/manifest.ttl'),
ns_serdtest + 'lax/',
'--',
+ '-a',
'-l',
],
'lax_strict': [
diff --git a/test/test_reader_writer.c b/test/test_reader_writer.c
index e792f2dc..c4f8bb90 100644
--- a/test/test_reader_writer.c
+++ b/test/test_reader_writer.c
@@ -409,8 +409,7 @@ test_write_errors(void)
{
SerdWorld* const world = serd_world_new();
ErrorContext ctx = {0U, 0U};
- const SerdWriterFlags style =
- (SerdWriterFlags)(SERD_WRITE_STRICT | SERD_WRITE_CURIED);
+ const SerdWriterFlags style = (SerdWriterFlags)SERD_WRITE_STRICT;
const size_t max_offsets[] = {0, 386, 1911, 2003, 386};