diff options
author | David Robillard <d@drobilla.net> | 2018-07-05 21:01:12 +0200 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2023-12-02 18:49:07 -0500 |
commit | ab1aa43256fac3e017212abe6f9d845bf74c024c (patch) | |
tree | cda9cce165fb795ac5ca21332f66957ee7cc8afc | |
parent | 40534e6c42daabb6cc8ec2d49785bec088d1d3a8 (diff) | |
download | serd-ab1aa43256fac3e017212abe6f9d845bf74c024c.tar.gz serd-ab1aa43256fac3e017212abe6f9d845bf74c024c.tar.bz2 serd-ab1aa43256fac3e017212abe6f9d845bf74c024c.zip |
Simplify writer style options and write UTF-8 by default
-rw-r--r-- | NEWS | 1 | ||||
-rw-r--r-- | include/serd/writer.h | 11 | ||||
-rw-r--r-- | src/serdi.c | 73 | ||||
-rw-r--r-- | src/writer.c | 7 | ||||
-rw-r--r-- | test/extra/perfect/test-long-whitespace.nt | 4 | ||||
-rw-r--r-- | test/extra/perfect/test-utf8-uri.nt | 2 | ||||
-rw-r--r-- | test/extra/perfect/test-utf8.nt | 6 | ||||
-rw-r--r-- | test/meson.build | 8 | ||||
-rw-r--r-- | test/test_reader_writer.c | 3 |
9 files changed, 40 insertions, 75 deletions
@@ -10,6 +10,7 @@ serd (1.1.1) unstable; urgency=medium * Remove support for reading Turtle named inline nodes extension * Remove useless character counting from API * Rename SerdChunk to SerdStringView + * Simplify writer style options and write UTF-8 by default * Use a fixed-size reader stack * Use char* for strings in public API diff --git a/include/serd/writer.h b/include/serd/writer.h index 77ecac80..96059932 100644 --- a/include/serd/writer.h +++ b/include/serd/writer.h @@ -34,12 +34,11 @@ typedef struct SerdWriterImpl SerdWriter; does not support abbreviation and is always ASCII. */ typedef enum { - SERD_WRITE_ABBREVIATED = 1U << 0U, ///< Abbreviate triples when possible - SERD_WRITE_ASCII = 1U << 1U, ///< Escape all non-ASCII characters - SERD_WRITE_RESOLVED = 1U << 2U, ///< Resolve URIs against base URI - SERD_WRITE_CURIED = 1U << 3U, ///< Shorten URIs into CURIEs - SERD_WRITE_BULK = 1U << 4U, ///< Write output in pages - SERD_WRITE_STRICT = 1U << 5U, ///< Abort with error on lossy output + SERD_WRITE_ASCII = 1U << 0U, ///< Escape all non-ASCII characters + SERD_WRITE_UNQUALIFIED = 1U << 1U, ///< Do not shorten URIs into CURIEs + SERD_WRITE_UNRESOLVED = 1U << 2U, ///< Do not make URIs relative + SERD_WRITE_BULK = 1U << 3U, ///< Write output in pages + SERD_WRITE_STRICT = 1U << 4U, ///< Abort with error on lossy output } SerdWriterFlag; /// Bitwise OR of #SerdWriterFlag values diff --git a/src/serdi.c b/src/serdi.c index 0932348c..01e2e764 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -92,61 +92,24 @@ quiet_error_func(void* const handle, const SerdError* const e) return SERD_SUCCESS; } -static SerdWriterFlags -choose_style(const SerdSyntax input_syntax, - const SerdSyntax output_syntax, - const bool ascii, - const bool bulk_write, - const bool full_uris, - const bool lax) -{ - SerdWriterFlags writer_flags = 0U; - if (output_syntax == SERD_NTRIPLES || ascii) { - writer_flags |= SERD_WRITE_ASCII; - } else if (output_syntax == SERD_TURTLE) { - writer_flags |= SERD_WRITE_ABBREVIATED; - if (!full_uris) { - writer_flags |= SERD_WRITE_CURIED; - } - } - - if ((input_syntax == SERD_TURTLE || input_syntax == SERD_TRIG) || - (writer_flags & SERD_WRITE_CURIED)) { - // Base URI may change and/or we're abbreviating URIs, so must resolve - writer_flags |= SERD_WRITE_RESOLVED; - } - - if (bulk_write) { - writer_flags |= SERD_WRITE_BULK; - } - - if (!lax) { - writer_flags |= SERD_WRITE_STRICT; - } - - return writer_flags; -} - int main(int argc, char** argv) { const char* const prog = argv[0]; - SerdSyntax input_syntax = (SerdSyntax)0; - SerdSyntax output_syntax = (SerdSyntax)0; - bool from_string = false; - bool from_stdin = false; - bool ascii = false; - bool bulk_read = true; - bool bulk_write = false; - bool full_uris = false; - bool lax = false; - bool quiet = false; - size_t stack_size = 1048576U; - const char* add_prefix = NULL; - const char* chop_prefix = NULL; - const char* root_uri = NULL; - int a = 1; + SerdSyntax input_syntax = (SerdSyntax)0; + SerdSyntax output_syntax = (SerdSyntax)0; + SerdWriterFlags writer_flags = SERD_WRITE_STRICT; + bool from_string = false; + bool from_stdin = false; + bool bulk_read = true; + bool lax = false; + bool quiet = false; + size_t stack_size = 1048576U; + const char* add_prefix = NULL; + const char* chop_prefix = NULL; + const char* root_uri = NULL; + int a = 1; for (; a < argc && !from_string && argv[a][0] == '-'; ++a) { if (argv[a][1] == '\0') { from_stdin = true; @@ -165,17 +128,18 @@ main(int argc, char** argv) const char opt = argv[a][o]; if (opt == 'a') { - ascii = true; + writer_flags |= SERD_WRITE_ASCII; } else if (opt == 'b') { - bulk_write = true; + writer_flags |= SERD_WRITE_BULK; } else if (opt == 'e') { bulk_read = false; } else if (opt == 'f') { - full_uris = true; + writer_flags |= (SERD_WRITE_UNQUALIFIED | SERD_WRITE_UNRESOLVED); } else if (opt == 'h') { return print_usage(prog, false); } else if (opt == 'l') { lax = true; + writer_flags &= ~(SerdWriterFlags)SERD_WRITE_STRICT; } else if (opt == 'q') { quiet = true; } else if (opt == 'v') { @@ -263,9 +227,6 @@ main(int argc, char** argv) output_syntax = input_has_graphs ? SERD_NQUADS : SERD_NTRIPLES; } - const SerdWriterFlags writer_flags = choose_style( - input_syntax, output_syntax, ascii, bulk_write, full_uris, lax); - SerdNode* base = NULL; if (a < argc) { // Base URI given on command line base = serd_new_uri(serd_string(argv[a])); diff --git a/src/writer.c b/src/writer.c index 376becf1..37263010 100644 --- a/src/writer.c +++ b/src/writer.c @@ -762,7 +762,7 @@ write_uri_node(SerdWriter* const writer, return esink("()", 2, writer); } - if (has_scheme && (writer->flags & SERD_WRITE_CURIED) && + if (has_scheme && !(writer->flags & SERD_WRITE_UNQUALIFIED) && serd_env_qualify(writer->env, node, &prefix, &suffix) && is_name(serd_node_string(prefix), serd_node_length(prefix)) && is_name(suffix.data, suffix.length)) { @@ -782,7 +782,8 @@ write_uri_node(SerdWriter* const writer, TRY(st, esink("<", 1, writer)); - if ((writer->flags & SERD_WRITE_RESOLVED) && serd_env_base_uri(writer->env)) { + if (!(writer->flags & SERD_WRITE_UNRESOLVED) && + serd_env_base_uri(writer->env)) { const SerdURIView base_uri = serd_env_base_uri_view(writer->env); SerdURIView uri = serd_parse_uri(node_str); SerdURIView abs_uri = serd_resolve_uri(uri, base_uri); @@ -812,7 +813,7 @@ write_curie(SerdWriter* const writer, const SerdNode* const node) // In fast-and-loose Turtle/TriG mode CURIEs are simply passed through const bool fast = - !(writer->flags & (SERD_WRITE_CURIED | SERD_WRITE_RESOLVED)); + (writer->flags & (SERD_WRITE_UNQUALIFIED | SERD_WRITE_UNRESOLVED)); if (!supports_abbrev(writer) || !fast) { if ((st = serd_env_expand(writer->env, node, &prefix, &suffix))) { diff --git a/test/extra/perfect/test-long-whitespace.nt b/test/extra/perfect/test-long-whitespace.nt index 09664b37..fca880d1 100644 --- a/test/extra/perfect/test-long-whitespace.nt +++ b/test/extra/perfect/test-long-whitespace.nt @@ -1,2 +1,2 @@ -<http://example.org/eg#a> <http://example.org/eg#b> "\nthis \ris a \U00015678long\t\nliteral\uABCD\n" . -<http://example.org/eg#d> <http://example.org/eg#e> "\tThis \uABCDis\r \U00015678another\n\none\n" . +<http://example.org/eg#a> <http://example.org/eg#b> "\nthis \ris a long\t\nliteralꯍ\n" . +<http://example.org/eg#d> <http://example.org/eg#e> "\tThis ꯍis\r another\n\none\n" . diff --git a/test/extra/perfect/test-utf8-uri.nt b/test/extra/perfect/test-utf8-uri.nt index b8a73a88..3338039b 100644 --- a/test/extra/perfect/test-utf8-uri.nt +++ b/test/extra/perfect/test-utf8-uri.nt @@ -1 +1 @@ -<http://example.org/math/\u2200x\u2208\u211D> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Thing> . +<http://example.org/math/∀x∈ℝ> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Thing> . diff --git a/test/extra/perfect/test-utf8.nt b/test/extra/perfect/test-utf8.nt index c02cb375..a9de7bbe 100644 --- a/test/extra/perfect/test-utf8.nt +++ b/test/extra/perfect/test-utf8.nt @@ -1,3 +1,3 @@ -<http://example.org/eg#s> <http://example.org/eg#p> "1: Two byte Unicode escape: \u00E0" . -<http://example.org/eg#s> <http://example.org/eg#p> "2: Largest Unicode escape in Turtle: \U0010FFFF" . -<http://example.org/eg#s> <http://example.org/eg#p> "3: \u222E E\u22C5da = Q, n \u2192 \u221E, \u2211 f(i) = \u220F g(i)" . +<http://example.org/eg#s> <http://example.org/eg#p> "1: Two byte Unicode escape: à" . +<http://example.org/eg#s> <http://example.org/eg#p> "2: Largest Unicode escape in Turtle: " . +<http://example.org/eg#s> <http://example.org/eg#p> "3: ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i)" . diff --git a/test/meson.build b/test/meson.build index 3e24fea0..937b9038 100644 --- a/test/meson.build +++ b/test/meson.build @@ -336,11 +336,12 @@ test_suites = { files('extra/good/manifest.ttl'), ns_serdtest + 'good/', '--', + '-a', '-b', ], 'fast': [ - files('extra/good/manifest.ttl'), - ns_serdtest + 'good/', + files('extra/perfect/manifest.ttl'), + ns_serdtest + 'perfect/', '--', '-f', ], @@ -353,12 +354,15 @@ test_suites = { 'good': [ files('extra/good/manifest.ttl'), ns_serdtest + 'good/', + '--', + '-a', ], 'lax_lax': [ '--lax', files('extra/lax/manifest.ttl'), ns_serdtest + 'lax/', '--', + '-a', '-l', ], 'lax_strict': [ diff --git a/test/test_reader_writer.c b/test/test_reader_writer.c index e792f2dc..c4f8bb90 100644 --- a/test/test_reader_writer.c +++ b/test/test_reader_writer.c @@ -409,8 +409,7 @@ test_write_errors(void) { SerdWorld* const world = serd_world_new(); ErrorContext ctx = {0U, 0U}; - const SerdWriterFlags style = - (SerdWriterFlags)(SERD_WRITE_STRICT | SERD_WRITE_CURIED); + const SerdWriterFlags style = (SerdWriterFlags)SERD_WRITE_STRICT; const size_t max_offsets[] = {0, 386, 1911, 2003, 386}; |