about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2021-07-22 15:26:22 -0400
committer David Robillard <d@drobilla.net> 2023-12-02 18:49:08 -0500
commit 5e4538756d601e6a941c5290777af95ea8848e1a (patch)
tree 9868e188a48a528e9908fcf695147f75790c3a56
parent 64024d0fa6a6dc048b2b846738846da597025f56 (diff)
download serd-5e4538756d601e6a941c5290777af95ea8848e1a.tar.gz
serd-5e4538756d601e6a941c5290777af95ea8848e1a.tar.bz2
serd-5e4538756d601e6a941c5290777af95ea8848e1a.zip
[WIP] Preserve long or short quoting from input documents
-rw-r--r-- NEWS 1
-rw-r--r-- include/serd/node.h 38
-rw-r--r-- include/serd/string.h 11
-rw-r--r-- src/node.c 148
-rw-r--r-- src/read_ntriples.c 22
-rw-r--r-- src/read_turtle.c 4
-rw-r--r-- src/string.c 53
-rw-r--r-- src/string_utils.h 5
-rw-r--r-- src/uri_utils.h 6
-rw-r--r-- src/writer.c 3
-rw-r--r-- test/extra/good/manifest.ttl 35
-rw-r--r-- test/extra/good/test-escapes.nt (renamed from test/extra/perfect/test-escapes.nt) 0
-rw-r--r-- test/extra/good/test-escapes.ttl (renamed from test/extra/perfect/test-escapes.ttl) 0
-rw-r--r-- test/extra/good/test-long-backspace-escape.nt 1
-rw-r--r-- test/extra/good/test-long-backspace-escape.ttl 4
-rw-r--r-- test/extra/good/test-long-delete-escape.nt 1
-rw-r--r-- test/extra/good/test-long-delete-escape.ttl 4
-rw-r--r-- test/extra/good/test-long-form-feed-escape.nt 1
-rw-r--r-- test/extra/good/test-long-form-feed-escape.ttl 4
-rw-r--r-- test/extra/good/test-long-whitespace.nt 2
-rw-r--r-- test/extra/good/test-long-whitespace.ttl (renamed from test/extra/perfect/test-long-whitespace.ttl) 0
-rw-r--r-- test/extra/perfect/manifest.ttl 14
-rw-r--r-- test/extra/perfect/test-backspace-escape.nt 1
-rw-r--r-- test/extra/perfect/test-backspace-escape.ttl 5
-rw-r--r-- test/extra/perfect/test-delete-escape.nt 1
-rw-r--r-- test/extra/perfect/test-delete-escape.ttl 3
-rw-r--r-- test/extra/perfect/test-form-feed-escape.nt 1
-rw-r--r-- test/extra/perfect/test-form-feed-escape.ttl 5
-rw-r--r-- test/extra/perfect/test-long-whitespace.nt 2
-rw-r--r-- test/test_env.c 6
-rw-r--r-- test/test_node.c 120
-rw-r--r-- test/test_reader_writer.c 7
-rw-r--r-- test/test_string.c 17
-rw-r--r-- test/test_writer.c 4
34 files changed, 239 insertions, 290 deletions
diff --git a/NEWS b/NEWS
index 21a842f6..b82e70a9 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,7 @@ serd (1.1.1) unstable; urgency=medium
* Bring read/write interface closer to C standard
* Make nodes opaque
* Preserve anonymous graph syntax in TriG
+ * Preserve long or short quoting from input documents
* Remove SERD_DISABLE_DEPRECATED and SERD_DEPRECATED_BY
* Remove serd_uri_to_path()
* Remove support for reading Turtle named inline nodes extension
diff --git a/include/serd/node.h b/include/serd/node.h
index 1faba160..90ecde71 100644
--- a/include/serd/node.h
+++ b/include/serd/node.h
@@ -104,10 +104,9 @@ typedef enum {
/// Node flags, which ORed together make a #SerdNodeFlags
typedef enum {
- SERD_HAS_NEWLINE = 1U << 0U, ///< Contains line breaks ('\\n' or '\\r')
- SERD_HAS_QUOTE = 1U << 1U, ///< Contains quotes ('"')
- SERD_HAS_DATATYPE = 1U << 2U, ///< Literal node has datatype
- SERD_HAS_LANGUAGE = 1U << 3U, ///< Literal node has language
+ SERD_IS_LONG = 1U << 0U, ///< Literal node should be triple-quoted
+ SERD_HAS_DATATYPE = 1U << 1U, ///< Literal node has datatype
+ SERD_HAS_LANGUAGE = 1U << 2U, ///< Literal node has language
} SerdNodeFlag;
/// Bitwise OR of #SerdNodeFlag values
@@ -135,23 +134,30 @@ SERD_API SerdNode* ZIX_ALLOCATED
serd_new_string(SerdStringView string);
/**
- Create a new plain literal node from `str` with `lang`.
+ Create a new literal node with optional datatype or language.
- A plain literal has no datatype, but may have a language tag. The `lang`
- may be empty, in which case this is equivalent to `serd_new_string()`.
-*/
-SERD_API SerdNode* ZIX_ALLOCATED
-serd_new_plain_literal(SerdStringView str, SerdStringView lang);
+ This can create more complex literals than serd_new_string() with an
+ associated datatype URI or language tag, as well as control whether a
+ literal should be written as a short or long (triple-quoted) string.
-/**
- Create a new typed literal node from `str`.
+ @param string The string value of the literal.
+
+ @param flags Flags to describe the literal and its metadata. This must be a
+ valid combination of flags, in particular, at most one of #SERD_HAS_DATATYPE
+ and #SERD_HAS_LANGUAGE may be set.
+
+ @param meta The string value of the literal's metadata. If
+ #SERD_HAS_DATATYPE is set, then this must be an absolute datatype URI. If
+ #SERD_HAS_LANGUAGE is set, then this must be a language tag like "en-ca".
+ Otherwise, it is ignored.
- A typed literal has no language tag, but may have a datatype. The
- `datatype` may be NULL, in which case this is equivalent to
- `serd_new_string()`.
+ @return A newly allocated literal node that must be freed with
+ serd_node_free(), or null if the arguments are invalid or allocation failed.
*/
SERD_API SerdNode* ZIX_ALLOCATED
-serd_new_typed_literal(SerdStringView str, SerdStringView datatype_uri);
+serd_new_literal(SerdStringView string,
+ SerdNodeFlags flags,
+ SerdStringView meta);
/**
Create a new node from a blank node label.
diff --git a/include/serd/string.h b/include/serd/string.h
index 46da939d..11e55e6e 100644
--- a/include/serd/string.h
+++ b/include/serd/string.h
@@ -5,7 +5,6 @@
#define SERD_STRING_H
#include "serd/attributes.h"
-#include "serd/node.h"
#include "zix/attributes.h"
#include <stddef.h>
@@ -19,16 +18,6 @@ SERD_BEGIN_DECLS
*/
/**
- Measure a UTF-8 string.
-
- @return Length of `str` in bytes.
- @param str A null-terminated UTF-8 string.
- @param flags (Output) Set to the applicable flags.
-*/
-SERD_API size_t
-serd_strlen(const char* ZIX_NONNULL str, SerdNodeFlags* ZIX_NULLABLE flags);
-
-/**
Decode a base64 string.
This function can be used to decode a node created with serd_new_base64().
diff --git a/src/node.c b/src/node.c
index d34d4552..f1fbf0e6 100644
--- a/src/node.c
+++ b/src/node.c
@@ -11,7 +11,6 @@
#include "serd/buffer.h"
#include "serd/node.h"
#include "serd/status.h"
-#include "serd/string.h"
#include "serd/string_view.h"
#include "serd/uri.h"
#include "serd/write_result.h"
@@ -195,88 +194,92 @@ serd_new_token(const SerdNodeType type, const SerdStringView str)
SerdNode*
serd_new_string(const SerdStringView str)
{
- SerdNodeFlags flags = 0;
- const size_t length = serd_substrlen(str.data, str.length, &flags);
- SerdNode* node = serd_node_malloc(length, flags, SERD_LITERAL);
-
- memcpy(serd_node_buffer(node), str.data, str.length);
- node->length = length;
-
- serd_node_check_padding(node);
- return node;
-}
-
-/// Internal pre-measured implementation of serd_new_plain_literal
-static SerdNode*
-serd_new_plain_literal_i(const SerdStringView str,
- SerdNodeFlags flags,
- const SerdStringView lang)
-{
- assert(str.length);
- assert(lang.length);
-
- flags |= SERD_HAS_LANGUAGE;
-
- const size_t len = serd_node_pad_length(str.length);
- const size_t total_len = len + sizeof(SerdNode) + lang.length;
+ SerdNodeFlags flags = 0U;
+ SerdNode* node = serd_node_malloc(str.length, flags, SERD_LITERAL);
- SerdNode* node = serd_node_malloc(total_len, flags, SERD_LITERAL);
- memcpy(serd_node_buffer(node), str.data, str.length);
- node->length = str.length;
+ if (node) {
+ if (str.data && str.length) {
+ memcpy(serd_node_buffer(node), str.data, str.length);
+ }
- SerdNode* lang_node = node + 1 + (len / sizeof(SerdNode));
- lang_node->type = SERD_LITERAL;
- lang_node->length = lang.length;
- memcpy(serd_node_buffer(lang_node), lang.data, lang.length);
- serd_node_check_padding(lang_node);
+ node->length = str.length;
+ serd_node_check_padding(node);
+ }
- serd_node_check_padding(node);
return node;
}
-SerdNode*
-serd_new_plain_literal(const SerdStringView str, const SerdStringView lang)
+ZIX_PURE_FUNC static bool
+is_langtag(const SerdStringView string)
{
- if (!lang.length) {
- return serd_new_string(str);
+ // First character must be a letter
+ size_t i = 0;
+ if (!string.length || !is_alpha(string.data[i])) {
+ return false;
}
- SerdNodeFlags flags = 0;
- serd_strlen(str.data, &flags);
+ // First component must be all letters
+ while (++i < string.length && string.data[i] && string.data[i] != '-') {
+ if (!is_alpha(string.data[i])) {
+ return false;
+ }
+ }
- return serd_new_plain_literal_i(str, flags, lang);
+ // Following components can have letters and digits
+ while (i < string.length && string.data[i] == '-') {
+ while (++i < string.length && string.data[i] && string.data[i] != '-') {
+ const char c = string.data[i];
+ if (!is_alpha(c) && !is_digit(c)) {
+ return false;
+ }
+ }
+ }
+
+ return true;
}
SerdNode*
-serd_new_typed_literal(const SerdStringView str,
- const SerdStringView datatype_uri)
+serd_new_literal(const SerdStringView string,
+ const SerdNodeFlags flags,
+ const SerdStringView meta)
{
- if (!datatype_uri.length) {
- return serd_new_string(str);
+ if (!(flags & (SERD_HAS_DATATYPE | SERD_HAS_LANGUAGE))) {
+ SerdNode* node = serd_node_malloc(string.length, flags, SERD_LITERAL);
+
+ memcpy(serd_node_buffer(node), string.data, string.length);
+ node->length = string.length;
+ serd_node_check_padding(node);
+ return node;
}
- if (!strcmp(datatype_uri.data, NS_RDF "langString")) {
+ if ((flags & SERD_HAS_DATATYPE) && (flags & SERD_HAS_LANGUAGE)) {
return NULL;
}
- SerdNodeFlags flags = 0U;
- serd_strlen(str.data, &flags);
+ if (!meta.length) {
+ return NULL;
+ }
- flags |= SERD_HAS_DATATYPE;
+ if (((flags & SERD_HAS_DATATYPE) &&
+ (!serd_uri_string_has_scheme(meta.data) ||
+ !strcmp(meta.data, NS_RDF "langString"))) ||
+ ((flags & SERD_HAS_LANGUAGE) && !is_langtag(meta))) {
+ return NULL;
+ }
- const size_t len = serd_node_pad_length(str.length);
- const size_t total_len = len + sizeof(SerdNode) + datatype_uri.length;
+ const size_t len = serd_node_pad_length(string.length);
+ const size_t meta_len = serd_node_pad_length(meta.length);
+ const size_t meta_size = sizeof(SerdNode) + meta_len;
- SerdNode* node = serd_node_malloc(total_len, flags, SERD_LITERAL);
- memcpy(serd_node_buffer(node), str.data, str.length);
- node->length = str.length;
+ SerdNode* node = serd_node_malloc(len + meta_size, flags, SERD_LITERAL);
+ memcpy(serd_node_buffer(node), string.data, string.length);
+ node->length = string.length;
- SerdNode* datatype_node = node + 1 + (len / sizeof(SerdNode));
- datatype_node->length = datatype_uri.length;
- datatype_node->type = SERD_URI;
- memcpy(
- serd_node_buffer(datatype_node), datatype_uri.data, datatype_uri.length);
- serd_node_check_padding(datatype_node);
+ SerdNode* meta_node = node + 1U + (len / sizeof(SerdNode));
+ meta_node->length = meta.length;
+ meta_node->type = (flags & SERD_HAS_DATATYPE) ? SERD_URI : SERD_LITERAL;
+ memcpy(serd_node_buffer(meta_node), meta.data, meta.length);
+ serd_node_check_padding(meta_node);
serd_node_check_padding(node);
return node;
@@ -548,13 +551,6 @@ typedef size_t (*SerdWriteLiteralFunc)(const void* user_data,
size_t buf_size,
char* buf);
-SerdNode*
-serd_new_boolean(bool b)
-{
- return serd_new_typed_literal(b ? serd_string("true") : serd_string("false"),
- serd_node_string_view(&serd_xsd_boolean.node));
-}
-
static SerdNode*
serd_new_custom_literal(const void* const user_data,
const size_t len,
@@ -589,8 +585,9 @@ serd_new_double(const double d)
const ExessResult r = exess_write_double(d, sizeof(buf), buf);
return r.status ? NULL
- : serd_new_typed_literal(serd_substring(buf, r.count),
- serd_string(EXESS_XSD_URI "double"));
+ : serd_new_literal(serd_substring(buf, r.count),
+ SERD_HAS_DATATYPE,
+ serd_string(EXESS_XSD_URI "double"));
}
SerdNode*
@@ -601,8 +598,17 @@ serd_new_float(const float f)
const ExessResult r = exess_write_float(f, sizeof(buf), buf);
return r.status ? NULL
- : serd_new_typed_literal(serd_substring(buf, r.count),
- serd_string(EXESS_XSD_URI "float"));
+ : serd_new_literal(serd_substring(buf, r.count),
+ SERD_HAS_DATATYPE,
+ serd_string(EXESS_XSD_URI "float"));
+}
+
+SerdNode*
+serd_new_boolean(bool b)
+{
+ return serd_new_literal(b ? serd_string("true") : serd_string("false"),
+ SERD_HAS_DATATYPE,
+ serd_node_string_view(&serd_xsd_boolean.node));
}
SerdNode*
diff --git a/src/read_ntriples.c b/src/read_ntriples.c
index bec59c13..6822b64f 100644
--- a/src/read_ntriples.c
+++ b/src/read_ntriples.c
@@ -190,24 +190,8 @@ read_IRI(SerdReader* const reader, SerdNode** const dest)
SerdStatus
read_character(SerdReader* const reader, SerdNode* const dest, const uint8_t c)
{
- if (!(c & 0x80)) {
- switch (c) {
- case 0xA:
- case 0xD:
- dest->flags |= SERD_HAS_NEWLINE;
- break;
- case '"':
- case '\'':
- dest->flags |= SERD_HAS_QUOTE;
- break;
- default:
- break;
- }
-
- return push_byte(reader, dest, c);
- }
-
- return read_utf8_continuation(reader, dest, c);
+ return !(c & 0x80) ? push_byte(reader, dest, c)
+ : read_utf8_continuation(reader, dest, c);
}
SerdStatus
@@ -423,10 +407,8 @@ read_ECHAR(SerdReader* const reader, SerdNode* const dest)
case 'b':
return (st = skip_byte(reader, 'b')) ? st : push_byte(reader, dest, '\b');
case 'n':
- dest->flags |= SERD_HAS_NEWLINE;
return (st = skip_byte(reader, 'n')) ? st : push_byte(reader, dest, '\n');
case 'r':
- dest->flags |= SERD_HAS_NEWLINE;
return (st = skip_byte(reader, 'r')) ? st : push_byte(reader, dest, '\r');
case 'f':
return (st = skip_byte(reader, 'f')) ? st : push_byte(reader, dest, '\f');
diff --git a/src/read_turtle.c b/src/read_turtle.c
index c3970a1e..22269741 100644
--- a/src/read_turtle.c
+++ b/src/read_turtle.c
@@ -115,7 +115,6 @@ read_STRING_LITERAL_LONG(SerdReader* const reader,
push_byte(reader, ref, c);
st = read_string_escape(reader, ref);
} else {
- ref->flags |= SERD_HAS_QUOTE;
if (!(st = push_byte(reader, ref, c))) {
st = read_character(reader, ref, (uint8_t)q2);
}
@@ -151,7 +150,10 @@ read_String(SerdReader* const reader, SerdNode* const node)
return SERD_SUCCESS;
}
+ // Long string
skip_byte(reader, q3);
+ node->flags |= SERD_IS_LONG;
+
return read_STRING_LITERAL_LONG(reader, node, (uint8_t)q1);
}
diff --git a/src/string.c b/src/string.c
index 8cc839bd..ed3149d0 100644
--- a/src/string.c
+++ b/src/string.c
@@ -1,16 +1,10 @@
// Copyright 2011-2020 David Robillard <d@drobilla.net>
// SPDX-License-Identifier: ISC
-#include "string_utils.h"
-
#include "serd/memory.h"
-#include "serd/node.h"
#include "serd/status.h"
-#include "serd/string.h"
-#include <assert.h>
#include <stdlib.h>
-#include <string.h>
void
serd_free(void* const ptr)
@@ -68,50 +62,3 @@ serd_strerror(const SerdStatus status)
return "Unknown error";
}
-
-static void
-serd_update_flags(const char c, SerdNodeFlags* const flags)
-{
- switch (c) {
- case '\r':
- case '\n':
- *flags |= SERD_HAS_NEWLINE;
- break;
- case '"':
- *flags |= SERD_HAS_QUOTE;
- break;
- default:
- break;
- }
-}
-
-size_t
-serd_substrlen(const char* const str,
- const size_t len,
- SerdNodeFlags* const flags)
-{
- assert(flags);
-
- size_t i = 0;
- *flags = 0;
- for (; i < len && str[i]; ++i) {
- serd_update_flags(str[i], flags);
- }
-
- return i;
-}
-
-size_t
-serd_strlen(const char* const str, SerdNodeFlags* const flags)
-{
- if (flags) {
- size_t i = 0;
- *flags = 0;
- for (; str[i]; ++i) {
- serd_update_flags(str[i], flags);
- }
- return i;
- }
-
- return strlen(str);
-}
diff --git a/src/string_utils.h b/src/string_utils.h
index 9de03fa0..2517b270 100644
--- a/src/string_utils.h
+++ b/src/string_utils.h
@@ -4,8 +4,6 @@
#ifndef SERD_SRC_STRING_UTILS_H
#define SERD_SRC_STRING_UTILS_H
-#include "serd/node.h"
-
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
@@ -97,9 +95,6 @@ is_windows_path(const char* path)
(path[2] == '/' || path[2] == '\\');
}
-size_t
-serd_substrlen(const char* str, size_t len, SerdNodeFlags* flags);
-
static inline uint8_t
hex_digit_value(const uint8_t c)
{
diff --git a/src/uri_utils.h b/src/uri_utils.h
index 004129d2..76060d6a 100644
--- a/src/uri_utils.h
+++ b/src/uri_utils.h
@@ -4,10 +4,12 @@
#ifndef SERD_SRC_URI_UTILS_H
#define SERD_SRC_URI_UTILS_H
-#include "serd/attributes.h"
-
#include "string_utils.h"
+#include "serd/attributes.h"
+#include "serd/string_view.h"
+#include "serd/uri.h"
+
#include <stdbool.h>
#include <string.h>
diff --git a/src/writer.c b/src/writer.c
index 94c75625..329a29ad 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -778,8 +778,7 @@ write_literal(SerdWriter* const writer,
}
}
- if (supports_abbrev(writer) &&
- (node->flags & (SERD_HAS_NEWLINE | SERD_HAS_QUOTE))) {
+ if (supports_abbrev(writer) && (node->flags & SERD_IS_LONG)) {
TRY(st, esink("\"\"\"", 3, writer));
TRY(st, write_text(writer, WRITE_LONG_STRING, node_str, node->length));
TRY(st, esink("\"\"\"", 3, writer));
diff --git a/test/extra/good/manifest.ttl b/test/extra/good/manifest.ttl
index bce7c564..7c356285 100644
--- a/test/extra/good/manifest.ttl
+++ b/test/extra/good/manifest.ttl
@@ -23,10 +23,15 @@
<#test-double>
<#test-empty-path-base>
<#test-eof-at-page-end>
+ <#test-escapes>
<#test-id>
<#test-list-in-blank>
<#test-list-subject>
+ <#test-long-backspace-escape>
+ <#test-long-delete-escape>
+ <#test-long-form-feed-escape>
<#test-long-utf8>
+ <#test-long-whitespace>
<#test-no-spaces>
<#test-non-curie-uri>
<#test-prefix>
@@ -140,6 +145,12 @@
mf:name "test-eof-at-page-end" ;
mf:result <test-eof-at-page-end.nt> .
+<#test-escapes>
+ a rdft:TestTurtleEval ;
+ mf:action <test-escapes.ttl> ;
+ mf:name "test-escapes" ;
+ mf:result <test-escapes.nt> .
+
<#test-id>
a rdft:TestTurtleEval ;
mf:action <test-id.ttl> ;
@@ -158,12 +169,36 @@
mf:name "test-list-subject" ;
mf:result <test-list-subject.nt> .
+<#test-long-backspace-escape>
+ a rdft:TestTurtleEval ;
+ mf:action <test-long-backspace-escape.ttl> ;
+ mf:name "test-long-backspace-escape" ;
+ mf:result <test-long-backspace-escape.nt> .
+
+<#test-long-delete-escape>
+ a rdft:TestTurtleEval ;
+ mf:action <test-long-delete-escape.ttl> ;
+ mf:name "test-long-delete-escape" ;
+ mf:result <test-long-delete-escape.nt> .
+
+<#test-long-form-feed-escape>
+ a rdft:TestTurtleEval ;
+ mf:action <test-long-form-feed-escape.ttl> ;
+ mf:name "test-long-form-feed-escape" ;
+ mf:result <test-long-form-feed-escape.nt> .
+
<#test-long-utf8>
a rdft:TestTurtleEval ;
mf:action <test-long-utf8.ttl> ;
mf:name "test-long-utf8" ;
mf:result <test-long-utf8.nt> .
+<#test-long-whitespace>
+ a rdft:TestTurtleEval ;
+ mf:action <test-long-whitespace.ttl> ;
+ mf:name "test-long-whitespace" ;
+ mf:result <test-long-whitespace.nt> .
+
<#test-no-spaces>
a rdft:TestTurtleEval ;
mf:action <test-no-spaces.ttl> ;
diff --git a/test/extra/perfect/test-escapes.nt b/test/extra/good/test-escapes.nt
index 2780d976..2780d976 100644
--- a/test/extra/perfect/test-escapes.nt
+++ b/test/extra/good/test-escapes.nt
diff --git a/test/extra/perfect/test-escapes.ttl b/test/extra/good/test-escapes.ttl
index b8fcce7a..b8fcce7a 100644
--- a/test/extra/perfect/test-escapes.ttl
+++ b/test/extra/good/test-escapes.ttl
diff --git a/test/extra/good/test-long-backspace-escape.nt b/test/extra/good/test-long-backspace-escape.nt
new file mode 100644
index 00000000..74e8e277
--- /dev/null
+++ b/test/extra/good/test-long-backspace-escape.nt
@@ -0,0 +1 @@
+<http://example.org/eg#s> <http://example.org/eg#p> "long\n\u0008\n" .
diff --git a/test/extra/good/test-long-backspace-escape.ttl b/test/extra/good/test-long-backspace-escape.ttl
new file mode 100644
index 00000000..c4b604cb
--- /dev/null
+++ b/test/extra/good/test-long-backspace-escape.ttl
@@ -0,0 +1,4 @@
+<http://example.org/eg#s>
+ <http://example.org/eg#p> """long
+\b
+""" .
diff --git a/test/extra/good/test-long-delete-escape.nt b/test/extra/good/test-long-delete-escape.nt
new file mode 100644
index 00000000..25ed9ab3
--- /dev/null
+++ b/test/extra/good/test-long-delete-escape.nt
@@ -0,0 +1 @@
+<http://example.org/eg#s> <http://example.org/eg#p> "long\n\u007F\n" .
diff --git a/test/extra/good/test-long-delete-escape.ttl b/test/extra/good/test-long-delete-escape.ttl
new file mode 100644
index 00000000..11bc2b21
--- /dev/null
+++ b/test/extra/good/test-long-delete-escape.ttl
@@ -0,0 +1,4 @@
+<http://example.org/eg#s>
+ <http://example.org/eg#p> """long
+\u007F
+""" .
diff --git a/test/extra/good/test-long-form-feed-escape.nt b/test/extra/good/test-long-form-feed-escape.nt
new file mode 100644
index 00000000..d16c6da9
--- /dev/null
+++ b/test/extra/good/test-long-form-feed-escape.nt
@@ -0,0 +1 @@
+<http://example.org/thing> <http://example.org/label> "long\n\u000C\n" .
diff --git a/test/extra/good/test-long-form-feed-escape.ttl b/test/extra/good/test-long-form-feed-escape.ttl
new file mode 100644
index 00000000..f62ec19e
--- /dev/null
+++ b/test/extra/good/test-long-form-feed-escape.ttl
@@ -0,0 +1,4 @@
+<http://example.org/thing>
+ <http://example.org/label> """long
+
+""" .
diff --git a/test/extra/good/test-long-whitespace.nt b/test/extra/good/test-long-whitespace.nt
new file mode 100644
index 00000000..09664b37
--- /dev/null
+++ b/test/extra/good/test-long-whitespace.nt
@@ -0,0 +1,2 @@
+<http://example.org/eg#a> <http://example.org/eg#b> "\nthis \ris a \U00015678long\t\nliteral\uABCD\n" .
+<http://example.org/eg#d> <http://example.org/eg#e> "\tThis \uABCDis\r \U00015678another\n\none\n" .
diff --git a/test/extra/perfect/test-long-whitespace.ttl b/test/extra/good/test-long-whitespace.ttl
index 9c3f946c..9c3f946c 100644
--- a/test/extra/perfect/test-long-whitespace.ttl
+++ b/test/extra/good/test-long-whitespace.ttl
diff --git a/test/extra/perfect/manifest.ttl b/test/extra/perfect/manifest.ttl
index 572f55f2..66dd7547 100644
--- a/test/extra/perfect/manifest.ttl
+++ b/test/extra/perfect/manifest.ttl
@@ -10,13 +10,11 @@
<#test-decimal>
<#test-delete-escape>
<#test-empty>
- <#test-escapes>
<#test-exact-uri>
<#test-form-feed-escape>
<#test-integer>
<#test-lang>
<#test-list>
- <#test-long-whitespace>
<#test-triple>
<#test-uri-escape>
<#test-uri-query>
@@ -48,12 +46,6 @@
mf:name "test-empty" ;
mf:result <test-empty.nt> .
-<#test-escapes>
- a rdft:TestTurtleEval ;
- mf:action <test-escapes.ttl> ;
- mf:name "test-escapes" ;
- mf:result <test-escapes.nt> .
-
<#test-exact-uri>
a rdft:TestTurtleEval ;
mf:action <test-exact-uri.ttl> ;
@@ -84,12 +76,6 @@
mf:name "test-list" ;
mf:result <test-list.nt> .
-<#test-long-whitespace>
- a rdft:TestTurtleEval ;
- mf:action <test-long-whitespace.ttl> ;
- mf:name "test-long-whitespace" ;
- mf:result <test-long-whitespace.nt> .
-
<#test-triple>
a rdft:TestTurtleEval ;
mf:action <test-triple.ttl> ;
diff --git a/test/extra/perfect/test-backspace-escape.nt b/test/extra/perfect/test-backspace-escape.nt
index dd1fda93..f0b894a2 100644
--- a/test/extra/perfect/test-backspace-escape.nt
+++ b/test/extra/perfect/test-backspace-escape.nt
@@ -1,3 +1,2 @@
<http://example.org/eg#s> <http://example.org/eg#p> "\u0008 first" .
<http://example.org/eg#s> <http://example.org/eg#p> "last \u0008" .
-<http://example.org/eg#s> <http://example.org/eg#p> "long\n\u0008\n" .
diff --git a/test/extra/perfect/test-backspace-escape.ttl b/test/extra/perfect/test-backspace-escape.ttl
index a92a9252..ab9c8314 100644
--- a/test/extra/perfect/test-backspace-escape.ttl
+++ b/test/extra/perfect/test-backspace-escape.ttl
@@ -1,6 +1,3 @@
<http://example.org/eg#s>
<http://example.org/eg#p> "\b first" ,
- "last \b" ,
- """long
-\b
-""" .
+ "last \b" .
diff --git a/test/extra/perfect/test-delete-escape.nt b/test/extra/perfect/test-delete-escape.nt
index 1fd30590..f3d92525 100644
--- a/test/extra/perfect/test-delete-escape.nt
+++ b/test/extra/perfect/test-delete-escape.nt
@@ -1,3 +1,2 @@
<http://example.org/eg#s> <http://example.org/eg#p> "last \u007F" .
-<http://example.org/eg#s> <http://example.org/eg#p> "long\n\u007F\n" .
<http://example.org/eg#s> <http://example.org/eg#p> "\u007F first" .
diff --git a/test/extra/perfect/test-delete-escape.ttl b/test/extra/perfect/test-delete-escape.ttl
index a2711920..019fc47a 100644
--- a/test/extra/perfect/test-delete-escape.ttl
+++ b/test/extra/perfect/test-delete-escape.ttl
@@ -1,6 +1,3 @@
<http://example.org/eg#s>
<http://example.org/eg#p> "last \u007F" ,
- """long
-\u007F
-""" ,
"\u007F first" .
diff --git a/test/extra/perfect/test-form-feed-escape.nt b/test/extra/perfect/test-form-feed-escape.nt
index 7848ec5b..6606fb07 100644
--- a/test/extra/perfect/test-form-feed-escape.nt
+++ b/test/extra/perfect/test-form-feed-escape.nt
@@ -1,3 +1,2 @@
<http://example.org/eg#s> <http://example.org/eg#p> "\u000C first" .
<http://example.org/eg#s> <http://example.org/eg#p> "last \u000C" .
-<http://example.org/eg#s> <http://example.org/eg#p> "long\n\u000C\n" .
diff --git a/test/extra/perfect/test-form-feed-escape.ttl b/test/extra/perfect/test-form-feed-escape.ttl
index 73c78a5e..0b38971e 100644
--- a/test/extra/perfect/test-form-feed-escape.ttl
+++ b/test/extra/perfect/test-form-feed-escape.ttl
@@ -1,6 +1,3 @@
<http://example.org/eg#s>
<http://example.org/eg#p> "\f first" ,
- "last \f" ,
- """long
-
-""" .
+ "last \f" .
diff --git a/test/extra/perfect/test-long-whitespace.nt b/test/extra/perfect/test-long-whitespace.nt
deleted file mode 100644
index fca880d1..00000000
--- a/test/extra/perfect/test-long-whitespace.nt
+++ /dev/null
@@ -1,2 +0,0 @@
-<http://example.org/eg#a> <http://example.org/eg#b> "\nthis \ris a 𕙸long\t\nliteralꯍ\n" .
-<http://example.org/eg#d> <http://example.org/eg#e> "\tThis ꯍis\r 𕙸another\n\none\n" .
diff --git a/test/test_env.c b/test/test_env.c
index d2c80343..d45f2ef6 100644
--- a/test/test_env.c
+++ b/test/test_env.c
@@ -161,8 +161,10 @@ test_expand_bad_uri_datatype(void)
{
const SerdStringView type = serd_string("Type");
- SerdNode* const typed = serd_new_typed_literal(serd_string("data"), type);
- SerdEnv* const env = serd_env_new(serd_empty_string());
+ SerdNode* const typed =
+ serd_new_literal(serd_string("data"), SERD_HAS_DATATYPE, type);
+
+ SerdEnv* const env = serd_env_new(serd_empty_string());
assert(!serd_env_expand(env, typed));
diff --git a/test/test_node.c b/test/test_node.c
index 7d37cfe5..7bdc83ec 100644
--- a/test/test_node.c
+++ b/test/test_node.c
@@ -75,8 +75,8 @@ check_get_boolean(const char* string,
const char* datatype_uri,
const bool expected)
{
- SerdNode* const node =
- serd_new_typed_literal(serd_string(string), serd_string(datatype_uri));
+ SerdNode* const node = serd_new_literal(
+ serd_string(string), SERD_HAS_DATATYPE, serd_string(datatype_uri));
assert(node);
assert(serd_get_boolean(node) == expected);
@@ -162,8 +162,8 @@ check_get_double(const char* string,
const char* datatype_uri,
const double expected)
{
- SerdNode* const node =
- serd_new_typed_literal(serd_string(string), serd_string(datatype_uri));
+ SerdNode* const node = serd_new_literal(
+ serd_string(string), SERD_HAS_DATATYPE, serd_string(datatype_uri));
assert(node);
@@ -191,8 +191,8 @@ test_get_double(void)
assert(isnan(serd_get_double(nan)));
serd_node_free(nan);
- SerdNode* const invalid =
- serd_new_typed_literal(serd_string("!invalid"), serd_string(NS_XSD "long"));
+ SerdNode* const invalid = serd_new_literal(
+ serd_string("!invalid"), SERD_HAS_DATATYPE, serd_string(NS_XSD "long"));
assert(isnan(serd_get_double(invalid)));
serd_node_free(invalid);
@@ -235,8 +235,8 @@ check_get_float(const char* string,
const char* datatype_uri,
const float expected)
{
- SerdNode* const node =
- serd_new_typed_literal(serd_string(string), serd_string(datatype_uri));
+ SerdNode* const node = serd_new_literal(
+ serd_string(string), SERD_HAS_DATATYPE, serd_string(datatype_uri));
assert(node);
@@ -262,8 +262,8 @@ test_get_float(void)
assert(isnan(serd_get_float(nan)));
serd_node_free(nan);
- SerdNode* const invalid =
- serd_new_typed_literal(serd_string("!invalid"), serd_string(NS_XSD "long"));
+ SerdNode* const invalid = serd_new_literal(
+ serd_string("!invalid"), SERD_HAS_DATATYPE, serd_string(NS_XSD "long"));
assert(isnan(serd_get_double(invalid)));
@@ -300,8 +300,8 @@ check_get_integer(const char* string,
const char* datatype_uri,
const int64_t expected)
{
- SerdNode* const node =
- serd_new_typed_literal(serd_string(string), serd_string(datatype_uri));
+ SerdNode* const node = serd_new_literal(
+ serd_string(string), SERD_HAS_DATATYPE, serd_string(datatype_uri));
assert(node);
assert(serd_get_integer(node) == expected);
@@ -364,8 +364,8 @@ check_get_base64(const char* string,
const char* datatype_uri,
const char* expected)
{
- SerdNode* const node =
- serd_new_typed_literal(serd_string(string), serd_string(datatype_uri));
+ SerdNode* const node = serd_new_literal(
+ serd_string(string), SERD_HAS_DATATYPE, serd_string(datatype_uri));
assert(node);
@@ -390,8 +390,8 @@ test_get_base64(void)
check_get_base64("Zm9vYg==", NS_XSD "base64Binary", "foob");
check_get_base64(" \f\n\r\t\vZm9v \f\n\r\t\v", NS_XSD "base64Binary", "foo");
- SerdNode* const node = serd_new_typed_literal(
- serd_string("Zm9v"), serd_string(NS_XSD "base64Binary"));
+ SerdNode* const node = serd_new_literal(
+ serd_string("Zm9v"), SERD_HAS_DATATYPE, serd_string(NS_XSD "base64Binary"));
char small[2] = {0};
const SerdWriteResult r = serd_get_base64(node, sizeof(small), small);
@@ -429,7 +429,7 @@ test_node_from_string(void)
{
SerdNode* const hello = serd_new_string(serd_string("hello\""));
assert(serd_node_length(hello) == 6);
- assert(serd_node_flags(hello) == SERD_HAS_QUOTE);
+ assert(!serd_node_flags(hello));
assert(!strncmp(serd_node_string(hello), "hello\"", 6));
assert(!strcmp(serd_node_string_view(hello).data, "hello\""));
assert(serd_node_string_view(hello).length == 6);
@@ -448,7 +448,7 @@ test_node_from_substring(void)
{
SerdNode* const a_b = serd_new_string(serd_substring("a\"bc", 3));
assert(serd_node_length(a_b) == 3);
- assert(serd_node_flags(a_b) == SERD_HAS_QUOTE);
+ assert(!serd_node_flags(a_b));
assert(strlen(serd_node_string(a_b)) == 3);
assert(!strncmp(serd_node_string(a_b), "a\"b", 3));
serd_node_free(a_b);
@@ -467,46 +467,54 @@ check_copy_equals(const SerdNode* const node)
static void
test_literal(void)
{
- SerdNode* hello2 = serd_new_string(serd_string("hello\""));
+ const SerdStringView hello_str = serd_string("hello");
+ const SerdStringView empty_str = serd_empty_string();
- assert(serd_node_length(hello2) == 6 &&
- serd_node_flags(hello2) == SERD_HAS_QUOTE &&
- !strcmp(serd_node_string(hello2), "hello\""));
+ assert(!serd_new_literal(
+ hello_str, SERD_HAS_DATATYPE | SERD_HAS_LANGUAGE, serd_string("whatever")));
- check_copy_equals(hello2);
+ assert(!serd_new_literal(hello_str, SERD_HAS_DATATYPE, empty_str));
+ assert(!serd_new_literal(hello_str, SERD_HAS_LANGUAGE, empty_str));
+
+ assert(!serd_new_literal(hello_str, SERD_HAS_DATATYPE, serd_string("Type")));
+ assert(!serd_new_literal(hello_str, SERD_HAS_DATATYPE, serd_string("de")));
- SerdNode* hello3 =
- serd_new_plain_literal(serd_string("hello\""), serd_empty_string());
+ assert(!serd_new_literal(hello_str, SERD_HAS_LANGUAGE, serd_string("3n")));
+ assert(!serd_new_literal(hello_str, SERD_HAS_LANGUAGE, serd_string("d3")));
+ assert(!serd_new_literal(hello_str, SERD_HAS_LANGUAGE, serd_string("d3")));
+ assert(!serd_new_literal(hello_str, SERD_HAS_LANGUAGE, serd_string("en-!")));
- assert(serd_node_equals(hello2, hello3));
+ SerdNode* hello2 = serd_new_string(serd_string("hello\""));
- SerdNode* hello4 =
- serd_new_typed_literal(serd_string("hello\""), serd_empty_string());
+ assert(serd_node_length(hello2) == 6 &&
+ !strcmp(serd_node_string(hello2), "hello\""));
- assert(!serd_new_typed_literal(serd_string("plain"),
- serd_string(NS_RDF "langString")));
+ check_copy_equals(hello2);
- assert(serd_node_equals(hello4, hello2));
+ assert(!serd_new_literal(
+ serd_string("plain"), SERD_HAS_DATATYPE, serd_string(NS_RDF "langString")));
- serd_node_free(hello4);
- serd_node_free(hello3);
serd_node_free(hello2);
- const char* lang_lit_str = "\"Hello\"@en";
- SerdNode* sliced_lang_lit = serd_new_plain_literal(
- serd_substring(lang_lit_str + 1, 5), serd_substring(lang_lit_str + 8, 2));
+ const char* lang_lit_str = "\"Hello\"@en-ca";
+ SerdNode* sliced_lang_lit =
+ serd_new_literal(serd_substring(lang_lit_str + 1, 5),
+ SERD_HAS_LANGUAGE,
+ serd_substring(lang_lit_str + 8, 5));
assert(!strcmp(serd_node_string(sliced_lang_lit), "Hello"));
const SerdNode* const lang = serd_node_language(sliced_lang_lit);
assert(lang);
- assert(!strcmp(serd_node_string(lang), "en"));
+ assert(!strcmp(serd_node_string(lang), "en-ca"));
check_copy_equals(sliced_lang_lit);
serd_node_free(sliced_lang_lit);
- const char* type_lit_str = "\"Hallo\"^^<http://example.org/Greeting>";
- SerdNode* sliced_type_lit = serd_new_typed_literal(
- serd_substring(type_lit_str + 1, 5), serd_substring(type_lit_str + 10, 27));
+ const char* type_lit_str = "\"Hallo\"^^<http://example.org/Greeting>";
+ SerdNode* sliced_type_lit =
+ serd_new_literal(serd_substring(type_lit_str + 1, 5),
+ SERD_HAS_DATATYPE,
+ serd_substring(type_lit_str + 10, 27));
assert(!strcmp(serd_node_string(sliced_type_lit), "Hallo"));
@@ -514,11 +522,6 @@ test_literal(void)
assert(datatype);
assert(!strcmp(serd_node_string(datatype), "http://example.org/Greeting"));
serd_node_free(sliced_type_lit);
-
- SerdNode* const plain_lit =
- serd_new_plain_literal(serd_string("Plain"), serd_empty_string());
- assert(!strcmp(serd_node_string(plain_lit), "Plain"));
- serd_node_free(plain_lit);
}
static void
@@ -537,17 +540,17 @@ test_compare(void)
SerdNode* xsd_short =
serd_new_uri(serd_string("http://www.w3.org/2001/XMLSchema#short"));
- SerdNode* angst =
- serd_new_plain_literal(serd_string("angst"), serd_empty_string());
+ SerdNode* angst = serd_new_string(serd_string("angst"));
- SerdNode* angst_de =
- serd_new_plain_literal(serd_string("angst"), serd_string("de"));
+ SerdNode* angst_de = serd_new_literal(
+ serd_string("angst"), SERD_HAS_LANGUAGE, serd_string("de"));
- SerdNode* angst_en =
- serd_new_plain_literal(serd_string("angst"), serd_string("en"));
+ assert(angst_de);
+ SerdNode* angst_en = serd_new_literal(
+ serd_string("angst"), SERD_HAS_LANGUAGE, serd_string("en"));
- SerdNode* hallo =
- serd_new_plain_literal(serd_string("Hallo"), serd_string("de"));
+ SerdNode* hallo = serd_new_literal(
+ serd_string("Hallo"), SERD_HAS_LANGUAGE, serd_string("de"));
SerdNode* hello = serd_new_string(serd_string("Hello"));
SerdNode* universe = serd_new_string(serd_string("Universe"));
@@ -555,11 +558,14 @@ test_compare(void)
SerdNode* blank = serd_new_blank(serd_string("b1"));
SerdNode* uri = serd_new_uri(serd_string("http://example.org/"));
- SerdNode* aardvark = serd_new_typed_literal(
- serd_string("alex"), serd_string("http://example.org/Aardvark"));
+ SerdNode* aardvark =
+ serd_new_literal(serd_string("alex"),
+ SERD_HAS_DATATYPE,
+ serd_string("http://example.org/Aardvark"));
- SerdNode* badger = serd_new_typed_literal(
- serd_string("bobby"), serd_string("http://example.org/Badger"));
+ SerdNode* badger = serd_new_literal(serd_string("bobby"),
+ SERD_HAS_DATATYPE,
+ serd_string("http://example.org/Badger"));
// Types are ordered according to their SerdNodeType (more or less arbitrary)
assert(serd_node_compare(hello, uri) < 0);
diff --git a/test/test_reader_writer.c b/test/test_reader_writer.c
index aa820fa6..df2c3957 100644
--- a/test/test_reader_writer.c
+++ b/test/test_reader_writer.c
@@ -179,8 +179,11 @@ test_writer(const char* const path)
const SerdStringView en = serd_string("en");
SerdNode* const o = serd_new_string(serd_string("o"));
- SerdNode* const t = serd_new_typed_literal(serd_string("t"), urn_Type);
- SerdNode* const l = serd_new_plain_literal(serd_string("l"), en);
+
+ SerdNode* const t =
+ serd_new_literal(serd_string("t"), SERD_HAS_DATATYPE, urn_Type);
+
+ SerdNode* const l = serd_new_literal(serd_string("l"), SERD_HAS_LANGUAGE, en);
const SerdNode* good[][3] = {{s, p, o}, {s, p, t}, {s, p, l}};
diff --git a/test/test_string.c b/test/test_string.c
index b551b9fe..5205cc9c 100644
--- a/test/test_string.c
+++ b/test/test_string.c
@@ -3,28 +3,13 @@
#undef NDEBUG
-#include "serd/node.h"
#include "serd/status.h"
-#include "serd/string.h"
#include "zix/attributes.h"
#include <assert.h>
-#include <stdint.h>
-#include <stdio.h>
#include <string.h>
static void
-test_strlen(void)
-{
- const uint8_t str[] = {'"', '5', 0xE2, 0x82, 0xAC, '"', '\n', 0};
-
- SerdNodeFlags flags = 0;
- size_t n_bytes = serd_strlen((const char*)str, &flags);
- assert(n_bytes == 7 && flags == (SERD_HAS_QUOTE | SERD_HAS_NEWLINE));
- assert(serd_strlen((const char*)str, NULL) == 7);
-}
-
-static void
test_strerror(void)
{
const char* msg = serd_strerror(SERD_SUCCESS);
@@ -41,9 +26,7 @@ test_strerror(void)
ZIX_PURE_FUNC int
main(void)
{
- test_strlen();
test_strerror();
- printf("Success\n");
return 0;
}
diff --git a/test/test_writer.c b/test/test_writer.c
index 2a852562..9ce69c55 100644
--- a/test/test_writer.c
+++ b/test/test_writer.c
@@ -110,8 +110,10 @@ test_write_long_literal(void)
SerdNode* s = serd_new_uri(serd_string("http://example.org/s"));
SerdNode* p = serd_new_uri(serd_string("http://example.org/p"));
- SerdNode* o = serd_new_string(serd_string("hello \"\"\"world\"\"\"!"));
+ SerdNode* o = serd_new_literal(
+ serd_string("hello \"\"\"world\"\"\"!"), SERD_IS_LONG, serd_empty_string());
+ assert(serd_node_flags(o) & SERD_IS_LONG);
assert(!serd_sink_write(serd_writer_sink(writer), 0, s, p, o, NULL));
serd_node_free(o);