about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2021-02-20 10:11:33 -0500
committer David Robillard <d@drobilla.net> 2021-03-08 23:36:35 -0500
commit c579186c5dd4e11bffddd353cef8978a66ef9c10 (patch)
tree b89f1059d161872ded394a5c620a3b4f156f7972
parent c7a827b07898551611d43f92cf4e2fa53595957a (diff)
download serd-c579186c5dd4e11bffddd353cef8978a66ef9c10.tar.gz
serd-c579186c5dd4e11bffddd353cef8978a66ef9c10.tar.bz2
serd-c579186c5dd4e11bffddd353cef8978a66ef9c10.zip
WIP: Simplify node construction
-rw-r--r-- bindings/cpp/include/serd/serd.hpp 7
-rw-r--r-- bindings/cpp/test/test_serd_hpp.cpp 3
-rw-r--r-- include/serd/serd.h 30
-rw-r--r-- src/n3.c 25
-rw-r--r-- src/node.c 76
-rw-r--r-- src/string.c 46
-rw-r--r-- src/string_utils.h 3
-rw-r--r-- src/writer.c 3
-rw-r--r-- test/test_node.c 10
-rw-r--r-- test/test_string.c 12
10 files changed, 38 insertions, 177 deletions
diff --git a/bindings/cpp/include/serd/serd.hpp b/bindings/cpp/include/serd/serd.hpp
index 3e90e6b1..95abcc04 100644
--- a/bindings/cpp/include/serd/serd.hpp
+++ b/bindings/cpp/include/serd/serd.hpp
@@ -128,10 +128,9 @@ enum class NodeType {
/// @copydoc SerdNodeFlag
enum class NodeFlag {
- has_newline = SERD_HAS_NEWLINE, ///< @copydoc SERD_HAS_NEWLINE
- has_quote = SERD_HAS_QUOTE, ///< @copydoc SERD_HAS_QUOTE
- has_datatype = SERD_HAS_DATATYPE, ///< @copydoc SERD_HAS_DATATYPE
- has_language = SERD_HAS_LANGUAGE ///< @copydoc SERD_HAS_LANGUAGE
+ is_long_literal = SERD_IS_LONG_LITERAL, ///< @copydoc SERD_IS_LONG_LITERAL
+ has_datatype = SERD_HAS_DATATYPE, ///< @copydoc SERD_HAS_DATATYPE
+ has_language = SERD_HAS_LANGUAGE ///< @copydoc SERD_HAS_LANGUAGE
};
/// Bitwise OR of #NodeFlag values
diff --git a/bindings/cpp/test/test_serd_hpp.cpp b/bindings/cpp/test/test_serd_hpp.cpp
index 24e663fa..e3721e09 100644
--- a/bindings/cpp/test/test_serd_hpp.cpp
+++ b/bindings/cpp/test/test_serd_hpp.cpp
@@ -342,8 +342,7 @@ test_nodes()
"http://example.org/rel/uri");
const auto string = serd::make_string("hello\n\"world\"");
- assert(string.flags() ==
- (serd::NodeFlag::has_newline | serd::NodeFlag::has_quote));
+ assert(!string.flags());
const auto number = serd::make_integer(42);
assert(number.flags() == serd::NodeFlag::has_datatype);
diff --git a/include/serd/serd.h b/include/serd/serd.h
index 68dec4f3..88d3178b 100644
--- a/include/serd/serd.h
+++ b/include/serd/serd.h
@@ -231,10 +231,9 @@ typedef enum {
/// Flags indicating certain string properties relevant to serialisation
typedef enum {
- SERD_HAS_NEWLINE = 1u << 0u, ///< Contains line breaks ('\\n' or '\\r')
- SERD_HAS_QUOTE = 1u << 1u, ///< Contains quotes ('"')
- SERD_HAS_DATATYPE = 1u << 2u, ///< Literal node has datatype
- SERD_HAS_LANGUAGE = 1u << 3u ///< Literal node has language
+ SERD_IS_LONG_LITERAL = 1u << 1u, ///< Literal node uses triple quotes
+ SERD_HAS_DATATYPE = 1u << 2u, ///< Literal node has datatype
+ SERD_HAS_LANGUAGE = 1u << 3u, ///< Literal node has language
} SerdNodeFlag;
/// Bitwise OR of SerdNodeFlag values
@@ -387,17 +386,6 @@ const char* SERD_NONNULL
serd_strerror(SerdStatus status);
/**
- Measure a UTF-8 string.
-
- @return Length of `str` in bytes.
- @param str A null-terminated UTF-8 string.
- @param flags (Output) Set to the applicable flags.
-*/
-SERD_API
-size_t
-serd_strlen(const char* SERD_NONNULL str, SerdNodeFlags* SERD_NULLABLE flags);
-
-/**
Decode a base64 string.
This function can be used to deserialise a blob node created with
@@ -790,10 +778,14 @@ serd_node_to_syntax(const SerdNode* SERD_NONNULL node, SerdSyntax syntax);
/**
Create a new "simple" node that is just a string.
- This can be used to create blank, CURIE, or URI nodes from an already
- measured string or slice of a buffer, which avoids a strlen compared to the
- friendly constructors. This may not be used for literals since those must
- be measured to set the SERD_HAS_NEWLINE and SERD_HAS_QUOTE flags.
+ This can be used to create nodes from an already measured string or slice of
+ a buffer, which avoids measuring the string compared to the friendlier
+ constructors. If `type` is #SERD_LITERAL, then this creates a plain
+ literal with no language tag.
+
+ @param type The type of node to create.
+
+ @param string The string contents of the node.
*/
SERD_API
SerdNode* SERD_ALLOCATED
diff --git a/src/n3.c b/src/n3.c
index edc0f52b..dd1b5168 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -155,11 +155,9 @@ read_ECHAR(SerdReader* reader, SerdNode* dest)
eat_byte_safe(reader, 'b');
return push_byte(reader, dest, '\b');
case 'n':
- dest->flags |= SERD_HAS_NEWLINE;
eat_byte_safe(reader, 'n');
return push_byte(reader, dest, '\n');
case 'r':
- dest->flags |= SERD_HAS_NEWLINE;
eat_byte_safe(reader, 'r');
return push_byte(reader, dest, '\r');
case 'f':
@@ -248,23 +246,8 @@ read_utf8_code(SerdReader* reader, SerdNode* dest, uint32_t* code, uint8_t c)
static inline SerdStatus
read_character(SerdReader* reader, SerdNode* dest, uint8_t c)
{
- if (!(c & 0x80)) {
- switch (c) {
- case 0xA:
- case 0xD:
- dest->flags |= SERD_HAS_NEWLINE;
- break;
- case '"':
- case '\'':
- dest->flags |= SERD_HAS_QUOTE;
- break;
- default:
- break;
- }
-
- return push_byte(reader, dest, c);
- }
- return read_utf8_character(reader, dest, c);
+ return (c & 0x80) ? read_utf8_character(reader, dest, c)
+ : push_byte(reader, dest, c);
}
// [10] comment ::= '#' ( [^#xA #xD] )*
@@ -350,7 +333,7 @@ read_STRING_LITERAL_LONG(SerdReader* reader, SerdNode* ref, uint8_t q)
eat_byte_safe(reader, q3);
break;
}
- ref->flags |= SERD_HAS_QUOTE;
+
push_byte(reader, ref, c);
st = read_character(reader, ref, (uint8_t)q2);
} else {
@@ -436,6 +419,8 @@ read_String(SerdReader* reader, SerdNode* node)
}
eat_byte_safe(reader, q3);
+ node->flags |= SERD_IS_LONG_LITERAL;
+
return read_STRING_LITERAL_LONG(reader, node, (uint8_t)q1);
}
diff --git a/src/node.c b/src/node.c
index 0283b14f..606789c9 100644
--- a/src/node.c
+++ b/src/node.c
@@ -117,9 +117,8 @@ serd_node_malloc(size_t n_bytes, SerdNodeFlags flags, SerdNodeType type)
const size_t size = sizeof(SerdNode) + serd_node_pad_size(n_bytes);
SerdNode* node = (SerdNode*)serd_calloc_aligned(sizeof(SerdNode), size);
- node->n_bytes = 0;
- node->flags = flags;
- node->type = type;
+ node->flags = flags;
+ node->type = type;
assert((uintptr_t)node % sizeof(SerdNode) == 0u);
return node;
@@ -165,17 +164,10 @@ serd_node_zero_pad(SerdNode* node)
SerdNode*
serd_new_simple_node(SerdNodeType type, const SerdStringView str)
{
- if (type != SERD_BLANK && type != SERD_CURIE && type != SERD_URI &&
- type != SERD_VARIABLE) {
- return NULL;
- }
-
- SerdNodeFlags flags = 0;
- const size_t n_bytes = str.buf ? serd_strlen(str.buf, &flags) : 0;
- SerdNode* node = serd_node_malloc(n_bytes, flags, type);
+ SerdNode* const node = serd_node_malloc(str.len, 0, type);
- memcpy(serd_node_buffer(node), str.buf, n_bytes);
- node->n_bytes = n_bytes;
+ memcpy(serd_node_buffer(node), str.buf, str.len);
+ node->n_bytes = str.len;
serd_node_check_padding(node);
return node;
@@ -184,32 +176,28 @@ serd_new_simple_node(SerdNodeType type, const SerdStringView str)
SerdNode*
serd_new_string(const SerdStringView str)
{
- SerdNodeFlags flags = 0;
- const size_t n_bytes = serd_substrlen(str.buf, str.len, &flags);
- SerdNode* node = serd_node_malloc(n_bytes, flags, SERD_LITERAL);
+ SerdNodeFlags flags = 0;
+ SerdNode* node = serd_node_malloc(str.len, flags, SERD_LITERAL);
memcpy(serd_node_buffer(node), str.buf, str.len);
- node->n_bytes = n_bytes;
+ node->n_bytes = str.len;
serd_node_check_padding(node);
return node;
}
-/// Internal pre-measured implementation of serd_new_plain_literal
-static SerdNode*
-serd_new_plain_literal_i(const SerdStringView str,
- SerdNodeFlags flags,
- const SerdStringView lang)
+SerdNode*
+serd_new_plain_literal(const SerdStringView str, const SerdStringView lang)
{
- assert(str.len);
- assert(lang.len);
-
- flags |= SERD_HAS_LANGUAGE;
+ if (!lang.len) {
+ return serd_new_string(str);
+ }
const size_t len = serd_node_pad_size(str.len);
const size_t total_len = len + sizeof(SerdNode) + lang.len;
- SerdNode* node = serd_node_malloc(total_len, flags, SERD_LITERAL);
+ SerdNode* node = serd_node_malloc(total_len, SERD_HAS_LANGUAGE, SERD_LITERAL);
+
memcpy(serd_node_buffer(node), str.buf, str.len);
node->n_bytes = str.len;
@@ -287,34 +275,6 @@ serd_new_typed_literal_uri(const SerdStringView str,
return node;
}
-/// Internal pre-measured implementation of serd_new_typed_literal
-static SerdNode*
-serd_new_typed_literal_i(const SerdStringView str,
- SerdNodeFlags flags,
- SerdNodeType datatype_type,
- const SerdStringView datatype)
-{
- assert(str.len);
- assert(datatype.len);
- assert(strcmp(datatype.buf, NS_RDF "langString"));
-
- return serd_new_typed_literal_expanded(
- str, flags, datatype_type, datatype, SERD_EMPTY_STRING());
-}
-
-SerdNode*
-serd_new_plain_literal(const SerdStringView str, const SerdStringView lang)
-{
- if (!lang.len) {
- return serd_new_string(str);
- }
-
- SerdNodeFlags flags = 0;
- serd_strlen(str.buf, &flags);
-
- return serd_new_plain_literal_i(str, flags, lang);
-}
-
SerdNode*
serd_new_typed_literal(const SerdStringView str,
const SerdStringView datatype_uri)
@@ -327,10 +287,8 @@ serd_new_typed_literal(const SerdStringView str,
return NULL;
}
- SerdNodeFlags flags = 0;
- serd_strlen(str.buf, &flags);
-
- return serd_new_typed_literal_i(str, flags, SERD_URI, datatype_uri);
+ return serd_new_typed_literal_expanded(
+ str, 0, SERD_URI, datatype_uri, SERD_EMPTY_STRING());
}
SerdNode*
diff --git a/src/string.c b/src/string.c
index f31dfe0b..d9a0140c 100644
--- a/src/string.c
+++ b/src/string.c
@@ -75,49 +75,3 @@ serd_strerror(SerdStatus status)
}
return "Unknown error"; // never reached
}
-
-static inline void
-serd_update_flags(const char c, SerdNodeFlags* const flags)
-{
- switch (c) {
- case '\r':
- case '\n':
- *flags |= SERD_HAS_NEWLINE;
- break;
- case '"':
- *flags |= SERD_HAS_QUOTE;
- default:
- break;
- }
-}
-
-size_t
-serd_substrlen(const char* const str,
- const size_t len,
- SerdNodeFlags* const flags)
-{
- assert(flags);
-
- size_t i = 0;
- *flags = 0;
- for (; i < len && str[i]; ++i) {
- serd_update_flags(str[i], flags);
- }
-
- return i;
-}
-
-size_t
-serd_strlen(const char* str, SerdNodeFlags* flags)
-{
- if (flags) {
- size_t i = 0;
- *flags = 0;
- for (; str[i]; ++i) {
- serd_update_flags(str[i], flags);
- }
- return i;
- }
-
- return strlen(str);
-}
diff --git a/src/string_utils.h b/src/string_utils.h
index a302bc49..cad800b9 100644
--- a/src/string_utils.h
+++ b/src/string_utils.h
@@ -96,9 +96,6 @@ is_windows_path(const char* path)
(path[2] == '/' || path[2] == '\\');
}
-size_t
-serd_substrlen(const char* str, size_t len, SerdNodeFlags* flags);
-
static inline char
serd_to_upper(const char c)
{
diff --git a/src/writer.c b/src/writer.c
index d15320ae..566c6fde 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -694,8 +694,7 @@ write_literal(SerdWriter* writer,
}
SerdStatus st = SERD_SUCCESS;
- if (supports_abbrev(writer) &&
- (node->flags & (SERD_HAS_NEWLINE | SERD_HAS_QUOTE))) {
+ if (supports_abbrev(writer) && (node->flags & SERD_IS_LONG_LITERAL)) {
TRY(st, esink("\"\"\"", 3, writer));
TRY(st, write_text(writer, WRITE_LONG_STRING, node_str, node->n_bytes));
TRY(st, esink("\"\"\"", 3, writer));
diff --git a/test/test_node.c b/test/test_node.c
index f942f242..f484f9a9 100644
--- a/test/test_node.c
+++ b/test/test_node.c
@@ -302,7 +302,6 @@ test_node_from_syntax(void)
{
SerdNode* const hello = serd_new_string(SERD_STATIC_STRING("hello\""));
assert(serd_node_length(hello) == 6);
- assert(serd_node_flags(hello) == SERD_HAS_QUOTE);
assert(!strncmp(serd_node_string(hello), "hello\"", 6));
serd_node_free(hello);
}
@@ -312,25 +311,17 @@ test_node_from_substring(void)
{
SerdNode* const a_b = serd_new_string(SERD_STRING_VIEW("a\"bc", 3));
assert(serd_node_length(a_b) == 3);
- assert(serd_node_flags(a_b) == SERD_HAS_QUOTE);
assert(strlen(serd_node_string(a_b)) == 3);
assert(!strncmp(serd_node_string(a_b), "a\"b", 3));
serd_node_free(a_b);
}
static void
-test_simple_node(void)
-{
- assert(!serd_new_simple_node(SERD_LITERAL, SERD_STATIC_STRING("Literal")));
-}
-
-static void
test_literal(void)
{
SerdNode* hello2 = serd_new_string(SERD_STATIC_STRING("hello\""));
assert(serd_node_length(hello2) == 6 &&
- serd_node_flags(hello2) == SERD_HAS_QUOTE &&
!strcmp(serd_node_string(hello2), "hello\""));
SerdNode* hello3 =
@@ -402,7 +393,6 @@ main(void)
test_node_equals();
test_node_from_syntax();
test_node_from_substring();
- test_simple_node();
test_literal();
test_blank();
diff --git a/test/test_string.c b/test/test_string.c
index 576f2f96..a4c20506 100644
--- a/test/test_string.c
+++ b/test/test_string.c
@@ -24,17 +24,6 @@
#include <string.h>
static void
-test_strlen(void)
-{
- const uint8_t str[] = {'"', '5', 0xE2, 0x82, 0xAC, '"', '\n', 0};
-
- SerdNodeFlags flags = 0;
- size_t n_bytes = serd_strlen((const char*)str, &flags);
- assert(n_bytes == 7 && flags == (SERD_HAS_QUOTE | SERD_HAS_NEWLINE));
- assert(serd_strlen((const char*)str, NULL) == 7);
-}
-
-static void
test_strerror(void)
{
const char* msg = serd_strerror(SERD_SUCCESS);
@@ -51,7 +40,6 @@ test_strerror(void)
int
main(void)
{
- test_strlen();
test_strerror();
printf("Success\n");