From c579186c5dd4e11bffddd353cef8978a66ef9c10 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sat, 20 Feb 2021 10:11:33 -0500 Subject: WIP: Simplify node construction --- bindings/cpp/include/serd/serd.hpp | 7 ++-- bindings/cpp/test/test_serd_hpp.cpp | 3 +- include/serd/serd.h | 30 ++++++--------- src/n3.c | 25 +++--------- src/node.c | 76 +++++++++---------------------------- src/string.c | 46 ---------------------- src/string_utils.h | 3 -- src/writer.c | 3 +- test/test_node.c | 10 ----- test/test_string.c | 12 ------ 10 files changed, 38 insertions(+), 177 deletions(-) diff --git a/bindings/cpp/include/serd/serd.hpp b/bindings/cpp/include/serd/serd.hpp index 3e90e6b1..95abcc04 100644 --- a/bindings/cpp/include/serd/serd.hpp +++ b/bindings/cpp/include/serd/serd.hpp @@ -128,10 +128,9 @@ enum class NodeType { /// @copydoc SerdNodeFlag enum class NodeFlag { - has_newline = SERD_HAS_NEWLINE, ///< @copydoc SERD_HAS_NEWLINE - has_quote = SERD_HAS_QUOTE, ///< @copydoc SERD_HAS_QUOTE - has_datatype = SERD_HAS_DATATYPE, ///< @copydoc SERD_HAS_DATATYPE - has_language = SERD_HAS_LANGUAGE ///< @copydoc SERD_HAS_LANGUAGE + is_long_literal = SERD_IS_LONG_LITERAL, ///< @copydoc SERD_IS_LONG_LITERAL + has_datatype = SERD_HAS_DATATYPE, ///< @copydoc SERD_HAS_DATATYPE + has_language = SERD_HAS_LANGUAGE ///< @copydoc SERD_HAS_LANGUAGE }; /// Bitwise OR of #NodeFlag values diff --git a/bindings/cpp/test/test_serd_hpp.cpp b/bindings/cpp/test/test_serd_hpp.cpp index 24e663fa..e3721e09 100644 --- a/bindings/cpp/test/test_serd_hpp.cpp +++ b/bindings/cpp/test/test_serd_hpp.cpp @@ -342,8 +342,7 @@ test_nodes() "http://example.org/rel/uri"); const auto string = serd::make_string("hello\n\"world\""); - assert(string.flags() == - (serd::NodeFlag::has_newline | serd::NodeFlag::has_quote)); + assert(!string.flags()); const auto number = serd::make_integer(42); assert(number.flags() == serd::NodeFlag::has_datatype); diff --git a/include/serd/serd.h b/include/serd/serd.h index 68dec4f3..88d3178b 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -231,10 +231,9 @@ typedef enum { /// Flags indicating certain string properties relevant to serialisation typedef enum { - SERD_HAS_NEWLINE = 1u << 0u, ///< Contains line breaks ('\\n' or '\\r') - SERD_HAS_QUOTE = 1u << 1u, ///< Contains quotes ('"') - SERD_HAS_DATATYPE = 1u << 2u, ///< Literal node has datatype - SERD_HAS_LANGUAGE = 1u << 3u ///< Literal node has language + SERD_IS_LONG_LITERAL = 1u << 1u, ///< Literal node uses triple quotes + SERD_HAS_DATATYPE = 1u << 2u, ///< Literal node has datatype + SERD_HAS_LANGUAGE = 1u << 3u, ///< Literal node has language } SerdNodeFlag; /// Bitwise OR of SerdNodeFlag values @@ -386,17 +385,6 @@ SERD_CONST_API const char* SERD_NONNULL serd_strerror(SerdStatus status); -/** - Measure a UTF-8 string. - - @return Length of `str` in bytes. - @param str A null-terminated UTF-8 string. - @param flags (Output) Set to the applicable flags. -*/ -SERD_API -size_t -serd_strlen(const char* SERD_NONNULL str, SerdNodeFlags* SERD_NULLABLE flags); - /** Decode a base64 string. @@ -790,10 +778,14 @@ serd_node_to_syntax(const SerdNode* SERD_NONNULL node, SerdSyntax syntax); /** Create a new "simple" node that is just a string. - This can be used to create blank, CURIE, or URI nodes from an already - measured string or slice of a buffer, which avoids a strlen compared to the - friendly constructors. This may not be used for literals since those must - be measured to set the SERD_HAS_NEWLINE and SERD_HAS_QUOTE flags. + This can be used to create nodes from an already measured string or slice of + a buffer, which avoids measuring the string compared to the friendlier + constructors. If `type` is #SERD_LITERAL, then this creates a plain + literal with no language tag. + + @param type The type of node to create. + + @param string The string contents of the node. */ SERD_API SerdNode* SERD_ALLOCATED diff --git a/src/n3.c b/src/n3.c index edc0f52b..dd1b5168 100644 --- a/src/n3.c +++ b/src/n3.c @@ -155,11 +155,9 @@ read_ECHAR(SerdReader* reader, SerdNode* dest) eat_byte_safe(reader, 'b'); return push_byte(reader, dest, '\b'); case 'n': - dest->flags |= SERD_HAS_NEWLINE; eat_byte_safe(reader, 'n'); return push_byte(reader, dest, '\n'); case 'r': - dest->flags |= SERD_HAS_NEWLINE; eat_byte_safe(reader, 'r'); return push_byte(reader, dest, '\r'); case 'f': @@ -248,23 +246,8 @@ read_utf8_code(SerdReader* reader, SerdNode* dest, uint32_t* code, uint8_t c) static inline SerdStatus read_character(SerdReader* reader, SerdNode* dest, uint8_t c) { - if (!(c & 0x80)) { - switch (c) { - case 0xA: - case 0xD: - dest->flags |= SERD_HAS_NEWLINE; - break; - case '"': - case '\'': - dest->flags |= SERD_HAS_QUOTE; - break; - default: - break; - } - - return push_byte(reader, dest, c); - } - return read_utf8_character(reader, dest, c); + return (c & 0x80) ? read_utf8_character(reader, dest, c) + : push_byte(reader, dest, c); } // [10] comment ::= '#' ( [^#xA #xD] )* @@ -350,7 +333,7 @@ read_STRING_LITERAL_LONG(SerdReader* reader, SerdNode* ref, uint8_t q) eat_byte_safe(reader, q3); break; } - ref->flags |= SERD_HAS_QUOTE; + push_byte(reader, ref, c); st = read_character(reader, ref, (uint8_t)q2); } else { @@ -436,6 +419,8 @@ read_String(SerdReader* reader, SerdNode* node) } eat_byte_safe(reader, q3); + node->flags |= SERD_IS_LONG_LITERAL; + return read_STRING_LITERAL_LONG(reader, node, (uint8_t)q1); } diff --git a/src/node.c b/src/node.c index 0283b14f..606789c9 100644 --- a/src/node.c +++ b/src/node.c @@ -117,9 +117,8 @@ serd_node_malloc(size_t n_bytes, SerdNodeFlags flags, SerdNodeType type) const size_t size = sizeof(SerdNode) + serd_node_pad_size(n_bytes); SerdNode* node = (SerdNode*)serd_calloc_aligned(sizeof(SerdNode), size); - node->n_bytes = 0; - node->flags = flags; - node->type = type; + node->flags = flags; + node->type = type; assert((uintptr_t)node % sizeof(SerdNode) == 0u); return node; @@ -165,17 +164,10 @@ serd_node_zero_pad(SerdNode* node) SerdNode* serd_new_simple_node(SerdNodeType type, const SerdStringView str) { - if (type != SERD_BLANK && type != SERD_CURIE && type != SERD_URI && - type != SERD_VARIABLE) { - return NULL; - } - - SerdNodeFlags flags = 0; - const size_t n_bytes = str.buf ? serd_strlen(str.buf, &flags) : 0; - SerdNode* node = serd_node_malloc(n_bytes, flags, type); + SerdNode* const node = serd_node_malloc(str.len, 0, type); - memcpy(serd_node_buffer(node), str.buf, n_bytes); - node->n_bytes = n_bytes; + memcpy(serd_node_buffer(node), str.buf, str.len); + node->n_bytes = str.len; serd_node_check_padding(node); return node; @@ -184,32 +176,28 @@ serd_new_simple_node(SerdNodeType type, const SerdStringView str) SerdNode* serd_new_string(const SerdStringView str) { - SerdNodeFlags flags = 0; - const size_t n_bytes = serd_substrlen(str.buf, str.len, &flags); - SerdNode* node = serd_node_malloc(n_bytes, flags, SERD_LITERAL); + SerdNodeFlags flags = 0; + SerdNode* node = serd_node_malloc(str.len, flags, SERD_LITERAL); memcpy(serd_node_buffer(node), str.buf, str.len); - node->n_bytes = n_bytes; + node->n_bytes = str.len; serd_node_check_padding(node); return node; } -/// Internal pre-measured implementation of serd_new_plain_literal -static SerdNode* -serd_new_plain_literal_i(const SerdStringView str, - SerdNodeFlags flags, - const SerdStringView lang) +SerdNode* +serd_new_plain_literal(const SerdStringView str, const SerdStringView lang) { - assert(str.len); - assert(lang.len); - - flags |= SERD_HAS_LANGUAGE; + if (!lang.len) { + return serd_new_string(str); + } const size_t len = serd_node_pad_size(str.len); const size_t total_len = len + sizeof(SerdNode) + lang.len; - SerdNode* node = serd_node_malloc(total_len, flags, SERD_LITERAL); + SerdNode* node = serd_node_malloc(total_len, SERD_HAS_LANGUAGE, SERD_LITERAL); + memcpy(serd_node_buffer(node), str.buf, str.len); node->n_bytes = str.len; @@ -287,34 +275,6 @@ serd_new_typed_literal_uri(const SerdStringView str, return node; } -/// Internal pre-measured implementation of serd_new_typed_literal -static SerdNode* -serd_new_typed_literal_i(const SerdStringView str, - SerdNodeFlags flags, - SerdNodeType datatype_type, - const SerdStringView datatype) -{ - assert(str.len); - assert(datatype.len); - assert(strcmp(datatype.buf, NS_RDF "langString")); - - return serd_new_typed_literal_expanded( - str, flags, datatype_type, datatype, SERD_EMPTY_STRING()); -} - -SerdNode* -serd_new_plain_literal(const SerdStringView str, const SerdStringView lang) -{ - if (!lang.len) { - return serd_new_string(str); - } - - SerdNodeFlags flags = 0; - serd_strlen(str.buf, &flags); - - return serd_new_plain_literal_i(str, flags, lang); -} - SerdNode* serd_new_typed_literal(const SerdStringView str, const SerdStringView datatype_uri) @@ -327,10 +287,8 @@ serd_new_typed_literal(const SerdStringView str, return NULL; } - SerdNodeFlags flags = 0; - serd_strlen(str.buf, &flags); - - return serd_new_typed_literal_i(str, flags, SERD_URI, datatype_uri); + return serd_new_typed_literal_expanded( + str, 0, SERD_URI, datatype_uri, SERD_EMPTY_STRING()); } SerdNode* diff --git a/src/string.c b/src/string.c index f31dfe0b..d9a0140c 100644 --- a/src/string.c +++ b/src/string.c @@ -75,49 +75,3 @@ serd_strerror(SerdStatus status) } return "Unknown error"; // never reached } - -static inline void -serd_update_flags(const char c, SerdNodeFlags* const flags) -{ - switch (c) { - case '\r': - case '\n': - *flags |= SERD_HAS_NEWLINE; - break; - case '"': - *flags |= SERD_HAS_QUOTE; - default: - break; - } -} - -size_t -serd_substrlen(const char* const str, - const size_t len, - SerdNodeFlags* const flags) -{ - assert(flags); - - size_t i = 0; - *flags = 0; - for (; i < len && str[i]; ++i) { - serd_update_flags(str[i], flags); - } - - return i; -} - -size_t -serd_strlen(const char* str, SerdNodeFlags* flags) -{ - if (flags) { - size_t i = 0; - *flags = 0; - for (; str[i]; ++i) { - serd_update_flags(str[i], flags); - } - return i; - } - - return strlen(str); -} diff --git a/src/string_utils.h b/src/string_utils.h index a302bc49..cad800b9 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -96,9 +96,6 @@ is_windows_path(const char* path) (path[2] == '/' || path[2] == '\\'); } -size_t -serd_substrlen(const char* str, size_t len, SerdNodeFlags* flags); - static inline char serd_to_upper(const char c) { diff --git a/src/writer.c b/src/writer.c index d15320ae..566c6fde 100644 --- a/src/writer.c +++ b/src/writer.c @@ -694,8 +694,7 @@ write_literal(SerdWriter* writer, } SerdStatus st = SERD_SUCCESS; - if (supports_abbrev(writer) && - (node->flags & (SERD_HAS_NEWLINE | SERD_HAS_QUOTE))) { + if (supports_abbrev(writer) && (node->flags & SERD_IS_LONG_LITERAL)) { TRY(st, esink("\"\"\"", 3, writer)); TRY(st, write_text(writer, WRITE_LONG_STRING, node_str, node->n_bytes)); TRY(st, esink("\"\"\"", 3, writer)); diff --git a/test/test_node.c b/test/test_node.c index f942f242..f484f9a9 100644 --- a/test/test_node.c +++ b/test/test_node.c @@ -302,7 +302,6 @@ test_node_from_syntax(void) { SerdNode* const hello = serd_new_string(SERD_STATIC_STRING("hello\"")); assert(serd_node_length(hello) == 6); - assert(serd_node_flags(hello) == SERD_HAS_QUOTE); assert(!strncmp(serd_node_string(hello), "hello\"", 6)); serd_node_free(hello); } @@ -312,25 +311,17 @@ test_node_from_substring(void) { SerdNode* const a_b = serd_new_string(SERD_STRING_VIEW("a\"bc", 3)); assert(serd_node_length(a_b) == 3); - assert(serd_node_flags(a_b) == SERD_HAS_QUOTE); assert(strlen(serd_node_string(a_b)) == 3); assert(!strncmp(serd_node_string(a_b), "a\"b", 3)); serd_node_free(a_b); } -static void -test_simple_node(void) -{ - assert(!serd_new_simple_node(SERD_LITERAL, SERD_STATIC_STRING("Literal"))); -} - static void test_literal(void) { SerdNode* hello2 = serd_new_string(SERD_STATIC_STRING("hello\"")); assert(serd_node_length(hello2) == 6 && - serd_node_flags(hello2) == SERD_HAS_QUOTE && !strcmp(serd_node_string(hello2), "hello\"")); SerdNode* hello3 = @@ -402,7 +393,6 @@ main(void) test_node_equals(); test_node_from_syntax(); test_node_from_substring(); - test_simple_node(); test_literal(); test_blank(); diff --git a/test/test_string.c b/test/test_string.c index 576f2f96..a4c20506 100644 --- a/test/test_string.c +++ b/test/test_string.c @@ -23,17 +23,6 @@ #include #include -static void -test_strlen(void) -{ - const uint8_t str[] = {'"', '5', 0xE2, 0x82, 0xAC, '"', '\n', 0}; - - SerdNodeFlags flags = 0; - size_t n_bytes = serd_strlen((const char*)str, &flags); - assert(n_bytes == 7 && flags == (SERD_HAS_QUOTE | SERD_HAS_NEWLINE)); - assert(serd_strlen((const char*)str, NULL) == 7); -} - static void test_strerror(void) { @@ -51,7 +40,6 @@ test_strerror(void) int main(void) { - test_strlen(); test_strerror(); printf("Success\n"); -- cgit v1.2.1