diff options
-rw-r--r-- | serd/serd.h | 38 | ||||
-rw-r--r-- | src/n3.c | 12 | ||||
-rw-r--r-- | src/node.c | 132 | ||||
-rw-r--r-- | src/reader.c | 2 | ||||
-rw-r--r-- | src/writer.c | 48 | ||||
-rw-r--r-- | tests/serd_test.c | 93 |
6 files changed, 244 insertions, 81 deletions
diff --git a/serd/serd.h b/serd/serd.h index 3e2bc527..6add5a78 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -202,8 +202,10 @@ typedef enum { Flags indicating certain string properties relevant to serialisation. */ typedef enum { - SERD_HAS_NEWLINE = 1, /**< Contains line breaks ('\\n' or '\\r') */ - SERD_HAS_QUOTE = 1 << 1 /**< Contains quotes ('"') */ + SERD_HAS_NEWLINE = 1, /**< Contains line breaks ('\\n' or '\\r') */ + SERD_HAS_QUOTE = 1 << 1, /**< Contains quotes ('"') */ + SERD_HAS_DATATYPE = 1 << 2, /**< Literal node has datatype */ + SERD_HAS_LANGUAGE = 1 << 3 /**< Literal node has language */ } SerdNodeFlag; /** @@ -444,6 +446,16 @@ SerdNode* serd_node_new_substring(SerdType type, const char* str, size_t len); /** + Create a new literal node from `str`. + + Either `datatype` or `lang` can be given, but not both, unless `datatype` is + rdf:langString in which case it is ignored. +*/ +SERD_API +SerdNode* +serd_node_new_literal(const char* str, const char* datatype, const char* lang); + +/** Return a deep copy of `node`. */ SERD_API @@ -586,6 +598,20 @@ size_t serd_node_get_length(const SerdNode* node); /** + Return the datatype of a literal node, or NULL. +*/ +SERD_API +const SerdNode* +serd_node_get_datatype(const SerdNode* node); + +/** + Return the language tag of a literal node, or NULL. +*/ +SERD_API +const SerdNode* +serd_node_get_language(const SerdNode* node); + +/** Return the flags (string properties) of a node. */ SERD_API @@ -644,9 +670,7 @@ typedef SerdStatus (*SerdStatementSink)(void* handle, const SerdNode* graph, const SerdNode* subject, const SerdNode* predicate, - const SerdNode* object, - const SerdNode* object_datatype, - const SerdNode* object_lang); + const SerdNode* object); /** Sink (callback) for anonymous node end markers. @@ -1035,9 +1059,7 @@ serd_writer_write_statement(SerdWriter* writer, const SerdNode* graph, const SerdNode* subject, const SerdNode* predicate, - const SerdNode* object, - const SerdNode* datatype, - const SerdNode* lang); + const SerdNode* object); /** Mark the end of an anonymous node's description. @@ -697,7 +697,11 @@ read_0_9(SerdReader* reader, Ref str, bool at_least_one) } static bool -read_number(SerdReader* reader, Ref* dest, Ref* datatype, bool* ate_dot) +read_number(SerdReader* reader, + Ref* dest, + Ref* datatype, + SerdNodeFlags* flags, + bool* ate_dot) { #define XSD_DECIMAL NS_XSD "decimal" #define XSD_DOUBLE NS_XSD "double" @@ -752,6 +756,7 @@ read_number(SerdReader* reader, Ref* dest, Ref* datatype, bool* ate_dot) *datatype = push_node(reader, SERD_URI, XSD_INTEGER, sizeof(XSD_INTEGER) - 1); } + *flags |= SERD_HAS_DATATYPE; *dest = ref; return true; except: @@ -785,11 +790,13 @@ read_literal(SerdReader* reader, Ref* dest, switch (peek_byte(reader)) { case '@': eat_byte_safe(reader, '@'); + *flags |= SERD_HAS_LANGUAGE; TRY_THROW(*lang = read_LANGTAG(reader)); break; case '^': eat_byte_safe(reader, '^'); eat_byte_check(reader, '^'); + *flags |= SERD_HAS_DATATYPE; TRY_THROW(read_iri(reader, datatype, ate_dot)); break; } @@ -999,7 +1006,7 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot) break; case '+': case '-': case '.': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - TRY_THROW(ret = read_number(reader, &o, &datatype, ate_dot)); + TRY_THROW(ret = read_number(reader, &o, &datatype, &flags, ate_dot)); break; case '\"': case '\'': @@ -1016,6 +1023,7 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot) !memcmp(serd_node_get_string(node), "true", 4)) || (node->n_bytes == 5 && !memcmp(serd_node_get_string(node), "false", 5))) { + flags = flags | SERD_HAS_DATATYPE; node->type = SERD_LITERAL; datatype = push_node( reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN); @@ -31,13 +31,36 @@ # endif #endif +static const size_t serd_node_align = sizeof(SerdNode); + +static size_t +serd_node_pad_size(const size_t n_bytes) +{ + const size_t pad = serd_node_align - (n_bytes + 2) % serd_node_align; + return n_bytes + 2 + pad; +} + +size_t +serd_node_total_size(const SerdNode* node) +{ + const size_t len = sizeof(SerdNode) + serd_node_pad_size(node->n_bytes); + if (node->flags & SERD_HAS_LANGUAGE) { + return len + serd_node_total_size(serd_node_get_language(node)); + } else if (node->flags & SERD_HAS_DATATYPE) { + return len + serd_node_total_size(serd_node_get_datatype(node)); + } + return len; +} + SerdNode* serd_node_malloc(size_t n_bytes, SerdNodeFlags flags, SerdType type) { - SerdNode* node = (SerdNode*)calloc(1, sizeof(SerdNode) + n_bytes + 1); + const size_t size = sizeof(SerdNode) + serd_node_pad_size(n_bytes); + SerdNode* node = (SerdNode*)calloc(1, size); node->n_bytes = 0; node->flags = flags; node->type = type; + assert((intptr_t)node % serd_node_align == 0); return node; } @@ -51,11 +74,12 @@ void serd_node_set(SerdNode** dst, const SerdNode* src) { if (src) { - if (!(*dst) || (*dst)->n_bytes < src->n_bytes) { - (*dst) = (SerdNode*)realloc(*dst, sizeof(SerdNode) + src->n_bytes + 1); + const size_t size = serd_node_total_size(src); + if (!(*dst) || serd_node_total_size(*dst) < size) { + (*dst) = (SerdNode*)realloc(*dst, size); } - memcpy(*dst, src, sizeof(SerdNode) + src->n_bytes + 1); + memcpy(*dst, src, size); } else if (*dst) { (*dst)->type = SERD_NOTHING; } @@ -95,14 +119,60 @@ serd_node_new_substring(SerdType type, const char* str, const size_t len) SERD_API SerdNode* +serd_node_new_literal(const char* str, const char* datatype, const char* lang) +{ + if (!str || (lang && datatype && strcmp(datatype, NS_RDF "#langString"))) { + return NULL; + } + + uint32_t flags = 0; + const size_t n_bytes = serd_strlen(str, &flags); + const size_t len = serd_node_pad_size(n_bytes); + + SerdNode* node = NULL; + if (lang) { + flags |= SERD_HAS_LANGUAGE; + const size_t lang_len = strlen(lang); + const size_t total_len = len + sizeof(SerdNode) + lang_len; + node = serd_node_malloc(total_len, flags, SERD_LITERAL); + memcpy(serd_node_buffer(node), str, n_bytes); + node->n_bytes = n_bytes; + + SerdNode* lang_node = node + 1 + (len / serd_node_align); + lang_node->type = SERD_LITERAL; + lang_node->n_bytes = lang_len; + memcpy(serd_node_buffer(lang_node), lang, lang_len); + } else if (datatype) { + flags |= SERD_HAS_DATATYPE; + const size_t datatype_len = strlen(datatype); + const size_t total_len = len + sizeof(SerdNode) + datatype_len; + node = serd_node_malloc(total_len, flags, SERD_LITERAL); + memcpy(serd_node_buffer(node), str, n_bytes); + node->n_bytes = n_bytes; + + SerdNode* datatype_node = node + 1 + (len / serd_node_align); + datatype_node->type = SERD_URI; + datatype_node->n_bytes = datatype_len; + memcpy(serd_node_buffer(datatype_node), datatype, datatype_len); + } else { + node = serd_node_malloc(n_bytes, flags, SERD_LITERAL); + memcpy(serd_node_buffer(node), str, n_bytes); + node->n_bytes = n_bytes; + } + + return node; +} + +SERD_API +SerdNode* serd_node_copy(const SerdNode* node) { if (!node) { return NULL; } - const size_t size = sizeof(SerdNode) + node->n_bytes + 1; - SerdNode* copy = (SerdNode*)malloc(size); + const size_t size = serd_node_total_size(node); + SerdNode* copy = (SerdNode*)calloc(1, size + 3); memcpy(copy, node, size); return copy; } @@ -111,11 +181,17 @@ SERD_API bool serd_node_equals(const SerdNode* a, const SerdNode* b) { - return (a == b) || - (a && b && a->type == b->type && a->n_bytes == b->n_bytes && - !memcmp(serd_node_get_string(a), - serd_node_get_string(b), - a->n_bytes)); + if (a == b) { + return true; + } else if (!a || !b) { + return false; + } + + const size_t a_size = serd_node_total_size(a); + if (serd_node_total_size(b) == a_size) { + return !memcmp(a, b, a_size); + } + return false; } static size_t @@ -441,6 +517,40 @@ serd_node_get_length(const SerdNode* node) } SERD_API +const SerdNode* +serd_node_get_datatype(const SerdNode* node) +{ + if (!node || !(node->flags & SERD_HAS_DATATYPE)) { + return NULL; + } + + const size_t len = serd_node_pad_size(node->n_bytes); + assert((intptr_t)node % serd_node_align == 0); + assert(len % serd_node_align == 0); + + const SerdNode* const datatype = node + 1 + (len / serd_node_align); + assert(datatype->type == SERD_URI || datatype->type == SERD_CURIE); + return datatype; +} + +SERD_API +const SerdNode* +serd_node_get_language(const SerdNode* node) +{ + if (!node || !(node->flags & SERD_HAS_LANGUAGE)) { + return NULL; + } + + const size_t len = serd_node_pad_size(node->n_bytes); + assert((intptr_t)node % serd_node_align == 0); + assert(len % serd_node_align == 0); + + const SerdNode* const lang = node + 1 + (len / serd_node_align); + assert(lang->type == SERD_LITERAL); + return lang; +} + +SERD_API SerdNodeFlags serd_node_get_flags(const SerdNode* node) { diff --git a/src/reader.c b/src/reader.c index 8c43f80a..eb9ad516 100644 --- a/src/reader.c +++ b/src/reader.c @@ -123,7 +123,7 @@ emit_statement(SerdReader* reader, ReadContext ctx, Ref o, Ref d, Ref l) !reader->statement_sink( reader->handle, *ctx.flags, graph, deref(reader, ctx.subject), deref(reader, ctx.predicate), - deref(reader, o), deref(reader, d), deref(reader, l)); + deref(reader, o)); *ctx.flags &= SERD_ANON_CONT|SERD_LIST_CONT; // Preserve only cont flags return ret; } diff --git a/src/writer.c b/src/writer.c index 2190d51f..05f83b0f 100644 --- a/src/writer.c +++ b/src/writer.c @@ -103,8 +103,6 @@ typedef enum { static bool write_node(SerdWriter* writer, const SerdNode* node, - const SerdNode* datatype, - const SerdNode* lang, Field field, SerdStatementFlags flags); @@ -428,13 +426,13 @@ is_inline_start(const SerdWriter* writer, Field field, SerdStatementFlags flags) static bool write_literal(SerdWriter* writer, const SerdNode* node, - const SerdNode* datatype, - const SerdNode* lang, Field field, SerdStatementFlags flags) { - const char* node_str = serd_node_get_string(node); - const char* type_uri = serd_node_get_string(datatype); + const SerdNode* datatype = serd_node_get_datatype(node); + const SerdNode* lang = serd_node_get_language(node); + const char* node_str = serd_node_get_string(node); + const char* type_uri = serd_node_get_string(datatype); if (supports_abbrev(writer) && type_uri) { if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) && ( !strcmp(type_uri + sizeof(NS_XSD) - 1, "boolean") || @@ -469,7 +467,7 @@ write_literal(SerdWriter* writer, sink(serd_node_get_string(lang), lang->n_bytes, writer); } else if (type_uri) { sink("^^", 2, writer); - return write_node(writer, datatype, NULL, NULL, FIELD_NONE, flags); + return write_node(writer, datatype, FIELD_NONE, flags); } return true; } @@ -614,15 +612,13 @@ write_blank(SerdWriter* const writer, static bool write_node(SerdWriter* writer, const SerdNode* node, - const SerdNode* datatype, - const SerdNode* lang, Field field, SerdStatementFlags flags) { bool ret = false; switch (node->type) { case SERD_LITERAL: - ret = write_literal(writer, node, datatype, lang, field, flags); + ret = write_literal(writer, node, field, flags); break; case SERD_URI: ret = write_uri_node(writer, node, field, flags); @@ -647,7 +643,7 @@ is_resource(const SerdNode* node) static void write_pred(SerdWriter* writer, SerdStatementFlags flags, const SerdNode* pred) { - write_node(writer, pred, NULL, NULL, FIELD_PREDICATE, flags); + write_node(writer, pred, FIELD_PREDICATE, flags); write_sep(writer, SEP_P_O); serd_node_set(&writer->context.predicate, pred); } @@ -656,9 +652,7 @@ static bool write_list_obj(SerdWriter* writer, SerdStatementFlags flags, const SerdNode* predicate, - const SerdNode* object, - const SerdNode* datatype, - const SerdNode* lang) + const SerdNode* object) { if (!strcmp(serd_node_get_string(object), NS_RDF "nil")) { --writer->indent; @@ -666,7 +660,7 @@ write_list_obj(SerdWriter* writer, return true; } else if (!strcmp(serd_node_get_string(predicate), NS_RDF "first")) { write_sep(writer, SEP_LIST_SEP); - write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); + write_node(writer, object, FIELD_OBJECT, flags); } return false; } @@ -678,9 +672,7 @@ serd_writer_write_statement(SerdWriter* writer, const SerdNode* graph, const SerdNode* subject, const SerdNode* predicate, - const SerdNode* object, - const SerdNode* datatype, - const SerdNode* lang) + const SerdNode* object) { if (!subject || !predicate || !object || !is_resource(subject) || !is_resource(predicate)) { @@ -695,14 +687,14 @@ serd_writer_write_statement(SerdWriter* writer, switch (writer->syntax) { case SERD_NTRIPLES: case SERD_NQUADS: - TRY(write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags)); + TRY(write_node(writer, subject, FIELD_SUBJECT, flags)); sink(" ", 1, writer); - TRY(write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags)); + TRY(write_node(writer, predicate, FIELD_PREDICATE, flags)); sink(" ", 1, writer); - TRY(write_node(writer, object, datatype, lang, FIELD_OBJECT, flags)); + TRY(write_node(writer, object, FIELD_OBJECT, flags)); if (writer->syntax == SERD_NQUADS && graph) { sink(" ", 1, writer); - TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags)); + TRY(write_node(writer, graph, FIELD_GRAPH, flags)); } sink(" .\n", 3, writer); return SERD_SUCCESS; @@ -722,7 +714,7 @@ serd_writer_write_statement(SerdWriter* writer, reset_context(writer, true); if (graph) { - TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags)); + TRY(write_node(writer, graph, FIELD_GRAPH, flags)); ++writer->indent; write_sep(writer, SEP_GRAPH_BEGIN); serd_node_set(&writer->context.graph, graph); @@ -730,7 +722,7 @@ serd_writer_write_statement(SerdWriter* writer, } if ((flags & SERD_LIST_CONT)) { - if (write_list_obj(writer, flags, predicate, object, datatype, lang)) { + if (write_list_obj(writer, flags, predicate, object)) { // Reached end of list if (--writer->list_depth == 0 && writer->list_subj) { reset_context(writer, false); @@ -747,7 +739,7 @@ serd_writer_write_statement(SerdWriter* writer, ++writer->indent; } write_sep(writer, SEP_END_O); - write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); + write_node(writer, object, FIELD_OBJECT, flags); if (!(flags & SERD_ANON_O_BEGIN)) { --writer->indent; } @@ -756,7 +748,7 @@ serd_writer_write_statement(SerdWriter* writer, Sep sep = ctx(writer, FIELD_PREDICATE) ? SEP_END_P : SEP_S_P; write_sep(writer, sep); write_pred(writer, flags, predicate); - write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); + write_node(writer, object, FIELD_OBJECT, flags); } } else { // No abbreviation @@ -771,7 +763,7 @@ serd_writer_write_statement(SerdWriter* writer, } if (!(flags & SERD_ANON_CONT)) { - write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags); + write_node(writer, subject, FIELD_SUBJECT, flags); ++writer->indent; write_sep(writer, SEP_S_P); } else { @@ -785,7 +777,7 @@ serd_writer_write_statement(SerdWriter* writer, write_pred(writer, flags, predicate); } - write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); + write_node(writer, object, FIELD_OBJECT, flags); } if (flags & (SERD_ANON_S_BEGIN|SERD_ANON_O_BEGIN)) { diff --git a/tests/serd_test.c b/tests/serd_test.c index b8f9051e..b96de49f 100644 --- a/tests/serd_test.c +++ b/tests/serd_test.c @@ -71,9 +71,7 @@ test_sink(void* handle, const SerdNode* graph, const SerdNode* subject, const SerdNode* predicate, - const SerdNode* object, - const SerdNode* object_datatype, - const SerdNode* object_lang) + const SerdNode* object) { ReaderTest* rt = (ReaderTest*)handle; ++rt->n_statements; @@ -335,6 +333,40 @@ main(void) } serd_node_free(a_b); + // Test serd_node_new_literal + + if (serd_node_new_literal(NULL, NULL, NULL)) { + FAIL("Successfully created node from NULL string\n"); + } + + SerdNode* hello2 = serd_node_new_literal("hello\"", NULL, NULL); + if (serd_node_get_length(hello2) != 6 || + serd_node_get_flags(hello2) != SERD_HAS_QUOTE || + strcmp(serd_node_get_string(hello2), "hello\"")) { + FAILF("Bad node %s\n", serd_node_get_string(hello2)); + } + serd_node_free(hello2); + + SerdNode* hello_l = serd_node_new_literal("hello_l\"", NULL, "en"); + if (serd_node_get_length(hello_l) != 8 || + strcmp(serd_node_get_string(hello_l), "hello_l\"") || + serd_node_get_flags(hello_l) != (SERD_HAS_QUOTE | SERD_HAS_LANGUAGE) || + strcmp(serd_node_get_string(serd_node_get_language(hello_l)), "en")) { + FAILF("Bad node %s\n", serd_node_get_string(hello_l)); + } + serd_node_free(hello_l); + + SerdNode* hello_dt = + serd_node_new_literal("hello_dt\"", "http://example.org/Thing", NULL); + if (serd_node_get_length(hello_dt) != 9 || + strcmp(serd_node_get_string(hello_dt), "hello_dt\"") || + serd_node_get_flags(hello_dt) != (SERD_HAS_QUOTE | SERD_HAS_DATATYPE) || + strcmp(serd_node_get_string(serd_node_get_datatype(hello_dt)), + "http://example.org/Thing")) { + FAILF("Bad node %s\n", serd_node_get_string(hello_dt)); + } + serd_node_free(hello_dt); + // Test serd_node_new_uri_from_string if (serd_node_new_uri_from_string(NULL, NULL, NULL)) { @@ -489,40 +521,39 @@ main(void) SerdNode* o = serd_node_new_string(SERD_LITERAL, (char*)buf); // Write 3 invalid statements (should write nothing) - const SerdNode* junk[][5] = { { s, p, NULL, NULL, NULL }, - { s, NULL, o, NULL, NULL }, - { NULL, p, o, NULL, NULL }, - { s, p, NULL, NULL, NULL }, - { s, NULL, o, NULL, NULL }, - { NULL, p, o, NULL, NULL }, - { s, o, o, NULL, NULL }, - { o, p, o, NULL, NULL }, - { s, p, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL, NULL } }; + const SerdNode* junk[][5] = { { s, p, NULL }, + { s, NULL, o }, + { NULL, p, o }, + { s, p, NULL }, + { s, NULL, o }, + { NULL, p, o }, + { s, o, o }, + { o, p, o }, + { s, p, NULL }, + { NULL, NULL, NULL } }; for (unsigned i = 0; i < sizeof(junk) / (sizeof(SerdNode*) * 5); ++i) { if (!serd_writer_write_statement( writer, 0, NULL, - junk[i][0], junk[i][1], junk[i][2], junk[i][3], junk[i][4])) { + junk[i][0], junk[i][1], junk[i][2])) { FAILF("Successfully wrote junk statement %d\n", i); } } - SerdNode* t = serd_node_new_string(SERD_URI, "urn:Type"); - SerdNode* l = serd_node_new_string(SERD_LITERAL, "en"); - const SerdNode* good[][5] = { { s, p, o, NULL, NULL }, - { s, p, o, NULL, NULL }, - { s, p, o, t, NULL }, - { s, p, o, NULL, l }, - { s, p, o, t, l }, - { s, p, o, t, NULL }, - { s, p, o, NULL, l }, - { s, p, o, NULL, NULL }, - { s, p, o, NULL, NULL }, - { s, p, o, NULL, NULL } }; + SerdNode* t = serd_node_new_literal((char*)buf, "urn:Type", NULL); + SerdNode* l = serd_node_new_literal((char*)buf, NULL, "en"); + const SerdNode* good[][5] = { { s, p, o }, + { s, p, o }, + { s, p, t }, + { s, p, l }, + { s, p, l }, + { s, p, t }, + { s, p, l }, + { s, p, o }, + { s, p, o }, + { s, p, o } }; for (unsigned i = 0; i < sizeof(good) / (sizeof(SerdNode*) * 5); ++i) { if (serd_writer_write_statement( - writer, 0, NULL, - good[i][0], good[i][1], good[i][2], good[i][3], good[i][4])) { + writer, 0, NULL, good[i][0], good[i][1], good[i][2])) { FAILF("Failed to write good statement %d\n", i); } } @@ -532,10 +563,10 @@ main(void) SerdNode* bad_lit = serd_node_new_string(SERD_LITERAL, bad_str); SerdNode* bad_uri = serd_node_new_string(SERD_URI, bad_str); if (serd_writer_write_statement(writer, 0, NULL, - s, p, bad_lit, NULL, NULL)) { + s, p, bad_lit)) { FAIL("Failed to write junk UTF-8 literal\n"); } else if (serd_writer_write_statement(writer, 0, NULL, - s, p, bad_uri, NULL, NULL)) { + s, p, bad_uri)) { FAIL("Failed to write junk UTF-8 URI\n"); } serd_node_free(bad_uri); @@ -545,7 +576,7 @@ main(void) serd_node_free(o); o = serd_node_new_string(SERD_LITERAL, "hello"); if (serd_writer_write_statement(writer, 0, NULL, - s, p, o, NULL, NULL)) { + s, p, o)) { FAIL("Failed to write valid statement\n"); } |