From bfece96cead96fdcdb11567f1cf031edc3f53a8b Mon Sep 17 00:00:00 2001 From: David Robillard Date: Fri, 14 Aug 2020 16:05:10 +0200 Subject: Merge datatype/language into node This moves closer to the sord API, and is more convenient in most cases. --- src/n3.c | 37 ++++++++++--------- src/node.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- src/reader.c | 10 +---- src/reader.h | 2 +- src/writer.c | 78 ++++++++++++++++++--------------------- 5 files changed, 171 insertions(+), 75 deletions(-) (limited to 'src') diff --git a/src/n3.c b/src/n3.c index a4745733..9fb256af 100644 --- a/src/n3.c +++ b/src/n3.c @@ -854,10 +854,11 @@ read_0_9(SerdReader* const reader, const Ref str, const bool at_least_one) } static SerdStatus -read_number(SerdReader* const reader, - Ref* const dest, - Ref* const datatype, - bool* const ate_dot) +read_number(SerdReader* const reader, + Ref* const dest, + Ref* const datatype, + SerdNodeFlags* const flags, + bool* const ate_dot) { #define XSD_DECIMAL NS_XSD "decimal" #define XSD_DOUBLE NS_XSD "double" @@ -916,6 +917,10 @@ read_number(SerdReader* const reader, push_node(reader, SERD_URI, XSD_INTEGER, sizeof(XSD_INTEGER) - 1); } + if (*datatype) { + *flags |= SERD_HAS_DATATYPE; + } + return SERD_SUCCESS; } @@ -950,6 +955,7 @@ read_literal(SerdReader* const reader, switch (peek_byte(reader)) { case '@': eat_byte_safe(reader, '@'); + *flags |= SERD_HAS_LANGUAGE; if ((st = read_LANGTAG(reader, lang))) { *datatype = pop_node(reader, *datatype); *lang = pop_node(reader, *lang); @@ -960,6 +966,7 @@ read_literal(SerdReader* const reader, case '^': eat_byte_safe(reader, '^'); eat_byte_check(reader, '^'); + *flags |= SERD_HAS_DATATYPE; if ((st = read_iri(reader, datatype, ate_dot))) { *datatype = pop_node(reader, *datatype); *lang = pop_node(reader, *lang); @@ -968,7 +975,6 @@ read_literal(SerdReader* const reader, } break; } - return SERD_SUCCESS; } @@ -1079,7 +1085,7 @@ read_anon(SerdReader* const reader, SerdStatus st = SERD_SUCCESS; if (ctx.subject) { - TRY(st, emit_statement(reader, ctx, *dest, 0, 0)); + TRY(st, emit_statement(reader, ctx, *dest)); } ctx.subject = *dest; @@ -1088,21 +1094,17 @@ read_anon(SerdReader* const reader, if (!subject) { *ctx.flags |= SERD_ANON_CONT; } - bool ate_dot_in_list = false; read_predicateObjectList(reader, ctx, &ate_dot_in_list); if (ate_dot_in_list) { return r_err(reader, SERD_ERR_BAD_SYNTAX, "`.' inside blank\n"); } - read_ws_star(reader); if (reader->end_func) { reader->end_func(reader->handle, deref(reader, *dest)); } - *ctx.flags = old_flags; } - return (eat_byte_check(reader, ']') == ']') ? SERD_SUCCESS : SERD_ERR_BAD_SYNTAX; } @@ -1175,7 +1177,7 @@ read_object(SerdReader* const reader, case '7': case '8': case '9': - ret = read_number(reader, &o, &datatype, ate_dot); + ret = read_number(reader, &o, &datatype, &flags, ate_dot); break; case '\"': case '\'': @@ -1191,6 +1193,7 @@ read_object(SerdReader* const reader, node = deref(reader, o); if ((node->length == 4 && !memcmp(serd_node_string(node), "true", 4)) || (node->length == 5 && !memcmp(serd_node_string(node), "false", 5))) { + flags = flags | SERD_HAS_DATATYPE; node->type = SERD_LITERAL; datatype = push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN); ret = SERD_SUCCESS; @@ -1210,7 +1213,7 @@ read_object(SerdReader* const reader, } if (!ret && emit && simple) { - ret = emit_statement(reader, *ctx, o, datatype, lang); + ret = emit_statement(reader, *ctx, o); } else if (!ret && !emit) { ctx->object = o; ctx->datatype = datatype; @@ -1306,14 +1309,12 @@ read_collection(SerdReader* const reader, ReadContext ctx, Ref* const dest) { SerdStatus st = SERD_SUCCESS; eat_byte_safe(reader, '('); - bool end = peek_delim(reader, ')'); - - *dest = end ? reader->rdf_nil : blank_id(reader); + *dest = end ? reader->rdf_nil : blank_id(reader); if (ctx.subject) { // subject predicate _:head *ctx.flags |= (end ? 0 : SERD_LIST_O_BEGIN); - TRY(st, emit_statement(reader, ctx, *dest, 0, 0)); + TRY(st, emit_statement(reader, ctx, *dest)); *ctx.flags |= SERD_LIST_CONT; } else { *ctx.flags |= (end ? 0 : SERD_LIST_S_BEGIN); @@ -1352,7 +1353,7 @@ read_collection(SerdReader* const reader, ReadContext ctx, Ref* const dest) // _:node rdf:rest _:rest *ctx.flags |= SERD_LIST_CONT; ctx.predicate = reader->rdf_rest; - TRY(st, emit_statement(reader, ctx, (end ? reader->rdf_nil : rest), 0, 0)); + TRY(st, emit_statement(reader, ctx, (end ? reader->rdf_nil : rest))); ctx.subject = rest; // _:node = _:rest rest = node; // _:rest = (old)_:node @@ -1733,7 +1734,7 @@ read_nquadsDoc(SerdReader* const reader) } } - TRY(st, emit_statement(reader, ctx, ctx.object, ctx.datatype, ctx.lang)); + TRY(st, emit_statement(reader, ctx, ctx.object)); pop_node(reader, ctx.graph); pop_node(reader, ctx.lang); diff --git a/src/node.c b/src/node.c index 854772ac..edfa28b5 100644 --- a/src/node.c +++ b/src/node.c @@ -42,6 +42,8 @@ static const size_t serd_node_align = 2 * sizeof(uint64_t); +static const SerdNodeFlags meta_mask = (SERD_HAS_DATATYPE | SERD_HAS_LANGUAGE); + static size_t serd_uri_string_length(const SerdURIView* const uri) { @@ -77,11 +79,25 @@ serd_node_pad_size(const size_t n_bytes) return n_bytes + 2 + pad; } +static const SerdNode* +serd_node_meta_c(const SerdNode* const node) +{ + return node + 1 + (serd_node_pad_size(node->length) / sizeof(SerdNode)); +} + +static const SerdNode* +serd_node_maybe_get_meta_c(const SerdNode* const node) +{ + return (node->flags & meta_mask) ? serd_node_meta_c(node) : NULL; +} + static SERD_PURE_FUNC size_t serd_node_total_size(const SerdNode* const node) { - return node ? (sizeof(SerdNode) + serd_node_pad_size(node->length)) : 0; + return node ? (sizeof(SerdNode) + serd_node_pad_size(node->length) + + serd_node_total_size(serd_node_maybe_get_meta_c(node))) + : 0; } SerdNode* @@ -96,6 +112,7 @@ serd_node_malloc(const size_t length, node->flags = flags; node->type = type; + assert((intptr_t)node % serd_node_align == 0); return node; } @@ -115,7 +132,7 @@ serd_node_set(SerdNode** const dst, const SerdNode* const src) } assert(*dst); - memcpy(*dst, src, sizeof(SerdNode) + src->length + 1); + memcpy(*dst, src, size); } SerdNode* @@ -124,8 +141,10 @@ serd_new_string(SerdNodeType type, const char* str) SerdNodeFlags flags = 0; const size_t length = serd_strlen(str, &flags); SerdNode* node = serd_node_malloc(length, flags, type); + memcpy(serd_node_buffer(node), str, length); node->length = length; + return node; } @@ -142,6 +161,49 @@ serd_new_substring(const SerdNodeType type, return node; } +SerdNode* +serd_new_literal(const char* const str, + const char* const datatype, + const char* const lang) +{ + SerdNodeFlags flags = 0; + const size_t length = serd_strlen(str, &flags); + const size_t len = serd_node_pad_size(length); + + SerdNode* node = NULL; + if (lang) { + flags |= SERD_HAS_LANGUAGE; + const size_t lang_len = strlen(lang); + const size_t total_len = len + sizeof(SerdNode) + lang_len; + node = serd_node_malloc(total_len, flags, SERD_LITERAL); + memcpy(serd_node_buffer(node), str, length); + node->length = length; + + SerdNode* lang_node = node + 1 + (len / sizeof(SerdNode)); + lang_node->type = SERD_LITERAL; + lang_node->length = lang_len; + memcpy(serd_node_buffer(lang_node), lang, lang_len); + } else if (datatype) { + flags |= SERD_HAS_DATATYPE; + const size_t datatype_len = strlen(datatype); + const size_t total_len = len + sizeof(SerdNode) + datatype_len; + node = serd_node_malloc(total_len, flags, SERD_LITERAL); + memcpy(serd_node_buffer(node), str, length); + node->length = length; + + SerdNode* datatype_node = node + 1 + (len / sizeof(SerdNode)); + datatype_node->type = SERD_URI; + datatype_node->length = datatype_len; + memcpy(serd_node_buffer(datatype_node), datatype, datatype_len); + } else { + node = serd_node_malloc(length, flags, SERD_LITERAL); + memcpy(serd_node_buffer(node), str, length); + node->length = length; + } + + return node; +} + SerdNode* serd_node_copy(const SerdNode* node) { @@ -158,9 +220,30 @@ serd_node_copy(const SerdNode* node) bool serd_node_equals(const SerdNode* const a, const SerdNode* const b) { - return (a == b) || - (a && b && a->type == b->type && a->length == b->length && - !memcmp(serd_node_string(a), serd_node_string(b), a->length)); + if (a == b) { + return true; + } + + if (!a || !b || a->length != b->length || a->flags != b->flags || + a->type != b->type) { + return false; + } + + const size_t length = a->length; + if (!!memcmp(serd_node_string(a), serd_node_string(b), length)) { + return false; + } + + const SerdNodeFlags flags = a->flags; + if (flags & meta_mask) { + const SerdNode* const am = serd_node_meta_c(a); + const SerdNode* const bm = serd_node_meta_c(b); + + return am->length == bm->length && am->type == bm->type && + !memcmp(serd_node_string(am), serd_node_string(bm), am->length); + } + + return true; } SerdNode* @@ -444,6 +527,32 @@ serd_node_uri_view(const SerdNode* const node) : SERD_URI_NULL; } +const SerdNode* +serd_node_datatype(const SerdNode* const node) +{ + if (!node || !(node->flags & SERD_HAS_DATATYPE)) { + return NULL; + } + + const size_t len = serd_node_pad_size(node->length); + const SerdNode* const datatype = node + 1 + (len / sizeof(SerdNode)); + assert(datatype->type == SERD_URI || datatype->type == SERD_CURIE); + return datatype; +} + +const SerdNode* +serd_node_language(const SerdNode* const node) +{ + if (!node || !(node->flags & SERD_HAS_LANGUAGE)) { + return NULL; + } + + const size_t len = serd_node_pad_size(node->length); + const SerdNode* const lang = node + 1 + (len / sizeof(SerdNode)); + assert(lang->type == SERD_LITERAL); + return lang; +} + SerdNodeFlags serd_node_flags(const SerdNode* const node) { diff --git a/src/reader.c b/src/reader.c index ff53ab46..d0cc8a97 100644 --- a/src/reader.c +++ b/src/reader.c @@ -146,11 +146,7 @@ pop_node(SerdReader* const reader, const Ref ref) } SerdStatus -emit_statement(SerdReader* const reader, - const ReadContext ctx, - const Ref o, - const Ref d, - const Ref l) +emit_statement(SerdReader* const reader, const ReadContext ctx, const Ref o) { SerdNode* graph = deref(reader, ctx.graph); if (!graph && reader->default_graph) { @@ -164,9 +160,7 @@ emit_statement(SerdReader* const reader, graph, deref(reader, ctx.subject), deref(reader, ctx.predicate), - deref(reader, o), - deref(reader, d), - deref(reader, l)); + deref(reader, o)); *ctx.flags &= SERD_ANON_CONT | SERD_LIST_CONT; // Preserve only cont flags return st; diff --git a/src/reader.h b/src/reader.h index bb8f0b30..60a52e9d 100644 --- a/src/reader.h +++ b/src/reader.h @@ -118,7 +118,7 @@ Ref pop_node(SerdReader* reader, Ref ref); SerdStatus -emit_statement(SerdReader* reader, ReadContext ctx, Ref o, Ref d, Ref l); +emit_statement(SerdReader* reader, ReadContext ctx, Ref o); SerdStatus read_n3_statement(SerdReader* reader); diff --git a/src/writer.c b/src/writer.c index f0bd11eb..b4917052 100644 --- a/src/writer.c +++ b/src/writer.c @@ -116,8 +116,6 @@ typedef enum { WRITE_STRING, WRITE_LONG_STRING } TextContext; static bool write_node(SerdWriter* writer, const SerdNode* node, - const SerdNode* datatype, - const SerdNode* lang, Field field, SerdStatementFlags flags); @@ -511,14 +509,14 @@ is_inline_start(const SerdWriter* writer, Field field, SerdStatementFlags flags) } static bool -write_literal(SerdWriter* writer, - const SerdNode* node, - const SerdNode* datatype, - const SerdNode* lang, - SerdStatementFlags flags) +write_literal(SerdWriter* const writer, + const SerdNode* const node, + const SerdStatementFlags flags) { - const char* node_str = serd_node_string(node); - const char* type_uri = datatype ? serd_node_string(datatype) : NULL; + const SerdNode* datatype = serd_node_datatype(node); + const SerdNode* lang = serd_node_language(node); + const char* node_str = serd_node_string(node); + const char* type_uri = datatype ? serd_node_string(datatype) : NULL; if (supports_abbrev(writer) && type_uri) { if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) && (!strcmp(type_uri + sizeof(NS_XSD) - 1, "boolean") || @@ -554,7 +552,7 @@ write_literal(SerdWriter* writer, sink(serd_node_string(lang), lang->length, writer); } else if (type_uri) { sink("^^", 2, writer); - return write_node(writer, datatype, NULL, NULL, FIELD_NONE, flags); + return write_node(writer, datatype, FIELD_NONE, flags); } return true; } @@ -713,17 +711,15 @@ write_blank(SerdWriter* const writer, } static bool -write_node(SerdWriter* writer, - const SerdNode* node, - const SerdNode* datatype, - const SerdNode* lang, - Field field, - SerdStatementFlags flags) +write_node(SerdWriter* const writer, + const SerdNode* const node, + const Field field, + const SerdStatementFlags flags) { bool ret = false; switch (node->type) { case SERD_LITERAL: - ret = write_literal(writer, node, datatype, lang, flags); + ret = write_literal(writer, node, flags); break; case SERD_URI: ret = write_uri_node(writer, node, field); @@ -748,18 +744,16 @@ is_resource(const SerdNode* node) static void write_pred(SerdWriter* writer, SerdStatementFlags flags, const SerdNode* pred) { - write_node(writer, pred, NULL, NULL, FIELD_PREDICATE, flags); + write_node(writer, pred, FIELD_PREDICATE, flags); write_sep(writer, SEP_P_O); serd_node_set(&writer->context.predicate, pred); } static bool -write_list_obj(SerdWriter* writer, - SerdStatementFlags flags, - const SerdNode* predicate, - const SerdNode* object, - const SerdNode* datatype, - const SerdNode* lang) +write_list_obj(SerdWriter* const writer, + const SerdStatementFlags flags, + const SerdNode* const predicate, + const SerdNode* const object) { if (!strcmp(serd_node_string(object), NS_RDF "nil")) { --writer->indent; @@ -769,21 +763,19 @@ write_list_obj(SerdWriter* writer, if (!strcmp(serd_node_string(predicate), NS_RDF "first")) { write_sep(writer, SEP_LIST_SEP); - write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); + write_node(writer, object, FIELD_OBJECT, flags); } return false; } SerdStatus -serd_writer_write_statement(SerdWriter* writer, - SerdStatementFlags flags, - const SerdNode* graph, - const SerdNode* subject, - const SerdNode* predicate, - const SerdNode* object, - const SerdNode* datatype, - const SerdNode* lang) +serd_writer_write_statement(SerdWriter* const writer, + const SerdStatementFlags flags, + const SerdNode* const graph, + const SerdNode* const subject, + const SerdNode* const predicate, + const SerdNode* const object) { if (!is_resource(subject) || !is_resource(predicate) || !object) { return SERD_ERR_BAD_ARG; @@ -797,14 +789,14 @@ serd_writer_write_statement(SerdWriter* writer, } while (0) if (writer->syntax == SERD_NTRIPLES || writer->syntax == SERD_NQUADS) { - TRY(write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags)); + TRY(write_node(writer, subject, FIELD_SUBJECT, flags)); sink(" ", 1, writer); - TRY(write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags)); + TRY(write_node(writer, predicate, FIELD_PREDICATE, flags)); sink(" ", 1, writer); - TRY(write_node(writer, object, datatype, lang, FIELD_OBJECT, flags)); + TRY(write_node(writer, object, FIELD_OBJECT, flags)); if (writer->syntax == SERD_NQUADS && graph) { sink(" ", 1, writer); - TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags)); + TRY(write_node(writer, graph, FIELD_GRAPH, flags)); } sink(" .\n", 3, writer); return SERD_SUCCESS; @@ -822,7 +814,7 @@ serd_writer_write_statement(SerdWriter* writer, reset_context(writer, true); if (graph) { - TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags)); + TRY(write_node(writer, graph, FIELD_GRAPH, flags)); ++writer->indent; write_sep(writer, SEP_GRAPH_BEGIN); serd_node_set(&writer->context.graph, graph); @@ -830,7 +822,7 @@ serd_writer_write_statement(SerdWriter* writer, } if ((flags & SERD_LIST_CONT)) { - if (write_list_obj(writer, flags, predicate, object, datatype, lang)) { + if (write_list_obj(writer, flags, predicate, object)) { // Reached end of list if (--writer->list_depth == 0 && writer->list_subj) { reset_context(writer, false); @@ -847,7 +839,7 @@ serd_writer_write_statement(SerdWriter* writer, ++writer->indent; } write_sep(writer, SEP_END_O); - write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); + write_node(writer, object, FIELD_OBJECT, flags); if (!(flags & SERD_ANON_O_BEGIN)) { --writer->indent; } @@ -856,7 +848,7 @@ serd_writer_write_statement(SerdWriter* writer, Sep sep = ctx(writer, FIELD_PREDICATE) ? SEP_END_P : SEP_S_P; write_sep(writer, sep); write_pred(writer, flags, predicate); - write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); + write_node(writer, object, FIELD_OBJECT, flags); } } else { // No abbreviation @@ -871,7 +863,7 @@ serd_writer_write_statement(SerdWriter* writer, } if (!(flags & SERD_ANON_CONT)) { - write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags); + write_node(writer, subject, FIELD_SUBJECT, flags); ++writer->indent; write_sep(writer, SEP_S_P); } else { @@ -885,7 +877,7 @@ serd_writer_write_statement(SerdWriter* writer, write_pred(writer, flags, predicate); } - write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); + write_node(writer, object, FIELD_OBJECT, flags); } if (flags & (SERD_ANON_S_BEGIN | SERD_ANON_O_BEGIN)) { -- cgit v1.2.1