about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2018-04-01 20:33:33 +0200
committer David Robillard <d@drobilla.net> 2020-06-21 18:12:03 +0200
commit 7d25d7319564fe0b1c89690a99d033952df8e55f (patch)
tree 74b1f1aa77dfdfe8b7751531f588dd080d3a1e0d /src
parent 27ee06024c2d54a2340e83e5c90e919f8764ae0f (diff)
download serd-7d25d7319564fe0b1c89690a99d033952df8e55f.tar.gz
serd-7d25d7319564fe0b1c89690a99d033952df8e55f.tar.bz2
serd-7d25d7319564fe0b1c89690a99d033952df8e55f.zip
Merge datatype and/or language into node
This moves closer to the sord API, and is more convenient in most cases.
Diffstat (limited to 'src')
-rw-r--r-- src/n3.c 23
-rw-r--r-- src/node.c 131
-rw-r--r-- src/reader.c 4
-rw-r--r-- src/serd_internal.h 2
-rw-r--r-- src/writer.c 48
5 files changed, 158 insertions, 50 deletions
diff --git a/src/n3.c b/src/n3.c
index 19ef8557..dd96307d 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -727,7 +727,11 @@ read_0_9(SerdReader* reader, Ref str, bool at_least_one)
}
static bool
-read_number(SerdReader* reader, Ref* dest, Ref* datatype, bool* ate_dot)
+read_number(SerdReader* reader,
+ Ref* dest,
+ Ref* datatype,
+ SerdNodeFlags* flags,
+ bool* ate_dot)
{
#define XSD_DECIMAL NS_XSD "decimal"
#define XSD_DOUBLE NS_XSD "double"
@@ -783,6 +787,7 @@ read_number(SerdReader* reader, Ref* dest, Ref* datatype, bool* ate_dot)
*datatype = push_node(reader, SERD_URI,
XSD_INTEGER, sizeof(XSD_INTEGER) - 1);
}
+ *flags |= SERD_HAS_DATATYPE;
*dest = ref;
return true;
except:
@@ -816,11 +821,13 @@ read_literal(SerdReader* reader, Ref* dest,
switch (peek_byte(reader)) {
case '@':
eat_byte_safe(reader, '@');
+ *flags |= SERD_HAS_LANGUAGE;
TRY_THROW(*lang = read_LANGTAG(reader));
break;
case '^':
eat_byte_safe(reader, '^');
eat_byte_check(reader, '^');
+ *flags |= SERD_HAS_DATATYPE;
TRY_THROW(read_iri(reader, datatype, ate_dot));
break;
}
@@ -949,7 +956,7 @@ read_anon(SerdReader* reader, ReadContext ctx, bool subject, Ref* dest)
*dest = blank_id(reader);
}
if (ctx.subject) {
- TRY_RET(emit_statement(reader, ctx, *dest, 0, 0));
+ TRY_RET(emit_statement(reader, ctx, *dest));
}
ctx.subject = *dest;
@@ -1019,7 +1026,7 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot)
break;
case '+': case '-': case '.': case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7': case '8': case '9':
- TRY_THROW(ret = read_number(reader, &o, &datatype, ate_dot));
+ TRY_THROW(ret = read_number(reader, &o, &datatype, &flags, ate_dot));
break;
case '\"':
case '\'':
@@ -1036,6 +1043,7 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot)
!memcmp(serd_node_get_string(node), "true", 4)) ||
(node->n_bytes == 5 &&
!memcmp(serd_node_get_string(node), "false", 5))) {
+ flags = flags | SERD_HAS_DATATYPE;
node->type = SERD_LITERAL;
datatype = push_node(
reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN);
@@ -1054,7 +1062,7 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot)
}
if (ret && emit && simple) {
- ret = emit_statement(reader, *ctx, o, datatype, lang);
+ ret = emit_statement(reader, *ctx, o);
} else if (ret && !emit) {
ctx->object = o;
ctx->datatype = datatype;
@@ -1140,7 +1148,7 @@ read_collection(SerdReader* reader, ReadContext ctx, Ref* dest)
if (ctx.subject) {
// subject predicate _:head
*ctx.flags |= (end ? 0 : SERD_LIST_O_BEGIN);
- TRY_RET(emit_statement(reader, ctx, *dest, 0, 0));
+ TRY_RET(emit_statement(reader, ctx, *dest));
*ctx.flags |= SERD_LIST_CONT;
} else {
*ctx.flags |= (end ? 0 : SERD_LIST_S_BEGIN);
@@ -1179,8 +1187,7 @@ read_collection(SerdReader* reader, ReadContext ctx, Ref* dest)
// _:node rdf:rest _:rest
*ctx.flags |= SERD_LIST_CONT;
ctx.predicate = reader->rdf_rest;
- TRY_RET(emit_statement(reader, ctx,
- (end ? reader->rdf_nil : rest), 0, 0));
+ TRY_RET(emit_statement(reader, ctx, (end ? reader->rdf_nil : rest)));
ctx.subject = rest; // _:node = _:rest
rest = node; // _:rest = (old)_:node
@@ -1512,7 +1519,7 @@ read_nquadsDoc(SerdReader* reader)
eat_byte_check(reader, '.');
}
- if (!emit_statement(reader, ctx, ctx.object, ctx.datatype, ctx.lang)) {
+ if (!emit_statement(reader, ctx, ctx.object)) {
break;
}
diff --git a/src/node.c b/src/node.c
index 256b710d..7def9188 100644
--- a/src/node.c
+++ b/src/node.c
@@ -18,6 +18,7 @@
#include "serd/serd.h"
+#include <assert.h>
#include <float.h>
#include <math.h>
#include <stdbool.h>
@@ -35,13 +36,40 @@
# endif
#endif
+static const size_t serd_node_align = sizeof(SerdNode);
+
+static size_t
+serd_node_pad_size(const size_t n_bytes)
+{
+ const size_t pad = serd_node_align - (n_bytes + 2) % serd_node_align;
+ return n_bytes + 2 + pad;
+}
+
+static const SerdNode*
+serd_node_maybe_get_meta_c(const SerdNode* node)
+{
+ return (node->flags & (SERD_HAS_LANGUAGE | SERD_HAS_DATATYPE))
+ ? (node + 1 + (serd_node_pad_size(node->n_bytes) / serd_node_align))
+ : NULL;
+}
+
+static size_t
+serd_node_total_size(const SerdNode* node)
+{
+ return node ? (sizeof(SerdNode) + serd_node_pad_size(node->n_bytes) +
+ serd_node_total_size(serd_node_maybe_get_meta_c(node)))
+ : 0;
+}
+
SerdNode*
serd_node_malloc(size_t n_bytes, SerdNodeFlags flags, SerdType type)
{
- SerdNode* node = (SerdNode*)calloc(1, sizeof(SerdNode) + n_bytes + 1);
+ const size_t size = sizeof(SerdNode) + serd_node_pad_size(n_bytes);
+ SerdNode* node = (SerdNode*)calloc(1, size);
node->n_bytes = 0;
node->flags = flags;
node->type = type;
+ assert((intptr_t)node % serd_node_align == 0);
return node;
}
@@ -55,11 +83,12 @@ void
serd_node_set(SerdNode** dst, const SerdNode* src)
{
if (src) {
- if (!(*dst) || (*dst)->n_bytes < src->n_bytes) {
- (*dst) = (SerdNode*)realloc(*dst, sizeof(SerdNode) + src->n_bytes + 1);
+ const size_t size = serd_node_total_size(src);
+ if (!(*dst) || serd_node_total_size(*dst) < size) {
+ (*dst) = (SerdNode*)realloc(*dst, size);
}
- memcpy(*dst, src, sizeof(SerdNode) + src->n_bytes + 1);
+ memcpy(*dst, src, size);
} else if (*dst) {
memset(*dst, 0, sizeof(SerdNode));
}
@@ -96,14 +125,59 @@ serd_node_new_substring(SerdType type, const char* str, const size_t len)
}
SerdNode*
+serd_node_new_literal(const char* str, const char* datatype, const char* lang)
+{
+ if (!str || (lang && datatype && strcmp(datatype, NS_RDF "#langString"))) {
+ return NULL;
+ }
+
+ uint32_t flags = 0;
+ const size_t n_bytes = serd_strlen(str, &flags);
+ const size_t len = serd_node_pad_size(n_bytes);
+
+ SerdNode* node = NULL;
+ if (lang) {
+ flags |= SERD_HAS_LANGUAGE;
+ const size_t lang_len = strlen(lang);
+ const size_t total_len = len + sizeof(SerdNode) + lang_len;
+ node = serd_node_malloc(total_len, flags, SERD_LITERAL);
+ memcpy(serd_node_buffer(node), str, n_bytes);
+ node->n_bytes = n_bytes;
+
+ SerdNode* lang_node = node + 1 + (len / serd_node_align);
+ lang_node->type = SERD_LITERAL;
+ lang_node->n_bytes = lang_len;
+ memcpy(serd_node_buffer(lang_node), lang, lang_len);
+ } else if (datatype) {
+ flags |= SERD_HAS_DATATYPE;
+ const size_t datatype_len = strlen(datatype);
+ const size_t total_len = len + sizeof(SerdNode) + datatype_len;
+ node = serd_node_malloc(total_len, flags, SERD_LITERAL);
+ memcpy(serd_node_buffer(node), str, n_bytes);
+ node->n_bytes = n_bytes;
+
+ SerdNode* datatype_node = node + 1 + (len / serd_node_align);
+ datatype_node->type = SERD_URI;
+ datatype_node->n_bytes = datatype_len;
+ memcpy(serd_node_buffer(datatype_node), datatype, datatype_len);
+ } else {
+ node = serd_node_malloc(n_bytes, flags, SERD_LITERAL);
+ memcpy(serd_node_buffer(node), str, n_bytes);
+ node->n_bytes = n_bytes;
+ }
+
+ return node;
+}
+
+SerdNode*
serd_node_copy(const SerdNode* node)
{
if (!node) {
return NULL;
}
- const size_t size = sizeof(SerdNode) + node->n_bytes + 1;
- SerdNode* copy = (SerdNode*)malloc(size);
+ const size_t size = serd_node_total_size(node);
+ SerdNode* copy = (SerdNode*)calloc(1, size + 3);
memcpy(copy, node, size);
return copy;
}
@@ -111,11 +185,14 @@ serd_node_copy(const SerdNode* node)
bool
serd_node_equals(const SerdNode* a, const SerdNode* b)
{
- return (a == b) ||
- (a && b && a->type == b->type && a->n_bytes == b->n_bytes &&
- !memcmp(serd_node_get_string(a),
- serd_node_get_string(b),
- a->n_bytes));
+ if (a == b) {
+ return true;
+ } else if (!a || !b) {
+ return false;
+ }
+
+ const size_t a_size = serd_node_total_size(a);
+ return serd_node_total_size(b) == a_size && !memcmp(a, b, a_size);
}
static size_t
@@ -433,6 +510,38 @@ serd_node_get_length(const SerdNode* node)
return node ? node->n_bytes : 0;
}
+const SerdNode*
+serd_node_get_datatype(const SerdNode* node)
+{
+ if (!node || !(node->flags & SERD_HAS_DATATYPE)) {
+ return NULL;
+ }
+
+ const size_t len = serd_node_pad_size(node->n_bytes);
+ assert((intptr_t)node % serd_node_align == 0);
+ assert(len % serd_node_align == 0);
+
+ const SerdNode* const datatype = node + 1 + (len / serd_node_align);
+ assert(datatype->type == SERD_URI || datatype->type == SERD_CURIE);
+ return datatype;
+}
+
+const SerdNode*
+serd_node_get_language(const SerdNode* node)
+{
+ if (!node || !(node->flags & SERD_HAS_LANGUAGE)) {
+ return NULL;
+ }
+
+ const size_t len = serd_node_pad_size(node->n_bytes);
+ assert((intptr_t)node % serd_node_align == 0);
+ assert(len % serd_node_align == 0);
+
+ const SerdNode* const lang = node + 1 + (len / serd_node_align);
+ assert(lang->type == SERD_LITERAL);
+ return lang;
+}
+
SerdNodeFlags
serd_node_get_flags(const SerdNode* node)
{
diff --git a/src/reader.c b/src/reader.c
index 699a2c4c..161db4dd 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -114,7 +114,7 @@ pop_node(SerdReader* reader, Ref ref)
}
bool
-emit_statement(SerdReader* reader, ReadContext ctx, Ref o, Ref d, Ref l)
+emit_statement(SerdReader* reader, ReadContext ctx, Ref o)
{
SerdNode* graph = deref(reader, ctx.graph);
if (!graph && reader->default_graph) {
@@ -124,7 +124,7 @@ emit_statement(SerdReader* reader, ReadContext ctx, Ref o, Ref d, Ref l)
!reader->statement_sink(
reader->handle, *ctx.flags, graph,
deref(reader, ctx.subject), deref(reader, ctx.predicate),
- deref(reader, o), deref(reader, d), deref(reader, l));
+ deref(reader, o));
*ctx.flags &= SERD_ANON_CONT|SERD_LIST_CONT; // Preserve only cont flags
return ret;
}
diff --git a/src/serd_internal.h b/src/serd_internal.h
index 9eb4c215..d39d9c4d 100644
--- a/src/serd_internal.h
+++ b/src/serd_internal.h
@@ -630,7 +630,7 @@ SerdNode* deref(SerdReader* reader, Ref ref);
Ref pop_node(SerdReader* reader, Ref ref);
-bool emit_statement(SerdReader* reader, ReadContext ctx, Ref o, Ref d, Ref l);
+bool emit_statement(SerdReader* reader, ReadContext ctx, Ref o);
bool read_n3_statement(SerdReader* reader);
SerdStatus read_nquadsDoc(SerdReader* reader);
diff --git a/src/writer.c b/src/writer.c
index c570c7ba..445362b4 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -108,8 +108,6 @@ typedef enum {
static bool
write_node(SerdWriter* writer,
const SerdNode* node,
- const SerdNode* datatype,
- const SerdNode* lang,
Field field,
SerdStatementFlags flags);
@@ -432,12 +430,12 @@ is_inline_start(const SerdWriter* writer, Field field, SerdStatementFlags flags)
static bool
write_literal(SerdWriter* writer,
const SerdNode* node,
- const SerdNode* datatype,
- const SerdNode* lang,
SerdStatementFlags flags)
{
- const char* node_str = serd_node_get_string(node);
- const char* type_uri = serd_node_get_string(datatype);
+ const SerdNode* datatype = serd_node_get_datatype(node);
+ const SerdNode* lang = serd_node_get_language(node);
+ const char* node_str = serd_node_get_string(node);
+ const char* type_uri = serd_node_get_string(datatype);
if (supports_abbrev(writer) && type_uri) {
if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) && (
!strcmp(type_uri + sizeof(NS_XSD) - 1, "boolean") ||
@@ -472,7 +470,7 @@ write_literal(SerdWriter* writer,
sink(serd_node_get_string(lang), lang->n_bytes, writer);
} else if (type_uri) {
sink("^^", 2, writer);
- return write_node(writer, datatype, NULL, NULL, FIELD_NONE, flags);
+ return write_node(writer, datatype, FIELD_NONE, flags);
}
return true;
}
@@ -632,15 +630,13 @@ write_blank(SerdWriter* const writer,
static bool
write_node(SerdWriter* writer,
const SerdNode* node,
- const SerdNode* datatype,
- const SerdNode* lang,
Field field,
SerdStatementFlags flags)
{
bool ret = false;
switch (node->type) {
case SERD_LITERAL:
- ret = write_literal(writer, node, datatype, lang, flags);
+ ret = write_literal(writer, node, flags);
break;
case SERD_URI:
ret = write_uri_node(writer, node, field, flags);
@@ -665,7 +661,7 @@ is_resource(const SerdNode* node)
static void
write_pred(SerdWriter* writer, SerdStatementFlags flags, const SerdNode* pred)
{
- write_node(writer, pred, NULL, NULL, FIELD_PREDICATE, flags);
+ write_node(writer, pred, FIELD_PREDICATE, flags);
write_sep(writer, SEP_P_O);
serd_node_set(&writer->context.predicate, pred);
}
@@ -674,9 +670,7 @@ static bool
write_list_obj(SerdWriter* writer,
SerdStatementFlags flags,
const SerdNode* predicate,
- const SerdNode* object,
- const SerdNode* datatype,
- const SerdNode* lang)
+ const SerdNode* object)
{
if (!strcmp(serd_node_get_string(object), NS_RDF "nil")) {
--writer->indent;
@@ -684,7 +678,7 @@ write_list_obj(SerdWriter* writer,
return true;
} else if (!strcmp(serd_node_get_string(predicate), NS_RDF "first")) {
write_sep(writer, SEP_LIST_SEP);
- write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
+ write_node(writer, object, FIELD_OBJECT, flags);
}
return false;
}
@@ -695,9 +689,7 @@ serd_writer_write_statement(SerdWriter* writer,
const SerdNode* graph,
const SerdNode* subject,
const SerdNode* predicate,
- const SerdNode* object,
- const SerdNode* datatype,
- const SerdNode* lang)
+ const SerdNode* object)
{
if (!subject || !predicate || !object ||
!is_resource(subject) || !is_resource(predicate)) {
@@ -712,14 +704,14 @@ serd_writer_write_statement(SerdWriter* writer,
switch (writer->syntax) {
case SERD_NTRIPLES:
case SERD_NQUADS:
- TRY(write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags));
+ TRY(write_node(writer, subject, FIELD_SUBJECT, flags));
sink(" ", 1, writer);
- TRY(write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags));
+ TRY(write_node(writer, predicate, FIELD_PREDICATE, flags));
sink(" ", 1, writer);
- TRY(write_node(writer, object, datatype, lang, FIELD_OBJECT, flags));
+ TRY(write_node(writer, object, FIELD_OBJECT, flags));
if (writer->syntax == SERD_NQUADS && graph) {
sink(" ", 1, writer);
- TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags));
+ TRY(write_node(writer, graph, FIELD_GRAPH, flags));
}
sink(" .\n", 3, writer);
return SERD_SUCCESS;
@@ -739,7 +731,7 @@ serd_writer_write_statement(SerdWriter* writer,
reset_context(writer, true);
if (graph) {
- TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags));
+ TRY(write_node(writer, graph, FIELD_GRAPH, flags));
++writer->indent;
write_sep(writer, SEP_GRAPH_BEGIN);
serd_node_set(&writer->context.graph, graph);
@@ -747,7 +739,7 @@ serd_writer_write_statement(SerdWriter* writer,
}
if ((flags & SERD_LIST_CONT)) {
- if (write_list_obj(writer, flags, predicate, object, datatype, lang)) {
+ if (write_list_obj(writer, flags, predicate, object)) {
// Reached end of list
if (--writer->list_depth == 0 && writer->list_subj) {
reset_context(writer, false);
@@ -764,7 +756,7 @@ serd_writer_write_statement(SerdWriter* writer,
++writer->indent;
}
write_sep(writer, SEP_END_O);
- write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
+ write_node(writer, object, FIELD_OBJECT, flags);
if (!(flags & SERD_ANON_O_BEGIN)) {
--writer->indent;
}
@@ -773,7 +765,7 @@ serd_writer_write_statement(SerdWriter* writer,
Sep sep = ctx(writer, FIELD_PREDICATE) ? SEP_END_P : SEP_S_P;
write_sep(writer, sep);
write_pred(writer, flags, predicate);
- write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
+ write_node(writer, object, FIELD_OBJECT, flags);
}
} else {
// No abbreviation
@@ -788,7 +780,7 @@ serd_writer_write_statement(SerdWriter* writer,
}
if (!(flags & SERD_ANON_CONT)) {
- write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags);
+ write_node(writer, subject, FIELD_SUBJECT, flags);
++writer->indent;
write_sep(writer, SEP_S_P);
} else {
@@ -802,7 +794,7 @@ serd_writer_write_statement(SerdWriter* writer,
write_pred(writer, flags, predicate);
}
- write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
+ write_node(writer, object, FIELD_OBJECT, flags);
}
if (flags & (SERD_ANON_S_BEGIN|SERD_ANON_O_BEGIN)) {