diff options
-rw-r--r-- | serd/serd.h | 107 | ||||
-rw-r--r-- | src/env.c | 28 | ||||
-rw-r--r-- | src/reader.c | 68 | ||||
-rw-r--r-- | src/serd_internal.h | 29 | ||||
-rw-r--r-- | src/serdi.c | 209 | ||||
-rw-r--r-- | src/string.c | 35 | ||||
-rw-r--r-- | src/uri.c | 56 | ||||
-rw-r--r-- | src/writer.c | 129 |
8 files changed, 367 insertions, 294 deletions
diff --git a/serd/serd.h b/serd/serd.h index 6b27ff18..17f00a80 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -15,7 +15,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -/* @file +/** @file * Public Serd API. */ @@ -112,7 +112,7 @@ typedef enum { * @{ */ -/** A chunk of memory (unterminated string). */ +/** An unterminated string fragment. */ typedef struct { const uint8_t* buf; ///< Start of chunk size_t len; ///< Length of chunk in bytes @@ -163,38 +163,21 @@ size_t serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream); /** @} */ -/** @name SerdString - * @brief A measured UTF-8 string. +/** @name SerdNode + * @brief An RDF node. * @{ */ -/** Measured UTF-8 string. */ +/** A syntactic RDF node. */ typedef struct { - size_t n_bytes; ///< Size in bytes including trailing null byte - size_t n_chars; ///< Length in characters - uint8_t buf[]; ///< Buffer -} SerdString; + SerdType type; + size_t n_bytes; ///< Size in bytes including trailing null byte + size_t n_chars; ///< Length in characters + const uint8_t* buf; ///< Buffer +} SerdNode; -/** Create a new UTF-8 string from @a utf8. */ -SERD_API -SerdString* -serd_string_new(const uint8_t* utf8); - -/** Copy @a string. */ -SERD_API -SerdString* -serd_string_copy(const SerdString* str); +static const SerdNode SERD_NODE_NULL = { 0, 0, 0, 0 }; -/** Free @a str. */ -SERD_API -void -serd_string_free(SerdString* str); - -/** Serialise @a uri to a string. */ -SERD_API -SerdString* -serd_string_new_from_uri(const SerdURI* uri, - SerdURI* out); /** @} */ /** @name SerdEnv @@ -215,17 +198,17 @@ serd_env_free(SerdEnv env); /** Add namespace @a uri to @a ns using prefix @a name. */ SERD_API void -serd_env_add(SerdEnv env, - const SerdString* name, - const SerdString* uri); +serd_env_add(SerdEnv env, + const SerdNode* name, + const SerdNode* uri); /** Expand @a qname. */ SERD_API bool -serd_env_expand(const SerdEnv env, - const SerdString* qname, - SerdChunk* uri_prefix, - SerdChunk* uri_suffix); +serd_env_expand(const SerdEnv env, + const SerdNode* qname, + SerdChunk* uri_prefix, + SerdChunk* uri_suffix); /** @} */ /** @name SerdReader @@ -234,31 +217,30 @@ serd_env_expand(const SerdEnv env, */ /** Sink for base URI changes. */ -typedef bool (*SerdBaseSink)(void* handle, - const SerdString* uri); +typedef bool (*SerdBaseSink)(void* handle, + const SerdNode* uri); /** Sink for namespace definitions. */ -typedef bool (*SerdPrefixSink)(void* handle, - const SerdString* name, - const SerdString* uri); +typedef bool (*SerdPrefixSink)(void* handle, + const SerdNode* name, + const SerdNode* uri); /** Sink for statements. */ -typedef bool (*SerdStatementSink)( - void* handle, - const SerdString* graph, SerdType graph_type, - const SerdString* subject, SerdType subject_type, - const SerdString* predicate, SerdType predicate_type, - const SerdString* object, SerdType object_type, - const SerdString* object_lang, - const SerdString* object_datatype); +typedef bool (*SerdStatementSink)(void* handle, + const SerdNode* graph, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* object_datatype, + const SerdNode* object_lang); /** Sink for anonymous node end markers. * This is called to indicate that the anonymous node with the given * @a value will no longer be referred to by any future statements * (i.e. the anonymous serialisation of the node is finished). */ -typedef bool (*SerdEndSink)(void* handle, - const SerdString* value); +typedef bool (*SerdEndSink)(void* handle, + const SerdNode* node); /** Create a new RDF reader. */ SERD_API @@ -317,27 +299,26 @@ serd_writer_set_base_uri(SerdWriter writer, /** Set the current output base URI. */ SERD_API void -serd_writer_set_prefix(SerdWriter writer, - const SerdString* name, - const SerdString* uri); +serd_writer_set_prefix(SerdWriter writer, + const SerdNode* name, + const SerdNode* uri); /** Write a statement. */ SERD_API bool -serd_writer_write_statement( - SerdWriter writer, - const SerdString* graph, SerdType graph_type, - const SerdString* subject, SerdType subject_type, - const SerdString* predicate, SerdType predicate_type, - const SerdString* object, SerdType object_type, - const SerdString* object_datatype, - const SerdString* object_lang); +serd_writer_write_statement(SerdWriter writer, + const SerdNode* graph, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* object_datatype, + const SerdNode* object_lang); /** Mark the end of an anonymous node's description. */ SERD_API bool -serd_writer_end_anon(SerdWriter writer, - const SerdString* subject); +serd_writer_end_anon(SerdWriter writer, + const SerdNode* node); /** Finish a write. */ SERD_API @@ -8,11 +8,11 @@ * * Serd is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public * License for details. * * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <assert.h> @@ -20,7 +20,7 @@ #include <stdlib.h> #include <string.h> -#include "serd/serd.h" +#include "serd_internal.h" typedef struct { SerdString* name; @@ -72,29 +72,29 @@ serd_env_find(SerdEnv env, SERD_API void -serd_env_add(SerdEnv env, - const SerdString* name, - const SerdString* uri) +serd_env_add(SerdEnv env, + const SerdNode* name, + const SerdNode* uri) { assert(name && uri); SerdPrefix* const prefix = serd_env_find(env, name->buf, name->n_chars); if (prefix) { serd_string_free(prefix->uri); - prefix->uri = serd_string_copy(uri); + prefix->uri = serd_string_new_from_node(uri); } else { env->prefixes = realloc(env->prefixes, (++env->n_prefixes) * sizeof(SerdPrefix)); - env->prefixes[env->n_prefixes - 1].name = serd_string_copy(name); - env->prefixes[env->n_prefixes - 1].uri = serd_string_copy(uri); + env->prefixes[env->n_prefixes - 1].name = serd_string_new_from_node(name); + env->prefixes[env->n_prefixes - 1].uri = serd_string_new_from_node(uri); } } SERD_API bool -serd_env_expand(const SerdEnv env, - const SerdString* qname, - SerdChunk* uri_prefix, - SerdChunk* uri_suffix) +serd_env_expand(const SerdEnv env, + const SerdNode* qname, + SerdChunk* uri_prefix, + SerdChunk* uri_suffix) { const uint8_t* const colon = memchr(qname->buf, ':', qname->n_bytes); if (!colon) { @@ -102,7 +102,7 @@ serd_env_expand(const SerdEnv env, } const size_t name_len = colon - qname->buf; - const SerdPrefix* const prefix = serd_env_find(env, qname->buf, name_len); + const SerdPrefix* const prefix = serd_env_find(env, qname->buf, name_len); if (prefix) { uri_prefix->buf = prefix->uri->buf; uri_prefix->len = prefix->uri->n_bytes - 1; diff --git a/src/reader.c b/src/reader.c index 0dfdbedc..35067403 100644 --- a/src/reader.c +++ b/src/reader.c @@ -8,11 +8,11 @@ * * Serd is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public * License for details. * * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <assert.h> @@ -34,7 +34,7 @@ #define STACK_PAGE_SIZE 4096 #define READ_BUF_LEN 4096 #ifndef NDEBUG -#define STACK_DEBUG 1 +#define STACK_DEBUG 1 #endif typedef struct { @@ -60,7 +60,7 @@ typedef struct { const Node* predicate; } ReadContext; -static const Node SERD_NODE_NULL = { 0, 0, 0, 0 }; +static const Node INTERNAL_NODE_NULL = { 0, 0, 0, 0 }; struct SerdReaderImpl { void* handle; @@ -269,18 +269,42 @@ pop_string(SerdReader reader, Ref ref) } } +static inline SerdNode +public_node_from_ref(SerdReader reader, SerdType type, Ref ref) +{ + if (!ref) { + return SERD_NODE_NULL; + } + const SerdString* str = deref(reader, ref); + const SerdNode public = { type, str->n_bytes, str->n_chars, str->buf }; + return public; +} + +static inline SerdNode +public_node(SerdReader reader, const Node* private) +{ + return public_node_from_ref(reader, private->type, private->value); +} + + static inline bool emit_statement(SerdReader reader, const Node* g, const Node* s, const Node* p, const Node* o) { assert(s->value && p->value && o->value); - return reader->statement_sink( - reader->handle, - g ? deref(reader, g->value) : NULL, g ? g->type : 0, - deref(reader, s->value), s->type, - deref(reader, p->value), p->type, - deref(reader, o->value), o->type, - deref(reader, o->datatype), deref(reader, o->lang)); + const SerdNode graph = g ? public_node(reader, g) : SERD_NODE_NULL; + const SerdNode subject = public_node(reader, s); + const SerdNode predicate = public_node(reader, p); + const SerdNode object = public_node(reader, o); + const SerdNode object_datatype = public_node_from_ref(reader, SERD_URI, o->datatype); + const SerdNode object_lang = public_node_from_ref(reader, SERD_LITERAL, o->lang); + return reader->statement_sink(reader->handle, + &graph, + &subject, + &predicate, + &object, + &object_datatype, + &object_lang); } static bool read_collection(SerdReader reader, ReadContext ctx, Node* dest); @@ -891,7 +915,7 @@ static bool read_literal(SerdReader reader, Node* dest) { Ref str = 0; - Node datatype = SERD_NODE_NULL; + Node datatype = INTERNAL_NODE_NULL; const uint8_t c = peek_byte(reader); if (c == '-' || c == '+' || is_digit(c)) { return read_number(reader, dest); @@ -1000,7 +1024,8 @@ read_blank(SerdReader reader, ReadContext ctx, Node* dest) read_ws_star(reader); eat_byte(reader, ']'); if (reader->end_sink) { - reader->end_sink(reader->handle, deref(reader, dest->value)); + const SerdNode end = public_node(reader, dest); + reader->end_sink(reader->handle, &end); } return true; case '(': @@ -1040,7 +1065,7 @@ read_object(SerdReader reader, ReadContext ctx) uint8_t pre[6]; bool ret = false; bool emit = (ctx.subject != 0); - Node o = SERD_NODE_NULL; + Node o = INTERNAL_NODE_NULL; const uint8_t c = peek_byte(reader); switch (c) { case ')': @@ -1122,7 +1147,7 @@ read_predicateObjectList(SerdReader reader, ReadContext ctx) if (reader->eof) { return false; } - Node predicate = SERD_NODE_NULL; + Node predicate = INTERNAL_NODE_NULL; TRY_RET(read_verb(reader, &predicate)); TRY_THROW(read_ws_plus(reader)); ctx.predicate = &predicate; @@ -1207,7 +1232,7 @@ read_collection(SerdReader reader, ReadContext ctx, Node* dest) static Node read_subject(SerdReader reader, ReadContext ctx) { - Node subject = SERD_NODE_NULL; + Node subject = INTERNAL_NODE_NULL; switch (peek_byte(reader)) { case '[': case '(': case '_': read_blank(reader, ctx, &subject); @@ -1244,7 +1269,8 @@ read_base(SerdReader reader) TRY_RET(read_ws_plus(reader)); Ref uri; TRY_RET(uri = read_uriref(reader)); - reader->base_sink(reader->handle, deref(reader, uri)); + const SerdNode uri_node = public_node_from_ref(reader, SERD_URI, uri); + reader->base_sink(reader->handle, &uri_node); pop_string(reader, uri); return true; } @@ -1266,9 +1292,9 @@ read_prefixID(SerdReader reader) read_ws_star(reader); Ref uri = 0; TRY_THROW(uri = read_uriref(reader)); - ret = reader->prefix_sink(reader->handle, - deref(reader, name), - deref(reader, uri)); + const SerdNode name_node = public_node_from_ref(reader, SERD_LITERAL, name); + const SerdNode uri_node = public_node_from_ref(reader, SERD_URI, uri); + ret = reader->prefix_sink(reader->handle, &name_node, &uri_node); pop_string(reader, uri); except: pop_string(reader, name); @@ -1352,7 +1378,7 @@ serd_reader_new(SerdSyntax syntax, memset(reader->read_buf, '\0', READ_BUF_LEN * 2); - /* Read into the second page of the buffer. Occasionally readahead + /* Read into the second page of the buffer. Occasionally readahead will move the read_head to before this point when readahead causes a page fault. */ diff --git a/src/serd_internal.h b/src/serd_internal.h index 49db82d5..1b601e12 100644 --- a/src/serd_internal.h +++ b/src/serd_internal.h @@ -8,11 +8,11 @@ * * Serd is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public * License for details. * * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef SERD_INTERNAL_H @@ -30,7 +30,7 @@ typedef struct { size_t size; ///< Conceptual size of stack in buf } SerdStack; -/** An offset to start the stack at. Note 0 is reserved for NULL. */ +/** An offset to start the stack at. Note 0 is reserved for NULL. */ #define SERD_STACK_BOTTOM sizeof(void*) static inline SerdStack @@ -99,4 +99,27 @@ is_digit(const uint8_t c) return in_range(c, '0', '9'); } +/** Measured UTF-8 string. */ +typedef struct { + size_t n_bytes; ///< Size in bytes including trailing null byte + size_t n_chars; ///< Length in characters + uint8_t buf[]; ///< Buffer +} SerdString; + +#if 0 +/** Create a new UTF-8 string from @a utf8. */ +SerdString* +serd_string_new(const uint8_t* utf8); + +/** Copy @a string. */ +SerdString* +serd_string_copy(const SerdString* str); +#endif + +void +serd_string_free(SerdString* str); + +SerdString* +serd_string_new_from_node(const SerdNode* node); + #endif // SERD_INTERNAL_H diff --git a/src/serdi.c b/src/serdi.c index d8b88dd0..810de74c 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -8,11 +8,11 @@ * * Serd is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public * License for details. * * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <assert.h> @@ -21,103 +21,174 @@ #include "serd/serd.h" +// #define URI_DEBUG 1 + typedef struct { - SerdWriter writer; - SerdEnv env; - SerdString* base_uri_str; - SerdURI base_uri; + SerdWriter writer; + SerdEnv env; + SerdNode base_uri_node; + SerdURI base_uri; } State; -static bool -event_base(void* handle, - const SerdString* uri_str) +static size_t +serd_uri_string_length(const SerdURI* uri) { - State* const state = (State*)handle; + size_t len = uri->path_base.len; + +#define ADD_LEN(field, n_delims) \ + if ((field).len) { len += (field).len + (n_delims); } + + ADD_LEN(uri->path, 1); // + possible leading `/' + ADD_LEN(uri->scheme, 1); // + trailing `:' + ADD_LEN(uri->authority, 2); // + leading `//' + ADD_LEN(uri->query, 1); // + leading `?' + ADD_LEN(uri->fragment, 1); // + leading `#' + + return len; +} + +static size_t +string_sink(const void* buf, size_t len, void* stream) +{ + uint8_t** ptr = (uint8_t**)stream; + memcpy(*ptr, buf, len); + *ptr += len; + return len; +} + +static SerdNode +serd_node_new_uri(const SerdURI* uri, SerdURI* out) +{ + const size_t len = serd_uri_string_length(uri); + uint8_t* buf = malloc(len + 1); + + SerdNode node = { SERD_URI, len + 1, len, buf }; // FIXME: UTF-8 + + uint8_t* ptr = buf; + const size_t actual_len = serd_uri_serialise(uri, string_sink, &ptr); + + buf[actual_len] = '\0'; + node.n_bytes = actual_len + 1; + node.n_chars = actual_len; + + // FIXME: double parse + if (!serd_uri_parse(buf, out)) { + fprintf(stderr, "error parsing URI\n"); + return SERD_NODE_NULL; + } - SerdURI uri; - if (!serd_uri_parse(uri_str->buf, &uri)) { + #ifdef URI_DEBUG + fwrite("URI: `'", 1, 6, stderr); + fwrite(node.buf, 1, node.n_bytes - 1, stderr); + fwrite("'\n", 1, 2, stderr); + #endif + + return node; +} + +static uint8_t* +copy_string(const uint8_t* str, size_t* n_bytes) +{ + const size_t len = strlen((const char*)str); + uint8_t* const ret = malloc(len + 1); + memcpy(ret, str, len + 1); + *n_bytes = len + 1; + return ret; +} + +#if 0 +static SerdNode +serd_node_copy(const SerdNode* node) +{ + SerdNode copy = *node; + uint8_t* buf = malloc(copy.n_bytes); + memcpy(buf, node->buf, copy.n_bytes); + copy.buf = buf; + return copy; +} + +static void +serd_node_free(SerdNode* node) +{ + free((uint8_t*)node->buf); // FIXME: const cast +} +#endif + +static bool +event_base(void* handle, + const SerdNode* uri_node) +{ + State* const state = (State*)handle; + SerdNode base_uri_node = *uri_node; + SerdURI base_uri; + if (!serd_uri_parse(uri_node->buf, &base_uri)) { return false; } - SerdURI base_uri = SERD_URI_NULL; - SerdString* base_uri_str; - if (!uri.scheme.len) { - // URI has no scheme (relative by definition), resolve + if (!base_uri.scheme.len) { // URI has no scheme, resolve relative URI SerdURI abs_base_uri; - if (!serd_uri_resolve(&uri, &state->base_uri, &abs_base_uri)) { + if (!serd_uri_resolve(&base_uri, &state->base_uri, &abs_base_uri)) { fprintf(stderr, "error: failed to resolve new base URI\n"); return false; } - base_uri_str = serd_string_new_from_uri(&abs_base_uri, &base_uri); - // FIXME: double parse - serd_uri_parse(base_uri_str->buf, &base_uri); + base_uri_node = serd_node_new_uri(&abs_base_uri, &base_uri); } else { - // Absolute URI, use literally as new base URI - base_uri_str = serd_string_copy(uri_str); - // FIXME: double parse - serd_uri_parse(base_uri_str->buf, &base_uri); + SerdURI new_base_uri; + base_uri_node = serd_node_new_uri(&base_uri, &new_base_uri); + base_uri = new_base_uri; } - // Replace the old base URI - serd_string_free(state->base_uri_str); - state->base_uri_str = base_uri_str; - state->base_uri = base_uri; + state->base_uri_node = base_uri_node; + state->base_uri = base_uri; serd_writer_set_base_uri(state->writer, &base_uri); - return true; } static bool -event_prefix(void* handle, - const SerdString* name, - const SerdString* uri_string) +event_prefix(void* handle, + const SerdNode* name, + const SerdNode* uri_node) { State* const state = (State*)handle; - if (!serd_uri_string_has_scheme(uri_string->buf)) { + if (!serd_uri_string_has_scheme(uri_node->buf)) { SerdURI uri; - if (!serd_uri_parse(uri_string->buf, &uri)) { + if (!serd_uri_parse(uri_node->buf, &uri)) { return false; } SerdURI abs_uri; if (!serd_uri_resolve(&uri, &state->base_uri, &abs_uri)) { return false; } - SerdURI new_abs_uri; - SerdString* abs_uri_string = serd_string_new_from_uri(&abs_uri, &new_abs_uri); - serd_env_add(state->env, name, abs_uri_string); - serd_string_free(abs_uri_string); + SerdURI base_uri; + SerdNode base_uri_node = serd_node_new_uri(&abs_uri, &base_uri); + serd_env_add(state->env, name, &base_uri_node); } else { - serd_env_add(state->env, name, uri_string); + serd_env_add(state->env, name, uri_node); } - serd_writer_set_prefix(state->writer, name, uri_string); - + serd_writer_set_prefix(state->writer, name, uri_node); return true; } static bool -event_statement(void* handle, - const SerdString* graph, SerdType graph_type, - const SerdString* subject, SerdType subject_type, - const SerdString* predicate, SerdType predicate_type, - const SerdString* object, SerdType object_type, - const SerdString* object_datatype, - const SerdString* object_lang) +event_statement(void* handle, + const SerdNode* graph, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* object_datatype, + const SerdNode* object_lang) { - State* const state = (State*)handle; - return serd_writer_write_statement(state->writer, - graph, graph_type, - subject, subject_type, - predicate, predicate_type, - object, object_type, - object_datatype, object_lang); + return serd_writer_write_statement( + ((State*)handle)->writer, + graph, subject, predicate, object, object_datatype, object_lang); } static bool -event_end(void* handle, - const SerdString* subject) +event_end(void* handle, + const SerdNode* node) { State* const state = (State*)handle; - return serd_writer_end_anon(state->writer, subject); + return serd_writer_end_anon(state->writer, node); } int @@ -184,20 +255,23 @@ main(int argc, char** argv) } } - SerdString* base_uri_str = NULL; - SerdURI base_uri; + uint8_t* base_uri_str = NULL; + size_t base_uri_n_bytes = 0; + SerdURI base_uri; if (a < argc) { // Base URI given on command line const uint8_t* const in_base_uri = (const uint8_t*)argv[a++]; if (!serd_uri_parse((const uint8_t*)in_base_uri, &base_uri)) { fprintf(stderr, "invalid base URI `%s'\n", argv[2]); return 1; } - base_uri_str = serd_string_new(in_base_uri); + base_uri_str = copy_string(in_base_uri, &base_uri_n_bytes); } else { // Use input file URI - base_uri_str = serd_string_new(in_filename); + base_uri_str = copy_string(in_filename, &base_uri_n_bytes); } - serd_uri_parse(base_uri_str->buf, &base_uri); + if (!serd_uri_parse(base_uri_str, &base_uri)) { + fprintf(stderr, "invalid base URI `%s'\n", base_uri_str); + } if (!in_fd) { in_fd = fopen((const char*)in_filename, "r"); @@ -216,10 +290,15 @@ main(int argc, char** argv) ? SERD_STYLE_ASCII : SERD_STYLE_ABBREVIATED; + const SerdNode base_uri_node = { SERD_URI, + base_uri_n_bytes, + base_uri_n_bytes - 1, + base_uri_str }; + State state = { serd_writer_new(output_syntax, output_style, env, &base_uri, file_sink, out_fd), - env, base_uri_str, base_uri + env, base_uri_node, base_uri }; SerdReader reader = serd_reader_new( @@ -233,7 +312,7 @@ main(int argc, char** argv) serd_writer_free(state.writer); serd_env_free(state.env); - serd_string_free(state.base_uri_str); + free(base_uri_str); if (success) { return 0; diff --git a/src/string.c b/src/string.c index 0f5d2430..77346676 100644 --- a/src/string.c +++ b/src/string.c @@ -8,11 +8,11 @@ * * Serd is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public * License for details. * * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <assert.h> @@ -20,8 +20,9 @@ #include <stdlib.h> #include <string.h> -#include "serd/serd.h" +#include "serd_internal.h" +#if 0 static inline size_t utf8_strlen(const uint8_t* utf8, size_t* out_n_bytes) { @@ -38,20 +39,37 @@ utf8_strlen(const uint8_t* utf8, size_t* out_n_bytes) } return n_chars; } +#endif +static SerdString* +serd_string_new_measured(const uint8_t* utf8, size_t n_bytes, size_t n_chars) +{ + SerdString* const str = malloc(sizeof(SerdString) + n_bytes); + str->n_bytes = n_bytes; + str->n_chars = n_chars; + memcpy(str->buf, utf8, n_bytes); + return str; +} + +#if 0 SERD_API SerdString* serd_string_new(const uint8_t* utf8) { size_t n_bytes; size_t n_chars = utf8_strlen(utf8, &n_bytes); - SerdString* const str = malloc(sizeof(SerdString) + n_bytes); - str->n_bytes = n_bytes; - str->n_chars = n_chars; - memcpy(str->buf, utf8, str->n_bytes); - return str; + return serd_string_new_measured(utf8, n_bytes, n_chars); +} +#endif + +SERD_API +SerdString* +serd_string_new_from_node(const SerdNode* node) +{ + return serd_string_new_measured(node->buf, node->n_bytes, node->n_chars); } +#if 0 SERD_API SerdString* serd_string_copy(const SerdString* s) @@ -63,6 +81,7 @@ serd_string_copy(const SerdString* s) } return NULL; } +#endif SERD_API void @@ -8,11 +8,11 @@ * * Serd is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public * License for details. * * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <assert.h> @@ -338,55 +338,3 @@ serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream) } return write_size; } - - -static size_t -serd_uri_string_length(const SerdURI* uri) -{ - size_t len = uri->path_base.len; - -#define ADD_LEN(field, n_delims) \ - if ((field).len) { len += (field).len + (n_delims); } - - ADD_LEN(uri->path, 1); // + possible leading `/' - ADD_LEN(uri->scheme, 1); // + trailing `:' - ADD_LEN(uri->authority, 2); // + leading `//' - ADD_LEN(uri->query, 1); // + leading `?' - ADD_LEN(uri->fragment, 1); // + leading `#' - - return len; -} - -static size_t -string_sink(const void* buf, size_t len, void* stream) -{ - uint8_t** ptr = (uint8_t**)stream; - memcpy(*ptr, buf, len); - *ptr += len; - return len; -} - -SERD_API -SerdString* -serd_string_new_from_uri(const SerdURI* uri, SerdURI* out) -{ - const size_t len = serd_uri_string_length(uri); - SerdString* str = malloc(sizeof(SerdString) + len + 1); - str->n_bytes = len + 1; - str->n_chars = len; // FIXME: UTF-8 - - uint8_t* ptr = str->buf; - const size_t actual_len = serd_uri_serialise(uri, string_sink, &ptr); - - str->buf[actual_len] = '\0'; - str->n_bytes = actual_len + 1; - str->n_chars = str->n_bytes - 1; // FIXME: UTF-8 - - #ifdef URI_DEBUG - fwrite("URI: `'", 1, 6, stderr); - fwrite(str->buf, 1, str->n_bytes - 1, stderr); - fwrite("'\n", 1, 2, stderr); - #endif - - return str; -} diff --git a/src/writer.c b/src/writer.c index 194d26dd..1de08a4c 100644 --- a/src/writer.c +++ b/src/writer.c @@ -8,11 +8,11 @@ * * Serd is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public * License for details. * * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <assert.h> @@ -22,12 +22,14 @@ #include "serd_internal.h" typedef struct { - const SerdString* graph; - const SerdString* subject; - const SerdString* predicate; + SerdNode graph; + SerdNode subject; + SerdNode predicate; } WriteContext; -static const WriteContext WRITE_CONTEXT_NULL = { 0, 0, 0 }; +static const WriteContext WRITE_CONTEXT_NULL = { + { 0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0} +}; struct SerdWriterImpl { SerdSyntax syntax; @@ -162,15 +164,14 @@ serd_writer_write_delim(SerdWriter writer, const uint8_t delim) } static bool -write_node(SerdWriter writer, - SerdType type, - const SerdString* str, - const SerdString* datatype, - const SerdString* lang) +write_node(SerdWriter writer, + const SerdNode* node, + const SerdNode* datatype, + const SerdNode* lang) { SerdChunk uri_prefix; SerdChunk uri_suffix; - switch (type) { + switch (node->type) { case SERD_ANON_BEGIN: if (writer->syntax != SERD_NTRIPLES) { ++writer->indent; @@ -178,8 +179,8 @@ write_node(SerdWriter writer, WriteContext* ctx = (WriteContext*)serd_stack_push( &writer->anon_stack, sizeof(WriteContext)); *ctx = writer->context; - writer->context.subject = str; - writer->context.predicate = 0; + writer->context.subject = *node; + writer->context.predicate = SERD_NODE_NULL; break; } case SERD_ANON: @@ -188,13 +189,13 @@ write_node(SerdWriter writer, } // else fall through case SERD_BLANK_ID: writer->sink("_:", 2, writer->stream); - writer->sink(str->buf, str->n_bytes - 1, writer->stream); + writer->sink(node->buf, node->n_bytes - 1, writer->stream); break; case SERD_CURIE: switch (writer->syntax) { case SERD_NTRIPLES: - if (!serd_env_expand(writer->env, str, &uri_prefix, &uri_suffix)) { - fprintf(stderr, "error: undefined namespace prefix `%s'\n", str->buf); + if (!serd_env_expand(writer->env, node, &uri_prefix, &uri_suffix)) { + fprintf(stderr, "error: undefined namespace prefix `%s'\n", node->buf); return false; } writer->sink("<", 1, writer->stream); @@ -203,25 +204,25 @@ write_node(SerdWriter writer, writer->sink(">", 1, writer->stream); break; case SERD_TURTLE: - writer->sink(str->buf, str->n_bytes - 1, writer->stream); + writer->sink(node->buf, node->n_bytes - 1, writer->stream); } break; case SERD_LITERAL: writer->sink("\"", 1, writer->stream); - write_text(writer, WRITE_STRING, str->buf, str->n_bytes - 1, '"'); + write_text(writer, WRITE_STRING, node->buf, node->n_bytes - 1, '"'); writer->sink("\"", 1, writer->stream); - if (lang) { + if (lang->buf) { writer->sink("@", 1, writer->stream); writer->sink(lang->buf, lang->n_bytes - 1, writer->stream); - } else if (datatype) { + } else if (datatype->buf) { writer->sink("^^", 2, writer->stream); - write_node(writer, SERD_URI, datatype, NULL, NULL); + write_node(writer, datatype, NULL, NULL); } break; case SERD_URI: - if (!serd_uri_string_has_scheme(str->buf)) { + if (!serd_uri_string_has_scheme(node->buf)) { SerdURI uri; - if (serd_uri_parse(str->buf, &uri)) { + if (serd_uri_parse(node->buf, &uri)) { SerdURI abs_uri; if (serd_uri_resolve(&uri, &writer->base_uri, &abs_uri)) { writer->sink("<", 1, writer->stream); @@ -232,7 +233,7 @@ write_node(SerdWriter writer, } } else { writer->sink("<", 1, writer->stream); - write_text(writer, WRITE_URI, str->buf, str->n_bytes - 1, '>'); + write_text(writer, WRITE_URI, node->buf, node->n_bytes - 1, '>'); writer->sink(">", 1, writer->stream); return true; } @@ -243,23 +244,22 @@ write_node(SerdWriter writer, SERD_API bool -serd_writer_write_statement( - SerdWriter writer, - const SerdString* graph, SerdType graph_type, - const SerdString* subject, SerdType subject_type, - const SerdString* predicate, SerdType predicate_type, - const SerdString* object, SerdType object_type, - const SerdString* object_datatype, - const SerdString* object_lang) +serd_writer_write_statement(SerdWriter writer, + const SerdNode* graph, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* object_datatype, + const SerdNode* object_lang) { assert(subject && predicate && object); switch (writer->syntax) { case SERD_NTRIPLES: - write_node(writer, subject_type, subject, NULL, NULL); + write_node(writer, subject, NULL, NULL); writer->sink(" ", 1, writer->stream); - write_node(writer, predicate_type, predicate, NULL, NULL); + write_node(writer, predicate, NULL, NULL); writer->sink(" ", 1, writer->stream); - if (!write_node(writer, object_type, object, object_datatype, object_lang)) { + if (!write_node(writer, object, object_datatype, object_lang)) { return false; } writer->sink(" .\n", 3, writer->stream); @@ -267,27 +267,26 @@ serd_writer_write_statement( case SERD_TURTLE: break; } - - if (subject == writer->context.subject) { - if (predicate == writer->context.predicate) { // Abbreviate S P + if (subject->buf == writer->context.subject.buf) { + if (predicate->buf == writer->context.predicate.buf) { // Abbreviate S P ++writer->indent; serd_writer_write_delim(writer, ','); - write_node(writer, object_type, object, object_datatype, object_lang); + write_node(writer, object, object_datatype, object_lang); --writer->indent; } else { // Abbreviate S - if (writer->context.predicate) { + if (writer->context.predicate.buf) { serd_writer_write_delim(writer, ';'); } else { ++writer->indent; serd_writer_write_delim(writer, '\n'); } - write_node(writer, predicate_type, predicate, NULL, NULL); - writer->context.predicate = predicate; + write_node(writer, predicate, NULL, NULL); + writer->context.predicate = *predicate; writer->sink(" ", 1, writer->stream); - write_node(writer, object_type, object, object_datatype, object_lang); + write_node(writer, object, object_datatype, object_lang); } } else { - if (writer->context.subject) { + if (writer->context.subject.buf) { if (writer->indent > 0) { --writer->indent; } @@ -297,41 +296,39 @@ serd_writer_write_statement( } } - if (subject_type == SERD_ANON_BEGIN) { + if (subject->type == SERD_ANON_BEGIN) { writer->sink("[ ", 2, writer->stream); ++writer->indent; WriteContext* ctx = (WriteContext*)serd_stack_push( &writer->anon_stack, sizeof(WriteContext)); *ctx = writer->context; - writer->context.subject = subject; - writer->context.predicate = 0; } else { - write_node(writer, subject_type, subject, NULL, NULL); + write_node(writer, subject, NULL, NULL); ++writer->indent; - if (subject_type != SERD_ANON_BEGIN && subject_type != SERD_ANON) { + if (subject->type != SERD_ANON_BEGIN && subject->type != SERD_ANON) { serd_writer_write_delim(writer, '\n'); } } - writer->context.subject = subject; - writer->context.predicate = 0; + writer->context.subject = *subject; + writer->context.predicate = SERD_NODE_NULL; - write_node(writer, predicate_type, predicate, NULL, NULL); - writer->context.predicate = predicate; + write_node(writer, predicate, NULL, NULL); + writer->context.predicate = *predicate; writer->sink(" ", 1, writer->stream); - write_node(writer, object_type, object, object_datatype, object_lang); + write_node(writer, object, object_datatype, object_lang); } - const WriteContext new_context = { graph, subject, predicate }; + const WriteContext new_context = { *graph, *subject, *predicate }; writer->context = new_context; return true; } SERD_API bool -serd_writer_end_anon(SerdWriter writer, - const SerdString* subject) +serd_writer_end_anon(SerdWriter writer, + const SerdNode* node) { if (writer->syntax == SERD_NTRIPLES) { return true; @@ -346,8 +343,8 @@ serd_writer_end_anon(SerdWriter writer, writer->sink("]", 1, writer->stream); writer->context = *anon_stack_top(writer); serd_stack_pop(&writer->anon_stack, sizeof(WriteContext)); - if (!writer->context.subject) { // End of anonymous subject - writer->context.subject = subject; + if (!writer->context.subject.buf) { // End of anonymous subject + writer->context.subject = *node; } return true; } @@ -356,7 +353,7 @@ SERD_API void serd_writer_finish(SerdWriter writer) { - if (writer->context.subject) { + if (writer->context.subject.buf) { writer->sink(" .\n", 3, writer->stream); } } @@ -391,7 +388,7 @@ serd_writer_set_base_uri(SerdWriter writer, { writer->base_uri = *uri; if (writer->syntax != SERD_NTRIPLES) { - if (writer->context.graph || writer->context.subject) { + if (writer->context.graph.buf || writer->context.subject.buf) { writer->sink(" .\n\n", 4, writer->stream); writer->context = WRITE_CONTEXT_NULL; } @@ -404,12 +401,12 @@ serd_writer_set_base_uri(SerdWriter writer, SERD_API void -serd_writer_set_prefix(SerdWriter writer, - const SerdString* name, - const SerdString* uri) +serd_writer_set_prefix(SerdWriter writer, + const SerdNode* name, + const SerdNode* uri) { if (writer->syntax != SERD_NTRIPLES) { - if (writer->context.graph || writer->context.subject) { + if (writer->context.graph.buf || writer->context.subject.buf) { writer->sink(" .\n\n", 4, writer->stream); writer->context = WRITE_CONTEXT_NULL; } |