aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2018-04-29 14:07:29 +0200
committerDavid Robillard <d@drobilla.net>2018-05-27 18:21:57 +0200
commitaa41376304f135bfc54d2b5c16fa8ecd7302ad24 (patch)
tree76f1eba1a5f7dd5ee1e56c74840b8babc4a0e474
parent9252a3f52012e6199b7a27b5c329c226614cc127 (diff)
downloadserd-aa41376304f135bfc54d2b5c16fa8ecd7302ad24.tar.gz
serd-aa41376304f135bfc54d2b5c16fa8ecd7302ad24.tar.bz2
serd-aa41376304f135bfc54d2b5c16fa8ecd7302ad24.zip
Clean up and separate internal headers
-rw-r--r--src/byte_sink.h93
-rw-r--r--src/byte_source.c2
-rw-r--r--src/byte_source.h79
-rw-r--r--src/env.c3
-rw-r--r--src/n3.c4
-rw-r--r--src/node.c13
-rw-r--r--src/node.h44
-rw-r--r--src/reader.c2
-rw-r--r--src/reader.h87
-rw-r--r--src/serd_internal.h502
-rw-r--r--src/serdi.c2
-rw-r--r--src/stack.h113
-rw-r--r--src/string.c30
-rw-r--r--src/string_utils.h165
-rw-r--r--src/uri.c2
-rw-r--r--src/uri_utils.h81
-rw-r--r--src/writer.c14
-rw-r--r--wscript32
18 files changed, 719 insertions, 549 deletions
diff --git a/src/byte_sink.h b/src/byte_sink.h
new file mode 100644
index 00000000..dca08825
--- /dev/null
+++ b/src/byte_sink.h
@@ -0,0 +1,93 @@
+/*
+ Copyright 2011-2018 David Robillard <http://drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#ifndef SERD_BYTE_SINK_H
+#define SERD_BYTE_SINK_H
+
+#include <stddef.h>
+#include <string.h>
+
+#include "serd/serd.h"
+
+/** A writer that can collect bytes into a block buffer before calling a sink. */
+typedef struct SerdByteSinkImpl {
+ SerdSink sink; ///< Function called to emit bytes
+ void* stream; ///< Opaque handle passed as the last argument of `sink`
+ char* buf; ///< Block buffer, allocated only when block_size > 1
+ size_t size; ///< Number of bytes currently held in `buf`
+ size_t block_size; ///< Size of `buf`; when 1, writes bypass the buffer
+} SerdByteSink;
+
+/**
+   Create a byte sink that emits bytes by calling `sink` with `stream`.
+
+   A block buffer of `block_size` bytes is allocated only when `block_size`
+   is greater than 1; otherwise the buffer pointer is left NULL.
+*/
+static inline SerdByteSink
+serd_byte_sink_new(SerdSink sink, void* stream, size_t block_size)
+{
+    char* block = NULL;
+    if (block_size > 1) {
+        block = (char*)serd_bufalloc(block_size);
+    }
+
+    SerdByteSink result;
+    result.sink       = sink;
+    result.stream     = stream;
+    result.buf        = block;
+    result.size       = 0;
+    result.block_size = block_size;
+    return result;
+}
+
+/** Pass any buffered bytes to the sink function and mark the buffer empty. */
+static inline void
+serd_byte_sink_flush(SerdByteSink* bsink)
+{
+    // Nothing to do when unbuffered or when the buffer holds no bytes
+    if (bsink->block_size <= 1 || bsink->size == 0) {
+        return;
+    }
+
+    bsink->sink(bsink->buf, bsink->size, bsink->stream);
+    bsink->size = 0;
+}
+
+/** Flush pending bytes, then release the block buffer and clear the pointer. */
+static inline void
+serd_byte_sink_free(SerdByteSink* bsink)
+{
+    serd_byte_sink_flush(bsink);
+
+    char* const block = bsink->buf;
+    bsink->buf = NULL;
+    free(block);
+}
+
+/**
+   Write `len` bytes from `buf` to `bsink`.
+
+   When the sink has no block buffer (block_size <= 1), the bytes are passed
+   straight to the sink function and its return value is returned.  Otherwise
+   bytes are copied into the block buffer, which is handed to the sink
+   function each time it becomes full, and `len` is returned.
+*/
+static inline size_t
+serd_byte_sink_write(const void* buf, size_t len, SerdByteSink* bsink)
+{
+    if (len == 0) {
+        return 0;
+    } else if (bsink->block_size <= 1) {
+        /* No block buffer exists for these sizes (serd_byte_sink_new only
+           allocates one when block_size > 1).  Treating 0 like 1 avoids
+           copying into a NULL buffer and looping forever without consuming
+           any input. */
+        return bsink->sink(buf, len, bsink->stream);
+    }
+
+    const size_t orig_len = len;
+    while (len) {
+        const size_t space = bsink->block_size - bsink->size;
+        const size_t n     = MIN(space, len);
+
+        // Write as much as possible into the remaining buffer space
+        memcpy(bsink->buf + bsink->size, buf, n);
+        bsink->size += n;
+        buf = (const char*)buf + n;
+        len -= n;
+
+        // Flush page if buffer is full
+        if (bsink->size == bsink->block_size) {
+            bsink->sink(bsink->buf, bsink->block_size, bsink->stream);
+            bsink->size = 0;
+        }
+    }
+    return orig_len;
+}
+
+#endif // SERD_BYTE_SINK_H
diff --git a/src/byte_source.c b/src/byte_source.c
index 74ce14a4..b77c1885 100644
--- a/src/byte_source.c
+++ b/src/byte_source.c
@@ -16,6 +16,8 @@
#include "serd_internal.h"
+#include "byte_source.h"
+
static inline SerdStatus
serd_byte_source_page(SerdByteSource* source)
{
diff --git a/src/byte_source.h b/src/byte_source.h
new file mode 100644
index 00000000..c6c4702e
--- /dev/null
+++ b/src/byte_source.h
@@ -0,0 +1,79 @@
+/*
+ Copyright 2011-2018 David Robillard <http://drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#ifndef SERD_BYTE_SOURCE_H
+#define SERD_BYTE_SOURCE_H
+
+#include "serd_internal.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdio.h>
+
+/** A position in an input (SerdByteSource keeps one for error reporting). */
+typedef struct {
+ const char* filename; ///< Name of the input (presumably a path or stream name; confirm)
+ unsigned line; ///< Line number
+ unsigned col; ///< Column number
+} Cursor;
+
+/** State for reading input a byte at a time from a file, string, or read function. */
+typedef struct {
+ SerdSource read_func; ///< Read function (e.g. fread)
+ SerdStreamErrorFunc error_func; ///< Error function (e.g. ferror)
+ void* stream; ///< Stream (e.g. FILE)
+ size_t page_size; ///< Number of bytes to read at a time
+ Cursor cur; ///< Cursor for error reporting
+ char* file_buf; ///< Buffer iff reading pages from a file
+ const char* read_buf; ///< Pointer to file_buf or read_byte
+ size_t read_head; ///< Offset into read_buf
+ char read_byte; ///< 1-byte 'buffer' used when not paging
+ bool from_stream; ///< True iff reading from `stream`
+ bool prepared; ///< True iff prepared for reading
+ bool eof; ///< True iff end of file reached
+} SerdByteSource;
+
+SerdStatus
+serd_byte_source_open_file(SerdByteSource* source,
+ FILE* file,
+ bool bulk);
+
+SerdStatus
+serd_byte_source_open_string(SerdByteSource* source, const char* utf8);
+
+SerdStatus
+serd_byte_source_open_source(SerdByteSource* source,
+ SerdSource read_func,
+ SerdStreamErrorFunc error_func,
+ void* stream,
+ const char* name,
+ size_t page_size);
+
+SerdStatus
+serd_byte_source_close(SerdByteSource* source);
+
+SerdStatus
+serd_byte_source_prepare(SerdByteSource* source);
+
+/**
+   Return the byte at the current read position without consuming it.
+
+   The source must already be prepared for reading (checked by assertion).
+*/
+static inline uint8_t
+serd_byte_source_peek(SerdByteSource* source)
+{
+    assert(source->prepared);
+
+    const char* const head = source->read_buf + source->read_head;
+    return *head;
+}
+
+SerdStatus
+serd_byte_source_advance(SerdByteSource* source);
+
+#endif // SERD_BYTE_SOURCE_H
diff --git a/src/env.c b/src/env.c
index 6abcef72..fb1fd31d 100644
--- a/src/env.c
+++ b/src/env.c
@@ -19,6 +19,9 @@
#include <stdlib.h>
#include <string.h>
+#include "node.h"
+#include "string_utils.h"
+
typedef struct {
SerdNode* name;
SerdNode* uri;
diff --git a/src/n3.c b/src/n3.c
index 1f4a24ff..7b91f62d 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -25,6 +25,8 @@
#include <string.h>
#include "reader.h"
+#include "string_utils.h"
+#include "uri_utils.h"
#define TRY_THROW(exp) if (!(exp)) goto except;
#define TRY_RET(exp) if (!(exp)) return 0;
@@ -590,7 +592,7 @@ static bool
read_IRIREF_scheme(SerdReader* reader, Ref dest)
{
uint8_t c = peek_byte(reader);
- if (!isalpha(c)) {
+ if (!is_alpha(c)) {
return r_err(reader, SERD_ERR_BAD_SYNTAX,
"bad IRI scheme start `%c'\n", c);
}
diff --git a/src/node.c b/src/node.c
index 664680a9..3f841b18 100644
--- a/src/node.c
+++ b/src/node.c
@@ -16,11 +16,14 @@
#include "serd_internal.h"
+#include <assert.h>
+#include <float.h>
+#include <math.h>
#include <stdlib.h>
#include <string.h>
-#include <math.h>
-#include <float.h>
+#include "node.h"
+#include "string_utils.h"
#ifdef _WIN32
# ifndef isnan
@@ -64,12 +67,6 @@ serd_node_malloc(size_t n_bytes, SerdNodeFlags flags, SerdType type)
return node;
}
-char*
-serd_node_buffer(SerdNode* node)
-{
- return (char*)(node + 1);
-}
-
void
serd_node_set(SerdNode** dst, const SerdNode* src)
{
diff --git a/src/node.h b/src/node.h
new file mode 100644
index 00000000..a9380bcb
--- /dev/null
+++ b/src/node.h
@@ -0,0 +1,44 @@
+/*
+ Copyright 2011-2018 David Robillard <http://drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#ifndef SERD_NODE_H
+#define SERD_NODE_H
+
+#include "serd_internal.h"
+
+/**
+   Node header.
+
+   serd_node_buffer() treats the memory immediately after this struct as the
+   node's data, so a node is presumably allocated as header plus body in one
+   block (confirm against serd_node_malloc).
+*/
+struct SerdNodeImpl {
+ size_t n_bytes; /**< Size in bytes (not including null) */
+ SerdNodeFlags flags; /**< Node flags (e.g. string properties) */
+ SerdType type; /**< Node type */
+};
+
+/** Return a mutable pointer to the memory directly after the node header. */
+static inline char*
+serd_node_buffer(SerdNode* node)
+{
+    SerdNode* const past_header = node + 1;
+    return (char*)past_header;
+}
+
+/** Return a read-only pointer to the memory directly after the node header. */
+static inline const char*
+serd_node_buffer_c(const SerdNode* node)
+{
+    const SerdNode* const past_header = node + 1;
+    return (const char*)past_header;
+}
+
+SerdNode* serd_node_malloc(size_t n_bytes, SerdNodeFlags flags, SerdType type);
+void serd_node_set(SerdNode** dst, const SerdNode* src);
+size_t serd_node_total_size(const SerdNode* node);
+
+#endif // SERD_NODE_H
diff --git a/src/reader.c b/src/reader.c
index eb9ad516..b4cf4f34 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -24,6 +24,8 @@
#include <stdlib.h>
#include <string.h>
+#include "reader.h"
+
static SerdStatus serd_reader_prepare(SerdReader* reader);
int
diff --git a/src/reader.h b/src/reader.h
index d5c8595b..842ba8b8 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -14,8 +14,93 @@
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
+#ifndef SERD_READER_H
+#define SERD_READER_H
+
#include "serd_internal.h"
+#include "byte_source.h"
+#include "node.h"
+#include "stack.h"
+
+#ifdef SERD_STACK_CHECK
+/* Assert that `ref` equals the last entry of the reader's `allocs` array.
+   Arguments are parenthesized so that expression arguments expand safely.
+   The trailing semicolon is kept so existing call sites expand exactly as
+   before, whether or not they add their own. */
+# define SERD_STACK_ASSERT_TOP(reader, ref) \
+    assert((ref) == (reader)->allocs[(reader)->n_allocs - 1]);
+#else
+/* Stack checking is disabled: expands to nothing. */
+# define SERD_STACK_ASSERT_TOP(reader, ref)
+#endif
+
+/* Reference to a node in the stack (we can not use pointers since the
+ stack may be reallocated, invalidating any pointers to elements).
+*/
+typedef size_t Ref;
+
+/** References to the nodes of the statement currently being read, plus flags. */
+typedef struct {
+ Ref graph; ///< Graph node reference
+ Ref subject; ///< Subject node reference
+ Ref predicate; ///< Predicate node reference
+ Ref object; ///< Object node reference
+ Ref datatype; ///< Datatype node reference (presumably of a literal object; confirm)
+ Ref lang; ///< Language node reference (presumably of a literal object; confirm)
+ SerdStatementFlags* flags; ///< Pointer to statement flags stored elsewhere
+} ReadContext;
+
+/** Reader state: user callbacks, input source, node stack, and parse settings. */
+struct SerdReaderImpl {
+ void* handle;
+ void (*free_handle)(void* ptr);
+ SerdBaseSink base_sink;
+ SerdPrefixSink prefix_sink;
+ SerdStatementSink statement_sink;
+ SerdEndSink end_sink;
+ SerdErrorSink error_sink;
+ void* error_handle;
+ Ref rdf_first;
+ Ref rdf_rest;
+ Ref rdf_nil;
+ SerdNode* default_graph;
+ SerdByteSource source; ///< Input being read
+ SerdStack stack; ///< Stack that `Ref` values index into
+ SerdSyntax syntax;
+ unsigned next_id;
+ SerdStatus status;
+ uint8_t* buf;
+ char* bprefix;
+ size_t bprefix_len;
+ bool strict; ///< True iff strict parsing
+ bool seen_genid;
+#ifdef SERD_STACK_CHECK
+ Ref* allocs; ///< Stack of push offsets
+ size_t n_allocs; ///< Number of stack pushes
+#endif
+};
+
+int r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...);
+
+Ref push_node_padded(SerdReader* reader,
+ size_t maxlen,
+ SerdType type,
+ const char* str,
+ size_t n_bytes);
+
+Ref push_node(SerdReader* reader,
+ SerdType type,
+ const char* str,
+ size_t n_bytes);
+
+size_t genid_size(SerdReader* reader);
+Ref blank_id(SerdReader* reader);
+void set_blank_id(SerdReader* reader, Ref ref, size_t buf_size);
+
+SerdNode* deref(SerdReader* reader, Ref ref);
+
+Ref pop_node(SerdReader* reader, Ref ref);
+
+bool emit_statement(SerdReader* reader, ReadContext ctx, Ref o, Ref d, Ref l);
+
+bool read_n3_statement(SerdReader* reader);
+SerdStatus read_nquadsDoc(SerdReader* reader);
+SerdStatus read_turtleTrigDoc(SerdReader* reader);
+
static inline uint8_t
peek_byte(SerdReader* reader)
{
@@ -81,3 +166,5 @@ push_bytes(SerdReader* reader, Ref ref, const uint8_t* bytes, unsigned len)
push_byte(reader, ref, bytes[i]);
}
}
+
+#endif // SERD_READER_H
diff --git a/src/serd_internal.h b/src/serd_internal.h
index 6e0d7a8c..fbf493e9 100644
--- a/src/serd_internal.h
+++ b/src/serd_internal.h
@@ -19,8 +19,6 @@
#define _POSIX_C_SOURCE 200809L /* for posix_memalign and posix_fadvise */
-#include <assert.h>
-#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
@@ -42,22 +40,6 @@
# define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
-static const uint8_t replacement_char[] = { 0xEF, 0xBF, 0xBD };
-
-struct SerdNodeImpl {
- size_t n_bytes; /**< Size in bytes (not including null) */
- SerdNodeFlags flags; /**< Node flags (e.g. string properties) */
- SerdType type; /**< Node type */
-};
-
-static const SerdNode SERD_NODE_NULL = { 0, 0, SERD_NOTHING };
-
-SerdNode* serd_node_malloc(size_t n_bytes, SerdNodeFlags flags, SerdType type);
-void serd_node_set(SerdNode** dst, const SerdNode* src);
-char* serd_node_buffer(SerdNode* node);
-
-/* File and Buffer Utilities */
-
static inline FILE*
serd_fopen(const char* path, const char* mode)
{
@@ -85,401 +67,6 @@ serd_bufalloc(size_t size)
#endif
}
-/* Byte source */
-
-typedef struct {
- const char* filename;
- unsigned line;
- unsigned col;
-} Cursor;
-
-typedef struct {
- SerdSource read_func; ///< Read function (e.g. fread)
- SerdStreamErrorFunc error_func; ///< Error function (e.g. ferror)
- void* stream; ///< Stream (e.g. FILE)
- size_t page_size; ///< Number of bytes to read at a time
- Cursor cur; ///< Cursor for error reporting
- char* file_buf; ///< Buffer iff reading pages from a file
- const char* read_buf; ///< Pointer to file_buf or read_byte
- size_t read_head; ///< Offset into read_buf
- char read_byte; ///< 1-byte 'buffer' used when not paging
- bool from_stream; ///< True iff reading from `stream`
- bool prepared; ///< True iff prepared for reading
- bool eof; ///< True iff end of file reached
-} SerdByteSource;
-
-SerdStatus
-serd_byte_source_open_file(SerdByteSource* source,
- FILE* file,
- bool bulk);
-
-SerdStatus
-serd_byte_source_open_string(SerdByteSource* source, const char* utf8);
-
-SerdStatus
-serd_byte_source_open_source(SerdByteSource* source,
- SerdSource read_func,
- SerdStreamErrorFunc error_func,
- void* stream,
- const char* name,
- size_t page_size);
-
-SerdStatus
-serd_byte_source_close(SerdByteSource* source);
-
-SerdStatus
-serd_byte_source_prepare(SerdByteSource* source);
-
-static inline uint8_t
-serd_byte_source_peek(SerdByteSource* source)
-{
- assert(source->prepared);
- return source->read_buf[source->read_head];
-}
-
-SerdStatus
-serd_byte_source_advance(SerdByteSource* source);
-
-/* Stack */
-
-/** A dynamic stack in memory. */
-typedef struct {
- char* buf; ///< Stack memory
- size_t buf_size; ///< Allocated size of buf (>= size)
- size_t size; ///< Conceptual size of stack in buf
-} SerdStack;
-
-/** An offset to start the stack at. Note 0 is reserved for NULL. */
-#define SERD_STACK_BOTTOM sizeof(void*)
-
-static inline SerdStack
-serd_stack_new(size_t size)
-{
- SerdStack stack;
- stack.buf = (char*)malloc(size);
- stack.buf_size = size;
- stack.size = SERD_STACK_BOTTOM;
- return stack;
-}
-
-static inline bool
-serd_stack_is_empty(SerdStack* stack)
-{
- return stack->size <= SERD_STACK_BOTTOM;
-}
-
-static inline void
-serd_stack_free(SerdStack* stack)
-{
- free(stack->buf);
- stack->buf = NULL;
- stack->buf_size = 0;
- stack->size = 0;
-}
-
-static inline void*
-serd_stack_push(SerdStack* stack, size_t n_bytes)
-{
- const size_t new_size = stack->size + n_bytes;
- if (stack->buf_size < new_size) {
- stack->buf_size += (stack->buf_size >> 1); // *= 1.5
- stack->buf = (char*)realloc(stack->buf, stack->buf_size);
- }
- char* const ret = (stack->buf + stack->size);
- stack->size = new_size;
- return ret;
-}
-
-static inline void
-serd_stack_pop(SerdStack* stack, size_t n_bytes)
-{
- assert(stack->size >= n_bytes);
- stack->size -= n_bytes;
-}
-
-static inline void*
-serd_stack_push_aligned(SerdStack* stack, size_t n_bytes, size_t align)
-{
- // Push one byte to ensure space for a pad count
- serd_stack_push(stack, 1);
-
- // Push padding if necessary
- const uint8_t pad = align - stack->size % align;
- if (pad > 0) {
- serd_stack_push(stack, pad);
- }
-
- // Set top of stack to pad count so we can properly pop later
- stack->buf[stack->size - 1] = pad;
-
- // Push requested space at aligned location
- return serd_stack_push(stack, n_bytes);
-}
-
-static inline void
-serd_stack_pop_aligned(SerdStack* stack, size_t n_bytes)
-{
- // Pop requested space down to aligned location
- serd_stack_pop(stack, n_bytes);
-
- // Get amount of padding from top of stack
- const uint8_t pad = stack->buf[stack->size - 1];
-
- // Pop padding and pad count
- serd_stack_pop(stack, pad + 1);
-}
-
-/* Byte Sink */
-
-typedef struct SerdByteSinkImpl {
- SerdSink sink;
- void* stream;
- char* buf;
- size_t size;
- size_t block_size;
-} SerdByteSink;
-
-static inline SerdByteSink
-serd_byte_sink_new(SerdSink sink, void* stream, size_t block_size)
-{
- SerdByteSink bsink;
- bsink.sink = sink;
- bsink.stream = stream;
- bsink.size = 0;
- bsink.block_size = block_size;
- bsink.buf = ((block_size > 1)
- ? (char*)serd_bufalloc(block_size)
- : NULL);
- return bsink;
-}
-
-static inline void
-serd_byte_sink_flush(SerdByteSink* bsink)
-{
- if (bsink->block_size > 1 && bsink->size > 0) {
- bsink->sink(bsink->buf, bsink->size, bsink->stream);
- bsink->size = 0;
- }
-}
-
-static inline void
-serd_byte_sink_free(SerdByteSink* bsink)
-{
- serd_byte_sink_flush(bsink);
- free(bsink->buf);
- bsink->buf = NULL;
-}
-
-static inline size_t
-serd_byte_sink_write(const void* buf, size_t len, SerdByteSink* bsink)
-{
- if (len == 0) {
- return 0;
- } else if (bsink->block_size == 1) {
- return bsink->sink(buf, len, bsink->stream);
- }
-
- const size_t orig_len = len;
- while (len) {
- const size_t space = bsink->block_size - bsink->size;
- const size_t n = MIN(space, len);
-
- // Write as much as possible into the remaining buffer space
- memcpy(bsink->buf + bsink->size, buf, n);
- bsink->size += n;
- buf = (const char*)buf + n;
- len -= n;
-
- // Flush page if buffer is full
- if (bsink->size == bsink->block_size) {
- bsink->sink(bsink->buf, bsink->block_size, bsink->stream);
- bsink->size = 0;
- }
- }
- return orig_len;
-}
-
-/* Character utilities */
-
-/** Return true if `c` lies within [`min`...`max`] (inclusive) */
-static inline bool
-in_range(const char c, const char min, const char max)
-{
- return (c >= min && c <= max);
-}
-
-/** RFC2234: ALPHA ::= %x41-5A / %x61-7A ; A-Z / a-z */
-static inline bool
-is_alpha(const char c)
-{
- return in_range(c, 'A', 'Z') || in_range(c, 'a', 'z');
-}
-
-/** RFC2234: DIGIT ::= %x30-39 ; 0-9 */
-static inline bool
-is_digit(const char c)
-{
- return in_range(c, '0', '9');
-}
-
-/* RFC2234: HEXDIG ::= DIGIT / "A" / "B" / "C" / "D" / "E" / "F" */
-static inline bool
-is_hexdig(const uint8_t c)
-{
- return is_digit(c) || in_range(c, 'A', 'F');
-}
-
-/* Turtle / JSON / C: XDIGIT ::= DIGIT / A-F / a-f */
-static inline bool
-is_xdigit(const uint8_t c)
-{
- return is_hexdig(c) || in_range(c, 'a', 'f');
-}
-
-static inline bool
-is_space(const char c)
-{
- switch (c) {
- case ' ': case '\f': case '\n': case '\r': case '\t': case '\v':
- return true;
- default:
- return false;
- }
-}
-
-static inline bool
-is_base64(const char c)
-{
- return is_alpha(c) || is_digit(c) || c == '+' || c == '/' || c == '=';
-}
-
-static inline bool
-is_windows_path(const char* path)
-{
- return is_alpha(path[0]) && (path[1] == ':' || path[1] == '|')
- && (path[2] == '/' || path[2] == '\\');
-}
-
-/* String utilities */
-
-size_t
-serd_substrlen(const char* str,
- const size_t len,
- SerdNodeFlags* flags);
-
-static inline int
-serd_strncasecmp(const char* s1, const char* s2, size_t n)
-{
- for (; n > 0 && *s2; s1++, s2++, --n) {
- if (toupper(*s1) != toupper(*s2)) {
- return ((*(uint8_t*)s1 < *(uint8_t*)s2) ? -1 : +1);
- }
- }
- return 0;
-}
-
-static inline uint32_t
-utf8_num_bytes(const uint8_t c)
-{
- if ((c & 0x80) == 0) { // Starts with `0'
- return 1;
- } else if ((c & 0xE0) == 0xC0) { // Starts with `110'
- return 2;
- } else if ((c & 0xF0) == 0xE0) { // Starts with `1110'
- return 3;
- } else if ((c & 0xF8) == 0xF0) { // Starts with `11110'
- return 4;
- }
- return 0;
-}
-
-/// Return the code point of a UTF-8 character with known length
-static inline uint32_t
-parse_counted_utf8_char(const uint8_t* utf8, size_t size)
-{
- uint32_t c = utf8[0] & ((1 << (8 - size)) - 1);
- for (size_t i = 1; i < size; ++i) {
- const uint8_t in = utf8[i] & 0x3F;
- c = (c << 6) | in;
- }
- return c;
-}
-
-/// Parse a UTF-8 character, set *size to the length, and return the code point
-static inline uint32_t
-parse_utf8_char(const uint8_t* utf8, size_t* size)
-{
- switch (*size = utf8_num_bytes(utf8[0])) {
- case 1: case 2: case 3: case 4:
- return parse_counted_utf8_char(utf8, *size);
- default:
- return *size = 0;
- }
-}
-
-/* URI utilities */
-
-static inline bool
-slice_equals(const SerdSlice* a, const SerdSlice* b)
-{
- return a->len == b->len
- && !strncmp((const char*)a->buf, (const char*)b->buf, a->len);
-}
-
-static inline size_t
-uri_path_len(const SerdURI* uri)
-{
- return uri->path_base.len + uri->path.len;
-}
-
-static inline char
-uri_path_at(const SerdURI* uri, size_t i)
-{
- if (i < uri->path_base.len) {
- return uri->path_base.buf[i];
- } else {
- return uri->path.buf[i - uri->path_base.len];
- }
-}
-
-/** Return true iff `uri` is within the base of `root` */
-static inline bool
-uri_is_under(const SerdURI* uri, const SerdURI* root)
-{
- if (!root || !root->scheme.len ||
- !slice_equals(&root->scheme, &uri->scheme) ||
- !slice_equals(&root->authority, &uri->authority)) {
- return false;
- }
-
- bool differ = false;
- const size_t path_len = uri_path_len(uri);
- const size_t root_len = uri_path_len(root);
- for (size_t i = 0; i < path_len && i < root_len; ++i) {
- if (uri_path_at(uri, i) != uri_path_at(root, i)) {
- differ = true;
- }
- if (differ && uri_path_at(root, i) == '/') {
- return false;
- }
- }
-
- return true;
-}
-
-static inline bool
-is_uri_scheme_char(const uint8_t c)
-{
- switch (c) {
- case ':': case '+': case '-': case '.':
- return true;
- default:
- return is_alpha(c) || is_digit(c);
- }
-}
-
-/* Error reporting */
-
static inline void
serd_error(SerdErrorSink error_sink, void* handle, const SerdError* e)
{
@@ -491,93 +78,4 @@ serd_error(SerdErrorSink error_sink, void* handle, const SerdError* e)
}
}
-int
-r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...);
-
-/* Reader */
-
-#ifdef SERD_STACK_CHECK
-# define SERD_STACK_ASSERT_TOP(reader, ref) \
- assert(ref == reader->allocs[reader->n_allocs - 1]);
-#else
-# define SERD_STACK_ASSERT_TOP(reader, ref)
-#endif
-
-/* Reference to a node in the stack (we can not use pointers since the
- stack may be reallocated, invalidating any pointers to elements).
-*/
-typedef size_t Ref;
-
-typedef struct {
- Ref graph;
- Ref subject;
- Ref predicate;
- Ref object;
- Ref datatype;
- Ref lang;
- SerdStatementFlags* flags;
-} ReadContext;
-
-struct SerdReaderImpl {
- void* handle;
- void (*free_handle)(void* ptr);
- SerdBaseSink base_sink;
- SerdPrefixSink prefix_sink;
- SerdStatementSink statement_sink;
- SerdEndSink end_sink;
- SerdErrorSink error_sink;
- void* error_handle;
- Ref rdf_first;
- Ref rdf_rest;
- Ref rdf_nil;
- SerdNode* default_graph;
- SerdByteSource source;
- SerdStack stack;
- SerdSyntax syntax;
- unsigned next_id;
- SerdStatus status;
- uint8_t* buf;
- char* bprefix;
- size_t bprefix_len;
- bool strict; ///< True iff strict parsing
- bool seen_genid;
-#ifdef SERD_STACK_CHECK
- Ref* allocs; ///< Stack of push offsets
- size_t n_allocs; ///< Number of stack pushes
-#endif
-};
-
-Ref push_node_padded(SerdReader* reader,
- size_t maxlen,
- SerdType type,
- const char* str,
- size_t n_bytes);
-
-Ref push_node(SerdReader* reader,
- SerdType type,
- const char* str,
- size_t n_bytes);
-
-size_t genid_size(SerdReader* reader);
-Ref blank_id(SerdReader* reader);
-void set_blank_id(SerdReader* reader, Ref ref, size_t buf_size);
-
-SerdNode* deref(SerdReader* reader, Ref ref);
-
-Ref pop_node(SerdReader* reader, Ref ref);
-
-bool emit_statement(SerdReader* reader, ReadContext ctx, Ref o, Ref d, Ref l);
-
-bool read_n3_statement(SerdReader* reader);
-SerdStatus read_nquadsDoc(SerdReader* reader);
-SerdStatus read_turtleTrigDoc(SerdReader* reader);
-
-typedef enum {
- FIELD_NONE,
- FIELD_SUBJECT,
- FIELD_PREDICATE,
- FIELD_OBJECT,
- FIELD_GRAPH
-} Field;
-
#endif // SERD_INTERNAL_H
diff --git a/src/serdi.c b/src/serdi.c
index caa1d9af..134cd369 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -21,6 +21,8 @@
#include <stdlib.h>
#include <string.h>
+#include "string_utils.h"
+
#define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg);
#define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__);
diff --git a/src/stack.h b/src/stack.h
new file mode 100644
index 00000000..e35041c8
--- /dev/null
+++ b/src/stack.h
@@ -0,0 +1,113 @@
+/*
+ Copyright 2011-2018 David Robillard <http://drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#ifndef SERD_STACK_H
+#define SERD_STACK_H
+
+#include "serd_internal.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+/** An offset to start the stack at. Note 0 is reserved for NULL. */
+#define SERD_STACK_BOTTOM sizeof(void*)
+
+/**
+   A dynamic stack in memory.
+
+   A new stack starts with `size` equal to SERD_STACK_BOTTOM rather than 0,
+   and the buffer may be reallocated when more space is pushed.
+*/
+typedef struct {
+ char* buf; ///< Stack memory
+ size_t buf_size; ///< Allocated size of buf (>= size)
+ size_t size; ///< Conceptual size of stack in buf
+} SerdStack;
+
+/**
+   Create a stack with `size` bytes of allocated memory.
+
+   The conceptual size starts at SERD_STACK_BOTTOM.  The allocation result is
+   stored as-is, without a failure check.
+*/
+static inline SerdStack
+serd_stack_new(size_t size)
+{
+    SerdStack fresh;
+    fresh.size     = SERD_STACK_BOTTOM;
+    fresh.buf_size = size;
+    fresh.buf      = (char*)malloc(size);
+    return fresh;
+}
+
+/** Return true iff nothing is on the stack above SERD_STACK_BOTTOM. */
+static inline bool
+serd_stack_is_empty(SerdStack* stack)
+{
+    const bool has_content = stack->size > SERD_STACK_BOTTOM;
+    return !has_content;
+}
+
+/** Release the stack memory and reset every field to zero/NULL. */
+static inline void
+serd_stack_free(SerdStack* stack)
+{
+    char* const memory = stack->buf;
+
+    stack->size     = 0;
+    stack->buf_size = 0;
+    stack->buf      = NULL;
+    free(memory);
+}
+
+/**
+   Reserve `n_bytes` at the top of the stack and return a pointer to them.
+
+   The buffer grows by factors of 1.5 until the request fits, so the returned
+   pointer always lies within allocated memory.  Growing may move the buffer,
+   which invalidates pointers returned by earlier pushes.
+
+   Returns NULL, leaving the stack unchanged, if the new size would overflow
+   or if reallocation fails.
+*/
+static inline void*
+serd_stack_push(SerdStack* stack, size_t n_bytes)
+{
+    const size_t new_size = stack->size + n_bytes;
+    if (new_size < stack->size) {
+        return NULL; // size + n_bytes wrapped around
+    }
+
+    if (stack->buf_size < new_size) {
+        // A single 1.5x step is not always enough, so grow until it fits
+        size_t new_buf_size = stack->buf_size;
+        while (new_buf_size < new_size) {
+            const size_t grown = new_buf_size + (new_buf_size >> 1);
+
+            // Jump to the exact size if 1.5x makes no progress or wraps
+            new_buf_size = (grown > new_buf_size) ? grown : new_size;
+        }
+
+        // Only replace the buffer on success so the old one is never leaked
+        char* const new_buf = (char*)realloc(stack->buf, new_buf_size);
+        if (!new_buf) {
+            return NULL;
+        }
+
+        stack->buf      = new_buf;
+        stack->buf_size = new_buf_size;
+    }
+
+    char* const ret = stack->buf + stack->size;
+    stack->size = new_size;
+    return ret;
+}
+
+/** Remove the top `n_bytes` from the stack (the memory itself is kept). */
+static inline void
+serd_stack_pop(SerdStack* stack, size_t n_bytes)
+{
+    assert(n_bytes <= stack->size);
+    stack->size = stack->size - n_bytes;
+}
+
+/**
+   Reserve `n_bytes` at a stack offset that is a multiple of `align`.
+
+   One byte plus padding is pushed first, and the padding amount is recorded
+   in the byte just below the returned space so that
+   serd_stack_pop_aligned() can undo the push.
+
+   NOTE(review): the padding amount is held in a uint8_t, so it is truncated
+   modulo 256; alignments above 255 look unsupported -- confirm with callers.
+*/
+static inline void*
+serd_stack_push_aligned(SerdStack* stack, size_t n_bytes, size_t align)
+{
+ // Push one byte to ensure space for a pad count
+ serd_stack_push(stack, 1);
+
+ // Push padding if necessary
+ // (before truncation this is in [1, align]: an already aligned size pads by align)
+ const uint8_t pad = align - stack->size % align;
+ if (pad > 0) {
+ serd_stack_push(stack, pad);
+ }
+
+ // Set top of stack to pad count so we can properly pop later
+ stack->buf[stack->size - 1] = pad;
+
+ // Push requested space at aligned location
+ return serd_stack_push(stack, n_bytes);
+}
+
+/**
+   Undo a serd_stack_push_aligned() of `n_bytes`.
+
+   Removes the requested space, then reads the recorded pad count from the
+   new top byte and removes that many bytes plus the count byte itself.
+*/
+static inline void
+serd_stack_pop_aligned(SerdStack* stack, size_t n_bytes)
+{
+    serd_stack_pop(stack, n_bytes);
+
+    const size_t  count_index = stack->size - 1;
+    const uint8_t pad         = stack->buf[count_index];
+
+    serd_stack_pop(stack, (size_t)pad + 1);
+}
+
+#endif // SERD_STACK_H
diff --git a/src/string.c b/src/string.c
index 238c41a9..7b8fc662 100644
--- a/src/string.c
+++ b/src/string.c
@@ -18,6 +18,8 @@
#include <math.h>
+#include "string_utils.h"
+
SERD_API
const char*
serd_strerror(SerdStatus status)
@@ -36,34 +38,6 @@ serd_strerror(SerdStatus status)
return "Unknown error"; // never reached
}
-static inline void
-serd_update_flags(const uint8_t c, SerdNodeFlags* const flags)
-{
- switch (c) {
- case '\r': case '\n':
- *flags |= SERD_HAS_NEWLINE;
- break;
- case '"':
- *flags |= SERD_HAS_QUOTE;
- }
-}
-
-size_t
-serd_substrlen(const char* const str,
- const size_t len,
- SerdNodeFlags* const flags)
-{
- if (flags) {
- size_t i = 0;
- *flags = 0;
- for (; i < len && str[i]; ++i) {
- serd_update_flags(str[i], flags);
- }
- return i;
- }
- return strlen(str);
-}
-
SERD_API
size_t
serd_strlen(const char* str, SerdNodeFlags* flags)
diff --git a/src/string_utils.h b/src/string_utils.h
new file mode 100644
index 00000000..939ddde0
--- /dev/null
+++ b/src/string_utils.h
@@ -0,0 +1,165 @@
+/*
+ Copyright 2011-2018 David Robillard <http://drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#ifndef SERD_STRING_UTILS_H
+#define SERD_STRING_UTILS_H
+
+#include <ctype.h>
+
+/** Unicode replacement character in UTF-8 */
+static const uint8_t replacement_char[] = { 0xEF, 0xBF, 0xBD };
+
+/** Return true iff `min` <= `c` <= `max` (both bounds inclusive) */
+static inline bool
+in_range(const char c, const char min, const char max)
+{
+    const bool outside = (c < min) || (c > max);
+    return !outside;
+}
+
+/** RFC2234: ALPHA ::= %x41-5A / %x61-7A ; A-Z / a-z */
+static inline bool
+is_alpha(const char c)
+{
+    const bool upper = (c >= 'A') && (c <= 'Z');
+    const bool lower = (c >= 'a') && (c <= 'z');
+    return upper || lower;
+}
+
+/** RFC2234: DIGIT ::= %x30-39 ; 0-9 */
+static inline bool
+is_digit(const char c)
+{
+    return (c >= '0') && (c <= '9');
+}
+
+/** RFC2234: HEXDIG ::= DIGIT / "A" / "B" / "C" / "D" / "E" / "F" (upper case only) */
+static inline bool
+is_hexdig(const uint8_t c)
+{
+    if (is_digit(c)) {
+        return true;
+    }
+
+    return in_range(c, 'A', 'F');
+}
+
+/** Turtle / JSON / C: XDIGIT ::= DIGIT / A-F / a-f (either case) */
+static inline bool
+is_xdigit(const uint8_t c)
+{
+    if (is_hexdig(c)) {
+        return true;
+    }
+
+    return in_range(c, 'a', 'f');
+}
+
+/** Return true iff `c` is a space, form feed, newline, return, tab, or vertical tab. */
+static inline bool
+is_space(const char c)
+{
+    return c == ' '  || c == '\f' || c == '\n'
+        || c == '\r' || c == '\t' || c == '\v';
+}
+
+/** Return true iff `c` may appear in base64 text: a letter, digit, '+', '/', or '='. */
+static inline bool
+is_base64(const char c)
+{
+    if (c == '+' || c == '/' || c == '=') {
+        return true;
+    }
+
+    return is_alpha(c) || is_digit(c);
+}
+
+/**
+   Return true iff `path` starts like a Windows path with a drive letter:
+   a letter, then ':' or '|', then '/' or '\'.
+
+   Characters are examined in order and checking stops at the first mismatch.
+*/
+static inline bool
+is_windows_path(const char* path)
+{
+    if (!is_alpha(path[0])) {
+        return false;
+    }
+
+    if (path[1] != ':' && path[1] != '|') {
+        return false;
+    }
+
+    return path[2] == '/' || path[2] == '\\';
+}
+
+/**
+   Set node flags implied by character `c`.
+
+   A carriage return or newline sets SERD_HAS_NEWLINE, a double quote sets
+   SERD_HAS_QUOTE, and any other character leaves `flags` untouched.
+*/
+static inline void
+serd_update_flags(const uint8_t c, SerdNodeFlags* const flags)
+{
+    if (c == '\r' || c == '\n') {
+        *flags |= SERD_HAS_NEWLINE;
+    } else if (c == '"') {
+        *flags |= SERD_HAS_QUOTE;
+    }
+}
+
+/**
+   Measure a string, optionally collecting node flags.
+
+   With `flags` set, at most `len` bytes are scanned (stopping early at a null
+   byte), `*flags` is reset and then updated for every scanned byte, and the
+   number of bytes scanned is returned.  With `flags` NULL, `len` is ignored
+   and strlen(str) is returned.
+*/
+static inline size_t
+serd_substrlen(const char* const str,
+               const size_t len,
+               SerdNodeFlags* const flags)
+{
+    if (!flags) {
+        return strlen(str);
+    }
+
+    *flags = 0;
+
+    size_t n_scanned = 0;
+    while (n_scanned < len && str[n_scanned]) {
+        serd_update_flags(str[n_scanned], flags);
+        ++n_scanned;
+    }
+    return n_scanned;
+}
+
+/**
+   Compare up to `n` bytes of `s1` and `s2`, ignoring case.
+
+   Comparison stops after `n` bytes or at the end of `s2`.  Returns 0 if no
+   difference was found.  Otherwise returns -1 or +1 according to the raw
+   (not case-folded) byte values at the first position that differs.
+*/
+static inline int
+serd_strncasecmp(const char* s1, const char* s2, size_t n)
+{
+    for (; n > 0 && *s2; s1++, s2++, --n) {
+        /* toupper() requires a value representable as unsigned char (or
+           EOF); passing a negative plain char is undefined behaviour */
+        const unsigned char c1 = (unsigned char)*s1;
+        const unsigned char c2 = (unsigned char)*s2;
+        if (toupper(c1) != toupper(c2)) {
+            return (c1 < c2) ? -1 : +1;
+        }
+    }
+    return 0;
+}
+
+/**
+   Return the length in bytes of the UTF-8 character that starts with byte
+   `c` (1 to 4), or 0 if `c` is not a valid leading byte.
+*/
+static inline uint32_t
+utf8_num_bytes(const uint8_t c)
+{
+    if (c < 0x80) { // 0xxxxxxx
+        return 1;
+    }
+    if ((c >> 5) == 0x06) { // 110xxxxx
+        return 2;
+    }
+    if ((c >> 4) == 0x0E) { // 1110xxxx
+        return 3;
+    }
+    if ((c >> 3) == 0x1E) { // 11110xxx
+        return 4;
+    }
+    return 0;
+}
+
+/// Decode the `size`-byte UTF-8 character at `utf8` and return its code point
+static inline uint32_t
+parse_counted_utf8_char(const uint8_t* utf8, size_t size)
+{
+    // The leading byte contributes its low (8 - size) bits
+    uint32_t code = utf8[0] & ((1 << (8 - size)) - 1);
+
+    // Every following byte contributes its low six bits
+    size_t i = 1;
+    while (i < size) {
+        code = (code << 6) | (utf8[i] & 0x3F);
+        ++i;
+    }
+    return code;
+}
+
+/// Decode the UTF-8 character at `utf8`, storing its length in *size (0 and a 0 return if the leading byte is invalid)
+static inline uint32_t
+parse_utf8_char(const uint8_t* utf8, size_t* size)
+{
+    *size = utf8_num_bytes(utf8[0]);
+
+    if (*size < 1 || *size > 4) {
+        *size = 0;
+        return 0;
+    }
+
+    return parse_counted_utf8_char(utf8, *size);
+}
+
+#endif // SERD_STRING_UTILS_H
diff --git a/src/uri.c b/src/uri.c
index b118b796..b128d609 100644
--- a/src/uri.c
+++ b/src/uri.c
@@ -19,6 +19,8 @@
#include <stdlib.h>
#include <string.h>
+#include "uri_utils.h"
+
// #define URI_DEBUG 1
SERD_API
diff --git a/src/uri_utils.h b/src/uri_utils.h
new file mode 100644
index 00000000..35430250
--- /dev/null
+++ b/src/uri_utils.h
@@ -0,0 +1,81 @@
+/*
+ Copyright 2011-2018 David Robillard <http://drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#ifndef SERD_URI_UTILS_H
+#define SERD_URI_UTILS_H
+
+#include "string_utils.h"
+
+/**
+   Return true iff slices `a` and `b` have the same length and content.
+
+   Content is compared with strncmp(), so comparison ends at the first null
+   byte if one occurs within the slice.
+*/
+static inline bool
+slice_equals(const SerdSlice* a, const SerdSlice* b)
+{
+    if (a->len != b->len) {
+        return false;
+    }
+
+    // Two empty slices are equal without looking at `buf`; this also avoids
+    // handing strncmp() a null pointer (presumably what an unset slice holds)
+    if (a->len == 0) {
+        return true;
+    }
+
+    return !strncmp((const char*)a->buf, (const char*)b->buf, a->len);
+}
+
+/// Return the combined length of the `path_base` and `path` slices of `uri`
+static inline size_t
+uri_path_len(const SerdURI* uri)
+{
+    const size_t base_len = uri->path_base.len;
+    const size_t tail_len = uri->path.len;
+
+    return base_len + tail_len;
+}
+
+/// Return the character at index `i` of `path_base` followed by `path`
+static inline char
+uri_path_at(const SerdURI* uri, size_t i)
+{
+    const size_t base_len = uri->path_base.len;
+
+    // Indices past the end of path_base continue into path
+    return (i < base_len) ? uri->path_base.buf[i]
+                          : uri->path.buf[i - base_len];
+}
+
+/**
+   Return true iff `uri` is within the base of `root`.
+
+   Always false if `root` is NULL, has no scheme, or differs from `uri` in
+   scheme or authority.  Otherwise the two paths are compared over their
+   common length, and the result is false only if `root` has a '/' at or
+   after the first differing character.
+*/
+static inline bool
+uri_is_under(const SerdURI* uri, const SerdURI* root)
+{
+    if (!root || !root->scheme.len) {
+        return false;
+    }
+
+    if (!slice_equals(&root->scheme, &uri->scheme) ||
+        !slice_equals(&root->authority, &uri->authority)) {
+        return false;
+    }
+
+    const size_t path_len   = uri_path_len(uri);
+    const size_t root_len   = uri_path_len(root);
+    const size_t common_len = (path_len < root_len) ? path_len : root_len;
+
+    bool differ = false;
+    for (size_t i = 0; i < common_len; ++i) {
+        const char root_char = uri_path_at(root, i);
+
+        // Once the paths have diverged they stay diverged
+        if (!differ) {
+            differ = (uri_path_at(uri, i) != root_char);
+        }
+
+        if (differ && root_char == '/') {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/// Return true iff `c` is ':', '+', '-', '.', or accepted by is_alpha() or is_digit()
+static inline bool
+is_uri_scheme_char(const uint8_t c)
+{
+    if (c == ':' || c == '+' || c == '-' || c == '.') {
+        return true;
+    }
+
+    return is_alpha(c) || is_digit(c);
+}
+
+#endif // SERD_URI_UTILS_H
diff --git a/src/writer.c b/src/writer.c
index 05f83b0f..e14814bb 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -21,12 +21,26 @@
#include <stdlib.h>
#include <string.h>
+#include "byte_sink.h"
+#include "node.h"
+#include "stack.h"
+#include "string_utils.h"
+#include "uri_utils.h"
+
/// Graph, subject, and predicate node pointers grouped as one writer context
typedef struct {
 SerdNode* graph;
 SerdNode* subject;
 SerdNode* predicate;
} WriteContext;
+/// Names one of subject, predicate, object, or graph, or none of them
+/// (presumably the position of a node within a statement; confirm against uses)
+typedef enum {
+ FIELD_NONE,
+ FIELD_SUBJECT,
+ FIELD_PREDICATE,
+ FIELD_OBJECT,
+ FIELD_GRAPH
+} Field;
+
static const WriteContext WRITE_CONTEXT_NULL = { NULL, NULL, NULL };
typedef enum {
diff --git a/wscript b/wscript
index f77d2437..3849543d 100644
--- a/wscript
+++ b/wscript
@@ -188,23 +188,35 @@ def lint(ctx):
def amalgamate(ctx):
"builds single-file amalgamated source"
import shutil
+ import re
shutil.copy('serd/serd.h', 'build/serd.h')
+
+ def include_line(line):
+     # True unless `line` includes a header with no directory component or
+     # "serd/serd.h"; such includes are filtered out of the amalgamation.
+     # The first pattern is a raw string so that `\.` reaches the regex engine
+     # instead of being an invalid string escape.
+     return (not re.match(r'#include "[^/]*\.h"', line) and
+             not re.match('#include "serd/serd.h"', line))
+
with open('build/serd.c', 'w') as amalgamation:
- with open('src/serd_internal.h') as serd_internal_h:
- for l in serd_internal_h:
- amalgamation.write(l.replace('serd/serd.h', 'serd.h'))
+ amalgamation.write('/* This is amalgamated code, do not edit! */\n')
+ amalgamation.write('#include "serd.h"\n\n')
+
+ for header_path in ['src/serd_internal.h',
+ 'src/byte_sink.h',
+ 'src/byte_source.h',
+ 'src/stack.h',
+ 'src/string_utils.h',
+ 'src/uri_utils.h',
+ 'src/reader.h']:
+ with open(header_path) as header:
+ for l in header:
+ if include_line(l):
+ amalgamation.write(l)
for f in lib_source:
with open(f) as fd:
amalgamation.write('\n/**\n @file %s\n*/' % f)
- header = True
for l in fd:
- if header:
- if l == '*/\n':
- header = False
- else:
- if l != '#include "serd_internal.h"\n':
- amalgamation.write(l)
+ if include_line(l):
+ amalgamation.write(l)
for i in ['c', 'h']:
Logs.info('Wrote build/serd.%s' % i)