diff options
author | David Robillard <d@drobilla.net> | 2020-08-14 15:51:12 +0200 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2020-08-14 19:07:52 +0200 |
commit | 3f5ba5908117cf3351702c144a2509f4bf48d75b (patch) | |
tree | 3b0434d1ee5a6749ea1d4b5fecc7492b0272bbd0 | |
parent | 45cdfed515dbbcb85c6d54076b6103788d097400 (diff) | |
download | serd-3f5ba5908117cf3351702c144a2509f4bf48d75b.tar.gz serd-3f5ba5908117cf3351702c144a2509f4bf48d75b.tar.bz2 serd-3f5ba5908117cf3351702c144a2509f4bf48d75b.zip |
Clean up and separate internal headers
-rw-r--r-- | src/byte_sink.h | 96 | ||||
-rw-r--r-- | src/byte_source.c | 4 | ||||
-rw-r--r-- | src/byte_source.h | 118 | ||||
-rw-r--r-- | src/n3.c | 6 | ||||
-rw-r--r-- | src/node.c | 3 | ||||
-rw-r--r-- | src/node.h | 45 | ||||
-rw-r--r-- | src/reader.c | 2 | ||||
-rw-r--r-- | src/reader.h | 94 | ||||
-rw-r--r-- | src/serd_internal.h | 591 | ||||
-rw-r--r-- | src/serdi.c | 25 | ||||
-rw-r--r-- | src/stack.h | 117 | ||||
-rw-r--r-- | src/string.c | 2 | ||||
-rw-r--r-- | src/string_utils.h | 147 | ||||
-rw-r--r-- | src/system.c | 59 | ||||
-rw-r--r-- | src/system.h | 28 | ||||
-rw-r--r-- | src/uri.c | 3 | ||||
-rw-r--r-- | src/uri_utils.h | 106 | ||||
-rw-r--r-- | src/writer.c | 12 | ||||
-rw-r--r-- | wscript | 33 |
19 files changed, 885 insertions, 606 deletions
diff --git a/src/byte_sink.h b/src/byte_sink.h new file mode 100644 index 00000000..f39bc2ba --- /dev/null +++ b/src/byte_sink.h @@ -0,0 +1,96 @@ +/* + Copyright 2011-2020 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_BYTE_SINK_H +#define SERD_BYTE_SINK_H + +#include "serd_internal.h" +#include "system.h" + +#include "serd/serd.h" + +#include <stddef.h> +#include <string.h> + +typedef struct SerdByteSinkImpl { + SerdSink sink; + void* stream; + uint8_t* buf; + size_t size; + size_t block_size; +} SerdByteSink; + +static inline SerdByteSink +serd_byte_sink_new(SerdSink sink, void* stream, size_t block_size) +{ + SerdByteSink bsink; + bsink.sink = sink; + bsink.stream = stream; + bsink.size = 0; + bsink.block_size = block_size; + bsink.buf = ((block_size > 1) + ? 
(uint8_t*)serd_bufalloc(block_size) + : NULL); + return bsink; +} + +static inline void +serd_byte_sink_flush(SerdByteSink* bsink) +{ + if (bsink->block_size > 1 && bsink->size > 0) { + bsink->sink(bsink->buf, bsink->size, bsink->stream); + bsink->size = 0; + } +} + +static inline void +serd_byte_sink_free(SerdByteSink* bsink) +{ + serd_byte_sink_flush(bsink); + free(bsink->buf); + bsink->buf = NULL; +} + +static inline size_t +serd_byte_sink_write(const void* buf, size_t len, SerdByteSink* bsink) +{ + if (len == 0) { + return 0; + } else if (bsink->block_size == 1) { + return bsink->sink(buf, len, bsink->stream); + } + + const size_t orig_len = len; + while (len) { + const size_t space = bsink->block_size - bsink->size; + const size_t n = MIN(space, len); + + // Write as much as possible into the remaining buffer space + memcpy(bsink->buf + bsink->size, buf, n); + bsink->size += n; + buf = (const uint8_t*)buf + n; + len -= n; + + // Flush page if buffer is full + if (bsink->size == bsink->block_size) { + bsink->sink(bsink->buf, bsink->block_size, bsink->stream); + bsink->size = 0; + } + } + return orig_len; +} + +#endif // SERD_BYTE_SINK_H diff --git a/src/byte_source.c b/src/byte_source.c index 329c89a3..d783959c 100644 --- a/src/byte_source.c +++ b/src/byte_source.c @@ -14,7 +14,9 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include "serd_internal.h" +#include "byte_source.h" + +#include "system.h" #include "serd/serd.h" diff --git a/src/byte_source.h b/src/byte_source.h new file mode 100644 index 00000000..a0dfa140 --- /dev/null +++ b/src/byte_source.h @@ -0,0 +1,118 @@ +/* + Copyright 2011-2020 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. 
+ + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_BYTE_SOURCE_H +#define SERD_BYTE_SOURCE_H + +#include "serd/serd.h" + +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> + +typedef struct { + const uint8_t* filename; + unsigned line; + unsigned col; +} Cursor; + +typedef struct { + SerdSource read_func; ///< Read function (e.g. fread) + SerdStreamErrorFunc error_func; ///< Error function (e.g. ferror) + void* stream; ///< Stream (e.g. FILE) + size_t page_size; ///< Number of bytes to read at a time + size_t buf_size; ///< Number of bytes in file_buf + Cursor cur; ///< Cursor for error reporting + uint8_t* file_buf; ///< Buffer iff reading pages from a file + const uint8_t* read_buf; ///< Pointer to file_buf or read_byte + size_t read_head; ///< Offset into read_buf + uint8_t read_byte; ///< 1-byte 'buffer' used when not paging + bool from_stream; ///< True iff reading from `stream` + bool prepared; ///< True iff prepared for reading + bool eof; ///< True iff end of file reached +} SerdByteSource; + +SerdStatus +serd_byte_source_open_file(SerdByteSource* source, + FILE* file, + bool bulk); + +SerdStatus +serd_byte_source_open_string(SerdByteSource* source, const uint8_t* utf8); + +SerdStatus +serd_byte_source_open_source(SerdByteSource* source, + SerdSource read_func, + SerdStreamErrorFunc error_func, + void* stream, + const uint8_t* name, + size_t page_size); + +SerdStatus +serd_byte_source_close(SerdByteSource* source); + +SerdStatus 
+serd_byte_source_prepare(SerdByteSource* source); + +SerdStatus +serd_byte_source_page(SerdByteSource* source); + +static inline uint8_t +serd_byte_source_peek(SerdByteSource* source) +{ + assert(source->prepared); + return source->read_buf[source->read_head]; +} + +static inline SerdStatus +serd_byte_source_advance(SerdByteSource* source) +{ + SerdStatus st = SERD_SUCCESS; + + switch (serd_byte_source_peek(source)) { + case '\n': ++source->cur.line; source->cur.col = 0; break; + default: ++source->cur.col; + } + + const bool was_eof = source->eof; + if (source->from_stream) { + source->eof = false; + if (source->page_size > 1) { + if (++source->read_head == source->page_size) { + st = serd_byte_source_page(source); + } else if (source->read_head == source->buf_size) { + source->eof = true; + } + } else { + if (!source->read_func(&source->read_byte, 1, 1, source->stream)) { + source->eof = true; + st = source->error_func(source->stream) ? SERD_ERR_UNKNOWN + : SERD_FAILURE; + } + } + } else if (!source->eof) { + ++source->read_head; // Move to next character in string + if (source->read_buf[source->read_head] == '\0') { + source->eof = true; + } + } + + return (was_eof && source->eof) ? SERD_FAILURE : st; +} + +#endif // SERD_BYTE_SOURCE_H @@ -14,8 +14,12 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#include "byte_source.h" #include "reader.h" #include "serd_internal.h" +#include "stack.h" +#include "string_utils.h" +#include "uri_utils.h" #include "serd/serd.h" @@ -614,7 +618,7 @@ static bool read_IRIREF_scheme(SerdReader* reader, Ref dest) { int c = peek_byte(reader); - if (!isalpha(c)) { + if (!is_alpha(c)) { return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad IRI scheme start `%c'\n", c); } @@ -14,7 +14,10 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ +#include "node.h" + #include "serd_internal.h" +#include "string_utils.h" #include "serd/serd.h" diff --git a/src/node.h b/src/node.h new file mode 100644 index 00000000..f99da1a6 --- /dev/null +++ b/src/node.h @@ -0,0 +1,45 @@ +/* + Copyright 2011-2020 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_NODE_H +#define SERD_NODE_H + +#include "serd/serd.h" + +#include <stddef.h> + +struct SerdNodeImpl { + size_t n_bytes; /**< Size in bytes (not including null) */ + SerdNodeFlags flags; /**< Node flags (e.g. 
string properties) */ + SerdType type; /**< Node type */ +}; + +static inline char* +serd_node_buffer(SerdNode* node) +{ + return (char*)(node + 1); +} + +static inline const char* +serd_node_buffer_c(const SerdNode* node) +{ + return (const char*)(node + 1); +} + +SerdNode* serd_node_malloc(size_t n_bytes, SerdNodeFlags flags, SerdType type); +void serd_node_set(SerdNode** dst, const SerdNode* src); + +#endif // SERD_NODE_H diff --git a/src/reader.c b/src/reader.c index 70576b7a..4831b89d 100644 --- a/src/reader.c +++ b/src/reader.c @@ -15,6 +15,8 @@ */ #include "reader.h" +#include "system.h" + #include "serd_internal.h" #include <errno.h> diff --git a/src/reader.h b/src/reader.h index adea6651..1b0a80c2 100644 --- a/src/reader.h +++ b/src/reader.h @@ -14,7 +14,11 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include "serd_internal.h" +#ifndef SERD_READER_H +#define SERD_READER_H + +#include "byte_source.h" +#include "stack.h" #include "serd/serd.h" @@ -23,6 +27,92 @@ #include <stdint.h> #include <stdio.h> +#if defined(__GNUC__) +# define SERD_LOG_FUNC(fmt, arg1) __attribute__((format(printf, fmt, arg1))) +#else +# define SERD_LOG_FUNC(fmt, arg1) +#endif + +SERD_LOG_FUNC(3, 4) +int +r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...); + +#ifdef SERD_STACK_CHECK +# define SERD_STACK_ASSERT_TOP(reader, ref) \ + assert(ref == reader->allocs[reader->n_allocs - 1]); +#else +# define SERD_STACK_ASSERT_TOP(reader, ref) +#endif + +/* Reference to a node in the stack (we can not use pointers since the + stack may be reallocated, invalidating any pointers to elements). 
+*/ +typedef size_t Ref; + +typedef struct { + Ref graph; + Ref subject; + Ref predicate; + Ref object; + Ref datatype; + Ref lang; + SerdStatementFlags* flags; +} ReadContext; + +struct SerdReaderImpl { + void* handle; + void (*free_handle)(void* ptr); + SerdBaseSink base_sink; + SerdPrefixSink prefix_sink; + SerdStatementSink statement_sink; + SerdEndSink end_sink; + SerdErrorSink error_sink; + void* error_handle; + Ref rdf_first; + Ref rdf_rest; + Ref rdf_nil; + SerdNode default_graph; + SerdByteSource source; + SerdStack stack; + SerdSyntax syntax; + unsigned next_id; + SerdStatus status; + uint8_t* buf; + uint8_t* bprefix; + size_t bprefix_len; + bool strict; ///< True iff strict parsing + bool seen_genid; +#ifdef SERD_STACK_CHECK + Ref* allocs; ///< Stack of push offsets + size_t n_allocs; ///< Number of stack pushes +#endif +}; + +Ref push_node_padded(SerdReader* reader, + size_t maxlen, + SerdType type, + const char* str, + size_t n_bytes); + +Ref push_node(SerdReader* reader, + SerdType type, + const char* str, + size_t n_bytes); + +size_t genid_size(SerdReader* reader); +Ref blank_id(SerdReader* reader); +void set_blank_id(SerdReader* reader, Ref ref, size_t buf_size); + +SerdNode* deref(SerdReader* reader, Ref ref); + +Ref pop_node(SerdReader* reader, Ref ref); + +bool emit_statement(SerdReader* reader, ReadContext ctx, Ref o, Ref d, Ref l); + +bool read_n3_statement(SerdReader* reader); +bool read_nquadsDoc(SerdReader* reader); +bool read_turtleTrigDoc(SerdReader* reader); + static inline int peek_byte(SerdReader* reader) { @@ -97,3 +187,5 @@ push_bytes(SerdReader* reader, Ref ref, const uint8_t* bytes, unsigned len) push_byte(reader, ref, bytes[i]); } } + +#endif // SERD_READER_H diff --git a/src/serd_internal.h b/src/serd_internal.h index 7449c514..f057a5c5 100644 --- a/src/serd_internal.h +++ b/src/serd_internal.h @@ -17,16 +17,8 @@ #ifndef SERD_INTERNAL_H #define SERD_INTERNAL_H -#define _POSIX_C_SOURCE 200809L /* for posix_memalign and 
posix_fadvise */ - -#include "serd_config.h" - #include "serd/serd.h" -#if defined(HAVE_POSIX_FADVISE) && defined(HAVE_FILENO) -# include <fcntl.h> -#endif - #include <assert.h> #include <ctype.h> #include <errno.h> @@ -43,499 +35,6 @@ # define MIN(a, b) (((a) < (b)) ? (a) : (b)) #endif -#if defined(__GNUC__) -# define SERD_LOG_FUNC(fmt, arg1) __attribute__((format(printf, fmt, arg1))) -#else -# define SERD_LOG_FUNC(fmt, arg1) -#endif - -static const uint8_t replacement_char[] = { 0xEF, 0xBF, 0xBD }; - -/* File and Buffer Utilities */ - -static inline FILE* -serd_fopen(const char* path, const char* mode) -{ - FILE* fd = fopen(path, mode); - if (!fd) { - fprintf(stderr, "error: failed to open file %s (%s)\n", - path, strerror(errno)); - return NULL; - } -#if defined(HAVE_POSIX_FADVISE) && defined(HAVE_FILENO) - posix_fadvise(fileno(fd), 0, 0, POSIX_FADV_SEQUENTIAL); -#endif - return fd; -} - -static inline void* -serd_bufalloc(size_t size) -{ -#ifdef HAVE_POSIX_MEMALIGN - void* ptr = NULL; - const int ret = posix_memalign(&ptr, SERD_PAGE_SIZE, size); - return ret ? NULL : ptr; -#else - return malloc(size); -#endif -} - -/* Byte source */ - -typedef struct { - const uint8_t* filename; - unsigned line; - unsigned col; -} Cursor; - -typedef struct { - SerdSource read_func; ///< Read function (e.g. fread) - SerdStreamErrorFunc error_func; ///< Error function (e.g. ferror) - void* stream; ///< Stream (e.g. 
FILE) - size_t page_size; ///< Number of bytes to read at a time - size_t buf_size; ///< Number of bytes in file_buf - Cursor cur; ///< Cursor for error reporting - uint8_t* file_buf; ///< Buffer iff reading pages from a file - const uint8_t* read_buf; ///< Pointer to file_buf or read_byte - size_t read_head; ///< Offset into read_buf - uint8_t read_byte; ///< 1-byte 'buffer' used when not paging - bool from_stream; ///< True iff reading from `stream` - bool prepared; ///< True iff prepared for reading - bool eof; ///< True iff end of file reached -} SerdByteSource; - -SerdStatus -serd_byte_source_open_file(SerdByteSource* source, - FILE* file, - bool bulk); - -SerdStatus -serd_byte_source_open_string(SerdByteSource* source, const uint8_t* utf8); - -SerdStatus -serd_byte_source_open_source(SerdByteSource* source, - SerdSource read_func, - SerdStreamErrorFunc error_func, - void* stream, - const uint8_t* name, - size_t page_size); - -SerdStatus -serd_byte_source_close(SerdByteSource* source); - -SerdStatus -serd_byte_source_prepare(SerdByteSource* source); - -SerdStatus -serd_byte_source_page(SerdByteSource* source); - -static inline uint8_t -serd_byte_source_peek(SerdByteSource* source) -{ - assert(source->prepared); - return source->read_buf[source->read_head]; -} - -static inline SerdStatus -serd_byte_source_advance(SerdByteSource* source) -{ - SerdStatus st = SERD_SUCCESS; - - switch (serd_byte_source_peek(source)) { - case '\n': ++source->cur.line; source->cur.col = 0; break; - default: ++source->cur.col; - } - - const bool was_eof = source->eof; - if (source->from_stream) { - source->eof = false; - if (source->page_size > 1) { - if (++source->read_head == source->page_size) { - st = serd_byte_source_page(source); - } else if (source->read_head == source->buf_size) { - source->eof = true; - } - } else { - if (!source->read_func(&source->read_byte, 1, 1, source->stream)) { - source->eof = true; - st = source->error_func(source->stream) ? 
SERD_ERR_UNKNOWN - : SERD_FAILURE; - } - } - } else if (!source->eof) { - ++source->read_head; // Move to next character in string - if (source->read_buf[source->read_head] == '\0') { - source->eof = true; - } - } - - return (was_eof && source->eof) ? SERD_FAILURE : st; -} - -/* Stack */ - -/** A dynamic stack in memory. */ -typedef struct { - uint8_t* buf; ///< Stack memory - size_t buf_size; ///< Allocated size of buf (>= size) - size_t size; ///< Conceptual size of stack in buf -} SerdStack; - -/** An offset to start the stack at. Note 0 is reserved for NULL. */ -#define SERD_STACK_BOTTOM sizeof(void*) - -static inline SerdStack -serd_stack_new(size_t size) -{ - SerdStack stack; - stack.buf = (uint8_t*)calloc(size, 1); - stack.buf_size = size; - stack.size = SERD_STACK_BOTTOM; - return stack; -} - -static inline bool -serd_stack_is_empty(SerdStack* stack) -{ - return stack->size <= SERD_STACK_BOTTOM; -} - -static inline void -serd_stack_free(SerdStack* stack) -{ - free(stack->buf); - stack->buf = NULL; - stack->buf_size = 0; - stack->size = 0; -} - -static inline uint8_t* -serd_stack_push(SerdStack* stack, size_t n_bytes) -{ - const size_t new_size = stack->size + n_bytes; - if (stack->buf_size < new_size) { - stack->buf_size += (stack->buf_size >> 1); // *= 1.5 - stack->buf = (uint8_t*)realloc(stack->buf, stack->buf_size); - } - uint8_t* const ret = (stack->buf + stack->size); - stack->size = new_size; - return ret; -} - -static inline void -serd_stack_pop(SerdStack* stack, size_t n_bytes) -{ - assert(stack->size >= n_bytes); - stack->size -= n_bytes; -} - -static inline void* -serd_stack_push_aligned(SerdStack* stack, size_t n_bytes, size_t align) -{ - // Push one byte to ensure space for a pad count - serd_stack_push(stack, 1); - - // Push padding if necessary - const size_t pad = align - stack->size % align; - if (pad > 0) { - serd_stack_push(stack, pad); - } - - // Set top of stack to pad count so we can properly pop later - assert(pad < UINT8_MAX); - 
stack->buf[stack->size - 1] = (uint8_t)pad; - - // Push requested space at aligned location - return serd_stack_push(stack, n_bytes); -} - -static inline void -serd_stack_pop_aligned(SerdStack* stack, size_t n_bytes) -{ - // Pop requested space down to aligned location - serd_stack_pop(stack, n_bytes); - - // Get amount of padding from top of stack - const uint8_t pad = stack->buf[stack->size - 1]; - - // Pop padding and pad count - serd_stack_pop(stack, pad + 1u); -} - -/* Byte Sink */ - -typedef struct SerdByteSinkImpl { - SerdSink sink; - void* stream; - uint8_t* buf; - size_t size; - size_t block_size; -} SerdByteSink; - -static inline SerdByteSink -serd_byte_sink_new(SerdSink sink, void* stream, size_t block_size) -{ - SerdByteSink bsink; - bsink.sink = sink; - bsink.stream = stream; - bsink.size = 0; - bsink.block_size = block_size; - bsink.buf = ((block_size > 1) - ? (uint8_t*)serd_bufalloc(block_size) - : NULL); - return bsink; -} - -static inline void -serd_byte_sink_flush(SerdByteSink* bsink) -{ - if (bsink->block_size > 1 && bsink->size > 0) { - bsink->sink(bsink->buf, bsink->size, bsink->stream); - bsink->size = 0; - } -} - -static inline void -serd_byte_sink_free(SerdByteSink* bsink) -{ - serd_byte_sink_flush(bsink); - free(bsink->buf); - bsink->buf = NULL; -} - -static inline size_t -serd_byte_sink_write(const void* buf, size_t len, SerdByteSink* bsink) -{ - if (len == 0) { - return 0; - } else if (bsink->block_size == 1) { - return bsink->sink(buf, len, bsink->stream); - } - - const size_t orig_len = len; - while (len) { - const size_t space = bsink->block_size - bsink->size; - const size_t n = MIN(space, len); - - // Write as much as possible into the remaining buffer space - memcpy(bsink->buf + bsink->size, buf, n); - bsink->size += n; - buf = (const uint8_t*)buf + n; - len -= n; - - // Flush page if buffer is full - if (bsink->size == bsink->block_size) { - bsink->sink(bsink->buf, bsink->block_size, bsink->stream); - bsink->size = 0; - } - } - 
return orig_len; -} - -/* Character utilities */ - -/** Return true if `c` lies within [`min`...`max`] (inclusive) */ -static inline bool -in_range(const int c, const int min, const int max) -{ - return (c >= min && c <= max); -} - -/** RFC2234: ALPHA ::= %x41-5A / %x61-7A ; A-Z / a-z */ -static inline bool -is_alpha(const int c) -{ - return in_range(c, 'A', 'Z') || in_range(c, 'a', 'z'); -} - -/** RFC2234: DIGIT ::= %x30-39 ; 0-9 */ -static inline bool -is_digit(const int c) -{ - return in_range(c, '0', '9'); -} - -/* RFC2234: HEXDIG ::= DIGIT / "A" / "B" / "C" / "D" / "E" / "F" */ -static inline bool -is_hexdig(const int c) -{ - return is_digit(c) || in_range(c, 'A', 'F'); -} - -/* Turtle / JSON / C: XDIGIT ::= DIGIT / A-F / a-f */ -static inline bool -is_xdigit(const int c) -{ - return is_hexdig(c) || in_range(c, 'a', 'f'); -} - -static inline bool -is_space(const char c) -{ - switch (c) { - case ' ': case '\f': case '\n': case '\r': case '\t': case '\v': - return true; - default: - return false; - } -} - -static inline bool -is_base64(const uint8_t c) -{ - return is_alpha(c) || is_digit(c) || c == '+' || c == '/' || c == '='; -} - -static inline bool -is_windows_path(const uint8_t* path) -{ - return is_alpha(path[0]) && (path[1] == ':' || path[1] == '|') - && (path[2] == '/' || path[2] == '\\'); -} - -/* String utilities */ - -size_t -serd_substrlen(const uint8_t* str, - size_t len, - size_t* n_bytes, - SerdNodeFlags* flags); - -static inline int -serd_strncasecmp(const char* s1, const char* s2, size_t n) -{ - for (; n > 0 && *s2; s1++, s2++, --n) { - if (toupper(*s1) != toupper(*s2)) { - return ((*(const uint8_t*)s1 < *(const uint8_t*)s2) ? 
-1 : +1); - } - } - return 0; -} - -static inline uint32_t -utf8_num_bytes(const uint8_t c) -{ - if ((c & 0x80) == 0) { // Starts with `0' - return 1; - } else if ((c & 0xE0) == 0xC0) { // Starts with `110' - return 2; - } else if ((c & 0xF0) == 0xE0) { // Starts with `1110' - return 3; - } else if ((c & 0xF8) == 0xF0) { // Starts with `11110' - return 4; - } - return 0; -} - -/// Return the code point of a UTF-8 character with known length -static inline uint32_t -parse_counted_utf8_char(const uint8_t* utf8, size_t size) -{ - uint32_t c = utf8[0] & ((1u << (8 - size)) - 1); - for (size_t i = 1; i < size; ++i) { - const uint8_t in = utf8[i] & 0x3F; - c = (c << 6) | in; - } - return c; -} - -/// Parse a UTF-8 character, set *size to the length, and return the code point -static inline uint32_t -parse_utf8_char(const uint8_t* utf8, size_t* size) -{ - switch (*size = utf8_num_bytes(utf8[0])) { - case 1: case 2: case 3: case 4: - return parse_counted_utf8_char(utf8, *size); - default: - *size = 0; - return 0; - } -} - -/* URI utilities */ - -static inline bool -chunk_equals(const SerdChunk* a, const SerdChunk* b) -{ - return a->len == b->len - && !strncmp((const char*)a->buf, (const char*)b->buf, a->len); -} - -static inline size_t -uri_path_len(const SerdURI* uri) -{ - return uri->path_base.len + uri->path.len; -} - -static inline uint8_t -uri_path_at(const SerdURI* uri, size_t i) -{ - if (i < uri->path_base.len) { - return uri->path_base.buf[i]; - } else { - return uri->path.buf[i - uri->path_base.len]; - } -} - -/** - Return the index of the first differing character after the last root slash, - or zero if `uri` is not under `root`. 
-*/ -static inline size_t -uri_rooted_index(const SerdURI* uri, const SerdURI* root) -{ - if (!root || !root->scheme.len || - !chunk_equals(&root->scheme, &uri->scheme) || - !chunk_equals(&root->authority, &uri->authority)) { - return 0; - } - - bool differ = false; - const size_t path_len = uri_path_len(uri); - const size_t root_len = uri_path_len(root); - size_t last_root_slash = 0; - for (size_t i = 0; i < path_len && i < root_len; ++i) { - const uint8_t u = uri_path_at(uri, i); - const uint8_t r = uri_path_at(root, i); - - differ = differ || u != r; - if (r == '/') { - last_root_slash = i; - if (differ) { - return 0; - } - } - } - - return last_root_slash + 1; -} - -/** Return true iff `uri` shares path components with `root` */ -static inline bool -uri_is_related(const SerdURI* uri, const SerdURI* root) -{ - return uri_rooted_index(uri, root) > 0; -} - -/** Return true iff `uri` is within the base of `root` */ -static inline bool -uri_is_under(const SerdURI* uri, const SerdURI* root) -{ - const size_t index = uri_rooted_index(uri, root); - return index > 0 && uri->path.len > index; -} - -static inline bool -is_uri_scheme_char(const int c) -{ - switch (c) { - case ':': case '+': case '-': case '.': - return true; - default: - return is_alpha(c) || is_digit(c); - } -} - /* Error reporting */ static inline void @@ -549,94 +48,4 @@ serd_error(SerdErrorSink error_sink, void* handle, const SerdError* e) } } -SERD_LOG_FUNC(3, 4) -int -r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...); - -/* Reader */ - -#ifdef SERD_STACK_CHECK -# define SERD_STACK_ASSERT_TOP(reader, ref) \ - assert(ref == reader->allocs[reader->n_allocs - 1]); -#else -# define SERD_STACK_ASSERT_TOP(reader, ref) -#endif - -/* Reference to a node in the stack (we can not use pointers since the - stack may be reallocated, invalidating any pointers to elements). 
-*/ -typedef size_t Ref; - -typedef struct { - Ref graph; - Ref subject; - Ref predicate; - Ref object; - Ref datatype; - Ref lang; - SerdStatementFlags* flags; -} ReadContext; - -struct SerdReaderImpl { - void* handle; - void (*free_handle)(void* ptr); - SerdBaseSink base_sink; - SerdPrefixSink prefix_sink; - SerdStatementSink statement_sink; - SerdEndSink end_sink; - SerdErrorSink error_sink; - void* error_handle; - Ref rdf_first; - Ref rdf_rest; - Ref rdf_nil; - SerdNode default_graph; - SerdByteSource source; - SerdStack stack; - SerdSyntax syntax; - unsigned next_id; - SerdStatus status; - uint8_t* buf; - uint8_t* bprefix; - size_t bprefix_len; - bool strict; ///< True iff strict parsing - bool seen_genid; -#ifdef SERD_STACK_CHECK - Ref* allocs; ///< Stack of push offsets - size_t n_allocs; ///< Number of stack pushes -#endif -}; - -Ref push_node_padded(SerdReader* reader, - size_t maxlen, - SerdType type, - const char* str, - size_t n_bytes); - -Ref push_node(SerdReader* reader, - SerdType type, - const char* str, - size_t n_bytes); - -size_t genid_size(SerdReader* reader); -Ref blank_id(SerdReader* reader); -void set_blank_id(SerdReader* reader, Ref ref, size_t buf_size); - -SerdNode* deref(SerdReader* reader, Ref ref); - -Ref pop_node(SerdReader* reader, Ref ref); - -bool emit_statement(SerdReader* reader, ReadContext ctx, Ref o, Ref d, Ref l); - -bool read_n3_statement(SerdReader* reader); -bool read_nquadsDoc(SerdReader* reader); -bool read_turtleTrigDoc(SerdReader* reader); - -typedef enum { - FIELD_NONE, - FIELD_SUBJECT, - FIELD_PREDICATE, - FIELD_OBJECT, - FIELD_GRAPH -} Field; - #endif // SERD_INTERNAL_H diff --git a/src/serdi.c b/src/serdi.c index 31e4ff91..604a54fa 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -14,8 +14,10 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ +#define _POSIX_C_SOURCE 200809L /* for fileno and posix_fadvise */ + #include "serd_config.h" -#include "serd_internal.h" +#include "string_utils.h" #include "serd/serd.h" @@ -24,6 +26,11 @@ #include <io.h> #endif +#if defined(HAVE_POSIX_FADVISE) && defined(HAVE_FILENO) +#include <fcntl.h> +#endif + +#include <errno.h> #include <stdbool.h> #include <stdint.h> #include <stdio.h> @@ -123,6 +130,22 @@ quiet_error_sink(void* handle, const SerdError* e) return SERD_SUCCESS; } +static inline FILE* +serd_fopen(const char* path, const char* mode) +{ + FILE* fd = fopen(path, mode); + if (!fd) { + SERDI_ERRORF("failed to open file %s (%s)\n", path, strerror(errno)); + return NULL; + } + +#if defined(HAVE_POSIX_FADVISE) && defined(HAVE_FILENO) + posix_fadvise(fileno(fd), 0, 0, POSIX_FADV_SEQUENTIAL|POSIX_FADV_NOREUSE); +#endif + + return fd; +} + int main(int argc, char** argv) { diff --git a/src/stack.h b/src/stack.h new file mode 100644 index 00000000..e95c5770 --- /dev/null +++ b/src/stack.h @@ -0,0 +1,117 @@ +/* + Copyright 2011-2020 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_STACK_H +#define SERD_STACK_H + +#include "serd_internal.h" + +#include <assert.h> +#include <stddef.h> +#include <stdlib.h> + +/** An offset to start the stack at. 
Note 0 is reserved for NULL. */
+#define SERD_STACK_BOTTOM sizeof(void*)
+
+/** A dynamic stack in memory. */
+typedef struct {
+    uint8_t* buf;       ///< Stack memory
+    size_t   buf_size;  ///< Allocated size of buf (>= size)
+    size_t   size;      ///< Conceptual size of stack in buf
+} SerdStack;
+
+/**
+   Create a stack backed by `size` bytes of zeroed memory.
+
+   The conceptual size starts at SERD_STACK_BOTTOM, so a stack is empty when
+   its size is at most that offset.  If allocation fails, `buf` is NULL and
+   `buf_size` is 0, so the first push will try to allocate again.
+*/
+static inline SerdStack
+serd_stack_new(size_t size)
+{
+    SerdStack stack;
+    stack.buf      = (uint8_t*)calloc(size, 1);
+    stack.buf_size = stack.buf ? size : 0;
+    stack.size     = SERD_STACK_BOTTOM;
+    return stack;
+}
+
+/** Return true iff nothing is pushed above SERD_STACK_BOTTOM. */
+static inline bool
+serd_stack_is_empty(const SerdStack* stack)
+{
+    return stack->size <= SERD_STACK_BOTTOM;
+}
+
+/** Free the stack memory and reset all fields to zero. */
+static inline void
+serd_stack_free(SerdStack* stack)
+{
+    free(stack->buf);
+    stack->buf      = NULL;
+    stack->buf_size = 0;
+    stack->size     = 0;
+}
+
+/**
+   Push `n_bytes` onto the stack and return a pointer to the new space.
+
+   The buffer grows by half its size, or to exactly the required size if that
+   is still too small.  Growing may move the buffer, so pointers previously
+   returned from this stack must not be used afterwards.  Returns NULL and
+   leaves the stack untouched if the size would overflow or memory cannot be
+   allocated.
+*/
+static inline uint8_t*
+serd_stack_push(SerdStack* stack, size_t n_bytes)
+{
+    if (n_bytes > SIZE_MAX - stack->size) {
+        return NULL;
+    }
+
+    const size_t new_size = stack->size + n_bytes;
+    if (stack->buf_size < new_size) {
+        // Grow by 1.5x, but never by less than what this push needs
+        size_t new_buf_size = stack->buf_size + (stack->buf_size >> 1);
+        if (new_buf_size < new_size) {
+            new_buf_size = new_size;
+        }
+
+        // Keep the old buffer until realloc succeeds so it is never leaked
+        uint8_t* const new_buf = (uint8_t*)realloc(stack->buf, new_buf_size);
+        if (!new_buf) {
+            return NULL;
+        }
+
+        stack->buf      = new_buf;
+        stack->buf_size = new_buf_size;
+    }
+
+    uint8_t* const ret = (stack->buf + stack->size);
+    stack->size        = new_size;
+    return ret;
+}
+
+/** Pop `n_bytes` from the top of the stack (the memory is not released). */
+static inline void
+serd_stack_pop(SerdStack* stack, size_t n_bytes)
+{
+    assert(stack->size >= n_bytes);
+    stack->size -= n_bytes;
+}
+
+/**
+   Push `n_bytes` at an offset in the buffer that is a multiple of `align`.
+
+   Between 1 and `align` padding bytes are pushed first, and the last of them
+   stores the padding count so serd_stack_pop_aligned() can undo the push.
+   Returns NULL and leaves the stack size unchanged if any push fails.
+*/
+static inline void*
+serd_stack_push_aligned(SerdStack* stack, size_t n_bytes, size_t align)
+{
+    // Push one byte to ensure space for a pad count
+    if (!serd_stack_push(stack, 1)) {
+        return NULL;
+    }
+
+    // Push padding up to the next multiple of align (always at least one byte)
+    const size_t pad = align - stack->size % align;
+    assert(pad < UINT8_MAX);
+    if (!serd_stack_push(stack, pad)) {
+        serd_stack_pop(stack, 1);
+        return NULL;
+    }
+
+    // Set top of stack to pad count so we can properly pop later
+    stack->buf[stack->size - 1] = (uint8_t)pad;
+
+    // Push requested space at aligned location
+    void* const ret = serd_stack_push(stack, n_bytes);
+    if (!ret) {
+        serd_stack_pop(stack, pad + 1);
+    }
+    return ret;
+}
+
+/** Pop `n_bytes` pushed with serd_stack_push_aligned(), and its padding. */
+static inline void
+serd_stack_pop_aligned(SerdStack* stack, size_t n_bytes)
+{
+    // Pop requested space down to aligned location
+    serd_stack_pop(stack, n_bytes);
+
+    // Get amount of padding from top of stack
+    const uint8_t pad = stack->buf[stack->size - 1];
+
+    // Pop padding and pad count
+    serd_stack_pop(stack, pad + 1u);
+}
+
+#endif // SERD_STACK_H
diff --git a/src/string.c b/src/string.c index 6a3219c7..86dc739e 100644 --- a/src/string.c +++ b/src/string.c @@ -14,7 +14,7 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include "serd_internal.h" +#include "string_utils.h" #include "serd/serd.h" diff --git a/src/string_utils.h b/src/string_utils.h new file mode 100644 index 00000000..b80bf5aa --- /dev/null +++ b/src/string_utils.h @@ -0,0 +1,147 @@ +/* + Copyright 2011-2020 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#ifndef SERD_STRING_UTILS_H
+#define SERD_STRING_UTILS_H
+
+#include "serd/serd.h"
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/** Unicode replacement character in UTF-8 */
+static const uint8_t replacement_char[] = { 0xEF, 0xBF, 0xBD };
+
+/** Return true if `c` lies within [`min`...`max`] (inclusive) */
+static inline bool
+in_range(const int c, const int min, const int max)
+{
+    return (c >= min && c <= max);
+}
+
+/** RFC2234: ALPHA ::= %x41-5A / %x61-7A ; A-Z / a-z */
+static inline bool
+is_alpha(const int c)
+{
+    return in_range(c, 'A', 'Z') || in_range(c, 'a', 'z');
+}
+
+/** RFC2234: DIGIT ::= %x30-39 ; 0-9 */
+static inline bool
+is_digit(const int c)
+{
+    return in_range(c, '0', '9');
+}
+
+/** RFC2234: HEXDIG ::= DIGIT / "A" / "B" / "C" / "D" / "E" / "F" */
+static inline bool
+is_hexdig(const int c)
+{
+    return is_digit(c) || in_range(c, 'A', 'F');
+}
+
+/** Turtle / JSON / C: XDIGIT ::= DIGIT / A-F / a-f */
+static inline bool
+is_xdigit(const int c)
+{
+    return is_hexdig(c) || in_range(c, 'a', 'f');
+}
+
+/** Return true if `c` is a space, form feed, newline, return, or tab */
+static inline bool
+is_space(const char c)
+{
+    switch (c) {
+    case ' ': case '\f': case '\n': case '\r': case '\t': case '\v':
+        return true;
+    default:
+        return false;
+    }
+}
+
+/** Return true if `c` is a letter, a digit, or one of '+', '/', '=' */
+static inline bool
+is_base64(const uint8_t c)
+{
+    return is_alpha(c) || is_digit(c) || c == '+' || c == '/' || c == '=';
+}
+
+/**
+   Return true if `path` starts with a letter, then ':' or '|', then a slash
+   or backslash (like "C:/").  Evaluation short-circuits, so bytes after a
+   mismatching or terminating byte are never read.
+*/
+static inline bool
+is_windows_path(const uint8_t* path)
+{
+    return is_alpha(path[0]) && (path[1] == ':' || path[1] == '|')
+        && (path[2] == '/' || path[2] == '\\');
+}
+
+size_t
+serd_substrlen(const uint8_t* str,
+               size_t         len,
+               size_t*        n_bytes,
+               SerdNodeFlags* flags);
+
+/**
+   Compare at most `n` characters of `s1` and `s2`, ignoring case.
+
+   Comparison stops at the end of `s2`, so an `s2` shorter than `n` that
+   matches the start of `s1` compares equal.  Returns 0 if equal, otherwise
+   -1 or +1 according to the first differing characters after case folding.
+*/
+static inline int
+serd_strncasecmp(const char* s1, const char* s2, size_t n)
+{
+    for (; n > 0 && *s2; s1++, s2++, --n) {
+        // toupper() is undefined for negative values, so go via unsigned char
+        const int c1 = toupper((unsigned char)*s1);
+        const int c2 = toupper((unsigned char)*s2);
+        if (c1 != c2) {
+            return (c1 < c2) ? -1 : +1;
+        }
+    }
+    return 0;
+}
+
+/**
+   Return the number of bytes in the UTF-8 sequence introduced by lead byte
+   `c`, or 0 if `c` matches none of the lead byte patterns.
+*/
+static inline uint32_t
+utf8_num_bytes(const uint8_t c)
+{
+    if ((c & 0x80) == 0) { // Starts with `0'
+        return 1;
+    } else if ((c & 0xE0) == 0xC0) { // Starts with `110'
+        return 2;
+    } else if ((c & 0xF0) == 0xE0) { // Starts with `1110'
+        return 3;
+    } else if ((c & 0xF8) == 0xF0) { // Starts with `11110'
+        return 4;
+    }
+    return 0;
+}
+
+/// Return the code point of a UTF-8 character with known length
+static inline uint32_t
+parse_counted_utf8_char(const uint8_t* utf8, size_t size)
+{
+    uint32_t c = utf8[0] & ((1u << (8 - size)) - 1);
+    for (size_t i = 1; i < size; ++i) {
+        const uint8_t in = utf8[i] & 0x3F;
+        c = (c << 6) | in;
+    }
+    return c;
+}
+
+/// Parse a UTF-8 character, set *size to the length, and return the code point
+static inline uint32_t
+parse_utf8_char(const uint8_t* utf8, size_t* size)
+{
+    switch (*size = utf8_num_bytes(utf8[0])) {
+    case 1: case 2: case 3: case 4:
+        return parse_counted_utf8_char(utf8, *size);
+    default:
+        *size = 0;
+        return 0;
+    }
+}
+
+#endif // SERD_STRING_UTILS_H
diff --git a/src/system.c b/src/system.c new file mode 100644 index 00000000..0a6a5561 --- /dev/null +++ b/src/system.c @@ -0,0 +1,59 @@ +/* + Copyright 2011-2020 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#define _POSIX_C_SOURCE 200809L /* for posix_memalign and posix_fadvise */
+
+#include "system.h"
+
+#include "serd_config.h"
+#include "serd_internal.h"
+
+#if defined(HAVE_POSIX_FADVISE) && defined(HAVE_FILENO)
+#   include <fcntl.h>
+#endif
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/**
+   Open the file at `path` with the given fopen() `mode`.
+
+   On failure, an error naming the path and the errno description is printed
+   to stderr and NULL is returned.  On success, the stream is marked for
+   sequential access where posix_fadvise() and fileno() are available.
+*/
+FILE*
+serd_fopen(const char* path, const char* mode)
+{
+    FILE* const file = fopen(path, mode);
+
+    if (file) {
+#if defined(HAVE_POSIX_FADVISE) && defined(HAVE_FILENO)
+        // Advisory only, so the result is deliberately ignored
+        posix_fadvise(fileno(file), 0, 0, POSIX_FADV_SEQUENTIAL);
+#endif
+        return file;
+    }
+
+    fprintf(stderr, "error: failed to open file %s (%s)\n",
+            path, strerror(errno));
+    return NULL;
+}
+
+/**
+   Allocate a buffer of `size` bytes, or return NULL on failure.
+
+   The buffer is aligned to SERD_PAGE_SIZE when posix_memalign() is
+   available, otherwise it comes from plain malloc().
+*/
+void*
+serd_bufalloc(size_t size)
+{
+#ifdef HAVE_POSIX_MEMALIGN
+    void* buf = NULL;
+    if (posix_memalign(&buf, SERD_PAGE_SIZE, size)) {
+        return NULL;
+    }
+    return buf;
+#else
+    return malloc(size);
+#endif
+}
+
diff --git a/src/system.h b/src/system.h new file mode 100644 index 00000000..b1b84925 --- /dev/null +++ b/src/system.h @@ -0,0 +1,28 @@ +/* + Copyright 2011-2020 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#ifndef SERD_SYSTEM_H
+#define SERD_SYSTEM_H
+
+#include <stdio.h>
+
+/**
+   Open the file at `path` with fopen() using `mode`.
+
+   Prints an error to stderr and returns NULL if the file cannot be opened.
+   Where the platform supports it, the stream is advised as sequential.
+*/
+FILE*
+serd_fopen(const char* path, const char* mode);
+
+/**
+   Allocate a buffer of `size` bytes, or return NULL on failure.
+
+   The buffer is page-aligned when posix_memalign() is available, and comes
+   from malloc() otherwise.
+*/
+void*
+serd_bufalloc(size_t size);
+
+#endif // SERD_SYSTEM_H
@@ -14,7 +14,8 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include "serd_internal.h" +#include "string_utils.h" +#include "uri_utils.h" #include "serd/serd.h" diff --git a/src/uri_utils.h b/src/uri_utils.h new file mode 100644 index 00000000..4dbcdba5 --- /dev/null +++ b/src/uri_utils.h @@ -0,0 +1,106 @@ +/* + Copyright 2011-2020 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#ifndef SERD_URI_UTILS_H
+#define SERD_URI_UTILS_H
+
+#include "string_utils.h"
+
+#include <string.h>
+
+/** Return true iff chunks `a` and `b` have the same length and contents */
+static inline bool
+chunk_equals(const SerdChunk* a, const SerdChunk* b)
+{
+    if (a->len != b->len) {
+        return false;
+    }
+
+    // Skip strncmp for empty chunks so a possibly-null buf is never passed
+    return a->len == 0
+        || !strncmp((const char*)a->buf, (const char*)b->buf, a->len);
+}
+
+/** Return the combined length of the base path and path of `uri` */
+static inline size_t
+uri_path_len(const SerdURI* uri)
+{
+    return uri->path_base.len + uri->path.len;
+}
+
+/**
+   Return the character at index `i` of the path of `uri`, treating the base
+   path followed by the path as one string.  `i` must be less than
+   uri_path_len(), no bounds check is made here.
+*/
+static inline uint8_t
+uri_path_at(const SerdURI* uri, size_t i)
+{
+    if (i < uri->path_base.len) {
+        return uri->path_base.buf[i];
+    } else {
+        return uri->path.buf[i - uri->path_base.len];
+    }
+}
+
+/**
+   Return the index of the first differing character after the last root slash,
+   or zero if `uri` is not under `root`.
+*/
+static inline size_t
+uri_rooted_index(const SerdURI* uri, const SerdURI* root)
+{
+    if (!root || !root->scheme.len ||
+        !chunk_equals(&root->scheme, &uri->scheme) ||
+        !chunk_equals(&root->authority, &uri->authority)) {
+        return 0;
+    }
+
+    bool         differ          = false;
+    const size_t path_len        = uri_path_len(uri);
+    const size_t root_len        = uri_path_len(root);
+    size_t       last_root_slash = 0;
+    for (size_t i = 0; i < path_len && i < root_len; ++i) {
+        const uint8_t u = uri_path_at(uri, i);
+        const uint8_t r = uri_path_at(root, i);
+
+        differ = differ || u != r;
+        if (r == '/') {
+            last_root_slash = i;
+            if (differ) {
+                // The paths diverged before this root slash
+                return 0;
+            }
+        }
+    }
+
+    return last_root_slash + 1;
+}
+
+/** Return true iff `uri` shares path components with `root` */
+static inline bool
+uri_is_related(const SerdURI* uri, const SerdURI* root)
+{
+    return uri_rooted_index(uri, root) > 0;
+}
+
+/** Return true iff `uri` is within the base of `root` */
+static inline bool
+uri_is_under(const SerdURI* uri, const SerdURI* root)
+{
+    const size_t index = uri_rooted_index(uri, root);
+    return index > 0 && uri->path.len > index;
+}
+
+/** Return true if `c` is a letter, a digit, or one of ':', '+', '-', '.' */
+static inline bool
+is_uri_scheme_char(const int c)
+{
+    switch (c) {
+    case ':': case '+': case '-': case '.':
+        return true;
+    default:
+        return is_alpha(c) || is_digit(c);
+    }
+}
+
+#endif // SERD_URI_UTILS_H
diff --git a/src/writer.c b/src/writer.c index 9881f00f..31050853 100644 --- a/src/writer.c +++ b/src/writer.c @@ -14,7 +14,11 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#include "byte_sink.h" #include "serd_internal.h" +#include "stack.h" +#include "string_utils.h" +#include "uri_utils.h" #include "serd/serd.h" @@ -26,6 +30,14 @@ #include <stdlib.h> #include <string.h> +typedef enum { + FIELD_NONE, + FIELD_SUBJECT, + FIELD_PREDICATE, + FIELD_OBJECT, + FIELD_GRAPH +} Field; + typedef struct { SerdNode graph; SerdNode subject; @@ -91,6 +91,7 @@ def configure(conf): ], 'gcc': [ '-Wno-bad-function-cast', + '-Wno-suggest-attribute=malloc' ], 'msvc': [ '/wd4706', # assignment within conditional expression @@ -147,6 +148,7 @@ lib_source = ['src/byte_source.c', 'src/node.c', 'src/reader.c', 'src/string.c', + 'src/system.c', 'src/uri.c', 'src/writer.c'] @@ -298,22 +300,35 @@ def lint(ctx): def amalgamate(ctx): "builds single-file amalgamated source" import shutil + import re shutil.copy('serd/serd.h', 'build/serd.h') + + def include_line(line): + return (not re.match(r'#include "[^/]*\.h"', line) and + not re.match('#include "serd/serd.h"', line)) + with open('build/serd.c', 'w') as amalgamation: - with open('src/serd_internal.h') as serd_internal_h: - for l in serd_internal_h: - amalgamation.write(l.replace('serd/serd.h', 'serd.h')) + amalgamation.write('/* This is amalgamated code, do not edit! 
*/\n') + amalgamation.write('#include "serd.h"\n\n') + + for header_path in ['src/serd_internal.h', + 'src/system.h', + 'src/byte_sink.h', + 'src/byte_source.h', + 'src/stack.h', + 'src/string_utils.h', + 'src/uri_utils.h', + 'src/reader.h']: + with open(header_path) as header: + for l in header: + if include_line(l): + amalgamation.write(l) for f in lib_headers + lib_source: with open(f) as fd: amalgamation.write('\n/**\n @file %s\n*/' % f) - header = True for l in fd: - if header: - if l == '*/\n': - header = False - elif (not l.startswith('#include "') and - l != '#include "serd.h"\n'): + if include_line(l): amalgamation.write(l) for i in ['c', 'h']: |