diff options
Diffstat (limited to 'src/reader.c')
-rw-r--r-- | src/reader.c | 402 |
1 files changed, 402 insertions, 0 deletions
diff --git a/src/reader.c b/src/reader.c new file mode 100644 index 00000000..1a7f58d4 --- /dev/null +++ b/src/reader.c @@ -0,0 +1,402 @@ +/* + Copyright 2011-2017 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "serd_internal.h" + +#include <ctype.h> +#include <errno.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +int +r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...) +{ + va_list args; + va_start(args, fmt); + const Cursor* const cur = &reader->source.cur; + const SerdError e = { st, cur->filename, cur->line, cur->col, fmt, &args }; + serd_error(reader->error_sink, reader->error_handle, &e); + va_end(args); + return 0; +} + +void +set_blank_id(SerdReader* reader, Ref ref, size_t buf_size) +{ + SerdNode* node = deref(reader, ref); + const char* prefix = reader->bprefix ? (const char*)reader->bprefix : ""; + node->n_bytes = node->n_chars = snprintf( + (char*)node->buf, buf_size, "%sb%u", prefix, reader->next_id++); +} + +size_t +genid_size(SerdReader* reader) +{ + return reader->bprefix_len + 1 + 10 + 1; // + "b" + UINT32_MAX + \0 +} + +Ref +blank_id(SerdReader* reader) +{ + Ref ref = push_node_padded(reader, genid_size(reader), SERD_BLANK, "", 0); + set_blank_id(reader, ref, genid_size(reader)); + return ref; +} + +/** fread-like wrapper for getc (which is faster). */ +static size_t +serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream) +{ + (void)size; + (void)nmemb; + + const int c = getc((FILE*)stream); + if (c == EOF) { + *((uint8_t*)buf) = 0; + return 0; + } + *((uint8_t*)buf) = (uint8_t)c; + return 1; +} + +Ref +push_node_padded(SerdReader* reader, size_t maxlen, + SerdType type, const char* str, size_t n_bytes) +{ + void* mem = serd_stack_push_aligned( + &reader->stack, sizeof(SerdNode) + maxlen + 1, sizeof(SerdNode)); + + SerdNode* const node = (SerdNode*)mem; + node->n_bytes = node->n_chars = n_bytes; + node->flags = 0; + node->type = type; + node->buf = NULL; + + uint8_t* buf = (uint8_t*)(node + 1); + memcpy(buf, str, n_bytes + 1); + +#ifdef SERD_STACK_CHECK + reader->allocs = realloc( + reader->allocs, sizeof(reader->allocs) * (++reader->n_allocs)); + reader->allocs[reader->n_allocs - 1] = ((uint8_t*)mem - reader->stack.buf); +#endif + return (uint8_t*)node - reader->stack.buf; +} + +Ref +push_node(SerdReader* reader, SerdType type, const char* str, size_t n_bytes) +{ + return push_node_padded(reader, n_bytes, type, str, n_bytes); +} + +SerdNode* +deref(SerdReader* reader, const Ref ref) +{ + if (ref) { + SerdNode* node = (SerdNode*)(reader->stack.buf + ref); + node->buf = (uint8_t*)node + sizeof(SerdNode); + return node; + } + return NULL; +} + +Ref +pop_node(SerdReader* reader, Ref ref) +{ + if (ref && ref != reader->rdf_first && ref != reader->rdf_rest + && ref != reader->rdf_nil) { +#ifdef SERD_STACK_CHECK + SERD_STACK_ASSERT_TOP(reader, ref); + --reader->n_allocs; +#endif + SerdNode* const node = deref(reader, ref); + uint8_t* const top = reader->stack.buf + reader->stack.size; + serd_stack_pop_aligned(&reader->stack, top - (uint8_t*)node); + } + return 0; +} + +bool +emit_statement(SerdReader* reader, ReadContext ctx, Ref o, Ref d, Ref l) +{ + SerdNode* graph = deref(reader, ctx.graph); + if (!graph && reader->default_graph.buf) { + graph = &reader->default_graph; + } + bool ret = !reader->statement_sink || + !reader->statement_sink( + reader->handle, *ctx.flags, graph, + deref(reader, ctx.subject), deref(reader, ctx.predicate), + deref(reader, o), deref(reader, d), deref(reader, l)); + *ctx.flags &= SERD_ANON_CONT|SERD_LIST_CONT; // Preserve only cont flags + return ret; +} + +static bool +read_statement(SerdReader* reader) +{ + switch (reader->syntax) { + default: return read_n3_statement(reader); + } +} + +static bool +read_doc(SerdReader* reader) +{ + switch (reader->syntax) { + case SERD_NQUADS: return read_nquadsDoc(reader); + default: return read_turtleTrigDoc(reader); + } +} + +SerdReader* +serd_reader_new(SerdSyntax syntax, + void* handle, + void (*free_handle)(void*), + SerdBaseSink base_sink, + SerdPrefixSink prefix_sink, + SerdStatementSink statement_sink, + SerdEndSink end_sink) +{ + SerdReader* me = (SerdReader*)calloc(1, sizeof(SerdReader)); + me->handle = handle; + me->free_handle = free_handle; + me->base_sink = base_sink; + me->prefix_sink = prefix_sink; + me->statement_sink = statement_sink; + me->end_sink = end_sink; + me->default_graph = SERD_NODE_NULL; + me->stack = serd_stack_new(SERD_PAGE_SIZE); + me->syntax = syntax; + me->next_id = 1; + me->strict = true; + + me->rdf_first = push_node(me, SERD_URI, NS_RDF "first", 48); + me->rdf_rest = push_node(me, SERD_URI, NS_RDF "rest", 47); + me->rdf_nil = push_node(me, SERD_URI, NS_RDF "nil", 46); + + return me; +} + +void +serd_reader_set_strict(SerdReader* reader, bool strict) +{ + reader->strict = strict; +} + +void +serd_reader_set_error_sink(SerdReader* reader, + SerdErrorSink error_sink, + void* error_handle) +{ + reader->error_sink = error_sink; + reader->error_handle = error_handle; +} + +void +serd_reader_free(SerdReader* reader) +{ + pop_node(reader, reader->rdf_nil); + pop_node(reader, reader->rdf_rest); + pop_node(reader, reader->rdf_first); + serd_node_free(&reader->default_graph); + +#ifdef SERD_STACK_CHECK + free(reader->allocs); +#endif + free(reader->stack.buf); + free(reader->bprefix); + if (reader->free_handle) { + reader->free_handle(reader->handle); + } + free(reader); +} + +void* +serd_reader_get_handle(const SerdReader* reader) +{ + return reader->handle; +} + +void +serd_reader_add_blank_prefix(SerdReader* reader, + const uint8_t* prefix) +{ + free(reader->bprefix); + reader->bprefix_len = 0; + reader->bprefix = NULL; + if (prefix) { + reader->bprefix_len = strlen((const char*)prefix); + reader->bprefix = (uint8_t*)malloc(reader->bprefix_len + 1); + memcpy(reader->bprefix, prefix, reader->bprefix_len + 1); + } +} + +void +serd_reader_set_default_graph(SerdReader* reader, + const SerdNode* graph) +{ + serd_node_free(&reader->default_graph); + reader->default_graph = serd_node_copy(graph); +} + +SerdStatus +serd_reader_read_file(SerdReader* reader, + const uint8_t* uri) +{ + uint8_t* const path = serd_file_uri_parse(uri, NULL); + if (!path) { + return SERD_ERR_BAD_ARG; + } + + FILE* fd = serd_fopen((const char*)path, "rb"); + if (!fd) { + serd_free(path); + return SERD_ERR_UNKNOWN; + } + + SerdStatus ret = serd_reader_read_file_handle(reader, fd, path); + fclose(fd); + free(path); + return ret; +} + +static SerdStatus +skip_bom(SerdReader* me) +{ + if (serd_byte_source_peek(&me->source) == 0xEF) { + serd_byte_source_advance(&me->source); + if (serd_byte_source_peek(&me->source) != 0xBB || + serd_byte_source_advance(&me->source) || + serd_byte_source_peek(&me->source) != 0xBF || + serd_byte_source_advance(&me->source)) { + r_err(me, SERD_ERR_BAD_SYNTAX, "corrupt byte order mark\n"); + return SERD_ERR_BAD_SYNTAX; + } + } + + return SERD_SUCCESS; +} + +SerdStatus +serd_reader_start_stream(SerdReader* reader, + FILE* file, + const uint8_t* name, + bool bulk) +{ + return serd_reader_start_source_stream( + reader, + bulk ? (SerdSource)fread : serd_file_read_byte, + (SerdStreamErrorFunc)ferror, + file, + name, + bulk ? SERD_PAGE_SIZE : 1); +} + +SerdStatus +serd_reader_start_source_stream(SerdReader* reader, + SerdSource read_func, + SerdStreamErrorFunc error_func, + void* stream, + const uint8_t* name, + size_t page_size) +{ + return serd_byte_source_open_source( + &reader->source, read_func, error_func, stream, name, page_size); +} + +static SerdStatus +serd_reader_prepare(SerdReader* reader) +{ + reader->status = serd_byte_source_prepare(&reader->source); + if (reader->status == SERD_SUCCESS) { + reader->status = skip_bom(reader); + } else if (reader->status == SERD_FAILURE) { + reader->source.eof = true; + } else { + r_err(reader, reader->status, "read error: %s\n", strerror(errno)); + } + return reader->status; +} + +SerdStatus +serd_reader_read_chunk(SerdReader* reader) +{ + SerdStatus st = SERD_SUCCESS; + if (!reader->source.prepared) { + st = serd_reader_prepare(reader); + } else if (reader->source.eof) { + st = serd_byte_source_advance(&reader->source); + } + + return st ? st : read_statement(reader) ? SERD_SUCCESS : SERD_FAILURE; +} + +SerdStatus +serd_reader_end_stream(SerdReader* reader) +{ + return serd_byte_source_close(&reader->source); +} + +SerdStatus +serd_reader_read_file_handle(SerdReader* reader, + FILE* file, + const uint8_t* name) +{ + return serd_reader_read_source( + reader, (SerdSource)fread, (SerdStreamErrorFunc)ferror, + file, name, SERD_PAGE_SIZE); +} + +SerdStatus +serd_reader_read_source(SerdReader* reader, + SerdSource source, + SerdStreamErrorFunc error, + void* stream, + const uint8_t* name, + size_t page_size) +{ + SerdStatus st = serd_reader_start_source_stream( + reader, source, error, stream, name, page_size); + + if (st || (st = serd_reader_prepare(reader))) { + serd_reader_end_stream(reader); + return st; + } else if (!read_doc(reader)) { + serd_reader_end_stream(reader); + return SERD_ERR_UNKNOWN; + } + + return serd_reader_end_stream(reader); +} + +SerdStatus +serd_reader_read_string(SerdReader* reader, const uint8_t* utf8) +{ + serd_byte_source_open_string(&reader->source, utf8); + + SerdStatus st = serd_reader_prepare(reader); + if (!st) { + st = read_doc(reader) ? SERD_SUCCESS : SERD_ERR_UNKNOWN; + } + + serd_byte_source_close(&reader->source); + + return st; +} |