From 02a41f39b216ca43056fc2e8183351c077b5dd9c Mon Sep 17 00:00:00 2001 From: David Robillard Date: Thu, 5 Jul 2012 17:35:14 +0000 Subject: Add error callback to reader and writer for custom error reporting. Add -q option to serdi to suppress all non-data output, e.g. errors. Resolves #815. git-svn-id: http://svn.drobilla.net/serd/trunk@354 490d8e77-9747-427b-9fa3-0b8f29cee8a0 --- NEWS | 2 ++ serd/serd.h | 51 +++++++++++++++++++++++++- src/env.c | 2 +- src/reader.c | 62 +++++++++++++++++++++----------- src/serd_internal.h | 13 +++++++ src/serdi.c | 15 ++++++++ src/string.c | 5 ++- src/writer.c | 102 ++++++++++++++++++++++++++++++++-------------------- tests/serd_test.c | 2 +- wscript | 1 + 10 files changed, 193 insertions(+), 62 deletions(-) diff --git a/NEWS b/NEWS index cbae6338..53ba478b 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,8 @@ serd (9999) unstable; * Reset indent when finishing a write * Report write size correctly when invalid UTF-8 is encountered and a replacement character is written + * Add error callback to reader and writer for custom error reporting + * Add -q option to serdi to suppress all non-data output, e.g. errors -- David Robillard diff --git a/serd/serd.h b/serd/serd.h index cc2365f4..1cd54df8 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -21,6 +21,7 @@ #ifndef SERD_SERD_H #define SERD_SERD_H +#include <stdarg.h> #include <stddef.h> #include <stdint.h> #include <stdio.h> @@ -89,7 +90,10 @@ typedef enum { SERD_ERR_UNKNOWN, /**< Unknown error */ SERD_ERR_BAD_SYNTAX, /**< Invalid syntax */ SERD_ERR_BAD_ARG, /**< Invalid argument */ - SERD_ERR_NOT_FOUND /**< Not found */ + SERD_ERR_NOT_FOUND, /**< Not found */ + SERD_ERR_ID_CLASH, /**< Encountered clashing blank node IDs */ + SERD_ERR_BAD_CURIE, /**< Invalid CURIE (e.g. prefix does not exist) */ + SERD_ERR_INTERNAL /**< Unexpected internal error (should not happen) */ } SerdStatus; /** @@ -215,6 +219,18 @@ typedef struct { size_t len; /**< Length of chunk in bytes */ } SerdChunk; +/** + An error description.
+*/ +typedef struct { + SerdStatus status; /**< Error code */ + const uint8_t* filename; /**< File where error was encountered, or NULL */ + unsigned line; /**< Line where error was encountered, or 0 */ + unsigned col; /**< Column where error was encountered */ + const char* fmt; /**< Message format string (printf style) */ + va_list* args; /**< Arguments for fmt */ +} SerdError; + /** A parsed URI. @@ -512,6 +528,15 @@ serd_node_free(SerdNode* node); @{ */ +/** + Sink (callback) for errors. + + @param handle Handle for user data. + @param error Error description. +*/ +typedef SerdStatus (*SerdErrorSink)(void* handle, + const SerdError* error); + /** Sink (callback) for base URI changes. @@ -663,6 +688,18 @@ serd_reader_new(SerdSyntax syntax, SerdStatementSink statement_sink, SerdEndSink end_sink); +/** + Set a function to be called when errors occur during reading. + + The @p error_sink will be called with @p handle as its first argument. If + no error function is set, errors are printed to stderr in GCC style. +*/ +SERD_API +void +serd_reader_set_error_sink(SerdReader* reader, + SerdErrorSink error_sink, + void* handle); + /** Return the @c handle passed to @ref serd_reader_new. */ @@ -825,6 +862,18 @@ SERD_API uint8_t* serd_chunk_sink_finish(SerdChunk* stream); +/** + Set a function to be called when errors occur during writing. + + The @p error_sink will be called with @p handle as its first argument. If + no error function is set, errors are printed to stderr. +*/ +SERD_API +void +serd_writer_set_error_sink(SerdWriter* writer, + SerdErrorSink error_sink, + void* handle); + /** Set a prefix to be removed from matching blank node identifiers. 
*/ diff --git a/src/env.c b/src/env.c index ecefb005..605be6e6 100644 --- a/src/env.c +++ b/src/env.c @@ -217,7 +217,7 @@ serd_env_expand(const SerdEnv* env, const uint8_t* const colon = (const uint8_t*)memchr( qname->buf, ':', qname->n_bytes + 1); if (!colon) { - return SERD_ERR_BAD_ARG; // Illegal qname + return SERD_ERR_BAD_ARG; // Invalid qname } const size_t name_len = colon - qname->buf; diff --git a/src/reader.c b/src/reader.c index 5c623039..d66dbb09 100644 --- a/src/reader.c +++ b/src/reader.c @@ -64,6 +64,8 @@ struct SerdReaderImpl { SerdPrefixSink prefix_sink; SerdStatementSink statement_sink; SerdEndSink end_sink; + SerdErrorSink error_sink; + void* error_handle; Ref rdf_first; Ref rdf_rest; Ref rdf_nil; @@ -90,13 +92,14 @@ struct SerdReaderImpl { }; static int -error(SerdReader* reader, const char* fmt, ...) +error(SerdReader* reader, SerdStatus st, const char* fmt, ...) { va_list args; va_start(args, fmt); - fprintf(stderr, "error: %s:%u:%u: ", - reader->cur.filename, reader->cur.line, reader->cur.col); - vfprintf(stderr, fmt, args); + const SerdError e = { + st, reader->cur.filename, reader->cur.line, reader->cur.col, fmt, &args + }; + serd_error(reader->error_sink, reader->error_handle, &e); va_end(args); return 0; } @@ -149,7 +152,8 @@ eat_byte_check(SerdReader* reader, const uint8_t byte) { const uint8_t c = peek_byte(reader); if (c != byte) { - return error(reader, "expected `%c', not `%c'\n", byte, c); + return error(reader, SERD_ERR_BAD_SYNTAX, + "expected `%c', not `%c'\n", byte, c); } return eat_byte_safe(reader, byte); } @@ -271,7 +275,8 @@ read_hex(SerdReader* reader) if (in_range(c, 0x30, 0x39) || in_range(c, 0x41, 0x46)) { return eat_byte_safe(reader, c); } else { - return error(reader, "illegal hexadecimal digit `%c'\n", c); + return error(reader, SERD_ERR_BAD_SYNTAX, + "invalid hexadecimal digit `%c'\n", c); } } @@ -298,7 +303,8 @@ read_hex_escape(SerdReader* reader, unsigned length, Ref dest) } else if (c < 0x00110000) { size = 4; } 
else { - error(reader, "unicode character 0x%X out of range\n", c); + error(reader, SERD_ERR_BAD_SYNTAX, + "unicode character 0x%X out of range\n", c); push_replacement(reader, dest); return true; } @@ -398,7 +404,7 @@ read_ucharacter_escape(SerdReader* reader, Ref dest) static inline SerdStatus bad_char(SerdReader* reader, Ref dest, const char* fmt, uint8_t c) { - error(reader, fmt, c); + error(reader, SERD_ERR_BAD_SYNTAX, fmt, c); push_replacement(reader, dest); // Skip bytes until the next start byte @@ -454,7 +460,7 @@ read_character(SerdReader* reader, Ref dest) const uint8_t c = peek_byte(reader); assert(c != '\\'); // Only called from methods that handle escapes first if (c == '\0') { - error(reader, "unexpected end of file\n", c); + error(reader, SERD_ERR_BAD_SYNTAX, "unexpected end of input\n", c); return SERD_ERR_BAD_SYNTAX; } else if (c < 0x20) { return bad_char(reader, dest, @@ -493,7 +499,8 @@ read_lcharacter(SerdReader* reader, Ref dest, SerdNodeFlags* flags) if (read_scharacter_escape(reader, dest, flags)) { return SERD_SUCCESS; } else { - error(reader, "illegal escape `\\%c'\n", peek_byte(reader)); + error(reader, SERD_ERR_BAD_SYNTAX, + "invalid escape `\\%c'\n", peek_byte(reader)); return SERD_ERR_BAD_SYNTAX; } case 0xA: case 0xD: @@ -517,7 +524,8 @@ read_scharacter(SerdReader* reader, Ref dest, SerdNodeFlags* flags) if (read_scharacter_escape(reader, dest, flags)) { return SERD_SUCCESS; } else { - error(reader, "illegal escape `\\%c'\n", peek_byte(reader)); + error(reader, SERD_ERR_BAD_SYNTAX, + "invalid escape `\\%c'\n", peek_byte(reader)); return SERD_ERR_BAD_SYNTAX; } case '\"': @@ -539,7 +547,8 @@ read_ucharacter(SerdReader* reader, Ref dest) if (read_ucharacter_escape(reader, dest)) { return SERD_SUCCESS; } else { - error(reader, "illegal escape `\\%c'\n", peek_byte(reader)); + error(reader, SERD_ERR_BAD_SYNTAX, + "invalid escape `\\%c'\n", peek_byte(reader)); return SERD_FAILURE; } case '>': @@ -707,7 +716,7 @@ read_prefixName(SerdReader* 
reader, Ref dest) { uint8_t c = peek_byte(reader); if (c == '_') { - error(reader, "unexpected `_'\n"); + error(reader, SERD_ERR_BAD_SYNTAX, "unexpected `_'\n"); return pop_node(reader, dest); } TRY_RET(c = read_nameStartChar(reader)); @@ -741,7 +750,7 @@ read_language(SerdReader* reader) { uint8_t c = peek_byte(reader); if (!in_range(c, 'a', 'z')) { - return error(reader, "unexpected `%c'\n", c); + return error(reader, SERD_ERR_BAD_SYNTAX, "unexpected `%c'\n", c); } Ref ref = push_node(reader, SERD_LITERAL, "", 0); push_byte(reader, ref, eat_byte_safe(reader, c)); @@ -795,7 +804,7 @@ read_0_9(SerdReader* reader, Ref str, bool at_least_one) uint8_t c; if (at_least_one) { if (!is_digit((c = peek_byte(reader)))) { - return error(reader, "expected digit\n"); + return error(reader, SERD_ERR_BAD_SYNTAX, "expected digit\n"); } push_byte(reader, str, eat_byte_safe(reader, c)); } @@ -957,7 +966,8 @@ read_nodeID(SerdReader* reader) reader->bprefix ? (char*)reader->bprefix : "", reader->bprefix_len); if (!read_name(reader, ref)) { - return error(reader, "illegal character at start of name\n"); + return error(reader, SERD_ERR_BAD_SYNTAX, + "invalid character at start of name\n"); } if (reader->syntax == SERD_TURTLE) { const char* const buf = (const char*)deref(reader, ref)->buf; @@ -965,8 +975,8 @@ read_nodeID(SerdReader* reader) memcpy((char*)buf, "docid", 5); // Prevent clash reader->seen_genid = true; } else if (reader->seen_genid && !strncmp(buf, "docid", 5)) { - error(reader, "found both `genid' and `docid' blank IDs\n"); - error(reader, "resolve this with a blank ID prefix\n"); + error(reader, SERD_ERR_ID_CLASH, + "found both `genid' and `docid' IDs, prefix required\n"); return pop_node(reader, ref); } } @@ -1039,7 +1049,7 @@ read_blank(SerdReader* reader, ReadContext ctx, bool subject, Ref* dest) case '(': return read_collection(reader, ctx, dest); default: - return error(reader, "illegal blank node\n"); + return error(reader, SERD_ERR_BAD_SYNTAX, "invalid blank 
node\n"); } } @@ -1311,7 +1321,7 @@ read_directive(SerdReader* reader) switch (peek_byte(reader)) { case 'b': return read_base(reader); case 'p': return read_prefixID(reader); - default: return error(reader, "illegal directive\n"); + default: return error(reader, SERD_ERR_BAD_SYNTAX, "invalid directive\n"); } } @@ -1366,6 +1376,8 @@ serd_reader_new(SerdSyntax syntax, me->prefix_sink = prefix_sink; me->statement_sink = statement_sink; me->end_sink = end_sink; + me->error_sink = NULL; + me->error_handle = NULL; me->default_graph = SERD_NODE_NULL; me->fd = 0; me->stack = serd_stack_new(SERD_PAGE_SIZE); @@ -1390,6 +1402,16 @@ serd_reader_new(SerdSyntax syntax, return me; } +SERD_API +void +serd_reader_set_error_sink(SerdReader* reader, + SerdErrorSink error_sink, + void* error_handle) +{ + reader->error_sink = error_sink; + reader->error_handle = error_handle; +} + SERD_API void serd_reader_free(SerdReader* reader) diff --git a/src/serd_internal.h b/src/serd_internal.h index ee55de27..9a875daa 100644 --- a/src/serd_internal.h +++ b/src/serd_internal.h @@ -290,4 +290,17 @@ uri_is_under(const SerdURI* uri, const SerdURI* root) return true; } +/* Error reporting */ + +static inline void +serd_error(SerdErrorSink error_sink, void* handle, const SerdError* e) +{ + if (error_sink) { + error_sink(handle, e); + } else { + fprintf(stderr, "error: %s:%u:%u: ", e->filename, e->line, e->col); + vfprintf(stderr, e->fmt, *e->args); + } +} + #endif // SERD_INTERNAL_H diff --git a/src/serdi.c b/src/serdi.c index 4c482884..b90c00e5 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -52,6 +52,7 @@ print_usage(const char* name, bool error) fprintf(os, " -i SYNTAX Input syntax (`turtle' or `ntriples').\n"); fprintf(os, " -o SYNTAX Output syntax (`turtle' or `ntriples').\n"); fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n"); + fprintf(os, " -q Suppress all output except data.\n"); fprintf(os, " -r ROOT_URI Keep relative URIs within ROOT_URI.\n"); fprintf(os, " -s INPUT Parse INPUT as 
string (terminates options).\n"); fprintf(os, " -v Display version information and exit.\n"); @@ -79,6 +80,12 @@ bad_arg(const char* name, char opt) return 1; } +static SerdStatus +quiet_error_sink(void* handle, const SerdError* e) +{ + return SERD_SUCCESS; +} + int main(int argc, char** argv) { @@ -93,6 +100,7 @@ main(int argc, char** argv) bool bulk_read = true; bool bulk_write = false; bool full_uris = false; + bool quiet = false; const uint8_t* in_name = NULL; const uint8_t* add_prefix = NULL; const uint8_t* chop_prefix = NULL; @@ -111,6 +119,8 @@ main(int argc, char** argv) full_uris = true; } else if (argv[a][1] == 'h') { return print_usage(argv[0], false); + } else if (argv[a][1] == 'q') { + quiet = true; } else if (argv[a][1] == 'v') { return print_version(); } else if (argv[a][1] == 's') { @@ -205,6 +215,11 @@ main(int argc, char** argv) (SerdStatementSink)serd_writer_write_statement, (SerdEndSink)serd_writer_end_anon); + if (quiet) { + serd_reader_set_error_sink(reader, quiet_error_sink, NULL); + serd_writer_set_error_sink(writer, quiet_error_sink, NULL); + } + SerdNode root = serd_node_from_string(SERD_URI, root_uri); serd_writer_set_root_uri(writer, &root); serd_writer_chop_blank_prefix(writer, chop_prefix); diff --git a/src/string.c b/src/string.c index 24ab9792..a1a1dff1 100644 --- a/src/string.c +++ b/src/string.c @@ -29,8 +29,11 @@ serd_strerror(SerdStatus st) case SERD_ERR_BAD_SYNTAX: return (const uint8_t*)"Invalid syntax"; case SERD_ERR_BAD_ARG: return (const uint8_t*)"Invalid argument"; case SERD_ERR_NOT_FOUND: return (const uint8_t*)"Not found"; + case SERD_ERR_ID_CLASH: return (const uint8_t*)"Blank node ID clash"; + case SERD_ERR_BAD_CURIE: return (const uint8_t*)"Invalid CURIE"; + case SERD_ERR_INTERNAL: return (const uint8_t*)"Internal error"; } - return (const uint8_t*)"Unknown error code"; // never reached + return (const uint8_t*)"Unknown error"; // never reached } SERD_API diff --git a/src/writer.c b/src/writer.c index 
02c362af..5c85073a 100644 --- a/src/writer.c +++ b/src/writer.c @@ -74,24 +74,26 @@ static const SepRule rules[] = { }; struct SerdWriterImpl { - SerdSyntax syntax; - SerdStyle style; - SerdEnv* env; - SerdNode root_node; - SerdURI root_uri; - SerdURI base_uri; - SerdStack anon_stack; - SerdBulkSink bulk_sink; - SerdSink sink; - void* stream; - WriteContext context; - SerdNode list_subj; - unsigned list_depth; - uint8_t* bprefix; - size_t bprefix_len; - unsigned indent; - Sep last_sep; - bool empty; + SerdSyntax syntax; + SerdStyle style; + SerdEnv* env; + SerdNode root_node; + SerdURI root_uri; + SerdURI base_uri; + SerdStack anon_stack; + SerdBulkSink bulk_sink; + SerdSink sink; + void* stream; + SerdErrorSink error_sink; + void* error_handle; + WriteContext context; + SerdNode list_subj; + unsigned list_depth; + uint8_t* bprefix; + size_t bprefix_len; + unsigned indent; + Sep last_sep; + bool empty; }; typedef enum { @@ -100,6 +102,16 @@ typedef enum { WRITE_LONG_STRING } TextContext; +static void +error(SerdWriter* writer, SerdStatus st, const char* fmt, ...) 
+{ + va_list args; + va_start(args, fmt); + const SerdError e = { st, NULL, 0, 0, fmt, &args }; + serd_error(writer->error_sink, writer->error_handle, &e); + va_end(args); +} + static inline WriteContext* anon_stack_top(SerdWriter* writer) { @@ -207,7 +219,7 @@ write_text(SerdWriter* writer, TextContext ctx, size = 4; c = in & 0x07; } else { - fprintf(stderr, "Invalid UTF-8: %X\n", in); + error(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", in); const uint8_t replacement_char[] = { 0xEF, 0xBF, 0xBD }; len += sink(replacement_char, sizeof(replacement_char), writer); return len; @@ -352,7 +364,8 @@ write_node(SerdWriter* writer, switch (writer->syntax) { case SERD_NTRIPLES: if (serd_env_expand(writer->env, node, &uri_prefix, &uri_suffix)) { - fprintf(stderr, "Undefined namespace prefix `%s'\n", node->buf); + error(writer, SERD_ERR_BAD_CURIE, + "undefined namespace prefix `%s'\n", node->buf); return false; } sink("<", 1, writer); @@ -590,7 +603,8 @@ serd_writer_end_anon(SerdWriter* writer, return SERD_SUCCESS; } if (serd_stack_is_empty(&writer->anon_stack)) { - fprintf(stderr, "Unexpected end of anonymous node\n"); + error(writer, SERD_ERR_UNKNOWN, + "unexpected end of anonymous node\n"); return SERD_ERR_UNKNOWN; } assert(writer->indent > 0); @@ -632,29 +646,41 @@ serd_writer_new(SerdSyntax syntax, { const WriteContext context = WRITE_CONTEXT_NULL; SerdWriter* writer = (SerdWriter*)malloc(sizeof(SerdWriter)); - writer->syntax = syntax; - writer->style = style; - writer->env = env; - writer->root_node = SERD_NODE_NULL; - writer->root_uri = SERD_URI_NULL; - writer->base_uri = base_uri ? 
*base_uri : SERD_URI_NULL; - writer->anon_stack = serd_stack_new(sizeof(WriteContext)); - writer->sink = sink; - writer->stream = stream; - writer->context = context; - writer->list_subj = SERD_NODE_NULL; - writer->list_depth = 0; - writer->bprefix = NULL; - writer->bprefix_len = 0; - writer->indent = 0; - writer->last_sep = SEP_NONE; - writer->empty = true; + writer->syntax = syntax; + writer->style = style; + writer->env = env; + writer->root_node = SERD_NODE_NULL; + writer->root_uri = SERD_URI_NULL; + writer->base_uri = base_uri ? *base_uri : SERD_URI_NULL; + writer->anon_stack = serd_stack_new(sizeof(WriteContext)); + writer->sink = sink; + writer->stream = stream; + writer->error_sink = NULL; + writer->error_handle = NULL; + writer->context = context; + writer->list_subj = SERD_NODE_NULL; + writer->list_depth = 0; + writer->bprefix = NULL; + writer->bprefix_len = 0; + writer->indent = 0; + writer->last_sep = SEP_NONE; + writer->empty = true; if (style & SERD_STYLE_BULK) { writer->bulk_sink = serd_bulk_sink_new(sink, stream, SERD_PAGE_SIZE); } return writer; } +SERD_API +void +serd_writer_set_error_sink(SerdWriter* writer, + SerdErrorSink error_sink, + void* error_handle) +{ + writer->error_sink = error_sink; + writer->error_handle = error_handle; +} + SERD_API void serd_writer_chop_blank_prefix(SerdWriter* writer, diff --git a/tests/serd_test.c b/tests/serd_test.c index 2b6bc099..2dea9fe7 100644 --- a/tests/serd_test.c +++ b/tests/serd_test.c @@ -215,7 +215,7 @@ main() if (strcmp((const char*)(msg = serd_strerror(SERD_SUCCESS)), "Success")) { return failure("Bad message `%s' for SERD_SUCCESS\n", msg); } - for (int i = SERD_FAILURE; i <= SERD_ERR_NOT_FOUND; ++i) { + for (int i = SERD_FAILURE; i <= SERD_ERR_INTERNAL; ++i) { msg = serd_strerror((SerdStatus)i); if (!strcmp((const char*)msg, "Success")) { return failure("Bad message `%s' for (SerdStatus)%d\n", msg, i); diff --git a/wscript b/wscript index c3db493f..51859e63 100644 --- a/wscript +++ b/wscript @@ 
-334,6 +334,7 @@ def test(ctx): 0, name='serdi-cmd-good') autowaf.run_tests(ctx, APPNAME, [ + 'serdi_static -q file://%s/tests/bad-id-clash.ttl > %s' % (srcdir, nul), 'serdi_static > %s' % nul, 'serdi_static ftp://example.org/unsupported.ttl > %s' % nul, 'serdi_static -i > %s' % nul, -- cgit v1.2.1