diff options
author | David Robillard <d@drobilla.net> | 2014-12-10 20:55:14 +0000 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2014-12-10 20:55:14 +0000 |
commit | d51be9b8d97791bff796d046d10fe16fd4e41311 (patch) | |
tree | 2b6ab1c33692332df65b060295b83e09b7587dd2 | |
parent | cb580bb689733d8db0921f2a3a2232133b9d26b5 (diff) | |
download | serd-d51be9b8d97791bff796d046d10fe16fd4e41311.tar.gz serd-d51be9b8d97791bff796d046d10fe16fd4e41311.tar.bz2 serd-d51be9b8d97791bff796d046d10fe16fd4e41311.zip |
Configurable strict parsing to tolerate invalid URIs.
git-svn-id: http://svn.drobilla.net/serd/trunk@480 490d8e77-9747-427b-9fa3-0b8f29cee8a0
-rw-r--r-- | NEWS | 6 |
-rw-r--r-- | serd/serd.h | 11 |
-rw-r--r-- | src/reader.c | 14 |
-rw-r--r-- | src/serdi.c | 5 |
-rw-r--r-- | wscript | 2 |
5 files changed, 34 insertions, 4 deletions
```diff
diff --git a/NEWS b/NEWS
--- a/NEWS
+++ b/NEWS
@@ -1,11 +1,13 @@
-serd (0.21.0) unstable;
+serd (0.21.1) unstable;
 
   * Remove dependence on fmax() to avoid portability issues
   * Fix serd_reader_read_file() for URIs with escaped characters (spaces)
+  * Add serd_reader_set_strict() and -l (lax) option to serdi to tolerate
+    parsing URIs with escaped characters
   * Report errors for invalid IRI characters and missing terminators
   * Fix warnings when building with ISO C++ compilers
 
- -- David Robillard <d@drobilla.net> Wed, 10 Dec 2014 14:29:19 -0500
+ -- David Robillard <d@drobilla.net> Wed, 10 Dec 2014 15:49:04 -0500
 
 serd (0.20.0) stable;
 
diff --git a/serd/serd.h b/serd/serd.h
index 9f9257ec..acc1ee4f 100644
--- a/serd/serd.h
+++ b/serd/serd.h
@@ -689,6 +689,17 @@ serd_reader_new(SerdSyntax syntax,
                 SerdEndSink end_sink);
 
 /**
+   Enable or disable strict parsing.
+
+   The reader is non-strict (lax) by default, which will tolerate URIs with
+   invalid characters. Setting strict will fail when parsing such files. An
+   error is printed for invalid input in either case.
+*/
+SERD_API
+void
+serd_reader_set_strict(SerdReader* reader, bool strict);
+
+/**
    Set a function to be called when errors occur during reading.
 
    The `error_sink` will be called with `handle` as its first argument. If
diff --git a/src/reader.c b/src/reader.c
index 80f1c2ed..8dddb568 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -85,6 +85,7 @@ struct SerdReaderImpl {
 	uint8_t read_byte; ///< 1-byte 'buffer' used when not paging
 	bool from_file; ///< True iff reading from `fd`
 	bool paging; ///< True iff reading a page at a time
+	bool strict; ///< True iff strict parsing
 	bool eof;
 	bool seen_genid;
 #ifdef SERD_STACK_CHECK
@@ -810,7 +811,10 @@ read_IRIREF(SerdReader* reader)
 				r_err(reader, SERD_ERR_BAD_SYNTAX,
 				      "invalid IRI character (escape %%%02X)\n", c, c);
 			}
-			return pop_node(reader, ref);
+			if (reader->strict) {
+				return pop_node(reader, ref);
+			}
+			push_byte(reader, ref, eat_byte_safe(reader, c));
 		} else {
 			push_byte(reader, ref, eat_byte_safe(reader, c));
 		}
@@ -1473,6 +1477,7 @@ serd_reader_new(SerdSyntax syntax,
 	me->read_buf = 0;
 	me->file_buf = 0;
 	me->read_head = 0;
+	me->strict = false;
 	me->eof = false;
 	me->seen_genid = false;
 #ifdef SERD_STACK_CHECK
@@ -1489,6 +1494,13 @@ serd_reader_new(SerdSyntax syntax,
 
 SERD_API
 void
+serd_reader_set_strict(SerdReader* reader, bool strict)
+{
+	reader->strict = strict;
+}
+
+SERD_API
+void
 serd_reader_set_error_sink(SerdReader* reader,
                            SerdErrorSink error_sink,
                            void* error_handle)
diff --git a/src/serdi.c b/src/serdi.c
index 3543580a..7568378d 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -50,6 +50,7 @@ print_usage(const char* name, bool error)
 	fprintf(os, " -f Keep full URIs in input (don't qualify).\n");
 	fprintf(os, " -h Display this help and exit.\n");
 	fprintf(os, " -i SYNTAX Input syntax (`turtle' or `ntriples').\n");
+	fprintf(os, " -l Lax (non-strict) parsing.\n");
 	fprintf(os, " -o SYNTAX Output syntax (`turtle' or `ntriples').\n");
 	fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n");
 	fprintf(os, " -q Suppress all output except data.\n");
@@ -100,6 +101,7 @@ main(int argc, char** argv)
 	bool bulk_read = true;
 	bool bulk_write = false;
 	bool full_uris = false;
+	bool lax = false;
 	bool quiet = false;
 	const uint8_t* in_name = NULL;
 	const uint8_t* add_prefix = NULL;
@@ -119,6 +121,8 @@ main(int argc, char** argv)
 			full_uris = true;
 		} else if (argv[a][1] == 'h') {
 			return print_usage(argv[0], false);
+		} else if (argv[a][1] == 'l') {
+			lax = true;
 		} else if (argv[a][1] == 'q') {
 			quiet = true;
 		} else if (argv[a][1] == 'v') {
@@ -215,6 +219,7 @@ main(int argc, char** argv)
 		(SerdStatementSink)serd_writer_write_statement,
 		(SerdEndSink)serd_writer_end_anon);
 
+	serd_reader_set_strict(reader, !lax);
 	if (quiet) {
 		serd_reader_set_error_sink(reader, quiet_error_sink, NULL);
 		serd_writer_set_error_sink(writer, quiet_error_sink, NULL);
diff --git a/wscript b/wscript
--- a/wscript
+++ b/wscript
@@ -11,7 +11,7 @@ import waflib.extras.autowaf as autowaf
 # major increment <=> incompatible changes
 # minor increment <=> compatible changes (additions)
 # micro increment <=> no interface changes
-SERD_VERSION = '0.21.0'
+SERD_VERSION = '0.21.1'
 SERD_MAJOR_VERSION = '0'
 
 # Mandatory waf variables
```