aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: David Robillard <d@drobilla.net> 2014-12-10 20:55:14 +0000
committer: David Robillard <d@drobilla.net> 2014-12-10 20:55:14 +0000
commit: d51be9b8d97791bff796d046d10fe16fd4e41311 (patch)
tree: 2b6ab1c33692332df65b060295b83e09b7587dd2 /src
parent: cb580bb689733d8db0921f2a3a2232133b9d26b5 (diff)
download: serd-d51be9b8d97791bff796d046d10fe16fd4e41311.tar.gz
serd-d51be9b8d97791bff796d046d10fe16fd4e41311.tar.bz2
serd-d51be9b8d97791bff796d046d10fe16fd4e41311.zip
Configurable strict parsing to tolerate invalid URIs.
git-svn-id: http://svn.drobilla.net/serd/trunk@480 490d8e77-9747-427b-9fa3-0b8f29cee8a0
Diffstat (limited to 'src')
-rw-r--r-- src/reader.c 14
-rw-r--r-- src/serdi.c 5
2 files changed, 18 insertions, 1 deletions
diff --git a/src/reader.c b/src/reader.c
index 80f1c2ed..8dddb568 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -85,6 +85,7 @@ struct SerdReaderImpl {
uint8_t read_byte; ///< 1-byte 'buffer' used when not paging
bool from_file; ///< True iff reading from `fd`
bool paging; ///< True iff reading a page at a time
+ bool strict; ///< True iff strict parsing
bool eof;
bool seen_genid;
#ifdef SERD_STACK_CHECK
@@ -810,7 +811,10 @@ read_IRIREF(SerdReader* reader)
r_err(reader, SERD_ERR_BAD_SYNTAX,
"invalid IRI character (escape %%%02X)\n", c, c);
}
- return pop_node(reader, ref);
+ if (reader->strict) {
+ return pop_node(reader, ref);
+ }
+ push_byte(reader, ref, eat_byte_safe(reader, c));
} else {
push_byte(reader, ref, eat_byte_safe(reader, c));
}
@@ -1473,6 +1477,7 @@ serd_reader_new(SerdSyntax syntax,
me->read_buf = 0;
me->file_buf = 0;
me->read_head = 0;
+ me->strict = false;
me->eof = false;
me->seen_genid = false;
#ifdef SERD_STACK_CHECK
@@ -1489,6 +1494,13 @@ serd_reader_new(SerdSyntax syntax,
SERD_API
void
+serd_reader_set_strict(SerdReader* reader, bool strict)
+{
+ reader->strict = strict;
+}
+
+SERD_API
+void
serd_reader_set_error_sink(SerdReader* reader,
SerdErrorSink error_sink,
void* error_handle)
diff --git a/src/serdi.c b/src/serdi.c
index 3543580a..7568378d 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -50,6 +50,7 @@ print_usage(const char* name, bool error)
fprintf(os, " -f Keep full URIs in input (don't qualify).\n");
fprintf(os, " -h Display this help and exit.\n");
fprintf(os, " -i SYNTAX Input syntax (`turtle' or `ntriples').\n");
+ fprintf(os, " -l Lax (non-strict) parsing.\n");
fprintf(os, " -o SYNTAX Output syntax (`turtle' or `ntriples').\n");
fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n");
fprintf(os, " -q Suppress all output except data.\n");
@@ -100,6 +101,7 @@ main(int argc, char** argv)
bool bulk_read = true;
bool bulk_write = false;
bool full_uris = false;
+ bool lax = false;
bool quiet = false;
const uint8_t* in_name = NULL;
const uint8_t* add_prefix = NULL;
@@ -119,6 +121,8 @@ main(int argc, char** argv)
full_uris = true;
} else if (argv[a][1] == 'h') {
return print_usage(argv[0], false);
+ } else if (argv[a][1] == 'l') {
+ lax = true;
} else if (argv[a][1] == 'q') {
quiet = true;
} else if (argv[a][1] == 'v') {
@@ -215,6 +219,7 @@ main(int argc, char** argv)
(SerdStatementSink)serd_writer_write_statement,
(SerdEndSink)serd_writer_end_anon);
+ serd_reader_set_strict(reader, !lax);
if (quiet) {
serd_reader_set_error_sink(reader, quiet_error_sink, NULL);
serd_writer_set_error_sink(writer, quiet_error_sink, NULL);