diff options
-rw-r--r-- | serd/serd.h | 16 | ||||
-rw-r--r-- | src/filter.c | 94 | ||||
-rw-r--r-- | src/serdi.c | 71 | ||||
-rw-r--r-- | wscript | 15 |
4 files changed, 196 insertions, 0 deletions
diff --git a/serd/serd.h b/serd/serd.h index 794fe285..78a1d0cf 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -1243,6 +1243,22 @@ SerdSink* serd_normaliser_new(const SerdSink* target, const SerdEnv* env); /** + Return a sink that filters out statements that do not match a pattern. + + The returned sink acts like `target` in all respects, except statements that + do not match the pattern are dropped. Only statements where each node is + either equivalent to the corresponding pattern node, or the pattern node is + null, will be passed through to the target sink. +*/ +SERD_API +SerdSink* +serd_filter_new(const SerdSink* target, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* graph); + +/** @} @name Reader @{ diff --git a/src/filter.c b/src/filter.c new file mode 100644 index 00000000..da2e9611 --- /dev/null +++ b/src/filter.c @@ -0,0 +1,94 @@ +/* + Copyright 2019-2020 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "serd/serd.h" + +#include <stdlib.h> + +typedef struct +{ + const SerdSink* target; + SerdNode* subject; + SerdNode* predicate; + SerdNode* object; + SerdNode* graph; +} SerdFilterData; + +static void +free_data(void* handle) +{ + if (handle) { + SerdFilterData* data = (SerdFilterData*)handle; + + serd_node_free(data->subject); + serd_node_free(data->predicate); + serd_node_free(data->object); + serd_node_free(data->graph); + free(data); + } +} + +static SerdStatus +serd_filter_on_event(void* handle, const SerdEvent* event) +{ + const SerdFilterData* const data = (SerdFilterData*)handle; + + if (event->type != SERD_STATEMENT) { + return serd_sink_write_event(data->target, event); + } else if (serd_statement_matches(event->statement.statement, + data->subject, + data->predicate, + data->object, + data->graph)) { + return serd_sink_write_event(data->target, event); + } + + return SERD_SUCCESS; +} + +SerdSink* +serd_filter_new(const SerdSink* target, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* graph) +{ + SerdFilterData* data = (SerdFilterData*)calloc(1, sizeof(SerdFilterData)); + + data->target = target; + + if (subject && serd_node_type(subject) != SERD_VARIABLE) { + data->subject = serd_node_copy(subject); + } + + if (predicate && serd_node_type(predicate) != SERD_VARIABLE) { + data->predicate = serd_node_copy(predicate); + } + + if (object && serd_node_type(object) != SERD_VARIABLE) { + data->object = serd_node_copy(object); + } + + if (graph && serd_node_type(graph) != SERD_VARIABLE) { + data->graph = serd_node_copy(graph); + } + + SerdSink* sink = serd_sink_new(data, free_data); + + serd_sink_set_event_func(sink, serd_filter_on_event); + + return sink; +} diff --git a/src/serdi.c b/src/serdi.c index 1339e593..de422092 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -32,6 +32,13 @@ #define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg) #define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__) +typedef struct { + SerdNode* s; + SerdNode* p; + SerdNode* o; + SerdNode* g; +} FilterPattern; + static int print_version(void) { @@ -58,6 +65,7 @@ print_usage(const char* name, bool error) fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n"); fprintf(os, " -e Eat input one character at a time.\n"); fprintf(os, " -f Keep full URIs in input (don't qualify).\n"); + fprintf(os, " -g PATTERN Grep statements matching PATTERN.\n"); fprintf(os, " -h Display this help and exit.\n"); fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"); fprintf(os, " -k BYTES Parser stack size.\n"); @@ -84,6 +92,56 @@ missing_arg(const char* name, char opt) } static SerdStatus +on_filter_event(void* handle, const SerdEvent* event) +{ + FilterPattern* pat = (FilterPattern*)handle; + if (event->type != SERD_STATEMENT) { + fprintf(stderr, "other thing\n"); + return SERD_SUCCESS; + } else if (pat->s) { + fprintf(stderr, "err1\n"); + return SERD_ERR_INVALID; + } + + const SerdStatement* const statement = event->statement.statement; + pat->s = serd_node_copy(serd_statement_subject(statement)); + pat->p = serd_node_copy(serd_statement_predicate(statement)); + pat->o = serd_node_copy(serd_statement_object(statement)); + pat->g = serd_node_copy(serd_statement_graph(statement)); + + return SERD_SUCCESS; +} + +static SerdSink* +parse_filter(SerdWorld* world, const SerdSink* sink, const char* str) +{ + FilterPattern pat = {NULL, NULL, NULL, NULL}; + SerdSink* in_sink = serd_sink_new(&pat, NULL); + SerdReader* reader = + serd_reader_new(world, SERD_NQUADS, SERD_READ_VARIABLES, in_sink, 4096); + + serd_sink_set_event_func(in_sink, on_filter_event); + SerdStatus st = serd_reader_start_string(reader, str, NULL); + if (!st) { + st = serd_reader_read_document(reader); + } + + serd_reader_free(reader); + serd_sink_free(in_sink); + + if (st) { + return NULL; + } + + SerdSink* filter = serd_filter_new(sink, pat.s, pat.p, pat.o, pat.g); + serd_node_free(pat.s); + serd_node_free(pat.p); + serd_node_free(pat.o); + serd_node_free(pat.g); + return filter; +} + +static SerdStatus read_file(SerdWorld* const world, SerdSyntax syntax, const SerdReaderFlags flags, @@ -144,6 +202,7 @@ main(int argc, char** argv) bool quiet = false; size_t stack_size = 4194304; const char* input_string = NULL; + const char* pattern = NULL; const char* add_prefix = ""; const char* chop_prefix = NULL; const char* root_uri = NULL; @@ -175,6 +234,11 @@ main(int argc, char** argv) use_model = true; } else if (argv[a][1] == 'n') { normalise = true; + } else if (argv[a][1] == 'g') { + if (++a == argc) { + return missing_arg(argv[0], 'g'); + } + pattern = argv[a]; } else if (argv[a][1] == 'q') { quiet = true; } else if (argv[a][1] == 'v') { @@ -299,6 +363,12 @@ main(int argc, char** argv) sink = normaliser = serd_normaliser_new(out_sink, env); } + SerdSink* filter = NULL; + if (pattern) { + filter = parse_filter(world, sink, pattern); + sink = filter; + } + if (quiet) { serd_world_set_log_func(world, serd_quiet_error_func, NULL); } @@ -374,6 +444,7 @@ main(int argc, char** argv) } serd_sink_free(normaliser); + serd_sink_free(filter); serd_node_free(input_name); serd_sink_free(inserter); serd_model_free(model); @@ -172,6 +172,7 @@ lib_source = ['src/base64.c', 'src/cursor.c', 'src/decimal.c', 'src/env.c', + 'src/filter.c', 'src/inserter.c', 'src/int_math.c', 'src/iter.c', @@ -676,6 +677,7 @@ def validation_test_suite(tst, def test(tst): + import subprocess import tempfile # Create test output directories @@ -756,6 +758,18 @@ def test(tst): check.file_equals('%s/tests/multifile/output.nq' % srcdir, 'tests/multifile/output.out.nq') + with tst.group('GrepCommand') as check: + with tempfile.TemporaryFile(mode='w+') as stdout: + cmd = _wrapped_command([ + serdi, + '-g', '?s <urn:example:p> <urn:example:o> .', + '-s', + '<urn:example:s> <urn:example:p> <urn:example:o> .\n' + '<urn:example:s> <urn:example:q> <urn:example:r> .\n']) + check(lambda: subprocess.check_output(cmd).decode('utf-8') == + '<urn:example:s> <urn:example:p> <urn:example:o> .\n', + name='wildcard subject') + with tst.group('BadCommands', expected=1, stderr=autowaf.NONEMPTY) as check: @@ -764,6 +778,7 @@ def test(tst): check([serdi, 'ftp://example.org/unsupported.ttl']) check([serdi, '-I']) check([serdi, '-c']) + check([serdi, '-g']) check([serdi, '-i', 'illegal']) check([serdi, '-i', 'turtle']) check([serdi, '-i']) |