diff options
author | David Robillard <d@drobilla.net> | 2019-12-18 19:16:14 -0500 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2019-12-20 10:26:55 -0500 |
commit | 116c73a7886e281ef013a3d658c76c6a32160fb4 (patch) | |
tree | 568d25ea4b9cf0805bb5aaefb01c35f0a7178858 | |
parent | 1c3a97e8a0ca77f872783cc5471aa95230366e16 (diff) | |
download | serd-116c73a7886e281ef013a3d658c76c6a32160fb4.tar.gz serd-116c73a7886e281ef013a3d658c76c6a32160fb4.tar.bz2 serd-116c73a7886e281ef013a3d658c76c6a32160fb4.zip |
WIP: Add statement filtering
-rw-r--r-- | serd/serd.h | 44 | ||||
-rw-r--r-- | src/filter.c | 122 | ||||
-rw-r--r-- | src/serdi.c | 51 | ||||
-rw-r--r-- | wscript | 13 |
4 files changed, 230 insertions, 0 deletions
diff --git a/serd/serd.h b/serd/serd.h
index e96d46f7..fe4631e9 100644
--- a/serd/serd.h
+++ b/serd/serd.h
@@ -1229,6 +1229,53 @@ serd_normaliser_get_sink(const SerdNormaliser* normaliser);
 
 /**
    @}
+   @name Filtering
+   @{
+*/
+
+/// Sink wrapper that filters statements
+typedef struct SerdFilterImpl SerdFilter;
+
+/**
+   Return a new filter whose sink drops statements that do not match a pattern.
+
+   The filter's sink acts like `target` in all respects, except statements that
+   do not match the pattern are dropped.
+
+   @return A new filter that must be freed with serd_filter_free(), or NULL if
+   allocation fails.
+*/
+SERD_API
+SerdFilter*
+serd_filter_new(const SerdSink* target);
+
+/// Free `filter`
+SERD_API
+void
+serd_filter_free(SerdFilter* filter);
+
+/**
+   Set the pattern statement to filter by.
+
+   Only statements where each node is either equivalent to the corresponding
+   pattern node, or the pattern node is null or a variable, will be passed
+   through to the target sink.
+*/
+SERD_API
+SerdStatus
+serd_filter_set_statement(SerdFilter*     filter,
+                          const SerdNode* subject,
+                          const SerdNode* predicate,
+                          const SerdNode* object,
+                          const SerdNode* graph);
+
+/// Return the sink interface that forwards matching statements to the target
+SERD_API
+const SerdSink*
+serd_filter_get_sink(const SerdFilter* filter);
+
+/**
+   @}
    @name Reader
    @{
 */
diff --git a/src/filter.c b/src/filter.c
new file mode 100644
index 00000000..44b86466
--- /dev/null
+++ b/src/filter.c
@@ -0,0 +1,128 @@
+/*
+  Copyright 2019 David Robillard <http://drobilla.net>
+
+  Permission to use, copy, modify, and/or distribute this software for any
+  purpose with or without fee is hereby granted, provided that the above
+  copyright notice and this permission notice appear in all copies.
+
+  THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include "namespaces.h"
+#include "node.h"
+#include "sink.h"
+#include "statement.h"
+#include "string_utils.h"
+
+#include "serd/serd.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/// Filter state: the wrapped sink and the pattern nodes (NULL matches anything)
+typedef struct
+{
+    const SerdSink* target;
+    SerdNode*       subject;
+    SerdNode*       predicate;
+    SerdNode*       object;
+    SerdNode*       graph;
+} SerdFilterData;
+
+struct SerdFilterImpl
+{
+    SerdSink       sink;
+    SerdFilterData data;
+};
+
+/// Forward `statement` to the target sink if it matches the filter pattern
+static SerdStatus
+serd_filter_on_statement(void*                handle,
+                         SerdStatementFlags   flags,
+                         const SerdStatement* statement)
+{
+    const SerdFilterData* const data = (const SerdFilterData*)handle;
+
+    if (!serd_statement_matches(statement,
+                                data->subject,
+                                data->predicate,
+                                data->object,
+                                data->graph)) {
+        return SERD_SUCCESS; // A dropped statement is not an error
+    }
+
+    // Propagate the target's status so downstream errors are not swallowed
+    return serd_sink_write_statement(data->target, flags, statement);
+}
+
+SerdFilter*
+serd_filter_new(const SerdSink* target)
+{
+    SerdFilter* filter = (SerdFilter*)calloc(1, sizeof(SerdFilter));
+    if (!filter) {
+        return NULL;
+    }
+
+    SerdSink*       sink = &filter->sink;
+    SerdFilterData* data = &filter->data;
+
+    sink->handle      = data;
+    sink->free_handle = NULL;
+    sink->env         = target->env;
+    data->target      = target;
+
+    serd_sink_set_statement_func(sink, serd_filter_on_statement);
+
+    return filter;
+}
+
+void
+serd_filter_free(SerdFilter* filter)
+{
+    if (filter) {
+        serd_node_free(filter->data.subject);
+        serd_node_free(filter->data.predicate);
+        serd_node_free(filter->data.object);
+        serd_node_free(filter->data.graph);
+        free(filter);
+    }
+}
+
+/// Replace `*field` with a copy of `pattern`, or NULL if it is a wildcard
+static void
+set_field(SerdNode** field, const SerdNode* pattern)
+{
+    // A missing node and a variable both match anything
+    const bool is_var =
+        (!pattern || serd_node_get_type(pattern) == SERD_VARIABLE);
+
+    serd_node_free(*field);
+    *field = (is_var ? NULL : serd_node_copy(pattern));
+}
+
+SerdStatus
+serd_filter_set_statement(SerdFilter*     filter,
+                          const SerdNode* subject,
+                          const SerdNode* predicate,
+                          const SerdNode* object,
+                          const SerdNode* graph)
+{
+    set_field(&filter->data.subject, subject);
+    set_field(&filter->data.predicate, predicate);
+    set_field(&filter->data.object, object);
+    set_field(&filter->data.graph, graph);
+
+    return SERD_SUCCESS;
+}
+
+const SerdSink*
+serd_filter_get_sink(const SerdFilter* filter)
+{
+    return &filter->sink;
+}
diff --git a/src/serdi.c b/src/serdi.c
index 14e88354..e528191c 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -14,6 +14,7 @@
   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
 
+#include "reader.h"
 #include "serd_config.h"
 #include "system.h"
 
@@ -60,6 +61,7 @@ print_usage(const char* name, bool error)
     fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n");
     fprintf(os, " -e Eat input one character at a time.\n");
     fprintf(os, " -f Fast serialisation without inlining.\n");
+    fprintf(os, " -g PATTERN Grep statements matching PATTERN.\n");
     fprintf(os, " -h Display this help and exit.\n");
     fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n");
     fprintf(os, " -k BYTES Parser stack size.\n");
@@ -93,6 +95,62 @@ quiet_error_func(void* handle, const SerdLogEntry* entry)
 }
 
+/// Statement handler that installs each parsed pattern statement on the filter
+static SerdStatus
+on_filter_statement(void*                handle,
+                    SerdStatementFlags   flags,
+                    const SerdStatement* statement)
+{
+    (void)flags;
+
+    return serd_filter_set_statement((SerdFilter*)handle,
+                                     serd_statement_get_subject(statement),
+                                     serd_statement_get_predicate(statement),
+                                     serd_statement_get_object(statement),
+                                     serd_statement_get_graph(statement));
+}
+
+/**
+   Create a filter in front of `sink` from the pattern statement in `str`.
+
+   The pattern is read with the SERD_NQUADS syntax and the SERD_READ_VARIABLES
+   flag. Every statement read replaces the filter pattern, so the last one wins.
+
+   @return A new filter that must be freed with serd_filter_free(), or NULL
+   if the filter could not be allocated or `str` could not be parsed.
+*/
+static SerdFilter*
+parse_filter(SerdWorld* world, const SerdSink* sink, const char* str)
+{
+    SerdFilter* filter = serd_filter_new(sink);
+    if (!filter) {
+        return NULL;
+    }
+
+    SerdSink* in_sink = serd_sink_new(filter, NULL, NULL);
+
+    serd_sink_set_statement_func(in_sink, on_filter_statement);
+
+    SerdReader* reader = serd_reader_new(
+        world, SERD_NQUADS, SERD_READ_VARIABLES, in_sink, 4096);
+
+    SerdStatus st = serd_reader_start_string(reader, str, NULL);
+    if (!st) {
+        st = serd_reader_read_document(reader);
+    }
+
+    serd_reader_free(reader);
+    serd_sink_free(in_sink);
+
+    // An unparseable pattern would otherwise leave a filter that matches all
+    if (st > SERD_FAILURE) {
+        serd_filter_free(filter);
+        return NULL;
+    }
+
+    return filter;
+}
+
 static SerdStatus
 read_file(SerdWorld* const world,
           SerdSyntax syntax,
           const SerdReaderFlags flags,
@@ -153,6 +211,7 @@ main(int argc, char** argv)
     bool quiet = false;
     size_t stack_size = 4194304;
     const char* input_string = NULL;
+    const char* pattern = NULL;
     const char* add_prefix = "";
     const char* chop_prefix = NULL;
     const char* root_uri = NULL;
@@ -184,6 +243,11 @@ main(int argc, char** argv)
                 use_model = true;
             } else if (argv[a][1] == 'n') {
                 normalise = true;
+            } else if (argv[a][1] == 'g') {
+                if (++a == argc) {
+                    return missing_arg(argv[0], 'g');
+                }
+                pattern = argv[a];
             } else if (argv[a][1] == 'q') {
                 quiet = true;
             } else if (argv[a][1] == 'v') {
@@ -307,6 +371,18 @@ main(int argc, char** argv)
         sink = serd_normaliser_get_sink(normaliser);
     }
 
+    SerdFilter* filter = NULL;
+    if (pattern) {
+        filter = parse_filter(world, sink, pattern);
+        if (!filter) {
+            // Refuse to run unfiltered when a filter was requested
+            fprintf(stderr, "%s: failed to parse pattern\n", argv[0]);
+            return 1;
+        }
+
+        sink = serd_filter_get_sink(filter);
+    }
+
     if (quiet) {
         serd_world_set_log_func(world, quiet_error_func, NULL);
     }
@@ -381,6 +457,7 @@ main(int argc, char** argv)
         serd_range_free(range);
     }
 
+    serd_filter_free(filter);
     serd_normaliser_free(normaliser);
     serd_node_free(input_name);
     serd_inserter_free(inserter);
diff --git a/wscript b/wscript
--- a/wscript
+++ b/wscript
@@ -131,6 +131,7 @@ lib_source = ['src/base64.c',
               'src/cursor.c',
               'src/decimal.c',
               'src/env.c',
+              'src/filter.c',
               'src/inserter.c',
               'src/int_math.c',
               'src/iter.c',
@@ -569,6 +570,7 @@ def validation_test_suite(tst, base_uri, testdir, isyntax, osyntax, options=''):
         check(command, expected=expected, name=action)
 
 def test(tst):
+    import subprocess
     import tempfile
 
     # Create test output directories
@@ -638,12 +640,22 @@ def test(tst):
         check.file_equals('%s/tests/multifile/output.nq' % srcdir,
                           'tests/multifile/output.out.nq')
 
+    with tst.group('GrepCommand') as check:
+        check(lambda: subprocess.check_output(
+            [serdi,
+             '-g', '?s <urn:example:p> <urn:example:o> .',
+             '-s',
+             '<urn:example:s> <urn:example:p> <urn:example:o> .\n'
+             '<urn:example:s> <urn:example:q> <urn:example:r> .\n']).decode('utf-8') == '<urn:example:s> <urn:example:p> <urn:example:o> .\n',
+              name='wildcard subject')
+
     with tst.group('BadCommands', expected=1, stderr=autowaf.NONEMPTY) as check:
         check([serdi])
         check([serdi, '/no/such/file'])
         check([serdi, 'ftp://example.org/unsupported.ttl'])
         check([serdi, '-I'])
         check([serdi, '-c'])
+        check([serdi, '-g'])
         check([serdi, '-i', 'illegal'])
         check([serdi, '-i', 'turtle'])
         check([serdi, '-i'])