aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2019-12-18 19:16:14 -0500
committerDavid Robillard <d@drobilla.net>2019-12-20 10:26:55 -0500
commit116c73a7886e281ef013a3d658c76c6a32160fb4 (patch)
tree568d25ea4b9cf0805bb5aaefb01c35f0a7178858
parent1c3a97e8a0ca77f872783cc5471aa95230366e16 (diff)
downloadserd-116c73a7886e281ef013a3d658c76c6a32160fb4.tar.gz
serd-116c73a7886e281ef013a3d658c76c6a32160fb4.tar.bz2
serd-116c73a7886e281ef013a3d658c76c6a32160fb4.zip
WIP: Add statement filtering
-rw-r--r--serd/serd.h44
-rw-r--r--src/filter.c122
-rw-r--r--src/serdi.c51
-rw-r--r--wscript13
4 files changed, 230 insertions, 0 deletions
diff --git a/serd/serd.h b/serd/serd.h
index e96d46f7..fe4631e9 100644
--- a/serd/serd.h
+++ b/serd/serd.h
@@ -1229,6 +1229,50 @@ serd_normaliser_get_sink(const SerdNormaliser* normaliser);
/**
@}
+ @name Filtering
+ @{
+*/
+
+/**
+   Sink wrapper that filters statements.
+
+   This is an opaque handle: create one with serd_filter_new(), configure the
+   pattern with serd_filter_set_statement(), obtain the sink to write to with
+   serd_filter_get_sink(), and release it with serd_filter_free().
+*/
+typedef struct SerdFilterImpl SerdFilter;
+
+/**
+   Create a new filter that drops statements that do not match a pattern.
+
+   Statements written to the sink returned by serd_filter_get_sink() are
+   forwarded to `target` only if they match the pattern set with
+   serd_filter_set_statement(); statements that do not match are dropped.
+   A new filter has no pattern nodes set, so until a pattern is set every
+   pattern node is null.
+
+   NOTE(review): this was originally documented as acting like `target` "in
+   all respects"; the implementation only installs a statement handler, so
+   confirm how other events are treated before relying on that.
+
+   @param target Sink that receives the statements that pass the filter.  It
+   is dereferenced on construction, so it must not be null, and the filter
+   keeps a pointer to it (not a copy), so it must remain valid for as long as
+   the filter is used.
+
+   @return A new filter, which must be freed with serd_filter_free().
+*/
+SERD_API
+SerdFilter*
+serd_filter_new(const SerdSink* target);
+
+/**
+   Free `filter`.
+
+   This releases the filter itself along with its copies of the pattern nodes.
+   The target sink is not freed.  Does nothing if `filter` is null.
+*/
+SERD_API
+void
+serd_filter_free(SerdFilter* filter);
+
+/**
+   Set the statement pattern to filter by.
+
+   Only statements where each node is either equivalent to the corresponding
+   pattern node, or the pattern node is null, will be passed through to the
+   target sink.  A pattern node of type SERD_VARIABLE is treated like a null
+   pattern node.
+
+   The filter stores its own copies of the given nodes, so the arguments need
+   not outlive this call.  Any previously set pattern is freed and replaced.
+
+   @param filter The filter to configure.
+   @param subject Subject to match, or null (or a variable) to match any.
+   @param predicate Predicate to match, or null (or a variable) to match any.
+   @param object Object to match, or null (or a variable) to match any.
+   @param graph Graph to match, or null (or a variable) to match any.
+   @return #SERD_SUCCESS; the current implementation reports no other status.
+*/
+SERD_API
+SerdStatus
+serd_filter_set_statement(SerdFilter* filter,
+ const SerdNode* subject,
+ const SerdNode* predicate,
+ const SerdNode* object,
+ const SerdNode* graph);
+
+/**
+   Return the sink interface of `filter`.
+
+   Statements written to the returned sink are forwarded to the filter's
+   target only if they pass the filter.  The returned sink is part of `filter`
+   itself: it must not be freed separately, and is only valid until `filter`
+   is freed.
+*/
+SERD_API
+const SerdSink*
+serd_filter_get_sink(const SerdFilter* filter);
+
+/**
+ @}
@name Reader
@{
*/
diff --git a/src/filter.c b/src/filter.c
new file mode 100644
index 00000000..44b86466
--- /dev/null
+++ b/src/filter.c
@@ -0,0 +1,122 @@
+/*
+ Copyright 2019 David Robillard <http://drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include "namespaces.h"
+#include "node.h"
+#include "sink.h"
+#include "statement.h"
+#include "string_utils.h"
+
+#include "serd/serd.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/// State used by the statement handler: the downstream sink and the pattern
+typedef struct
+{
+ const SerdSink* target; ///< Sink that matching statements are written to (never freed here)
+ SerdNode* subject; ///< Copy of the subject pattern owned by the filter, or NULL for "any"
+ SerdNode* predicate; ///< Copy of the predicate pattern owned by the filter, or NULL for "any"
+ SerdNode* object; ///< Copy of the object pattern owned by the filter, or NULL for "any"
+ SerdNode* graph; ///< Copy of the graph pattern owned by the filter, or NULL for "any"
+} SerdFilterData;
+
+/**
+   Filter implementation.
+
+   The sink and the data it operates on live in a single allocation:
+   serd_filter_new() points the handle of `sink` at `data`, and
+   serd_filter_get_sink() returns the address of `sink`.
+*/
+struct SerdFilterImpl
+{
+ SerdSink sink; ///< Sink handed out to writers
+ SerdFilterData data; ///< Handle of `sink`: target and pattern nodes
+};
+
+/**
+   Statement handler installed on the filter's sink.
+
+   Forwards `statement` to the target sink if it matches the stored pattern,
+   and drops it otherwise.
+
+   @param handle The filter's SerdFilterData.
+   @param flags Statement flags, passed through to the target unchanged.
+   @param statement The incoming statement.
+   @return The status of the write to the target for a matching statement, or
+   SERD_SUCCESS for a statement that was dropped.
+*/
+static SerdStatus
+serd_filter_on_statement(void*                handle,
+                         SerdStatementFlags   flags,
+                         const SerdStatement* statement)
+{
+  const SerdFilterData* const data = (const SerdFilterData*)handle;
+
+  if (!serd_statement_matches(statement,
+                              data->subject,
+                              data->predicate,
+                              data->object,
+                              data->graph)) {
+    return SERD_SUCCESS; // Dropping a statement is not an error
+  }
+
+  // Propagate the target's status so downstream write errors are not swallowed
+  return serd_sink_write_statement(data->target, flags, statement);
+}
+
+/**
+   Allocate a filter that forwards matching statements to `target`.
+
+   The filter is zero-initialised, so all pattern nodes start out as NULL.
+   The sink's handle points at the filter's own data, and its environment is
+   taken from `target`.
+
+   @param target Downstream sink; must not be null, and is not owned.
+   @return A new filter, or NULL if allocation fails.
+*/
+SerdFilter*
+serd_filter_new(const SerdSink* target)
+{
+  SerdFilter* const filter = (SerdFilter*)calloc(1, sizeof(SerdFilter));
+  if (!filter) {
+    return NULL; // Out of memory: nothing to initialise
+  }
+
+  SerdSink* const       sink = &filter->sink;
+  SerdFilterData* const data = &filter->data;
+
+  sink->handle      = data;
+  sink->free_handle = NULL; // The data is freed together with the filter
+  sink->env         = target->env;
+  data->target      = target;
+
+  serd_sink_set_statement_func(sink, serd_filter_on_statement);
+
+  return filter;
+}
+
+/// Release the pattern nodes owned by `filter`, then the filter itself
+void
+serd_filter_free(SerdFilter* filter)
+{
+  if (!filter) {
+    return; // Freeing a null filter is a no-op
+  }
+
+  SerdFilterData* const owned = &filter->data;
+
+  // The target sink is borrowed, so only the pattern copies are released
+  serd_node_free(owned->subject);
+  serd_node_free(owned->predicate);
+  serd_node_free(owned->object);
+  serd_node_free(owned->graph);
+
+  free(filter);
+}
+
+/**
+   Replace the pattern node stored in `field`.
+
+   The old node is freed.  The field is then set to a copy of `pattern`, or to
+   NULL if `pattern` is null or a variable.
+*/
+static void
+set_field(SerdNode** field, const SerdNode* pattern)
+{
+  // Decide before freeing: null and variable patterns are both wildcards
+  const bool is_concrete =
+    (pattern && serd_node_get_type(pattern) != SERD_VARIABLE);
+
+  serd_node_free(*field);
+
+  if (is_concrete) {
+    *field = serd_node_copy(pattern);
+  } else {
+    *field = NULL;
+  }
+}
+
+/// Replace all four pattern nodes of `filter` with copies of the given nodes
+SerdStatus
+serd_filter_set_statement(SerdFilter*     filter,
+                          const SerdNode* subject,
+                          const SerdNode* predicate,
+                          const SerdNode* object,
+                          const SerdNode* graph)
+{
+  SerdFilterData* const pattern = &filter->data;
+
+  // Each field is handled independently: old node freed, new one copied
+  set_field(&pattern->subject, subject);
+  set_field(&pattern->predicate, predicate);
+  set_field(&pattern->object, object);
+  set_field(&pattern->graph, graph);
+
+  return SERD_SUCCESS;
+}
+
+/// Return the sink embedded in `filter` (valid for as long as the filter is)
+const SerdSink*
+serd_filter_get_sink(const SerdFilter* filter)
+{
+  const SerdSink* const embedded = &filter->sink;
+
+  return embedded;
+}
diff --git a/src/serdi.c b/src/serdi.c
index 14e88354..e528191c 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -14,6 +14,7 @@
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
+#include "reader.h"
#include "serd_config.h"
#include "system.h"
@@ -60,6 +61,7 @@ print_usage(const char* name, bool error)
fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n");
fprintf(os, " -e Eat input one character at a time.\n");
fprintf(os, " -f Fast serialisation without inlining.\n");
+ fprintf(os, " -g PATTERN Grep statements matching PATTERN.\n");
fprintf(os, " -h Display this help and exit.\n");
fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n");
fprintf(os, " -k BYTES Parser stack size.\n");
@@ -93,6 +95,42 @@ quiet_error_func(void* handle, const SerdLogEntry* entry)
}
static SerdStatus
+on_filter_statement(void* handle,
+ SerdStatementFlags flags,
+ const SerdStatement* statement)
+{
+ (void)flags;
+
+ serd_filter_set_statement((SerdFilter*)handle,
+ serd_statement_get_subject(statement),
+ serd_statement_get_predicate(statement),
+ serd_statement_get_object(statement),
+ serd_statement_get_graph(statement));
+
+ return SERD_SUCCESS;
+}
+
+/**
+   Build a filter in front of `sink` from a pattern given as a string.
+
+   `str` is read with an NQuads reader that has SERD_READ_VARIABLES set.  Each
+   statement read is passed to on_filter_statement(), which installs it as
+   the filter's pattern.
+
+   @return A new filter, which the caller must free with serd_filter_free().
+*/
+static SerdFilter*
+parse_filter(SerdWorld* world, const SerdSink* sink, const char* str)
+{
+  SerdFilter* const filter = serd_filter_new(sink);
+
+  // Temporary sink whose handle is the filter, used only to receive the pattern
+  SerdSink* const pattern_sink = serd_sink_new(filter, NULL, NULL);
+  serd_sink_set_statement_func(pattern_sink, on_filter_statement);
+
+  SerdReader* const pattern_reader = serd_reader_new(
+    world, SERD_NQUADS, SERD_READ_VARIABLES, pattern_sink, 4096);
+
+  // NOTE(review): the results of these two calls are not checked, so a
+  // pattern that fails to parse presumably leaves the filter with whatever
+  // pattern (possibly none) was set so far -- confirm this is intended
+  serd_reader_start_string(pattern_reader, str, NULL);
+  serd_reader_read_document(pattern_reader);
+
+  // Only the filter outlives this function
+  serd_reader_free(pattern_reader);
+  serd_sink_free(pattern_sink);
+
+  return filter;
+}
+
+static SerdStatus
read_file(SerdWorld* const world,
SerdSyntax syntax,
const SerdReaderFlags flags,
@@ -153,6 +191,7 @@ main(int argc, char** argv)
bool quiet = false;
size_t stack_size = 4194304;
const char* input_string = NULL;
+ const char* pattern = NULL;
const char* add_prefix = "";
const char* chop_prefix = NULL;
const char* root_uri = NULL;
@@ -184,6 +223,11 @@ main(int argc, char** argv)
use_model = true;
} else if (argv[a][1] == 'n') {
normalise = true;
+ } else if (argv[a][1] == 'g') {
+ if (++a == argc) {
+ // Report the option being parsed here (-g), not -s
+ return missing_arg(argv[0], 'g');
+ }
+ pattern = argv[a];
} else if (argv[a][1] == 'q') {
quiet = true;
} else if (argv[a][1] == 'v') {
@@ -307,6 +351,12 @@ main(int argc, char** argv)
sink = serd_normaliser_get_sink(normaliser);
}
+ SerdFilter* filter = NULL;
+ if (pattern) {
+ filter = parse_filter(world, sink, pattern);
+ sink = serd_filter_get_sink(filter);
+ }
+
if (quiet) {
serd_world_set_log_func(world, quiet_error_func, NULL);
}
@@ -381,6 +431,7 @@ main(int argc, char** argv)
serd_range_free(range);
}
+ serd_filter_free(filter);
serd_normaliser_free(normaliser);
serd_node_free(input_name);
serd_inserter_free(inserter);
diff --git a/wscript b/wscript
index ba03695c..35323087 100644
--- a/wscript
+++ b/wscript
@@ -131,6 +131,7 @@ lib_source = ['src/base64.c',
'src/cursor.c',
'src/decimal.c',
'src/env.c',
+ 'src/filter.c',
'src/inserter.c',
'src/int_math.c',
'src/iter.c',
@@ -569,6 +570,7 @@ def validation_test_suite(tst, base_uri, testdir, isyntax, osyntax, options=''):
check(command, expected=expected, name=action)
def test(tst):
+ import subprocess
import tempfile
# Create test output directories
@@ -638,12 +640,23 @@ def test(tst):
check.file_equals('%s/tests/multifile/output.nq' % srcdir,
'tests/multifile/output.out.nq')
+ with tst.group('GrepCommand') as check:
+ with tempfile.TemporaryFile(mode='w+') as stdout:
+ check(lambda: subprocess.check_output(
+ [serdi,
+ '-g', '?s <urn:example:p> <urn:example:o> .',
+ '-s',
+ '<urn:example:s> <urn:example:p> <urn:example:o> .\n'
+ '<urn:example:s> <urn:example:q> <urn:example:r> .\n']).decode('utf-8') == '<urn:example:s> <urn:example:p> <urn:example:o> .\n',
+ name='wildcard subject')
+
with tst.group('BadCommands', expected=1, stderr=autowaf.NONEMPTY) as check:
check([serdi])
check([serdi, '/no/such/file'])
check([serdi, 'ftp://example.org/unsupported.ttl'])
check([serdi, '-I'])
check([serdi, '-c'])
+ check([serdi, '-g'])
check([serdi, '-i', 'illegal'])
check([serdi, '-i', 'turtle'])
check([serdi, '-i'])