aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--serd/serd.h16
-rw-r--r--src/filter.c94
-rw-r--r--src/serdi.c71
-rw-r--r--wscript15
4 files changed, 196 insertions, 0 deletions
diff --git a/serd/serd.h b/serd/serd.h
index 794fe285..78a1d0cf 100644
--- a/serd/serd.h
+++ b/serd/serd.h
@@ -1243,6 +1243,22 @@ SerdSink*
serd_normaliser_new(const SerdSink* target, const SerdEnv* env);
/**
+   Return a sink that filters out statements that do not match a pattern.
+
+   The returned sink acts like `target` in all respects, except that statements
+   which do not match the pattern are dropped.  A statement is passed through
+   to the target sink only if, for each of its nodes, the corresponding pattern
+   node is equivalent to it or is a wildcard.  A pattern node that is null, or
+   that is a variable, is a wildcard.
+
+   The pattern nodes are copied, so the caller keeps ownership of the nodes
+   passed in.  The target sink is referenced rather than copied, so it must
+   remain valid for as long as the returned sink is in use.
+
+   @param target The sink that receives everything that is not dropped.
+   @param subject The subject to match, or a wildcard.
+   @param predicate The predicate to match, or a wildcard.
+   @param object The object to match, or a wildcard.
+   @param graph The graph to match, or a wildcard.
+   @return A new sink, to be freed with serd_sink_free().
+*/
+SERD_API
+SerdSink*
+serd_filter_new(const SerdSink* target,
+ const SerdNode* subject,
+ const SerdNode* predicate,
+ const SerdNode* object,
+ const SerdNode* graph);
+
+/**
@}
@name Reader
@{
diff --git a/src/filter.c b/src/filter.c
new file mode 100644
index 00000000..da2e9611
--- /dev/null
+++ b/src/filter.c
@@ -0,0 +1,94 @@
+/*
+ Copyright 2019-2020 David Robillard <http://drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include "serd/serd.h"
+
+#include <stdlib.h>
+
+typedef struct /* State of one filtering sink: the target and the pattern */
+{
+ const SerdSink* target; /* Sink that receives the events that pass (not freed by free_data) */
+ SerdNode* subject; /* Copy of the subject to match, or NULL to match any subject */
+ SerdNode* predicate; /* Copy of the predicate to match, or NULL to match any predicate */
+ SerdNode* object; /* Copy of the object to match, or NULL to match any object */
+ SerdNode* graph; /* Copy of the graph to match, or NULL to match any graph */
+} SerdFilterData;
+
+/*
+  Release the state of a filtering sink.
+
+  Frees the four pattern node copies and then the SerdFilterData itself.  The
+  target sink is only referenced by the filter, so it is left alone.  A null
+  handle is accepted and ignored.
+*/
+static void
+free_data(void* handle)
+{
+  SerdFilterData* const filter = (SerdFilterData*)handle;
+  if (!filter) {
+    return;
+  }
+
+  /* Wildcard positions hold NULL, which is passed to serd_node_free as-is */
+  serd_node_free(filter->subject);
+  serd_node_free(filter->predicate);
+  serd_node_free(filter->object);
+  serd_node_free(filter->graph);
+
+  free(filter);
+}
+
+/*
+  Event handler of a filtering sink.
+
+  Every event that is not a statement is forwarded to the target untouched.
+  A statement is forwarded only if it matches the stored pattern; otherwise
+  it is dropped and SERD_SUCCESS is returned, so that the source keeps going.
+*/
+static SerdStatus
+serd_filter_on_event(void* handle, const SerdEvent* event)
+{
+  const SerdFilterData* const filter = (const SerdFilterData*)handle;
+
+  /* The only thing that is ever dropped is a statement that does not match */
+  if (event->type == SERD_STATEMENT &&
+      !serd_statement_matches(event->statement.statement,
+                              filter->subject,
+                              filter->predicate,
+                              filter->object,
+                              filter->graph)) {
+    return SERD_SUCCESS;
+  }
+
+  return serd_sink_write_event(filter->target, event);
+}
+
+/*
+  Store a copy of a pattern node in `*slot`, unless the node is a wildcard.
+
+  A null node and a variable node are both wildcards: for those, `*slot` is
+  left as it is (null in freshly zeroed filter data) and the call succeeds.
+
+  Returns nonzero on success, or zero if a node that had to be copied could
+  not be.  NOTE(review): this assumes serd_node_copy() only returns null for
+  a non-null argument when it fails to allocate -- confirm.
+*/
+static int
+copy_pattern_node(SerdNode** slot, const SerdNode* node)
+{
+  if (!node || serd_node_type(node) == SERD_VARIABLE) {
+    return 1;
+  }
+
+  *slot = serd_node_copy(node);
+  return *slot != NULL;
+}
+
+/*
+  Create a sink that only passes statements matching a pattern to `target`.
+
+  Each of `subject`, `predicate`, `object` and `graph` is either a node that
+  the corresponding statement node must be equivalent to, or a wildcard (null
+  or a variable node).  Non-wildcard nodes are copied, so the caller keeps
+  ownership of its arguments.  `target` is referenced, not copied, and must
+  outlive the returned sink.
+
+  Returns the new sink, which owns the filter state and releases it through
+  free_data(), or NULL if memory could not be allocated.
+*/
+SerdSink*
+serd_filter_new(const SerdSink* target,
+                const SerdNode* subject,
+                const SerdNode* predicate,
+                const SerdNode* object,
+                const SerdNode* graph)
+{
+  SerdFilterData* data = (SerdFilterData*)calloc(1, sizeof(SerdFilterData));
+  if (!data) {
+    return NULL;
+  }
+
+  data->target = target;
+
+  /* A copy that fails must not be mistaken for a wildcard, since that would
+     silently widen the filter, so give up instead */
+  if (!copy_pattern_node(&data->subject, subject) ||
+      !copy_pattern_node(&data->predicate, predicate) ||
+      !copy_pattern_node(&data->object, object) ||
+      !copy_pattern_node(&data->graph, graph)) {
+    free_data(data);
+    return NULL;
+  }
+
+  SerdSink* sink = serd_sink_new(data, free_data);
+  if (!sink) {
+    /* No sink exists to take ownership of the data, so release it here */
+    free_data(data);
+    return NULL;
+  }
+
+  serd_sink_set_event_func(sink, serd_filter_on_event);
+
+  return sink;
+}
diff --git a/src/serdi.c b/src/serdi.c
index 1339e593..de422092 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -32,6 +32,13 @@
#define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg)
#define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__)
+typedef struct { /* Nodes of the statement given to -g, filled by on_filter_event */
+ SerdNode* s; /* Copy of the pattern statement's subject (NULL until one is read) */
+ SerdNode* p; /* Copy of the pattern statement's predicate */
+ SerdNode* o; /* Copy of the pattern statement's object */
+ SerdNode* g; /* Copy of the pattern statement's graph */
+} FilterPattern;
+
static int
print_version(void)
{
@@ -58,6 +65,7 @@ print_usage(const char* name, bool error)
fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n");
fprintf(os, " -e Eat input one character at a time.\n");
fprintf(os, " -f Keep full URIs in input (don't qualify).\n");
+ fprintf(os, " -g PATTERN Grep statements matching PATTERN.\n");
fprintf(os, " -h Display this help and exit.\n");
fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n");
fprintf(os, " -k BYTES Parser stack size.\n");
@@ -84,6 +92,56 @@ missing_arg(const char* name, char opt)
}
static SerdStatus
+on_filter_event(void* handle, const SerdEvent* event)
+{
+ FilterPattern* pat = (FilterPattern*)handle;
+ if (event->type != SERD_STATEMENT) {
+ fprintf(stderr, "other thing\n");
+ return SERD_SUCCESS;
+ } else if (pat->s) {
+ fprintf(stderr, "err1\n");
+ return SERD_ERR_INVALID;
+ }
+
+ const SerdStatement* const statement = event->statement.statement;
+ pat->s = serd_node_copy(serd_statement_subject(statement));
+ pat->p = serd_node_copy(serd_statement_predicate(statement));
+ pat->o = serd_node_copy(serd_statement_object(statement));
+ pat->g = serd_node_copy(serd_statement_graph(statement));
+
+ return SERD_SUCCESS;
+}
+
+/*
+  Build a filtering sink in front of `sink` from a textual pattern.
+
+  `str` is read as NQuads with variables enabled, and the nodes of the
+  statement it contains become the pattern handed to serd_filter_new().
+  The temporary reader, sink and node copies used for parsing are always
+  released before returning, whether or not parsing succeeded.
+
+  Returns the new filter, which the caller must free with serd_sink_free(),
+  or NULL if the pattern could not be read or the filter could not be
+  created.  The caller must check for NULL before using the result as a sink.
+*/
+static SerdSink*
+parse_filter(SerdWorld* world, const SerdSink* sink, const char* str)
+{
+  /* Stack size for the reader that parses the (short) pattern string */
+  const size_t pattern_stack_size = 4096;
+
+  FilterPattern pat = {NULL, NULL, NULL, NULL};
+
+  SerdSink* in_sink = serd_sink_new(&pat, NULL);
+  if (!in_sink) {
+    return NULL;
+  }
+
+  serd_sink_set_event_func(in_sink, on_filter_event);
+
+  SerdReader* reader = serd_reader_new(
+    world, SERD_NQUADS, SERD_READ_VARIABLES, in_sink, pattern_stack_size);
+
+  int parsed = 0;
+  if (reader) {
+    SerdStatus st = serd_reader_start_string(reader, str, NULL);
+    if (!st) {
+      st = serd_reader_read_document(reader);
+    }
+
+    parsed = !st;
+    serd_reader_free(reader);
+  }
+
+  serd_sink_free(in_sink);
+
+  SerdSink* filter =
+    parsed ? serd_filter_new(sink, pat.s, pat.p, pat.o, pat.g) : NULL;
+
+  /* The filter works on its own copies, and a failed parse may still have
+     captured a statement, so the nodes are released on every path */
+  serd_node_free(pat.s);
+  serd_node_free(pat.p);
+  serd_node_free(pat.o);
+  serd_node_free(pat.g);
+
+  return filter;
+}
+
+static SerdStatus
read_file(SerdWorld* const world,
SerdSyntax syntax,
const SerdReaderFlags flags,
@@ -144,6 +202,7 @@ main(int argc, char** argv)
bool quiet = false;
size_t stack_size = 4194304;
const char* input_string = NULL;
+ const char* pattern = NULL;
const char* add_prefix = "";
const char* chop_prefix = NULL;
const char* root_uri = NULL;
@@ -175,6 +234,11 @@ main(int argc, char** argv)
use_model = true;
} else if (argv[a][1] == 'n') {
normalise = true;
+ } else if (argv[a][1] == 'g') {
+ if (++a == argc) {
+ return missing_arg(argv[0], 'g');
+ }
+ pattern = argv[a];
} else if (argv[a][1] == 'q') {
quiet = true;
} else if (argv[a][1] == 'v') {
@@ -299,6 +363,12 @@ main(int argc, char** argv)
sink = normaliser = serd_normaliser_new(out_sink, env);
}
+ SerdSink* filter = NULL;
+ if (pattern) {
+ filter = parse_filter(world, sink, pattern);
+ sink = filter;
+ }
+
if (quiet) {
serd_world_set_log_func(world, serd_quiet_error_func, NULL);
}
@@ -374,6 +444,7 @@ main(int argc, char** argv)
}
serd_sink_free(normaliser);
+ serd_sink_free(filter);
serd_node_free(input_name);
serd_sink_free(inserter);
serd_model_free(model);
diff --git a/wscript b/wscript
index 8a00a12f..178439b6 100644
--- a/wscript
+++ b/wscript
@@ -172,6 +172,7 @@ lib_source = ['src/base64.c',
'src/cursor.c',
'src/decimal.c',
'src/env.c',
+ 'src/filter.c',
'src/inserter.c',
'src/int_math.c',
'src/iter.c',
@@ -676,6 +677,7 @@ def validation_test_suite(tst,
def test(tst):
+ import subprocess
import tempfile
# Create test output directories
@@ -756,6 +758,18 @@ def test(tst):
check.file_equals('%s/tests/multifile/output.nq' % srcdir,
'tests/multifile/output.out.nq')
+ with tst.group('GrepCommand') as check:
+ with tempfile.TemporaryFile(mode='w+') as stdout:
+ cmd = _wrapped_command([
+ serdi,
+ '-g', '?s <urn:example:p> <urn:example:o> .',
+ '-s',
+ '<urn:example:s> <urn:example:p> <urn:example:o> .\n'
+ '<urn:example:s> <urn:example:q> <urn:example:r> .\n'])
+ check(lambda: subprocess.check_output(cmd).decode('utf-8') ==
+ '<urn:example:s> <urn:example:p> <urn:example:o> .\n',
+ name='wildcard subject')
+
with tst.group('BadCommands',
expected=1,
stderr=autowaf.NONEMPTY) as check:
@@ -764,6 +778,7 @@ def test(tst):
check([serdi, 'ftp://example.org/unsupported.ttl'])
check([serdi, '-I'])
check([serdi, '-c'])
+ check([serdi, '-g'])
check([serdi, '-i', 'illegal'])
check([serdi, '-i', 'turtle'])
check([serdi, '-i'])