diff options
author | David Robillard <d@drobilla.net> | 2019-12-18 19:16:14 -0500 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2021-03-08 23:36:32 -0500 |
commit | 0c347c9701af4595a68bb37eb7c69b5db2d452f8 (patch) | |
tree | 2c41e70b44211c00fbc0f3830db886c8d0b1b85e | |
parent | f7c7115e0555f25e0f2c6d09378b66aec2d41d76 (diff) | |
download | serd-0c347c9701af4595a68bb37eb7c69b5db2d452f8.tar.gz serd-0c347c9701af4595a68bb37eb7c69b5db2d452f8.tar.bz2 serd-0c347c9701af4595a68bb37eb7c69b5db2d452f8.zip |
WIP: Add statement filtering
-rw-r--r-- | doc/serdi.1 | 8 | ||||
-rw-r--r-- | include/serd/serd.h | 16 | ||||
-rw-r--r-- | meson.build | 1 | ||||
-rw-r--r-- | src/filter.c | 89 | ||||
-rw-r--r-- | src/serdi.c | 77 | ||||
-rw-r--r-- | test/meson.build | 4 | ||||
-rw-r--r-- | test/test_grep.py | 40 |
7 files changed, 232 insertions, 3 deletions
diff --git a/doc/serdi.1 b/doc/serdi.1 index fcd58535..b169347b 100644 --- a/doc/serdi.1 +++ b/doc/serdi.1 @@ -9,6 +9,7 @@ .Op Fl Cabefhlqv .Op Fl I Ar base .Op Fl c Ar prefix +.Op Fl g Ar pattern .Op Fl i Ar syntax .Op Fl k Ar bytes .Op Fl o Ar syntax @@ -95,6 +96,13 @@ If the model is enabled, then this writes the model quickly in sorted order. Note that doing so with TriG or Turtle may make the output ugly, since blank nodes will not be inlined. .Pp +.It Fl g Ar pattern +Only emit statements that match +.Ar pattern +(like grep). +.Ar pattern +should be a single statement written in NTriples or NQuads, where variables (like ?s) match any node. +.Pp .It Fl h Print the command line options. .Pp diff --git a/include/serd/serd.h b/include/serd/serd.h index 8eb5470d..68dec4f3 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -1487,6 +1487,22 @@ serd_canon_new(const SerdWorld* SERD_NULLABLE world, SerdReaderFlags flags); /** + Return a sink that filters out statements that do not match a pattern. + + The returned sink acts like `target` in all respects, except statements that + do not match the pattern are dropped. Only statements where each node is + either equivalent to the corresponding pattern node, or the pattern node is + null or a variable, will be passed through to the target sink. 
+*/
+SERD_API
+SerdSink* SERD_ALLOCATED
+serd_filter_new(const SerdSink* SERD_NONNULL target,
+                const SerdNode* SERD_NULLABLE subject,
+                const SerdNode* SERD_NULLABLE predicate,
+                const SerdNode* SERD_NULLABLE object,
+                const SerdNode* SERD_NULLABLE graph);
+
+/**
    @}
    @defgroup serd_reader Reader
    @{
diff --git a/meson.build b/meson.build
index 80917230..35aeb2ad 100644
--- a/meson.build
+++ b/meson.build
@@ -92,6 +92,7 @@ sources = [
   'src/canon.c',
   'src/cursor.c',
   'src/env.c',
+  'src/filter.c',
   'src/inserter.c',
   'src/iter.c',
   'src/model.c',
diff --git a/src/filter.c b/src/filter.c
new file mode 100644
index 00000000..f91a950e
--- /dev/null
+++ b/src/filter.c
@@ -0,0 +1,104 @@
+/*
+  Copyright 2019-2020 David Robillard <d@drobilla.net>
+
+  Permission to use, copy, modify, and/or distribute this software for any
+  purpose with or without fee is hereby granted, provided that the above
+  copyright notice and this permission notice appear in all copies.
+
+  THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include "serd/serd.h"
+
+#include <stdlib.h>
+
+/** State of a filter sink: the target sink and the pattern to match. */
+typedef struct {
+  const SerdSink* target;    /* Sink that passing events are written to */
+  SerdNode*       subject;   /* Subject to match, or null for any */
+  SerdNode*       predicate; /* Predicate to match, or null for any */
+  SerdNode*       object;    /* Object to match, or null for any */
+  SerdNode*       graph;     /* Graph to match, or null for any */
+} SerdFilterData;
+
+/** Free the filter state along with its copies of the pattern nodes. */
+static void
+free_data(void* const handle)
+{
+  if (handle) {
+    SerdFilterData* data = (SerdFilterData*)handle;
+
+    serd_node_free(data->subject);
+    serd_node_free(data->predicate);
+    serd_node_free(data->object);
+    serd_node_free(data->graph);
+    free(data);
+  }
+}
+
+/**
+   Pass an event on to the target unless it is a non-matching statement.
+
+   Events other than statements are always forwarded.  A dropped statement
+   returns SERD_SUCCESS without the target being written to.
+*/
+static SerdStatus
+serd_filter_on_event(void* const handle, const SerdEvent* const event)
+{
+  const SerdFilterData* const data = (SerdFilterData*)handle;
+
+  if (event->type == SERD_STATEMENT &&
+      !serd_statement_matches(event->statement.statement,
+                              data->subject,
+                              data->predicate,
+                              data->object,
+                              data->graph)) {
+    return SERD_SUCCESS;
+  }
+
+  return serd_sink_write_event(data->target, event);
+}
+
+/** Copy a pattern node, or return null for a wildcard (null or variable). */
+static SerdNode*
+copy_pattern_node(const SerdNode* const node)
+{
+  return (node && serd_node_type(node) != SERD_VARIABLE)
+           ? serd_node_copy(node)
+           : NULL;
+}
+
+/**
+   Create a filter sink; see the declaration in serd.h for the matching rules.
+
+   The pattern nodes are copied, so the caller keeps ownership of the
+   arguments.  Returns null if memory for the filter can not be allocated.
+*/
+SerdSink*
+serd_filter_new(const SerdSink* const target,
+                const SerdNode* const subject,
+                const SerdNode* const predicate,
+                const SerdNode* const object,
+                const SerdNode* const graph)
+{
+  SerdFilterData* const data =
+    (SerdFilterData*)calloc(1, sizeof(SerdFilterData));
+
+  if (!data) {
+    return NULL;
+  }
+
+  data->target    = target;
+  data->subject   = copy_pattern_node(subject);
+  data->predicate = copy_pattern_node(predicate);
+  data->object    = copy_pattern_node(object);
+  data->graph     = copy_pattern_node(graph);
+
+  return serd_sink_new(data, serd_filter_on_event, free_data);
+}
diff --git a/src/serdi.c b/src/serdi.c
index b028b862..b0f96f6c 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -36,6 +36,13 @@
 #define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg)
 #define SERDI_ERRORF(fmt, ...) 
fprintf(stderr, "serdi: " fmt, __VA_ARGS__)
 
+typedef struct {
+  SerdNode* s; /* Subject of the pattern statement */
+  SerdNode* p; /* Predicate of the pattern statement */
+  SerdNode* o; /* Object of the pattern statement */
+  SerdNode* g; /* Graph of the pattern statement */
+} FilterPattern;
+
 static int
 print_version(void)
 {
@@ -63,6 +70,7 @@ print_usage(const char* name, bool error)
   fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n");
   fprintf(os, " -e Eat input one character at a time.\n");
   fprintf(os, " -f Fast and loose mode (possibly ugly output).\n");
+  fprintf(os, " -g PATTERN Only emit statements matching PATTERN (grep).\n");
   fprintf(os, " -h Display this help and exit.\n");
   fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n");
   fprintf(os, " -k BYTES Parser stack size.\n");
@@ -87,6 +95,71 @@ missing_arg(const char* name, char opt)
 }
 
+/**
+   Capture the first statement event as the filter pattern.
+
+   Non-statement events are ignored.  A second statement is rejected with
+   SERD_ERR_INVALID, since the pattern slots are already filled.
+*/
 static SerdStatus
+on_filter_event(void* const handle, const SerdEvent* const event)
+{
+  if (event->type != SERD_STATEMENT) {
+    return SERD_SUCCESS;
+  }
+
+  FilterPattern* const pat = (FilterPattern*)handle;
+  if (pat->s) {
+    return SERD_ERR_INVALID;
+  }
+
+  const SerdStatement* const statement = event->statement.statement;
+  pat->s = serd_node_copy(serd_statement_subject(statement));
+  pat->p = serd_node_copy(serd_statement_predicate(statement));
+  pat->o = serd_node_copy(serd_statement_object(statement));
+  pat->g = serd_node_copy(serd_statement_graph(statement));
+  return SERD_SUCCESS;
+}
+
+/**
+   Parse `str` as a pattern statement and return a filter that wraps `sink`.
+
+   The pattern is read as NQuads with variables enabled.  Returns null if
+   reading fails, which includes a pattern with more than one statement.
+*/
+static SerdSink*
+parse_filter(SerdWorld* world, const SerdSink* sink, const char* str)
+{
+  SerdEnv* const env = serd_env_new(SERD_EMPTY_STRING());
+  FilterPattern pat = {NULL, NULL, NULL, NULL};
+  SerdSink* in_sink = serd_sink_new(&pat, on_filter_event, NULL);
+  SerdByteSource* byte_source = serd_byte_source_new_string(str, NULL);
+  SerdReader* reader = serd_reader_new(
+    world, SERD_NQUADS, SERD_READ_VARIABLES, env, in_sink, 4096);
+
+  SerdStatus st = serd_reader_start(reader, byte_source);
+  if (!st) {
+    st = serd_reader_read_document(reader);
+  }
+
+  serd_reader_free(reader);
+  serd_env_free(env);
+  serd_byte_source_free(byte_source);
+  serd_sink_free(in_sink);
+
+  /* Build the filter only if parsing succeeded, but free the pattern nodes
+     either way: on_filter_event() may have stored a statement before a
+     later error, and serd_filter_new() keeps its own copies */
+  SerdSink* const filter =
+    st ? NULL : serd_filter_new(sink, pat.s, pat.p, pat.o, pat.g);
+
+  serd_node_free(pat.s);
+  serd_node_free(pat.p);
+  serd_node_free(pat.o);
+  serd_node_free(pat.g);
+  return filter;
+}
+
+static SerdStatus
 read_file(SerdWorld* const world,
           SerdSyntax syntax,
           const SerdReaderFlags flags,
@@ -155,6 +228,7 @@ main(int argc, char** argv)
   bool quiet = false;
   size_t stack_size = 4194304;
   const char* input_string = NULL;
+  const char* pattern = NULL;
   const char* add_prefix = "";
   const char* chop_prefix = NULL;
   const char* root_uri = NULL;
@@ -191,6 +265,11 @@ main(int argc, char** argv)
       writer_flags |= SERD_WRITE_LAX;
     } else if (argv[a][1] == 'm') {
       use_model = true;
+    } else if (argv[a][1] == 'g') {
+      if (++a == argc) {
+        return missing_arg(argv[0], 'g');
+      }
+      pattern = argv[a];
     } else if (argv[a][1] == 'q') {
       quiet = true;
     } else if (argv[a][1] == 'v') {
@@ -340,6 +419,14 @@ main(int argc, char** argv)
     sink = canon = serd_canon_new(world, out_sink, reader_flags);
   }
 
+  SerdSink* filter = NULL;
+  if (pattern) {
+    /* NOTE(review): parse_filter() returns null for a bad pattern, which
+       would leave sink null here -- confirm this is handled, or fail early */
+    filter = parse_filter(world, sink, pattern);
+    sink = filter;
+  }
+
   if (quiet) {
     serd_world_set_log_func(world, serd_quiet_error_func, NULL);
   }
@@ -349,8 +436,7 @@ main(int argc, char** argv)
   serd_writer_chop_blank_prefix(writer, chop_prefix);
   serd_node_free(root);
 
-  SerdStatus st         = SERD_SUCCESS;
-  SerdNode*  input_name = NULL;
+  SerdStatus st = SERD_SUCCESS;
   if (input_string) {
     SerdByteSource* const byte_source =
       serd_byte_source_new_string(input_string, NULL);
@@ -422,10 +508,10 @@ main(int argc, char** argv)
   }
 
   serd_sink_free(canon);
+  serd_sink_free(filter);
   serd_sink_free(inserter);
   serd_model_free(model);
   serd_writer_free(writer);
-  serd_node_free(input_name);
   serd_env_free(env);
   serd_node_free(base);
   serd_world_free(world);
diff --git a/test/meson.build b/test/meson.build
index d364b4f2..501f8b13 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -88,6 +88,10 @@ if get_option('utils')
args: script_args + files('bad/bad-base.ttl'), suite: ['serdi', 'options']) + test('grep', files('test_grep.py'), + args: script_args, + suite: ['serdi', 'options']) + # Inputs test('stdin', files('test_stdin.py'), diff --git a/test/test_grep.py b/test/test_grep.py new file mode 100644 index 00000000..a4c8d91c --- /dev/null +++ b/test/test_grep.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 + +"""Test filtering statements.""" + +import argparse +import sys +import shlex +import subprocess +import tempfile + +DOCUMENT = """<urn:example:s> <urn:example:p> <urn:example:o> . +<urn:example:s> <urn:example:q> <urn:example:r> . +""" + +parser = argparse.ArgumentParser(description=__doc__) + +parser.add_argument("--serdi", default="./serdi", help="path to serdi") +parser.add_argument("--wrapper", default="", help="executable wrapper") + +args = parser.parse_args(sys.argv[1:]) +command = shlex.split(args.wrapper) + [ + args.serdi, + "-g", + "?s <urn:example:p> <urn:example:o> .", + "-s", + DOCUMENT, +] + +with tempfile.TemporaryFile() as out: + proc = subprocess.run( + command, + check=False, + encoding="utf-8", + input=DOCUMENT, + capture_output=True, + ) + + assert proc.returncode == 0 + assert len(proc.stderr) == 0 + assert proc.stdout == "<urn:example:s> <urn:example:p> <urn:example:o> .\n" |