diff options
author | David Robillard <d@drobilla.net> | 2019-12-18 19:16:14 -0500 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2021-03-08 23:36:32 -0500 |
commit | 0c347c9701af4595a68bb37eb7c69b5db2d452f8 (patch) | |
tree | 2c41e70b44211c00fbc0f3830db886c8d0b1b85e /src | |
parent | f7c7115e0555f25e0f2c6d09378b66aec2d41d76 (diff) | |
download | serd-0c347c9701af4595a68bb37eb7c69b5db2d452f8.tar.gz serd-0c347c9701af4595a68bb37eb7c69b5db2d452f8.tar.bz2 serd-0c347c9701af4595a68bb37eb7c69b5db2d452f8.zip |
WIP: Add statement filtering
Diffstat (limited to 'src')
-rw-r--r-- | src/filter.c | 89 | ||||
-rw-r--r-- | src/serdi.c | 77 |
2 files changed, 163 insertions, 3 deletions
diff --git a/src/filter.c b/src/filter.c new file mode 100644 index 00000000..f91a950e --- /dev/null +++ b/src/filter.c @@ -0,0 +1,89 @@ +/* + Copyright 2019-2020 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "serd/serd.h" + +#include <stdlib.h> + +typedef struct { + const SerdSink* target; + SerdNode* subject; + SerdNode* predicate; + SerdNode* object; + SerdNode* graph; +} SerdFilterData; + +static void +free_data(void* const handle) +{ + if (handle) { + SerdFilterData* data = (SerdFilterData*)handle; + + serd_node_free(data->subject); + serd_node_free(data->predicate); + serd_node_free(data->object); + serd_node_free(data->graph); + free(data); + } +} + +static SerdStatus +serd_filter_on_event(void* const handle, const SerdEvent* const event) +{ + const SerdFilterData* const data = (SerdFilterData*)handle; + + if (event->type == SERD_STATEMENT && + !serd_statement_matches(event->statement.statement, + data->subject, + data->predicate, + data->object, + data->graph)) { + return SERD_SUCCESS; + } + + return serd_sink_write_event(data->target, event); +} + +SerdSink* +serd_filter_new(const SerdSink* const target, + const SerdNode* const subject, + const SerdNode* const predicate, + const SerdNode* const object, + const SerdNode* const graph) +{ + SerdFilterData* const data = + (SerdFilterData*)calloc(1, sizeof(SerdFilterData)); + + data->target = target; + + if (subject && serd_node_type(subject) != SERD_VARIABLE) { + data->subject = serd_node_copy(subject); + } + + if (predicate && serd_node_type(predicate) != SERD_VARIABLE) { + data->predicate = serd_node_copy(predicate); + } + + if (object && serd_node_type(object) != SERD_VARIABLE) { + data->object = serd_node_copy(object); + } + + if (graph && serd_node_type(graph) != SERD_VARIABLE) { + data->graph = serd_node_copy(graph); + } + + return serd_sink_new(data, serd_filter_on_event, free_data); +} diff --git a/src/serdi.c b/src/serdi.c index b028b862..b0f96f6c 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -36,6 +36,13 @@ #define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg) #define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__) +typedef struct { + SerdNode* s; + SerdNode* p; + SerdNode* o; + SerdNode* g; +} FilterPattern; + static int print_version(void) { @@ -63,6 +70,7 @@ print_usage(const char* name, bool error) fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n"); fprintf(os, " -e Eat input one character at a time.\n"); fprintf(os, " -f Fast and loose mode (possibly ugly output).\n"); + fprintf(os, " -g PATTERN Only emit statements matching PATTERN (grep).\n"); fprintf(os, " -h Display this help and exit.\n"); fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"); fprintf(os, " -k BYTES Parser stack size.\n"); @@ -87,6 +95,58 @@ missing_arg(const char* name, char opt) } static SerdStatus +on_filter_event(void* const handle, const SerdEvent* const event) +{ + if (event->type != SERD_STATEMENT) { + return SERD_SUCCESS; + } + + FilterPattern* const pat = (FilterPattern*)handle; + if (pat->s) { + return SERD_ERR_INVALID; + } + + const SerdStatement* const statement = event->statement.statement; + pat->s = serd_node_copy(serd_statement_subject(statement)); + pat->p = serd_node_copy(serd_statement_predicate(statement)); + pat->o = serd_node_copy(serd_statement_object(statement)); + pat->g = serd_node_copy(serd_statement_graph(statement)); + return SERD_SUCCESS; +} + +static SerdSink* +parse_filter(SerdWorld* world, const SerdSink* sink, const char* str) +{ + SerdEnv* const env = serd_env_new(SERD_EMPTY_STRING()); + FilterPattern pat = {NULL, NULL, NULL, NULL}; + SerdSink* in_sink = serd_sink_new(&pat, on_filter_event, NULL); + SerdByteSource* byte_source = serd_byte_source_new_string(str, NULL); + SerdReader* reader = serd_reader_new( + world, SERD_NQUADS, SERD_READ_VARIABLES, env, in_sink, 4096); + + SerdStatus st = serd_reader_start(reader, byte_source); + if (!st) { + st = serd_reader_read_document(reader); + } + + serd_reader_free(reader); + serd_env_free(env); + serd_byte_source_free(byte_source); + serd_sink_free(in_sink); + + if (st) { + return NULL; + } + + SerdSink* filter = serd_filter_new(sink, pat.s, pat.p, pat.o, pat.g); + serd_node_free(pat.s); + serd_node_free(pat.p); + serd_node_free(pat.o); + serd_node_free(pat.g); + return filter; +} + +static SerdStatus read_file(SerdWorld* const world, SerdSyntax syntax, const SerdReaderFlags flags, @@ -155,6 +215,7 @@ main(int argc, char** argv) bool quiet = false; size_t stack_size = 4194304; const char* input_string = NULL; + const char* pattern = NULL; const char* add_prefix = ""; const char* chop_prefix = NULL; const char* root_uri = NULL; @@ -191,6 +252,11 @@ main(int argc, char** argv) writer_flags |= SERD_WRITE_LAX; } else if (argv[a][1] == 'm') { use_model = true; + } else if (argv[a][1] == 'g') { + if (++a == argc) { + return missing_arg(argv[0], 'g'); + } + pattern = argv[a]; } else if (argv[a][1] == 'q') { quiet = true; } else if (argv[a][1] == 'v') { @@ -340,6 +406,12 @@ main(int argc, char** argv) sink = canon = serd_canon_new(world, out_sink, reader_flags); } + SerdSink* filter = NULL; + if (pattern) { + filter = parse_filter(world, sink, pattern); + sink = filter; + } + if (quiet) { serd_world_set_log_func(world, serd_quiet_error_func, NULL); } @@ -349,8 +421,7 @@ main(int argc, char** argv) serd_writer_chop_blank_prefix(writer, chop_prefix); serd_node_free(root); - SerdStatus st = SERD_SUCCESS; - SerdNode* input_name = NULL; + SerdStatus st = SERD_SUCCESS; if (input_string) { SerdByteSource* const byte_source = serd_byte_source_new_string(input_string, NULL); @@ -422,10 +493,10 @@ main(int argc, char** argv) } serd_sink_free(canon); + serd_sink_free(filter); serd_sink_free(inserter); serd_model_free(model); serd_writer_free(writer); - serd_node_free(input_name); serd_env_free(env); serd_node_free(base); serd_world_free(world); |