diff options
author | David Robillard <d@drobilla.net> | 2021-07-27 19:01:45 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2022-01-14 19:37:51 -0500 |
commit | 1159aea45d9bc4ade2e82856be403d58e050f32d (patch) | |
tree | 870117b2a0a4b386727107bd85c736a020058309 /src | |
parent | 89612ec05f596d135640413e093251fb9691ca14 (diff) | |
download | serd-1159aea45d9bc4ade2e82856be403d58e050f32d.tar.gz serd-1159aea45d9bc4ade2e82856be403d58e050f32d.tar.bz2 serd-1159aea45d9bc4ade2e82856be403d58e050f32d.zip |
Add statement filtering
Diffstat (limited to 'src')
-rw-r--r-- | src/filter.c | 102 | ||||
-rw-r--r-- | src/serdi.c | 108 |
2 files changed, 207 insertions, 3 deletions
diff --git a/src/filter.c b/src/filter.c new file mode 100644 index 00000000..6d5e5a04 --- /dev/null +++ b/src/filter.c @@ -0,0 +1,102 @@ +/* + Copyright 2019-2020 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "serd/serd.h" + +#include <stdbool.h> +#include <stdlib.h> + +typedef struct { + const SerdSink* target; + SerdNode* subject; + SerdNode* predicate; + SerdNode* object; + SerdNode* graph; + bool inclusive; +} SerdFilterData; + +static void +free_data(void* const handle) +{ + if (handle) { + SerdFilterData* data = (SerdFilterData*)handle; + + serd_node_free(data->subject); + serd_node_free(data->predicate); + serd_node_free(data->object); + serd_node_free(data->graph); + free(data); + } +} + +static SerdStatus +serd_filter_on_event(void* const handle, const SerdEvent* const event) +{ + const SerdFilterData* const data = (SerdFilterData*)handle; + + if (event->type == SERD_STATEMENT) { + const bool matches = serd_statement_matches(event->statement.statement, + data->subject, + data->predicate, + data->object, + data->graph); + + if (data->inclusive == matches) { + // Emit statement with reset flags to avoid confusing the writer + SerdEvent out_event = *event; + out_event.statement.flags = 0u; + return serd_sink_write_event(data->target, &out_event); + } + + return SERD_SUCCESS; // Skip statement + } + + return event->type == SERD_END ? SERD_SUCCESS + : serd_sink_write_event(data->target, event); +} + +SerdSink* +serd_filter_new(const SerdSink* const target, + const SerdNode* const subject, + const SerdNode* const predicate, + const SerdNode* const object, + const SerdNode* const graph, + const bool inclusive) +{ + SerdFilterData* const data = + (SerdFilterData*)calloc(1, sizeof(SerdFilterData)); + + data->target = target; + data->inclusive = inclusive; + + if (subject && serd_node_type(subject) != SERD_VARIABLE) { + data->subject = serd_node_copy(subject); + } + + if (predicate && serd_node_type(predicate) != SERD_VARIABLE) { + data->predicate = serd_node_copy(predicate); + } + + if (object && serd_node_type(object) != SERD_VARIABLE) { + data->object = serd_node_copy(object); + } + + if (graph && serd_node_type(graph) != SERD_VARIABLE) { + data->graph = serd_node_copy(graph); + } + + return serd_sink_new(data, serd_filter_on_event, free_data); +} diff --git a/src/serdi.c b/src/serdi.c index 243be98e..7c664b72 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -37,6 +37,13 @@ #define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg) #define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__) +typedef struct { + SerdNode* s; + SerdNode* p; + SerdNode* o; + SerdNode* g; +} FilterPattern; + static int print_version(void) { @@ -57,6 +64,8 @@ print_usage(const char* const name, const bool error) fprintf(os, "Read and write RDF syntax.\n"); fprintf(os, "Use - for INPUT to read from standard input.\n\n"); fprintf(os, " -C Convert literals to canonical form.\n"); + fprintf(os, " -F PATTERN Filter out statements that match PATTERN.\n"); + fprintf(os, " -G PATTERN Only include statements matching PATTERN.\n"); fprintf(os, " -I BASE_URI Input base URI.\n"); fprintf(os, " -a Write ASCII output if possible.\n"); fprintf(os, " -b Fast bulk output for large serialisations.\n"); @@ -88,6 +97,62 @@ missing_arg(const char* const name, const char opt) } static SerdStatus +on_filter_event(void* const handle, const SerdEvent* const event) +{ + if (event->type == SERD_STATEMENT) { + FilterPattern* const pat = (FilterPattern*)handle; + if (pat->s) { + return SERD_ERR_INVALID; + } + + const SerdStatement* const statement = event->statement.statement; + pat->s = serd_node_copy(serd_statement_subject(statement)); + pat->p = serd_node_copy(serd_statement_predicate(statement)); + pat->o = serd_node_copy(serd_statement_object(statement)); + pat->g = serd_node_copy(serd_statement_graph(statement)); + } + + return SERD_SUCCESS; +} + +static SerdSink* +parse_filter(SerdWorld* const world, + const SerdSink* const sink, + const char* const str, + const bool inclusive) +{ + SerdEnv* const env = serd_env_new(SERD_EMPTY_STRING()); + FilterPattern pat = {NULL, NULL, NULL, NULL}; + SerdSink* in_sink = serd_sink_new(&pat, on_filter_event, NULL); + SerdByteSource* byte_source = serd_byte_source_new_string(str, NULL); + SerdReader* reader = serd_reader_new( + world, SERD_NQUADS, SERD_READ_VARIABLES, env, in_sink, 4096); + + SerdStatus st = serd_reader_start(reader, byte_source); + if (!st) { + st = serd_reader_read_document(reader); + } + + serd_reader_free(reader); + serd_env_free(env); + serd_byte_source_free(byte_source); + serd_sink_free(in_sink); + + if (st) { + return NULL; + } + + SerdSink* filter = + serd_filter_new(sink, pat.s, pat.p, pat.o, pat.g, inclusive); + + serd_node_free(pat.s); + serd_node_free(pat.p); + serd_node_free(pat.o); + serd_node_free(pat.g); + return filter; +} + +static SerdStatus read_file(SerdWorld* const world, SerdSyntax syntax, const SerdReaderFlags flags, @@ -158,6 +223,8 @@ main(int argc, char** argv) bool quiet = false; size_t stack_size = 4194304; const char* input_string = NULL; + const char* in_pattern = NULL; + const char* out_pattern = NULL; const char* add_prefix = ""; const char* chop_prefix = NULL; const char* root_uri = NULL; @@ -197,6 +264,20 @@ main(int argc, char** argv) return print_version(); } else if (opt == 'x') { reader_flags |= SERD_READ_VARIABLES; + } else if (argv[a][1] == 'F') { + if (++a == argc) { + return missing_arg(argv[0], 'F'); + } + + out_pattern = argv[a]; + break; + } else if (argv[a][1] == 'G') { + if (++a == argc) { + return missing_arg(argv[0], 'g'); + } + + in_pattern = argv[a]; + break; } else if (argv[a][1] == 'I') { if (++a == argc) { return missing_arg(prog, 'I'); @@ -280,6 +361,11 @@ main(int argc, char** argv) } } + if (in_pattern && out_pattern) { + SERDI_ERROR("only one of -F and -G can be given at once\n"); + return 1; + } + if (a == argc && !input_string) { SERDI_ERROR("missing input\n"); return 1; @@ -370,6 +456,23 @@ main(int argc, char** argv) sink = canon = serd_canon_new(world, out_sink, reader_flags); } + SerdSink* filter = NULL; + if (in_pattern) { + if (!(filter = parse_filter(world, sink, in_pattern, true))) { + SERDI_ERROR("error parsing inclusive filter pattern\n"); + return EXIT_FAILURE; + } + + sink = filter; + } else if (out_pattern) { + if (!(filter = parse_filter(world, sink, out_pattern, false))) { + SERDI_ERROR("error parsing exclusive filter pattern\n"); + return EXIT_FAILURE; + } + + sink = filter; + } + if (quiet) { serd_set_log_func(world, serd_quiet_log_func, NULL); } @@ -382,8 +485,7 @@ main(int argc, char** argv) serd_writer_chop_blank_prefix(writer, chop_prefix); - SerdStatus st = SERD_SUCCESS; - SerdNode* input_name = NULL; + SerdStatus st = SERD_SUCCESS; if (input_string) { SerdByteSource* const byte_source = serd_byte_source_new_string(input_string, NULL); @@ -467,10 +569,10 @@ main(int argc, char** argv) } serd_sink_free(canon); + serd_sink_free(filter); serd_sink_free(inserter); serd_model_free(model); serd_writer_free(writer); - serd_node_free(input_name); serd_env_free(env); serd_node_free(base); serd_world_free(world); |