diff options
author | David Robillard <d@drobilla.net> | 2021-07-27 19:01:45 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2022-01-14 19:37:51 -0500 |
commit | 1159aea45d9bc4ade2e82856be403d58e050f32d (patch) | |
tree | 870117b2a0a4b386727107bd85c736a020058309 /src/serdi.c | |
parent | 89612ec05f596d135640413e093251fb9691ca14 (diff) | |
download | serd-1159aea45d9bc4ade2e82856be403d58e050f32d.tar.gz serd-1159aea45d9bc4ade2e82856be403d58e050f32d.tar.bz2 serd-1159aea45d9bc4ade2e82856be403d58e050f32d.zip |
Add statement filtering
Diffstat (limited to 'src/serdi.c')
-rw-r--r-- | src/serdi.c | 108 |
1 files changed, 105 insertions, 3 deletions
diff --git a/src/serdi.c b/src/serdi.c index 243be98e..7c664b72 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -37,6 +37,13 @@ #define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg) #define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__) +typedef struct { + SerdNode* s; + SerdNode* p; + SerdNode* o; + SerdNode* g; +} FilterPattern; + static int print_version(void) { @@ -57,6 +64,8 @@ print_usage(const char* const name, const bool error) fprintf(os, "Read and write RDF syntax.\n"); fprintf(os, "Use - for INPUT to read from standard input.\n\n"); fprintf(os, " -C Convert literals to canonical form.\n"); + fprintf(os, " -F PATTERN Filter out statements that match PATTERN.\n"); + fprintf(os, " -G PATTERN Only include statements matching PATTERN.\n"); fprintf(os, " -I BASE_URI Input base URI.\n"); fprintf(os, " -a Write ASCII output if possible.\n"); fprintf(os, " -b Fast bulk output for large serialisations.\n"); @@ -88,6 +97,62 @@ missing_arg(const char* const name, const char opt) } static SerdStatus +on_filter_event(void* const handle, const SerdEvent* const event) +{ + if (event->type == SERD_STATEMENT) { + FilterPattern* const pat = (FilterPattern*)handle; + if (pat->s) { + return SERD_ERR_INVALID; + } + + const SerdStatement* const statement = event->statement.statement; + pat->s = serd_node_copy(serd_statement_subject(statement)); + pat->p = serd_node_copy(serd_statement_predicate(statement)); + pat->o = serd_node_copy(serd_statement_object(statement)); + pat->g = serd_node_copy(serd_statement_graph(statement)); + } + + return SERD_SUCCESS; +} + +static SerdSink* +parse_filter(SerdWorld* const world, + const SerdSink* const sink, + const char* const str, + const bool inclusive) +{ + SerdEnv* const env = serd_env_new(SERD_EMPTY_STRING()); + FilterPattern pat = {NULL, NULL, NULL, NULL}; + SerdSink* in_sink = serd_sink_new(&pat, on_filter_event, NULL); + SerdByteSource* byte_source = serd_byte_source_new_string(str, NULL); + SerdReader* reader = serd_reader_new( + world, SERD_NQUADS, SERD_READ_VARIABLES, env, in_sink, 4096); + + SerdStatus st = serd_reader_start(reader, byte_source); + if (!st) { + st = serd_reader_read_document(reader); + } + + serd_reader_free(reader); + serd_env_free(env); + serd_byte_source_free(byte_source); + serd_sink_free(in_sink); + + if (st) { + return NULL; + } + + SerdSink* filter = + serd_filter_new(sink, pat.s, pat.p, pat.o, pat.g, inclusive); + + serd_node_free(pat.s); + serd_node_free(pat.p); + serd_node_free(pat.o); + serd_node_free(pat.g); + return filter; +} + +static SerdStatus read_file(SerdWorld* const world, SerdSyntax syntax, const SerdReaderFlags flags, @@ -158,6 +223,8 @@ main(int argc, char** argv) bool quiet = false; size_t stack_size = 4194304; const char* input_string = NULL; + const char* in_pattern = NULL; + const char* out_pattern = NULL; const char* add_prefix = ""; const char* chop_prefix = NULL; const char* root_uri = NULL; @@ -197,6 +264,20 @@ main(int argc, char** argv) return print_version(); } else if (opt == 'x') { reader_flags |= SERD_READ_VARIABLES; + } else if (argv[a][1] == 'F') { + if (++a == argc) { + return missing_arg(argv[0], 'F'); + } + + out_pattern = argv[a]; + break; + } else if (argv[a][1] == 'G') { + if (++a == argc) { + return missing_arg(argv[0], 'g'); + } + + in_pattern = argv[a]; + break; } else if (argv[a][1] == 'I') { if (++a == argc) { return missing_arg(prog, 'I'); @@ -280,6 +361,11 @@ main(int argc, char** argv) } } + if (in_pattern && out_pattern) { + SERDI_ERROR("only one of -F and -G can be given at once\n"); + return 1; + } + if (a == argc && !input_string) { SERDI_ERROR("missing input\n"); return 1; @@ -370,6 +456,23 @@ main(int argc, char** argv) sink = canon = serd_canon_new(world, out_sink, reader_flags); } + SerdSink* filter = NULL; + if (in_pattern) { + if (!(filter = parse_filter(world, sink, in_pattern, true))) { + SERDI_ERROR("error parsing inclusive filter pattern\n"); + return EXIT_FAILURE; + } + + sink = filter; + } else if (out_pattern) { + if (!(filter = parse_filter(world, sink, out_pattern, false))) { + SERDI_ERROR("error parsing exclusive filter pattern\n"); + return EXIT_FAILURE; + } + + sink = filter; + } + if (quiet) { serd_set_log_func(world, serd_quiet_log_func, NULL); } @@ -382,8 +485,7 @@ main(int argc, char** argv) serd_writer_chop_blank_prefix(writer, chop_prefix); - SerdStatus st = SERD_SUCCESS; - SerdNode* input_name = NULL; + SerdStatus st = SERD_SUCCESS; if (input_string) { SerdByteSource* const byte_source = serd_byte_source_new_string(input_string, NULL); @@ -467,10 +569,10 @@ main(int argc, char** argv) } serd_sink_free(canon); + serd_sink_free(filter); serd_sink_free(inserter); serd_model_free(model); serd_writer_free(writer); - serd_node_free(input_name); serd_env_free(env); serd_node_free(base); serd_world_free(world); |