From 0c347c9701af4595a68bb37eb7c69b5db2d452f8 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Wed, 18 Dec 2019 19:16:14 -0500 Subject: WIP: Add statement filtering --- src/serdi.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 74 insertions(+), 3 deletions(-) (limited to 'src/serdi.c') diff --git a/src/serdi.c b/src/serdi.c index b028b862..b0f96f6c 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -36,6 +36,13 @@ #define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg) #define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__) +typedef struct { + SerdNode* s; + SerdNode* p; + SerdNode* o; + SerdNode* g; +} FilterPattern; + static int print_version(void) { @@ -63,6 +70,7 @@ print_usage(const char* name, bool error) fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n"); fprintf(os, " -e Eat input one character at a time.\n"); fprintf(os, " -f Fast and loose mode (possibly ugly output).\n"); + fprintf(os, " -g PATTERN Only emit statements matching PATTERN (grep).\n"); fprintf(os, " -h Display this help and exit.\n"); fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"); fprintf(os, " -k BYTES Parser stack size.\n"); @@ -86,6 +94,58 @@ missing_arg(const char* name, char opt) return print_usage(name, true); } +static SerdStatus +on_filter_event(void* const handle, const SerdEvent* const event) +{ + if (event->type != SERD_STATEMENT) { + return SERD_SUCCESS; + } + + FilterPattern* const pat = (FilterPattern*)handle; + if (pat->s) { + return SERD_ERR_INVALID; + } + + const SerdStatement* const statement = event->statement.statement; + pat->s = serd_node_copy(serd_statement_subject(statement)); + pat->p = serd_node_copy(serd_statement_predicate(statement)); + pat->o = serd_node_copy(serd_statement_object(statement)); + pat->g = serd_node_copy(serd_statement_graph(statement)); + return SERD_SUCCESS; +} + +static SerdSink* +parse_filter(SerdWorld* world, const SerdSink* sink, const char* str) +{ + SerdEnv* const env = serd_env_new(SERD_EMPTY_STRING()); + FilterPattern pat = {NULL, NULL, NULL, NULL}; + SerdSink* in_sink = serd_sink_new(&pat, on_filter_event, NULL); + SerdByteSource* byte_source = serd_byte_source_new_string(str, NULL); + SerdReader* reader = serd_reader_new( + world, SERD_NQUADS, SERD_READ_VARIABLES, env, in_sink, 4096); + + SerdStatus st = serd_reader_start(reader, byte_source); + if (!st) { + st = serd_reader_read_document(reader); + } + + serd_reader_free(reader); + serd_env_free(env); + serd_byte_source_free(byte_source); + serd_sink_free(in_sink); + + if (st) { + return NULL; + } + + SerdSink* filter = serd_filter_new(sink, pat.s, pat.p, pat.o, pat.g); + serd_node_free(pat.s); + serd_node_free(pat.p); + serd_node_free(pat.o); + serd_node_free(pat.g); + return filter; +} + static SerdStatus read_file(SerdWorld* const world, SerdSyntax syntax, @@ -155,6 +215,7 @@ main(int argc, char** argv) bool quiet = false; size_t stack_size = 4194304; const char* input_string = NULL; + const char* pattern = NULL; const char* add_prefix = ""; const char* chop_prefix = NULL; const char* root_uri = NULL; @@ -191,6 +252,11 @@ main(int argc, char** argv) writer_flags |= SERD_WRITE_LAX; } else if (argv[a][1] == 'm') { use_model = true; + } else if (argv[a][1] == 'g') { + if (++a == argc) { + return missing_arg(argv[0], 'g'); + } + pattern = argv[a]; } else if (argv[a][1] == 'q') { quiet = true; } else if (argv[a][1] == 'v') { @@ -340,6 +406,12 @@ main(int argc, char** argv) sink = canon = serd_canon_new(world, out_sink, reader_flags); } + SerdSink* filter = NULL; + if (pattern) { + filter = parse_filter(world, sink, pattern); + sink = filter; + } + if (quiet) { serd_world_set_log_func(world, serd_quiet_error_func, NULL); } @@ -349,8 +421,7 @@ main(int argc, char** argv) serd_writer_chop_blank_prefix(writer, chop_prefix); serd_node_free(root); - SerdStatus st = SERD_SUCCESS; - SerdNode* input_name = NULL; + SerdStatus st = SERD_SUCCESS; if (input_string) { SerdByteSource* const byte_source = serd_byte_source_new_string(input_string, NULL); @@ -422,10 +493,10 @@ main(int argc, char** argv) } serd_sink_free(canon); + serd_sink_free(filter); serd_sink_free(inserter); serd_model_free(model); serd_writer_free(writer); - serd_node_free(input_name); serd_env_free(env); serd_node_free(base); serd_world_free(world); -- cgit v1.2.1