about summary refs log tree commit diff stats
path: root/src/serdi.c
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2021-07-27 19:01:45 -0400
committer David Robillard <d@drobilla.net> 2022-01-14 19:37:51 -0500
commit 1159aea45d9bc4ade2e82856be403d58e050f32d (patch)
tree 870117b2a0a4b386727107bd85c736a020058309 /src/serdi.c
parent 89612ec05f596d135640413e093251fb9691ca14 (diff)
download serd-1159aea45d9bc4ade2e82856be403d58e050f32d.tar.gz
serd-1159aea45d9bc4ade2e82856be403d58e050f32d.tar.bz2
serd-1159aea45d9bc4ade2e82856be403d58e050f32d.zip
Add statement filtering
Diffstat (limited to 'src/serdi.c')
-rw-r--r-- src/serdi.c 108
1 files changed, 105 insertions, 3 deletions
diff --git a/src/serdi.c b/src/serdi.c
index 243be98e..7c664b72 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -37,6 +37,13 @@
#define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg)
#define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__)
+typedef struct { // Nodes of the one pattern statement, filled in by on_filter_event()
+ SerdNode* s; // Copy of the pattern statement's subject; also used as the "already set" marker
+ SerdNode* p; // Copy of the pattern statement's predicate
+ SerdNode* o; // Copy of the pattern statement's object
+ SerdNode* g; // Copy of the pattern statement's graph
+} FilterPattern;
+
static int
print_version(void)
{
@@ -57,6 +64,8 @@ print_usage(const char* const name, const bool error)
fprintf(os, "Read and write RDF syntax.\n");
fprintf(os, "Use - for INPUT to read from standard input.\n\n");
fprintf(os, " -C Convert literals to canonical form.\n");
+ fprintf(os, " -F PATTERN Filter out statements that match PATTERN.\n");
+ fprintf(os, " -G PATTERN Only include statements matching PATTERN.\n");
fprintf(os, " -I BASE_URI Input base URI.\n");
fprintf(os, " -a Write ASCII output if possible.\n");
fprintf(os, " -b Fast bulk output for large serialisations.\n");
@@ -88,6 +97,62 @@ missing_arg(const char* const name, const char opt)
}
+/**
+   Sink callback that records the nodes of a pattern statement.
+
+   `handle` is treated as a FilterPattern.  The first statement event has its
+   subject, predicate, object and graph copied into the pattern; a statement
+   event that arrives when the pattern's subject is already set is rejected
+   with SERD_ERR_INVALID.  Every other event type is ignored.
+*/
static SerdStatus
+on_filter_event(void* const handle, const SerdEvent* const event)
+{
+  // Only statements contribute to the pattern
+  if (event->type != SERD_STATEMENT) {
+    return SERD_SUCCESS;
+  }
+
+  FilterPattern* const pattern = (FilterPattern*)handle;
+
+  // A subject that is already set is taken to mean a statement was captured
+  // earlier, so the input holds more than one statement
+  if (pattern->s) {
+    return SERD_ERR_INVALID;
+  }
+
+  const SerdStatement* const stmt = event->statement.statement;
+
+  pattern->s = serd_node_copy(serd_statement_subject(stmt));
+  pattern->p = serd_node_copy(serd_statement_predicate(stmt));
+  pattern->o = serd_node_copy(serd_statement_object(stmt));
+  pattern->g = serd_node_copy(serd_statement_graph(stmt));
+
+  return SERD_SUCCESS;
+}
+
+/**
+   Build a filter sink from a statement pattern given as text.
+
+   The text is read as NQuads with SERD_READ_VARIABLES enabled, through a
+   temporary reader whose sink (on_filter_event) copies the statement's nodes
+   into a local FilterPattern.  Those nodes are then passed to
+   serd_filter_new().
+
+   @param world World used to create the temporary reader.
+   @param sink Sink passed to serd_filter_new() as the filter's target.
+   @param str Pattern text to parse.
+   @param inclusive Passed through to serd_filter_new().
+   @return The sink returned by serd_filter_new(), or NULL if reading the
+   pattern fails.
+*/
+static SerdSink*
+parse_filter(SerdWorld* const      world,
+             const SerdSink* const sink,
+             const char* const     str,
+             const bool            inclusive)
+{
+  SerdEnv* const  env         = serd_env_new(SERD_EMPTY_STRING());
+  FilterPattern   pat         = {NULL, NULL, NULL, NULL};
+  SerdSink*       in_sink     = serd_sink_new(&pat, on_filter_event, NULL);
+  SerdByteSource* byte_source = serd_byte_source_new_string(str, NULL);
+  SerdReader*     reader      = serd_reader_new(
+    world, SERD_NQUADS, SERD_READ_VARIABLES, env, in_sink, 4096);
+
+  SerdStatus st = serd_reader_start(reader, byte_source);
+  if (!st) {
+    st = serd_reader_read_document(reader);
+  }
+
+  // The temporary reading machinery is no longer needed once the pattern
+  // nodes have been captured (or reading has failed)
+  serd_reader_free(reader);
+  serd_env_free(env);
+  serd_byte_source_free(byte_source);
+  serd_sink_free(in_sink);
+
+  // Only build a filter from a pattern that was read without error
+  SerdSink* const filter =
+    st ? NULL : serd_filter_new(sink, pat.s, pat.p, pat.o, pat.g, inclusive);
+
+  // Release the captured nodes on every path.  A read can fail after
+  // on_filter_event has already copied a statement (for example when a
+  // second statement is rejected), and returning early would leak them.
+  serd_node_free(pat.s);
+  serd_node_free(pat.p);
+  serd_node_free(pat.o);
+  serd_node_free(pat.g);
+  return filter;
+}
+
+static SerdStatus
read_file(SerdWorld* const world,
SerdSyntax syntax,
const SerdReaderFlags flags,
@@ -158,6 +223,8 @@ main(int argc, char** argv)
bool quiet = false;
size_t stack_size = 4194304;
const char* input_string = NULL;
+ const char* in_pattern = NULL;
+ const char* out_pattern = NULL;
const char* add_prefix = "";
const char* chop_prefix = NULL;
const char* root_uri = NULL;
@@ -197,6 +264,20 @@ main(int argc, char** argv)
return print_version();
} else if (opt == 'x') {
reader_flags |= SERD_READ_VARIABLES;
+      } else if (argv[a][1] == 'F') {
+        // -F PATTERN: the pattern is the next command-line argument
+        if (++a == argc) {
+          return missing_arg(prog, 'F'); // prog, as in the -I branch
+        }
+
+        out_pattern = argv[a];
+        break;
+      } else if (argv[a][1] == 'G') {
+        // -G PATTERN: the pattern is the next command-line argument
+        if (++a == argc) {
+          // Report the option as the user typed it: upper-case 'G'
+          return missing_arg(prog, 'G');
+        }
+
+        in_pattern = argv[a];
+        break;
} else if (argv[a][1] == 'I') {
if (++a == argc) {
return missing_arg(prog, 'I');
@@ -280,6 +361,11 @@ main(int argc, char** argv)
}
}
+ if (in_pattern && out_pattern) {
+ SERDI_ERROR("only one of -F and -G can be given at once\n");
+ return 1;
+ }
+
if (a == argc && !input_string) {
SERDI_ERROR("missing input\n");
return 1;
@@ -370,6 +456,23 @@ main(int argc, char** argv)
sink = canon = serd_canon_new(world, out_sink, reader_flags);
}
+ SerdSink* filter = NULL;
+ if (in_pattern) {
+ if (!(filter = parse_filter(world, sink, in_pattern, true))) {
+ SERDI_ERROR("error parsing inclusive filter pattern\n");
+ return EXIT_FAILURE;
+ }
+
+ sink = filter;
+ } else if (out_pattern) {
+ if (!(filter = parse_filter(world, sink, out_pattern, false))) {
+ SERDI_ERROR("error parsing exclusive filter pattern\n");
+ return EXIT_FAILURE;
+ }
+
+ sink = filter;
+ }
+
if (quiet) {
serd_set_log_func(world, serd_quiet_log_func, NULL);
}
@@ -382,8 +485,7 @@ main(int argc, char** argv)
serd_writer_chop_blank_prefix(writer, chop_prefix);
- SerdStatus st = SERD_SUCCESS;
- SerdNode* input_name = NULL;
+ SerdStatus st = SERD_SUCCESS;
if (input_string) {
SerdByteSource* const byte_source =
serd_byte_source_new_string(input_string, NULL);
@@ -467,10 +569,10 @@ main(int argc, char** argv)
}
serd_sink_free(canon);
+ serd_sink_free(filter);
serd_sink_free(inserter);
serd_model_free(model);
serd_writer_free(writer);
- serd_node_free(input_name);
serd_env_free(env);
serd_node_free(base);
serd_world_free(world);