From 8346ac7f529f5aeb8d8b0e48837e680ea14e8893 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Thu, 12 Aug 2021 13:42:25 -0400 Subject: Make blank node prefixing automatic Though potentially useful, I don't think the complexity cost of the old interface (both to the implementation and to the user) is worth it. A special tool to transform blank node labels (for example with regular expressions) would be a better approach to this if it's ever needed in the future. --- doc/man/serd-pipe.1 | 33 -------------------------- include/serd/reader.h | 13 ----------- include/serd/writer.h | 10 -------- src/read_ntriples.c | 6 ++--- src/reader.c | 39 +++++++++++++------------------ src/reader.h | 2 +- src/world.h | 3 ++- src/writer.c | 35 ++-------------------------- test/extra/prefix/README.md | 4 ---- test/extra/prefix/manifest.ttl | 20 ---------------- test/extra/prefix/remove-prefixes.nt | 4 ---- test/extra/prefix/remove-prefixes.ttl | 7 ------ test/meson.build | 17 -------------- test/multifile/output.nq | 4 ++-- test/test_overflow.c | 2 +- test/test_reader_writer.c | 14 ----------- tools/serd-pipe.c | 44 ++++------------------------------- 17 files changed, 30 insertions(+), 227 deletions(-) delete mode 100644 test/extra/prefix/README.md delete mode 100644 test/extra/prefix/manifest.ttl delete mode 100644 test/extra/prefix/remove-prefixes.nt delete mode 100644 test/extra/prefix/remove-prefixes.ttl diff --git a/doc/man/serd-pipe.1 b/doc/man/serd-pipe.1 index 056d236f..b1e01990 100644 --- a/doc/man/serd-pipe.1 +++ b/doc/man/serd-pipe.1 @@ -13,9 +13,7 @@ .Op Fl I Ar syntax .Op Fl O Ar syntax .Op Fl b Ar bytes -.Op Fl c Ar prefix .Op Fl k Ar bytes -.Op Fl p Ar prefix .Op Fl r Ar root .Op Fl s Ar string .Op Fl w Ar filename @@ -162,23 +160,6 @@ I/O block size. This is the number of bytes in a file that will be read or written at once. The default is 4096, which should perform well in most cases. Note that this only applies to files, standard input and output are always processed one byte at a time. -.It Fl c Ar prefix -Chop -.Ar prefix -from matching blank node IDs. -This is typically used to revert the effects of -.Fl p . -For example, with -.Ar prefix -.Dq doc01 , -the blank node -.Li _:doc01b42 -will be emitted as -.Li _:b42 . -.It Fl e -Eat input one character at a time, rather than a page at a time which is the default. -This is useful when reading from a pipe since output will be generated immediately as input arrives, rather than waiting until an entire page of input has arrived. -With this option one less page of memory is used, but likely with a performance penalty. .It Fl f Fast and loose URI mode: preserve full URIs (without qualifying or making relative), @@ -191,20 +172,6 @@ Parsing is performed using a pre-allocated stack for performance and security re By default, the stack is 1 MiB, which should be sufficient for most data. This can be increased to support unusually structured data and huge literals, or decreased to reduce overall memory requirements and reduce startup time. -.It Fl p Ar prefix -Add -.Ar prefix -to blank node IDs. -This can be used to avoid clashes between blank node IDs in input documents. -The effects can be reversed in a later run with -.Fl c . -For example, with -.Ar prefix -.Dq doc01 , -the blank node -.Li _:b42 -will be emitted as -.Li _:doc01b42 . .It Fl q Suppress all output except data. .It Fl r Ar root diff --git a/include/serd/reader.h b/include/serd/reader.h index 34af6d04..a2bde202 100644 --- a/include/serd/reader.h +++ b/include/serd/reader.h @@ -104,19 +104,6 @@ serd_reader_new(SerdWorld* ZIX_NONNULL world, SerdEnv* ZIX_NONNULL env, const SerdSink* ZIX_NONNULL sink); -/** - Set a prefix to be added to all blank node identifiers. - - This is useful when multiple files are to be parsed into the same output (a - model or a file). Since Serd preserves blank node IDs, this could cause - conflicts where two non-equivalent blank nodes are merged, resulting in - corrupt data. By setting a unique blank node prefix for each parsed file, - this can be avoided, while preserving blank node names. -*/ -SERD_API void -serd_reader_add_blank_prefix(SerdReader* ZIX_NONNULL reader, - const char* ZIX_NULLABLE prefix); - /** Prepare to read some input. diff --git a/include/serd/writer.h b/include/serd/writer.h index 812b1851..91baa7e8 100644 --- a/include/serd/writer.h +++ b/include/serd/writer.h @@ -102,16 +102,6 @@ serd_writer_free(SerdWriter* ZIX_NULLABLE writer); SERD_CONST_API const SerdSink* ZIX_NONNULL serd_writer_sink(SerdWriter* ZIX_NONNULL writer); -/** - Set a prefix to be removed from matching blank node identifiers. - - This is the counterpart to serd_reader_add_blank_prefix() which can be used - to "undo" added prefixes. -*/ -SERD_API void -serd_writer_chop_blank_prefix(SerdWriter* ZIX_NONNULL writer, - const char* ZIX_NULLABLE prefix); - /** Set the current root URI. diff --git a/src/read_ntriples.c b/src/read_ntriples.c index 57b1e7be..dd5c28fc 100644 --- a/src/read_ntriples.c +++ b/src/read_ntriples.c @@ -273,10 +273,8 @@ read_BLANK_NODE_LABEL(SerdReader* const reader, return r_err(reader, SERD_BAD_SYNTAX, "expected blank node label"); } - if (!(*dest = push_node(reader, - SERD_BLANK, - reader->bprefix ? reader->bprefix : "", - reader->bprefix_len))) { + if (!(*dest = push_node( + reader, SERD_BLANK, reader->bprefix, reader->bprefix_len))) { return SERD_BAD_STACK; } diff --git a/src/reader.c b/src/reader.c index 39e85b47..73509c6f 100644 --- a/src/reader.c +++ b/src/reader.c @@ -68,11 +68,10 @@ set_blank_id(SerdReader* const reader, SerdNode* const node, const size_t buf_size) { - char* buf = (char*)(node + 1); - const char* prefix = reader->bprefix ? (const char*)reader->bprefix : ""; + char* const buf = (char*)(node + 1); - node->length = - (size_t)snprintf(buf, buf_size, "%sb%u", prefix, reader->next_id++); + node->length = (size_t)snprintf( + buf, buf_size, "%sb%u", reader->bprefix, reader->next_id++); } size_t @@ -201,6 +200,13 @@ serd_reader_read_document(SerdReader* const reader) return SERD_BAD_CALL; } + if (!(reader->flags & SERD_READ_GLOBAL)) { + reader->bprefix_len = (size_t)snprintf(reader->bprefix, + sizeof(reader->bprefix), + "f%u", + ++reader->world->next_document_id); + } + if (reader->syntax != SERD_SYNTAX_EMPTY && !reader->source->prepared) { SerdStatus st = serd_reader_prepare(reader); if (st) { @@ -271,6 +277,12 @@ serd_reader_new(SerdWorld* const world, assert(me->rdf_rest); assert(me->rdf_nil); + if (!(flags & SERD_READ_GLOBAL)) { + me->bprefix[0] = 'f'; + me->bprefix[1] = '0'; + me->bprefix_len = 2; + } + return me; } @@ -286,28 +298,9 @@ serd_reader_free(SerdReader* const reader) } serd_aaligned_free(reader->world->allocator, reader->stack.buf); - serd_wfree(reader->world, reader->bprefix); serd_wfree(reader->world, reader); } -void -serd_reader_add_blank_prefix(SerdReader* const reader, const char* const prefix) -{ - assert(reader); - - serd_wfree(reader->world, reader->bprefix); - reader->bprefix_len = 0; - reader->bprefix = NULL; - - const size_t prefix_len = prefix ? strlen(prefix) : 0; - if (prefix_len) { - reader->bprefix_len = prefix_len; - reader->bprefix = - (char*)serd_wmalloc(reader->world, reader->bprefix_len + 1); - memcpy(reader->bprefix, prefix, reader->bprefix_len + 1); - } -} - static SerdStatus skip_bom(SerdReader* const me) { diff --git a/src/reader.h b/src/reader.h index a98d5ef8..0bb3454b 100644 --- a/src/reader.h +++ b/src/reader.h @@ -46,7 +46,7 @@ struct SerdReaderImpl { SerdSyntax syntax; SerdReaderFlags flags; unsigned next_id; - char* bprefix; + char bprefix[24]; size_t bprefix_len; bool strict; ///< True iff strict parsing bool seen_genid; diff --git a/src/world.h b/src/world.h index 2499b761..f615868a 100644 --- a/src/world.h +++ b/src/world.h @@ -14,10 +14,11 @@ #include struct SerdWorldImpl { - SerdLimits limits; SerdAllocator* allocator; SerdLog log; + SerdLimits limits; uint32_t next_blank_id; + uint32_t next_document_id; SerdNode* blank_node; bool stderr_color; diff --git a/src/writer.c b/src/writer.c index 1de5e055..7afeb1c4 100644 --- a/src/writer.c +++ b/src/writer.c @@ -140,8 +140,6 @@ struct SerdWriterImpl { size_t anon_stack_size; SerdBlockDumper output; WriteContext context; - char* bprefix; - size_t bprefix_len; Sep last_sep; int indent; }; @@ -901,17 +899,7 @@ write_blank(SerdWriter* const writer, } TRY(st, esink("_:", 2, writer)); - if (writer->bprefix && - !strncmp(node_str, writer->bprefix, writer->bprefix_len)) { - TRY(st, - esink(node_str + writer->bprefix_len, - node->length - writer->bprefix_len, - writer)); - } else { - TRY(st, esink(node_str, node->length, writer)); - } - - return st; + return esink(node_str, node->length, writer); } SERD_NODISCARD static SerdStatus @@ -1382,25 +1370,7 @@ serd_writer_new(SerdWorld* world, return writer; } -void -serd_writer_chop_blank_prefix(SerdWriter* writer, const char* prefix) -{ - assert(writer); - - serd_wfree(writer->world, writer->bprefix); - writer->bprefix_len = 0; - writer->bprefix = NULL; - - const size_t prefix_len = prefix ? strlen(prefix) : 0; - if (prefix_len) { - writer->bprefix_len = prefix_len; - writer->bprefix = - (char*)serd_wmalloc(writer->world, writer->bprefix_len + 1); - memcpy(writer->bprefix, prefix, writer->bprefix_len + 1); - } -} - -SERD_NODISCARD static SerdStatus +static SerdStatus serd_writer_set_base_uri(SerdWriter* writer, const SerdNode* uri) { assert(writer); @@ -1482,7 +1452,6 @@ serd_writer_free(SerdWriter* writer) free_anon_stack(writer); serd_block_dumper_close(&writer->output); serd_wfree(writer->world, writer->anon_stack); - serd_wfree(writer->world, writer->bprefix); serd_node_free(writer->world->allocator, writer->root_node); serd_wfree(writer->world, writer); } diff --git a/test/extra/prefix/README.md b/test/extra/prefix/README.md deleted file mode 100644 index 56a07244..00000000 --- a/test/extra/prefix/README.md +++ /dev/null @@ -1,4 +0,0 @@ -Prefix Test Suite -================= - -This small suite tests adding/chopping prefixes to/from blank node labels. diff --git a/test/extra/prefix/manifest.ttl b/test/extra/prefix/manifest.ttl deleted file mode 100644 index 12a0f9ca..00000000 --- a/test/extra/prefix/manifest.ttl +++ /dev/null @@ -1,20 +0,0 @@ -@prefix mf: . -@prefix rdfs: . -@prefix rdft: . -@prefix serd: . - -<> - a mf:Manifest ; - rdfs:comment "Serd blank node prefixing test suite" ; - mf:entries ( - <#remove-prefixes> - ) . - -<#remove-prefixes> - a rdft:TestTurtleEval ; - mf:action ; - mf:name "remove-prefixes" ; - mf:result . - -rdft:Test - rdfs:subClassOf mf:ManifestEntry . diff --git a/test/extra/prefix/remove-prefixes.nt b/test/extra/prefix/remove-prefixes.nt deleted file mode 100644 index 26ac46ee..00000000 --- a/test/extra/prefix/remove-prefixes.nt +++ /dev/null @@ -1,4 +0,0 @@ -_:s1 _:o1 . -_:s1 _:o3 . -_:s1 _:o4 . -_:b0 _:b1 . diff --git a/test/extra/prefix/remove-prefixes.ttl b/test/extra/prefix/remove-prefixes.ttl deleted file mode 100644 index f3002a59..00000000 --- a/test/extra/prefix/remove-prefixes.ttl +++ /dev/null @@ -1,7 +0,0 @@ -_:tests1 - _:testo1 ; - _:testo3 , - _:testo4 . - -_:testb0 - _:testb1 . diff --git a/test/meson.build b/test/meson.build index 2de2348f..d7dcfa05 100644 --- a/test/meson.build +++ b/test/meson.build @@ -35,7 +35,6 @@ ttl_metadata_file_paths = [ 'extra/lax/manifest.ttl', 'extra/pattern/manifest.ttl', 'extra/perfect/manifest.ttl', - 'extra/prefix/manifest.ttl', 'extra/pretty/manifest.ttl', 'extra/qualify/manifest.ttl', 'extra/root/manifest.ttl', @@ -189,12 +188,10 @@ simple_command_tests = { ['-b', '1024junk'], ['-b', '9223372036854775807'], ['-b'], - ['-c'], ['-k', '-1'], ['-k', '1024junk'], ['-k', '9223372036854775807'], ['-k'], - ['-p'], ['-qi'], ['-r'], ['-s', ' a .'], @@ -500,20 +497,6 @@ test_suites = { files('extra/perfect/manifest.ttl'), ns_serdtest + 'perfect/', ], - 'prefix_add': [ - '--reverse', - files('extra/prefix/manifest.ttl'), - ns_serdtest + 'prefix/', - '--', - ['-I', 'generated'], - ['-p', 'test'], - ], - 'prefix_remove': [ - files('extra/prefix/manifest.ttl'), - ns_serdtest + 'prefix/', - '--', - ['-c', 'test'], - ], 'pretty': [ files('extra/pretty/manifest.ttl'), ns_serdtest + 'pretty/', diff --git a/test/multifile/output.nq b/test/multifile/output.nq index dd35dc4d..cf3ea93e 100644 --- a/test/multifile/output.nq +++ b/test/multifile/output.nq @@ -1,3 +1,3 @@ -_:f0b1 . _:f1b1 . -_:f1b2 . +_:f2b1 . +_:f2b2 . diff --git a/test/test_overflow.c b/test/test_overflow.c index 8b47cd2b..62a77022 100644 --- a/test/test_overflow.c +++ b/test/test_overflow.c @@ -9,7 +9,7 @@ #include static const size_t min_stack_size = 4U * sizeof(size_t) + 238U; -static const size_t max_stack_size = 1024U; +static const size_t max_stack_size = 2048U; static SerdStatus test_size(SerdWorld* const world, diff --git a/test/test_reader_writer.c b/test/test_reader_writer.c index aea0976d..9a4c988f 100644 --- a/test/test_reader_writer.c +++ b/test/test_reader_writer.c @@ -151,9 +151,6 @@ test_writer(const char* const path) assert(writer); - serd_writer_chop_blank_prefix(writer, "tmp"); - serd_writer_chop_blank_prefix(writer, NULL); - SerdNode* lit = serd_node_new(NULL, serd_a_string("hello")); const SerdSink* const iface = serd_writer_sink(writer); @@ -273,17 +270,6 @@ test_reader(const char* path) assert(serd_reader_read_chunk(reader) == SERD_BAD_CALL); assert(serd_reader_read_document(reader) == SERD_BAD_CALL); - serd_reader_add_blank_prefix(reader, "tmp"); - -#if defined(__GNUC__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wnonnull" -#endif - serd_reader_add_blank_prefix(reader, NULL); -#if defined(__GNUC__) -# pragma GCC diagnostic pop -#endif - SerdInputStream in = serd_open_input_file(path); assert(!serd_reader_start(reader, &in, NULL, 4096)); assert(!serd_reader_read_document(reader)); diff --git a/tools/serd-pipe.c b/tools/serd-pipe.c index 16abbd2c..66600006 100644 --- a/tools/serd-pipe.c +++ b/tools/serd-pipe.c @@ -44,10 +44,8 @@ print_usage(const char* const name, const bool error) " -O SYNTAX Output syntax (empty/turtle/ntriples/nquads),\n" " or flag (ascii/expanded/verbatim/terse/lax).\n" " -b BYTES I/O block size.\n" - " -c PREFIX Chop PREFIX from matching blank node IDs.\n" " -h Display this help and exit.\n" " -k BYTES Parser stack size.\n" - " -p PREFIX Add PREFIX to blank node IDs.\n" " -q Suppress all output except data.\n" " -r ROOT_URI Keep relative URIs within ROOT_URI.\n" " -s STRING Parse STRING as input.\n" @@ -76,7 +74,6 @@ read_file(SerdWorld* const world, const SerdSink* const sink, const size_t stack_size, const char* const filename, - const char* const add_prefix, const size_t block_size) { SerdInputStream in = serd_open_tool_input(filename); @@ -91,9 +88,7 @@ read_file(SerdWorld* const world, serd_world_set_limits(world, limits); SerdReader* reader = serd_reader_new(world, syntax, flags, env, sink); - serd_reader_add_blank_prefix(reader, add_prefix); - - SerdStatus st = serd_reader_start(reader, &in, NULL, block_size); + SerdStatus st = serd_reader_start(reader, &in, NULL, block_size); st = st ? st : serd_reader_read_document(reader); @@ -119,8 +114,6 @@ main(int argc, char** argv) size_t block_size = 4096U; size_t stack_size = 1048576U; const char* input_string = NULL; - const char* add_prefix = ""; - const char* chop_prefix = NULL; const char* root_uri = NULL; const char* out_filename = NULL; int a = 1; @@ -192,13 +185,6 @@ main(int argc, char** argv) } block_size = (size_t)size; break; - } else if (opt == 'c') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'c'); - } - - chop_prefix = argv[a]; - break; } else if (opt == 'k') { if (argv[a][o + 1] || ++a == argc) { return missing_arg(prog, 'k'); @@ -212,13 +198,6 @@ main(int argc, char** argv) } stack_size = (size_t)size; break; - } else if (opt == 'p') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'p'); - } - - add_prefix = argv[a]; - break; } else if (opt == 'r') { if (argv[a][o + 1] || ++a == argc) { return missing_arg(prog, 'r'); @@ -313,10 +292,7 @@ main(int argc, char** argv) serd_writer_set_root_uri(writer, serd_string(root_uri)); } - serd_writer_chop_blank_prefix(writer, chop_prefix); - - SerdStatus st = SERD_SUCCESS; - SerdNode* input_name = NULL; + SerdStatus st = SERD_SUCCESS; if (input_string) { const char* position = input_string; SerdInputStream string_in = serd_open_input_string(&position); @@ -324,8 +300,6 @@ main(int argc, char** argv) SerdReader* const reader = serd_reader_new( world, input_syntax ? input_syntax : SERD_TRIG, reader_flags, env, sink); - serd_reader_add_blank_prefix(reader, add_prefix); - if (!(st = serd_reader_start(reader, &string_in, NULL, 1U))) { st = serd_reader_read_document(reader); } @@ -334,11 +308,8 @@ main(int argc, char** argv) serd_close_input(&string_in); } - size_t prefix_len = 0; - char* prefix = NULL; - if (n_inputs > 1) { - prefix_len = 8 + strlen(add_prefix); - prefix = (char*)calloc(1, prefix_len); + if (n_inputs == 1) { + reader_flags |= SERD_READ_GLOBAL; } for (int i = 0; !st && i < n_inputs; ++i) { @@ -349,10 +320,6 @@ main(int argc, char** argv) } } - if (n_inputs > 1) { - snprintf(prefix, prefix_len, "f%d%s", i, add_prefix); - } - if ((st = read_file(world, serd_choose_syntax(world, input_syntax, inputs[i]), reader_flags, @@ -360,16 +327,13 @@ main(int argc, char** argv) sink, stack_size, inputs[i], - n_inputs > 1 ? prefix : add_prefix, block_size))) { break; } } - free(prefix); serd_sink_free(canon); serd_writer_free(writer); - serd_node_free(NULL, input_name); serd_env_free(env); serd_node_free(NULL, base); serd_world_free(world); -- cgit v1.2.1