From 0e9169e24fcfc4599a62f85991f407f257fec520 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Fri, 31 Mar 2023 13:54:20 -0400 Subject: Rename serdi to serd-pipe --- .reuse/dep5 | 4 +- NEWS | 1 + README.md | 20 +-- doc/man/meson.build | 4 +- doc/man/serd-pipe.1 | 210 ++++++++++++++++++++++++++++ doc/man/serdi.1 | 210 ---------------------------- doc/serd-memory.svg | 288 ++++++++++++++++++++++++++++++++++++++ doc/serd-memory.txt | 11 ++ doc/serd-throughput.svg | 296 +++++++++++++++++++++++++++++++++++++++ doc/serd-throughput.txt | 11 ++ doc/serd-time.svg | 299 ++++++++++++++++++++++++++++++++++++++++ doc/serd-time.txt | 11 ++ doc/serdi-memory.svg | 288 -------------------------------------- doc/serdi-memory.txt | 11 -- doc/serdi-throughput.svg | 296 --------------------------------------- doc/serdi-throughput.txt | 11 -- doc/serdi-time.svg | 299 ---------------------------------------- doc/serdi-time.txt | 11 -- meson.build | 2 +- scripts/serd_bench.py | 22 +-- serd.ttl | 2 +- test/meson.build | 51 ++++--- test/run_suite.py | 6 +- test/serd_test_util/__init__.py | 4 +- test/test_empty.py | 2 +- test/test_quiet.py | 4 +- test/test_stdin.py | 4 +- test/test_write_error.py | 2 +- tools/meson.build | 13 +- tools/serd-pipe.c | 290 ++++++++++++++++++++++++++++++++++++++ tools/serdi.c | 290 -------------------------------------- 31 files changed, 1489 insertions(+), 1484 deletions(-) create mode 100644 doc/man/serd-pipe.1 delete mode 100644 doc/man/serdi.1 create mode 100644 doc/serd-memory.svg create mode 100644 doc/serd-memory.txt create mode 100644 doc/serd-throughput.svg create mode 100644 doc/serd-throughput.txt create mode 100644 doc/serd-time.svg create mode 100644 doc/serd-time.txt delete mode 100644 doc/serdi-memory.svg delete mode 100644 doc/serdi-memory.txt delete mode 100644 doc/serdi-throughput.svg delete mode 100644 doc/serdi-throughput.txt delete mode 100644 doc/serdi-time.svg delete mode 100644 doc/serdi-time.txt create mode 100644 tools/serd-pipe.c 
delete mode 100644 tools/serdi.c diff --git a/.reuse/dep5 b/.reuse/dep5 index 3173226a..1c237f2b 100644 --- a/.reuse/dep5 +++ b/.reuse/dep5 @@ -23,7 +23,7 @@ Copyright: 2022 David Robillard Comment: Contributed to the Commons as a tool configuration License: 0BSD OR ISC -Files: doc/_static/serd.svg doc/serdi-*.svg doc/serdi-*.txt -Copyright: 2011-2022 David Robillard +Files: doc/_static/serd*.svg doc/serd*.svg doc/serd*.txt +Copyright: 2011-2023 David Robillard Comment: Documentation License: ISC diff --git a/NEWS b/NEWS index de5fe0fd..d7f5b5e9 100644 --- a/NEWS +++ b/NEWS @@ -11,6 +11,7 @@ serd (1.1.1) unstable; urgency=medium * Remove support for reading Turtle named inline nodes extension * Remove useless character counting from API * Rename SerdChunk to SerdStringView + * Replace serdi with more focused tools * Simplify statement flags * Simplify writer style options and write UTF-8 by default * Strengthen handling of corrupt UTF-8 input diff --git a/README.md b/README.md index 3e26e300..350a0e05 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,8 @@ Serd is a lightweight C library for working with RDF data. Serd can be used by high-performance or resource-limited applications to read or write [Turtle][], [TriG][], [NTriples][], and [NQuads][]. The included -`serdi` tool can be used to efficiently process RDF documents in scripts or on -the command-line. +command line tools can be used to efficiently process RDF documents in scripts +or on the command-line. Features -------- @@ -42,14 +42,14 @@ Features Performance ----------- -The benchmarks below compare `serdi`, [rapper][], and [riot][] re-serialising -Turtle data generated by [sp2b][] on an AMD 1950x. Of the three, `serdi` is -the fastest by a wide margin, and the only one that uses a constant amount of -memory for all input sizes. +The benchmarks below compare `serd-pipe`, [rapper][], and [riot][] +re-serialising Turtle data generated by [sp2b][] on an AMD 1950x. 
Of the +three, `serd-pipe` is the fastest by a wide margin, and the only one that uses +a constant amount of memory for all input sizes. -![Throughput](doc/serdi-throughput.svg) -![Time](doc/serdi-time.svg) -![Memory](doc/serdi-memory.svg) +![Throughput](doc/serd-throughput.svg) +![Time](doc/serd-time.svg) +![Memory](doc/serd-memory.svg) Documentation ------------- @@ -57,7 +57,7 @@ Documentation * [Installation instructions](INSTALL.md) * [Single-page API reference](https://drobilla.gitlab.io/serd/doc/singlehtml/) * [Paginated API reference](https://drobilla.gitlab.io/serd/doc/html/) - * [`serdi` man page](https://drobilla.gitlab.io/serd/man/serdi.html) + * [`serd-pipe` man page](https://drobilla.gitlab.io/serd/man/serd-pipe.html) Versioning ---------- diff --git a/doc/man/meson.build b/doc/man/meson.build index 323a8c4d..ae0c1c51 100644 --- a/doc/man/meson.build +++ b/doc/man/meson.build @@ -21,7 +21,7 @@ if not get_option('tools').disabled() ) endif - install_man(files('serdi.1')) + install_man(files('serd-pipe.1')) endif # Build/install HTML man pages if mandoc is present @@ -44,7 +44,7 @@ if not get_option('tools').disabled() ] page_names = [ - 'serdi', + 'serd-pipe', ] html_mandir = docdir / versioned_name / 'man' diff --git a/doc/man/serd-pipe.1 b/doc/man/serd-pipe.1 new file mode 100644 index 00000000..32482c73 --- /dev/null +++ b/doc/man/serd-pipe.1 @@ -0,0 +1,210 @@ +.\" Copyright 2011-2023 David Robillard +.\" SPDX-License-Identifier: ISC +.Dd May 04, 2023 +.Dt SERD-PIPE 1 +.Os Serd 1.1.1 +.Sh NAME +.Nm serd-pipe +.Nd read and write RDF data +.Sh SYNOPSIS +.Nm serd-pipe +.Op Fl abefhlqtv +.Op Fl c Ar prefix +.Op Fl i Ar syntax +.Op Fl k Ar bytes +.Op Fl o Ar syntax +.Op Fl p Ar prefix +.Op Fl r Ar root +.Op Fl s Ar string +.Ar input +.Op Ar base_uri +.Sh DESCRIPTION +.Nm +is a fast command-line utility for streaming and processing RDF data. +It reads an RDF document and writes the data to stdout, +possibly transformed and/or in a different syntax. 
+By default, +the input syntax is guessed from the file extension, +and output is written in NTriples or NQuads. +.Pp +.Nm +can be used to check for syntax errors, +convert from one syntax to another, +pretty-print documents, +or transform URIs and blank node IDs. +.Pp +The options are as follows: +.Bl -tag -width 3n +.It Fl a +Write ASCII output. +If this is enabled, all non-ASCII characters will be escaped, even if the output syntax allows them to be written in UTF-8. +.It Fl b +Bulk output writing. +If this is enabled, output will be written a page at a time, rather than a byte at a time. +.It Fl c Ar prefix +Chop +.Ar prefix +from matching blank node IDs. +This is typically used to revert the effects of +.Fl p . +For example, with +.Ar prefix +.Dq doc01 , +the blank node +.Li _:doc01b42 +will be emitted as +.Li _:b42 . +.It Fl e +Eat input one character at a time, rather than a page at a time which is the default. +This is useful when reading from a pipe since output will be generated immediately as input arrives, rather than waiting until an entire page of input has arrived. +With this option one less page of memory is used, but likely with a performance penalty. +.It Fl f +Fast and loose URI mode: +preserve full URIs (without qualifying or making relative), +and pass prefixed names through as-is. +.It Fl h +Print the command line options. +.It Fl i Ar syntax +Read input as +.Ar syntax . +Case is ignored, valid values are: +.Dq NQuads , +.Dq NTriples , +.Dq TriG , +and +.Dq Turtle . +.It Fl k Ar bytes +Parser stack size. +Parsing is performed using a pre-allocated stack for performance and security reasons. +By default, the stack is 1 MiB, which should be sufficient for most data. +This can be increased to support unusually structured data and huge literals, +or decreased to reduce overall memory requirements and reduce startup time. +.It Fl l +Lax (non-strict) parsing. 
+If this is enabled, recoverable syntax errors will print a warning, but parsing will proceed starting at the next statement if possible. +Note that data may be lost when using this option. +.It Fl o Ar syntax +Write output as +.Ar syntax . +Case is ignored, valid values are: +.Dq empty , +.Dq NQuads , +.Dq NTriples , +.Dq TriG , +and +.Dq Turtle . +The +.Cm empty +syntax suppresses the output, +so that only warnings and errors will be printed. +.It Fl p Ar prefix +Add +.Ar prefix +to blank node IDs. +This can be used to avoid clashes between blank node IDs in input documents. +The effects can be reversed in a later run with +.Fl c . +For example, with +.Ar prefix +.Dq doc01 , +the blank node +.Li _:b42 +will be emitted as +.Li _:doc01b42 . +.It Fl q +Suppress all output except data. +.It Fl r Ar root +Keep relative URIs within a +.Ar root +URI. +This will avoid creating any relative URI references with leading path segments like +.Pa ../ +that enter a parent of +.Ar root . +.Pp +For example, +if +.Pa /home/you/file.ttl +is written to the file +.Pa /home/me/output.ttl +using the destination's base URI, +then it could be written as +.Li <../you/file.ttl> . +Setting +.Fl r Li file:///home/me/ +would prevent references from +.Dq escaping +like this, +so the above would instead be written as +.Li <file:///home/you/file.ttl> , +since it can't be expressed relative to the root URI. +.Pp +This is useful for keeping relative references within some directory. +.It Fl s Ar string +Parse +.Ar string +input instead of a file (terminates options). +.It Fl t +Write terser output without newlines. +.It Fl v +Display version information and exit. +.El +.Sh EXIT STATUS +.Nm +exits with a status of 0, or non-zero if an error occurred. 
+.Sh EXAMPLES +.Bl -tag -width 3n +.It Pretty-print a document: +.Nm Fl o +.Ar turtle +.Pa file.ttl +> +.Pa out.ttl +.It Print only errors and discard the output: +.Nm Fl o +.Ar empty +.Pa input.ttl +.El +.Sh SEE ALSO +.Bl -item -compact +.It +.Lk http://drobilla.net/software/serd/ +.It +.Lk http://gitlab.com/drobilla/serd/ +.El +.Sh STANDARDS +.Bl -item +.It +.Rs +.%A W3C +.%T RDF 1.1 NQuads +.%D February 2014 +.Re +.Lk https://www.w3.org/TR/n-quads/ +.It +.Rs +.%A W3C +.%D February 2014 +.%T RDF 1.1 NTriples +.Re +.Lk https://www.w3.org/TR/n-triples/ +.It +.Rs +.%A W3C +.%T RDF 1.1 TriG +.%D February 2014 +.Re +.Lk https://www.w3.org/TR/trig/ +.It +.Rs +.%A W3C +.%D February 2014 +.%T RDF 1.1 Turtle +.Re +.Lk https://www.w3.org/TR/turtle/ +.El +.Sh AUTHORS +.Nm +is a part of serd, by +.An David Robillard +.Mt d@drobilla.net . diff --git a/doc/man/serdi.1 b/doc/man/serdi.1 deleted file mode 100644 index 7822832a..00000000 --- a/doc/man/serdi.1 +++ /dev/null @@ -1,210 +0,0 @@ -.\" Copyright 2011-2022 David Robillard -.\" SPDX-License-Identifier: ISC -.Dd April 30, 2023 -.Dt SERDI 1 -.Os Serd 1.1.1 -.Sh NAME -.Nm serdi -.Nd read and write RDF syntax -.Sh SYNOPSIS -.Nm serdi -.Op Fl abefhlqtv -.Op Fl c Ar prefix -.Op Fl i Ar syntax -.Op Fl k Ar bytes -.Op Fl o Ar syntax -.Op Fl p Ar prefix -.Op Fl r Ar root -.Op Fl s Ar string -.Ar input -.Op Ar base_uri -.Sh DESCRIPTION -.Nm -is a fast command-line utility for streaming and processing RDF data. -It reads an RDF document and writes the data to stdout, -possibly transformed and/or in a different syntax. -By default, -the input syntax is guessed from the file extension, -and output is written in NTriples or NQuads. -.Pp -.Nm -can be used to check for syntax errors, -convert from one syntax to another, -pretty-print documents, -or transform URIs and blank node IDs. -.Pp -The options are as follows: -.Bl -tag -width 3n -.It Fl a -Write ASCII output. 
-If this is enabled, all non-ASCII characters will be escaped, even if the output syntax allows them to be written in UTF-8. -.It Fl b -Bulk output writing. -If this is enabled, output will be written a page at a time, rather than a byte at a time. -.It Fl c Ar prefix -Chop -.Ar prefix -from matching blank node IDs. -This is typically used to revert the effects of -.Fl p . -For example, with -.Ar prefix -.Dq doc01 , -the blank node -.Li _:doc01b42 -will be emitted as -.Li _:b42 . -.It Fl e -Eat input one character at a time, rather than a page at a time which is the default. -This is useful when reading from a pipe since output will be generated immediately as input arrives, rather than waiting until an entire page of input has arrived. -With this option serdi uses one page less memory, but will likely be significantly slower. -.It Fl f -Fast and loose URI mode: -preserve full URIs (without qualifying or making relative), -and pass prefixed names through as-is. -.It Fl h -Print the command line options. -.It Fl i Ar syntax -Read input as -.Ar syntax . -Case is ignored, valid values are: -.Dq NQuads , -.Dq NTriples , -.Dq TriG , -and -.Dq Turtle . -.It Fl k Ar bytes -Parser stack size. -Parsing is performed using a pre-allocated stack for performance and security reasons. -By default, the stack is 1 MiB, which should be sufficient for most data. -This can be increased to support unusually structured data and huge literals, -or decreased to reduce overall memory requirements and reduce startup time. -.It Fl l -Lax (non-strict) parsing. -If this is enabled, recoverable syntax errors will print a warning, but parsing will proceed starting at the next statement if possible. -Note that data may be lost when using this option. -.It Fl o Ar syntax -Write output as -.Ar syntax . -Case is ignored, valid values are: -.Dq empty , -.Dq NQuads , -.Dq NTriples , -.Dq TriG , -and -.Dq Turtle . 
-The -.Cm empty -syntax suppresses the output, -so that only warnings and errors will be printed. -.It Fl p Ar prefix -Add -.Ar prefix -to blank node IDs. -This can be used to avoid clashes between blank node IDs in input documents. -The effects can be reversed in a later run with -.Fl c . -For example, with -.Ar prefix -.Dq doc01 , -the blank node -.Li _:b42 -will be emitted as -.Li _:doc01b42 . -.It Fl q -Suppress all output except data. -.It Fl r Ar root -Keep relative URIs within a -.Ar root -URI. -This will avoid creating any relative URI references with leading path segments like -.Pa ../ -that enter a parent of -.Ar root . -.Pp -For example, -if -.Pa /home/you/file.ttl -is written to the file -.Pa /home/me/output.ttl -using the destination's base URI, -then it could be written as -.Li <../you/file.ttl> . -Setting -.Fl r Li file:///home/me/ -would prevent references from -.Dq escaping -like this, -so the above would instead be written as -.Li <file:///home/you/file.ttl> , -since it can't be expressed relative to the root URI. -.Pp -This is useful for keeping relative references within some directory. -.It Fl s Ar string -Parse -.Ar string -input instead of a file (terminates options). -.It Fl t -Write terser output without newlines. -.It Fl v -Display version information and exit. -.El -.Sh EXIT STATUS -.Nm -exits with a status of 0, or non-zero if an error occurred. 
-.Sh EXAMPLES -.Bl -tag -width 3n -.It Pretty-print a document: -.Nm Fl o -.Ar turtle -.Pa file.ttl -> -.Pa out.ttl -.It Print only errors and discard the output: -.Nm Fl O -.Ar empty -.Pa input.ttl -.El -.Sh SEE ALSO -.Bl -item -compact -.It -.Lk http://drobilla.net/software/serd/ -.It -.Lk http://gitlab.com/drobilla/serd/ -.El -.Sh STANDARDS -.Bl -item -.It -.Rs -.%A W3C -.%T RDF 1.1 NQuads -.%D February 2014 -.Re -.Lk https://www.w3.org/TR/n-quads/ -.It -.Rs -.%A W3C -.%D February 2014 -.%T RDF 1.1 NTriples -.Re -.Lk https://www.w3.org/TR/n-triples/ -.It -.Rs -.%A W3C -.%T RDF 1.1 TriG -.%D February 2014 -.Re -.Lk https://www.w3.org/TR/trig/ -.It -.Rs -.%A W3C -.%D February 2014 -.%T RDF 1.1 Turtle -.Re -.Lk https://www.w3.org/TR/turtle/ -.El -.Sh AUTHORS -.Nm -is a part of serd, by -.An David Robillard -.Mt d@drobilla.net . diff --git a/doc/serd-memory.svg b/doc/serd-memory.svg new file mode 100644 index 00000000..db837a07 --- /dev/null +++ b/doc/serd-memory.svg @@ -0,0 +1,288 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/serd-memory.txt b/doc/serd-memory.txt new file mode 100644 index 00000000..6749642a --- /dev/null +++ b/doc/serd-memory.txt @@ -0,0 +1,11 @@ +n serd-pipe rapper riot +1000000 1634304.0 492728320.0 589537280.0 +2000000 1523712.0 972505088.0 750370816.0 +3000000 1409024.0 1450397696.0 905318400.0 +4000000 1556480.0 1926307840.0 772521984.0 +5000000 1585152.0 2403586048.0 948658176.0 +6000000 1581056.0 
2877878272.0 948121600.0 +7000000 1622016.0 3351150592.0 957530112.0 +8000000 1527808.0 3821490176.0 950579200.0 +9000000 1531904.0 4294303744.0 949751808.0 +10000000 1687552.0 4764184576.0 977002496.0 diff --git a/doc/serd-throughput.svg b/doc/serd-throughput.svg new file mode 100644 index 00000000..e32c3355 --- /dev/null +++ b/doc/serd-throughput.svg @@ -0,0 +1,296 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/serd-throughput.txt b/doc/serd-throughput.txt new file mode 100644 index 00000000..547c1f51 --- /dev/null +++ b/doc/serd-throughput.txt @@ -0,0 +1,11 @@ +n serd-pipe rapper riot +1000000 833333 121654 87796 +2000000 843881 119189 130804 +3000000 835654 115651 150602 +4000000 852878 116482 169348 +5000000 863557 112309 174703 +6000000 845070 113981 182592 +7000000 838323 112377 190891 +8000000 854700 110314 193143 +9000000 837209 112107 205198 +10000000 829875 111321 203252 diff --git a/doc/serd-time.svg b/doc/serd-time.svg new file mode 100644 index 00000000..3b8e5523 --- /dev/null +++ b/doc/serd-time.svg @@ -0,0 +1,299 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/serd-time.txt b/doc/serd-time.txt new file mode 100644 index 00000000..3482ef6f --- /dev/null +++ b/doc/serd-time.txt @@ -0,0 +1,11 @@ +n serd-pipe rapper riot +1000000 1.2000000 8.2200000 11.3900000 +2000000 2.3700000 16.7800000 15.2900000 +3000000 3.5900000 25.9400000 19.9200000 +4000000 4.6900000 34.3400000 23.6200000 +5000000 5.7900000 44.5200000 28.6200000 +6000000 7.1000000 52.6400000 32.8600000 +7000000 8.3500000 62.2900000 36.6700000 +8000000 9.3600000 72.5200000 41.4200000 +9000000 10.7500000 80.2800000 43.8600000 +10000000 12.0500000 89.8300000 49.2000000 diff --git a/doc/serdi-memory.svg b/doc/serdi-memory.svg deleted file mode 100644 index db837a07..00000000 --- a/doc/serdi-memory.svg +++ /dev/null @@ -1,288 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/doc/serdi-memory.txt b/doc/serdi-memory.txt deleted file mode 100644 index 82cac3a4..00000000 --- a/doc/serdi-memory.txt +++ /dev/null @@ -1,11 +0,0 @@ -n serdi rapper riot -1000000 1634304.0 492728320.0 589537280.0 -2000000 1523712.0 972505088.0 750370816.0 -3000000 1409024.0 1450397696.0 905318400.0 -4000000 1556480.0 1926307840.0 772521984.0 -5000000 1585152.0 2403586048.0 948658176.0 -6000000 1581056.0 2877878272.0 948121600.0 -7000000 1622016.0 
3351150592.0 957530112.0 -8000000 1527808.0 3821490176.0 950579200.0 -9000000 1531904.0 4294303744.0 949751808.0 -10000000 1687552.0 4764184576.0 977002496.0 diff --git a/doc/serdi-throughput.svg b/doc/serdi-throughput.svg deleted file mode 100644 index e32c3355..00000000 --- a/doc/serdi-throughput.svg +++ /dev/null @@ -1,296 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/doc/serdi-throughput.txt b/doc/serdi-throughput.txt deleted file mode 100644 index cb0d2678..00000000 --- a/doc/serdi-throughput.txt +++ /dev/null @@ -1,11 +0,0 @@ -n serdi rapper riot -1000000 833333 121654 87796 -2000000 843881 119189 130804 -3000000 835654 115651 150602 -4000000 852878 116482 169348 -5000000 863557 112309 174703 -6000000 845070 113981 182592 -7000000 838323 112377 190891 -8000000 854700 110314 193143 -9000000 837209 112107 205198 -10000000 829875 111321 203252 diff --git a/doc/serdi-time.svg b/doc/serdi-time.svg deleted file mode 100644 index 3b8e5523..00000000 --- a/doc/serdi-time.svg +++ /dev/null @@ -1,299 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/doc/serdi-time.txt b/doc/serdi-time.txt deleted file mode 100644 index dd0022bd..00000000 --- a/doc/serdi-time.txt +++ /dev/null @@ -1,11 +0,0 @@ -n serdi rapper riot -1000000 1.2000000 8.2200000 11.3900000 -2000000 2.3700000 16.7800000 15.2900000 -3000000 3.5900000 25.9400000 19.9200000 -4000000 4.6900000 34.3400000 23.6200000 -5000000 5.7900000 44.5200000 28.6200000 -6000000 7.1000000 52.6400000 32.8600000 -7000000 8.3500000 62.2900000 36.6700000 -8000000 9.3600000 72.5200000 41.4200000 -9000000 10.7500000 80.2800000 43.8600000 -10000000 12.0500000 89.8300000 49.2000000 diff --git a/meson.build b/meson.build index 2c440224..642da5a9 100644 --- a/meson.build +++ b/meson.build @@ -232,7 +232,7 @@ install_headers(c_headers, subdir: versioned_name / 'serd') # Tools # ######### -# Build serdi command line utility +# Build serd-pipe command line utility if not get_option('tools').disabled() subdir('tools') endif diff --git a/scripts/serd_bench.py b/scripts/serd_bench.py index e342976b..1bd61855 100755 --- a/scripts/serd_bench.py +++ b/scripts/serd_bench.py @@ -171,9 +171,9 @@ def run(progs, n_min, n_max, step): "Benchmark each program with n_min ... 
n_max statements" with WorkingDirectory("build"): results = { - "time": open("serdi-time.txt", "w"), - "throughput": open("serdi-throughput.txt", "w"), - "memory": open("serdi-memory.txt", "w"), + "time": open("serd-time.txt", "w"), + "throughput": open("serd-throughput.txt", "w"), + "memory": open("serd-memory.txt", "w"), } # Write TSV header for all output files @@ -205,7 +205,7 @@ def run(progs, n_min, n_max, step): f.write("\t".join(rows[name]) + "\n") for name, f in results.items(): - tsv_filename = "serdi-%s.txt" % name + tsv_filename = "serd-%s.txt" % name sys.stderr.write("wrote %s\n" % tsv_filename) @@ -213,20 +213,20 @@ def plot_results(): "Plot all benchmark results" with WorkingDirectory("build"): plot( - open("serdi-time.txt", "r"), - "serdi-time.svg", + open("serd-time.txt", "r"), + "serd-time.svg", "Statements", "Time (s)", ) plot( - open("serdi-throughput.txt", "r"), - "serdi-throughput.svg", + open("serd-throughput.txt", "r"), + "serd-throughput.svg", "Statements", "Statements / s", ) plot( - open("serdi-memory.txt", "r"), - "serdi-memory.svg", + open("serd-memory.txt", "r"), + "serd-memory.svg", "Statements", "Bytes", ) @@ -271,7 +271,7 @@ example: args = ap.parse_args(sys.argv[1:]) - progs = ["serdi -b -f -i turtle -o turtle"] + args.run + progs = ["serd-pipe -b -f -i turtle -o turtle"] + args.run min_n = int(args.max / args.steps) max_n = args.max step = min_n diff --git a/serd.ttl b/serd.ttl index d3255be6..eea87c29 100644 --- a/serd.ttl +++ b/serd.ttl @@ -16,7 +16,7 @@ doap:homepage ; doap:license ; doap:shortdesc "Lightweight C library for working with RDF data" ; - doap:description "Serd is a lightweight C library for working with RDF data. Serd can be used by high-performance or resource-limited applications to read or write Turtle, TriG, NTriples, and NQuads. The included `serdi` tool can be used to efficiently process RDF documents in scripts or on the command-line." 
; + doap:description "Serd is a fast and lightweight C library for reading and writing RDF in Turtle, NTriples, TriG, and NQuads." ; doap:created "2011-09-28"^^xsd:date ; doap:programming-language "C" ; doap:implements , diff --git a/test/meson.build b/test/meson.build index 496296c1..2178ce1f 100644 --- a/test/meson.build +++ b/test/meson.build @@ -99,12 +99,12 @@ if get_option('lint') test('pylint', pylint, args: pylint_args + pylint_scripts, suite: 'scripts') endif - # Check Turtle formatting with serdi + # Check Turtle formatting with serd-pipe foreach ttl_file_path : ttl_metadata_file_paths test( ttl_file_path.underscorify(), check_formatting_py, - args: [files(ttl_file_path), serdi, '-o', 'turtle'], + args: [files(ttl_file_path), serd_pipe, '-o', 'turtle'], suite: 'data', ) endforeach @@ -170,7 +170,7 @@ if wrapper != '' endif simple_command_tests = { - 'serdi': { + 'pipe': { 'bad': [ ['-c'], ['-fi'], @@ -197,22 +197,22 @@ simple_command_tests = { }, } -if is_variable('serdi') - script_args = common_script_args + ['--serdi', serdi] +if is_variable('serd_pipe') + pipe_script_args = common_script_args + ['--tool', serd_pipe] serd_ttl = files('../serd.ttl')[0] bad_input_file = files('extra/bad/bad-base.ttl') - test('serd_ttl', serdi, args: [serd_ttl], env: test_env, suite: 'data') + test('serd_ttl', serd_pipe, args: [serd_ttl], env: test_env, suite: 'data') # Command line options - cmd_suite = ['serdi', 'options'] + cmd_suite = ['tools', 'pipe', 'options'] - foreach kind, cases : simple_command_tests['serdi'] + foreach kind, cases : simple_command_tests['pipe'] foreach args : cases test( ' '.join(args).substring(1).underscorify(), - serdi, + serd_pipe, args: args, env: test_env, should_fail: kind == 'bad', @@ -221,19 +221,19 @@ if is_variable('serdi') endforeach endforeach - test('none', serdi, env: test_env, should_fail: true, suite: cmd_suite) + test('none', serd_pipe, env: test_env, should_fail: true, suite: cmd_suite) test( 'quiet', files('test_quiet.py'), - 
args: script_args + [bad_input_file], + args: pipe_script_args + [bad_input_file], env: test_env, suite: cmd_suite, ) # Inputs - input_suite = ['serdi', 'input'] + input_suite = ['tools', 'pipe', 'input'] bad_input_tests = { 'string': ['-s', ' a .'], @@ -244,7 +244,7 @@ if is_variable('serdi') foreach name, args : bad_input_tests test( name, - serdi, + serd_pipe, args: args, env: test_env, should_fail: true, @@ -255,7 +255,7 @@ if is_variable('serdi') test( 'stdin', files('test_stdin.py'), - args: script_args, + args: pipe_script_args, env: test_env, suite: input_suite, ) @@ -265,7 +265,7 @@ if is_variable('serdi') test( 'empty', files('test_empty.py'), - args: script_args + [serd_ttl], + args: pipe_script_args + [serd_ttl], env: test_env, suite: 'output', ) @@ -279,13 +279,20 @@ if is_variable('serdi') } foreach name, args : io_error_tests - test(name, serdi, args: args, env: test_env, should_fail: true, suite: 'io') + test( + name, + serd_pipe, + args: args, + env: test_env, + should_fail: true, + suite: 'io', + ) endforeach test( 'write_error', files('test_write_error.py'), - args: script_args + [serd_ttl], + args: pipe_script_args + [serd_ttl], env: test_env, suite: 'io', ) @@ -293,7 +300,7 @@ if is_variable('serdi') if host_machine.system() == 'linux' test( 'unreadable', - serdi, + serd_pipe, args: ['/sys/bus/pci/rescan'], env: test_env, should_fail: true, @@ -441,15 +448,15 @@ test_suites = { ], } -# Run every test suite with serdi -if is_variable('serdi') - script_args = common_script_args + ['--serdi', serdi] +# Run every test suite with serd-pipe +if is_variable('serd_pipe') + script_args = common_script_args + ['--tool', serd_pipe] foreach name, args : test_suites test( name, run_suite, - args: script_args + args, + args: pipe_script_args + args, env: test_env, suite: ['suite'], timeout: 240, diff --git a/test/run_suite.py b/test/run_suite.py index b44808b7..fdda0625 100755 --- a/test/run_suite.py +++ b/test/run_suite.py @@ -154,14 +154,14 @@ def main(): 
parser.add_argument("--lax", action="store_true", help="tolerate errors") parser.add_argument("--report", help="path to write result report to") parser.add_argument("--reverse", action="store_true", help="reverse test") - parser.add_argument("--serdi", default="tools/serdi", help="path to serdi") + parser.add_argument("--tool", default="tools/serd-pipe", help="executable") parser.add_argument("--wrapper", default="", help="executable wrapper") parser.add_argument("manifest", help="test suite manifest.ttl file") parser.add_argument("base_uri", help="base URI for tests") - parser.add_argument("arg", nargs=argparse.REMAINDER, help="serdi argument") + parser.add_argument("arg", nargs=argparse.REMAINDER, help="tool argument") args = parser.parse_args(sys.argv[1:]) - command = shlex.split(args.wrapper) + [args.serdi] + command = shlex.split(args.wrapper) + [args.tool] with tempfile.TemporaryDirectory() as temp: return run_suite(args, command, temp) diff --git a/test/serd_test_util/__init__.py b/test/serd_test_util/__init__.py index 4f426009..ac831054 100644 --- a/test/serd_test_util/__init__.py +++ b/test/serd_test_util/__init__.py @@ -57,7 +57,7 @@ def wrapper_args(description, with_input=False): """Return the command line arguments for a wrapped test.""" parser = argparse.ArgumentParser(description) - parser.add_argument("--serdi", default="tools/serdi", help="executable") + parser.add_argument("--tool", default="tools/serd-pipe", help="executable") parser.add_argument("--wrapper", default="", help="executable wrapper") if with_input: parser.add_argument("input", help="input file") @@ -149,7 +149,7 @@ def earl_assertion(test, passed, asserter): def load_rdf(filename, base_uri, command_prefix): - """Load an RDF file as dictionaries via serdi (only supports URIs).""" + """Load an RDF file as dictionaries via serd-pipe (only supports URIs).""" rdf_type = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" model = {} diff --git a/test/test_empty.py b/test/test_empty.py index 
3cd5517b..0ee641d5 100755 --- a/test/test_empty.py +++ b/test/test_empty.py @@ -14,7 +14,7 @@ import tempfile import serd_test_util as util args = util.wrapper_args(__doc__, True) -command = shlex.split(args.wrapper) + [args.serdi, "-o", "empty", args.input] +command = shlex.split(args.wrapper) + [args.tool, "-o", "empty", args.input] with tempfile.TemporaryFile() as out: proc = subprocess.run(command, check=False, stdout=out) diff --git a/test/test_quiet.py b/test/test_quiet.py index 676284bb..a58779ea 100755 --- a/test/test_quiet.py +++ b/test/test_quiet.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2022 David Robillard +# Copyright 2022-2023 David Robillard # SPDX-License-Identifier: ISC """Test quiet command-line option.""" @@ -11,7 +11,7 @@ import subprocess import serd_test_util as util args = util.wrapper_args(__doc__, True) -command = shlex.split(args.wrapper) + [args.serdi, "-q", args.input] +command = shlex.split(args.wrapper) + [args.tool, "-q", args.input] proc = subprocess.run( command, check=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) diff --git a/test/test_stdin.py b/test/test_stdin.py index fb01f4ee..9975c547 100755 --- a/test/test_stdin.py +++ b/test/test_stdin.py @@ -3,14 +3,14 @@ # Copyright 2022-2023 David Robillard # SPDX-License-Identifier: ISC -"""Test reading from stdin with serdi.""" +"""Test reading from stdin with serd-pipe.""" # pylint: disable=consider-using-f-string import serd_test_util as util args = util.wrapper_args(__doc__) -command = [args.serdi, "-i", "ntriples", "-", "http://example.org"] +command = [args.tool, "-i", "ntriples", "-", "http://example.org"] DOC = "<{0}s> <{0}p> <{0}o> .".format("http://example.org/") diff --git a/test/test_write_error.py b/test/test_write_error.py index b62f981a..7d165a6a 100755 --- a/test/test_write_error.py +++ b/test/test_write_error.py @@ -13,7 +13,7 @@ import os import serd_test_util as util args = util.wrapper_args(__doc__, True) -command = shlex.split(args.wrapper) + 
[args.serdi, args.input] +command = shlex.split(args.wrapper) + [args.tool, args.input] if os.path.exists("/dev/full"): with open("/dev/full", "w", encoding="utf-8") as out: diff --git a/tools/meson.build b/tools/meson.build index 49fa32c7..ce82b212 100644 --- a/tools/meson.build +++ b/tools/meson.build @@ -1,4 +1,4 @@ -# Copyright 2021-2022 David Robillard +# Copyright 2021-2023 David Robillard # SPDX-License-Identifier: 0BSD OR ISC tool_c_args = c_suppressions + platform_c_args @@ -8,16 +8,13 @@ if get_option('static') and cc.get_id() != 'msvc' tool_link_args += ['-static'] endif -serdi = executable( - 'serdi', - files( - 'console.c', - 'serdi.c', - ), +serd_pipe = executable( + 'serd-pipe', + files('console.c', 'serd-pipe.c'), c_args: tool_c_args, dependencies: [serd_dep, zix_dep], install: true, link_args: tool_link_args, ) -meson.override_find_program('serdi', serdi) +meson.override_find_program('serd-pipe', serd_pipe) diff --git a/tools/serd-pipe.c b/tools/serd-pipe.c new file mode 100644 index 00000000..26a4a26a --- /dev/null +++ b/tools/serd-pipe.c @@ -0,0 +1,290 @@ +// Copyright 2011-2023 David Robillard +// SPDX-License-Identifier: ISC + +#include "console.h" + +#include "serd/env.h" +#include "serd/error.h" +#include "serd/input_stream.h" +#include "serd/node.h" +#include "serd/output_stream.h" +#include "serd/reader.h" +#include "serd/status.h" +#include "serd/stream.h" +#include "serd/string_view.h" +#include "serd/syntax.h" +#include "serd/world.h" +#include "serd/writer.h" + +#include +#include +#include +#include +#include + +#define SERDI_ERROR(msg) fprintf(stderr, "serd-pipe: " msg) +#define SERDI_ERRORF(fmt, ...) 
fprintf(stderr, "serd-pipe: " fmt, __VA_ARGS__) + +#define MAX_DEPTH 128U + +static int +print_usage(const char* const name, const bool error) +{ + static const char* const description = + "Read and write RDF syntax.\n" + "Use - for INPUT to read from standard input.\n\n" + " -a Write ASCII output.\n" + " -b Write output in blocks for performance.\n" + " -c PREFIX Chop PREFIX from matching blank node IDs.\n" + " -e Eat input one character at a time.\n" + " -f Fast and loose URI pass-through.\n" + " -h Display this help and exit.\n" + " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n" + " -k BYTES Parser stack size.\n" + " -l Lax (non-strict) parsing.\n" + " -o SYNTAX Output syntax: empty/turtle/ntriples/nquads.\n" + " -p PREFIX Add PREFIX to blank node IDs.\n" + " -q Suppress all output except data.\n" + " -r ROOT_URI Keep relative URIs within ROOT_URI.\n" + " -s INPUT Parse INPUT as string (terminates options).\n" + " -t Write terser output without newlines.\n" + " -v Display version information and exit.\n"; + + FILE* const os = error ? stderr : stdout; + fprintf(os, "%s", error ? "\n" : ""); + fprintf(os, "Usage: %s [OPTION]... INPUT [BASE_URI]\n", name); + fprintf(os, "%s", description); + return error ? 
1 : 0; +} + +static int +missing_arg(const char* const name, const char opt) +{ + SERDI_ERRORF("option requires an argument -- '%c'\n", opt); + return print_usage(name, true); +} + +static SerdStatus +quiet_error_func(void* const handle, const SerdError* const e) +{ + (void)handle; + (void)e; + return SERD_SUCCESS; +} + +int +main(int argc, char** argv) +{ + const char* const prog = argv[0]; + + SerdSyntax input_syntax = SERD_SYNTAX_EMPTY; + SerdSyntax output_syntax = SERD_SYNTAX_EMPTY; + SerdReaderFlags reader_flags = 0; + SerdWriterFlags writer_flags = 0; + bool from_string = false; + bool from_stdin = false; + bool bulk_read = true; + bool bulk_write = false; + bool osyntax_set = false; + bool quiet = false; + size_t stack_size = 1048576U; + const char* add_prefix = NULL; + const char* chop_prefix = NULL; + const char* root_uri = NULL; + int a = 1; + for (; a < argc && !from_string && argv[a][0] == '-'; ++a) { + if (argv[a][1] == '\0') { + from_stdin = true; + break; + } + + if (!strcmp(argv[a], "--help")) { + return print_usage(prog, false); + } + + if (!strcmp(argv[a], "--version")) { + return serd_print_version(argv[0]); + } + + for (int o = 1; argv[a][o]; ++o) { + const char opt = argv[a][o]; + + if (opt == 'a') { + writer_flags |= SERD_WRITE_ASCII; + } else if (opt == 'b') { + bulk_write = true; + } else if (opt == 'e') { + bulk_read = false; + } else if (opt == 'f') { + writer_flags |= (SERD_WRITE_UNQUALIFIED | SERD_WRITE_UNRESOLVED); + } else if (opt == 'h') { + return print_usage(prog, false); + } else if (opt == 'l') { + reader_flags |= SERD_READ_LAX; + writer_flags |= SERD_WRITE_LAX; + } else if (opt == 'q') { + quiet = true; + } else if (opt == 't') { + writer_flags |= SERD_WRITE_TERSE; + } else if (opt == 'v') { + return serd_print_version(argv[0]); + } else if (opt == 's') { + from_string = true; + break; + } else if (opt == 'c') { + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(prog, 'c'); + } + + chop_prefix = argv[a]; + break; + } 
else if (opt == 'i') { + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(prog, 'i'); + } + + if (!(input_syntax = serd_syntax_by_name(argv[a]))) { + return print_usage(prog, true); + } + break; + } else if (opt == 'k') { + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(prog, 'k'); + } + + char* endptr = NULL; + const long size = strtol(argv[a], &endptr, 10); + if (size <= 0 || size == LONG_MAX || *endptr != '\0') { + SERDI_ERRORF("invalid stack size '%s'\n", argv[a]); + return 1; + } + stack_size = (size_t)size; + break; + } else if (opt == 'o') { + osyntax_set = true; + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(prog, 'o'); + } + + if (!strcmp(argv[a], "empty")) { + output_syntax = SERD_SYNTAX_EMPTY; + } else if (!(output_syntax = serd_syntax_by_name(argv[a]))) { + return print_usage(argv[0], true); + } + break; + } else if (opt == 'p') { + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(prog, 'p'); + } + + add_prefix = argv[a]; + break; + } else if (opt == 'r') { + if (argv[a][o + 1] || ++a == argc) { + return missing_arg(prog, 'r'); + } + + root_uri = argv[a]; + break; + } else { + SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1); + return print_usage(prog, true); + } + } + } + + if (a == argc) { + SERDI_ERROR("missing input\n"); + return print_usage(prog, true); + } + + serd_set_stream_utf8_mode(stdin); + serd_set_stream_utf8_mode(stdout); + + const char* input = argv[a++]; + + if (!input_syntax && !(input_syntax = serd_guess_syntax(input))) { + input_syntax = SERD_TRIG; + } + + const bool input_has_graphs = serd_syntax_has_graphs(input_syntax); + if (!output_syntax && !osyntax_set) { + output_syntax = input_has_graphs ? 
SERD_NQUADS : SERD_NTRIPLES; + } + + SerdNode* base = NULL; + if (a < argc) { // Base URI given on command line + base = serd_new_uri(serd_string(argv[a])); + } else if (!from_string && !from_stdin) { // Use input file URI + base = serd_new_file_uri(serd_string(input), serd_empty_string()); + } + + FILE* const out_fd = stdout; + SerdWorld* const world = serd_world_new(); + SerdEnv* const env = + serd_env_new(base ? serd_node_string_view(base) : serd_empty_string()); + + SerdOutputStream out = serd_open_output_stream((SerdWriteFunc)fwrite, + (SerdErrorFunc)ferror, + (SerdCloseFunc)fclose, + out_fd); + + SerdWriter* const writer = serd_writer_new( + world, output_syntax, writer_flags, env, &out, bulk_write ? 4096U : 1U); + + const SerdLimits limits = {stack_size, MAX_DEPTH}; + serd_world_set_limits(world, limits); + + SerdReader* const reader = serd_reader_new( + world, input_syntax, reader_flags, serd_writer_sink(writer)); + + if (quiet) { + serd_world_set_error_func(world, quiet_error_func, NULL); + } + + if (root_uri) { + serd_writer_set_root_uri(writer, serd_string(root_uri)); + } + + serd_writer_chop_blank_prefix(writer, chop_prefix); + serd_reader_add_blank_prefix(reader, add_prefix); + + SerdStatus st = SERD_SUCCESS; + SerdNode* input_name = NULL; + const char* position = NULL; + SerdInputStream in = {NULL, NULL, NULL, NULL}; + size_t block_size = 1U; + if (from_string) { + position = input; + in = serd_open_input_string(&position); + input_name = serd_new_string(serd_string("string")); + } else if (from_stdin) { + in = serd_open_input_stream( + (SerdReadFunc)fread, (SerdErrorFunc)ferror, (SerdCloseFunc)fclose, stdin); + input_name = serd_new_string(serd_string("stdin")); + } else { + block_size = bulk_read ? 
4096U : 1U; + in = serd_open_input_file(input); + input_name = serd_new_string(serd_string(input)); + } + + if (!(st = serd_reader_start(reader, &in, input_name, block_size))) { + st = serd_reader_read_document(reader); + } + + serd_reader_finish(reader); + serd_reader_free(reader); + serd_writer_finish(writer); + serd_writer_free(writer); + serd_node_free(input_name); + serd_env_free(env); + serd_node_free(base); + serd_world_free(world); + + if (fclose(stdout)) { + perror("serd-pipe: write error"); + st = SERD_BAD_STREAM; + } + + return (st > SERD_FAILURE) ? 1 : 0; +} diff --git a/tools/serdi.c b/tools/serdi.c deleted file mode 100644 index fd7fdb15..00000000 --- a/tools/serdi.c +++ /dev/null @@ -1,290 +0,0 @@ -// Copyright 2011-2023 David Robillard -// SPDX-License-Identifier: ISC - -#include "console.h" - -#include "serd/env.h" -#include "serd/error.h" -#include "serd/input_stream.h" -#include "serd/node.h" -#include "serd/output_stream.h" -#include "serd/reader.h" -#include "serd/status.h" -#include "serd/stream.h" -#include "serd/string_view.h" -#include "serd/syntax.h" -#include "serd/world.h" -#include "serd/writer.h" - -#include -#include -#include -#include -#include - -#define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg) -#define SERDI_ERRORF(fmt, ...) 
fprintf(stderr, "serdi: " fmt, __VA_ARGS__) - -#define MAX_DEPTH 128U - -static int -print_usage(const char* const name, const bool error) -{ - static const char* const description = - "Read and write RDF syntax.\n" - "Use - for INPUT to read from standard input.\n\n" - " -a Write ASCII output.\n" - " -b Write output in blocks for performance.\n" - " -c PREFIX Chop PREFIX from matching blank node IDs.\n" - " -e Eat input one character at a time.\n" - " -f Fast and loose URI pass-through.\n" - " -h Display this help and exit.\n" - " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n" - " -k BYTES Parser stack size.\n" - " -l Lax (non-strict) parsing.\n" - " -o SYNTAX Output syntax: empty/turtle/ntriples/nquads.\n" - " -p PREFIX Add PREFIX to blank node IDs.\n" - " -q Suppress all output except data.\n" - " -r ROOT_URI Keep relative URIs within ROOT_URI.\n" - " -s INPUT Parse INPUT as string (terminates options).\n" - " -t Write terser output without newlines.\n" - " -v Display version information and exit.\n"; - - FILE* const os = error ? stderr : stdout; - fprintf(os, "%s", error ? "\n" : ""); - fprintf(os, "Usage: %s [OPTION]... INPUT [BASE_URI]\n", name); - fprintf(os, "%s", description); - return error ? 
1 : 0; -} - -static int -missing_arg(const char* const name, const char opt) -{ - SERDI_ERRORF("option requires an argument -- '%c'\n", opt); - return print_usage(name, true); -} - -static SerdStatus -quiet_error_func(void* const handle, const SerdError* const e) -{ - (void)handle; - (void)e; - return SERD_SUCCESS; -} - -int -main(int argc, char** argv) -{ - const char* const prog = argv[0]; - - SerdSyntax input_syntax = SERD_SYNTAX_EMPTY; - SerdSyntax output_syntax = SERD_SYNTAX_EMPTY; - SerdReaderFlags reader_flags = 0; - SerdWriterFlags writer_flags = 0; - bool from_string = false; - bool from_stdin = false; - bool bulk_read = true; - bool bulk_write = false; - bool osyntax_set = false; - bool quiet = false; - size_t stack_size = 1048576U; - const char* add_prefix = NULL; - const char* chop_prefix = NULL; - const char* root_uri = NULL; - int a = 1; - for (; a < argc && !from_string && argv[a][0] == '-'; ++a) { - if (argv[a][1] == '\0') { - from_stdin = true; - break; - } - - if (!strcmp(argv[a], "--help")) { - return print_usage(prog, false); - } - - if (!strcmp(argv[a], "--version")) { - return serd_print_version(argv[0]); - } - - for (int o = 1; argv[a][o]; ++o) { - const char opt = argv[a][o]; - - if (opt == 'a') { - writer_flags |= SERD_WRITE_ASCII; - } else if (opt == 'b') { - bulk_write = true; - } else if (opt == 'e') { - bulk_read = false; - } else if (opt == 'f') { - writer_flags |= (SERD_WRITE_UNQUALIFIED | SERD_WRITE_UNRESOLVED); - } else if (opt == 'h') { - return print_usage(prog, false); - } else if (opt == 'l') { - reader_flags |= SERD_READ_LAX; - writer_flags |= SERD_WRITE_LAX; - } else if (opt == 'q') { - quiet = true; - } else if (opt == 't') { - writer_flags |= SERD_WRITE_TERSE; - } else if (opt == 'v') { - return serd_print_version(argv[0]); - } else if (opt == 's') { - from_string = true; - break; - } else if (opt == 'c') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'c'); - } - - chop_prefix = argv[a]; - break; - } 
else if (opt == 'i') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'i'); - } - - if (!(input_syntax = serd_syntax_by_name(argv[a]))) { - return print_usage(prog, true); - } - break; - } else if (opt == 'k') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'k'); - } - - char* endptr = NULL; - const long size = strtol(argv[a], &endptr, 10); - if (size <= 0 || size == LONG_MAX || *endptr != '\0') { - SERDI_ERRORF("invalid stack size '%s'\n", argv[a]); - return 1; - } - stack_size = (size_t)size; - break; - } else if (opt == 'o') { - osyntax_set = true; - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'o'); - } - - if (!strcmp(argv[a], "empty")) { - output_syntax = SERD_SYNTAX_EMPTY; - } else if (!(output_syntax = serd_syntax_by_name(argv[a]))) { - return print_usage(argv[0], true); - } - break; - } else if (opt == 'p') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'p'); - } - - add_prefix = argv[a]; - break; - } else if (opt == 'r') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'r'); - } - - root_uri = argv[a]; - break; - } else { - SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1); - return print_usage(prog, true); - } - } - } - - if (a == argc) { - SERDI_ERROR("missing input\n"); - return print_usage(prog, true); - } - - serd_set_stream_utf8_mode(stdin); - serd_set_stream_utf8_mode(stdout); - - const char* input = argv[a++]; - - if (!input_syntax && !(input_syntax = serd_guess_syntax(input))) { - input_syntax = SERD_TRIG; - } - - const bool input_has_graphs = serd_syntax_has_graphs(input_syntax); - if (!output_syntax && !osyntax_set) { - output_syntax = input_has_graphs ? 
SERD_NQUADS : SERD_NTRIPLES; - } - - SerdNode* base = NULL; - if (a < argc) { // Base URI given on command line - base = serd_new_uri(serd_string(argv[a])); - } else if (!from_string && !from_stdin) { // Use input file URI - base = serd_new_file_uri(serd_string(input), serd_empty_string()); - } - - FILE* const out_fd = stdout; - SerdWorld* const world = serd_world_new(); - SerdEnv* const env = - serd_env_new(base ? serd_node_string_view(base) : serd_empty_string()); - - SerdOutputStream out = serd_open_output_stream((SerdWriteFunc)fwrite, - (SerdErrorFunc)ferror, - (SerdCloseFunc)fclose, - out_fd); - - SerdWriter* const writer = serd_writer_new( - world, output_syntax, writer_flags, env, &out, bulk_write ? 4096U : 1U); - - const SerdLimits limits = {stack_size, MAX_DEPTH}; - serd_world_set_limits(world, limits); - - SerdReader* const reader = serd_reader_new( - world, input_syntax, reader_flags, serd_writer_sink(writer)); - - if (quiet) { - serd_world_set_error_func(world, quiet_error_func, NULL); - } - - if (root_uri) { - serd_writer_set_root_uri(writer, serd_string(root_uri)); - } - - serd_writer_chop_blank_prefix(writer, chop_prefix); - serd_reader_add_blank_prefix(reader, add_prefix); - - SerdStatus st = SERD_SUCCESS; - SerdNode* input_name = NULL; - const char* position = NULL; - SerdInputStream in = {NULL, NULL, NULL, NULL}; - size_t block_size = 1U; - if (from_string) { - position = input; - in = serd_open_input_string(&position); - input_name = serd_new_string(serd_string("string")); - } else if (from_stdin) { - in = serd_open_input_stream( - (SerdReadFunc)fread, (SerdErrorFunc)ferror, (SerdCloseFunc)fclose, stdin); - input_name = serd_new_string(serd_string("stdin")); - } else { - block_size = bulk_read ? 
4096U : 1U; - in = serd_open_input_file(input); - input_name = serd_new_string(serd_string(input)); - } - - if (!(st = serd_reader_start(reader, &in, input_name, block_size))) { - st = serd_reader_read_document(reader); - } - - serd_reader_finish(reader); - serd_reader_free(reader); - serd_writer_finish(writer); - serd_writer_free(writer); - serd_node_free(input_name); - serd_env_free(env); - serd_node_free(base); - serd_world_free(world); - - if (fclose(stdout)) { - perror("serdi: write error"); - st = SERD_BAD_STREAM; - } - - return (st > SERD_FAILURE) ? 1 : 0; -} -- cgit v1.2.1