diff options
author | David Robillard <d@drobilla.net> | 2021-10-21 15:38:10 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2022-01-28 21:57:07 -0500 |
commit | b404312686874e539b617d1f27ccbaa5a82936af (patch) | |
tree | c2fdb2cc046e6da53071629cd1750dcc327e6cd9 | |
parent | d4aec28ba8ad24d5aef3ee12beeb1b805148eab1 (diff) | |
download | serd-b404312686874e539b617d1f27ccbaa5a82936af.tar.gz serd-b404312686874e539b617d1f27ccbaa5a82936af.tar.bz2 serd-b404312686874e539b617d1f27ccbaa5a82936af.zip |
Replace serdi with more fine-grained tools
Especially with the new functionality, the complexity of the command-line
interface alone was really becoming unmanageable. The serdi implementation
also had the highest cyclomatic complexity of the entire codebase by a huge
margin.
So, take a page from the Unix philosophy and split serdi into several more
finely-honed tools that can be freely composed. Though there is still
unfortunately quite a bit of option overlap between them due to the common
details of reading RDF, I think the resulting tools are a lot easier to
understand, both from a user and a developer perspective.
64 files changed, 3228 insertions, 1132 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6e9fc1d9..7a745c08 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -47,8 +47,10 @@ x64_dbg: - ninja -C build coverage-html - mkdir -p build/doc/ - cp doc/*.svg build/doc/ - - cp doc/mandoc.css build/doc/ - - mandoc -Thtml -Werror -O style=mandoc.css doc/serdi.1 > build/doc/serdi.html + - cp doc/man/mandoc.css build/doc/ + - mandoc -Thtml -Wwarning -O style=mandoc.css,man=%N.html doc/man/serd-filter.1 > build/doc/serd-filter.html + - mandoc -Thtml -Wwarning -O style=mandoc.css,man=%N.html doc/man/serd-pipe.1 > build/doc/serd-pipe.html + - mandoc -Thtml -Wwarning -O style=mandoc.css,man=%N.html doc/man/serd-sort.1 > build/doc/serd-sort.html artifacts: paths: - build/doc @@ -167,8 +169,7 @@ pages: - mv build/meson-logs/coveragereport/ public/coverage - mv build/doc/c/html/ public/c/html/ - mv build/doc/c/singlehtml/ public/c/singlehtml/ - - mv build/doc/serdi.html public/man/serdi.html - - mv build/doc/mandoc.css public/man/mandoc.css + - mv build/doc/man/ public/man/ dependencies: - x64_dbg artifacts: @@ -36,10 +36,10 @@ Features Performance ----------- -The benchmarks below compare `serdi`, [rapper][], and [riot][] re-serialising +The benchmarks below compare `serd-pipe`, [rapper][], and [riot][] rewriting Turtle data generated by [sp2b][] on an i7-4980HQ running Debian 9. Of the -three, `serdi` is the fastest by a wide margin, and the only one that uses a -constant amount of memory (a single page) for all input sizes. +three, `serd-pipe` is the fastest by a wide margin, and the only one that uses +a constant amount of memory (a single page) for all input sizes. 
![Time](doc/serdi-time.svg) ![Throughput](doc/serdi-throughput.svg) diff --git a/doc/serd-filter.1 b/doc/serd-filter.1 new file mode 100644 index 00000000..44b3f861 --- /dev/null +++ b/doc/serd-filter.1 @@ -0,0 +1,185 @@ +.Dd October 21, 2021 +.Dt SERD-FILTER 1 +.Os Serd +.Sh NAME +.Nm serd-filter +.Nd print RDF statements that match a pattern +.Sh SYNOPSIS +.Nm serd-filter +.Op Fl hVv +.Op Fl B Ar base +.Op Fl I Ar syntax +.Op Fl O Ar syntax +.Op Fl b Ar bytes +.Op Fl f Ar pattern_file +.Op Fl k Ar bytes +.Op Fl o Ar filename +.Ar pattern +.Ar input ... +.Sh DESCRIPTION +.Nm +scans for statements in RDF data. +Its interface is similar to +.Xr grep 1 , +except patterns are structural: +instead of matching characters within a line, +.Nm +matches nodes within a statement. +.Pp +Data is read from files or standard input, +and only those statements that match the pattern +(or do not match the pattern, if +.Fl v +is given) are written. +By default, +the input syntax is guessed from the file extension, +and line-based output is written to standard output. +.Pp +Patterns are written in NTriples or NQuads with an extension that allows variables like +.Li ?some +or +.Li $thing . +.Pp +The +.Ar input +operands are processed in command-line order. +If +.Ar input +is +.Ar - +or absent, +.Nm +reads from standard input. +.Pp +The options are as follows: +.Pp +.Bl -tag -compact -width 3n +.It Fl B Ar base +Base URI, path, or +.Cm rebase +to use the output path. +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl I Ar syntax +Input syntax or option: +.Cm NQuads , +.Cm NTriples , +.Cm TriG , +.Cm Turtle , +.Cm lax , +.Cm variables , +.Cm relative , +or +.Cm labels . +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl O Ar syntax +Output syntax or option: +.Cm empty , +.Cm NQuads , +.Cm NTriples , +.Cm TriG , +.Cm Turtle , +.Cm ascii , +.Cm expanded , +.Cm verbatim , +.Cm terse , +or +.Cm lax . +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl V +Display version information and exit. 
+.Pp +.It Fl b Ar bytes +I/O block size. +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl f Ar pattern_file +Load pattern from +.Ar pattern_file +instead of the first positional argument. +.Pp +.It Fl h +Print the command line options. +.Pp +.It Fl k Ar bytes +Parser stack size. +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl o Ar filename +Write output to the given +.Ar filename +instead of stdout. +.Pp +.It Fl v +Invert filter to only emit statements that do +.Em not +match the pattern. +.El +.Sh EXIT STATUS +.Nm +exits with a status of 0, or non-zero if an error occurred. +.Sh EXAMPLES +To print all type statements: +.Pp +.Dl $ serd-filter '?subject a ?type .' input.ttl +.Pp +To print every statement about http://example.org/subject: +.Pp +.Dl $ serd-filter '<http://example.org/subject> ?p ?o .' input.ttl +.Sh SEE ALSO +.Bl -item -compact +.It +.Xr serd-pipe 1 +.It +.Xr serd-sort 1 +.It +.Lk http://drobilla.net/software/serd/ +.El +.Sh STANDARDS +.Bl -item -compact +.It +.Rs +.%A W3C +.%T RDF 1.1 NQuads +.%D February 2014 +.Re +.Lk https://www.w3.org/TR/n-quads/ +.It +.Rs +.%A W3C +.%D February 2014 +.%T RDF 1.1 NTriples +.Re +.Lk https://www.w3.org/TR/n-triples/ +.It +.Rs +.%A W3C +.%T RDF 1.1 TriG +.%D February 2014 +.Re +.Lk https://www.w3.org/TR/trig/ +.It +.Rs +.%A W3C +.%D February 2014 +.%T RDF 1.1 Turtle +.Re +.Lk https://www.w3.org/TR/turtle/ +.El +.Sh AUTHORS +.Nm +is a part of serd, by +.An David Robillard +.Mt d@drobilla.net . diff --git a/doc/serd-pipe.1 b/doc/serd-pipe.1 new file mode 100644 index 00000000..c7f77c9e --- /dev/null +++ b/doc/serd-pipe.1 @@ -0,0 +1,349 @@ +.Dd October 21, 2021 +.Dt SERD-PIPE 1 +.Os Serd +.Sh NAME +.Nm serd-pipe +.Nd read and write RDF data +.Sh SYNOPSIS +.Nm serd-pipe +.Op Fl ChV +.Op Fl B Ar base +.Op Fl I Ar syntax +.Op Fl O Ar syntax +.Op Fl R Ar root +.Op Fl b Ar bytes +.Op Fl k Ar bytes +.Op Fl o Ar filename +.Op Fl s Ar string +.Op Ar input ... 
+.Sh DESCRIPTION +.Nm +is a fast command-line utility for streaming RDF data. +It reads one or more files and writes the data again, +possibly in a different form. +By default, +the input syntax is guessed from the file extension, +and line-based output is written to standard output. +.Pp +.Nm +writes statements as they are read, in the same order. +It uses very little memory and can process arbitrarily large files, +either directly or as part of a pipeline. +It is useful for things like checking syntax, +converting to a different syntax, +pretty-printing documents, +merging files, +expanding URIs, +and so on. +.Pp +The simplest usage is to use files for both input and output. +This way, reasonable options are chosen by default based on the filename. +For example, most common tasks can be accomplished with simple commands like: +.Pp +.Dl $ serd-pipe -o pretty.ttl input.nt +.Pp +The +.Ar input +operands are processed in command-line order. +If +.Ar input +is +.Ar - +or absent, +.Nm +reads from standard input. +.Pp +The options are as follows: +.Pp +.Bl -tag -compact -width 3n +.It Fl B Ar base +Base URI, path, or +.Cm rebase +to use the output path. +This is used to resolve any relative URI references in the input. +.Pp +If the input is a file, +its URI is used as the base by default. +This causes relative references to be written just as they are in the input. +Note, however, that this may not be desired if the output is in a different directory. +For example, +.Li <file.ttl> +would not point to the same file from the new location. +.Pp +The special +.Cm rebase +argument will instead use the output filename set by the +.Fl o +option. +This will write references relative to the output file, +so that parsing it will produce the same absolute URIs as the original input. +For example, +the above may be written as +.Li <../file.ttl> +if the output is written to some sibling directory. 
+.Pp +Generally, the default is best when copying data along with other bundled files, +while +.Cm rebase +is best for writing data in a new location which still refers to the original paths. +.Pp +These options are intended to make the most common tasks as simple as possible. +An arbitrary base URI can also be given explicitly. +.Pp +.It Fl C +Convert literals to canonical form. +Literals with supported XSD datatypes will be parsed and rewritten canonically. +Invalid literals will cause an error. +All numeric datatypes are supported, as well as +.Vt boolean , +.Vt duration , +.Vt datetime , +.Vt time , +.Vt hexBinary , +and +.Vt base64Binary . +.Pp +.It Fl I Ar syntax +Set an input syntax or option. +May be given multiple times. +The case-insensitive +.Ar syntax +can be +.Cm NQuads , +.Cm NTriples , +.Cm TriG , +.Cm Turtle , +or one of the following options: +.Pp +.Bl -tag -width "QvariablesQ" -compact -offset indent +.It Cm lax +Tolerate invalid input where possible. +Warnings will be printed for syntax errors, +but parsing will attempt to continue. +Note that data may be lost when using this option! +.Pp +.It Cm variables +Support parsing variable nodes. +Variables can be written in SPARQL style, for example +.Li ?name +or +.Li $name . +.Pp +.It Cm relative +Read relative URI references exactly without resolving them. +Normally, all relative URIs are expanded against the base URI when reading. +This flag disables that, +so URI references will be passed through exactly as they are in the input. +.Pp +.It Cm global +Assume a clean global namespace for blank node labels, +and do not automatically add prefixes. +Normally, +a prefix like +.Li f1 +is added to blank node labels when reading multiple files, +to prevent labels in different files from clashing. +This option disables that, +so blank node labels will be passed through without any added prefix. +Note that this may corrupt the output by merging distinct blank nodes. 
+.Pp +.It Cm generated +Read seemingly generated blank node labels exactly without adjusting them. +Normally, blank node labels like +.Li b123 +are adapted to avoid potential clashes with generated ones. +This flag disables that, +so such labels will be passed through exactly as they are in the input. +Note that this may corrupt the output by merging distinct blank nodes. +.El +.Pp +.It Fl O Ar syntax +Set an output syntax or option. +May be given multiple times. +The case-insensitive +.Ar syntax +can be +.Cm empty , +.Cm NQuads , +.Cm NTriples , +.Cm TriG , +.Cm Turtle , +or one of the following options: +.Pp +.Bl -tag -width "QverbatimQ" -compact -offset indent +.It Cm ascii +Escape all non-ASCII characters. +Normally, text is written in UTF-8. +This flag will escape non-ASCII characters in text as Unicode code points like +.Li \eU00B7 or +.Li \eU0001F600 . +.Pp +.It Cm expanded +Write expanded URIs instead of prefixed names. +.Pp +.It Cm verbatim +Write URI references exactly as they are in the input. +This avoids resolving URIs and making them relative to the output base URI. +.Pp +.It Cm terse +Write terser output without newlines. +This can be useful for writing a line-based description of suitably structured data. +.Pp +.It Cm lax +Tolerate invalid UTF-8 by writing the replacement character when necessary. +Note that data may be lost when using this option! +.El +.Pp +The +.Cm empty +syntax suppresses the output, +so that only warnings and errors will be printed. +.Pp +.It Fl R Ar root +Keep relative URIs within a +.Ar root +URI. +This will avoid creating any relative URI references with leading path segments like +.Pa ../ +that enter a parent of +.Ar root . +.Pp +For example, +if +.Pa /home/you/file.ttl +is written to the file +.Pa /home/me/output.ttl +using +.Fl B Cm rebase , +then it will be written as +.Li <../you/file.ttl> . 
+Setting +.Fl R Pa /home/me/ +would prevent references from +.Dq escaping +like this, +so the above would instead be written as +.Li <file:///home/you/file.ttl> . +.Pp +This is useful for making relocatable +.Dq bundles +of resources, +since it can keep all relative references within the bundle, +while still allowing up-references to be used. +.Pp +.It Fl V +Display version information and exit. +.Pp +.It Fl b Ar bytes +I/O block size. +This is the number of bytes in a file that will be read or written at once. +The default is 4096, which should perform well in most cases. +Note that this only applies to files, standard input and output are always processed one byte at a time. +.Pp +.It Fl h +Print the command line options. +.Pp +.It Fl k Ar bytes +Parser stack size. +For performance and security reasons, parsing is performed with a fixed-size stack. +This option sets a hard limit on the total amount of space used for parsing. +The default is 1 megabyte, which should be more than enough for most data. +This option can be used to reduce memory consumption, +or to enable parsing documents with extremely deep nesting or extremely large literal values. +.Pp +.It Fl o Ar filename +Write output to the given +.Ar filename +instead of stdout. +.Pp +.It Fl s Ar string +Parse +.Ar string +as input. +.El +.Sh ENVIRONMENT +Error messages and warnings are printed in color by default if the output is a terminal. +This can be controlled by common environment variables: +.Pp +.Bl -tag -compact -width 14n +.It Ev NO_COLOR +If present (regardless of value), color is disabled. +.It Ev CLICOLOR +If set to 0, color is disabled. +.It Ev CLICOLOR_FORCE +If set to anything other than 0, color is forced on. +.El +.Pp +See +.Lk http://no-color.org/ +and +.Lk https://bixense.com/clicolors/ +for details. +.Sh EXIT STATUS +.Nm +exits with a status of 0, or non-zero if an error occurred. 
+.Sh EXAMPLES +To print an NTriples file as Turtle: +.Pp +.Dl $ serd-pipe -O turtle input.nt +.Pp +To print only errors and discard the output: +.Pp +.Dl $ serd-pipe -O empty input.ttl +.Pp +To pretty-print a file: +.Pp +.Dl $ serd-pipe -o pretty.ttl input.ttl +.Pp +To expand all prefixed names into full URIs: +.Pp +.Dl $ serd-pipe -O expanded -o expanded.ttl input.ttl +.Pp +To merge two files: +.Pp +.Dl $ serd-pipe -o merged.ttl header.ttl body.ttl +.Sh SEE ALSO +.Bl -item -compact +.It +.Xr serd-filter 1 +.It +.Xr serd-sort 1 +.It +.Lk http://drobilla.net/software/serd/ +.El +.Sh STANDARDS +.Bl -item -compact +.It +.Rs +.%A W3C +.%T RDF 1.1 NQuads +.%D February 2014 +.Re +.Lk https://www.w3.org/TR/n-quads/ +.It +.Rs +.%A W3C +.%D February 2014 +.%T RDF 1.1 NTriples +.Re +.Lk https://www.w3.org/TR/n-triples/ +.It +.Rs +.%A W3C +.%T RDF 1.1 TriG +.%D February 2014 +.Re +.Lk https://www.w3.org/TR/trig/ +.It +.Rs +.%A W3C +.%D February 2014 +.%T RDF 1.1 Turtle +.Re +.Lk https://www.w3.org/TR/turtle/ +.El +.Sh AUTHORS +.Nm +is a part of serd, by +.An David Robillard +.Mt d@drobilla.net . diff --git a/doc/serd-sort.1 b/doc/serd-sort.1 new file mode 100644 index 00000000..2d019ae3 --- /dev/null +++ b/doc/serd-sort.1 @@ -0,0 +1,194 @@ +.Dd October 21, 2021 +.Dt SERD-SORT 1 +.Os Serd +.Sh NAME +.Nm serd-sort +.Nd reorder RDF statements +.Sh SYNOPSIS +.Nm serd-sort +.Op Fl htV +.Op Fl B Ar base +.Op Fl I Ar syntax +.Op Fl O Ar syntax +.Op Fl b Ar bytes +.Op Fl c Ar collation +.Op Fl f Ar pattern_file +.Op Fl k Ar bytes +.Op Fl o Ar filename +.Ar pattern +.Ar input ... +.Sh DESCRIPTION +.Nm +reorders statements in RDF data by loading everything into memory then rewriting it. +By default, +a +.Dq pretty +ordering is used which is ideal for pretty-printing to Turtle or TriG. +The +.Fl c +option can be used to request a specific ordering, +which is mainly useful when emitting a line-based syntax like NTriples or NQuads in a pipeline. 
+.Pp +The +.Ar input +operands are processed in command-line order. +If +.Ar input +is +.Ar - +or absent, +.Nm +reads from standard input. +.Pp +The options are as follows: +.Pp +.Bl -tag -compact -width 3n +.It Fl B Ar base +Base URI, path, or +.Cm rebase +to use the output path. +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl I Ar syntax +Input syntax or option: +.Cm NQuads , +.Cm NTriples , +.Cm TriG , +.Cm Turtle , +.Cm lax , +.Cm variables , +.Cm relative , +or +.Cm labels . +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl O Ar syntax +Output syntax or option: +.Cm empty , +.Cm NQuads , +.Cm NTriples , +.Cm TriG , +.Cm Turtle , +.Cm ascii , +.Cm expanded , +.Cm verbatim , +.Cm terse , +or +.Cm lax . +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl V +Display version information and exit. +.Pp +.It Fl b Ar bytes +I/O block size. +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl c Ar collation +A specific collation (statement ordering) to use. +This can be any ordering of the characters +.Dq SPO , +which stand for the subject, predicate, and object of statements. +Optionally, +.Dq G +can be added as the first character, +which will sort graph-first. +Concretely, the valid values are: +.Cm SPO , +.Cm SOP , +.Cm OPS , +.Cm OSP , +.Cm PSO , +.Cm POS , +.Cm GSPO , +.Cm GSOP , +.Cm GOPS , +.Cm GOSP , +.Cm GPSO , +and +.Cm GPOS . +.Pp +.It Fl h +Print the command line options. +.Pp +.It Fl k Ar bytes +Parser stack size. +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl o Ar filename +Write output to the given +.Ar filename +instead of stdout. +.Pp +.It Fl t +Do not write type as +.Dq a +before other properties. +Instead, rdf:type will be written in order like any other property. +.El +.Sh EXIT STATUS +.Nm +exits with a status of 0, or non-zero if an error occurred. 
+.Sh EXAMPLES +To pretty-print a file: +.Pp +.Dl $ serd-sort -o pretty.ttl input.ttl +.Pp +To print statements ordered by predicate, subject, then object: +.Pp +.Dl $ serd-sort -c PSO input.ttl +.Sh SEE ALSO +.Bl -item -compact +.It +.Xr serd-pipe 1 +.It +.Xr serd-filter 1 +.It +.Lk http://drobilla.net/software/serd/ +.El +.Sh STANDARDS +.Bl -item -compact +.It +.Rs +.%A W3C +.%T RDF 1.1 NQuads +.%D February 2014 +.Re +.Lk https://www.w3.org/TR/n-quads/ +.It +.Rs +.%A W3C +.%D February 2014 +.%T RDF 1.1 NTriples +.Re +.Lk https://www.w3.org/TR/n-triples/ +.It +.Rs +.%A W3C +.%T RDF 1.1 TriG +.%D February 2014 +.Re +.Lk https://www.w3.org/TR/trig/ +.It +.Rs +.%A W3C +.%D February 2014 +.%T RDF 1.1 Turtle +.Re +.Lk https://www.w3.org/TR/turtle/ +.El +.Sh AUTHORS +.Nm +is a part of serd, by +.An David Robillard +.Mt d@drobilla.net . diff --git a/doc/serdi.1 b/doc/serdi.1 deleted file mode 100644 index c6356953..00000000 --- a/doc/serdi.1 +++ /dev/null @@ -1,265 +0,0 @@ -.Dd April 14, 2021 -.Dt SERDI 1 -.Os Serd 0.30.11 -.Sh NAME -.Nm serdi -.Nd read, filter, transform, and write RDF data -.Sh SYNOPSIS -.Nm serdi -.Op Fl Cfhmqv -.Op Fl F Ar pattern | Fl G Ar pattern -.Op Fl I Ar base -.Op Fl b Ar bytes -.Op Fl i Ar syntax -.Op Fl k Ar bytes -.Op Fl o Ar syntax -.Op Fl r Ar root -.Op Fl s Ar string -.Op Fl w Ar filename -.Ar input ... -.Sh DESCRIPTION -.Nm -is a fast command-line utility for processing RDF data. -It reads one or more documents and writes the data again, -possibly transformed and/or in a different syntax. -By default, -the input syntax is guessed from the file extension, -and output is written in NTriples or NQuads. -.Pp -.Nm -can be used to check for syntax errors, -convert from one syntax to another, -pretty-print documents, -or transform URIs and blank node IDs. -.Pp -The options are as follows: -.Pp -.Bl -tag -compact -width 3n -.It Fl C -Convert literals to canonical form. 
-Literals with supported XSD datatypes will be parsed and rewritten canonically. -All numeric datatypes are supported, as well as -.Vt boolean , -.Vt duration , -.Vt datetime , -.Vt time , -.Vt hexBinary , -and -.Vt base64Binary . -.Pp -.It Fl F Ar pattern -Filter out statements that match -.Ar pattern . -The pattern must be a single statement written in NTriples or NQuads, -with variables like -.Dq ?name -for wildcards. -The names of variables in the pattern are insignificant. -.Pp -.It Fl G Ar pattern -Only include statements that match -.Ar pattern . -This option is like -.Fl p -but inverted, -so that only matching statements are included, like grep. -.Pp -.It Fl I Ar base -Input base URI. -Relative URI references in the input will be resolved against this. -When the input is a file, -the URI of the file is automatically used as the base URI. -This option can be used to override that, -or to provide a base URI for input from stdin or a string. -.Pp -.It Fl b Ar bytes -I/O block size. -This is the number of bytes in a file that will be read or written at once. -The default is 4096, which should perform well in most cases. -Note that this only applies to files, standard input and output are always processed one byte at a time. -.Pp -.It Fl f -Fast and loose mode. -This disables shortening URIs into prefixed names or relative URI references. -If the model is enabled, then this writes the model quickly in sorted order. -Note that doing so with TriG or Turtle may make the output ugly, -since blank nodes will not be inlined. -.Pp -.It Fl h -Print the command line options. -.Pp -.It Fl i Ar syntax -Set an input syntax option. -May be given multiple times. -The case-insensitive -.Ar syntax -can be either a syntax name or an input syntax option. -The supported syntaxes are -.Dq NQuads , -.Dq NTriples , -.Dq TriG , -and -.Dq Turtle . 
-.Pp -The supported input options are: -.Pp -.Bl -tag -width "QvariablesQ" -compact -offset indent -.It Dq lax -Tolerate invalid input where possible. -Warnings will be printed on syntax errors, -but parsing will attempt to continue. -Note that data may be lost when using this option! -.Pp -.It Dq variables -Support parsing variable nodes. -Variables can be written in SPARQL style, for example -.Dq ?var -or -.Dq $var . -.Pp -.It Dq verbatim -Normally, the reader expands all relative URIs, -and may adjust blank node labels to avoid clashing with generated ones. -This flag disables all of this processing, -so that URI references and blank nodes are passed to the sink exactly as they are in the input. -Note that this does not apply to CURIEs, since serd deliberately does not -have a way to represent CURIE nodes. A bad namespace prefix is considered -a syntax error. -.El -.Pp -.It Fl k Ar bytes -Parser stack size. -For performance and security reasons, parsing is performed with a fixed-size stack. -By default, the stack is 4096 bytes, which should be sufficient for most data. -If some data has very deep nesting or very large literal values, -it may exceed the default amount of space, -and this option can be used to increase it and allow the document to be parsed successfully. -.Pp -.It Fl m -Build a model in memory. -This loads all of the input into memory before writing the output. -This will reorder statements and eliminate duplicates, at the cost of performance and memory consumption. -When writing TriG or Turtle, this may enable better pretty-printing with more inline descriptions. -.Pp -.It Fl o Ar syntax -Set an output syntax option. -May be given multiple times. -The case-insensitive -.Ar syntax -can be either a syntax name or an output syntax option. -The supported syntaxes are -.Dq empty , -.Dq NQuads , -.Dq NTriples , -.Dq TriG , -and -.Dq Turtle . 
-.Pp -The supported output options are: -.Pp -.Bl -tag -width "QverbatimQ" -compact -offset indent -.It Dq ascii -Escape all non-ASCII characters. -.Pp -.It Dq expanded -Write expanded URIs instead of prefixed names. -.Pp -.It Dq verbatim -Write URI references exactly as they are in the input. -This avoids resolving URIs and making them relative to the output base URI. -.Pp -.It Dq terse -Write terser output without newlines. -.Pp -.It Dq lax -Tolerate invalid UTF-8 by writing the replacement character when necessary. -Note that data may be lost when using this option! -.El -.Pp -.It Fl q -Suppress all output except data. -.Pp -.It Fl r Ar root -Keep relative URIs within a -.Ar root -URI. -This will avoid creating any relative URI references with leading path segments like -.Dq ../ -that enter a parent of -.Ar root . -.Pp -.It Fl s Ar string -Parse -.Ar string -as input. -.Pp -.It Fl v -Display version information and exit. -.Pp -.It Fl w Ar filename -Write output to the given -.Ar filename -instead of stdout. -.El -.Sh EXIT STATUS -.Nm -exits with a status of 0, or non-zero if an error occured. 
-.Sh EXAMPLES -To pretty-print a document: -.Pp -.Dl $ serdi -o turtle file.ttl > out.ttl -.Pp -To print any errors: -.Pp -.Dl $ serdi file.ttl > /dev/null -.Pp -To remove any rdf:type properties: -.Pp -.Dl $ serdi -F \(dq?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ?o .\(dq file.ttl -.Pp -To include only rdf:type properties: -.Pp -.Dl $ serdi -G \(dq?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ?o .\(dq file.ttl -.Sh SEE ALSO -.Bl -item -compact -.It -.Lk http://drobilla.net/software/serd/ -.It -.Lk http://gitlab.com/drobilla/serd/ -.El -.Sh STANDARDS -.Bl -item -.It -.Rs -.%A W3C -.%T RDF 1.1 NQuads -.%D February 2014 -.Re -.Lk https://www.w3.org/TR/n-quads/ -.It -.Rs -.%A W3C -.%D February 2014 -.%T RDF 1.1 NTriples -.Re -.Lk https://www.w3.org/TR/n-triples/ -.It -.Rs -.%A W3C -.%T RDF 1.1 TriG -.%D February 2014 -.Re -.Lk https://www.w3.org/TR/trig/ -.It -.Rs -.%A W3C -.%D February 2014 -.%T RDF 1.1 Turtle -.Re -.Lk https://www.w3.org/TR/turtle/ -.El -.Sh AUTHORS -.Nm -is a part of serd, by -.An David Robillard -.Mt d@drobilla.net . 
diff --git a/meson.build b/meson.build index bddb7392..f75ce560 100644 --- a/meson.build +++ b/meson.build @@ -195,12 +195,14 @@ pkg.generate( version: meson.project_version(), description: 'A lightweight library for working with RDF') -# Build serdi command line utility +# Build command line tools if get_option('tools') subdir('tools') if not get_option('docs').disabled() - install_man('doc/serdi.1') + install_man('doc/serd-filter.1') + install_man('doc/serd-pipe.1') + install_man('doc/serd-sort.1') endif endif diff --git a/scripts/serd_bench.py b/scripts/serd_bench.py index 8a10dab0..25b75fe8 100755 --- a/scripts/serd_bench.py +++ b/scripts/serd_bench.py @@ -43,7 +43,7 @@ def gen(sp2b_dir, n_min, n_max, step): def write_header(results, progs): "Write the header line for TSV output" - results.write("n\tserdi_stream\tserdi_model") + results.write("n\tserd-pipe\tserd-sort") for prog in progs[2:]: results.write("\t" + os.path.basename(prog.split()[0])) results.write("\n") @@ -125,9 +125,9 @@ def run(progs, n_min, n_max, step): "Benchmark each program with n_min ... 
n_max statements" with WorkingDirectory("build"): results = { - "time": open("serdi-time.txt", "w"), - "throughput": open("serdi-throughput.txt", "w"), - "memory": open("serdi-memory.txt", "w"), + "time": open("serd-time.txt", "w"), + "throughput": open("serd-throughput.txt", "w"), + "memory": open("serd-memory.txt", "w"), } # Write TSV header for all output files @@ -169,20 +169,20 @@ def plot_results(): "Plot all benchmark results" with WorkingDirectory("build"): plot( - open("serdi-time.txt", "r"), - "serdi-time.svg", + open("serd-time.txt", "r"), + "serd-time.svg", "Statements", "Time (s)", ) plot( - open("serdi-throughput.txt", "r"), - "serdi-throughput.svg", + open("serd-throughput.txt", "r"), + "serd-throughput.svg", "Statements", "Statements / s", ) plot( - open("serdi-memory.txt", "r"), - "serdi-memory.svg", + open("serd-memory.txt", "r"), + "serd-memory.svg", "Statements", "Bytes", ) @@ -226,8 +226,8 @@ example: args = ap.parse_args(sys.argv[1:]) progs = [ - "serdi -i turtle -o turtle", - "serdi -m -i turtle -o turtle", + "serd-pipe -I turtle -O turtle", + "serd-sort -I turtle -O turtle", ] + args.run min_n = int(args.max / 10) @@ -15,7 +15,7 @@ doap:homepage <http://drobilla.net/software/serd> ; doap:license <http://opensource.org/licenses/isc> ; doap:shortdesc "A high-performance RDF reader/writer" ; - doap:description "Serd is a lightweight high-performance C library for reading and writing RDF in the Turtle, NTriples, TriG, and NQuads syntaxes." ; + doap:description "Serd is a fast and lightweight C library for reading and writing RDF in Turtle, NTriples, TriG, and NQuads." 
; doap:created "2011-09-28"^^xsd:date ; doap:programming-language "C" ; doap:implements <http://www.w3.org/TR/n-quads/> , diff --git a/src/.clang-tidy b/src/.clang-tidy index 5cf5e873..6daee064 100644 --- a/src/.clang-tidy +++ b/src/.clang-tidy @@ -7,9 +7,7 @@ Checks: > -bugprone-branch-clone, -bugprone-easily-swappable-parameters, -bugprone-reserved-identifier, - -bugprone-suspicious-string-compare, -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling, - -concurrency-mt-unsafe, -hicpp-multiway-paths-covered, -hicpp-signed-bitwise, -llvm-header-guard, diff --git a/test/NQuadsTests/meson.build b/test/NQuadsTests/meson.build index 4fe84dd5..deaf41e8 100644 --- a/test/NQuadsTests/meson.build +++ b/test/NQuadsTests/meson.build @@ -7,8 +7,15 @@ args = [ ] test('NQuads', - run_test_suite, - args: script_args + args, + run_pipe_suite, + args: pipe_test_script_args + args, env: test_env, - suite: ['suite', 'w3c'], + suite: ['suite', 'w3c', 'pipe'], + timeout: 240) + +test('NQuads', + run_sort_suite, + args: sort_test_script_args + args, + env: test_env, + suite: ['suite', 'w3c', 'sort'], timeout: 240) diff --git a/test/NTriplesTests/meson.build b/test/NTriplesTests/meson.build index cf773c64..f608e973 100644 --- a/test/NTriplesTests/meson.build +++ b/test/NTriplesTests/meson.build @@ -7,8 +7,15 @@ args = [ ] test('NTriples', - run_test_suite, - args: script_args + args, + run_pipe_suite, + args: pipe_test_script_args + args, env: test_env, - suite: ['suite', 'w3c'], + suite: ['suite', 'w3c', 'pipe'], + timeout: 240) + +test('NTriples', + run_sort_suite, + args: sort_test_script_args + args, + env: test_env, + suite: ['suite', 'w3c', 'sort'], timeout: 240) diff --git a/test/TriGTests/meson.build b/test/TriGTests/meson.build index e7c305e0..95641b44 100644 --- a/test/TriGTests/meson.build +++ b/test/TriGTests/meson.build @@ -7,8 +7,15 @@ args = [ ] test('TriG', - run_test_suite, - args: script_args + args, + run_pipe_suite, + args: pipe_test_script_args + 
args, env: test_env, - suite: ['suite', 'w3c'], + suite: ['suite', 'w3c', 'pipe'], + timeout: 240) + +test('TriG', + run_sort_suite, + args: sort_test_script_args + args, + env: test_env, + suite: ['suite', 'w3c', 'sort'], timeout: 240) diff --git a/test/TurtleTests/meson.build b/test/TurtleTests/meson.build index 492e1fe0..7a6d1475 100644 --- a/test/TurtleTests/meson.build +++ b/test/TurtleTests/meson.build @@ -7,8 +7,15 @@ args = [ ] test('Turtle', - run_test_suite, - args: script_args + args, + run_pipe_suite, + args: pipe_test_script_args + args, env: test_env, - suite: ['suite', 'w3c'], + suite: ['suite', 'w3c', 'pipe'], + timeout: 240) + +test('Turtle', + run_sort_suite, + args: sort_test_script_args + args, + env: test_env, + suite: ['suite', 'w3c', 'sort'], timeout: 240) diff --git a/test/bad/meson.build b/test/bad/meson.build index 9c423367..2c99bbac 100644 --- a/test/bad/meson.build +++ b/test/bad/meson.build @@ -1,8 +1,15 @@ base_uri = 'http://drobilla.net/sw/serd/test/bad/' test('bad', - run_test_suite, - args: script_args + [files('manifest.ttl'), base_uri], + run_pipe_suite, + args: pipe_test_script_args + [files('manifest.ttl'), base_uri], env: test_env, suite: ['suite', 'extra'], timeout: 240) + +test('bad', + run_sort_suite, + args: sort_test_script_args + [files('manifest.ttl'), base_uri], + env: test_env, + suite: ['suite', 'extra', 'sort'], + timeout: 240) diff --git a/test/canon/meson.build b/test/canon/meson.build index 11d95469..f73a3527 100644 --- a/test/canon/meson.build +++ b/test/canon/meson.build @@ -1,8 +1,8 @@ base_uri = 'http://drobilla.net/sw/serd/test/canon/' test('canon', - run_test_suite, - args: script_args + [ + run_pipe_suite, + args: pipe_test_script_args + [ files('manifest.ttl'), base_uri, '--', diff --git a/test/filter/input.ttl b/test/filter/input.ttl new file mode 100644 index 00000000..59aa67f7 --- /dev/null +++ b/test/filter/input.ttl @@ -0,0 +1,9 @@ +@prefix eg: <http://example.org/> . 
+ +eg:s1 + eg:p1 eg:o1 ; + eg:p2 eg:o2 . + +eg:s2 + eg:p1 eg:o1 ; + eg:p2 eg:o2 . diff --git a/test/filter/manifest.ttl b/test/filter/manifest.ttl new file mode 100644 index 00000000..59ce3f55 --- /dev/null +++ b/test/filter/manifest.ttl @@ -0,0 +1,48 @@ +@prefix checks: <http://drobilla.net/ns/serd/checks#> . +@prefix mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix rdft: <http://www.w3.org/ns/rdftest#> . +@prefix serd: <http://drobilla.net/ns/serd#> . + +rdft:Test + rdfs:subClassOf mf:ManifestEntry . + +serd:patternFile + a rdf:Property ; + rdfs:label "pattern file" . + +serd:TestFilterPositive + a rdfs:Class ; + rdfs:label "Positive Filtering" ; + rdfs:subClassOf rdft:Test . + +<> + a mf:Manifest ; + rdfs:comment "Serd filter test cases" ; + mf:entries ( + <#s1> + <#p1> + <#o1> + ) . + +<#s1> + a serd:TestFilterPositive ; + serd:patternFile <s1.pattern.nt> ; + mf:name "s1" ; + mf:action <input.ttl> ; + mf:result <s1.result.nt> . + +<#p1> + a serd:TestFilterPositive ; + serd:patternFile <p1.pattern.nt> ; + mf:name "p1" ; + mf:action <input.ttl> ; + mf:result <p1.result.nt> . + +<#o1> + a serd:TestFilterPositive ; + serd:patternFile <o1.pattern.nt> ; + mf:name "o1" ; + mf:action <input.ttl> ; + mf:result <o1.result.nt> . 
diff --git a/test/filter/meson.build b/test/filter/meson.build new file mode 100644 index 00000000..fd2b1bca --- /dev/null +++ b/test/filter/meson.build @@ -0,0 +1,15 @@ +base_uri = 'http://drobilla.net/sw/serd/test/filter/' + +test('filter', + run_filter_suite, + args: common_script_options + [ + '--pipe', + serd_pipe, + '--filter', + serd_filter, + files('manifest.ttl'), + base_uri + ], + env: test_env, + suite: ['suite', 'extra'], + timeout: 240) diff --git a/test/filter/o1.pattern.nt b/test/filter/o1.pattern.nt new file mode 100644 index 00000000..41932fd7 --- /dev/null +++ b/test/filter/o1.pattern.nt @@ -0,0 +1 @@ +?s ?p <http://example.org/o1> . diff --git a/test/filter/o1.result.nt b/test/filter/o1.result.nt new file mode 100644 index 00000000..e7b1e759 --- /dev/null +++ b/test/filter/o1.result.nt @@ -0,0 +1,2 @@ +<http://example.org/s1> <http://example.org/p1> <http://example.org/o1> . +<http://example.org/s2> <http://example.org/p1> <http://example.org/o1> . diff --git a/test/filter/p1.pattern.nt b/test/filter/p1.pattern.nt new file mode 100644 index 00000000..fca20e94 --- /dev/null +++ b/test/filter/p1.pattern.nt @@ -0,0 +1 @@ +?s <http://example.org/p1> ?o . diff --git a/test/filter/p1.result.nt b/test/filter/p1.result.nt new file mode 100644 index 00000000..e7b1e759 --- /dev/null +++ b/test/filter/p1.result.nt @@ -0,0 +1,2 @@ +<http://example.org/s1> <http://example.org/p1> <http://example.org/o1> . +<http://example.org/s2> <http://example.org/p1> <http://example.org/o1> . diff --git a/test/filter/s1.pattern.nt b/test/filter/s1.pattern.nt new file mode 100644 index 00000000..f5b87db1 --- /dev/null +++ b/test/filter/s1.pattern.nt @@ -0,0 +1 @@ +<http://example.org/s1> ?p ?o . diff --git a/test/filter/s1.result.nt b/test/filter/s1.result.nt new file mode 100644 index 00000000..023faf42 --- /dev/null +++ b/test/filter/s1.result.nt @@ -0,0 +1,2 @@ +<http://example.org/s1> <http://example.org/p1> <http://example.org/o1> . 
+<http://example.org/s1> <http://example.org/p2> <http://example.org/o2> . diff --git a/test/good/meson.build b/test/good/meson.build index 38c672ac..368a91bc 100644 --- a/test/good/meson.build +++ b/test/good/meson.build @@ -1,8 +1,15 @@ base_uri = 'http://drobilla.net/sw/serd/test/good/' test('good', - run_test_suite, - args: script_args + [files('manifest.ttl'), base_uri], + run_pipe_suite, + args: pipe_test_script_args + [files('manifest.ttl'), base_uri], env: test_env, - suite: ['suite', 'extra'], + suite: ['suite', 'extra', 'pipe'], + timeout: 240) + +test('good', + run_sort_suite, + args: sort_test_script_args + [files('manifest.ttl'), base_uri], + env: test_env, + suite: ['suite', 'extra', 'sort'], timeout: 240) diff --git a/test/lax/meson.build b/test/lax/meson.build index 6d4d7903..e71a677c 100644 --- a/test/lax/meson.build +++ b/test/lax/meson.build @@ -4,8 +4,8 @@ base_uri = 'http://drobilla.net/sw/serd/test/lax/' # ... once with strict parsing to test the hard errors test('lax.strict', - run_test_suite, - args: script_args + [files('manifest.ttl'), base_uri], + run_pipe_suite, + args: pipe_test_script_args + [files('manifest.ttl'), base_uri], env: test_env, is_parallel: false, suite: ['suite', 'extra'], @@ -13,14 +13,13 @@ test('lax.strict', # ... 
and once with lax parsing to tolerate them test('lax.lax', - run_test_suite, - args: script_args + [ + run_pipe_suite, + args: pipe_test_script_args + [ files('manifest.ttl'), base_uri, '--', - '-i', - 'lax', - ], + '-I', 'lax', + '-O', 'lax'], env: test_env, is_parallel: false, suite: ['suite', 'extra'], diff --git a/test/meson.build b/test/meson.build index c19d99c0..9830a1dd 100644 --- a/test/meson.build +++ b/test/meson.build @@ -1,6 +1,8 @@ autoship = find_program('autoship', required: false) -run_test_suite = find_program('run_test_suite.py') +run_filter_suite = find_program('run_filter_suite.py') +run_pipe_suite = find_program('run_pipe_suite.py') +run_sort_suite = find_program('run_sort_suite.py') wrapper = meson.get_cross_property('exe_wrapper', '') @@ -50,185 +52,368 @@ if autoship.found() test('autoship', autoship, args: ['test', serd_src_root], suite: 'data') endif -if is_variable('serdi') - - if wrapper != '' - script_args = ['--wrapper', wrapper, '--serdi', serdi.full_path()] - else - script_args = ['--serdi', serdi.full_path()] - endif - - serd_ttl = files('../serd.ttl')[0] +serd_ttl = files('../serd.ttl')[0] +common_script_options = [] +if wrapper != '' + common_script_options = ['--wrapper', wrapper] +endif - test('serd.ttl', serdi, args: [serd_ttl], env: test_env, suite: 'data') +# Test serd-pipe as the main entry point to the common tool code +if is_variable('serd_pipe') + tool = serd_pipe + pipe_test_script_args = common_script_options + ['--tool', serd_pipe] - # Command line options + # Basic valid arguments good_args = [ - ['-v'], + ['-V'], ['-h'], - ['-k', '512', '-s', '<urn:eg:s> a <urn:eg:T> .'], ] foreach args : good_args - test(args[0], serdi, args: args, env: test_env, suite: ['serdi', 'options']) + test(args[0], + tool, + args: args, + env: test_env, + suite: ['tools', 'pipe', 'options']) endforeach + # Basic invalid arguments + bad_args = [ - ['/no/such/file'], - ['ftp://unsupported.org'], - ['-F', '', '-G', ''], - ['-F'], - ['-F', '?s 
?p ?o . ?q ?r ?s .', '-s', ''], - ['-F', '?s ?p ?o .\n?q ?r ?s .\n', '-s', ''], - ['-F', 'bad_pattern', '-s', ''], - ['-G'], - ['-G', '?s ?p ?o . ?q ?r ?s .', '-s', ''], - ['-G', 'bad_pattern', '-s', ''], + ['-B', 'nonuriorpath'], + ['-B'], + ['-I', 'turtle'], + ['-I', 'unknown'], ['-I'], - ['-b'], + ['-O', 'unknown'], + ['-O'], ['-b', '-1'], - ['-b', '9223372036854775807'], ['-b', '1024junk'], - ['-c'], - ['-i', 'unknown'], - ['-i', 'turtle'], - ['-i'], - ['-fi'], - ['-k'], + ['-b', '9223372036854775807'], + ['-b'], ['-k', '-1'], - ['-k', '9223372036854775807'], ['-k', '1024junk'], - ['-o', 'unknown'], + ['-k', '9223372036854775807'], + ['-k'], ['-o'], - ['-p'], - ['-r'], + ['-s', '<foo> a <Bar> .'], ['-s'], - ['-w'], ['-z'], - ['-s', '<foo> a <Bar> .'], + ['/no/such/file'], ] foreach args : bad_args - name = ' '.join(args).underscorify() - test(name, serdi, + test(' '.join(args), + tool, args: args, env: test_env, should_fail: true, - suite: ['serdi', 'options']) + suite: ['tools', 'pipe', 'options']) endforeach - test('ansi_clicolor_force', - serdi, - args: files('bad/bad-lang.ttl'), - env: test_env + ['CLICOLOR_FORCE=1'], - should_fail: true) - - test('ansi_clicolor_off', - serdi, - args: files('bad/bad-lang.ttl'), - env: test_env + ['CLICOLOR=0'], - should_fail: true) - - test('ansi_no_color', - serdi, - args: files('bad/bad-lang.ttl'), - env: test_env + ['NO_COLOR=1'], - should_fail: true) - test('none', - serdi, + tool, env: test_env, should_fail: true, - suite: ['serdi', 'options']) + suite: ['tools', 'pipe', 'options']) - test('quiet', files('test_quiet.py'), - args: script_args + files('bad/bad-base.ttl'), + test('remote', + tool, + args: ['ftp://unsupported.org'], env: test_env, - suite: ['serdi', 'options']) + should_fail: true, + suite: ['tools', 'pipe', 'options']) - test('filter', files('test_filter.py'), - args: script_args, + test('bad_rebase', + tool, + args: ['-B', 'rebase', serd_ttl], env: test_env, - suite: ['serdi', 'options']) + should_fail: 
true, + suite: ['tools', 'pipe', 'options']) - test('grep', files('test_grep.py'), - args: script_args, + test('base', files('test_base.py'), + args: pipe_test_script_args, env: test_env, - suite: ['serdi', 'options']) + suite: ['tools', 'pipe', 'options']) - # Inputs + test('dir_base', + tool, + args: ['-B', serd_src_root / '', serd_ttl], + env: test_env, + suite: ['tools', 'pipe', 'options']) + + # Smoke test common handling code for environment color configuration + + test('CLICOLOR_FORCE', + tool, + args: files('bad/bad-lang.ttl'), + env: test_env + ['CLICOLOR_FORCE=1'], + should_fail: true, + suite: ['color']) + + test('CLICOLOR', + tool, + args: files('bad/bad-lang.ttl'), + env: test_env + ['CLICOLOR=0'], + should_fail: true, + suite: ['color']) + + test('NO_COLOR', + tool, + args: files('bad/bad-lang.ttl'), + env: test_env + ['NO_COLOR=1'], + should_fail: true, + suite: ['color']) + + # Different input sources test('stdin', files('test_stdin.py'), - args: script_args, + args: pipe_test_script_args, env: test_env, - suite: ['serdi', 'input']) + suite: ['tools', 'pipe', 'input']) - test('multiple', files('test_multifile.py'), - args: script_args + [meson.current_source_dir() / 'multifile'], + test('multifile', files('test_multifile.py'), + args: pipe_test_script_args + [meson.current_source_dir() / 'multifile'], env: test_env, - suite: ['serdi', 'input']) + suite: ['tools', 'pipe', 'input']) - test('string', serdi, - args: ['-s', '<foo> a <Bar> .'], + test('serd.ttl', + tool, + args: [serd_ttl], env: test_env, - should_fail: true, - suite: ['serdi', 'input']) + suite: ['tools', 'pipe', 'input']) - test('missing', serdi, - args: ['-i', 'turtle'], + test('good_string', + tool, + args: ['-I', 'turtle', '-s', '[] a [] .'], env: test_env, - should_fail: true, - suite: ['serdi', 'input']) + suite: ['tools', 'pipe', 'input']) - test('no_such_file', serdi, - args: ['no_such_file'], + test('baseless_string', + tool, + args: ['-s', '<foo> a <Bar> .'], env: test_env, 
should_fail: true, - suite: ['serdi', 'input']) + suite: ['tools', 'pipe', 'input']) - test('remote', serdi, - args: ['ftp://example.org/unsupported.ttl'], + test('unknown_type', + tool, + args: files('../README.md'), env: test_env, should_fail: true, - suite: ['serdi', 'input']) + suite: ['tools', 'pipe', 'input']) - # Output + # Suppressed output test('empty', files('test_empty.py'), - args: script_args + [serd_ttl], + args: pipe_test_script_args + files('../serd.ttl'), env: test_env, - suite: 'output') + suite: ['tools', 'pipe', 'output']) - # FIXME: Old base URI argument? + test('quiet', files('test_quiet.py'), + args: pipe_test_script_args + files('bad/bad-base.ttl'), + env: test_env, + suite: ['tools', 'pipe', 'output']) # IO errors - test('read_dir', serdi, + test('read_dir', + tool, args: [meson.source_root()], env: test_env, should_fail: true, - suite: 'io_errors') + suite: ['tools', 'pipe', 'input']) if host_machine.system() == 'linux' - test('unreadable', serdi, + test('unreadable', + tool, args: ['/sys/bus/pci/rescan'], env: test_env, should_fail: true, - suite: 'io_errors') + suite: ['tools', 'pipe', 'input']) endif test('write_error', files('test_write_error.py'), - args: script_args + [serd_ttl], + args: pipe_test_script_args + [serd_ttl], env: test_env, - suite: 'io_errors') + suite: ['tools', 'pipe', 'output']) - test('write_bad_file', serdi, - args: ['-w', '/does/not/exist.ttl', meson.source_root() / 'serd.ttl'], + test('missing_output', + tool, + args: ['-o', '/does/not/exist.ttl', meson.source_root() / 'serd.ttl'], env: test_env, should_fail: true, - suite: 'io_errors') + suite: ['tools', 'pipe', 'output']) +endif + +# Test specifics for serd-sort +if is_variable('serd_sort') + tool = serd_sort + sort_test_script_args = common_script_options + ['--tool', serd_sort] + + # Basic valid arguments + + good_args = [ + ['-V'], + ['-h'], + ] + + foreach args : good_args + test(args[0], + tool, + args: args, + env: test_env, + suite: ['tools', 'sort', 
'options']) + endforeach + + # Basic invalid arguments + + bad_args = [ + ['-c', 'CHAOS', '-'], + ['-o'], + ['-s'], + ['-z', '-'], + ['-z', '-'], + ['/no/such/file'], + ] + + foreach args : bad_args + test(' '.join(args), + tool, + args: args, + env: test_env, + should_fail: true, + suite: ['tools', 'sort', 'options']) + endforeach + + test('none', + tool, + env: test_env, + should_fail: true, + suite: ['tools', 'sort', 'options']) + + # IO errors + + test('read_dir', + tool, + args: [meson.source_root()], + env: test_env, + should_fail: true, + suite: ['tools', 'sort', 'input']) + + test('missing_output', + tool, + args: ['-o', '/does/not/exist.ttl', meson.source_root() / 'serd.ttl'], + env: test_env, + should_fail: true, + suite: ['tools', 'sort', 'output']) + + # Collation test suite + + test('sort', files('test_sort.py'), + args: sort_test_script_args + files('sort/input.trig'), + env: test_env, + suite: ['tools', 'sort']) + +endif + +# Test specifics for serd-filter +if is_variable('serd_filter') + tool = serd_filter + filter_test_script_args = common_script_options + ['--tool', serd_filter] + + # Basic valid arguments + + good_args = [ + ['-V'], + ['-h'], + ] + + foreach args : good_args + test(args[0], + tool, + args: args, + env: test_env, + suite: ['tools', 'filter', 'options']) + endforeach + + # Basic invalid arguments + + bad_args = [ + ['-f', '/no/such/file.nt', '-'], + ['-z'], + ['?s ?p ?o .'], + ] + + foreach args : bad_args + test(' '.join(args), + tool, + args: args, + env: test_env, + should_fail: true, + suite: ['tools', 'filter', 'options']) + endforeach + + test('garbage_pattern', + tool, + args: ['junk', serd_ttl], + env: test_env, + should_fail: true, + suite: ['tools', 'filter', 'options']) + + test('multiple_patterns', + tool, + args: ['?s ?p ?o .\n?t ?u ?v .\n', + meson.source_root() / 'serd.ttl'], + env: test_env, + should_fail: true, + suite: ['tools', 'filter', 'output']) + + test('missing_output', + tool, + args: ['-o', 
'/does/not/exist.ttl', + '?s ?p ?o .', + meson.source_root() / 'serd.ttl'], + env: test_env, + should_fail: true, + suite: ['tools', 'filter', 'output']) + + # Different input sources + + test('missing_input', + tool, + args: ['?s ?p ?o .', '/does/not/exist.ttl'], + env: test_env, + should_fail: true, + suite: ['tools', 'filter', 'input']) + + test('filter_dir', + tool, + args: ['?s ?p ?o .', meson.source_root()], + env: test_env, + should_fail: true, + suite: ['tools', 'filter', 'input']) + + # Filtering + + test('filter', files('test_filter.py'), + args: filter_test_script_args, + env: test_env, + suite: ['tools']) + + test('grep', files('test_grep.py'), + args: filter_test_script_args, + env: test_env, + suite: ['tools']) + + # RDF-driven test suite + subdir('filter') + +endif +# Run RDF-driven test suites using serd-pipe and serd-sort +if is_variable('serd_pipe') and is_variable('serd_sort') # RDF-driven test suites from the W3C subdir('NQuadsTests') subdir('NTriplesTests') diff --git a/test/pattern/meson.build b/test/pattern/meson.build index 1d5a2140..1216cfe0 100644 --- a/test/pattern/meson.build +++ b/test/pattern/meson.build @@ -1,14 +1,27 @@ base_uri = 'http://drobilla.net/sw/serd/test/pattern/' test('pattern', - run_test_suite, - args: script_args + [ + run_pipe_suite, + args: pipe_test_script_args + [ files('manifest.ttl'), base_uri, '--', - '-i', + '-I', 'variables', ], env: test_env, - suite: ['suite', 'extra'], + suite: ['suite', 'extra', 'pipe'], + timeout: 240) + +test('pattern', + run_sort_suite, + args: sort_test_script_args + [ + files('manifest.ttl'), + base_uri, + '--', + '-I', + 'variables', + ], + env: test_env, + suite: ['suite', 'extra', 'sort'], timeout: 240) diff --git a/test/run_filter_suite.py b/test/run_filter_suite.py new file mode 100755 index 00000000..222b98e4 --- /dev/null +++ b/test/run_filter_suite.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python3 + +"""Run the RDF-based test suite for serd-filter.""" + +import serd_test_util + 
+import argparse +import datetime +import difflib +import itertools +import os +import re +import shlex +import subprocess +import sys +import tempfile +import urllib.parse + + +def log_error(message): + """Log an error message to stderr""" + + sys.stderr.write("error: ") + sys.stderr.write(message) + + +def _uri_path(test_dir, uri): + path = urllib.parse.urlparse(uri).path + drive = os.path.splitdrive(path[1:])[0] + path = path if not drive else path[1:] + return os.path.join(test_dir, os.path.basename(path)) + + +def test_suite( + manifest_path, + base_uri, + filter_command_prefix, + pipe_command_prefix, + out_dir, +): + """Run all tests in the manifest.""" + + mf = "http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#" + suite_dir = os.path.dirname(manifest_path) + + model, instances = serd_test_util.load_rdf( + pipe_command_prefix + ["-B", base_uri], manifest_path + ) + + class Results: + def __init__(self): + self.n_tests = 0 + self.n_failures = 0 + + def run_test(entry, results): + """Run a single test entry from the manifest.""" + + pattern_uri = model[entry]["http://drobilla.net/ns/serd#patternFile"][0] + input_uri = model[entry][mf + "action"][0] + result_uri = model[entry][mf + "result"][0] + pattern_path = _uri_path(suite_dir, pattern_uri) + input_path = _uri_path(suite_dir, input_uri) + result_path = _uri_path(suite_dir, result_uri) + + output_path = os.path.join( + out_dir, os.path.basename(result_path).replace(".result", "") + ) + + command = filter_command_prefix + [ + "-B", + base_uri, + "-f", + pattern_path, + "-o", + output_path, + input_path, + ] + + # Run the filter (which should return success) + results.n_tests += 1 + try: + subprocess.run(command, check=True) + + # Check output against the expected result + if not serd_test_util.file_equals(result_path, output_path): + results.n_failures += 1 + log_error( + "Output {} differs from {}\n".format( + output_path, check_path + ) + ) + + except Exception as e: + log_error(e) + 
results.n_failures += 1 + + # Run all test types in the test suite + results = Results() + for klass, instances in instances.items(): + if klass == "http://drobilla.net/ns/serd#TestFilterPositive": + for entry in instances: + run_test(entry, results) + + # Print result summary + if results.n_failures > 0: + log_error( + "{}/{} tests failed\n".format(results.n_failures, results.n_tests) + ) + else: + sys.stdout.write("All {} tests passed\n".format(results.n_tests)) + + return results.n_failures + + +def main(): + """Run the command line tool.""" + + parser = argparse.ArgumentParser( + usage="%(prog)s [OPTION]... MANIFEST BASE_URI -- [TOOL_OPTION]...", + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + + parser.add_argument( + "--pipe", default="tools/serd-pipe", help="serd-pipe executable" + ) + parser.add_argument( + "--filter", default="tools/serd-filter", help="serd-filter executable" + ) + parser.add_argument("--wrapper", default="", help="executable wrapper") + parser.add_argument("manifest", help="test suite manifest.ttl file") + parser.add_argument("base_uri", help="base URI for tests") + parser.add_argument( + "tool_option", nargs=argparse.REMAINDER, help="option for serd-filter" + ) + + args = parser.parse_args(sys.argv[1:]) + wrapper_prefix = shlex.split(args.wrapper) + filter_command_prefix = wrapper_prefix + [args.filter] + pipe_command_prefix = wrapper_prefix + [args.pipe] + + with tempfile.TemporaryDirectory() as test_out_dir: + return test_suite( + args.manifest, + args.base_uri, + filter_command_prefix, + pipe_command_prefix, + test_out_dir, + ) + + +if __name__ == "__main__": + try: + sys.exit(main()) + except subprocess.CalledProcessError as e: + if e.stderr is not None: + sys.stderr.write(e.stderr.decode("utf-8")) + + sys.stderr.write("error: %s\n" % e) + sys.exit(e.returncode) diff --git a/test/run_test_suite.py b/test/run_pipe_suite.py index 457e7f81..65a894c4 100755 --- a/test/run_test_suite.py +++ 
b/test/run_pipe_suite.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -"""Run an RDF test suite with serdi.""" +"""Run an RDF test suite with serd-pipe.""" import serd_test_util @@ -41,7 +41,14 @@ def test_thru( osyntax, command_prefix, ): - """Test lossless round-tripping through two different syntaxes.""" + """Test rewriting a file in the input syntax. + + This rewrites a source test file in the original fancy syntax, then + rewrites that output again in the simple syntax used for test output + (NTriples or NQuads). Checking the final output against the expected test + output tests that piping the file through serd with pretty-printing was + lossless. + """ assert isyntax is not None assert osyntax is not None @@ -54,51 +61,45 @@ def test_thru( command_prefix + [f for sublist in flags for f in sublist] + [ - "-i", + "-B", + base_uri, + "-I", isyntax, - "-o", + "-O", isyntax, - "-w", + "-o", out_path, - "-I", - base_uri, path, ] ) + subprocess.run(out_cmd, check=True) + thru_cmd = ( command_prefix + test_osyntax_options(osyntax) + [ - "-i", + "-B", + base_uri, + "-I", isyntax, - "-o", + "-O", + "ascii", + "-O", osyntax, - "-w", - thru_path, "-o", - "ascii", - "-I", - base_uri, + thru_path, out_path, ] ) - subprocess.run(out_cmd, check=True) subprocess.run(thru_cmd, check=True) - with open(thru_path, "wb") as out: - subprocess.run(thru_cmd, check=True, stdout=out) - - if not _file_equals(check_path, thru_path): - log_error( - "Round-tripped output {} does not match {}\n".format( - check_path, thru_path - ) - ) - return 1 + if serd_test_util.file_equals(check_path, thru_path): + return 0 - return 0 + log_error("Rewritten {} differs from {}\n".format(thru_path, check_path)) + return 1 def _uri_path(uri): @@ -107,36 +108,6 @@ def _uri_path(uri): return path if not drive else path[1:] -def _test_input_syntax(test_class): - """Return the output syntax use for a given test class.""" - - if "NTriples" in test_class: - return "NTriples" - - if "Turtle" in test_class: - return 
"Turtle" - - if "NQuads" in test_class: - return "NQuads" - - if "Trig" in test_class: - return "Trig" - - raise Exception("Unknown test class <{}>".format(test_class)) - - -def _test_output_syntax(test_class): - """Return the output syntax use for a given test class.""" - - if "NTriples" in test_class or "Turtle" in test_class: - return "NTriples" - - if "NQuads" in test_class or "Trig" in test_class: - return "NQuads" - - raise Exception("Unknown test class <{}>".format(test_class)) - - def _option_combinations(options): """Return an iterator that cycles through all combinations of options.""" @@ -147,49 +118,6 @@ def _option_combinations(options): return itertools.cycle(combinations) -def _show_diff(from_lines, to_lines, from_filename, to_filename): - same = True - for line in difflib.unified_diff( - from_lines, - to_lines, - fromfile=os.path.abspath(from_filename), - tofile=os.path.abspath(to_filename), - ): - sys.stderr.write(line) - same = False - - return same - - -def _file_equals(patha, pathb): - - for path in (patha, pathb): - if not os.access(path, os.F_OK): - log_error("missing file {}\n".format(path)) - return False - - with open(patha, "r", encoding="utf-8") as fa: - with open(pathb, "r", encoding="utf-8") as fb: - return _show_diff(fa.readlines(), fb.readlines(), patha, pathb) - - -def _file_lines_equal(patha, pathb, subst_from="", subst_to=""): - import io - - for path in (patha, pathb): - if not os.access(path, os.F_OK): - sys.stderr.write("error: missing file %s" % path) - return False - - la = sorted(set(io.open(patha, encoding="utf-8").readlines())) - lb = sorted(set(io.open(pathb, encoding="utf-8").readlines())) - if la != lb: - _show_diff(la, lb, patha, pathb) - return False - - return True - - def test_suite( manifest_path, base_uri, @@ -204,7 +132,7 @@ def test_suite( mf = "http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#" test_dir = os.path.dirname(manifest_path) model, instances = serd_test_util.load_rdf( - command_prefix + ["-I", 
base_uri], manifest_path + command_prefix + ["-B", base_uri], manifest_path ) asserter = "" @@ -217,17 +145,21 @@ def test_suite( self.n_failures = 0 def run_tests(test_class, tests, expected_return, results): - thru_flags = [["-f"], ["-b", "1"], ["-r", "http://example.org/"]] + thru_flags = [ + ["-R", "http://example.org/"], + ["-b", "1"], + ["-b", "16384"], + ] thru_options_iter = _option_combinations(thru_flags) if output_syntax is not None: osyntax = output_syntax else: - osyntax = _test_output_syntax(test_class) + osyntax = serd_test_util.test_output_syntax(test_class) if input_syntax is not None: isyntax = input_syntax else: - isyntax = _test_input_syntax(test_class) + isyntax = serd_test_util.test_input_syntax(test_class) for test in sorted(tests): test_uri = model[test][mf + "action"][0] @@ -236,11 +168,11 @@ def test_suite( test_path = os.path.join(test_dir, test_name) command = command_prefix + [ - "-o", + "-O", osyntax, - "-o", + "-O", "ascii", - "-I", + "-B", test_uri, test_path, ] @@ -272,7 +204,7 @@ def test_suite( check_filename = os.path.basename(_uri_path(check_uri)) check_path = os.path.join(test_dir, check_filename) - if not _file_equals(check_path, out_filename): + if not serd_test_util.file_equals(check_path, out_filename): results.n_failures += 1 log_error( "Output {} does not match {}\n".format( @@ -292,32 +224,6 @@ def test_suite( command_prefix, ) - # Run model test for positive test (must succeed) - out_filename = os.path.join( - out_test_dir, test_name + ".model.out" - ) - - model_command = command_prefix + [ - "-m", - "-o", - osyntax, - "-o", - "ascii", - "-w", - out_filename, - "-I", - test_uri, - test_path, - ] - - proc = subprocess.run(model_command, check=True) - - if proc.returncode == 0 and ( - (mf + "result") in model[test] - ): - if not _file_lines_equal(check_path, out_filename): - results.n_failures += 1 - else: # Negative test with open(out_filename, "w") as stdout: with tempfile.TemporaryFile() as stderr: @@ -380,13 +286,13 
@@ def main(): """Run the command line tool.""" parser = argparse.ArgumentParser( - usage="%(prog)s [OPTION]... MANIFEST BASE_URI -- [SERDI_OPTION]...", + usage="%(prog)s [OPTION]... MANIFEST BASE_URI -- [TOOL_OPTION]...", description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, ) parser.add_argument("--report", help="path to write result report to") - parser.add_argument("--serdi", default="serdi", help="path to serdi") + parser.add_argument("--tool", default="tools/serd-pipe", help="executable") parser.add_argument("--syntax", default=None, help="input syntax") parser.add_argument("--osyntax", default=None, help="output syntax") parser.add_argument("--wrapper", default="", help="executable wrapper") @@ -394,14 +300,11 @@ def main(): parser.add_argument("base_uri", help="base URI for tests") parser.add_argument( - "serdi_option", nargs=argparse.REMAINDER, help="option for serdi" + "tool_option", nargs=argparse.REMAINDER, help="option to pass to tool" ) args = parser.parse_args(sys.argv[1:]) - - command_prefix = ( - shlex.split(args.wrapper) + [args.serdi] + args.serdi_option - ) + command_prefix = shlex.split(args.wrapper) + [args.tool] + args.tool_option with tempfile.TemporaryDirectory() as test_out_dir: return test_suite( diff --git a/test/run_sort_suite.py b/test/run_sort_suite.py new file mode 100755 index 00000000..27205665 --- /dev/null +++ b/test/run_sort_suite.py @@ -0,0 +1,234 @@ +#!/usr/bin/env python3 + +"""Run an RDF test suite with serd-sort.""" + +import serd_test_util + +import argparse +import datetime +import difflib +import itertools +import os +import re +import shlex +import subprocess +import sys +import tempfile +import urllib.parse + + +def log_error(message): + """Log an error message to stderr""" + + sys.stderr.write("error: ") + sys.stderr.write(message) + + +def _uri_path(uri): + path = urllib.parse.urlparse(uri).path + drive = os.path.splitdrive(path[1:])[0] + return path if not drive else path[1:] + + +def 
_file_lines_equal(patha, pathb, subst_from="", subst_to=""): + import io + + for path in (patha, pathb): + if not os.access(path, os.F_OK): + sys.stderr.write("error: missing file %s" % path) + return False + + la = sorted(set(io.open(patha, encoding="utf-8").readlines())) + lb = sorted(set(io.open(pathb, encoding="utf-8").readlines())) + if la != lb: + serd_test_util.show_diff(la, lb, patha, pathb) + return False + + return True + + +def _add_extension(filename, extension): + first_dot = filename.find(".") + + return filename[0:first_dot] + extension + filename[first_dot:] + + +def test_suite( + manifest_path, + base_uri, + report_filename, + input_syntax, + output_syntax, + command_prefix, + out_test_dir, +): + """Run all tests in a test suite manifest.""" + + mf = "http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#" + test_dir = os.path.dirname(manifest_path) + model, instances = serd_test_util.load_rdf( + command_prefix + ["-B", base_uri], manifest_path + ) + + asserter = "" + if os.getenv("USER") == "drobilla": + asserter = "http://drobilla.net/drobilla#me" + + class Results: + def __init__(self): + self.n_tests = 0 + self.n_failures = 0 + + def run_tests(test_class, tests, expected_return, results): + osyntax = output_syntax + if osyntax is None: + osyntax = serd_test_util.test_output_syntax(test_class) + + isyntax = input_syntax + if isyntax is None: + isyntax = serd_test_util.test_input_syntax(test_class) + + for test in sorted(tests): + test_uri = model[test][mf + "action"][0] + test_uri_path = _uri_path(test_uri) + test_name = os.path.basename(test_uri_path) + test_path = os.path.join(test_dir, test_name) + + command = command_prefix + [ + "-B", + test_uri, + "-O", + osyntax, + "-O", + "ascii", + test_path, + ] + + command_string = " ".join(shlex.quote(c) for c in command) + out_filename = os.path.join( + out_test_dir, _add_extension(test_name, ".sort") + ) + + results.n_tests += 1 + + if expected_return == 0: # Positive test + + with 
open(out_filename, "w") as stdout: + proc = subprocess.run(command, check=False, stdout=stdout) + passed = proc.returncode == expected_return + if not passed: + results.n_failures += 1 + log_error( + "Unexpected failure of command: {}\n".format( + command_string + ) + ) + + if passed and (mf + "result") in model[test]: + # Check output against expected output from test suite + check_uri = model[test][mf + "result"][0] + check_filename = os.path.basename(_uri_path(check_uri)) + check_path = os.path.join(test_dir, check_filename) + + if not _file_lines_equal(check_path, out_filename): + results.n_failures += 1 + log_error( + "Output {} differs from {}\n".format( + out_filename, check_path + ) + ) + + else: # Negative test + + with tempfile.TemporaryFile() as stderr: + with open(out_filename, "w") as stdout: + proc = subprocess.run( + command, check=False, stdout=stdout, stderr=stderr + ) + + passed = proc.returncode != 0 + if passed: + # Check that an error message was printed + stderr.seek(0, 2) # Seek to end + if stderr.tell() == 0: # Empty + results.n_failures += 1 + log_error("No error: {}\n".format(command_string)) + + else: + results.n_failures += 1 + log_error("Should fail: {}\n".format(command_string)) + + # Write test report entry + if report_filename: + with open(report_filename, "a") as report: + report.write( + serd_test_util.earl_assertion(test, passed, asserter) + ) + + # Run all test types in the test suite + results = Results() + ns_rdftest = "http://www.w3.org/ns/rdftest#" + for test_class, instances in instances.items(): + if test_class.startswith(ns_rdftest): + expected = ( + 1 + if "lax" not in command_prefix and "Negative" in test_class + else 0 + ) + run_tests(test_class, instances, expected, results) + + # Print result summary + if results.n_failures > 0: + log_error( + "{}/{} tests failed\n".format(results.n_failures, results.n_tests) + ) + else: + sys.stdout.write("All {} tests passed\n".format(results.n_tests)) + + return results.n_failures 
+ + +def main(): + """Run the command line tool.""" + + parser = argparse.ArgumentParser( + usage="%(prog)s [OPTION]... MANIFEST BASE_URI -- [TOOL_OPTION]...", + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + + parser.add_argument("--report", help="path to write result report to") + parser.add_argument("--tool", default="tools/serd-sort", help="executable") + parser.add_argument("--syntax", default=None, help="input syntax") + parser.add_argument("--osyntax", default=None, help="output syntax") + parser.add_argument("--wrapper", default="", help="executable wrapper") + parser.add_argument("manifest", help="test suite manifest.ttl file") + parser.add_argument("base_uri", help="base URI for tests") + parser.add_argument( + "tool_option", nargs=argparse.REMAINDER, help="option to pass to tool" + ) + + args = parser.parse_args(sys.argv[1:]) + command_prefix = shlex.split(args.wrapper) + [args.tool] + args.tool_option + + with tempfile.TemporaryDirectory() as test_out_dir: + return test_suite( + args.manifest, + args.base_uri, + args.report, + args.syntax, + args.osyntax, + command_prefix, + test_out_dir, + ) + + +if __name__ == "__main__": + try: + sys.exit(main()) + except subprocess.CalledProcessError as e: + if e.stderr is not None: + sys.stderr.write(e.stderr.decode("utf-8")) + + sys.stderr.write("error: %s\n" % e) + sys.exit(e.returncode) diff --git a/test/serd_test_util/__init__.py b/test/serd_test_util/__init__.py index f0b1c19a..45cc6e64 100644 --- a/test/serd_test_util/__init__.py +++ b/test/serd_test_util/__init__.py @@ -3,8 +3,48 @@ """Utilities for data-driven tests.""" import datetime +import difflib +import os import re import subprocess +import sys + + +def error(message): + """Log an error message to stderr""" + + sys.stderr.write("error: ") + sys.stderr.write(message) + + +def test_input_syntax(test_class): + """Return the output syntax use for a given test class.""" + + if "NTriples" in test_class: + return 
"NTriples" + + if "Turtle" in test_class: + return "Turtle" + + if "NQuads" in test_class: + return "NQuads" + + if "Trig" in test_class: + return "Trig" + + raise Exception("Unknown test class <{}>".format(test_class)) + + +def test_output_syntax(test_class): + """Return the output syntax use for a given test class.""" + + if "NTriples" in test_class or "Turtle" in test_class: + return "NTriples" + + if "NQuads" in test_class or "Trig" in test_class: + return "NQuads" + + raise Exception("Unknown test class <{}>".format(test_class)) def earl_assertion(test, passed, asserter): @@ -33,7 +73,7 @@ def earl_assertion(test, passed, asserter): def load_rdf(command_prefix, filename): - """Load an RDF file as dictionaries via serdi (only supports URIs).""" + """Load an RDF file as dictionaries via serd-pipe (only supports URIs).""" rdf_type = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" model = {} @@ -61,3 +101,32 @@ def load_rdf(command_prefix, filename): instances[o].update([s]) return model, instances + + +def file_equals(patha, pathb): + """Return true if the file at patha is the same as the file at pathb.""" + + for path in (patha, pathb): + if not os.access(path, os.F_OK): + error("missing file {}\n".format(path)) + return False + + with open(patha, "r", encoding="utf-8") as fa: + with open(pathb, "r", encoding="utf-8") as fb: + return show_diff(fa.readlines(), fb.readlines(), patha, pathb) + + +def show_diff(from_lines, to_lines, from_filename, to_filename): + """Print a diff between files to stderr.""" + + same = True + for line in difflib.unified_diff( + from_lines, + to_lines, + fromfile=os.path.abspath(from_filename), + tofile=os.path.abspath(to_filename), + ): + sys.stderr.write(line) + same = False + + return same diff --git a/test/sort/GOPS.nq b/test/sort/GOPS.nq new file mode 100644 index 00000000..c7472e03 --- /dev/null +++ b/test/sort/GOPS.nq @@ -0,0 +1,10 @@ +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> 
"1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . diff --git a/test/sort/GOSP.nq b/test/sort/GOSP.nq new file mode 100644 index 00000000..c7472e03 --- /dev/null +++ b/test/sort/GOSP.nq @@ -0,0 +1,10 @@ +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . 
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . diff --git a/test/sort/GPSO.nq b/test/sort/GPSO.nq new file mode 100644 index 00000000..1a858017 --- /dev/null +++ b/test/sort/GPSO.nq @@ -0,0 +1,10 @@ +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . diff --git a/test/sort/GSOP.nq b/test/sort/GSOP.nq new file mode 100644 index 00000000..fc073a00 --- /dev/null +++ b/test/sort/GSOP.nq @@ -0,0 +1,10 @@ +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . 
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . diff --git a/test/sort/GSPO.nq b/test/sort/GSPO.nq new file mode 100644 index 00000000..726b1d42 --- /dev/null +++ b/test/sort/GSPO.nq @@ -0,0 +1,10 @@ +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . 
diff --git a/test/sort/OPS.nq b/test/sort/OPS.nq new file mode 100644 index 00000000..456ade7f --- /dev/null +++ b/test/sort/OPS.nq @@ -0,0 +1,10 @@ +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . diff --git a/test/sort/OSP.nq b/test/sort/OSP.nq new file mode 100644 index 00000000..456ade7f --- /dev/null +++ b/test/sort/OSP.nq @@ -0,0 +1,10 @@ +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . 
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . diff --git a/test/sort/POS.nq b/test/sort/POS.nq new file mode 100644 index 00000000..51c675de --- /dev/null +++ b/test/sort/POS.nq @@ -0,0 +1,10 @@ +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . diff --git a/test/sort/PSO.nq b/test/sort/PSO.nq new file mode 100644 index 00000000..0fb7bd68 --- /dev/null +++ b/test/sort/PSO.nq @@ -0,0 +1,10 @@ +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . 
+<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . diff --git a/test/sort/SOP.nq b/test/sort/SOP.nq new file mode 100644 index 00000000..1692689c --- /dev/null +++ b/test/sort/SOP.nq @@ -0,0 +1,10 @@ +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . 
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . diff --git a/test/sort/SPO.nq b/test/sort/SPO.nq new file mode 100644 index 00000000..508debc7 --- /dev/null +++ b/test/sort/SPO.nq @@ -0,0 +1,10 @@ +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . diff --git a/test/sort/input.trig b/test/sort/input.trig new file mode 100644 index 00000000..154a9fb8 --- /dev/null +++ b/test/sort/input.trig @@ -0,0 +1,19 @@ +@prefix eg: <http://example.org/> . + +eg:graph1 { +eg:s + eg:blank [ + eg:with eg:aProperty , + eg:orAnother + ] ; + eg:list ( + 1 + 2 + ) ; + eg:literal "s1" . +} + +eg:graph2 { +eg:a + eg:b eg:c . +} diff --git a/test/sort/pretty.nq b/test/sort/pretty.nq new file mode 100644 index 00000000..451247d4 --- /dev/null +++ b/test/sort/pretty.nq @@ -0,0 +1,10 @@ +<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> . 
+_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> . +_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> . +_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> . +<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> . +<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> . diff --git a/test/terse/meson.build b/test/terse/meson.build index 538a8d16..b0516fcb 100644 --- a/test/terse/meson.build +++ b/test/terse/meson.build @@ -1,16 +1,23 @@ base_uri = 'http://drobilla.net/sw/serd/test/terse/' +args = [ + '--osyntax', 'turtle', + files('manifest.ttl'), + base_uri, + '--', + '-O', 'terse' +] + +test('terse', + run_pipe_suite, + args: pipe_test_script_args + args, + env: test_env, + suite: ['suite', 'extra', 'pipe'], + timeout: 240) + test('terse', - run_test_suite, - args: script_args + [ - '--osyntax', 'turtle', - files('manifest.ttl'), - base_uri, - '--', - '-o', - 'terse', - ], + run_sort_suite, + args: sort_test_script_args + args, env: test_env, - is_parallel: false, - suite: ['suite', 'extra'], + suite: ['suite', 'extra', 'sort'], timeout: 240) diff --git a/test/test_base.py b/test/test_base.py new file mode 100755 index 00000000..c3018da3 --- /dev/null +++ b/test/test_base.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 + +"""Test reading from stdin with serd-pipe.""" + +import argparse +import sys 
+import shlex +import subprocess +import tempfile + +parser = argparse.ArgumentParser(description=__doc__) + +parser.add_argument("--tool", default="tools/serd-pipe", help="executable") +parser.add_argument("--wrapper", default="", help="executable wrapper") + +args = parser.parse_args(sys.argv[1:]) +command = shlex.split(args.wrapper) + [ + args.tool, + "-B", + "http://example.org", + "-I", + "turtle", + "-", +] + +IN_DOCUMENT = "<s> <p> <o> ." +OUT_DOCUMENT = "<{0}s> <{0}p> <{0}o> .".format("http://example.org/") + +with tempfile.TemporaryFile() as out: + proc = subprocess.run( + command, + check=False, + encoding="utf-8", + input=IN_DOCUMENT, + stdout=out, + stderr=subprocess.PIPE, + ) + + assert proc.returncode == 0 + assert args.wrapper or len(proc.stderr) == 0 + + out.seek(0) + lines = out.readlines() + + assert len(lines) == 1 + assert lines[0].decode("utf-8").strip() == OUT_DOCUMENT diff --git a/test/test_empty.py b/test/test_empty.py index a7978e6c..03264d8c 100755 --- a/test/test_empty.py +++ b/test/test_empty.py @@ -10,12 +10,12 @@ import tempfile parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument("--serdi", default="./serdi", help="path to serdi") +parser.add_argument("--tool", default="tools/serd-read", help="executable") parser.add_argument("--wrapper", default="", help="executable wrapper") parser.add_argument("input", help="valid input file") args = parser.parse_args(sys.argv[1:]) -command = shlex.split(args.wrapper) + [args.serdi, "-o", "empty", args.input] +command = shlex.split(args.wrapper) + [args.tool, "-O", "empty", args.input] with tempfile.TemporaryFile() as out: diff --git a/test/test_filter.py b/test/test_filter.py index d44677f5..5f25f22e 100755 --- a/test/test_filter.py +++ b/test/test_filter.py @@ -21,7 +21,7 @@ DOCUMENTS = { parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument("--serdi", default="./serdi", help="path to serdi") +parser.add_argument("--tool", default="tools/serd-filter", 
help="executable") parser.add_argument("--wrapper", default="", help="executable wrapper") args = parser.parse_args(sys.argv[1:]) @@ -29,21 +29,21 @@ args = parser.parse_args(sys.argv[1:]) def check_pattern(syntax, pattern, result): command = shlex.split(args.wrapper) + [ - args.serdi, - "-i", + args.tool, + "-I", syntax, - "-F", + "-v", pattern, - "-s", - DOCUMENTS[syntax], + "-", ] with tempfile.TemporaryFile() as out: proc = subprocess.run( command, + capture_output=True, check=False, encoding="utf-8", - capture_output=True, + input=DOCUMENTS[syntax], ) assert proc.returncode == 0 diff --git a/test/test_grep.py b/test/test_grep.py index 0c8c5228..44c3ce1f 100755 --- a/test/test_grep.py +++ b/test/test_grep.py @@ -21,7 +21,7 @@ DOCUMENTS = { parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument("--serdi", default="./serdi", help="path to serdi") +parser.add_argument("--tool", default="tools/serd-filter", help="executable") parser.add_argument("--wrapper", default="", help="executable wrapper") args = parser.parse_args(sys.argv[1:]) @@ -29,21 +29,20 @@ args = parser.parse_args(sys.argv[1:]) def check_pattern(syntax, pattern, result): command = shlex.split(args.wrapper) + [ - args.serdi, - "-i", + args.tool, + "-I", syntax, - "-G", pattern, - "-s", - DOCUMENTS[syntax], + "-", ] with tempfile.TemporaryFile() as out: proc = subprocess.run( command, + capture_output=True, check=False, encoding="utf-8", - capture_output=True, + input=DOCUMENTS[syntax], ) assert proc.returncode == 0 diff --git a/test/test_multifile.py b/test/test_multifile.py index 5fb44bc5..c5e11bf3 100755 --- a/test/test_multifile.py +++ b/test/test_multifile.py @@ -12,7 +12,7 @@ import tempfile parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument("--serdi", default="./serdi", help="path to serdi") +parser.add_argument("--tool", default="tools/serd-pipe", help="executable") parser.add_argument("--wrapper", default="", help="executable wrapper") 
parser.add_argument("testdir", help="multifile test directory") @@ -20,7 +20,7 @@ args = parser.parse_args(sys.argv[1:]) in1_path = os.path.join(args.testdir, "input1.ttl") in2_path = os.path.join(args.testdir, "input2.trig") check_path = os.path.join(args.testdir, "output.nq") -command = shlex.split(args.wrapper) + [args.serdi, in1_path, in2_path] +command = shlex.split(args.wrapper) + [args.tool, in1_path, in2_path] def _show_diff(from_lines, to_lines, from_filename, to_filename): diff --git a/test/test_node_syntax.c b/test/test_node_syntax.c index a9829688..9875e656 100644 --- a/test/test_node_syntax.c +++ b/test/test_node_syntax.c @@ -66,6 +66,9 @@ test_common(const SerdSyntax syntax) assert(test(syntax, serd_new_token(SERD_BLANK, SERD_STRING("b0")), "_:b0")); + assert(test( + syntax, serd_new_token(SERD_BLANK, SERD_STRING("named1")), "_:named1")); + assert(test(syntax, serd_new_uri(SERD_STRING("http://example.org/")), "<http://example.org/>")); diff --git a/test/test_quiet.py b/test/test_quiet.py index 7f141943..b88f0270 100755 --- a/test/test_quiet.py +++ b/test/test_quiet.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -"""Test serdi quiet option.""" +"""Test quiet command-line option.""" import argparse import sys @@ -9,12 +9,12 @@ import subprocess parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument("--serdi", default="./serdi", help="path to serdi") +parser.add_argument("--tool", default="tools/serd-pipe", help="executable") parser.add_argument("--wrapper", default="", help="executable wrapper") parser.add_argument("input", help="invalid input file") args = parser.parse_args(sys.argv[1:]) -command = shlex.split(args.wrapper) + [args.serdi, "-q", args.input] +command = shlex.split(args.wrapper) + [args.tool, "-q", args.input] proc = subprocess.run(command, check=False, capture_output=True) assert proc.returncode != 0 diff --git a/test/test_sort.py b/test/test_sort.py new file mode 100755 index 00000000..4080b93c --- /dev/null +++ 
b/test/test_sort.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 + +"""Run the collation tests for serd-sort.""" + +import argparse +import os +import random +import shlex +import subprocess +import sys +import tempfile + +import serd_test_util + +collations = [ + "GOPS", + "GOSP", + "GPSO", + "GSOP", + "GSPO", + "OPS", + "OSP", + "POS", + "PSO", + "SOP", + "SPO", + "pretty", +] + + +def check(test_dir, command_prefix, out_dir, input_path, name): + """Sort a single input in the named order and check the output. + + The expected output is assumed to exist at test_dir/NAME.nq. + """ + + output_path = os.path.join(out_dir, name + ".nq") + result_path = os.path.join(test_dir, name + ".nq") + options = [] if name == "pretty" else ["-c", name] + + # Randomly add irrelevant options just to cover them + if random.choice([True, False]): + options += ["-R", "http://example.org/"] + if random.choice([True, False]): + options += ["-I", "TriG"] + + command = command_prefix + options + ["-o", output_path, input_path] + + proc = subprocess.run(command, capture_output=True, check=False) + if proc.returncode != 0: + cmd_string = " ".join(shlex.quote(c) for c in command) + serd_test_util.error("Unexpected failure: {}".format(cmd_string)) + sys.stderr.write(proc.stderr.decode("utf-8")) + return False + + if not serd_test_util.file_equals(result_path, output_path): + serd_test_util.error( + "Output {} differs from {}\n".format(output_path, result_path) + ) + return False + + return True + + +def run_tests(test_dir, command_prefix, out_dir): + """Run all the tests in the suite.""" + + input_trig = os.path.join(test_dir, "input.trig") + + n_failures = 0 + for name in collations: + if not check(test_dir, command_prefix, out_dir, input_trig, name): + n_failures += 1 + + return n_failures + + +def main(): + """Run the command line tool.""" + + parser = argparse.ArgumentParser( + usage="%(prog)s [OPTION]... 
INPUT", + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + + parser.add_argument( + "--tool", default="tools/serd-sort", help="serd-sort executable" + ) + + parser.add_argument("--wrapper", default="", help="executable wrapper") + parser.add_argument( + "input", help="path to input.trig in the test directory" + ) + + args = parser.parse_args(sys.argv[1:]) + wrapper_prefix = shlex.split(args.wrapper) + command_prefix = wrapper_prefix + [args.tool] + + with tempfile.TemporaryDirectory() as out_dir: + return run_tests(os.path.dirname(args.input), command_prefix, out_dir) + + +if __name__ == "__main__": + try: + sys.exit(main()) + except subprocess.CalledProcessError as error: + if error.stderr is not None: + sys.stderr.write(error.stderr.decode("utf-8")) + + sys.stderr.write("error: %s\n" % error) + sys.exit(error.returncode) diff --git a/test/test_stdin.py b/test/test_stdin.py index 11b1ca21..2161a95a 100755 --- a/test/test_stdin.py +++ b/test/test_stdin.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -"""Test reading from stdin with serdi.""" +"""Test reading from stdin with serd-pipe.""" import argparse import sys @@ -10,15 +10,15 @@ import tempfile parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument("--serdi", default="./serdi", help="path to serdi") +parser.add_argument("--tool", default="tools/serd-pipe", help="executable") parser.add_argument("--wrapper", default="", help="executable wrapper") args = parser.parse_args(sys.argv[1:]) command = shlex.split(args.wrapper) + [ - args.serdi, - "-I", + args.tool, + "-B", "http://example.org", - "-i", + "-I", "ntriples", "-", ] diff --git a/test/test_write_error.py b/test/test_write_error.py index 35b4693b..bc955ce9 100755 --- a/test/test_write_error.py +++ b/test/test_write_error.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -"""Test errors writing to a file.""" +"""Test errors when writing to a file.""" import argparse import sys @@ -10,12 +10,12 @@ import os parser = 
argparse.ArgumentParser(description=__doc__) -parser.add_argument("--serdi", default="./serdi", help="path to serdi") +parser.add_argument("--tool", default="tools/serd-pipe", help="executable") parser.add_argument("--wrapper", default="", help="executable wrapper") parser.add_argument("input", help="valid input file") args = parser.parse_args(sys.argv[1:]) -command = shlex.split(args.wrapper) + [args.serdi, args.input] +command = shlex.split(args.wrapper) + [args.tool, args.input] if os.path.exists("/dev/full"): diff --git a/tools/console.c b/tools/console.c index ea5fd7ee..f1e78d75 100644 --- a/tools/console.c +++ b/tools/console.c @@ -26,9 +26,64 @@ # include <io.h> #endif +#include <errno.h> +#include <limits.h> #include <stdint.h> +#include <stdlib.h> #include <string.h> +SerdStatus +serd_tool_setup(SerdTool* const tool, + const char* const program, + SerdCommonOptions options) +{ + // Open the output first, since if that fails we have nothing to do + const char* const out_path = options.out_filename; + if (!(tool->out = serd_open_output(out_path, options.block_size))) { + fprintf(stderr, + "%s: failed to open output file (%s)\n", + program, + strerror(errno)); + return SERD_ERR_UNKNOWN; + } + + // We have something to write to, so build the writing environment + if (!(tool->world = serd_world_new()) || + !(tool->env = + serd_create_env(program, options.base_uri, options.out_filename)) || + !(tool->writer = serd_writer_new( + tool->world, + serd_choose_syntax( + tool->world, options.output, options.out_filename, SERD_NQUADS), + options.output.flags, + tool->env, + tool->out))) { + fprintf(stderr, "%s: failed to set up writing environment\n", program); + return SERD_ERR_INTERNAL; + } + + return SERD_SUCCESS; +} + +SerdStatus +serd_tool_cleanup(const SerdTool tool) +{ + SerdStatus st = SERD_SUCCESS; + if (tool.out) { + // Close the output stream explicitly to check if there were any errors + if (serd_byte_sink_close(tool.out)) { + perror("write error"); + st = 
SERD_ERR_BAD_WRITE; + } + } + + serd_writer_free(tool.writer); + serd_env_free(tool.env); + serd_world_free(tool.world); + serd_byte_sink_free(tool.out); + return st; +} + void serd_set_stream_utf8_mode(FILE* const stream) { @@ -39,7 +94,7 @@ serd_set_stream_utf8_mode(FILE* const stream) #endif } -int +SerdStatus serd_print_version(const char* const program) { printf("%s %d.%d.%d <http://drobilla.net/software/serd>\n", @@ -53,7 +108,43 @@ serd_print_version(const char* const program) "This is free software; you are free to change and redistribute it.\n" "There is NO WARRANTY, to the extent permitted by law.\n"); - return 0; + return SERD_FAILURE; +} + +SerdStatus +serd_get_argument(OptionIter* const iter, const char** const argument) +{ + const char flag = iter->argv[iter->a][iter->f++]; + + if (iter->argv[iter->a][iter->f] || (iter->a + 1) == iter->argc) { + fprintf( + stderr, "%s: option requires an argument -- %c\n", iter->argv[0], flag); + return SERD_ERR_BAD_ARG; + } + + *argument = iter->argv[++iter->a]; + ++iter->a; + iter->f = 1; + return SERD_SUCCESS; +} + +SerdStatus +serd_get_size_argument(OptionIter* const iter, size_t* const argument) +{ + SerdStatus st = SERD_SUCCESS; + const char* string = NULL; + if ((st = serd_get_argument(iter, &string))) { + return st; + } + + char* endptr = NULL; + const long size = strtol(string, &endptr, 10); + if (size <= 0 || size == LONG_MAX || *endptr != '\0') { + return SERD_ERR_BAD_ARG; + } + + *argument = (size_t)size; + return SERD_SUCCESS; } SerdStatus @@ -89,8 +180,26 @@ serd_set_input_option(const SerdStringView name, } } - // SERDI_ERRORF("invalid input option `%s'\n", name.buf); - return SERD_FAILURE; + return SERD_ERR_BAD_ARG; +} + +SerdStatus +serd_parse_input_argument(OptionIter* const iter, + SerdSyntaxOptions* const options) +{ + SerdStatus st = SERD_SUCCESS; + const char* argument = NULL; + + if (!(st = serd_get_argument(iter, &argument))) { + if ((st = serd_set_input_option( + SERD_STRING(argument), 
&options->syntax, &options->flags))) { + fprintf(stderr, "%s: unknown option \"%s\"\n", iter->argv[0], argument); + } else if (!strcmp(argument, "empty") || options->syntax) { + options->overridden = true; + } + } + + return st; } SerdStatus @@ -126,16 +235,90 @@ serd_set_output_option(const SerdStringView name, } } + return SERD_ERR_BAD_ARG; +} + +SerdStatus +serd_parse_output_argument(OptionIter* const iter, + SerdSyntaxOptions* const options) +{ + SerdStatus st = SERD_SUCCESS; + const char* argument = NULL; + + if (!(st = serd_get_argument(iter, &argument))) { + if ((st = serd_set_output_option( + SERD_STRING(argument), &options->syntax, &options->flags))) { + fprintf(stderr, "%s: unknown option \"%s\"\n", iter->argv[0], argument); + } else if (!strcmp(argument, "empty") || options->syntax) { + options->overridden = true; + } + } + + return st; +} + +SerdStatus +serd_parse_common_option(OptionIter* const iter, SerdCommonOptions* const opts) +{ + const char opt = iter->argv[iter->a][iter->f]; + switch (opt) { + case 'B': + return serd_get_argument(iter, &opts->base_uri); + + case 'I': + return serd_parse_input_argument(iter, &opts->input); + + case 'O': + return serd_parse_output_argument(iter, &opts->output); + + case 'b': + return serd_get_size_argument(iter, &opts->block_size); + + case 'k': + return serd_get_size_argument(iter, &opts->stack_size); + + case 'o': + return serd_get_argument(iter, &opts->out_filename); + + default: + break; + } + return SERD_FAILURE; } +SerdEnv* +serd_create_env(const char* const program, + const char* const base_string, + const char* const out_filename) +{ + const bool is_rebase = base_string && !strcmp(base_string, "rebase"); + if (is_rebase && !out_filename) { + fprintf(stderr, "%s: rebase requires an output filename\n", program); + return NULL; + } + + if (base_string && serd_uri_string_has_scheme(base_string)) { + return serd_env_new(SERD_STRING(base_string)); + } + + SerdEnv* const env = serd_env_new(SERD_EMPTY_STRING()); + 
serd_set_base_uri_from_path(env, is_rebase ? out_filename : base_string); + return env; +} + SerdSyntax -serd_choose_input_syntax(SerdWorld* const world, - const SerdSyntax requested, - const char* const filename) +serd_choose_syntax(SerdWorld* const world, + const SerdSyntaxOptions options, + const char* const filename, + const SerdSyntax fallback) { - if (requested) { - return requested; + if (options.overridden || options.syntax != SERD_SYNTAX_EMPTY) { + return options.syntax; + } + + if (!filename || !strcmp(filename, "-")) { + return fallback; } const SerdSyntax guessed = serd_guess_syntax(filename); @@ -202,17 +385,90 @@ serd_open_output(const char* const filename, const size_t block_size) SerdStatus serd_set_base_uri_from_path(SerdEnv* const env, const char* const path) { - char* const input_path = serd_canonical_path(path); - if (!input_path) { + const size_t path_len = path ? strlen(path) : 0u; + if (!path_len) { return SERD_ERR_BAD_ARG; } - SerdNode* const file_uri = - serd_new_file_uri(SERD_STRING(input_path), SERD_EMPTY_STRING()); + char* const real_path = serd_canonical_path(path); + if (!real_path) { + return SERD_ERR_BAD_ARG; + } + + const size_t real_path_len = strlen(real_path); + SerdNode* base_node = NULL; + if (path[path_len - 1] == '/' || path[path_len - 1] == '\\') { + char* const base_path = (char*)calloc(real_path_len + 2, 1); + memcpy(base_path, real_path, real_path_len); + base_path[real_path_len] = path[path_len - 1]; + + base_node = serd_new_file_uri(SERD_STRING(base_path), SERD_EMPTY_STRING()); + free(base_path); + } else { + base_node = serd_new_file_uri(SERD_STRING(real_path), SERD_EMPTY_STRING()); + } - serd_env_set_base_uri(env, serd_node_string_view(file_uri)); - serd_node_free(file_uri); - serd_free(input_path); + serd_env_set_base_uri(env, serd_node_string_view(base_node)); + serd_node_free(base_node); + serd_free(real_path); return SERD_SUCCESS; } + +SerdStatus +serd_read_source(SerdWorld* const world, + const SerdCommonOptions 
opts, + SerdEnv* const env, + const SerdSyntax syntax, + SerdByteSource* const in, + const SerdSink* const sink) +{ + SerdReader* const reader = serd_reader_new( + world, syntax, opts.input.flags, env, sink, opts.stack_size); + + SerdStatus st = serd_reader_start(reader, in); + if (!st) { + st = serd_reader_read_document(reader); + } + + serd_reader_free(reader); + return st; +} + +SerdStatus +serd_read_inputs(SerdWorld* const world, + const SerdCommonOptions opts, + SerdEnv* const env, + const intptr_t n_inputs, + char* const* const inputs, + const SerdSink* const sink) +{ + SerdStatus st = SERD_SUCCESS; + + for (intptr_t i = 0; !st && i < n_inputs; ++i) { + // Use the filename as the base URI if possible if user didn't override it + const char* const in_path = inputs[i]; + if (!opts.base_uri[0] && strcmp(in_path, "-")) { + serd_set_base_uri_from_path(env, in_path); + } + + // Open the input stream + SerdByteSource* const in = serd_open_input(in_path, opts.block_size); + if (!in) { + return SERD_ERR_BAD_ARG; + } + + // Read the entire file + st = serd_read_source( + world, + opts, + env, + serd_choose_syntax(world, opts.input, in_path, SERD_TRIG), + in, + sink); + + serd_byte_source_free(in); + } + + return st; +} diff --git a/tools/console.h b/tools/console.h index 16f6fd14..cb227e8e 100644 --- a/tools/console.h +++ b/tools/console.h @@ -16,28 +16,108 @@ #include "serd/serd.h" +#include <stdbool.h> +#include <stdint.h> #include <stdio.h> +// Iterator over command-line options with support for BSD-style flag merging +typedef struct { + char* const* argv; ///< Complete argument vector (from main) + int argc; ///< Total number of arguments (from main) + int a; ///< Argument index (index into argv) + int f; ///< Flag index (offset in argv[arg]) +} OptionIter; + +// Options for the input or output syntax +typedef struct { + SerdSyntax syntax; ///< User-specified syntax, or empty + uint32_t flags; ///< SerdReaderFlags or SerdWriterFlags + bool overridden; ///< True if 
syntax was explicitly given +} SerdSyntaxOptions; + +// Options common to all command-line tools +typedef struct { + const char* base_uri; + const char* out_filename; + size_t block_size; + size_t stack_size; + SerdSyntaxOptions input; + SerdSyntaxOptions output; +} SerdCommonOptions; + +// Common "global" state of a command-line tool that writes data +typedef struct { + SerdByteSink* out; + SerdWorld* world; + SerdEnv* env; + SerdWriter* writer; +} SerdTool; + +static inline bool +serd_option_iter_is_end(const OptionIter iter) +{ + return iter.a >= iter.argc || iter.argv[iter.a][0] != '-' || + !iter.argv[iter.a][iter.f]; +} + +static inline SerdStatus +serd_option_iter_advance(OptionIter* const iter) +{ + if (!iter->argv[iter->a][++iter->f]) { + ++iter->a; + iter->f = 1; + } + + return SERD_SUCCESS; +} + +SerdStatus +serd_tool_setup(SerdTool* tool, const char* program, SerdCommonOptions options); + +SerdStatus +serd_tool_cleanup(SerdTool tool); + void serd_set_stream_utf8_mode(FILE* stream); -int +SerdStatus serd_print_version(const char* program); SerdStatus +serd_get_argument(OptionIter* iter, const char** argument); + +SerdStatus +serd_get_size_argument(OptionIter* iter, size_t* argument); + +SerdStatus serd_set_input_option(SerdStringView name, SerdSyntax* syntax, SerdReaderFlags* flags); SerdStatus +serd_parse_input_argument(OptionIter* iter, SerdSyntaxOptions* options); + +SerdStatus serd_set_output_option(SerdStringView name, SerdSyntax* syntax, SerdWriterFlags* flags); +SerdStatus +serd_parse_output_argument(OptionIter* iter, SerdSyntaxOptions* options); + +SerdStatus +serd_parse_common_option(OptionIter* iter, SerdCommonOptions* opts); + +SerdEnv* +serd_create_env(const char* program, + const char* base_string, + const char* out_filename); + SerdSyntax -serd_choose_input_syntax(SerdWorld* world, - SerdSyntax requested, - const char* filename); +serd_choose_syntax(SerdWorld* world, + SerdSyntaxOptions options, + const char* filename, + SerdSyntax 
fallback); SerdByteSource* serd_open_input(const char* filename, size_t block_size); @@ -47,3 +127,19 @@ serd_open_output(const char* filename, size_t block_size); SerdStatus serd_set_base_uri_from_path(SerdEnv* env, const char* path); + +SerdStatus +serd_read_source(SerdWorld* world, + SerdCommonOptions opts, + SerdEnv* env, + SerdSyntax syntax, + SerdByteSource* in, + const SerdSink* sink); + +SerdStatus +serd_read_inputs(SerdWorld* world, + SerdCommonOptions opts, + SerdEnv* env, + intptr_t n_inputs, + char* const* inputs, + const SerdSink* sink); diff --git a/tools/meson.build b/tools/meson.build index 3054364a..d4964784 100644 --- a/tools/meson.build +++ b/tools/meson.build @@ -5,9 +5,29 @@ if get_option('static') tool_link_args += ['-static'] endif -serdi = executable('serdi', - ['serdi.c', 'console.c'], - c_args: tool_c_args, - link_args: tool_link_args, - install: true, - dependencies: serd_dep) +tools = [ + 'filter', + 'pipe', + 'sort', +] + +serd_filter = executable('serd-filter', + ['serd-filter.c', 'console.c'], + c_args: tool_c_args, + link_args: tool_link_args, + install: true, + dependencies: serd_dep) + +serd_pipe = executable('serd-pipe', + ['serd-pipe.c', 'console.c'], + c_args: tool_c_args, + link_args: tool_link_args, + install: true, + dependencies: serd_dep) + +serd_sort = executable('serd-sort', + ['serd-sort.c', 'console.c'], + c_args: tool_c_args, + link_args: tool_link_args, + install: true, + dependencies: serd_dep) diff --git a/tools/serd-filter.c b/tools/serd-filter.c new file mode 100644 index 00000000..789d3149 --- /dev/null +++ b/tools/serd-filter.c @@ -0,0 +1,287 @@ +/* + Copyright 2011-2021 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. 
+ + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "console.h" + +#include "serd/serd.h" + +#include <stdarg.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +/* Application (after parsing command-line arguments) */ + +// All options +typedef struct { + SerdCommonOptions common; + const char* pattern; + const char* pattern_file; + char* const* inputs; + intptr_t n_inputs; + bool invert; +} Options; + +// A single statement pattern +typedef struct { + SerdNode* s; + SerdNode* p; + SerdNode* o; + SerdNode* g; +} FilterPattern; + +// Handler for events read from a pattern +static SerdStatus +on_pattern_event(void* const handle, const SerdEvent* const event) +{ + if (event->type == SERD_STATEMENT) { + FilterPattern* const pat = (FilterPattern*)handle; + if (pat->s) { + return SERD_ERR_INVALID; + } + + const SerdStatement* const statement = event->statement.statement; + pat->s = serd_node_copy(serd_statement_subject(statement)); + pat->p = serd_node_copy(serd_statement_predicate(statement)); + pat->o = serd_node_copy(serd_statement_object(statement)); + pat->g = serd_node_copy(serd_statement_graph(statement)); + } + + return SERD_SUCCESS; +} + +// Parse a pattern from some input and return a new filter for it +static SerdSink* +parse_pattern(SerdWorld* const world, + const SerdSink* const sink, + SerdByteSource* const byte_source, + const bool inclusive) +{ + SerdEnv* const env = serd_env_new(SERD_EMPTY_STRING()); + FilterPattern pat = {NULL, NULL, NULL, NULL}; + 
SerdSink* in_sink = serd_sink_new(&pat, on_pattern_event, NULL); + SerdReader* reader = serd_reader_new( + world, SERD_NQUADS, SERD_READ_VARIABLES, env, in_sink, 4096); + + SerdStatus st = serd_reader_start(reader, byte_source); + if (!st) { + st = serd_reader_read_document(reader); + } + + serd_reader_free(reader); + serd_env_free(env); + serd_sink_free(in_sink); + + if (st) { + serd_logf(world, + SERD_LOG_LEVEL_ERROR, + "failed to parse pattern (%s)", + serd_strerror(st)); + return NULL; + } + + SerdSink* filter = + serd_filter_new(sink, pat.s, pat.p, pat.o, pat.g, inclusive); + + serd_node_free(pat.s); + serd_node_free(pat.p); + serd_node_free(pat.o); + serd_node_free(pat.g); + return filter; +} + +SERD_LOG_FUNC(2, 3) +static SerdStatus +log_error(SerdWorld* const world, const char* const fmt, ...) +{ + va_list args; + va_start(args, fmt); + + const SerdLogField file = {"SERD_FILE", "serd-filter"}; + const SerdStatus st = + serd_vxlogf(world, SERD_LOG_LEVEL_ERROR, 1, &file, fmt, args); + + va_end(args); + return st; +} + +// Run the tool using the given options +static SerdStatus +run(Options opts) +{ + SerdTool app = {NULL, NULL, NULL, NULL}; + + // Set up the writing environment + SerdStatus st = SERD_SUCCESS; + if ((st = serd_tool_setup(&app, "serd-filter", opts.common))) { + serd_tool_cleanup(app); + return st; + } + + const SerdSink* const target = serd_writer_sink(app.writer); + + // Open the pattern input (either a string or filename) + SerdByteSource* const pattern = + opts.pattern ? serd_byte_source_new_string(opts.pattern, NULL) + : opts.pattern_file + ? 
serd_byte_source_new_filename(opts.pattern_file, opts.common.block_size) + : NULL; + if (!pattern) { + log_error(app.world, "failed to open pattern"); + return SERD_ERR_UNKNOWN; + } + + // Set up the output pipeline: filter -> writer + SerdSink* const filter = + parse_pattern(app.world, target, pattern, !opts.invert); + if (!filter) { + log_error(app.world, "failed to set up filter"); + return SERD_ERR_UNKNOWN; + } + + // Read all the inputs, which drives the writer to emit the output + if (!(st = serd_read_inputs(app.world, + opts.common, + app.env, + opts.n_inputs, + opts.inputs, + filter))) { + st = serd_writer_finish(app.writer); + } + + if (st) { + log_error(app.world, "failed to read input (%s)", serd_strerror(st)); + } + + serd_sink_free(filter); + serd_byte_source_free(pattern); + + const SerdStatus cst = serd_tool_cleanup(app); + return st ? st : cst; +} + +/* Command-line interface (before setting up serd) */ + +static int +print_usage(const char* const name, const bool error) +{ + static const char* const description = + "Search for statements matching PATTERN in each INPUT.\n" + "INPUT can be a local filename, or \"-\" to read from standard input.\n\n" + " -B BASE_URI Base URI or path for resolving relative references.\n" + " -I SYNTAX Input syntax (turtle/ntriples/trig/nquads),\n" + " or option (lax/variables/relative/global/generated).\n" + " -O SYNTAX Output syntax (empty/turtle/ntriples/nquads),\n" + " or option (ascii/expanded/verbatim/terse/lax).\n" + " -V Display version information and exit.\n" + " -f PATTERN_FILE Read pattern from PATTERN_FILE instead.\n" + " -h Display this help and exit.\n" + " -k BYTES Parser stack size.\n" + " -o FILENAME Write output to FILENAME instead of stdout.\n" + " -v Invert filter to select non-matching statements.\n"; + + FILE* const os = error ? stderr : stdout; + fprintf(os, "%s", error ? "\n" : ""); + fprintf(os, "Usage: %s [OPTION]... PATTERN INPUT...\n", name); + fprintf(os, " %s [OPTION]... 
-f PATTERN_FILE INPUT...\n", name); + fprintf(os, "\n%s", description); + return error ? EXIT_FAILURE : EXIT_SUCCESS; +} + +// Parse the option pointed to by `iter`, and advance it to the next one +static SerdStatus +parse_option(OptionIter* const iter, Options* const opts) +{ +#define ARG_ERRORF(fmt, ...) \ + fprintf(stderr, "%s: " fmt, iter->argv[0], __VA_ARGS__) + + SerdStatus st = serd_parse_common_option(iter, &opts->common); + if (st != SERD_FAILURE) { + return st; + } + + const char opt = iter->argv[iter->a][iter->f]; + + switch (opt) { + case 'V': + return serd_print_version("serd-filter"); + + case 'f': + return serd_get_argument(iter, &opts->pattern_file); + + case 'h': + print_usage(iter->argv[0], false); + return SERD_FAILURE; + + case 'v': + opts->invert = true; + return serd_option_iter_advance(iter); + + default: + break; + } + + ARG_ERRORF("invalid option -- '%c'\n", opt); + return SERD_ERR_BAD_ARG; + +#undef ARG_ERRORF +} + +int +main(int argc, char** argv) +{ + Options opts = {{"", + NULL, + 4096u, + 1048576u, + {SERD_SYNTAX_EMPTY, 0u, false}, + {SERD_NQUADS, 0u, false}}, + NULL, + NULL, + NULL, + 0u, + false}; + + // Parse all command line options (which must precede inputs) + SerdStatus st = SERD_SUCCESS; + OptionIter iter = {argv, argc, 1, 1}; + while (!serd_option_iter_is_end(iter)) { + if ((st = parse_option(&iter, &opts))) { + return (st == SERD_FAILURE) ? 
0 : print_usage(argv[0], true); + } + } + + // If -f isn't used, then the first positional argument is the pattern + if (!opts.pattern_file) { + opts.pattern = argv[iter.a++]; + } + + // Every argument past that is an input + opts.inputs = argv + iter.a; + opts.n_inputs = argc - iter.a; + if (opts.n_inputs == 0) { + fprintf(stderr, "%s: missing input\n", argv[0]); + return print_usage(argv[0], true); + } + + // Don't add prefixes to blank node labels if there is only one input + if (opts.n_inputs == 1) { + opts.common.input.flags |= SERD_READ_GLOBAL; + } + + return run(opts) > SERD_FAILURE; +} diff --git a/tools/serd-pipe.c b/tools/serd-pipe.c new file mode 100644 index 00000000..75b3e0d4 --- /dev/null +++ b/tools/serd-pipe.c @@ -0,0 +1,209 @@ +/* + Copyright 2011-2021 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/ + +#include "console.h" + +#include "serd/serd.h" + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> + +/* Application (after parsing command-line arguments) */ + +// All options +typedef struct { + SerdCommonOptions common; + const char* root_uri; + const char* input_string; + char* const* inputs; + intptr_t n_inputs; + bool canonical; + bool quiet; +} Options; + +// Run the tool using the given options +static SerdStatus +run(const Options opts) +{ + SerdTool app = {NULL, NULL, NULL, NULL}; + + // Set up the writing environment + SerdStatus st = SERD_SUCCESS; + if ((st = serd_tool_setup(&app, "serd-pipe", opts.common))) { + serd_tool_cleanup(app); + return st; + } + + if (opts.quiet) { + serd_set_log_func(app.world, serd_quiet_log_func, NULL); + } + + serd_writer_set_root_uri(app.writer, SERD_STRING(opts.root_uri)); + + // Set up the output pipeline: [canon] -> writer + const SerdSink* const target = serd_writer_sink(app.writer); + const SerdSink* sink = target; + SerdSink* canon = NULL; + if (opts.canonical) { + canon = serd_canon_new(app.world, target, opts.common.input.flags); + sink = canon; + } + + if (opts.input_string) { + SerdByteSource* const in = + serd_byte_source_new_string(opts.input_string, NULL); + + st = serd_read_source( + app.world, + opts.common, + app.env, + serd_choose_syntax(app.world, opts.common.input, NULL, SERD_TRIG), + in, + sink); + + serd_byte_source_free(in); + } + + // Read all the inputs, which drives the writer to emit the output + if (st || + (st = serd_read_inputs( + app.world, opts.common, app.env, opts.n_inputs, opts.inputs, sink)) || + (st = serd_writer_finish(app.writer))) { + serd_tool_cleanup(app); + return st; + } + + return serd_tool_cleanup(app); +} + +/* Command-line interface (before setting up serd) */ + +static int +print_usage(const char* const name, const bool error) +{ + static const char* const description = + "Read and write RDF data.\n" + "INPUT can be a local filename, or \"-\" to read from 
standard input.\n\n" + " -B BASE_URI Base URI or path for resolving relative references.\n" + " -C Convert literals to canonical form.\n" + " -I SYNTAX Input syntax (turtle/ntriples/trig/nquads),\n" + " or option (lax/variables/relative/global/generated).\n" + " -O SYNTAX Output syntax (empty/turtle/ntriples/nquads),\n" + " or option (ascii/expanded/verbatim/terse/lax).\n" + " -R ROOT_URI Keep relative URIs within ROOT_URI.\n" + " -V Display version information and exit.\n" + " -b BYTES I/O block size.\n" + " -h Display this help and exit.\n" + " -k BYTES Parser stack size.\n" + " -o FILENAME Write output to FILENAME instead of stdout.\n" + " -q Suppress warning and error output.\n" + " -s STRING Parse STRING as input.\n"; + + FILE* const os = error ? stderr : stdout; + fprintf(os, "%s", error ? "\n" : ""); + fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name); + fprintf(os, "%s", description); + return error; +} + +// Parse the option pointed to by `iter`, and advance it to the next one +static SerdStatus +parse_option(OptionIter* const iter, Options* const opts) +{ +#define ARG_ERRORF(fmt, ...) 
\ + fprintf(stderr, "%s: " fmt, iter->argv[0], __VA_ARGS__) + + SerdStatus st = serd_parse_common_option(iter, &opts->common); + if (st != SERD_FAILURE) { + return st; + } + + const char opt = iter->argv[iter->a][iter->f]; + switch (opt) { + case 'C': + opts->canonical = true; + return serd_option_iter_advance(iter); + + case 'R': + return serd_get_argument(iter, &opts->root_uri); + + case 'V': + return serd_print_version("serd-pipe"); + + case 'h': + print_usage(iter->argv[0], false); + return SERD_FAILURE; + + case 'q': + opts->quiet = true; + return serd_option_iter_advance(iter); + + case 's': + return serd_get_argument(iter, &opts->input_string); + + default: + break; + } + + ARG_ERRORF("invalid option -- '%c'\n", opt); + return SERD_ERR_BAD_ARG; + +#undef ARG_ERRORF +} + +int +main(const int argc, char* const* const argv) +{ + char* const default_input[] = {"-"}; + + Options opts = {{"", + NULL, + 4096u, + 1048576u, + {SERD_SYNTAX_EMPTY, 0u, false}, + {SERD_SYNTAX_EMPTY, 0u, false}}, + "", + NULL, + NULL, + 0u, + false, + false}; + + // Parse all command line options (which must precede inputs) + SerdStatus st = SERD_SUCCESS; + OptionIter iter = {argv, argc, 1, 1}; + while (!serd_option_iter_is_end(iter)) { + if ((st = parse_option(&iter, &opts))) { + return (st == SERD_FAILURE) ? 
0 : print_usage(argv[0], true); + } + } + + // Every argument past the last option is an input + opts.inputs = argv + iter.a; + opts.n_inputs = argc - iter.a; + if (opts.n_inputs + (bool)opts.input_string == 0) { + opts.n_inputs = 1; + opts.inputs = default_input; + } + + // Don't add prefixes to blank node labels if there is only one input + if (opts.n_inputs + (bool)opts.input_string == 1) { + opts.common.input.flags |= SERD_READ_GLOBAL; + } + + return run(opts) > SERD_FAILURE; +} diff --git a/tools/serd-sort.c b/tools/serd-sort.c new file mode 100644 index 00000000..deb79cb5 --- /dev/null +++ b/tools/serd-sort.c @@ -0,0 +1,274 @@ +/* + Copyright 2011-2021 David Robillard <d@drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/ + +#include "console.h" + +#include "serd/serd.h" + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +/* Application (after parsing command-line arguments) */ + +// All options +typedef struct { + SerdCommonOptions common; + const char* root_uri; + const char* input_string; + const char* collation; + char* const* inputs; + intptr_t n_inputs; + SerdStatementOrder order; + SerdDescribeFlags flags; +} Options; + +static bool +input_has_graphs(const Options opts) +{ + if (opts.common.input.syntax) { + return serd_syntax_has_graphs(opts.common.input.syntax); + } + + for (intptr_t i = 0u; i < opts.n_inputs; ++i) { + if (serd_syntax_has_graphs(serd_guess_syntax(opts.inputs[i]))) { + return true; + } + } + + return false; +} + +// Run the tool using the given options +static SerdStatus +run(const Options opts) +{ + SerdTool app = {NULL, NULL, NULL, NULL}; + + // Set up the writing environment + SerdStatus st = SERD_SUCCESS; + if ((st = serd_tool_setup(&app, "serd-sort", opts.common))) { + serd_tool_cleanup(app); + return st; + } + + // Determine the default order to store statements in the model + const bool with_graphs = input_has_graphs(opts); + const SerdStatementOrder default_order = opts.collation ? opts.order + : with_graphs ? 
SERD_ORDER_GSPO + : SERD_ORDER_SPO; + + const SerdModelFlags flags = + (SerdModelFlags)(with_graphs * SERD_STORE_GRAPHS); + + SerdModel* const model = serd_model_new(app.world, default_order, flags); + + if (!opts.collation) { + // If we are pretty-printing, we need an O** index + serd_model_add_index(model, SERD_ORDER_OPS); + + if (with_graphs) { + // If we have graphs we still need the SPO index for finding subjects + serd_model_add_index(model, SERD_ORDER_SPO); + } + } + + // Read all the inputs into an inserter to load the model + SerdSink* const inserter = serd_inserter_new(model, NULL); + if (st || (st = serd_read_inputs(app.world, + opts.common, + app.env, + opts.n_inputs, + opts.inputs, + inserter))) { + serd_tool_cleanup(app); + return st; + } + + // Write the model to the output + const SerdSink* const target = serd_writer_sink(app.writer); + if (opts.collation) { + SerdCursor* const cursor = serd_model_begin_ordered(model, opts.order); + + serd_env_write_prefixes(app.env, target); + + for (const SerdStatement* statement = NULL; + !st && (statement = serd_cursor_get(cursor)); + serd_cursor_advance(cursor)) { + st = serd_sink_write_statement(target, 0u, statement); + } + + serd_cursor_free(cursor); + } else { + SerdCursor* const cursor = serd_model_begin(model); + + serd_env_write_prefixes(app.env, target); + + st = serd_describe_range(cursor, target, opts.flags); + + serd_cursor_free(cursor); + } + + if (!st) { + st = serd_writer_finish(app.writer); + } + + const SerdStatus cst = serd_tool_cleanup(app); + return st ? 
st : cst; +} + +/* Command-line interface (before setting up serd) */ + +static SerdStatus +parse_statement_order(const char* const string, SerdStatementOrder* const order) +{ + static const char* const strings[] = {"SPO", + "SOP", + "OPS", + "OSP", + "PSO", + "POS", + "GSPO", + "GSOP", + "GOPS", + "GOSP", + "GPSO", + "GPOS", + NULL}; + + for (unsigned i = 0; strings[i]; ++i) { + if (!strcmp(string, strings[i])) { + *order = (SerdStatementOrder)i; + return SERD_SUCCESS; + } + } + + return SERD_ERR_BAD_ARG; +} + +static int +print_usage(const char* const name, const bool error) +{ + static const char* const description = + "Reorder RDF data by loading everything into a model then writing it.\n" + "INPUT can be a local filename, or \"-\" to read from standard input.\n\n" + " -B BASE_URI Base URI or path for resolving relative references.\n" + " -I SYNTAX Input syntax (turtle/ntriples/trig/nquads),\n" + " or option (lax/variables/relative/global/generated).\n" + " -O SYNTAX Output syntax (empty/turtle/ntriples/nquads),\n" + " or option (ascii/expanded/verbatim/terse/lax).\n" + " -V Display version information and exit.\n" + " -b BYTES I/O block size.\n" + " -c COLLATION An optional \"G\" then the letters \"SPO\" in any order.\n" + " -h Display this help and exit.\n" + " -k BYTES Parser stack size.\n" + " -o FILENAME Write output to FILENAME instead of stdout.\n"; + + FILE* const os = error ? stderr : stdout; + fprintf(os, "%s", error ? "\n" : ""); + fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name); + fprintf(os, "%s", description); + return error; +} + +// Parse the option pointed to by `iter`, and advance it to the next one +static SerdStatus +parse_option(OptionIter* const iter, Options* const opts) +{ +#define ARG_ERRORF(fmt, ...) 
\ + fprintf(stderr, "%s: " fmt, iter->argv[0], __VA_ARGS__) + + SerdStatus st = serd_parse_common_option(iter, &opts->common); + if (st != SERD_FAILURE) { + return st; + } + + const char opt = iter->argv[iter->a][iter->f]; + switch (opt) { + case 'R': + return serd_get_argument(iter, &opts->root_uri); + + case 'V': + return serd_print_version("serd-sort"); + + case 'c': + if (!(st = serd_get_argument(iter, &opts->collation))) { + if ((st = parse_statement_order(opts->collation, &opts->order))) { + ARG_ERRORF("unknown collation \"%s\"\n", opts->collation); + return st; + } + } + return st; + + case 'h': + print_usage(iter->argv[0], false); + return SERD_FAILURE; + + case 's': + return serd_get_argument(iter, &opts->input_string); + + default: + break; + } + + ARG_ERRORF("invalid option -- '%c'\n", opt); + return SERD_ERR_BAD_ARG; + +#undef ARG_ERRORF +} + +int +main(const int argc, char* const* const argv) +{ + Options opts = {{"", + NULL, + 4096u, + 1048576u, + {SERD_SYNTAX_EMPTY, 0u, false}, + {SERD_SYNTAX_EMPTY, 0u, false}}, + "", + NULL, + NULL, + NULL, + 0u, + SERD_ORDER_SPO, + 0u}; + + // Parse all command line options (which must precede inputs) + SerdStatus st = SERD_SUCCESS; + OptionIter iter = {argv, argc, 1, 1}; + while (!serd_option_iter_is_end(iter)) { + if ((st = parse_option(&iter, &opts))) { + return (st == SERD_FAILURE) ? 
0 : print_usage(argv[0], true); + } + } + + // Every argument past the last option is an input + opts.inputs = argv + iter.a; + opts.n_inputs = argc - iter.a; + if (opts.n_inputs + (bool)opts.input_string == 0) { + fprintf(stderr, "%s: missing input\n", argv[0]); + return print_usage(argv[0], true); + } + + // Don't add prefixes to blank node labels if there is only one input + if (opts.n_inputs + (bool)opts.input_string == 1) { + opts.common.input.flags |= SERD_READ_GLOBAL; + } + + return run(opts) > SERD_FAILURE; +} diff --git a/tools/serdi.c b/tools/serdi.c deleted file mode 100644 index b1542727..00000000 --- a/tools/serdi.c +++ /dev/null @@ -1,502 +0,0 @@ -/* - Copyright 2011-2021 David Robillard <d@drobilla.net> - - Permission to use, copy, modify, and/or distribute this software for any - purpose with or without fee is hereby granted, provided that the above - copyright notice and this permission notice appear in all copies. - - THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ - -#include "console.h" - -#include "serd/serd.h" - -#include <errno.h> -#include <limits.h> -#include <stdbool.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg) -#define SERDI_ERRORF(fmt, ...) 
fprintf(stderr, "serdi: " fmt, __VA_ARGS__) - -typedef struct { - SerdNode* s; - SerdNode* p; - SerdNode* o; - SerdNode* g; -} FilterPattern; - -static int -print_usage(const char* const name, const bool error) -{ - static const char* const description = - "Read and write RDF syntax.\n" - "Use - for INPUT to read from standard input.\n\n" - " -C Convert literals to canonical form.\n" - " -F PATTERN Filter out statements that match PATTERN.\n" - " -G PATTERN Only include statements matching PATTERN.\n" - " -I BASE_URI Input base URI.\n" - " -b BYTES I/O block size.\n" - " -f Fast and loose mode (possibly ugly output).\n" - " -h Display this help and exit.\n" - " -i SYNTAX Input syntax (turtle/ntriples/trig/nquads),\n" - " or flag (lax/variables/verbatim).\n" - " -k BYTES Parser stack size.\n" - " -m Build a model in memory before writing.\n" - " -o SYNTAX Output syntax (empty/turtle/ntriples/nquads),\n" - " or flag (ascii/expanded/verbatim/terse/lax).\n" - " -q Suppress all output except data.\n" - " -r ROOT_URI Keep relative URIs within ROOT_URI.\n" - " -s STRING Parse STRING as input.\n" - " -v Display version information and exit.\n" - " -w FILENAME Write output to FILENAME instead of stdout.\n"; - - FILE* const os = error ? stderr : stdout; - fprintf(os, "%s", error ? "\n" : ""); - fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name); - fprintf(os, "%s", description); - return error ? 
1 : 0; -} - -static int -missing_arg(const char* const name, const char opt) -{ - SERDI_ERRORF("option requires an argument -- '%c'\n", opt); - return print_usage(name, true); -} - -static SerdStatus -on_filter_event(void* const handle, const SerdEvent* const event) -{ - if (event->type == SERD_STATEMENT) { - FilterPattern* const pat = (FilterPattern*)handle; - if (pat->s) { - return SERD_ERR_INVALID; - } - - const SerdStatement* const statement = event->statement.statement; - pat->s = serd_node_copy(serd_statement_subject(statement)); - pat->p = serd_node_copy(serd_statement_predicate(statement)); - pat->o = serd_node_copy(serd_statement_object(statement)); - pat->g = serd_node_copy(serd_statement_graph(statement)); - } - - return SERD_SUCCESS; -} - -static SerdSink* -parse_filter(SerdWorld* const world, - const SerdSink* const sink, - const char* const str, - const bool inclusive) -{ - SerdEnv* const env = serd_env_new(SERD_EMPTY_STRING()); - FilterPattern pat = {NULL, NULL, NULL, NULL}; - SerdSink* in_sink = serd_sink_new(&pat, on_filter_event, NULL); - SerdByteSource* byte_source = serd_byte_source_new_string(str, NULL); - SerdReader* reader = serd_reader_new( - world, SERD_NQUADS, SERD_READ_VARIABLES, env, in_sink, 4096); - - SerdStatus st = serd_reader_start(reader, byte_source); - if (!st) { - st = serd_reader_read_document(reader); - } - - serd_reader_free(reader); - serd_env_free(env); - serd_byte_source_free(byte_source); - serd_sink_free(in_sink); - - if (st) { - return NULL; - } - - SerdSink* filter = - serd_filter_new(sink, pat.s, pat.p, pat.o, pat.g, inclusive); - - serd_node_free(pat.s); - serd_node_free(pat.p); - serd_node_free(pat.o); - serd_node_free(pat.g); - return filter; -} - -static SerdStatus -read_file(SerdWorld* const world, - const SerdSyntax syntax, - const SerdReaderFlags flags, - SerdEnv* const env, - const SerdSink* const sink, - const size_t stack_size, - const char* const filename, - const size_t block_size) -{ - SerdByteSource* 
byte_source = serd_open_input(filename, block_size); - - if (!byte_source) { - SERDI_ERRORF( - "failed to open input file `%s' (%s)\n", filename, strerror(errno)); - - return SERD_ERR_UNKNOWN; - } - - SerdReader* reader = - serd_reader_new(world, syntax, flags, env, sink, stack_size); - - SerdStatus st = serd_reader_start(reader, byte_source); - - st = st ? st : serd_reader_read_document(reader); - - serd_reader_free(reader); - serd_byte_source_free(byte_source); - - return st; -} - -int -main(int argc, char** argv) -{ - const char* const prog = argv[0]; - - SerdNode* base = NULL; - SerdSyntax input_syntax = SERD_SYNTAX_EMPTY; - SerdSyntax output_syntax = SERD_SYNTAX_EMPTY; - SerdReaderFlags reader_flags = 0; - SerdWriterFlags writer_flags = 0; - bool no_inline = false; - bool osyntax_set = false; - bool use_model = false; - bool canonical = false; - bool quiet = false; - size_t block_size = 4096u; - size_t stack_size = 4194304; - const char* input_string = NULL; - const char* in_pattern = NULL; - const char* out_pattern = NULL; - const char* root_uri = NULL; - const char* out_filename = NULL; - int a = 1; - for (; a < argc && argv[a][0] == '-'; ++a) { - if (argv[a][1] == '\0') { - break; - } - - for (int o = 1; argv[a][o]; ++o) { - const char opt = argv[a][o]; - - if (opt == 'C') { - canonical = true; - } else if (opt == 'f') { - no_inline = true; - writer_flags |= (SERD_WRITE_EXPANDED | SERD_WRITE_VERBATIM); - } else if (opt == 'h') { - return print_usage(prog, false); - } else if (argv[a][1] == 'm') { - use_model = true; - } else if (opt == 'q') { - quiet = true; - } else if (opt == 'v') { - return serd_print_version(argv[0]); - } else if (argv[a][1] == 'F') { - if (++a == argc) { - return missing_arg(argv[0], 'F'); - } - - out_pattern = argv[a]; - break; - } else if (argv[a][1] == 'G') { - if (++a == argc) { - return missing_arg(argv[0], 'g'); - } - - in_pattern = argv[a]; - break; - } else if (argv[a][1] == 'I') { - if (++a == argc) { - return 
missing_arg(prog, 'I'); - } - - base = serd_new_uri(SERD_STRING(argv[a])); - break; - } else if (opt == 'b') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'b'); - } - - char* endptr = NULL; - const long size = strtol(argv[a], &endptr, 10); - if (size < 1 || size == LONG_MAX || *endptr != '\0') { - SERDI_ERRORF("invalid block size `%s'\n", argv[a]); - return 1; - } - block_size = (size_t)size; - break; - } else if (opt == 'i') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'i'); - } - - if (serd_set_input_option( - SERD_STRING(argv[a]), &input_syntax, &reader_flags)) { - return print_usage(argv[0], true); - } - break; - } else if (opt == 'k') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'k'); - } - - char* endptr = NULL; - const long size = strtol(argv[a], &endptr, 10); - if (size <= 0 || size == LONG_MAX || *endptr != '\0') { - SERDI_ERRORF("invalid stack size `%s'\n", argv[a]); - return 1; - } - stack_size = (size_t)size; - break; - } else if (opt == 'o') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'o'); - } - - if (serd_set_output_option( - SERD_STRING(argv[a]), &output_syntax, &writer_flags)) { - return print_usage(argv[0], true); - } - - osyntax_set = - output_syntax != SERD_SYNTAX_EMPTY || !strcmp(argv[a], "empty"); - - break; - } else if (opt == 'r') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 'r'); - } - - root_uri = argv[a]; - break; - } else if (opt == 's') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(prog, 's'); - } - - input_string = argv[a]; - break; - } else if (opt == 'w') { - if (argv[a][o + 1] || ++a == argc) { - return missing_arg(argv[0], 'w'); - } - - out_filename = argv[a]; - break; - } else { - SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1); - return print_usage(prog, true); - } - } - } - - if (in_pattern && out_pattern) { - SERDI_ERROR("only one of -F and -G can be given at once\n"); - return 1; - } - - if 
(a == argc && !input_string) { - SERDI_ERROR("missing input\n"); - return print_usage(prog, true); - } - - char* const* const inputs = argv + a; - const int n_inputs = argc - a; - - bool input_has_graphs = serd_syntax_has_graphs(input_syntax); - for (int i = a; i < argc; ++i) { - if (serd_syntax_has_graphs(serd_guess_syntax(argv[i]))) { - input_has_graphs = true; - break; - } - } - - if (!output_syntax && !osyntax_set) { - output_syntax = input_has_graphs ? SERD_NQUADS : SERD_NTRIPLES; - } - - if (!base && n_inputs == 1 && - (output_syntax == SERD_NQUADS || output_syntax == SERD_NTRIPLES)) { - // Choose base URI from the single input path - char* const input_path = serd_canonical_path(inputs[0]); - if (!input_path || !(base = serd_new_file_uri(SERD_STRING(input_path), - SERD_EMPTY_STRING()))) { - SERDI_ERRORF("unable to determine base URI from path %s\n", inputs[0]); - } - serd_free(input_path); - } - - SerdWorld* const world = serd_world_new(); - SerdEnv* const env = - serd_env_new(base ? serd_node_string_view(base) : SERD_EMPTY_STRING()); - - serd_set_stream_utf8_mode(stdin); - if (!out_filename) { - serd_set_stream_utf8_mode(stdout); - } - - const SerdDescribeFlags describe_flags = - no_inline ? SERD_NO_INLINE_OBJECTS : 0u; - - SerdByteSink* const byte_sink = serd_open_output(out_filename, block_size); - if (!byte_sink) { - perror("serdi: error opening output file"); - return 1; - } - - SerdWriter* const writer = - serd_writer_new(world, output_syntax, writer_flags, env, byte_sink); - - SerdModel* model = NULL; - SerdSink* inserter = NULL; - const SerdSink* out_sink = NULL; - if (use_model) { - const SerdModelFlags flags = (input_has_graphs ? 
SERD_STORE_GRAPHS : 0u); - - model = serd_model_new(world, SERD_ORDER_SPO, flags); - if (input_has_graphs) { - serd_model_add_index(model, SERD_ORDER_GSPO); - } - - if (!no_inline) { - serd_model_add_index(model, SERD_ORDER_OPS); - if (input_has_graphs) { - serd_model_add_index(model, SERD_ORDER_GOPS); - } - } - - inserter = serd_inserter_new(model, NULL); - out_sink = inserter; - } else { - out_sink = serd_writer_sink(writer); - } - - const SerdSink* sink = out_sink; - - SerdSink* canon = NULL; - if (canonical) { - sink = canon = serd_canon_new(world, out_sink, reader_flags); - } - - SerdSink* filter = NULL; - if (in_pattern) { - if (!(filter = parse_filter(world, sink, in_pattern, true))) { - SERDI_ERROR("error parsing inclusive filter pattern\n"); - return EXIT_FAILURE; - } - - sink = filter; - } else if (out_pattern) { - if (!(filter = parse_filter(world, sink, out_pattern, false))) { - SERDI_ERROR("error parsing exclusive filter pattern\n"); - return EXIT_FAILURE; - } - - sink = filter; - } - - if (quiet) { - serd_set_log_func(world, serd_quiet_log_func, NULL); - } - - if (root_uri) { - serd_writer_set_root_uri(writer, SERD_STRING(root_uri)); - } - - SerdStatus st = SERD_SUCCESS; - if (input_string) { - SerdByteSource* const byte_source = - serd_byte_source_new_string(input_string, NULL); - - SerdReader* const reader = - serd_reader_new(world, - input_syntax ? 
input_syntax : SERD_TRIG, - reader_flags, - env, - sink, - stack_size); - - if (!(st = serd_reader_start(reader, byte_source))) { - st = serd_reader_read_document(reader); - } - - serd_reader_free(reader); - serd_byte_source_free(byte_source); - } - - if (n_inputs == 1) { - reader_flags |= SERD_READ_GLOBAL; - } - - for (int i = 0; !st && i < n_inputs; ++i) { - if (!base && strcmp(inputs[i], "-")) { - if ((st = serd_set_base_uri_from_path(env, inputs[i]))) { - SERDI_ERRORF("failed to set base URI from path %s\n", inputs[i]); - break; - } - } - - if ((st = - read_file(world, - serd_choose_input_syntax(world, input_syntax, inputs[i]), - reader_flags, - env, - sink, - stack_size, - inputs[i], - block_size))) { - break; - } - } - - if (st <= SERD_FAILURE && use_model) { - const SerdSink* writer_sink = serd_writer_sink(writer); - SerdCursor* everything = serd_model_begin_ordered( - model, input_has_graphs ? SERD_ORDER_GSPO : SERD_ORDER_SPO); - - serd_env_write_prefixes(env, writer_sink); - - st = serd_describe_range( - everything, - writer_sink, - describe_flags | - ((output_syntax == SERD_NTRIPLES || output_syntax == SERD_NQUADS) - ? SERD_NO_INLINE_OBJECTS - : 0u)); - - serd_cursor_free(everything); - } - - serd_sink_free(canon); - serd_sink_free(filter); - serd_sink_free(inserter); - serd_model_free(model); - serd_writer_free(writer); - serd_env_free(env); - serd_node_free(base); - serd_world_free(world); - - if (serd_byte_sink_close(byte_sink)) { - perror("serdi: write error"); - st = SERD_ERR_UNKNOWN; - } - - serd_byte_sink_free(byte_sink); - - return (st > SERD_FAILURE) ? 1 : 0; -} |