aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2021-10-21 15:38:10 -0400
committerDavid Robillard <d@drobilla.net>2022-01-28 21:57:07 -0500
commitb404312686874e539b617d1f27ccbaa5a82936af (patch)
treec2fdb2cc046e6da53071629cd1750dcc327e6cd9
parentd4aec28ba8ad24d5aef3ee12beeb1b805148eab1 (diff)
downloadserd-b404312686874e539b617d1f27ccbaa5a82936af.tar.gz
serd-b404312686874e539b617d1f27ccbaa5a82936af.tar.bz2
serd-b404312686874e539b617d1f27ccbaa5a82936af.zip
Replace serdi with more fine-grained tools
Especially with the new functionality, the complexity of the command-line interface alone was really becoming unmanageable. The serdi implementation also had the highest cyclomatic complexity of the entire codebase by a huge margin. So, take a page from the Unix philosophy and split serdi into several more finely-honed tools that can be freely composed. Though there is still unfortunately quite a bit of option overlap between them due to the common details of reading RDF, I think the resulting tools are a lot easier to understand, both from a user and a developer perspective.
-rw-r--r--.gitlab-ci.yml9
-rw-r--r--README.md6
-rw-r--r--doc/serd-filter.1185
-rw-r--r--doc/serd-pipe.1349
-rw-r--r--doc/serd-sort.1194
-rw-r--r--doc/serdi.1265
-rw-r--r--meson.build6
-rwxr-xr-xscripts/serd_bench.py24
-rw-r--r--serd.ttl2
-rw-r--r--src/.clang-tidy2
-rw-r--r--test/NQuadsTests/meson.build13
-rw-r--r--test/NTriplesTests/meson.build13
-rw-r--r--test/TriGTests/meson.build13
-rw-r--r--test/TurtleTests/meson.build13
-rw-r--r--test/bad/meson.build11
-rw-r--r--test/canon/meson.build4
-rw-r--r--test/filter/input.ttl9
-rw-r--r--test/filter/manifest.ttl48
-rw-r--r--test/filter/meson.build15
-rw-r--r--test/filter/o1.pattern.nt1
-rw-r--r--test/filter/o1.result.nt2
-rw-r--r--test/filter/p1.pattern.nt1
-rw-r--r--test/filter/p1.result.nt2
-rw-r--r--test/filter/s1.pattern.nt1
-rw-r--r--test/filter/s1.result.nt2
-rw-r--r--test/good/meson.build13
-rw-r--r--test/lax/meson.build13
-rw-r--r--test/meson.build393
-rw-r--r--test/pattern/meson.build21
-rwxr-xr-xtest/run_filter_suite.py160
-rwxr-xr-xtest/run_pipe_suite.py (renamed from test/run_test_suite.py)183
-rwxr-xr-xtest/run_sort_suite.py234
-rw-r--r--test/serd_test_util/__init__.py71
-rw-r--r--test/sort/GOPS.nq10
-rw-r--r--test/sort/GOSP.nq10
-rw-r--r--test/sort/GPSO.nq10
-rw-r--r--test/sort/GSOP.nq10
-rw-r--r--test/sort/GSPO.nq10
-rw-r--r--test/sort/OPS.nq10
-rw-r--r--test/sort/OSP.nq10
-rw-r--r--test/sort/POS.nq10
-rw-r--r--test/sort/PSO.nq10
-rw-r--r--test/sort/SOP.nq10
-rw-r--r--test/sort/SPO.nq10
-rw-r--r--test/sort/input.trig19
-rw-r--r--test/sort/pretty.nq10
-rw-r--r--test/terse/meson.build29
-rwxr-xr-xtest/test_base.py46
-rwxr-xr-xtest/test_empty.py4
-rwxr-xr-xtest/test_filter.py14
-rwxr-xr-xtest/test_grep.py13
-rwxr-xr-xtest/test_multifile.py4
-rw-r--r--test/test_node_syntax.c3
-rwxr-xr-xtest/test_quiet.py6
-rwxr-xr-xtest/test_sort.py112
-rwxr-xr-xtest/test_stdin.py10
-rwxr-xr-xtest/test_write_error.py6
-rw-r--r--tools/console.c288
-rw-r--r--tools/console.h104
-rw-r--r--tools/meson.build32
-rw-r--r--tools/serd-filter.c287
-rw-r--r--tools/serd-pipe.c209
-rw-r--r--tools/serd-sort.c274
-rw-r--r--tools/serdi.c502
64 files changed, 3228 insertions, 1132 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 6e9fc1d9..7a745c08 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -47,8 +47,10 @@ x64_dbg:
- ninja -C build coverage-html
- mkdir -p build/doc/
- cp doc/*.svg build/doc/
- - cp doc/mandoc.css build/doc/
- - mandoc -Thtml -Werror -O style=mandoc.css doc/serdi.1 > build/doc/serdi.html
+ - cp doc/man/mandoc.css build/doc/
+ - mandoc -Thtml -Wwarning -O style=mandoc.css,man=%N.html doc/man/serd-filter.1 > build/doc/serd-filter.html
+ - mandoc -Thtml -Wwarning -O style=mandoc.css,man=%N.html doc/man/serd-pipe.1 > build/doc/serd-pipe.html
+ - mandoc -Thtml -Wwarning -O style=mandoc.css,man=%N.html doc/man/serd-sort.1 > build/doc/serd-sort.html
artifacts:
paths:
- build/doc
@@ -167,8 +169,7 @@ pages:
- mv build/meson-logs/coveragereport/ public/coverage
- mv build/doc/c/html/ public/c/html/
- mv build/doc/c/singlehtml/ public/c/singlehtml/
- - mv build/doc/serdi.html public/man/serdi.html
- - mv build/doc/mandoc.css public/man/mandoc.css
+ - mv build/doc/man/ public/man/
dependencies:
- x64_dbg
artifacts:
diff --git a/README.md b/README.md
index a02db6e0..c99b320a 100644
--- a/README.md
+++ b/README.md
@@ -36,10 +36,10 @@ Features
Performance
-----------
-The benchmarks below compare `serdi`, [rapper][], and [riot][] re-serialising
+The benchmarks below compare `serd-pipe`, [rapper][], and [riot][] rewriting
Turtle data generated by [sp2b][] on an i7-4980HQ running Debian 9. Of the
-three, `serdi` is the fastest by a wide margin, and the only one that uses a
-constant amount of memory (a single page) for all input sizes.
+three, `serd-pipe` is the fastest by a wide margin, and the only one that uses
+a constant amount of memory (a single page) for all input sizes.
![Time](doc/serdi-time.svg)
![Throughput](doc/serdi-throughput.svg)
diff --git a/doc/serd-filter.1 b/doc/serd-filter.1
new file mode 100644
index 00000000..44b3f861
--- /dev/null
+++ b/doc/serd-filter.1
@@ -0,0 +1,185 @@
+.Dd October 21, 2021
+.Dt SERD-FILTER 1
+.Os Serd
+.Sh NAME
+.Nm serd-filter
+.Nd print RDF statements that match a pattern
+.Sh SYNOPSIS
+.Nm serd-filter
+.Op Fl hVv
+.Op Fl B Ar base
+.Op Fl I Ar syntax
+.Op Fl O Ar syntax
+.Op Fl b Ar bytes
+.Op Fl f Ar pattern_file
+.Op Fl k Ar bytes
+.Op Fl o Ar filename
+.Ar pattern
+.Ar input ...
+.Sh DESCRIPTION
+.Nm
+scans for statements in RDF data.
+Its interface is similar to
+.Xr grep 1 ,
+except patterns are structural:
+instead of matching characters within a line,
+.Nm
+matches nodes within a statement.
+.Pp
+Data is read from files or standard input,
+and only those statements that match the pattern
+(or do not match the pattern, if
+.Fl v
+is given) are written.
+By default,
+the input syntax is guessed from the file extension,
+and line-based output is written to standard output.
+.Pp
+Patterns are written in NTriples or NQuads with an extension that allows variables like
+.Li ?some
+or
+.Li $thing .
+.Pp
+The
+.Ar input
+operands are processed in command-line order.
+If
+.Ar input
+is
+.Ar -
+or absent,
+.Nm
+reads from standard input.
+.Pp
+The options are as follows:
+.Pp
+.Bl -tag -compact -width 3n
+.It Fl B Ar base
+Base URI, path, or
+.Cm rebase
+to use the output path.
+See
+.Xr serd-pipe 1
+for details.
+.Pp
+.It Fl I Ar syntax
+Input syntax or option:
+.Cm NQuads ,
+.Cm NTriples ,
+.Cm TriG ,
+.Cm Turtle ,
+.Cm lax ,
+.Cm variables ,
+.Cm relative ,
+or
+.Cm labels .
+See
+.Xr serd-pipe 1
+for details.
+.Pp
+.It Fl O Ar syntax
+Output syntax or option:
+.Cm empty ,
+.Cm NQuads ,
+.Cm NTriples ,
+.Cm TriG ,
+.Cm Turtle ,
+.Cm ascii ,
+.Cm expanded ,
+.Cm verbatim ,
+.Cm terse ,
+or
+.Cm lax .
+See
+.Xr serd-pipe 1
+for details.
+.Pp
+.It Fl V
+Display version information and exit.
+.Pp
+.It Fl b Ar bytes
+I/O block size.
+See
+.Xr serd-pipe 1
+for details.
+.Pp
+.It Fl f Ar pattern_file
+Load pattern from
+.Ar pattern_file
+instead of the first positional argument.
+.Pp
+.It Fl h
+Print the command line options.
+.Pp
+.It Fl k Ar bytes
+Parser stack size.
+See
+.Xr serd-pipe 1
+for details.
+.Pp
+.It Fl o Ar filename
+Write output to the given
+.Ar filename
+instead of stdout.
+.Pp
+.It Fl v
+Invert filter to only emit statements that do
+.Em not
+match the pattern.
+.El
+.Sh EXIT STATUS
+.Nm
+exits with a status of 0, or non-zero if an error occurred.
+.Sh EXAMPLES
+To print all type statements:
+.Pp
+.Dl $ serd-filter '?subject a ?type .' input.ttl
+.Pp
+To print every statement about http://example.org/subject:
+.Pp
+.Dl $ serd-filter '<http://example.org/subject> ?p ?o .' input.ttl
+.Sh SEE ALSO
+.Bl -item -compact
+.It
+.Xr serd-pipe 1
+.It
+.Xr serd-sort 1
+.It
+.Lk http://drobilla.net/software/serd/
+.El
+.Sh STANDARDS
+.Bl -item -compact
+.It
+.Rs
+.%A W3C
+.%T RDF 1.1 NQuads
+.%D February 2014
+.Re
+.Lk https://www.w3.org/TR/n-quads/
+.It
+.Rs
+.%A W3C
+.%D February 2014
+.%T RDF 1.1 NTriples
+.Re
+.Lk https://www.w3.org/TR/n-triples/
+.It
+.Rs
+.%A W3C
+.%T RDF 1.1 TriG
+.%D February 2014
+.Re
+.Lk https://www.w3.org/TR/trig/
+.It
+.Rs
+.%A W3C
+.%D February 2014
+.%T RDF 1.1 Turtle
+.Re
+.Lk https://www.w3.org/TR/turtle/
+.El
+.Sh AUTHORS
+.Nm
+is a part of serd, by
+.An David Robillard
+.Mt d@drobilla.net .
diff --git a/doc/serd-pipe.1 b/doc/serd-pipe.1
new file mode 100644
index 00000000..c7f77c9e
--- /dev/null
+++ b/doc/serd-pipe.1
@@ -0,0 +1,349 @@
+.Dd October 21, 2021
+.Dt SERD-PIPE 1
+.Os Serd
+.Sh NAME
+.Nm serd-pipe
+.Nd read and write RDF data
+.Sh SYNOPSIS
+.Nm serd-pipe
+.Op Fl ChV
+.Op Fl B Ar base
+.Op Fl I Ar syntax
+.Op Fl O Ar syntax
+.Op Fl R Ar root
+.Op Fl b Ar bytes
+.Op Fl k Ar bytes
+.Op Fl o Ar filename
+.Op Fl s Ar string
+.Op Ar input ...
+.Sh DESCRIPTION
+.Nm
+is a fast command-line utility for streaming RDF data.
+It reads one or more files and writes the data again,
+possibly in a different form.
+By default,
+the input syntax is guessed from the file extension,
+and line-based output is written to standard output.
+.Pp
+.Nm
+writes statements as they are read, in the same order.
+It uses very little memory and can process arbitrarily large files,
+either directly or as part of a pipeline.
+It is useful for things like checking syntax,
+converting to a different syntax,
+pretty-printing documents,
+merging files,
+expanding URIs,
+and so on.
+.Pp
+The simplest usage is to use files for both input and output.
+This way, reasonable options are chosen by default based on the filename.
+For example, most common tasks can be accomplished with simple commands like:
+.Pp
+.Dl $ serd-pipe -o pretty.ttl input.nt
+.Pp
+The
+.Ar input
+operands are processed in command-line order.
+If
+.Ar input
+is
+.Ar -
+or absent,
+.Nm
+reads from standard input.
+.Pp
+The options are as follows:
+.Pp
+.Bl -tag -compact -width 3n
+.It Fl B Ar base
+Base URI, path, or
+.Cm rebase
+to use the output path.
+This is used to resolve any relative URI references in the input.
+.Pp
+If the input is a file,
+its URI is used as the base by default.
+This causes relative references to be written just as they are in the input.
+Note, however, that this may not be desired if the output is in a different directory.
+For example,
+.Li <file.ttl>
+would not point to the same file from the new location.
+.Pp
+The special
+.Cm rebase
+argument will instead use the output filename set by the
+.Fl o
+option.
+This will write references relative to the output file,
+so that parsing it will produce the same absolute URIs as the original input.
+For example,
+the above may be written as
+.Li <../file.ttl>
+if the output is written to some sibling directory.
+.Pp
+Generally, the default is best when copying data along with other bundled files,
+while
+.Cm rebase
+is best for writing data in a new location which still refers to the original paths.
+.Pp
+These options are intended to make the most common tasks as simple as possible.
+An arbitrary base URI can also be given explicitly.
+.Pp
+.It Fl C
+Convert literals to canonical form.
+Literals with supported XSD datatypes will be parsed and rewritten canonically.
+Invalid literals will cause an error.
+All numeric datatypes are supported, as well as
+.Vt boolean ,
+.Vt duration ,
+.Vt dateTime ,
+.Vt time ,
+.Vt hexBinary ,
+and
+.Vt base64Binary .
+.Pp
+.It Fl I Ar syntax
+Set an input syntax or option.
+May be given multiple times.
+The case-insensitive
+.Ar syntax
+can be
+.Cm NQuads ,
+.Cm NTriples ,
+.Cm TriG ,
+.Cm Turtle ,
+or one of the following options:
+.Pp
+.Bl -tag -width "QvariablesQ" -compact -offset indent
+.It Cm lax
+Tolerate invalid input where possible.
+Warnings will be printed for syntax errors,
+but parsing will attempt to continue.
+Note that data may be lost when using this option!
+.Pp
+.It Cm variables
+Support parsing variable nodes.
+Variables can be written in SPARQL style, for example
+.Li ?name
+or
+.Li $name .
+.Pp
+.It Cm relative
+Read relative URI references exactly without resolving them.
+Normally, all relative URIs are expanded against the base URI when reading.
+This flag disables that,
+so URI references will be passed through exactly as they are in the input.
+.Pp
+.It Cm global
+Assume a clean global namespace for blank node labels,
+and do not automatically add prefixes.
+Normally,
+a prefix like
+.Li f1
+is added to blank node labels when reading multiple files,
+to prevent labels in different files from clashing.
+This option disables that,
+so blank node labels will be passed through without any added prefix.
+Note that this may corrupt the output by merging distinct blank nodes.
+.Pp
+.It Cm generated
+Read seemingly generated blank node labels exactly without adjusting them.
+Normally, blank node labels like
+.Li b123
+are adapted to avoid potential clashes with generated ones.
+This flag disables that,
+so such labels will be passed through exactly as they are in the input.
+Note that this may corrupt the output by merging distinct blank nodes.
+.El
+.Pp
+.It Fl O Ar syntax
+Set an output syntax or option.
+May be given multiple times.
+The case-insensitive
+.Ar syntax
+can be
+.Cm empty ,
+.Cm NQuads ,
+.Cm NTriples ,
+.Cm TriG ,
+.Cm Turtle ,
+or one of the following options:
+.Pp
+.Bl -tag -width "QverbatimQ" -compact -offset indent
+.It Cm ascii
+Escape all non-ASCII characters.
+Normally, text is written in UTF-8.
+This flag will escape non-ASCII characters in text as Unicode code points like
+.Li \eu00B7 No or
+.Li \eU0001F600 .
+.Pp
+.It Cm expanded
+Write expanded URIs instead of prefixed names.
+.Pp
+.It Cm verbatim
+Write URI references exactly as they are in the input.
+This avoids resolving URIs and making them relative to the output base URI.
+.Pp
+.It Cm terse
+Write terser output without newlines.
+This can be useful for writing a line-based description of suitably structured data.
+.Pp
+.It Cm lax
+Tolerate invalid UTF-8 by writing the replacement character when necessary.
+Note that data may be lost when using this option!
+.El
+.Pp
+The
+.Cm empty
+syntax suppresses the output,
+so that only warnings and errors will be printed.
+.Pp
+.It Fl R Ar root
+Keep relative URIs within a
+.Ar root
+URI.
+This will avoid creating any relative URI references with leading path segments like
+.Pa ../
+that enter a parent of
+.Ar root .
+.Pp
+For example,
+if
+.Pa /home/you/file.ttl
+is written to the file
+.Pa /home/me/output.ttl
+using
+.Fl B Cm rebase ,
+then it will be written as
+.Li <../you/file.ttl> .
+Setting
+.Fl R Pa /home/me/
+would prevent references from
+.Dq escaping
+like this,
+so the above would instead be written as
+.Li <file:///home/you/file.ttl> .
+.Pp
+This is useful for making relocatable
+.Dq bundles
+of resources,
+since it can keep all relative references within the bundle,
+while still allowing up-references to be used.
+.Pp
+.It Fl V
+Display version information and exit.
+.Pp
+.It Fl b Ar bytes
+I/O block size.
+This is the number of bytes in a file that will be read or written at once.
+The default is 4096, which should perform well in most cases.
+Note that this only applies to files; standard input and output are always processed one byte at a time.
+.Pp
+.It Fl h
+Print the command line options.
+.Pp
+.It Fl k Ar bytes
+Parser stack size.
+For performance and security reasons, parsing is performed with a fixed-size stack.
+This option sets a hard limit on the total amount of space used for parsing.
+The default is 1 megabyte, which should be more than enough for most data.
+This option can be used to reduce memory consumption,
+or to enable parsing documents with extremely deep nesting or extremely large literal values.
+.Pp
+.It Fl o Ar filename
+Write output to the given
+.Ar filename
+instead of stdout.
+.Pp
+.It Fl s Ar string
+Parse
+.Ar string
+as input.
+.El
+.Sh ENVIRONMENT
+Error messages and warnings are printed in color by default if the output is a terminal.
+This can be controlled by common environment variables:
+.Pp
+.Bl -tag -compact -width 14n
+.It Ev NO_COLOR
+If present (regardless of value), color is disabled.
+.It Ev CLICOLOR
+If set to 0, color is disabled.
+.It Ev CLICOLOR_FORCE
+If set to anything other than 0, color is forced on.
+.El
+.Pp
+See
+.Lk http://no-color.org/
+and
+.Lk https://bixense.com/clicolors/
+for details.
+.Sh EXIT STATUS
+.Nm
+exits with a status of 0, or non-zero if an error occurred.
+.Sh EXAMPLES
+To print an NTriples file as Turtle:
+.Pp
+.Dl $ serd-pipe -O turtle input.nt
+.Pp
+To print only errors and discard the output:
+.Pp
+.Dl $ serd-pipe -O empty input.ttl
+.Pp
+To pretty-print a file:
+.Pp
+.Dl $ serd-pipe -o pretty.ttl input.ttl
+.Pp
+To expand all prefixed names into full URIs:
+.Pp
+.Dl $ serd-pipe -O expanded -o expanded.ttl input.ttl
+.Pp
+To merge two files:
+.Pp
+.Dl $ serd-pipe -o merged.ttl header.ttl body.ttl
+.Sh SEE ALSO
+.Bl -item -compact
+.It
+.Xr serd-filter 1
+.It
+.Xr serd-sort 1
+.It
+.Lk http://drobilla.net/software/serd/
+.El
+.Sh STANDARDS
+.Bl -item -compact
+.It
+.Rs
+.%A W3C
+.%T RDF 1.1 NQuads
+.%D February 2014
+.Re
+.Lk https://www.w3.org/TR/n-quads/
+.It
+.Rs
+.%A W3C
+.%D February 2014
+.%T RDF 1.1 NTriples
+.Re
+.Lk https://www.w3.org/TR/n-triples/
+.It
+.Rs
+.%A W3C
+.%T RDF 1.1 TriG
+.%D February 2014
+.Re
+.Lk https://www.w3.org/TR/trig/
+.It
+.Rs
+.%A W3C
+.%D February 2014
+.%T RDF 1.1 Turtle
+.Re
+.Lk https://www.w3.org/TR/turtle/
+.El
+.Sh AUTHORS
+.Nm
+is a part of serd, by
+.An David Robillard
+.Mt d@drobilla.net .
diff --git a/doc/serd-sort.1 b/doc/serd-sort.1
new file mode 100644
index 00000000..2d019ae3
--- /dev/null
+++ b/doc/serd-sort.1
@@ -0,0 +1,194 @@
+.Dd October 21, 2021
+.Dt SERD-SORT 1
+.Os Serd
+.Sh NAME
+.Nm serd-sort
+.Nd reorder RDF statements
+.Sh SYNOPSIS
+.Nm serd-sort
+.Op Fl htV
+.Op Fl B Ar base
+.Op Fl I Ar syntax
+.Op Fl O Ar syntax
+.Op Fl b Ar bytes
+.Op Fl c Ar collation
+.Op Fl f Ar pattern_file
+.Op Fl k Ar bytes
+.Op Fl o Ar filename
+.Ar pattern
+.Ar input ...
+.Sh DESCRIPTION
+.Nm
+reorders statements in RDF data by loading everything into memory then rewriting it.
+By default,
+a
+.Dq pretty
+ordering is used which is ideal for pretty-printing to Turtle or TriG.
+The
+.Fl c
+option can be used to request a specific ordering,
+which is mainly useful when emitting a line-based syntax like NTriples or NQuads in a pipeline.
+.Pp
+The
+.Ar input
+operands are processed in command-line order.
+If
+.Ar input
+is
+.Ar -
+or absent,
+.Nm
+reads from standard input.
+.Pp
+The options are as follows:
+.Pp
+.Bl -tag -compact -width 3n
+.It Fl B Ar base
+Base URI, path, or
+.Cm rebase
+to use the output path.
+See
+.Xr serd-pipe 1
+for details.
+.Pp
+.It Fl I Ar syntax
+Input syntax or option:
+.Cm NQuads ,
+.Cm NTriples ,
+.Cm TriG ,
+.Cm Turtle ,
+.Cm lax ,
+.Cm variables ,
+.Cm relative ,
+or
+.Cm labels .
+See
+.Xr serd-pipe 1
+for details.
+.Pp
+.It Fl O Ar syntax
+Output syntax or option:
+.Cm empty ,
+.Cm NQuads ,
+.Cm NTriples ,
+.Cm TriG ,
+.Cm Turtle ,
+.Cm ascii ,
+.Cm expanded ,
+.Cm verbatim ,
+.Cm terse ,
+or
+.Cm lax .
+See
+.Xr serd-pipe 1
+for details.
+.Pp
+.It Fl V
+Display version information and exit.
+.Pp
+.It Fl b Ar bytes
+I/O block size.
+See
+.Xr serd-pipe 1
+for details.
+.Pp
+.It Fl c Ar collation
+A specific collation (statement ordering) to use.
+This can be any ordering of the characters
+.Dq SPO ,
+which stand for the subject, predicate, and object of statements.
+Optionally,
+.Dq G
+can be added as the first character,
+which will sort graph-first.
+Concretely, the valid values are:
+.Cm SPO ,
+.Cm SOP ,
+.Cm OPS ,
+.Cm OSP ,
+.Cm PSO ,
+.Cm POS ,
+.Cm GSPO ,
+.Cm GSOP ,
+.Cm GOPS ,
+.Cm GOSP ,
+.Cm GPSO ,
+and
+.Cm GPOS .
+.Pp
+.It Fl h
+Print the command line options.
+.Pp
+.It Fl k Ar bytes
+Parser stack size.
+See
+.Xr serd-pipe 1
+for details.
+.Pp
+.It Fl o Ar filename
+Write output to the given
+.Ar filename
+instead of stdout.
+.Pp
+.It Fl t
+Do not write type as
+.Dq a
+before other properties.
+Instead, rdf:type will be written in order like any other property.
+.El
+.Sh EXIT STATUS
+.Nm
+exits with a status of 0, or non-zero if an error occurred.
+.Sh EXAMPLES
+To pretty-print a file:
+.Pp
+.Dl $ serd-sort -o pretty.ttl input.ttl
+.Pp
+To print statements ordered by predicate, subject, then object:
+.Pp
+.Dl $ serd-sort -c PSO input.ttl
+.Sh SEE ALSO
+.Bl -item -compact
+.It
+.Xr serd-pipe 1
+.It
+.Xr serd-filter 1
+.It
+.Lk http://drobilla.net/software/serd/
+.El
+.Sh STANDARDS
+.Bl -item -compact
+.It
+.Rs
+.%A W3C
+.%T RDF 1.1 NQuads
+.%D February 2014
+.Re
+.Lk https://www.w3.org/TR/n-quads/
+.It
+.Rs
+.%A W3C
+.%D February 2014
+.%T RDF 1.1 NTriples
+.Re
+.Lk https://www.w3.org/TR/n-triples/
+.It
+.Rs
+.%A W3C
+.%T RDF 1.1 TriG
+.%D February 2014
+.Re
+.Lk https://www.w3.org/TR/trig/
+.It
+.Rs
+.%A W3C
+.%D February 2014
+.%T RDF 1.1 Turtle
+.Re
+.Lk https://www.w3.org/TR/turtle/
+.El
+.Sh AUTHORS
+.Nm
+is a part of serd, by
+.An David Robillard
+.Mt d@drobilla.net .
diff --git a/doc/serdi.1 b/doc/serdi.1
deleted file mode 100644
index c6356953..00000000
--- a/doc/serdi.1
+++ /dev/null
@@ -1,265 +0,0 @@
-.Dd April 14, 2021
-.Dt SERDI 1
-.Os Serd 0.30.11
-.Sh NAME
-.Nm serdi
-.Nd read, filter, transform, and write RDF data
-.Sh SYNOPSIS
-.Nm serdi
-.Op Fl Cfhmqv
-.Op Fl F Ar pattern | Fl G Ar pattern
-.Op Fl I Ar base
-.Op Fl b Ar bytes
-.Op Fl i Ar syntax
-.Op Fl k Ar bytes
-.Op Fl o Ar syntax
-.Op Fl r Ar root
-.Op Fl s Ar string
-.Op Fl w Ar filename
-.Ar input ...
-.Sh DESCRIPTION
-.Nm
-is a fast command-line utility for processing RDF data.
-It reads one or more documents and writes the data again,
-possibly transformed and/or in a different syntax.
-By default,
-the input syntax is guessed from the file extension,
-and output is written in NTriples or NQuads.
-.Pp
-.Nm
-can be used to check for syntax errors,
-convert from one syntax to another,
-pretty-print documents,
-or transform URIs and blank node IDs.
-.Pp
-The options are as follows:
-.Pp
-.Bl -tag -compact -width 3n
-.It Fl C
-Convert literals to canonical form.
-Literals with supported XSD datatypes will be parsed and rewritten canonically.
-All numeric datatypes are supported, as well as
-.Vt boolean ,
-.Vt duration ,
-.Vt datetime ,
-.Vt time ,
-.Vt hexBinary ,
-and
-.Vt base64Binary .
-.Pp
-.It Fl F Ar pattern
-Filter out statements that match
-.Ar pattern .
-The pattern must be a single statement written in NTriples or NQuads,
-with variables like
-.Dq ?name
-for wildcards.
-The names of variables in the pattern are insignificant.
-.Pp
-.It Fl G Ar pattern
-Only include statements that match
-.Ar pattern .
-This option is like
-.Fl p
-but inverted,
-so that only matching statements are included, like grep.
-.Pp
-.It Fl I Ar base
-Input base URI.
-Relative URI references in the input will be resolved against this.
-When the input is a file,
-the URI of the file is automatically used as the base URI.
-This option can be used to override that,
-or to provide a base URI for input from stdin or a string.
-.Pp
-.It Fl b Ar bytes
-I/O block size.
-This is the number of bytes in a file that will be read or written at once.
-The default is 4096, which should perform well in most cases.
-Note that this only applies to files, standard input and output are always processed one byte at a time.
-.Pp
-.It Fl f
-Fast and loose mode.
-This disables shortening URIs into prefixed names or relative URI references.
-If the model is enabled, then this writes the model quickly in sorted order.
-Note that doing so with TriG or Turtle may make the output ugly,
-since blank nodes will not be inlined.
-.Pp
-.It Fl h
-Print the command line options.
-.Pp
-.It Fl i Ar syntax
-Set an input syntax option.
-May be given multiple times.
-The case-insensitive
-.Ar syntax
-can be either a syntax name or an input syntax option.
-The supported syntaxes are
-.Dq NQuads ,
-.Dq NTriples ,
-.Dq TriG ,
-and
-.Dq Turtle .
-.Pp
-The supported input options are:
-.Pp
-.Bl -tag -width "QvariablesQ" -compact -offset indent
-.It Dq lax
-Tolerate invalid input where possible.
-Warnings will be printed on syntax errors,
-but parsing will attempt to continue.
-Note that data may be lost when using this option!
-.Pp
-.It Dq variables
-Support parsing variable nodes.
-Variables can be written in SPARQL style, for example
-.Dq ?var
-or
-.Dq $var .
-.Pp
-.It Dq verbatim
-Normally, the reader expands all relative URIs,
-and may adjust blank node labels to avoid clashing with generated ones.
-This flag disables all of this processing,
-so that URI references and blank nodes are passed to the sink exactly as they are in the input.
-Note that this does not apply to CURIEs, since serd deliberately does not
-have a way to represent CURIE nodes. A bad namespace prefix is considered
-a syntax error.
-.El
-.Pp
-.It Fl k Ar bytes
-Parser stack size.
-For performance and security reasons, parsing is performed with a fixed-size stack.
-By default, the stack is 4096 bytes, which should be sufficient for most data.
-If some data has very deep nesting or very large literal values,
-it may exceed the default amount of space,
-and this option can be used to increase it and allow the document to be parsed successfully.
-.Pp
-.It Fl m
-Build a model in memory.
-This loads all of the input into memory before writing the output.
-This will reorder statements and eliminate duplicates, at the cost of performance and memory consumption.
-When writing TriG or Turtle, this may enable better pretty-printing with more inline descriptions.
-.Pp
-.It Fl o Ar syntax
-Set an output syntax option.
-May be given multiple times.
-The case-insensitive
-.Ar syntax
-can be either a syntax name or an output syntax option.
-The supported syntaxes are
-.Dq empty ,
-.Dq NQuads ,
-.Dq NTriples ,
-.Dq TriG ,
-and
-.Dq Turtle .
-.Pp
-The supported output options are:
-.Pp
-.Bl -tag -width "QverbatimQ" -compact -offset indent
-.It Dq ascii
-Escape all non-ASCII characters.
-.Pp
-.It Dq expanded
-Write expanded URIs instead of prefixed names.
-.Pp
-.It Dq verbatim
-Write URI references exactly as they are in the input.
-This avoids resolving URIs and making them relative to the output base URI.
-.Pp
-.It Dq terse
-Write terser output without newlines.
-.Pp
-.It Dq lax
-Tolerate invalid UTF-8 by writing the replacement character when necessary.
-Note that data may be lost when using this option!
-.El
-.Pp
-.It Fl q
-Suppress all output except data.
-.Pp
-.It Fl r Ar root
-Keep relative URIs within a
-.Ar root
-URI.
-This will avoid creating any relative URI references with leading path segments like
-.Dq ../
-that enter a parent of
-.Ar root .
-.Pp
-.It Fl s Ar string
-Parse
-.Ar string
-as input.
-.Pp
-.It Fl v
-Display version information and exit.
-.Pp
-.It Fl w Ar filename
-Write output to the given
-.Ar filename
-instead of stdout.
-.El
-.Sh EXIT STATUS
-.Nm
-exits with a status of 0, or non-zero if an error occured.
-.Sh EXAMPLES
-To pretty-print a document:
-.Pp
-.Dl $ serdi -o turtle file.ttl > out.ttl
-.Pp
-To print any errors:
-.Pp
-.Dl $ serdi file.ttl > /dev/null
-.Pp
-To remove any rdf:type properties:
-.Pp
-.Dl $ serdi -F \(dq?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ?o .\(dq file.ttl
-.Pp
-To include only rdf:type properties:
-.Pp
-.Dl $ serdi -G \(dq?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ?o .\(dq file.ttl
-.Sh SEE ALSO
-.Bl -item -compact
-.It
-.Lk http://drobilla.net/software/serd/
-.It
-.Lk http://gitlab.com/drobilla/serd/
-.El
-.Sh STANDARDS
-.Bl -item
-.It
-.Rs
-.%A W3C
-.%T RDF 1.1 NQuads
-.%D February 2014
-.Re
-.Lk https://www.w3.org/TR/n-quads/
-.It
-.Rs
-.%A W3C
-.%D February 2014
-.%T RDF 1.1 NTriples
-.Re
-.Lk https://www.w3.org/TR/n-triples/
-.It
-.Rs
-.%A W3C
-.%T RDF 1.1 TriG
-.%D February 2014
-.Re
-.Lk https://www.w3.org/TR/trig/
-.It
-.Rs
-.%A W3C
-.%D February 2014
-.%T RDF 1.1 Turtle
-.Re
-.Lk https://www.w3.org/TR/turtle/
-.El
-.Sh AUTHORS
-.Nm
-is a part of serd, by
-.An David Robillard
-.Mt d@drobilla.net .
diff --git a/meson.build b/meson.build
index bddb7392..f75ce560 100644
--- a/meson.build
+++ b/meson.build
@@ -195,12 +195,14 @@ pkg.generate(
version: meson.project_version(),
description: 'A lightweight library for working with RDF')
-# Build serdi command line utility
+# Build command line tools
if get_option('tools')
subdir('tools')
if not get_option('docs').disabled()
- install_man('doc/serdi.1')
+ install_man('doc/serd-filter.1')
+ install_man('doc/serd-pipe.1')
+ install_man('doc/serd-sort.1')
endif
endif
diff --git a/scripts/serd_bench.py b/scripts/serd_bench.py
index 8a10dab0..25b75fe8 100755
--- a/scripts/serd_bench.py
+++ b/scripts/serd_bench.py
@@ -43,7 +43,7 @@ def gen(sp2b_dir, n_min, n_max, step):
def write_header(results, progs):
"Write the header line for TSV output"
- results.write("n\tserdi_stream\tserdi_model")
+ results.write("n\tserd-pipe\tserd-sort")
for prog in progs[2:]:
results.write("\t" + os.path.basename(prog.split()[0]))
results.write("\n")
@@ -125,9 +125,9 @@ def run(progs, n_min, n_max, step):
"Benchmark each program with n_min ... n_max statements"
with WorkingDirectory("build"):
results = {
- "time": open("serdi-time.txt", "w"),
- "throughput": open("serdi-throughput.txt", "w"),
- "memory": open("serdi-memory.txt", "w"),
+ "time": open("serd-time.txt", "w"),
+ "throughput": open("serd-throughput.txt", "w"),
+ "memory": open("serd-memory.txt", "w"),
}
# Write TSV header for all output files
@@ -169,20 +169,20 @@ def plot_results():
"Plot all benchmark results"
with WorkingDirectory("build"):
plot(
- open("serdi-time.txt", "r"),
- "serdi-time.svg",
+ open("serd-time.txt", "r"),
+ "serd-time.svg",
"Statements",
"Time (s)",
)
plot(
- open("serdi-throughput.txt", "r"),
- "serdi-throughput.svg",
+ open("serd-throughput.txt", "r"),
+ "serd-throughput.svg",
"Statements",
"Statements / s",
)
plot(
- open("serdi-memory.txt", "r"),
- "serdi-memory.svg",
+ open("serd-memory.txt", "r"),
+ "serd-memory.svg",
"Statements",
"Bytes",
)
@@ -226,8 +226,8 @@ example:
args = ap.parse_args(sys.argv[1:])
progs = [
- "serdi -i turtle -o turtle",
- "serdi -m -i turtle -o turtle",
+ "serd-pipe -I turtle -O turtle",
+ "serd-sort -I turtle -O turtle",
] + args.run
min_n = int(args.max / 10)
diff --git a/serd.ttl b/serd.ttl
index 9a8db362..bc7d6f77 100644
--- a/serd.ttl
+++ b/serd.ttl
@@ -15,7 +15,7 @@
doap:homepage <http://drobilla.net/software/serd> ;
doap:license <http://opensource.org/licenses/isc> ;
doap:shortdesc "A high-performance RDF reader/writer" ;
- doap:description "Serd is a lightweight high-performance C library for reading and writing RDF in the Turtle, NTriples, TriG, and NQuads syntaxes." ;
+ doap:description "Serd is a fast and lightweight C library for reading and writing RDF in Turtle, NTriples, TriG, and NQuads." ;
doap:created "2011-09-28"^^xsd:date ;
doap:programming-language "C" ;
doap:implements <http://www.w3.org/TR/n-quads/> ,
diff --git a/src/.clang-tidy b/src/.clang-tidy
index 5cf5e873..6daee064 100644
--- a/src/.clang-tidy
+++ b/src/.clang-tidy
@@ -7,9 +7,7 @@ Checks: >
-bugprone-branch-clone,
-bugprone-easily-swappable-parameters,
-bugprone-reserved-identifier,
- -bugprone-suspicious-string-compare,
-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
- -concurrency-mt-unsafe,
-hicpp-multiway-paths-covered,
-hicpp-signed-bitwise,
-llvm-header-guard,
diff --git a/test/NQuadsTests/meson.build b/test/NQuadsTests/meson.build
index 4fe84dd5..deaf41e8 100644
--- a/test/NQuadsTests/meson.build
+++ b/test/NQuadsTests/meson.build
@@ -7,8 +7,15 @@ args = [
]
test('NQuads',
- run_test_suite,
- args: script_args + args,
+ run_pipe_suite,
+ args: pipe_test_script_args + args,
env: test_env,
- suite: ['suite', 'w3c'],
+ suite: ['suite', 'w3c', 'pipe'],
+ timeout: 240)
+
+test('NQuads',
+ run_sort_suite,
+ args: sort_test_script_args + args,
+ env: test_env,
+ suite: ['suite', 'w3c', 'sort'],
timeout: 240)
diff --git a/test/NTriplesTests/meson.build b/test/NTriplesTests/meson.build
index cf773c64..f608e973 100644
--- a/test/NTriplesTests/meson.build
+++ b/test/NTriplesTests/meson.build
@@ -7,8 +7,15 @@ args = [
]
test('NTriples',
- run_test_suite,
- args: script_args + args,
+ run_pipe_suite,
+ args: pipe_test_script_args + args,
env: test_env,
- suite: ['suite', 'w3c'],
+ suite: ['suite', 'w3c', 'pipe'],
+ timeout: 240)
+
+test('NTriples',
+ run_sort_suite,
+ args: sort_test_script_args + args,
+ env: test_env,
+ suite: ['suite', 'w3c', 'sort'],
timeout: 240)
diff --git a/test/TriGTests/meson.build b/test/TriGTests/meson.build
index e7c305e0..95641b44 100644
--- a/test/TriGTests/meson.build
+++ b/test/TriGTests/meson.build
@@ -7,8 +7,15 @@ args = [
]
test('TriG',
- run_test_suite,
- args: script_args + args,
+ run_pipe_suite,
+ args: pipe_test_script_args + args,
env: test_env,
- suite: ['suite', 'w3c'],
+ suite: ['suite', 'w3c', 'pipe'],
+ timeout: 240)
+
+test('TriG',
+ run_sort_suite,
+ args: sort_test_script_args + args,
+ env: test_env,
+ suite: ['suite', 'w3c', 'sort'],
timeout: 240)
diff --git a/test/TurtleTests/meson.build b/test/TurtleTests/meson.build
index 492e1fe0..7a6d1475 100644
--- a/test/TurtleTests/meson.build
+++ b/test/TurtleTests/meson.build
@@ -7,8 +7,15 @@ args = [
]
test('Turtle',
- run_test_suite,
- args: script_args + args,
+ run_pipe_suite,
+ args: pipe_test_script_args + args,
env: test_env,
- suite: ['suite', 'w3c'],
+ suite: ['suite', 'w3c', 'pipe'],
+ timeout: 240)
+
+test('Turtle',
+ run_sort_suite,
+ args: sort_test_script_args + args,
+ env: test_env,
+ suite: ['suite', 'w3c', 'sort'],
timeout: 240)
diff --git a/test/bad/meson.build b/test/bad/meson.build
index 9c423367..2c99bbac 100644
--- a/test/bad/meson.build
+++ b/test/bad/meson.build
@@ -1,8 +1,15 @@
base_uri = 'http://drobilla.net/sw/serd/test/bad/'
test('bad',
- run_test_suite,
- args: script_args + [files('manifest.ttl'), base_uri],
+ run_pipe_suite,
+ args: pipe_test_script_args + [files('manifest.ttl'), base_uri],
env: test_env,
suite: ['suite', 'extra'],
timeout: 240)
+
+test('bad',
+ run_sort_suite,
+ args: sort_test_script_args + [files('manifest.ttl'), base_uri],
+ env: test_env,
+ suite: ['suite', 'extra', 'sort'],
+ timeout: 240)
diff --git a/test/canon/meson.build b/test/canon/meson.build
index 11d95469..f73a3527 100644
--- a/test/canon/meson.build
+++ b/test/canon/meson.build
@@ -1,8 +1,8 @@
base_uri = 'http://drobilla.net/sw/serd/test/canon/'
test('canon',
- run_test_suite,
- args: script_args + [
+ run_pipe_suite,
+ args: pipe_test_script_args + [
files('manifest.ttl'),
base_uri,
'--',
diff --git a/test/filter/input.ttl b/test/filter/input.ttl
new file mode 100644
index 00000000..59aa67f7
--- /dev/null
+++ b/test/filter/input.ttl
@@ -0,0 +1,9 @@
+@prefix eg: <http://example.org/> .
+
+eg:s1
+ eg:p1 eg:o1 ;
+ eg:p2 eg:o2 .
+
+eg:s2
+ eg:p1 eg:o1 ;
+ eg:p2 eg:o2 .
diff --git a/test/filter/manifest.ttl b/test/filter/manifest.ttl
new file mode 100644
index 00000000..59ce3f55
--- /dev/null
+++ b/test/filter/manifest.ttl
@@ -0,0 +1,48 @@
+@prefix checks: <http://drobilla.net/ns/serd/checks#> .
+@prefix mf: <http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix rdft: <http://www.w3.org/ns/rdftest#> .
+@prefix serd: <http://drobilla.net/ns/serd#> .
+
+rdft:Test
+ rdfs:subClassOf mf:ManifestEntry .
+
+serd:patternFile
+ a rdf:Property ;
+ rdfs:label "pattern file" .
+
+serd:TestFilterPositive
+ a rdfs:Class ;
+ rdfs:label "Positive Filtering" ;
+ rdfs:subClassOf rdft:Test .
+
+<>
+ a mf:Manifest ;
+ rdfs:comment "Serd filter test cases" ;
+ mf:entries (
+ <#s1>
+ <#p1>
+ <#o1>
+ ) .
+
+<#s1>
+ a serd:TestFilterPositive ;
+ serd:patternFile <s1.pattern.nt> ;
+ mf:name "s1" ;
+ mf:action <input.ttl> ;
+ mf:result <s1.result.nt> .
+
+<#p1>
+ a serd:TestFilterPositive ;
+ serd:patternFile <p1.pattern.nt> ;
+ mf:name "p1" ;
+ mf:action <input.ttl> ;
+ mf:result <p1.result.nt> .
+
+<#o1>
+ a serd:TestFilterPositive ;
+ serd:patternFile <o1.pattern.nt> ;
+ mf:name "o1" ;
+ mf:action <input.ttl> ;
+ mf:result <o1.result.nt> .
diff --git a/test/filter/meson.build b/test/filter/meson.build
new file mode 100644
index 00000000..fd2b1bca
--- /dev/null
+++ b/test/filter/meson.build
@@ -0,0 +1,15 @@
+base_uri = 'http://drobilla.net/sw/serd/test/filter/'
+
+test('filter',
+ run_filter_suite,
+ args: common_script_options + [
+ '--pipe',
+ serd_pipe,
+ '--filter',
+ serd_filter,
+ files('manifest.ttl'),
+ base_uri
+ ],
+ env: test_env,
+ suite: ['suite', 'extra'],
+ timeout: 240)
diff --git a/test/filter/o1.pattern.nt b/test/filter/o1.pattern.nt
new file mode 100644
index 00000000..41932fd7
--- /dev/null
+++ b/test/filter/o1.pattern.nt
@@ -0,0 +1 @@
+?s ?p <http://example.org/o1> .
diff --git a/test/filter/o1.result.nt b/test/filter/o1.result.nt
new file mode 100644
index 00000000..e7b1e759
--- /dev/null
+++ b/test/filter/o1.result.nt
@@ -0,0 +1,2 @@
+<http://example.org/s1> <http://example.org/p1> <http://example.org/o1> .
+<http://example.org/s2> <http://example.org/p1> <http://example.org/o1> .
diff --git a/test/filter/p1.pattern.nt b/test/filter/p1.pattern.nt
new file mode 100644
index 00000000..fca20e94
--- /dev/null
+++ b/test/filter/p1.pattern.nt
@@ -0,0 +1 @@
+?s <http://example.org/p1> ?o .
diff --git a/test/filter/p1.result.nt b/test/filter/p1.result.nt
new file mode 100644
index 00000000..e7b1e759
--- /dev/null
+++ b/test/filter/p1.result.nt
@@ -0,0 +1,2 @@
+<http://example.org/s1> <http://example.org/p1> <http://example.org/o1> .
+<http://example.org/s2> <http://example.org/p1> <http://example.org/o1> .
diff --git a/test/filter/s1.pattern.nt b/test/filter/s1.pattern.nt
new file mode 100644
index 00000000..f5b87db1
--- /dev/null
+++ b/test/filter/s1.pattern.nt
@@ -0,0 +1 @@
+<http://example.org/s1> ?p ?o .
diff --git a/test/filter/s1.result.nt b/test/filter/s1.result.nt
new file mode 100644
index 00000000..023faf42
--- /dev/null
+++ b/test/filter/s1.result.nt
@@ -0,0 +1,2 @@
+<http://example.org/s1> <http://example.org/p1> <http://example.org/o1> .
+<http://example.org/s1> <http://example.org/p2> <http://example.org/o2> .
diff --git a/test/good/meson.build b/test/good/meson.build
index 38c672ac..368a91bc 100644
--- a/test/good/meson.build
+++ b/test/good/meson.build
@@ -1,8 +1,15 @@
base_uri = 'http://drobilla.net/sw/serd/test/good/'
test('good',
- run_test_suite,
- args: script_args + [files('manifest.ttl'), base_uri],
+ run_pipe_suite,
+ args: pipe_test_script_args + [files('manifest.ttl'), base_uri],
env: test_env,
- suite: ['suite', 'extra'],
+ suite: ['suite', 'extra', 'pipe'],
+ timeout: 240)
+
+test('good',
+ run_sort_suite,
+ args: sort_test_script_args + [files('manifest.ttl'), base_uri],
+ env: test_env,
+ suite: ['suite', 'extra', 'sort'],
timeout: 240)
diff --git a/test/lax/meson.build b/test/lax/meson.build
index 6d4d7903..e71a677c 100644
--- a/test/lax/meson.build
+++ b/test/lax/meson.build
@@ -4,8 +4,8 @@ base_uri = 'http://drobilla.net/sw/serd/test/lax/'
# ... once with strict parsing to test the hard errors
test('lax.strict',
- run_test_suite,
- args: script_args + [files('manifest.ttl'), base_uri],
+ run_pipe_suite,
+ args: pipe_test_script_args + [files('manifest.ttl'), base_uri],
env: test_env,
is_parallel: false,
suite: ['suite', 'extra'],
@@ -13,14 +13,13 @@ test('lax.strict',
# ... and once with lax parsing to tolerate them
test('lax.lax',
- run_test_suite,
- args: script_args + [
+ run_pipe_suite,
+ args: pipe_test_script_args + [
files('manifest.ttl'),
base_uri,
'--',
- '-i',
- 'lax',
- ],
+ '-I', 'lax',
+ '-O', 'lax'],
env: test_env,
is_parallel: false,
suite: ['suite', 'extra'],
diff --git a/test/meson.build b/test/meson.build
index c19d99c0..9830a1dd 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -1,6 +1,8 @@
autoship = find_program('autoship', required: false)
-run_test_suite = find_program('run_test_suite.py')
+run_filter_suite = find_program('run_filter_suite.py')
+run_pipe_suite = find_program('run_pipe_suite.py')
+run_sort_suite = find_program('run_sort_suite.py')
wrapper = meson.get_cross_property('exe_wrapper', '')
@@ -50,185 +52,368 @@ if autoship.found()
test('autoship', autoship, args: ['test', serd_src_root], suite: 'data')
endif
-if is_variable('serdi')
-
- if wrapper != ''
- script_args = ['--wrapper', wrapper, '--serdi', serdi.full_path()]
- else
- script_args = ['--serdi', serdi.full_path()]
- endif
-
- serd_ttl = files('../serd.ttl')[0]
+serd_ttl = files('../serd.ttl')[0]
+common_script_options = []
+if wrapper != ''
+ common_script_options = ['--wrapper', wrapper]
+endif
- test('serd.ttl', serdi, args: [serd_ttl], env: test_env, suite: 'data')
+# Test serd-pipe as the main entry point to the common tool code
+if is_variable('serd_pipe')
+ tool = serd_pipe
+ pipe_test_script_args = common_script_options + ['--tool', serd_pipe]
- # Command line options
+ # Basic valid arguments
good_args = [
- ['-v'],
+ ['-V'],
['-h'],
- ['-k', '512', '-s', '<urn:eg:s> a <urn:eg:T> .'],
]
foreach args : good_args
- test(args[0], serdi, args: args, env: test_env, suite: ['serdi', 'options'])
+ test(args[0],
+ tool,
+ args: args,
+ env: test_env,
+ suite: ['tools', 'pipe', 'options'])
endforeach
+ # Basic invalid arguments
+
bad_args = [
- ['/no/such/file'],
- ['ftp://unsupported.org'],
- ['-F', '', '-G', ''],
- ['-F'],
- ['-F', '?s ?p ?o . ?q ?r ?s .', '-s', ''],
- ['-F', '?s ?p ?o .\n?q ?r ?s .\n', '-s', ''],
- ['-F', 'bad_pattern', '-s', ''],
- ['-G'],
- ['-G', '?s ?p ?o . ?q ?r ?s .', '-s', ''],
- ['-G', 'bad_pattern', '-s', ''],
+ ['-B', 'nonuriorpath'],
+ ['-B'],
+ ['-I', 'turtle'],
+ ['-I', 'unknown'],
['-I'],
- ['-b'],
+ ['-O', 'unknown'],
+ ['-O'],
['-b', '-1'],
- ['-b', '9223372036854775807'],
['-b', '1024junk'],
- ['-c'],
- ['-i', 'unknown'],
- ['-i', 'turtle'],
- ['-i'],
- ['-fi'],
- ['-k'],
+ ['-b', '9223372036854775807'],
+ ['-b'],
['-k', '-1'],
- ['-k', '9223372036854775807'],
['-k', '1024junk'],
- ['-o', 'unknown'],
+ ['-k', '9223372036854775807'],
+ ['-k'],
['-o'],
- ['-p'],
- ['-r'],
+ ['-s', '<foo> a <Bar> .'],
['-s'],
- ['-w'],
['-z'],
- ['-s', '<foo> a <Bar> .'],
+ ['/no/such/file'],
]
foreach args : bad_args
- name = ' '.join(args).underscorify()
- test(name, serdi,
+ test(' '.join(args),
+ tool,
args: args,
env: test_env,
should_fail: true,
- suite: ['serdi', 'options'])
+ suite: ['tools', 'pipe', 'options'])
endforeach
- test('ansi_clicolor_force',
- serdi,
- args: files('bad/bad-lang.ttl'),
- env: test_env + ['CLICOLOR_FORCE=1'],
- should_fail: true)
-
- test('ansi_clicolor_off',
- serdi,
- args: files('bad/bad-lang.ttl'),
- env: test_env + ['CLICOLOR=0'],
- should_fail: true)
-
- test('ansi_no_color',
- serdi,
- args: files('bad/bad-lang.ttl'),
- env: test_env + ['NO_COLOR=1'],
- should_fail: true)
-
test('none',
- serdi,
+ tool,
env: test_env,
should_fail: true,
- suite: ['serdi', 'options'])
+ suite: ['tools', 'pipe', 'options'])
- test('quiet', files('test_quiet.py'),
- args: script_args + files('bad/bad-base.ttl'),
+ test('remote',
+ tool,
+ args: ['ftp://unsupported.org'],
env: test_env,
- suite: ['serdi', 'options'])
+ should_fail: true,
+ suite: ['tools', 'pipe', 'options'])
- test('filter', files('test_filter.py'),
- args: script_args,
+ test('bad_rebase',
+ tool,
+ args: ['-B', 'rebase', serd_ttl],
env: test_env,
- suite: ['serdi', 'options'])
+ should_fail: true,
+ suite: ['tools', 'pipe', 'options'])
- test('grep', files('test_grep.py'),
- args: script_args,
+ test('base', files('test_base.py'),
+ args: pipe_test_script_args,
env: test_env,
- suite: ['serdi', 'options'])
+ suite: ['tools', 'pipe', 'options'])
- # Inputs
+ test('dir_base',
+ tool,
+ args: ['-B', serd_src_root / '', serd_ttl],
+ env: test_env,
+ suite: ['tools', 'pipe', 'options'])
+
+ # Smoke test common handling code for environment color configuration
+
+ test('CLICOLOR_FORCE',
+ tool,
+ args: files('bad/bad-lang.ttl'),
+ env: test_env + ['CLICOLOR_FORCE=1'],
+ should_fail: true,
+ suite: ['color'])
+
+ test('CLICOLOR',
+ tool,
+ args: files('bad/bad-lang.ttl'),
+ env: test_env + ['CLICOLOR=0'],
+ should_fail: true,
+ suite: ['color'])
+
+ test('NO_COLOR',
+ tool,
+ args: files('bad/bad-lang.ttl'),
+ env: test_env + ['NO_COLOR=1'],
+ should_fail: true,
+ suite: ['color'])
+
+ # Different input sources
test('stdin', files('test_stdin.py'),
- args: script_args,
+ args: pipe_test_script_args,
env: test_env,
- suite: ['serdi', 'input'])
+ suite: ['tools', 'pipe', 'input'])
- test('multiple', files('test_multifile.py'),
- args: script_args + [meson.current_source_dir() / 'multifile'],
+ test('multifile', files('test_multifile.py'),
+ args: pipe_test_script_args + [meson.current_source_dir() / 'multifile'],
env: test_env,
- suite: ['serdi', 'input'])
+ suite: ['tools', 'pipe', 'input'])
- test('string', serdi,
- args: ['-s', '<foo> a <Bar> .'],
+ test('serd.ttl',
+ tool,
+ args: [serd_ttl],
env: test_env,
- should_fail: true,
- suite: ['serdi', 'input'])
+ suite: ['tools', 'pipe', 'input'])
- test('missing', serdi,
- args: ['-i', 'turtle'],
+ test('good_string',
+ tool,
+ args: ['-I', 'turtle', '-s', '[] a [] .'],
env: test_env,
- should_fail: true,
- suite: ['serdi', 'input'])
+ suite: ['tools', 'pipe', 'input'])
- test('no_such_file', serdi,
- args: ['no_such_file'],
+ test('baseless_string',
+ tool,
+ args: ['-s', '<foo> a <Bar> .'],
env: test_env,
should_fail: true,
- suite: ['serdi', 'input'])
+ suite: ['tools', 'pipe', 'input'])
- test('remote', serdi,
- args: ['ftp://example.org/unsupported.ttl'],
+ test('unknown_type',
+ tool,
+ args: files('../README.md'),
env: test_env,
should_fail: true,
- suite: ['serdi', 'input'])
+ suite: ['tools', 'pipe', 'input'])
- # Output
+ # Suppressed output
test('empty', files('test_empty.py'),
- args: script_args + [serd_ttl],
+ args: pipe_test_script_args + files('../serd.ttl'),
env: test_env,
- suite: 'output')
+ suite: ['tools', 'pipe', 'output'])
- # FIXME: Old base URI argument?
+ test('quiet', files('test_quiet.py'),
+ args: pipe_test_script_args + files('bad/bad-base.ttl'),
+ env: test_env,
+ suite: ['tools', 'pipe', 'output'])
# IO errors
- test('read_dir', serdi,
+ test('read_dir',
+ tool,
args: [meson.source_root()],
env: test_env,
should_fail: true,
- suite: 'io_errors')
+ suite: ['tools', 'pipe', 'input'])
if host_machine.system() == 'linux'
- test('unreadable', serdi,
+ test('unreadable',
+ tool,
args: ['/sys/bus/pci/rescan'],
env: test_env,
should_fail: true,
- suite: 'io_errors')
+ suite: ['tools', 'pipe', 'input'])
endif
test('write_error', files('test_write_error.py'),
- args: script_args + [serd_ttl],
+ args: pipe_test_script_args + [serd_ttl],
env: test_env,
- suite: 'io_errors')
+ suite: ['tools', 'pipe', 'output'])
- test('write_bad_file', serdi,
- args: ['-w', '/does/not/exist.ttl', meson.source_root() / 'serd.ttl'],
+ test('missing_output',
+ tool,
+ args: ['-o', '/does/not/exist.ttl', meson.source_root() / 'serd.ttl'],
env: test_env,
should_fail: true,
- suite: 'io_errors')
+ suite: ['tools', 'pipe', 'output'])
+endif
+
+# Test specifics for serd-sort
+if is_variable('serd_sort')
+ tool = serd_sort
+ sort_test_script_args = common_script_options + ['--tool', serd_sort]
+
+ # Basic valid arguments
+
+ good_args = [
+ ['-V'],
+ ['-h'],
+ ]
+
+ foreach args : good_args
+ test(args[0],
+ tool,
+ args: args,
+ env: test_env,
+ suite: ['tools', 'sort', 'options'])
+ endforeach
+
+ # Basic invalid arguments
+
+ bad_args = [
+ ['-c', 'CHAOS', '-'],
+ ['-o'],
+ ['-s'],
+ ['-z', '-'],
+ ['-z', '-'],
+ ['/no/such/file'],
+ ]
+
+ foreach args : bad_args
+ test(' '.join(args),
+ tool,
+ args: args,
+ env: test_env,
+ should_fail: true,
+ suite: ['tools', 'sort', 'options'])
+ endforeach
+
+ test('none',
+ tool,
+ env: test_env,
+ should_fail: true,
+ suite: ['tools', 'sort', 'options'])
+
+ # IO errors
+
+ test('read_dir',
+ tool,
+ args: [meson.source_root()],
+ env: test_env,
+ should_fail: true,
+ suite: ['tools', 'sort', 'input'])
+
+ test('missing_output',
+ tool,
+ args: ['-o', '/does/not/exist.ttl', meson.source_root() / 'serd.ttl'],
+ env: test_env,
+ should_fail: true,
+ suite: ['tools', 'sort', 'output'])
+
+ # Collation test suite
+
+ test('sort', files('test_sort.py'),
+ args: sort_test_script_args + files('sort/input.trig'),
+ env: test_env,
+ suite: ['tools', 'sort'])
+
+endif
+
+# Test specifics for serd-filter
+if is_variable('serd_filter')
+ tool = serd_filter
+ filter_test_script_args = common_script_options + ['--tool', serd_filter]
+
+ # Basic valid arguments
+
+ good_args = [
+ ['-V'],
+ ['-h'],
+ ]
+
+ foreach args : good_args
+ test(args[0],
+ tool,
+ args: args,
+ env: test_env,
+ suite: ['tools', 'filter', 'options'])
+ endforeach
+
+ # Basic invalid arguments
+
+ bad_args = [
+ ['-f', '/no/such/file.nt', '-'],
+ ['-z'],
+ ['?s ?p ?o .'],
+ ]
+
+ foreach args : bad_args
+ test(' '.join(args),
+ tool,
+ args: args,
+ env: test_env,
+ should_fail: true,
+ suite: ['tools', 'filter', 'options'])
+ endforeach
+
+ test('garbage_pattern',
+ tool,
+ args: ['junk', serd_ttl],
+ env: test_env,
+ should_fail: true,
+ suite: ['tools', 'filter', 'options'])
+
+ test('multiple_patterns',
+ tool,
+ args: ['?s ?p ?o .\n?t ?u ?v .\n',
+ meson.source_root() / 'serd.ttl'],
+ env: test_env,
+ should_fail: true,
+ suite: ['tools', 'filter', 'output'])
+
+ test('missing_output',
+ tool,
+ args: ['-o', '/does/not/exist.ttl',
+ '?s ?p ?o .',
+ meson.source_root() / 'serd.ttl'],
+ env: test_env,
+ should_fail: true,
+ suite: ['tools', 'filter', 'output'])
+
+ # Different input sources
+
+ test('missing_input',
+ tool,
+ args: ['?s ?p ?o .', '/does/not/exist.ttl'],
+ env: test_env,
+ should_fail: true,
+ suite: ['tools', 'filter', 'input'])
+
+ test('filter_dir',
+ tool,
+ args: ['?s ?p ?o .', meson.source_root()],
+ env: test_env,
+ should_fail: true,
+ suite: ['tools', 'filter', 'input'])
+
+ # Filtering
+
+ test('filter', files('test_filter.py'),
+ args: filter_test_script_args,
+ env: test_env,
+ suite: ['tools'])
+
+ test('grep', files('test_grep.py'),
+ args: filter_test_script_args,
+ env: test_env,
+ suite: ['tools'])
+
+ # RDF-driven test suite
+ subdir('filter')
+
+endif
+# Run RDF-driven test suites using serd-pipe and serd-sort
+if is_variable('serd_pipe') and is_variable('serd_sort')
# RDF-driven test suites from the W3C
subdir('NQuadsTests')
subdir('NTriplesTests')
diff --git a/test/pattern/meson.build b/test/pattern/meson.build
index 1d5a2140..1216cfe0 100644
--- a/test/pattern/meson.build
+++ b/test/pattern/meson.build
@@ -1,14 +1,27 @@
base_uri = 'http://drobilla.net/sw/serd/test/pattern/'
test('pattern',
- run_test_suite,
- args: script_args + [
+ run_pipe_suite,
+ args: pipe_test_script_args + [
files('manifest.ttl'),
base_uri,
'--',
- '-i',
+ '-I',
'variables',
],
env: test_env,
- suite: ['suite', 'extra'],
+ suite: ['suite', 'extra', 'pipe'],
+ timeout: 240)
+
+test('pattern',
+ run_sort_suite,
+ args: sort_test_script_args + [
+ files('manifest.ttl'),
+ base_uri,
+ '--',
+ '-I',
+ 'variables',
+ ],
+ env: test_env,
+ suite: ['suite', 'extra', 'sort'],
timeout: 240)
diff --git a/test/run_filter_suite.py b/test/run_filter_suite.py
new file mode 100755
index 00000000..222b98e4
--- /dev/null
+++ b/test/run_filter_suite.py
@@ -0,0 +1,160 @@
+#!/usr/bin/env python3
+
+"""Run the RDF-based test suite for serd-filter."""
+
+import serd_test_util
+
+import argparse
+import datetime
+import difflib
+import itertools
+import os
+import re
+import shlex
+import subprocess
+import sys
+import tempfile
+import urllib.parse
+
+
+def log_error(message):
+    """Log an error message to stderr.
+
+    Writes "error: " followed by the text of the message.  The message may be
+    any object, such as a caught exception: it is converted with str() so that
+    logging it can not itself raise a TypeError.  No newline is appended, so
+    callers include their own where one is wanted.
+    """
+
+    sys.stderr.write("error: ")
+    sys.stderr.write(str(message))
+
+
+def _uri_path(test_dir, uri):
+    """Map a URI to the file in test_dir with the same basename.
+
+    Only the last component of the URI's path is used.  If the path without
+    its first character starts with a drive (as reported by
+    os.path.splitdrive), that first character is dropped before the basename
+    is taken.
+    """
+
+    url_path = urllib.parse.urlparse(uri).path
+    if os.path.splitdrive(url_path[1:])[0]:
+        url_path = url_path[1:]
+
+    return os.path.join(test_dir, os.path.basename(url_path))
+
+
+def test_suite(
+    manifest_path,
+    base_uri,
+    filter_command_prefix,
+    pipe_command_prefix,
+    out_dir,
+):
+    """Run all tests in the manifest.
+
+    The manifest at manifest_path is loaded with serd_test_util.load_rdf(),
+    using pipe_command_prefix plus "-B" base_uri as the command.  Every entry
+    of class serd:TestFilterPositive is then run with filter_command_prefix,
+    writing its output into out_dir, and that output is compared with the
+    result file named by the entry.
+
+    Returns the number of failed tests, which is zero if all tests passed.
+    """
+
+    mf = "http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#"
+    serd = "http://drobilla.net/ns/serd#"
+    suite_dir = os.path.dirname(manifest_path)
+
+    model, instances = serd_test_util.load_rdf(
+        pipe_command_prefix + ["-B", base_uri], manifest_path
+    )
+
+    class Results:
+        """Running totals for the suite."""
+
+        def __init__(self):
+            self.n_tests = 0
+            self.n_failures = 0
+
+    def run_test(entry, results):
+        """Run a single test entry from the manifest.
+
+        Counts the test in results, and counts a failure if the filter
+        command fails or its output differs from the expected result.
+        """
+
+        # All files are looked up by name in the manifest's directory
+        pattern_uri = model[entry][serd + "patternFile"][0]
+        input_uri = model[entry][mf + "action"][0]
+        result_uri = model[entry][mf + "result"][0]
+        pattern_path = _uri_path(suite_dir, pattern_uri)
+        input_path = _uri_path(suite_dir, input_uri)
+        result_path = _uri_path(suite_dir, result_uri)
+
+        # Output for "x.result.nt" is written to "x.nt" in the output directory
+        output_path = os.path.join(
+            out_dir, os.path.basename(result_path).replace(".result", "")
+        )
+
+        command = filter_command_prefix + [
+            "-B",
+            base_uri,
+            "-f",
+            pattern_path,
+            "-o",
+            output_path,
+            input_path,
+        ]
+
+        # Run the filter (which should return success)
+        results.n_tests += 1
+        try:
+            subprocess.run(command, check=True)
+
+            # Check output against the expected result
+            if not serd_test_util.file_equals(result_path, output_path):
+                results.n_failures += 1
+                log_error(
+                    "Output {} differs from {}\n".format(
+                        output_path, result_path
+                    )
+                )
+
+        except Exception as e:
+            # Log the exception as text, terminated like the other messages
+            log_error("{}\n".format(e))
+            results.n_failures += 1
+
+    # Run all test types in the test suite
+    results = Results()
+    for klass, entries in instances.items():
+        if klass == serd + "TestFilterPositive":
+            for entry in entries:
+                run_test(entry, results)
+
+    # Print result summary
+    if results.n_failures > 0:
+        log_error(
+            "{}/{} tests failed\n".format(results.n_failures, results.n_tests)
+        )
+    else:
+        sys.stdout.write("All {} tests passed\n".format(results.n_tests))
+
+    return results.n_failures
+
+
+def main():
+    """Parse the command line and run the filter test suite.
+
+    Returns the number of failed tests as reported by test_suite().
+    """
+
+    cli = argparse.ArgumentParser(
+        usage="%(prog)s [OPTION]... MANIFEST BASE_URI -- [TOOL_OPTION]...",
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+
+    # Options, in the order they are listed in the help output
+    for flag, fallback, description in (
+        ("--pipe", "tools/serd-pipe", "serd-pipe executable"),
+        ("--filter", "tools/serd-filter", "serd-filter executable"),
+        ("--wrapper", "", "executable wrapper"),
+    ):
+        cli.add_argument(flag, default=fallback, help=description)
+
+    # Positional arguments
+    cli.add_argument("manifest", help="test suite manifest.ttl file")
+    cli.add_argument("base_uri", help="base URI for tests")
+
+    # NOTE(review): tool_option is collected but never read below - confirm
+    # whether it was meant to be appended to the serd-filter command
+    cli.add_argument(
+        "tool_option", nargs=argparse.REMAINDER, help="option for serd-filter"
+    )
+
+    opts = cli.parse_args(sys.argv[1:])
+
+    # The wrapper is split like a shell command line and prefixes both tools
+    launcher = shlex.split(opts.wrapper)
+    filter_command_prefix = launcher + [opts.filter]
+    pipe_command_prefix = launcher + [opts.pipe]
+
+    # Test output only lives for the duration of the run
+    with tempfile.TemporaryDirectory() as test_out_dir:
+        return test_suite(
+            opts.manifest,
+            opts.base_uri,
+            filter_command_prefix,
+            pipe_command_prefix,
+            test_out_dir,
+        )
+
+
+if __name__ == "__main__":
+    # Exit with the value returned by main(), or with the exit status of a
+    # checked subprocess whose failure propagated out of it
+    try:
+        exit_status = main()
+    except subprocess.CalledProcessError as failure:
+        captured = failure.stderr
+        if captured is not None:
+            sys.stderr.write(captured.decode("utf-8"))
+
+        sys.stderr.write("error: %s\n" % failure)
+        exit_status = failure.returncode
+
+    sys.exit(exit_status)
diff --git a/test/run_test_suite.py b/test/run_pipe_suite.py
index 457e7f81..65a894c4 100755
--- a/test/run_test_suite.py
+++ b/test/run_pipe_suite.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
-"""Run an RDF test suite with serdi."""
+"""Run an RDF test suite with serd-pipe."""
import serd_test_util
@@ -41,7 +41,14 @@ def test_thru(
osyntax,
command_prefix,
):
- """Test lossless round-tripping through two different syntaxes."""
+ """Test rewriting a file in the input syntax.
+
+ This rewrites a source test file in the original fancy syntax, then
+ rewrites that output again in the simple syntax used for test output
+ (NTriples or NQuads). Checking the final output against the expected test
+ output tests that piping the file through serd with pretty-printing was
+ lossless.
+ """
assert isyntax is not None
assert osyntax is not None
@@ -54,51 +61,45 @@ def test_thru(
command_prefix
+ [f for sublist in flags for f in sublist]
+ [
- "-i",
+ "-B",
+ base_uri,
+ "-I",
isyntax,
- "-o",
+ "-O",
isyntax,
- "-w",
+ "-o",
out_path,
- "-I",
- base_uri,
path,
]
)
+ subprocess.run(out_cmd, check=True)
+
thru_cmd = (
command_prefix
+ test_osyntax_options(osyntax)
+ [
- "-i",
+ "-B",
+ base_uri,
+ "-I",
isyntax,
- "-o",
+ "-O",
+ "ascii",
+ "-O",
osyntax,
- "-w",
- thru_path,
"-o",
- "ascii",
- "-I",
- base_uri,
+ thru_path,
out_path,
]
)
- subprocess.run(out_cmd, check=True)
subprocess.run(thru_cmd, check=True)
- with open(thru_path, "wb") as out:
- subprocess.run(thru_cmd, check=True, stdout=out)
-
- if not _file_equals(check_path, thru_path):
- log_error(
- "Round-tripped output {} does not match {}\n".format(
- check_path, thru_path
- )
- )
- return 1
+ if serd_test_util.file_equals(check_path, thru_path):
+ return 0
- return 0
+ log_error("Rewritten {} differs from {}\n".format(thru_path, check_path))
+ return 1
def _uri_path(uri):
@@ -107,36 +108,6 @@ def _uri_path(uri):
return path if not drive else path[1:]
-def _test_input_syntax(test_class):
- """Return the output syntax use for a given test class."""
-
- if "NTriples" in test_class:
- return "NTriples"
-
- if "Turtle" in test_class:
- return "Turtle"
-
- if "NQuads" in test_class:
- return "NQuads"
-
- if "Trig" in test_class:
- return "Trig"
-
- raise Exception("Unknown test class <{}>".format(test_class))
-
-
-def _test_output_syntax(test_class):
- """Return the output syntax use for a given test class."""
-
- if "NTriples" in test_class or "Turtle" in test_class:
- return "NTriples"
-
- if "NQuads" in test_class or "Trig" in test_class:
- return "NQuads"
-
- raise Exception("Unknown test class <{}>".format(test_class))
-
-
def _option_combinations(options):
"""Return an iterator that cycles through all combinations of options."""
@@ -147,49 +118,6 @@ def _option_combinations(options):
return itertools.cycle(combinations)
-def _show_diff(from_lines, to_lines, from_filename, to_filename):
- same = True
- for line in difflib.unified_diff(
- from_lines,
- to_lines,
- fromfile=os.path.abspath(from_filename),
- tofile=os.path.abspath(to_filename),
- ):
- sys.stderr.write(line)
- same = False
-
- return same
-
-
-def _file_equals(patha, pathb):
-
- for path in (patha, pathb):
- if not os.access(path, os.F_OK):
- log_error("missing file {}\n".format(path))
- return False
-
- with open(patha, "r", encoding="utf-8") as fa:
- with open(pathb, "r", encoding="utf-8") as fb:
- return _show_diff(fa.readlines(), fb.readlines(), patha, pathb)
-
-
-def _file_lines_equal(patha, pathb, subst_from="", subst_to=""):
- import io
-
- for path in (patha, pathb):
- if not os.access(path, os.F_OK):
- sys.stderr.write("error: missing file %s" % path)
- return False
-
- la = sorted(set(io.open(patha, encoding="utf-8").readlines()))
- lb = sorted(set(io.open(pathb, encoding="utf-8").readlines()))
- if la != lb:
- _show_diff(la, lb, patha, pathb)
- return False
-
- return True
-
-
def test_suite(
manifest_path,
base_uri,
@@ -204,7 +132,7 @@ def test_suite(
mf = "http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#"
test_dir = os.path.dirname(manifest_path)
model, instances = serd_test_util.load_rdf(
- command_prefix + ["-I", base_uri], manifest_path
+ command_prefix + ["-B", base_uri], manifest_path
)
asserter = ""
@@ -217,17 +145,21 @@ def test_suite(
self.n_failures = 0
def run_tests(test_class, tests, expected_return, results):
- thru_flags = [["-f"], ["-b", "1"], ["-r", "http://example.org/"]]
+ thru_flags = [
+ ["-R", "http://example.org/"],
+ ["-b", "1"],
+ ["-b", "16384"],
+ ]
thru_options_iter = _option_combinations(thru_flags)
if output_syntax is not None:
osyntax = output_syntax
else:
- osyntax = _test_output_syntax(test_class)
+ osyntax = serd_test_util.test_output_syntax(test_class)
if input_syntax is not None:
isyntax = input_syntax
else:
- isyntax = _test_input_syntax(test_class)
+ isyntax = serd_test_util.test_input_syntax(test_class)
for test in sorted(tests):
test_uri = model[test][mf + "action"][0]
@@ -236,11 +168,11 @@ def test_suite(
test_path = os.path.join(test_dir, test_name)
command = command_prefix + [
- "-o",
+ "-O",
osyntax,
- "-o",
+ "-O",
"ascii",
- "-I",
+ "-B",
test_uri,
test_path,
]
@@ -272,7 +204,7 @@ def test_suite(
check_filename = os.path.basename(_uri_path(check_uri))
check_path = os.path.join(test_dir, check_filename)
- if not _file_equals(check_path, out_filename):
+ if not serd_test_util.file_equals(check_path, out_filename):
results.n_failures += 1
log_error(
"Output {} does not match {}\n".format(
@@ -292,32 +224,6 @@ def test_suite(
command_prefix,
)
- # Run model test for positive test (must succeed)
- out_filename = os.path.join(
- out_test_dir, test_name + ".model.out"
- )
-
- model_command = command_prefix + [
- "-m",
- "-o",
- osyntax,
- "-o",
- "ascii",
- "-w",
- out_filename,
- "-I",
- test_uri,
- test_path,
- ]
-
- proc = subprocess.run(model_command, check=True)
-
- if proc.returncode == 0 and (
- (mf + "result") in model[test]
- ):
- if not _file_lines_equal(check_path, out_filename):
- results.n_failures += 1
-
else: # Negative test
with open(out_filename, "w") as stdout:
with tempfile.TemporaryFile() as stderr:
@@ -380,13 +286,13 @@ def main():
"""Run the command line tool."""
parser = argparse.ArgumentParser(
- usage="%(prog)s [OPTION]... MANIFEST BASE_URI -- [SERDI_OPTION]...",
+ usage="%(prog)s [OPTION]... MANIFEST BASE_URI -- [TOOL_OPTION]...",
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument("--report", help="path to write result report to")
- parser.add_argument("--serdi", default="serdi", help="path to serdi")
+ parser.add_argument("--tool", default="tools/serd-pipe", help="executable")
parser.add_argument("--syntax", default=None, help="input syntax")
parser.add_argument("--osyntax", default=None, help="output syntax")
parser.add_argument("--wrapper", default="", help="executable wrapper")
@@ -394,14 +300,11 @@ def main():
parser.add_argument("base_uri", help="base URI for tests")
parser.add_argument(
- "serdi_option", nargs=argparse.REMAINDER, help="option for serdi"
+ "tool_option", nargs=argparse.REMAINDER, help="option to pass to tool"
)
args = parser.parse_args(sys.argv[1:])
-
- command_prefix = (
- shlex.split(args.wrapper) + [args.serdi] + args.serdi_option
- )
+ command_prefix = shlex.split(args.wrapper) + [args.tool] + args.tool_option
with tempfile.TemporaryDirectory() as test_out_dir:
return test_suite(
diff --git a/test/run_sort_suite.py b/test/run_sort_suite.py
new file mode 100755
index 00000000..27205665
--- /dev/null
+++ b/test/run_sort_suite.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python3
+
+"""Run an RDF test suite with serd-sort."""
+
+import serd_test_util
+
+import argparse
+import datetime
+import difflib
+import itertools
+import os
+import re
+import shlex
+import subprocess
+import sys
+import tempfile
+import urllib.parse
+
+
+def log_error(message):
+ """Log an error message to stderr"""
+
+ sys.stderr.write("error: ")
+ sys.stderr.write(message)
+
+
+def _uri_path(uri):
+ path = urllib.parse.urlparse(uri).path
+ drive = os.path.splitdrive(path[1:])[0]
+ return path if not drive else path[1:]
+
+
+def _file_lines_equal(patha, pathb, subst_from="", subst_to=""):
+ import io
+
+ for path in (patha, pathb):
+ if not os.access(path, os.F_OK):
+ sys.stderr.write("error: missing file %s" % path)
+ return False
+
+ la = sorted(set(io.open(patha, encoding="utf-8").readlines()))
+ lb = sorted(set(io.open(pathb, encoding="utf-8").readlines()))
+ if la != lb:
+ serd_test_util.show_diff(la, lb, patha, pathb)
+ return False
+
+ return True
+
+
+def _add_extension(filename, extension):
+ first_dot = filename.find(".")
+
+ return filename[0:first_dot] + extension + filename[first_dot:]
+
+
+def test_suite(
+ manifest_path,
+ base_uri,
+ report_filename,
+ input_syntax,
+ output_syntax,
+ command_prefix,
+ out_test_dir,
+):
+ """Run all tests in a test suite manifest."""
+
+ mf = "http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#"
+ test_dir = os.path.dirname(manifest_path)
+ model, instances = serd_test_util.load_rdf(
+ command_prefix + ["-B", base_uri], manifest_path
+ )
+
+ asserter = ""
+ if os.getenv("USER") == "drobilla":
+ asserter = "http://drobilla.net/drobilla#me"
+
+ class Results:
+ def __init__(self):
+ self.n_tests = 0
+ self.n_failures = 0
+
+ def run_tests(test_class, tests, expected_return, results):
+ osyntax = output_syntax
+ if osyntax is None:
+ osyntax = serd_test_util.test_output_syntax(test_class)
+
+ isyntax = input_syntax
+ if isyntax is None:
+ isyntax = serd_test_util.test_input_syntax(test_class)
+
+ for test in sorted(tests):
+ test_uri = model[test][mf + "action"][0]
+ test_uri_path = _uri_path(test_uri)
+ test_name = os.path.basename(test_uri_path)
+ test_path = os.path.join(test_dir, test_name)
+
+ command = command_prefix + [
+ "-B",
+ test_uri,
+ "-O",
+ osyntax,
+ "-O",
+ "ascii",
+ test_path,
+ ]
+
+ command_string = " ".join(shlex.quote(c) for c in command)
+ out_filename = os.path.join(
+ out_test_dir, _add_extension(test_name, ".sort")
+ )
+
+ results.n_tests += 1
+
+ if expected_return == 0: # Positive test
+
+ with open(out_filename, "w") as stdout:
+ proc = subprocess.run(command, check=False, stdout=stdout)
+ passed = proc.returncode == expected_return
+ if not passed:
+ results.n_failures += 1
+ log_error(
+ "Unexpected failure of command: {}\n".format(
+ command_string
+ )
+ )
+
+ if passed and (mf + "result") in model[test]:
+ # Check output against expected output from test suite
+ check_uri = model[test][mf + "result"][0]
+ check_filename = os.path.basename(_uri_path(check_uri))
+ check_path = os.path.join(test_dir, check_filename)
+
+ if not _file_lines_equal(check_path, out_filename):
+ results.n_failures += 1
+ log_error(
+ "Output {} differs from {}\n".format(
+ out_filename, check_path
+ )
+ )
+
+ else: # Negative test
+
+ with tempfile.TemporaryFile() as stderr:
+ with open(out_filename, "w") as stdout:
+ proc = subprocess.run(
+ command, check=False, stdout=stdout, stderr=stderr
+ )
+
+ passed = proc.returncode != 0
+ if passed:
+ # Check that an error message was printed
+ stderr.seek(0, 2) # Seek to end
+ if stderr.tell() == 0: # Empty
+ results.n_failures += 1
+ log_error("No error: {}\n".format(command_string))
+
+ else:
+ results.n_failures += 1
+ log_error("Should fail: {}\n".format(command_string))
+
+ # Write test report entry
+ if report_filename:
+ with open(report_filename, "a") as report:
+ report.write(
+ serd_test_util.earl_assertion(test, passed, asserter)
+ )
+
+ # Run all test types in the test suite
+ results = Results()
+ ns_rdftest = "http://www.w3.org/ns/rdftest#"
+ for test_class, instances in instances.items():
+ if test_class.startswith(ns_rdftest):
+ expected = (
+ 1
+ if "lax" not in command_prefix and "Negative" in test_class
+ else 0
+ )
+ run_tests(test_class, instances, expected, results)
+
+ # Print result summary
+ if results.n_failures > 0:
+ log_error(
+ "{}/{} tests failed\n".format(results.n_failures, results.n_tests)
+ )
+ else:
+ sys.stdout.write("All {} tests passed\n".format(results.n_tests))
+
+ return results.n_failures
+
+
+def main():
+ """Run the command line tool."""
+
+ parser = argparse.ArgumentParser(
+ usage="%(prog)s [OPTION]... MANIFEST BASE_URI -- [TOOL_OPTION]...",
+ description=__doc__,
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ )
+
+ parser.add_argument("--report", help="path to write result report to")
+ parser.add_argument("--tool", default="tools/serd-sort", help="executable")
+ parser.add_argument("--syntax", default=None, help="input syntax")
+ parser.add_argument("--osyntax", default=None, help="output syntax")
+ parser.add_argument("--wrapper", default="", help="executable wrapper")
+ parser.add_argument("manifest", help="test suite manifest.ttl file")
+ parser.add_argument("base_uri", help="base URI for tests")
+ parser.add_argument(
+ "tool_option", nargs=argparse.REMAINDER, help="option to pass to tool"
+ )
+
+ args = parser.parse_args(sys.argv[1:])
+ command_prefix = shlex.split(args.wrapper) + [args.tool] + args.tool_option
+
+ with tempfile.TemporaryDirectory() as test_out_dir:
+ return test_suite(
+ args.manifest,
+ args.base_uri,
+ args.report,
+ args.syntax,
+ args.osyntax,
+ command_prefix,
+ test_out_dir,
+ )
+
+
+if __name__ == "__main__":
+ try:
+ sys.exit(main())
+ except subprocess.CalledProcessError as e:
+ if e.stderr is not None:
+ sys.stderr.write(e.stderr.decode("utf-8"))
+
+ sys.stderr.write("error: %s\n" % e)
+ sys.exit(e.returncode)
diff --git a/test/serd_test_util/__init__.py b/test/serd_test_util/__init__.py
index f0b1c19a..45cc6e64 100644
--- a/test/serd_test_util/__init__.py
+++ b/test/serd_test_util/__init__.py
@@ -3,8 +3,48 @@
"""Utilities for data-driven tests."""
import datetime
+import difflib
+import os
import re
import subprocess
+import sys
+
+
+def error(message):
+ """Log an error message to stderr"""
+
+ sys.stderr.write("error: ")
+ sys.stderr.write(message)
+
+
+def test_input_syntax(test_class):
+ """Return the input syntax used for a given test class."""
+
+ if "NTriples" in test_class:
+ return "NTriples"
+
+ if "Turtle" in test_class:
+ return "Turtle"
+
+ if "NQuads" in test_class:
+ return "NQuads"
+
+ if "Trig" in test_class:
+ return "Trig"
+
+ raise Exception("Unknown test class <{}>".format(test_class))
+
+
+def test_output_syntax(test_class):
+ """Return the output syntax used for a given test class."""
+
+ if "NTriples" in test_class or "Turtle" in test_class:
+ return "NTriples"
+
+ if "NQuads" in test_class or "Trig" in test_class:
+ return "NQuads"
+
+ raise Exception("Unknown test class <{}>".format(test_class))
def earl_assertion(test, passed, asserter):
@@ -33,7 +73,7 @@ def earl_assertion(test, passed, asserter):
def load_rdf(command_prefix, filename):
- """Load an RDF file as dictionaries via serdi (only supports URIs)."""
+ """Load an RDF file as dictionaries via serd-pipe (only supports URIs)."""
rdf_type = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
model = {}
@@ -61,3 +101,32 @@ def load_rdf(command_prefix, filename):
instances[o].update([s])
return model, instances
+
+
+def file_equals(patha, pathb):
+ """Return true if the file at patha is the same as the file at pathb."""
+
+ for path in (patha, pathb):
+ if not os.access(path, os.F_OK):
+ error("missing file {}\n".format(path))
+ return False
+
+ with open(patha, "r", encoding="utf-8") as fa:
+ with open(pathb, "r", encoding="utf-8") as fb:
+ return show_diff(fa.readlines(), fb.readlines(), patha, pathb)
+
+
+def show_diff(from_lines, to_lines, from_filename, to_filename):
+ """Print a diff between files to stderr and return true if they match."""
+
+ same = True
+ for line in difflib.unified_diff(
+ from_lines,
+ to_lines,
+ fromfile=os.path.abspath(from_filename),
+ tofile=os.path.abspath(to_filename),
+ ):
+ sys.stderr.write(line)
+ same = False
+
+ return same
diff --git a/test/sort/GOPS.nq b/test/sort/GOPS.nq
new file mode 100644
index 00000000..c7472e03
--- /dev/null
+++ b/test/sort/GOPS.nq
@@ -0,0 +1,10 @@
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> .
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> .
+<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> .
diff --git a/test/sort/GOSP.nq b/test/sort/GOSP.nq
new file mode 100644
index 00000000..c7472e03
--- /dev/null
+++ b/test/sort/GOSP.nq
@@ -0,0 +1,10 @@
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> .
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> .
+<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> .
diff --git a/test/sort/GPSO.nq b/test/sort/GPSO.nq
new file mode 100644
index 00000000..1a858017
--- /dev/null
+++ b/test/sort/GPSO.nq
@@ -0,0 +1,10 @@
+<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> .
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> .
+<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> .
diff --git a/test/sort/GSOP.nq b/test/sort/GSOP.nq
new file mode 100644
index 00000000..fc073a00
--- /dev/null
+++ b/test/sort/GSOP.nq
@@ -0,0 +1,10 @@
+<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> .
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> .
+<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> .
diff --git a/test/sort/GSPO.nq b/test/sort/GSPO.nq
new file mode 100644
index 00000000..726b1d42
--- /dev/null
+++ b/test/sort/GSPO.nq
@@ -0,0 +1,10 @@
+<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> .
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> .
+<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> .
diff --git a/test/sort/OPS.nq b/test/sort/OPS.nq
new file mode 100644
index 00000000..456ade7f
--- /dev/null
+++ b/test/sort/OPS.nq
@@ -0,0 +1,10 @@
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> .
+<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> .
+_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> .
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> .
diff --git a/test/sort/OSP.nq b/test/sort/OSP.nq
new file mode 100644
index 00000000..456ade7f
--- /dev/null
+++ b/test/sort/OSP.nq
@@ -0,0 +1,10 @@
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> .
+<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> .
+_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> .
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> .
diff --git a/test/sort/POS.nq b/test/sort/POS.nq
new file mode 100644
index 00000000..51c675de
--- /dev/null
+++ b/test/sort/POS.nq
@@ -0,0 +1,10 @@
+<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> .
+<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> .
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> .
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> .
diff --git a/test/sort/PSO.nq b/test/sort/PSO.nq
new file mode 100644
index 00000000..0fb7bd68
--- /dev/null
+++ b/test/sort/PSO.nq
@@ -0,0 +1,10 @@
+<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> .
+<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> .
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> .
diff --git a/test/sort/SOP.nq b/test/sort/SOP.nq
new file mode 100644
index 00000000..1692689c
--- /dev/null
+++ b/test/sort/SOP.nq
@@ -0,0 +1,10 @@
+<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> .
+<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> .
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> .
diff --git a/test/sort/SPO.nq b/test/sort/SPO.nq
new file mode 100644
index 00000000..508debc7
--- /dev/null
+++ b/test/sort/SPO.nq
@@ -0,0 +1,10 @@
+<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> .
+<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> .
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> .
diff --git a/test/sort/input.trig b/test/sort/input.trig
new file mode 100644
index 00000000..154a9fb8
--- /dev/null
+++ b/test/sort/input.trig
@@ -0,0 +1,19 @@
+@prefix eg: <http://example.org/> .
+
+eg:graph1 {
+eg:s
+ eg:blank [
+ eg:with eg:aProperty ,
+ eg:orAnother
+ ] ;
+ eg:list (
+ 1
+ 2
+ ) ;
+ eg:literal "s1" .
+}
+
+eg:graph2 {
+eg:a
+ eg:b eg:c .
+}
diff --git a/test/sort/pretty.nq b/test/sort/pretty.nq
new file mode 100644
index 00000000..451247d4
--- /dev/null
+++ b/test/sort/pretty.nq
@@ -0,0 +1,10 @@
+<http://example.org/s> <http://example.org/blank> _:b1 <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/aProperty> <http://example.org/graph1> .
+_:b1 <http://example.org/with> <http://example.org/orAnother> <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/list> _:b2 <http://example.org/graph1> .
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "1"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b3 <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "2"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph1> .
+_:b3 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> <http://example.org/graph1> .
+<http://example.org/s> <http://example.org/literal> "s1" <http://example.org/graph1> .
+<http://example.org/a> <http://example.org/b> <http://example.org/c> <http://example.org/graph2> .
diff --git a/test/terse/meson.build b/test/terse/meson.build
index 538a8d16..b0516fcb 100644
--- a/test/terse/meson.build
+++ b/test/terse/meson.build
@@ -1,16 +1,23 @@
base_uri = 'http://drobilla.net/sw/serd/test/terse/'
+args = [
+ '--osyntax', 'turtle',
+ files('manifest.ttl'),
+ base_uri,
+ '--',
+ '-O', 'terse'
+]
+
+test('terse',
+ run_pipe_suite,
+ args: pipe_test_script_args + args,
+ env: test_env,
+ suite: ['suite', 'extra', 'pipe'],
+ timeout: 240)
+
test('terse',
- run_test_suite,
- args: script_args + [
- '--osyntax', 'turtle',
- files('manifest.ttl'),
- base_uri,
- '--',
- '-o',
- 'terse',
- ],
+ run_sort_suite,
+ args: sort_test_script_args + args,
env: test_env,
- is_parallel: false,
- suite: ['suite', 'extra'],
+ suite: ['suite', 'extra', 'sort'],
timeout: 240)
diff --git a/test/test_base.py b/test/test_base.py
new file mode 100755
index 00000000..c3018da3
--- /dev/null
+++ b/test/test_base.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+
+"""Test resolving relative URIs against a base URI with serd-pipe."""
+
+import argparse
+import sys
+import shlex
+import subprocess
+import tempfile
+
+parser = argparse.ArgumentParser(description=__doc__)
+
+parser.add_argument("--tool", default="tools/serd-pipe", help="executable")
+parser.add_argument("--wrapper", default="", help="executable wrapper")
+
+args = parser.parse_args(sys.argv[1:])
+command = shlex.split(args.wrapper) + [
+ args.tool,
+ "-B",
+ "http://example.org",
+ "-I",
+ "turtle",
+ "-",
+]
+
+IN_DOCUMENT = "<s> <p> <o> ."
+OUT_DOCUMENT = "<{0}s> <{0}p> <{0}o> .".format("http://example.org/")
+
+with tempfile.TemporaryFile() as out:
+ proc = subprocess.run(
+ command,
+ check=False,
+ encoding="utf-8",
+ input=IN_DOCUMENT,
+ stdout=out,
+ stderr=subprocess.PIPE,
+ )
+
+ assert proc.returncode == 0
+ assert args.wrapper or len(proc.stderr) == 0
+
+ out.seek(0)
+ lines = out.readlines()
+
+ assert len(lines) == 1
+ assert lines[0].decode("utf-8").strip() == OUT_DOCUMENT
diff --git a/test/test_empty.py b/test/test_empty.py
index a7978e6c..03264d8c 100755
--- a/test/test_empty.py
+++ b/test/test_empty.py
@@ -10,12 +10,12 @@ import tempfile
parser = argparse.ArgumentParser(description=__doc__)
-parser.add_argument("--serdi", default="./serdi", help="path to serdi")
+parser.add_argument("--tool", default="tools/serd-read", help="executable")
parser.add_argument("--wrapper", default="", help="executable wrapper")
parser.add_argument("input", help="valid input file")
args = parser.parse_args(sys.argv[1:])
-command = shlex.split(args.wrapper) + [args.serdi, "-o", "empty", args.input]
+command = shlex.split(args.wrapper) + [args.tool, "-O", "empty", args.input]
with tempfile.TemporaryFile() as out:
diff --git a/test/test_filter.py b/test/test_filter.py
index d44677f5..5f25f22e 100755
--- a/test/test_filter.py
+++ b/test/test_filter.py
@@ -21,7 +21,7 @@ DOCUMENTS = {
parser = argparse.ArgumentParser(description=__doc__)
-parser.add_argument("--serdi", default="./serdi", help="path to serdi")
+parser.add_argument("--tool", default="tools/serd-filter", help="executable")
parser.add_argument("--wrapper", default="", help="executable wrapper")
args = parser.parse_args(sys.argv[1:])
@@ -29,21 +29,21 @@ args = parser.parse_args(sys.argv[1:])
def check_pattern(syntax, pattern, result):
command = shlex.split(args.wrapper) + [
- args.serdi,
- "-i",
+ args.tool,
+ "-I",
syntax,
- "-F",
+ "-v",
pattern,
- "-s",
- DOCUMENTS[syntax],
+ "-",
]
with tempfile.TemporaryFile() as out:
proc = subprocess.run(
command,
+ capture_output=True,
check=False,
encoding="utf-8",
- capture_output=True,
+ input=DOCUMENTS[syntax],
)
assert proc.returncode == 0
diff --git a/test/test_grep.py b/test/test_grep.py
index 0c8c5228..44c3ce1f 100755
--- a/test/test_grep.py
+++ b/test/test_grep.py
@@ -21,7 +21,7 @@ DOCUMENTS = {
parser = argparse.ArgumentParser(description=__doc__)
-parser.add_argument("--serdi", default="./serdi", help="path to serdi")
+parser.add_argument("--tool", default="tools/serd-filter", help="executable")
parser.add_argument("--wrapper", default="", help="executable wrapper")
args = parser.parse_args(sys.argv[1:])
@@ -29,21 +29,20 @@ args = parser.parse_args(sys.argv[1:])
def check_pattern(syntax, pattern, result):
command = shlex.split(args.wrapper) + [
- args.serdi,
- "-i",
+ args.tool,
+ "-I",
syntax,
- "-G",
pattern,
- "-s",
- DOCUMENTS[syntax],
+ "-",
]
with tempfile.TemporaryFile() as out:
proc = subprocess.run(
command,
+ capture_output=True,
check=False,
encoding="utf-8",
- capture_output=True,
+ input=DOCUMENTS[syntax],
)
assert proc.returncode == 0
diff --git a/test/test_multifile.py b/test/test_multifile.py
index 5fb44bc5..c5e11bf3 100755
--- a/test/test_multifile.py
+++ b/test/test_multifile.py
@@ -12,7 +12,7 @@ import tempfile
parser = argparse.ArgumentParser(description=__doc__)
-parser.add_argument("--serdi", default="./serdi", help="path to serdi")
+parser.add_argument("--tool", default="tools/serd-pipe", help="executable")
parser.add_argument("--wrapper", default="", help="executable wrapper")
parser.add_argument("testdir", help="multifile test directory")
@@ -20,7 +20,7 @@ args = parser.parse_args(sys.argv[1:])
in1_path = os.path.join(args.testdir, "input1.ttl")
in2_path = os.path.join(args.testdir, "input2.trig")
check_path = os.path.join(args.testdir, "output.nq")
-command = shlex.split(args.wrapper) + [args.serdi, in1_path, in2_path]
+command = shlex.split(args.wrapper) + [args.tool, in1_path, in2_path]
def _show_diff(from_lines, to_lines, from_filename, to_filename):
diff --git a/test/test_node_syntax.c b/test/test_node_syntax.c
index a9829688..9875e656 100644
--- a/test/test_node_syntax.c
+++ b/test/test_node_syntax.c
@@ -66,6 +66,9 @@ test_common(const SerdSyntax syntax)
assert(test(syntax, serd_new_token(SERD_BLANK, SERD_STRING("b0")), "_:b0"));
+ assert(test(
+ syntax, serd_new_token(SERD_BLANK, SERD_STRING("named1")), "_:named1"));
+
assert(test(syntax,
serd_new_uri(SERD_STRING("http://example.org/")),
"<http://example.org/>"));
diff --git a/test/test_quiet.py b/test/test_quiet.py
index 7f141943..b88f0270 100755
--- a/test/test_quiet.py
+++ b/test/test_quiet.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
-"""Test serdi quiet option."""
+"""Test quiet command-line option."""
import argparse
import sys
@@ -9,12 +9,12 @@ import subprocess
parser = argparse.ArgumentParser(description=__doc__)
-parser.add_argument("--serdi", default="./serdi", help="path to serdi")
+parser.add_argument("--tool", default="tools/serd-pipe", help="executable")
parser.add_argument("--wrapper", default="", help="executable wrapper")
parser.add_argument("input", help="invalid input file")
args = parser.parse_args(sys.argv[1:])
-command = shlex.split(args.wrapper) + [args.serdi, "-q", args.input]
+command = shlex.split(args.wrapper) + [args.tool, "-q", args.input]
proc = subprocess.run(command, check=False, capture_output=True)
assert proc.returncode != 0
diff --git a/test/test_sort.py b/test/test_sort.py
new file mode 100755
index 00000000..4080b93c
--- /dev/null
+++ b/test/test_sort.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+
+"""Run the collation tests for serd-sort."""
+
+import argparse
+import os
+import random
+import shlex
+import subprocess
+import sys
+import tempfile
+
+import serd_test_util
+
+collations = [
+ "GOPS",
+ "GOSP",
+ "GPSO",
+ "GSOP",
+ "GSPO",
+ "OPS",
+ "OSP",
+ "POS",
+ "PSO",
+ "SOP",
+ "SPO",
+ "pretty",
+]
+
+
+def check(test_dir, command_prefix, out_dir, input_path, name):
+ """Sort a single input in the named order and check the output.
+
+ The expected output is assumed to exist at test_dir/NAME.nq.
+ """
+
+ output_path = os.path.join(out_dir, name + ".nq")
+ result_path = os.path.join(test_dir, name + ".nq")
+ options = [] if name == "pretty" else ["-c", name]
+
+ # Randomly add irrelevant options just to cover them
+ if random.choice([True, False]):
+ options += ["-R", "http://example.org/"]
+ if random.choice([True, False]):
+ options += ["-I", "TriG"]
+
+ command = command_prefix + options + ["-o", output_path, input_path]
+
+ proc = subprocess.run(command, capture_output=True, check=False)
+ if proc.returncode != 0:
+ cmd_string = " ".join(shlex.quote(c) for c in command)
+ serd_test_util.error("Unexpected failure: {}".format(cmd_string))
+ sys.stderr.write(proc.stderr.decode("utf-8"))
+ return False
+
+ if not serd_test_util.file_equals(result_path, output_path):
+ serd_test_util.error(
+ "Output {} differs from {}\n".format(output_path, result_path)
+ )
+ return False
+
+ return True
+
+
+def run_tests(test_dir, command_prefix, out_dir):
+ """Run all the tests in the suite."""
+
+ input_trig = os.path.join(test_dir, "input.trig")
+
+ n_failures = 0
+ for name in collations:
+ if not check(test_dir, command_prefix, out_dir, input_trig, name):
+ n_failures += 1
+
+ return n_failures
+
+
+def main():
+ """Run the command line tool."""
+
+ parser = argparse.ArgumentParser(
+ usage="%(prog)s [OPTION]... INPUT",
+ description=__doc__,
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ )
+
+ parser.add_argument(
+ "--tool", default="tools/serd-sort", help="serd-sort executable"
+ )
+
+ parser.add_argument("--wrapper", default="", help="executable wrapper")
+ parser.add_argument(
+ "input", help="path to input.trig in the test directory"
+ )
+
+ args = parser.parse_args(sys.argv[1:])
+ wrapper_prefix = shlex.split(args.wrapper)
+ command_prefix = wrapper_prefix + [args.tool]
+
+ with tempfile.TemporaryDirectory() as out_dir:
+ return run_tests(os.path.dirname(args.input), command_prefix, out_dir)
+
+
+if __name__ == "__main__":
+ try:
+ sys.exit(main())
+ except subprocess.CalledProcessError as error:
+ if error.stderr is not None:
+ sys.stderr.write(error.stderr.decode("utf-8"))
+
+ sys.stderr.write("error: %s\n" % error)
+ sys.exit(error.returncode)
diff --git a/test/test_stdin.py b/test/test_stdin.py
index 11b1ca21..2161a95a 100755
--- a/test/test_stdin.py
+++ b/test/test_stdin.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
-"""Test reading from stdin with serdi."""
+"""Test reading from stdin with serd-pipe."""
import argparse
import sys
@@ -10,15 +10,15 @@ import tempfile
parser = argparse.ArgumentParser(description=__doc__)
-parser.add_argument("--serdi", default="./serdi", help="path to serdi")
+parser.add_argument("--tool", default="tools/serd-pipe", help="executable")
parser.add_argument("--wrapper", default="", help="executable wrapper")
args = parser.parse_args(sys.argv[1:])
command = shlex.split(args.wrapper) + [
- args.serdi,
- "-I",
+ args.tool,
+ "-B",
"http://example.org",
- "-i",
+ "-I",
"ntriples",
"-",
]
diff --git a/test/test_write_error.py b/test/test_write_error.py
index 35b4693b..bc955ce9 100755
--- a/test/test_write_error.py
+++ b/test/test_write_error.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
-"""Test errors writing to a file."""
+"""Test errors when writing to a file."""
import argparse
import sys
@@ -10,12 +10,12 @@ import os
parser = argparse.ArgumentParser(description=__doc__)
-parser.add_argument("--serdi", default="./serdi", help="path to serdi")
+parser.add_argument("--tool", default="tools/serd-pipe", help="executable")
parser.add_argument("--wrapper", default="", help="executable wrapper")
parser.add_argument("input", help="valid input file")
args = parser.parse_args(sys.argv[1:])
-command = shlex.split(args.wrapper) + [args.serdi, args.input]
+command = shlex.split(args.wrapper) + [args.tool, args.input]
if os.path.exists("/dev/full"):
diff --git a/tools/console.c b/tools/console.c
index ea5fd7ee..f1e78d75 100644
--- a/tools/console.c
+++ b/tools/console.c
@@ -26,9 +26,64 @@
# include <io.h>
#endif
+#include <errno.h>
+#include <limits.h>
#include <stdint.h>
+#include <stdlib.h>
#include <string.h>
+// Open the output and build the world, environment, and writer for a tool
+//
+// On failure, an error is printed to stderr and an error status is returned.
+// Members of `tool` that were already set are left for the caller to clean up.
+SerdStatus
+serd_tool_setup(SerdTool* const   tool,
+                const char* const program,
+                SerdCommonOptions options)
+{
+  // The output comes first: without it there is no point building the rest
+  tool->out = serd_open_output(options.out_filename, options.block_size);
+  if (!tool->out) {
+    fprintf(stderr,
+            "%s: failed to open output file (%s)\n",
+            program,
+            strerror(errno));
+    return SERD_ERR_UNKNOWN;
+  }
+
+  // Build the world, environment, and writer in order, stopping at the first
+  // one that can not be created
+  bool ok = false;
+  if ((tool->world = serd_world_new())) {
+    tool->env =
+      serd_create_env(program, options.base_uri, options.out_filename);
+
+    if (tool->env) {
+      const SerdSyntax syntax = serd_choose_syntax(
+        tool->world, options.output, options.out_filename, SERD_NQUADS);
+
+      tool->writer = serd_writer_new(
+        tool->world, syntax, options.output.flags, tool->env, tool->out);
+
+      ok = tool->writer != NULL;
+    }
+  }
+
+  if (!ok) {
+    fprintf(stderr, "%s: failed to set up writing environment\n", program);
+    return SERD_ERR_INTERNAL;
+  }
+
+  return SERD_SUCCESS;
+}
+
+// Close the output and free everything that serd_tool_setup() created
+//
+// Returns SERD_ERR_BAD_WRITE if closing the output reports an error.
+SerdStatus
+serd_tool_cleanup(const SerdTool tool)
+{
+  // The sink is closed explicitly (not just freed) so write errors are seen
+  const bool close_failed = tool.out && serd_byte_sink_close(tool.out);
+  if (close_failed) {
+    perror("write error");
+  }
+
+  serd_writer_free(tool.writer);
+  serd_env_free(tool.env);
+  serd_world_free(tool.world);
+  serd_byte_sink_free(tool.out);
+
+  return close_failed ? SERD_ERR_BAD_WRITE : SERD_SUCCESS;
+}
+
void
serd_set_stream_utf8_mode(FILE* const stream)
{
@@ -39,7 +94,7 @@ serd_set_stream_utf8_mode(FILE* const stream)
#endif
}
-int
+SerdStatus
serd_print_version(const char* const program)
{
printf("%s %d.%d.%d <http://drobilla.net/software/serd>\n",
@@ -53,7 +108,43 @@ serd_print_version(const char* const program)
"This is free software; you are free to change and redistribute it.\n"
"There is NO WARRANTY, to the extent permitted by law.\n");
- return 0;
+ return SERD_FAILURE;
+}
+
+// Get the argument of the option flag at `iter` and advance past both
+//
+// The argument must be the next word on the command line, so the flag must be
+// the last character of its own word.  On success, `argument` points to the
+// argument and `iter` points to the word after it.
+SerdStatus
+serd_get_argument(OptionIter* const iter, const char** const argument)
+{
+  char* const* const argv = iter->argv;
+  const char* const  word = argv[iter->a];
+  const char         flag = word[iter->f];
+
+  // Step past the flag character itself, even if the checks below fail
+  ++iter->f;
+
+  // Fail if more characters follow the flag, or there is no next word
+  if (word[iter->f] != '\0' || iter->a + 1 == iter->argc) {
+    fprintf(stderr, "%s: option requires an argument -- %c\n", argv[0], flag);
+    return SERD_ERR_BAD_ARG;
+  }
+
+  *argument = argv[iter->a + 1];
+  iter->a += 2;
+  iter->f = 1;
+  return SERD_SUCCESS;
+}
+
+// Get the argument of the option flag at `iter` as a positive size
+//
+// The argument must be a positive decimal integer below LONG_MAX with no
+// trailing characters.  Otherwise, an error is printed to stderr and
+// SERD_ERR_BAD_ARG is returned with `argument` left unchanged.
+SerdStatus
+serd_get_size_argument(OptionIter* const iter, size_t* const argument)
+{
+  const char*      string = NULL;
+  const SerdStatus st     = serd_get_argument(iter, &string);
+  if (st) {
+    return st;
+  }
+
+  // strtol() returns zero for an empty string and LONG_MAX on overflow, so
+  // both cases are rejected by the range check
+  char*      endptr = NULL;
+  const long size   = strtol(string, &endptr, 10);
+  if (size <= 0 || size == LONG_MAX || *endptr != '\0') {
+    // Say what was wrong, like the other argument parsers here do
+    fprintf(stderr, "%s: invalid size \"%s\"\n", iter->argv[0], string);
+    return SERD_ERR_BAD_ARG;
+  }
+
+  *argument = (size_t)size;
+  return SERD_SUCCESS;
}
SerdStatus
@@ -89,8 +180,26 @@ serd_set_input_option(const SerdStringView name,
}
}
- // SERDI_ERRORF("invalid input option `%s'\n", name.buf);
- return SERD_FAILURE;
+ return SERD_ERR_BAD_ARG;
+}
+
+// Parse the argument of an input option (a syntax name or a reader flag name)
+//
+// Marks the syntax as overridden if the argument is "empty" or a syntax has
+// been set.  Prints an error to stderr if the argument is not recognized.
+SerdStatus
+serd_parse_input_argument(OptionIter* const        iter,
+                          SerdSyntaxOptions* const options)
+{
+  const char* argument = NULL;
+  SerdStatus  st       = serd_get_argument(iter, &argument);
+  if (st) {
+    return st;
+  }
+
+  st = serd_set_input_option(
+    SERD_STRING(argument), &options->syntax, &options->flags);
+  if (st) {
+    fprintf(stderr, "%s: unknown option \"%s\"\n", iter->argv[0], argument);
+    return st;
+  }
+
+  if (options->syntax || !strcmp(argument, "empty")) {
+    options->overridden = true;
+  }
+
+  return st;
}
SerdStatus
@@ -126,16 +235,90 @@ serd_set_output_option(const SerdStringView name,
}
}
+ return SERD_ERR_BAD_ARG;
+}
+
+// Parse the argument of an output option (a syntax name or a writer flag name)
+//
+// Marks the syntax as overridden if the argument is "empty" or a syntax has
+// been set.  Prints an error to stderr if the argument is not recognized.
+SerdStatus
+serd_parse_output_argument(OptionIter* const        iter,
+                           SerdSyntaxOptions* const options)
+{
+  const char* argument = NULL;
+  SerdStatus  st       = serd_get_argument(iter, &argument);
+  if (st) {
+    return st;
+  }
+
+  st = serd_set_output_option(
+    SERD_STRING(argument), &options->syntax, &options->flags);
+  if (st) {
+    fprintf(stderr, "%s: unknown option \"%s\"\n", iter->argv[0], argument);
+    return st;
+  }
+
+  if (options->syntax || !strcmp(argument, "empty")) {
+    options->overridden = true;
+  }
+
+  return st;
+}
+
+// Parse the option at `iter` if it is one of the options shared by all tools
+//
+// Handles -B, -I, -O, -b, -k, and -o, each of which takes an argument, and
+// returns the status from parsing that argument.  For any other flag,
+// SERD_FAILURE is returned and `iter` is not modified.
+SerdStatus
+serd_parse_common_option(OptionIter* const iter, SerdCommonOptions* const opts)
+{
+ const char opt = iter->argv[iter->a][iter->f];
+ switch (opt) {
+ case 'B':
+ return serd_get_argument(iter, &opts->base_uri);
+
+ case 'I':
+ return serd_parse_input_argument(iter, &opts->input);
+
+ case 'O':
+ return serd_parse_output_argument(iter, &opts->output);
+
+ case 'b':
+ return serd_get_size_argument(iter, &opts->block_size);
+
+ case 'k':
+ return serd_get_size_argument(iter, &opts->stack_size);
+
+ case 'o':
+ return serd_get_argument(iter, &opts->out_filename);
+
+ default:
+ break;
+ }
+
return SERD_FAILURE;
}
+// Create an environment with a base URI chosen from the base option
+//
+// A base string with a URI scheme is used directly as the base URI.  The
+// special value "rebase" takes the base from the output filename, which must
+// then be given.  Anything else is treated as a path to take the base from.
+SerdEnv*
+serd_create_env(const char* const program,
+                const char* const base_string,
+                const char* const out_filename)
+{
+  bool rebase = false;
+  if (base_string) {
+    rebase = strcmp(base_string, "rebase") == 0;
+  }
+
+  if (rebase && !out_filename) {
+    fprintf(stderr, "%s: rebase requires an output filename\n", program);
+    return NULL;
+  }
+
+  const bool is_uri = base_string && serd_uri_string_has_scheme(base_string);
+  if (is_uri) {
+    return serd_env_new(SERD_STRING(base_string));
+  }
+
+  // Not a URI: start with no base, then try to set one from a path.  The
+  // status is ignored, so a missing or unusable path leaves the base unset.
+  SerdEnv* const    env       = serd_env_new(SERD_EMPTY_STRING());
+  const char* const base_path = rebase ? out_filename : base_string;
+  serd_set_base_uri_from_path(env, base_path);
+  return env;
+}
+
SerdSyntax
-serd_choose_input_syntax(SerdWorld* const world,
- const SerdSyntax requested,
- const char* const filename)
+serd_choose_syntax(SerdWorld* const world,
+ const SerdSyntaxOptions options,
+ const char* const filename,
+ const SerdSyntax fallback)
{
- if (requested) {
- return requested;
+ if (options.overridden || options.syntax != SERD_SYNTAX_EMPTY) {
+ return options.syntax;
+ }
+
+ if (!filename || !strcmp(filename, "-")) {
+ return fallback;
}
const SerdSyntax guessed = serd_guess_syntax(filename);
@@ -202,17 +385,90 @@ serd_open_output(const char* const filename, const size_t block_size)
SerdStatus
serd_set_base_uri_from_path(SerdEnv* const env, const char* const path)
{
- char* const input_path = serd_canonical_path(path);
- if (!input_path) {
+ const size_t path_len = path ? strlen(path) : 0u;
+ if (!path_len) {
return SERD_ERR_BAD_ARG;
}
- SerdNode* const file_uri =
- serd_new_file_uri(SERD_STRING(input_path), SERD_EMPTY_STRING());
+ char* const real_path = serd_canonical_path(path);
+ if (!real_path) {
+ return SERD_ERR_BAD_ARG;
+ }
+
+ const size_t real_path_len = strlen(real_path);
+ SerdNode* base_node = NULL;
+ if (path[path_len - 1] == '/' || path[path_len - 1] == '\\') {
+ char* const base_path = (char*)calloc(real_path_len + 2, 1);
+ memcpy(base_path, real_path, real_path_len);
+ base_path[real_path_len] = path[path_len - 1];
+
+ base_node = serd_new_file_uri(SERD_STRING(base_path), SERD_EMPTY_STRING());
+ free(base_path);
+ } else {
+ base_node = serd_new_file_uri(SERD_STRING(real_path), SERD_EMPTY_STRING());
+ }
- serd_env_set_base_uri(env, serd_node_string_view(file_uri));
- serd_node_free(file_uri);
- serd_free(input_path);
+ serd_env_set_base_uri(env, serd_node_string_view(base_node));
+ serd_node_free(base_node);
+ serd_free(real_path);
return SERD_SUCCESS;
}
+
+// Read an entire document from `in` in the given syntax, sending it to `sink`
+//
+// A reader is created for this one source using the input flags and stack
+// size from `opts`, and freed before returning.  Returns SERD_ERR_INTERNAL if
+// the reader can not be created, otherwise the status from reading.
+SerdStatus
+serd_read_source(SerdWorld* const        world,
+                 const SerdCommonOptions opts,
+                 SerdEnv* const          env,
+                 const SerdSyntax        syntax,
+                 SerdByteSource* const   in,
+                 const SerdSink* const   sink)
+{
+  SerdReader* const reader = serd_reader_new(
+    world, syntax, opts.input.flags, env, sink, opts.stack_size);
+
+  // Don't start a reader that could not be created
+  // NOTE(review): presumably this can happen on allocation failure or with an
+  // unusably small stack size; confirm against serd_reader_new()
+  if (!reader) {
+    serd_logf(world, SERD_LOG_LEVEL_ERROR, "failed to create reader");
+    return SERD_ERR_INTERNAL;
+  }
+
+  SerdStatus st = serd_reader_start(reader, in);
+  if (!st) {
+    st = serd_reader_read_document(reader);
+  }
+
+  serd_reader_free(reader);
+  return st;
+}
+
+// Read every input in order, sending everything to `sink`
+//
+// Reading stops at the first input that fails, and that status is returned.
+// SERD_ERR_BAD_ARG is returned if an input can not be opened.
+SerdStatus
+serd_read_inputs(SerdWorld* const        world,
+                 const SerdCommonOptions opts,
+                 SerdEnv* const          env,
+                 const intptr_t          n_inputs,
+                 char* const* const      inputs,
+                 const SerdSink* const   sink)
+{
+  for (intptr_t i = 0; i < n_inputs; ++i) {
+    const char* const in_path = inputs[i];
+
+    // Without a user-given base, a file input's own path becomes the base.
+    // The status is ignored since this is only done "if possible".
+    const bool base_given = opts.base_uri[0] != '\0';
+    if (!base_given && strcmp(in_path, "-") != 0) {
+      serd_set_base_uri_from_path(env, in_path);
+    }
+
+    SerdByteSource* const in = serd_open_input(in_path, opts.block_size);
+    if (!in) {
+      return SERD_ERR_BAD_ARG;
+    }
+
+    // Read the whole input, falling back to TriG if the syntax is unknown
+    const SerdSyntax syntax =
+      serd_choose_syntax(world, opts.input, in_path, SERD_TRIG);
+
+    const SerdStatus st =
+      serd_read_source(world, opts, env, syntax, in, sink);
+
+    serd_byte_source_free(in);
+    if (st) {
+      return st;
+    }
+  }
+
+  return SERD_SUCCESS;
+}
diff --git a/tools/console.h b/tools/console.h
index 16f6fd14..cb227e8e 100644
--- a/tools/console.h
+++ b/tools/console.h
@@ -16,28 +16,108 @@
#include "serd/serd.h"
+#include <stdbool.h>
+#include <stdint.h>
#include <stdio.h>
+// Iterator over command-line options with support for BSD-style flag merging
+typedef struct {
+ char* const* argv; ///< Complete argument vector (from main)
+ int argc; ///< Total number of arguments (from main)
+ int a; ///< Argument index (index into argv)
+ int f; ///< Flag index (offset in argv[arg])
+} OptionIter;
+
+// Options for the input or output syntax
+typedef struct {
+ SerdSyntax syntax; ///< User-specified syntax, or empty
+ uint32_t flags; ///< SerdReaderFlags or SerdWriterFlags
+ bool overridden; ///< True if syntax was explicitly given
+} SerdSyntaxOptions;
+
+// Options common to all command-line tools
+typedef struct {
+ const char* base_uri; ///< Base URI, path, or "rebase" (from -B)
+ const char* out_filename; ///< Output filename (from -o)
+ size_t block_size; ///< Block size for opening input and output (from -b)
+ size_t stack_size; ///< Stack size passed to the reader (from -k)
+ SerdSyntaxOptions input; ///< Input syntax and reader flags (from -I)
+ SerdSyntaxOptions output; ///< Output syntax and writer flags (from -O)
+} SerdCommonOptions;
+
+// Common "global" state of a command-line tool that writes data
+//
+// Filled in by serd_tool_setup() and released by serd_tool_cleanup().
+typedef struct {
+ SerdByteSink* out; ///< Output sink that the writer writes to
+ SerdWorld* world; ///< World used to create the writer
+ SerdEnv* env; ///< Environment created by serd_create_env()
+ SerdWriter* writer; ///< Writer that writes to `out`
+} SerdTool;
+
+// Return true if `iter` is past the last option flag
+//
+// This is the case when the arguments are exhausted, the current argument
+// does not start with '-', or there is no flag character at the flag index
+// (as with a lone "-").
+static inline bool
+serd_option_iter_is_end(const OptionIter iter)
+{
+  if (iter.a >= iter.argc) {
+    return true;
+  }
+
+  const char* const word = iter.argv[iter.a];
+  return word[0] != '-' || word[iter.f] == '\0';
+}
+
+// Advance `iter` past a flag that takes no argument
+//
+// Moves to the next flag in the same argument if there is one, otherwise to
+// the first flag position of the next argument.  Always succeeds.
+static inline SerdStatus
+serd_option_iter_advance(OptionIter* const iter)
+{
+  ++iter->f;
+
+  const char next_flag = iter->argv[iter->a][iter->f];
+  if (next_flag == '\0') {
+    ++iter->a;
+    iter->f = 1;
+  }
+
+  return SERD_SUCCESS;
+}
+
+SerdStatus
+serd_tool_setup(SerdTool* tool, const char* program, SerdCommonOptions options);
+
+SerdStatus
+serd_tool_cleanup(SerdTool tool);
+
void
serd_set_stream_utf8_mode(FILE* stream);
-int
+SerdStatus
serd_print_version(const char* program);
SerdStatus
+serd_get_argument(OptionIter* iter, const char** argument);
+
+SerdStatus
+serd_get_size_argument(OptionIter* iter, size_t* argument);
+
+SerdStatus
serd_set_input_option(SerdStringView name,
SerdSyntax* syntax,
SerdReaderFlags* flags);
SerdStatus
+serd_parse_input_argument(OptionIter* iter, SerdSyntaxOptions* options);
+
+SerdStatus
serd_set_output_option(SerdStringView name,
SerdSyntax* syntax,
SerdWriterFlags* flags);
+SerdStatus
+serd_parse_output_argument(OptionIter* iter, SerdSyntaxOptions* options);
+
+SerdStatus
+serd_parse_common_option(OptionIter* iter, SerdCommonOptions* opts);
+
+SerdEnv*
+serd_create_env(const char* program,
+ const char* base_string,
+ const char* out_filename);
+
SerdSyntax
-serd_choose_input_syntax(SerdWorld* world,
- SerdSyntax requested,
- const char* filename);
+serd_choose_syntax(SerdWorld* world,
+ SerdSyntaxOptions options,
+ const char* filename,
+ SerdSyntax fallback);
SerdByteSource*
serd_open_input(const char* filename, size_t block_size);
@@ -47,3 +127,19 @@ serd_open_output(const char* filename, size_t block_size);
SerdStatus
serd_set_base_uri_from_path(SerdEnv* env, const char* path);
+
+SerdStatus
+serd_read_source(SerdWorld* world,
+ SerdCommonOptions opts,
+ SerdEnv* env,
+ SerdSyntax syntax,
+ SerdByteSource* in,
+ const SerdSink* sink);
+
+SerdStatus
+serd_read_inputs(SerdWorld* world,
+ SerdCommonOptions opts,
+ SerdEnv* env,
+ intptr_t n_inputs,
+ char* const* inputs,
+ const SerdSink* sink);
diff --git a/tools/meson.build b/tools/meson.build
index 3054364a..d4964784 100644
--- a/tools/meson.build
+++ b/tools/meson.build
@@ -5,9 +5,29 @@ if get_option('static')
tool_link_args += ['-static']
endif
-serdi = executable('serdi',
- ['serdi.c', 'console.c'],
- c_args: tool_c_args,
- link_args: tool_link_args,
- install: true,
- dependencies: serd_dep)
+tools = [
+ 'filter',
+ 'pipe',
+ 'sort',
+]
+
+serd_filter = executable('serd-filter',
+ ['serd-filter.c', 'console.c'],
+ c_args: tool_c_args,
+ link_args: tool_link_args,
+ install: true,
+ dependencies: serd_dep)
+
+serd_pipe = executable('serd-pipe',
+ ['serd-pipe.c', 'console.c'],
+ c_args: tool_c_args,
+ link_args: tool_link_args,
+ install: true,
+ dependencies: serd_dep)
+
+serd_sort = executable('serd-sort',
+ ['serd-sort.c', 'console.c'],
+ c_args: tool_c_args,
+ link_args: tool_link_args,
+ install: true,
+ dependencies: serd_dep)
diff --git a/tools/serd-filter.c b/tools/serd-filter.c
new file mode 100644
index 00000000..789d3149
--- /dev/null
+++ b/tools/serd-filter.c
@@ -0,0 +1,287 @@
+/*
+ Copyright 2011-2021 David Robillard <d@drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include "console.h"
+
+#include "serd/serd.h"
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Application (after parsing command-line arguments) */
+
+// All options
+typedef struct {
+ SerdCommonOptions common; // Options shared with the other tools
+ const char* pattern; // Pattern string (first positional argument)
+ const char* pattern_file; // File to read the pattern from instead (-f)
+ char* const* inputs; // Input paths (the remaining arguments)
+ intptr_t n_inputs; // Number of elements in `inputs`
+ bool invert; // Select non-matching statements (-v)
+} Options;
+
+// A single statement pattern
+//
+// The nodes are copies made by on_pattern_event(), all null until a
+// statement has been read.
+typedef struct {
+ SerdNode* s; // Copy of the statement's subject
+ SerdNode* p; // Copy of the statement's predicate
+ SerdNode* o; // Copy of the statement's object
+ SerdNode* g; // Copy of the statement's graph
+} FilterPattern;
+
+// Handler for events read from a pattern
+//
+// Copies the nodes of the first statement into the FilterPattern handle, and
+// fails with SERD_ERR_INVALID on any statement after that.  All other events
+// are ignored.
+static SerdStatus
+on_pattern_event(void* const handle, const SerdEvent* const event)
+{
+  if (event->type != SERD_STATEMENT) {
+    return SERD_SUCCESS;
+  }
+
+  // A set subject means a statement was already seen
+  FilterPattern* const pattern = (FilterPattern*)handle;
+  if (pattern->s) {
+    return SERD_ERR_INVALID;
+  }
+
+  const SerdStatement* const stmt = event->statement.statement;
+
+  pattern->s = serd_node_copy(serd_statement_subject(stmt));
+  pattern->p = serd_node_copy(serd_statement_predicate(stmt));
+  pattern->o = serd_node_copy(serd_statement_object(stmt));
+  pattern->g = serd_node_copy(serd_statement_graph(stmt));
+  return SERD_SUCCESS;
+}
+
+// Parse a pattern from some input and return a new filter for it
+//
+// The pattern is read as NQuads with variables enabled.  Returns a new sink
+// that filters statements sent on to `sink`, or null (after logging an error)
+// if the pattern could not be read.
+static SerdSink*
+parse_pattern(SerdWorld* const      world,
+              const SerdSink* const sink,
+              SerdByteSource* const byte_source,
+              const bool            inclusive)
+{
+  SerdEnv* const env     = serd_env_new(SERD_EMPTY_STRING());
+  FilterPattern  pat     = {NULL, NULL, NULL, NULL};
+  SerdSink*      in_sink = serd_sink_new(&pat, on_pattern_event, NULL);
+  SerdReader*    reader  = serd_reader_new(
+    world, SERD_NQUADS, SERD_READ_VARIABLES, env, in_sink, 4096);
+
+  SerdStatus st = serd_reader_start(reader, byte_source);
+  if (!st) {
+    st = serd_reader_read_document(reader);
+  }
+
+  serd_reader_free(reader);
+  serd_env_free(env);
+  serd_sink_free(in_sink);
+
+  SerdSink* filter = NULL;
+  if (st) {
+    serd_logf(world,
+              SERD_LOG_LEVEL_ERROR,
+              "failed to parse pattern (%s)",
+              serd_strerror(st));
+  } else {
+    filter = serd_filter_new(sink, pat.s, pat.p, pat.o, pat.g, inclusive);
+  }
+
+  // Free the copied nodes on every path: reading can fail after the first
+  // statement was already stored (for example on a second statement)
+  serd_node_free(pat.s);
+  serd_node_free(pat.p);
+  serd_node_free(pat.o);
+  serd_node_free(pat.g);
+  return filter;
+}
+
+SERD_LOG_FUNC(2, 3)
+static SerdStatus
+log_error(SerdWorld* const world, const char* const fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+
+ const SerdLogField file = {"SERD_FILE", "serd-filter"};
+ const SerdStatus st =
+ serd_vxlogf(world, SERD_LOG_LEVEL_ERROR, 1, &file, fmt, args);
+
+ va_end(args);
+ return st;
+}
+
+// Run the tool using the given options
+//
+// Sets up the writer, builds a filter from the pattern, and reads every input
+// through the filter.  Everything that was set up is released on all paths,
+// including the early returns for a missing pattern or filter.
+static SerdStatus
+run(Options opts)
+{
+  SerdTool app = {NULL, NULL, NULL, NULL};
+
+  // Set up the writing environment
+  SerdStatus st = SERD_SUCCESS;
+  if ((st = serd_tool_setup(&app, "serd-filter", opts.common))) {
+    serd_tool_cleanup(app);
+    return st;
+  }
+
+  const SerdSink* const target = serd_writer_sink(app.writer);
+
+  // Open the pattern input (either a string or filename)
+  SerdByteSource* const pattern =
+    opts.pattern ? serd_byte_source_new_string(opts.pattern, NULL)
+    : opts.pattern_file
+      ? serd_byte_source_new_filename(opts.pattern_file,
+                                      opts.common.block_size)
+      : NULL;
+  if (!pattern) {
+    log_error(app.world, "failed to open pattern");
+    serd_tool_cleanup(app);
+    return SERD_ERR_UNKNOWN;
+  }
+
+  // Set up the output pipeline: filter -> writer
+  SerdSink* const filter =
+    parse_pattern(app.world, target, pattern, !opts.invert);
+  if (!filter) {
+    log_error(app.world, "failed to set up filter");
+    serd_byte_source_free(pattern);
+    serd_tool_cleanup(app);
+    return SERD_ERR_UNKNOWN;
+  }
+
+  // Read all the inputs, which drives the writer to emit the output
+  if (!(st = serd_read_inputs(app.world,
+                              opts.common,
+                              app.env,
+                              opts.n_inputs,
+                              opts.inputs,
+                              filter))) {
+    st = serd_writer_finish(app.writer);
+  }
+
+  if (st) {
+    log_error(app.world, "failed to read input (%s)", serd_strerror(st));
+  }
+
+  serd_sink_free(filter);
+  serd_byte_source_free(pattern);
+
+  // A read error takes precedence over an error from closing the output
+  const SerdStatus cst = serd_tool_cleanup(app);
+  return st ? st : cst;
+}
+
+/* Command-line interface (before setting up serd) */
+
+// Print the usage message and return an exit status
+//
+// If `error` is true, the message goes to stderr after a blank line and
+// EXIT_FAILURE is returned, otherwise it goes to stdout and EXIT_SUCCESS is
+// returned.
+//
+// NOTE(review): -b is accepted via serd_parse_common_option() but is not
+// listed here; confirm whether that is intended.
+static int
+print_usage(const char* const name, const bool error)
+{
+ static const char* const description =
+ "Search for statements matching PATTERN in each INPUT.\n"
+ "INPUT can be a local filename, or \"-\" to read from standard input.\n\n"
+ " -B BASE_URI Base URI or path for resolving relative references.\n"
+ " -I SYNTAX Input syntax (turtle/ntriples/trig/nquads),\n"
+ " or option (lax/variables/relative/global/generated).\n"
+ " -O SYNTAX Output syntax (empty/turtle/ntriples/nquads),\n"
+ " or option (ascii/expanded/verbatim/terse/lax).\n"
+ " -V Display version information and exit.\n"
+ " -f PATTERN_FILE Read pattern from PATTERN_FILE instead.\n"
+ " -h Display this help and exit.\n"
+ " -k BYTES Parser stack size.\n"
+ " -o FILENAME Write output to FILENAME instead of stdout.\n"
+ " -v Invert filter to select non-matching statements.\n";
+
+ FILE* const os = error ? stderr : stdout;
+ fprintf(os, "%s", error ? "\n" : "");
+ fprintf(os, "Usage: %s [OPTION]... PATTERN INPUT...\n", name);
+ fprintf(os, " %s [OPTION]... -f PATTERN_FILE INPUT...\n", name);
+ fprintf(os, "\n%s", description);
+ return error ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
+// Parse the option pointed to by `iter`, and advance it to the next one
+//
+// Returns SERD_FAILURE for options that print something and stop (-V and -h),
+// and SERD_ERR_BAD_ARG after printing an error for an unknown option.
+static SerdStatus
+parse_option(OptionIter* const iter, Options* const opts)
+{
+  // Options shared by every tool are tried first
+  const SerdStatus common_st = serd_parse_common_option(iter, &opts->common);
+  if (common_st != SERD_FAILURE) {
+    return common_st;
+  }
+
+  const char* const program = iter->argv[0];
+  const char        opt     = iter->argv[iter->a][iter->f];
+
+  if (opt == 'V') {
+    return serd_print_version("serd-filter");
+  }
+
+  if (opt == 'f') {
+    return serd_get_argument(iter, &opts->pattern_file);
+  }
+
+  if (opt == 'h') {
+    print_usage(program, false);
+    return SERD_FAILURE;
+  }
+
+  if (opt == 'v') {
+    opts->invert = true;
+    return serd_option_iter_advance(iter);
+  }
+
+  fprintf(stderr, "%s: invalid option -- '%c'\n", program, opt);
+  return SERD_ERR_BAD_ARG;
+}
+
+// Parse the command line and run the tool
+//
+// Returns zero on success or after printing the version or help, and non-zero
+// on a usage error or if running the tool fails.
+int
+main(int argc, char** argv)
+{
+  Options opts = {{"",
+                   NULL,
+                   4096u,
+                   1048576u,
+                   {SERD_SYNTAX_EMPTY, 0u, false},
+                   {SERD_NQUADS, 0u, false}},
+                  NULL,
+                  NULL,
+                  NULL,
+                  0u,
+                  false};
+
+  // Parse all command line options (which must precede inputs)
+  SerdStatus st   = SERD_SUCCESS;
+  OptionIter iter = {argv, argc, 1, 1};
+  while (!serd_option_iter_is_end(iter)) {
+    if ((st = parse_option(&iter, &opts))) {
+      return (st == SERD_FAILURE) ? 0 : print_usage(argv[0], true);
+    }
+  }
+
+  // If -f isn't used, then the first positional argument is the pattern
+  if (!opts.pattern_file) {
+    // Without this check, argv[argc] would be taken as the pattern and the
+    // argument index would step past the end of the argument vector
+    if (iter.a >= argc) {
+      fprintf(stderr, "%s: missing pattern\n", argv[0]);
+      return print_usage(argv[0], true);
+    }
+
+    opts.pattern = argv[iter.a++];
+  }
+
+  // Every argument past that is an input
+  opts.inputs   = argv + iter.a;
+  opts.n_inputs = argc - iter.a;
+  if (opts.n_inputs <= 0) {
+    fprintf(stderr, "%s: missing input\n", argv[0]);
+    return print_usage(argv[0], true);
+  }
+
+  // Don't add prefixes to blank node labels if there is only one input
+  if (opts.n_inputs == 1) {
+    opts.common.input.flags |= SERD_READ_GLOBAL;
+  }
+
+  return run(opts) > SERD_FAILURE;
+}
diff --git a/tools/serd-pipe.c b/tools/serd-pipe.c
new file mode 100644
index 00000000..75b3e0d4
--- /dev/null
+++ b/tools/serd-pipe.c
@@ -0,0 +1,209 @@
+/*
+ Copyright 2011-2021 David Robillard <d@drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include "console.h"
+
+#include "serd/serd.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+/* Application (after parsing command-line arguments) */
+
+// All options
+typedef struct {
+ SerdCommonOptions common; // Options shared with the other tools
+ const char* root_uri; // Root URI set on the writer (-R)
+ const char* input_string; // String to parse as input (-s)
+ char* const* inputs; // Input paths (the positional arguments)
+ intptr_t n_inputs; // Number of elements in `inputs`
+ bool canonical; // Pass statements through a canon sink (-C)
+ bool quiet; // Install the quiet log function (-q)
+} Options;
+
+// Run the tool using the given options
+//
+// Sets up the writer, optionally puts a canon sink in front of it, then reads
+// the input string (if any) followed by every input.  The first error stops
+// processing, and everything that was set up is released on all paths.
+static SerdStatus
+run(const Options opts)
+{
+  SerdTool app = {NULL, NULL, NULL, NULL};
+
+  // Set up the writing environment
+  SerdStatus st = SERD_SUCCESS;
+  if ((st = serd_tool_setup(&app, "serd-pipe", opts.common))) {
+    serd_tool_cleanup(app);
+    return st;
+  }
+
+  if (opts.quiet) {
+    serd_set_log_func(app.world, serd_quiet_log_func, NULL);
+  }
+
+  serd_writer_set_root_uri(app.writer, SERD_STRING(opts.root_uri));
+
+  // Set up the output pipeline: [canon] -> writer
+  const SerdSink* const target = serd_writer_sink(app.writer);
+  const SerdSink*       sink   = target;
+  SerdSink*             canon  = NULL;
+  if (opts.canonical) {
+    canon = serd_canon_new(app.world, target, opts.common.input.flags);
+    sink  = canon;
+  }
+
+  if (opts.input_string) {
+    SerdByteSource* const in =
+      serd_byte_source_new_string(opts.input_string, NULL);
+
+    st = serd_read_source(
+      app.world,
+      opts.common,
+      app.env,
+      serd_choose_syntax(app.world, opts.common.input, NULL, SERD_TRIG),
+      in,
+      sink);
+
+    serd_byte_source_free(in);
+  }
+
+  // Read all the inputs, which drives the writer to emit the output
+  if (!st) {
+    st = serd_read_inputs(
+      app.world, opts.common, app.env, opts.n_inputs, opts.inputs, sink);
+  }
+
+  if (!st) {
+    st = serd_writer_finish(app.writer);
+  }
+
+  // The canon sink is owned here and is not part of the tool state, so it
+  // must be freed separately (before the writer it sends to)
+  if (canon) {
+    serd_sink_free(canon);
+  }
+
+  // A read or write error takes precedence over an error from cleanup
+  const SerdStatus cst = serd_tool_cleanup(app);
+  return st ? st : cst;
+}
+
+/* Command-line interface (before setting up serd) */
+
+// Print the usage message and return `error` as an exit status
+//
+// If `error` is true, the message goes to stderr after a blank line,
+// otherwise it goes to stdout.
+static int
+print_usage(const char* const name, const bool error)
+{
+ static const char* const description =
+ "Read and write RDF data.\n"
+ "INPUT can be a local filename, or \"-\" to read from standard input.\n\n"
+ " -B BASE_URI Base URI or path for resolving relative references.\n"
+ " -C Convert literals to canonical form.\n"
+ " -I SYNTAX Input syntax (turtle/ntriples/trig/nquads),\n"
+ " or option (lax/variables/relative/global/generated).\n"
+ " -O SYNTAX Output syntax (empty/turtle/ntriples/nquads),\n"
+ " or option (ascii/expanded/verbatim/terse/lax).\n"
+ " -R ROOT_URI Keep relative URIs within ROOT_URI.\n"
+ " -V Display version information and exit.\n"
+ " -b BYTES I/O block size.\n"
+ " -h Display this help and exit.\n"
+ " -k BYTES Parser stack size.\n"
+ " -o FILENAME Write output to FILENAME instead of stdout.\n"
+ " -q Suppress warning and error output.\n"
+ " -s STRING Parse STRING as input.\n";
+
+ FILE* const os = error ? stderr : stdout;
+ fprintf(os, "%s", error ? "\n" : "");
+ fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name);
+ fprintf(os, "%s", description);
+ return error;
+}
+
+// Parse the option pointed to by `iter`, and advance it to the next one
+//
+// Returns SERD_FAILURE for options that print something and stop (-V and -h),
+// and SERD_ERR_BAD_ARG after printing an error for an unknown option.
+static SerdStatus
+parse_option(OptionIter* const iter, Options* const opts)
+{
+  // Options shared by every tool are tried first
+  const SerdStatus common_st = serd_parse_common_option(iter, &opts->common);
+  if (common_st != SERD_FAILURE) {
+    return common_st;
+  }
+
+  const char* const program = iter->argv[0];
+  const char        opt     = iter->argv[iter->a][iter->f];
+
+  if (opt == 'C') {
+    opts->canonical = true;
+    return serd_option_iter_advance(iter);
+  }
+
+  if (opt == 'R') {
+    return serd_get_argument(iter, &opts->root_uri);
+  }
+
+  if (opt == 'V') {
+    return serd_print_version("serd-pipe");
+  }
+
+  if (opt == 'h') {
+    print_usage(program, false);
+    return SERD_FAILURE;
+  }
+
+  if (opt == 'q') {
+    opts->quiet = true;
+    return serd_option_iter_advance(iter);
+  }
+
+  if (opt == 's') {
+    return serd_get_argument(iter, &opts->input_string);
+  }
+
+  fprintf(stderr, "%s: invalid option -- '%c'\n", program, opt);
+  return SERD_ERR_BAD_ARG;
+}
+
+// Parse the command line and run the tool
+//
+// With no inputs and no input string, standard input ("-") is read.  Returns
+// zero on success or after printing the version or help, and non-zero on a
+// usage error or if running the tool fails.
+int
+main(const int argc, char* const* const argv)
+{
+  char* const default_input[] = {"-"};
+
+  Options opts = {.common       = {.base_uri     = "",
+                                   .out_filename = NULL,
+                                   .block_size   = 4096u,
+                                   .stack_size   = 1048576u,
+                                   .input  = {SERD_SYNTAX_EMPTY, 0u, false},
+                                   .output = {SERD_SYNTAX_EMPTY, 0u, false}},
+                  .root_uri     = "",
+                  .input_string = NULL,
+                  .inputs       = NULL,
+                  .n_inputs     = 0u,
+                  .canonical    = false,
+                  .quiet        = false};
+
+  // Parse all command line options (which must precede inputs)
+  OptionIter iter = {argv, argc, 1, 1};
+  while (!serd_option_iter_is_end(iter)) {
+    const SerdStatus st = parse_option(&iter, &opts);
+    if (st == SERD_FAILURE) {
+      return 0;
+    }
+
+    if (st) {
+      return print_usage(argv[0], true);
+    }
+  }
+
+  // Every argument past the last option is an input
+  opts.inputs   = argv + iter.a;
+  opts.n_inputs = argc - iter.a;
+
+  // The input string (if given) counts as one more source
+  const intptr_t n_strings = opts.input_string ? 1 : 0;
+  if (opts.n_inputs + n_strings == 0) {
+    opts.n_inputs = 1;
+    opts.inputs   = default_input;
+  }
+
+  // Don't add prefixes to blank node labels if there is only one input
+  if (opts.n_inputs + n_strings == 1) {
+    opts.common.input.flags |= SERD_READ_GLOBAL;
+  }
+
+  return run(opts) > SERD_FAILURE;
+}
diff --git a/tools/serd-sort.c b/tools/serd-sort.c
new file mode 100644
index 00000000..deb79cb5
--- /dev/null
+++ b/tools/serd-sort.c
@@ -0,0 +1,274 @@
+/*
+ Copyright 2011-2021 David Robillard <d@drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include "console.h"
+
+#include "serd/serd.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Application (after parsing command-line arguments) */
+
+// All options for serd-sort, filled in by main() and parse_option().
+// The field order matters: main() initialises this struct positionally.
+typedef struct {
+ // Options handled by serd_parse_common_option(), passed on to
+ // serd_tool_setup() and serd_read_inputs()
+ SerdCommonOptions common;
+ // Argument of -R (NOTE(review): stored, but not read by run() in this file)
+ const char* root_uri;
+ // Argument of -s; main() counts it as an input
+ // (NOTE(review): not read by run() in this file -- confirm it is consumed)
+ const char* input_string;
+ // Argument of -c, or NULL if no explicit collation was requested
+ const char* collation;
+ // First input argument (a pointer into argv, past the options)
+ char* const* inputs;
+ // Number of entries in `inputs`
+ intptr_t n_inputs;
+ // Statement order parsed from `collation`; only used when `collation` is set
+ SerdStatementOrder order;
+ // Flags passed to serd_describe_range() when no collation is given
+ SerdDescribeFlags flags;
+} Options;
+
+// Return true if the input syntax (explicit or guessed) supports graphs.
+//
+// An explicitly set input syntax decides the answer on its own.  Otherwise,
+// the syntax of each input is guessed from its name, and one graph-capable
+// guess is enough.
+static bool
+input_has_graphs(const Options opts)
+{
+  if (!opts.common.input.syntax) {
+    bool found = false;
+    for (intptr_t i = 0; !found && i < opts.n_inputs; ++i) {
+      found = serd_syntax_has_graphs(serd_guess_syntax(opts.inputs[i]));
+    }
+
+    return found;
+  }
+
+  return serd_syntax_has_graphs(opts.common.input.syntax);
+}
+
+// Run the tool using the given options.
+//
+// Every input is read into an in-memory model, which is then written to the
+// output: statement by statement in the requested order if a collation was
+// given (-c), or via serd_describe_range() otherwise.
+//
+// Returns the first error encountered, or the status of the final cleanup if
+// everything else succeeded.
+static SerdStatus
+run(const Options opts)
+{
+  SerdTool app = {NULL, NULL, NULL, NULL};
+
+  // Set up the writing environment
+  SerdStatus st = serd_tool_setup(&app, "serd-sort", opts.common);
+  if (st) {
+    serd_tool_cleanup(app);
+    return st;
+  }
+
+  // Determine the default order to store statements in the model
+  const bool               with_graphs   = input_has_graphs(opts);
+  const SerdStatementOrder default_order = opts.collation ? opts.order
+                                           : with_graphs  ? SERD_ORDER_GSPO
+                                                          : SERD_ORDER_SPO;
+
+  const SerdModelFlags flags =
+    (SerdModelFlags)(with_graphs * SERD_STORE_GRAPHS);
+
+  SerdModel* const model = serd_model_new(app.world, default_order, flags);
+
+  if (!opts.collation) {
+    // If we are pretty-printing, we need an O** index
+    serd_model_add_index(model, SERD_ORDER_OPS);
+
+    if (with_graphs) {
+      // If we have graphs we still need the SPO index for finding subjects
+      serd_model_add_index(model, SERD_ORDER_SPO);
+    }
+  }
+
+  // Read all the inputs into an inserter to load the model
+  SerdSink* const inserter = serd_inserter_new(model, NULL);
+
+  st = serd_read_inputs(
+    app.world, opts.common, app.env, opts.n_inputs, opts.inputs, inserter);
+
+  if (!st) {
+    // Write the model to the output
+    const SerdSink* const target = serd_writer_sink(app.writer);
+    if (opts.collation) {
+      // Explicit collation: emit statements one at a time in that order
+      SerdCursor* const cursor = serd_model_begin_ordered(model, opts.order);
+
+      serd_env_write_prefixes(app.env, target);
+
+      for (const SerdStatement* statement = NULL;
+           !st && (statement = serd_cursor_get(cursor));
+           serd_cursor_advance(cursor)) {
+        st = serd_sink_write_statement(target, 0u, statement);
+      }
+
+      serd_cursor_free(cursor);
+    } else {
+      // No collation: let serd_describe_range() write the whole model
+      SerdCursor* const cursor = serd_model_begin(model);
+
+      serd_env_write_prefixes(app.env, target);
+
+      st = serd_describe_range(cursor, target, opts.flags);
+
+      serd_cursor_free(cursor);
+    }
+  }
+
+  if (!st) {
+    st = serd_writer_finish(app.writer);
+  }
+
+  // The inserter and model are owned here, not by the SerdTool, so release
+  // them on every path (inserter first, since it was created from the model)
+  // before the tool itself is torn down
+  serd_sink_free(inserter);
+  serd_model_free(model);
+
+  const SerdStatus cst = serd_tool_cleanup(app);
+  return st ? st : cst;
+}
+
+/* Command-line interface (before setting up serd) */
+
+// Parse a collation name such as "SPO" or "GOPS" into a statement order.
+//
+// On a match, writes the order to `order` and returns SERD_SUCCESS.
+// Otherwise, leaves `order` untouched and returns SERD_ERR_BAD_ARG.
+static SerdStatus
+parse_statement_order(const char* const string, SerdStatementOrder* const order)
+{
+  // The index of each name is used directly as its SerdStatementOrder value
+  static const char* const names[] = {"SPO",
+                                      "SOP",
+                                      "OPS",
+                                      "OSP",
+                                      "PSO",
+                                      "POS",
+                                      "GSPO",
+                                      "GSOP",
+                                      "GOPS",
+                                      "GOSP",
+                                      "GPSO",
+                                      "GPOS"};
+
+  const unsigned n_names = (unsigned)(sizeof(names) / sizeof(names[0]));
+
+  unsigned index = 0u;
+  while (index < n_names && strcmp(string, names[index]) != 0) {
+    ++index;
+  }
+
+  if (index == n_names) {
+    return SERD_ERR_BAD_ARG;
+  }
+
+  *order = (SerdStatementOrder)index;
+  return SERD_SUCCESS;
+}
+
+// Print the usage message for the program called `name`.
+//
+// If `error` is true, the message goes to stderr after a blank line and 1 is
+// returned.  Otherwise, it goes to stdout and 0 is returned.
+static int
+print_usage(const char* const name, const bool error)
+{
+  static const char* const help_text =
+    "Reorder RDF data by loading everything into a model then writing it.\n"
+    "INPUT can be a local filename, or \"-\" to read from standard input.\n\n"
+    " -B BASE_URI Base URI or path for resolving relative references.\n"
+    " -I SYNTAX Input syntax (turtle/ntriples/trig/nquads),\n"
+    " or option (lax/variables/relative/global/generated).\n"
+    " -O SYNTAX Output syntax (empty/turtle/ntriples/nquads),\n"
+    " or option (ascii/expanded/verbatim/terse/lax).\n"
+    " -V Display version information and exit.\n"
+    " -b BYTES I/O block size.\n"
+    " -c COLLATION An optional \"G\" then the letters \"SPO\" in any order.\n"
+    " -h Display this help and exit.\n"
+    " -k BYTES Parser stack size.\n"
+    " -o FILENAME Write output to FILENAME instead of stdout.\n";
+
+  FILE* const stream = error ? stderr : stdout;
+
+  // Separate the usage text from the error message printed just before it
+  if (error) {
+    fputc('\n', stream);
+  }
+
+  fprintf(stream, "Usage: %s [OPTION]... INPUT...\n", name);
+  fputs(help_text, stream);
+
+  return error ? 1 : 0;
+}
+
+// Parse the option that `iter` points to into `opts`, then move `iter` on.
+//
+// Returns SERD_SUCCESS if the option was consumed, SERD_FAILURE after printing
+// the help for -h, or an error status (with a message on stderr for unknown
+// options and collations) otherwise.
+static SerdStatus
+parse_option(OptionIter* const iter, Options* const opts)
+{
+  // Try the options shared between tools first.  Only a SERD_FAILURE result
+  // falls through to the options specific to this tool.
+  const SerdStatus common_st = serd_parse_common_option(iter, &opts->common);
+  if (common_st != SERD_FAILURE) {
+    return common_st;
+  }
+
+  const char flag = iter->argv[iter->a][iter->f];
+  SerdStatus st   = SERD_SUCCESS;
+
+  switch (flag) {
+  case 'R':
+    return serd_get_argument(iter, &opts->root_uri);
+
+  case 'V':
+    return serd_print_version("serd-sort");
+
+  case 'c':
+    // Fetch the collation name, then check that it names a known order
+    st = serd_get_argument(iter, &opts->collation);
+    if (!st) {
+      st = parse_statement_order(opts->collation, &opts->order);
+      if (st) {
+        fprintf(stderr,
+                "%s: unknown collation \"%s\"\n",
+                iter->argv[0],
+                opts->collation);
+      }
+    }
+    return st;
+
+  case 'h':
+    print_usage(iter->argv[0], false);
+    return SERD_FAILURE;
+
+  case 's':
+    return serd_get_argument(iter, &opts->input_string);
+
+  default:
+    break;
+  }
+
+  fprintf(stderr, "%s: invalid option -- '%c'\n", iter->argv[0], flag);
+  return SERD_ERR_BAD_ARG;
+}
+
+// Entry point: parse the leading options, collect the inputs, and run the tool
+int
+main(const int argc, char* const* const argv)
+{
+  Options opts = {.common       = {"",
+                                   NULL,
+                                   4096u,
+                                   1048576u,
+                                   {SERD_SYNTAX_EMPTY, 0u, false},
+                                   {SERD_SYNTAX_EMPTY, 0u, false}},
+                  .root_uri     = "",
+                  .input_string = NULL,
+                  .collation    = NULL,
+                  .inputs       = NULL,
+                  .n_inputs     = 0u,
+                  .order        = SERD_ORDER_SPO,
+                  .flags        = 0u};
+
+  // Consume every leading option; whatever follows them is an input
+  OptionIter iter = {argv, argc, 1, 1};
+  while (!serd_option_iter_is_end(iter)) {
+    const SerdStatus st = parse_option(&iter, &opts);
+    if (st == SERD_FAILURE) {
+      // The option (such as -h) was handled and nothing more needs doing
+      return 0;
+    }
+
+    if (st) {
+      return print_usage(argv[0], true);
+    }
+  }
+
+  // Take the remaining arguments as inputs; at least one source is required
+  opts.n_inputs = argc - iter.a;
+  opts.inputs   = argv + iter.a;
+  if (opts.n_inputs + (bool)opts.input_string == 0) {
+    fprintf(stderr, "%s: missing input\n", argv[0]);
+    return print_usage(argv[0], true);
+  }
+
+  // With exactly one input, blank node labels are read without a prefix
+  if (opts.n_inputs + (bool)opts.input_string == 1) {
+    opts.common.input.flags |= SERD_READ_GLOBAL;
+  }
+
+  // Exit with 1 only for statuses more severe than SERD_FAILURE
+  return run(opts) > SERD_FAILURE;
+}
diff --git a/tools/serdi.c b/tools/serdi.c
deleted file mode 100644
index b1542727..00000000
--- a/tools/serdi.c
+++ /dev/null
@@ -1,502 +0,0 @@
-/*
- Copyright 2011-2021 David Robillard <d@drobilla.net>
-
- Permission to use, copy, modify, and/or distribute this software for any
- purpose with or without fee is hereby granted, provided that the above
- copyright notice and this permission notice appear in all copies.
-
- THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-*/
-
-#include "console.h"
-
-#include "serd/serd.h"
-
-#include <errno.h>
-#include <limits.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg)
-#define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__)
-
-typedef struct {
- SerdNode* s;
- SerdNode* p;
- SerdNode* o;
- SerdNode* g;
-} FilterPattern;
-
-static int
-print_usage(const char* const name, const bool error)
-{
- static const char* const description =
- "Read and write RDF syntax.\n"
- "Use - for INPUT to read from standard input.\n\n"
- " -C Convert literals to canonical form.\n"
- " -F PATTERN Filter out statements that match PATTERN.\n"
- " -G PATTERN Only include statements matching PATTERN.\n"
- " -I BASE_URI Input base URI.\n"
- " -b BYTES I/O block size.\n"
- " -f Fast and loose mode (possibly ugly output).\n"
- " -h Display this help and exit.\n"
- " -i SYNTAX Input syntax (turtle/ntriples/trig/nquads),\n"
- " or flag (lax/variables/verbatim).\n"
- " -k BYTES Parser stack size.\n"
- " -m Build a model in memory before writing.\n"
- " -o SYNTAX Output syntax (empty/turtle/ntriples/nquads),\n"
- " or flag (ascii/expanded/verbatim/terse/lax).\n"
- " -q Suppress all output except data.\n"
- " -r ROOT_URI Keep relative URIs within ROOT_URI.\n"
- " -s STRING Parse STRING as input.\n"
- " -v Display version information and exit.\n"
- " -w FILENAME Write output to FILENAME instead of stdout.\n";
-
- FILE* const os = error ? stderr : stdout;
- fprintf(os, "%s", error ? "\n" : "");
- fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name);
- fprintf(os, "%s", description);
- return error ? 1 : 0;
-}
-
-static int
-missing_arg(const char* const name, const char opt)
-{
- SERDI_ERRORF("option requires an argument -- '%c'\n", opt);
- return print_usage(name, true);
-}
-
-static SerdStatus
-on_filter_event(void* const handle, const SerdEvent* const event)
-{
- if (event->type == SERD_STATEMENT) {
- FilterPattern* const pat = (FilterPattern*)handle;
- if (pat->s) {
- return SERD_ERR_INVALID;
- }
-
- const SerdStatement* const statement = event->statement.statement;
- pat->s = serd_node_copy(serd_statement_subject(statement));
- pat->p = serd_node_copy(serd_statement_predicate(statement));
- pat->o = serd_node_copy(serd_statement_object(statement));
- pat->g = serd_node_copy(serd_statement_graph(statement));
- }
-
- return SERD_SUCCESS;
-}
-
-static SerdSink*
-parse_filter(SerdWorld* const world,
- const SerdSink* const sink,
- const char* const str,
- const bool inclusive)
-{
- SerdEnv* const env = serd_env_new(SERD_EMPTY_STRING());
- FilterPattern pat = {NULL, NULL, NULL, NULL};
- SerdSink* in_sink = serd_sink_new(&pat, on_filter_event, NULL);
- SerdByteSource* byte_source = serd_byte_source_new_string(str, NULL);
- SerdReader* reader = serd_reader_new(
- world, SERD_NQUADS, SERD_READ_VARIABLES, env, in_sink, 4096);
-
- SerdStatus st = serd_reader_start(reader, byte_source);
- if (!st) {
- st = serd_reader_read_document(reader);
- }
-
- serd_reader_free(reader);
- serd_env_free(env);
- serd_byte_source_free(byte_source);
- serd_sink_free(in_sink);
-
- if (st) {
- return NULL;
- }
-
- SerdSink* filter =
- serd_filter_new(sink, pat.s, pat.p, pat.o, pat.g, inclusive);
-
- serd_node_free(pat.s);
- serd_node_free(pat.p);
- serd_node_free(pat.o);
- serd_node_free(pat.g);
- return filter;
-}
-
-static SerdStatus
-read_file(SerdWorld* const world,
- const SerdSyntax syntax,
- const SerdReaderFlags flags,
- SerdEnv* const env,
- const SerdSink* const sink,
- const size_t stack_size,
- const char* const filename,
- const size_t block_size)
-{
- SerdByteSource* byte_source = serd_open_input(filename, block_size);
-
- if (!byte_source) {
- SERDI_ERRORF(
- "failed to open input file `%s' (%s)\n", filename, strerror(errno));
-
- return SERD_ERR_UNKNOWN;
- }
-
- SerdReader* reader =
- serd_reader_new(world, syntax, flags, env, sink, stack_size);
-
- SerdStatus st = serd_reader_start(reader, byte_source);
-
- st = st ? st : serd_reader_read_document(reader);
-
- serd_reader_free(reader);
- serd_byte_source_free(byte_source);
-
- return st;
-}
-
-int
-main(int argc, char** argv)
-{
- const char* const prog = argv[0];
-
- SerdNode* base = NULL;
- SerdSyntax input_syntax = SERD_SYNTAX_EMPTY;
- SerdSyntax output_syntax = SERD_SYNTAX_EMPTY;
- SerdReaderFlags reader_flags = 0;
- SerdWriterFlags writer_flags = 0;
- bool no_inline = false;
- bool osyntax_set = false;
- bool use_model = false;
- bool canonical = false;
- bool quiet = false;
- size_t block_size = 4096u;
- size_t stack_size = 4194304;
- const char* input_string = NULL;
- const char* in_pattern = NULL;
- const char* out_pattern = NULL;
- const char* root_uri = NULL;
- const char* out_filename = NULL;
- int a = 1;
- for (; a < argc && argv[a][0] == '-'; ++a) {
- if (argv[a][1] == '\0') {
- break;
- }
-
- for (int o = 1; argv[a][o]; ++o) {
- const char opt = argv[a][o];
-
- if (opt == 'C') {
- canonical = true;
- } else if (opt == 'f') {
- no_inline = true;
- writer_flags |= (SERD_WRITE_EXPANDED | SERD_WRITE_VERBATIM);
- } else if (opt == 'h') {
- return print_usage(prog, false);
- } else if (argv[a][1] == 'm') {
- use_model = true;
- } else if (opt == 'q') {
- quiet = true;
- } else if (opt == 'v') {
- return serd_print_version(argv[0]);
- } else if (argv[a][1] == 'F') {
- if (++a == argc) {
- return missing_arg(argv[0], 'F');
- }
-
- out_pattern = argv[a];
- break;
- } else if (argv[a][1] == 'G') {
- if (++a == argc) {
- return missing_arg(argv[0], 'g');
- }
-
- in_pattern = argv[a];
- break;
- } else if (argv[a][1] == 'I') {
- if (++a == argc) {
- return missing_arg(prog, 'I');
- }
-
- base = serd_new_uri(SERD_STRING(argv[a]));
- break;
- } else if (opt == 'b') {
- if (argv[a][o + 1] || ++a == argc) {
- return missing_arg(prog, 'b');
- }
-
- char* endptr = NULL;
- const long size = strtol(argv[a], &endptr, 10);
- if (size < 1 || size == LONG_MAX || *endptr != '\0') {
- SERDI_ERRORF("invalid block size `%s'\n", argv[a]);
- return 1;
- }
- block_size = (size_t)size;
- break;
- } else if (opt == 'i') {
- if (argv[a][o + 1] || ++a == argc) {
- return missing_arg(prog, 'i');
- }
-
- if (serd_set_input_option(
- SERD_STRING(argv[a]), &input_syntax, &reader_flags)) {
- return print_usage(argv[0], true);
- }
- break;
- } else if (opt == 'k') {
- if (argv[a][o + 1] || ++a == argc) {
- return missing_arg(prog, 'k');
- }
-
- char* endptr = NULL;
- const long size = strtol(argv[a], &endptr, 10);
- if (size <= 0 || size == LONG_MAX || *endptr != '\0') {
- SERDI_ERRORF("invalid stack size `%s'\n", argv[a]);
- return 1;
- }
- stack_size = (size_t)size;
- break;
- } else if (opt == 'o') {
- if (argv[a][o + 1] || ++a == argc) {
- return missing_arg(prog, 'o');
- }
-
- if (serd_set_output_option(
- SERD_STRING(argv[a]), &output_syntax, &writer_flags)) {
- return print_usage(argv[0], true);
- }
-
- osyntax_set =
- output_syntax != SERD_SYNTAX_EMPTY || !strcmp(argv[a], "empty");
-
- break;
- } else if (opt == 'r') {
- if (argv[a][o + 1] || ++a == argc) {
- return missing_arg(prog, 'r');
- }
-
- root_uri = argv[a];
- break;
- } else if (opt == 's') {
- if (argv[a][o + 1] || ++a == argc) {
- return missing_arg(prog, 's');
- }
-
- input_string = argv[a];
- break;
- } else if (opt == 'w') {
- if (argv[a][o + 1] || ++a == argc) {
- return missing_arg(argv[0], 'w');
- }
-
- out_filename = argv[a];
- break;
- } else {
- SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1);
- return print_usage(prog, true);
- }
- }
- }
-
- if (in_pattern && out_pattern) {
- SERDI_ERROR("only one of -F and -G can be given at once\n");
- return 1;
- }
-
- if (a == argc && !input_string) {
- SERDI_ERROR("missing input\n");
- return print_usage(prog, true);
- }
-
- char* const* const inputs = argv + a;
- const int n_inputs = argc - a;
-
- bool input_has_graphs = serd_syntax_has_graphs(input_syntax);
- for (int i = a; i < argc; ++i) {
- if (serd_syntax_has_graphs(serd_guess_syntax(argv[i]))) {
- input_has_graphs = true;
- break;
- }
- }
-
- if (!output_syntax && !osyntax_set) {
- output_syntax = input_has_graphs ? SERD_NQUADS : SERD_NTRIPLES;
- }
-
- if (!base && n_inputs == 1 &&
- (output_syntax == SERD_NQUADS || output_syntax == SERD_NTRIPLES)) {
- // Choose base URI from the single input path
- char* const input_path = serd_canonical_path(inputs[0]);
- if (!input_path || !(base = serd_new_file_uri(SERD_STRING(input_path),
- SERD_EMPTY_STRING()))) {
- SERDI_ERRORF("unable to determine base URI from path %s\n", inputs[0]);
- }
- serd_free(input_path);
- }
-
- SerdWorld* const world = serd_world_new();
- SerdEnv* const env =
- serd_env_new(base ? serd_node_string_view(base) : SERD_EMPTY_STRING());
-
- serd_set_stream_utf8_mode(stdin);
- if (!out_filename) {
- serd_set_stream_utf8_mode(stdout);
- }
-
- const SerdDescribeFlags describe_flags =
- no_inline ? SERD_NO_INLINE_OBJECTS : 0u;
-
- SerdByteSink* const byte_sink = serd_open_output(out_filename, block_size);
- if (!byte_sink) {
- perror("serdi: error opening output file");
- return 1;
- }
-
- SerdWriter* const writer =
- serd_writer_new(world, output_syntax, writer_flags, env, byte_sink);
-
- SerdModel* model = NULL;
- SerdSink* inserter = NULL;
- const SerdSink* out_sink = NULL;
- if (use_model) {
- const SerdModelFlags flags = (input_has_graphs ? SERD_STORE_GRAPHS : 0u);
-
- model = serd_model_new(world, SERD_ORDER_SPO, flags);
- if (input_has_graphs) {
- serd_model_add_index(model, SERD_ORDER_GSPO);
- }
-
- if (!no_inline) {
- serd_model_add_index(model, SERD_ORDER_OPS);
- if (input_has_graphs) {
- serd_model_add_index(model, SERD_ORDER_GOPS);
- }
- }
-
- inserter = serd_inserter_new(model, NULL);
- out_sink = inserter;
- } else {
- out_sink = serd_writer_sink(writer);
- }
-
- const SerdSink* sink = out_sink;
-
- SerdSink* canon = NULL;
- if (canonical) {
- sink = canon = serd_canon_new(world, out_sink, reader_flags);
- }
-
- SerdSink* filter = NULL;
- if (in_pattern) {
- if (!(filter = parse_filter(world, sink, in_pattern, true))) {
- SERDI_ERROR("error parsing inclusive filter pattern\n");
- return EXIT_FAILURE;
- }
-
- sink = filter;
- } else if (out_pattern) {
- if (!(filter = parse_filter(world, sink, out_pattern, false))) {
- SERDI_ERROR("error parsing exclusive filter pattern\n");
- return EXIT_FAILURE;
- }
-
- sink = filter;
- }
-
- if (quiet) {
- serd_set_log_func(world, serd_quiet_log_func, NULL);
- }
-
- if (root_uri) {
- serd_writer_set_root_uri(writer, SERD_STRING(root_uri));
- }
-
- SerdStatus st = SERD_SUCCESS;
- if (input_string) {
- SerdByteSource* const byte_source =
- serd_byte_source_new_string(input_string, NULL);
-
- SerdReader* const reader =
- serd_reader_new(world,
- input_syntax ? input_syntax : SERD_TRIG,
- reader_flags,
- env,
- sink,
- stack_size);
-
- if (!(st = serd_reader_start(reader, byte_source))) {
- st = serd_reader_read_document(reader);
- }
-
- serd_reader_free(reader);
- serd_byte_source_free(byte_source);
- }
-
- if (n_inputs == 1) {
- reader_flags |= SERD_READ_GLOBAL;
- }
-
- for (int i = 0; !st && i < n_inputs; ++i) {
- if (!base && strcmp(inputs[i], "-")) {
- if ((st = serd_set_base_uri_from_path(env, inputs[i]))) {
- SERDI_ERRORF("failed to set base URI from path %s\n", inputs[i]);
- break;
- }
- }
-
- if ((st =
- read_file(world,
- serd_choose_input_syntax(world, input_syntax, inputs[i]),
- reader_flags,
- env,
- sink,
- stack_size,
- inputs[i],
- block_size))) {
- break;
- }
- }
-
- if (st <= SERD_FAILURE && use_model) {
- const SerdSink* writer_sink = serd_writer_sink(writer);
- SerdCursor* everything = serd_model_begin_ordered(
- model, input_has_graphs ? SERD_ORDER_GSPO : SERD_ORDER_SPO);
-
- serd_env_write_prefixes(env, writer_sink);
-
- st = serd_describe_range(
- everything,
- writer_sink,
- describe_flags |
- ((output_syntax == SERD_NTRIPLES || output_syntax == SERD_NQUADS)
- ? SERD_NO_INLINE_OBJECTS
- : 0u));
-
- serd_cursor_free(everything);
- }
-
- serd_sink_free(canon);
- serd_sink_free(filter);
- serd_sink_free(inserter);
- serd_model_free(model);
- serd_writer_free(writer);
- serd_env_free(env);
- serd_node_free(base);
- serd_world_free(world);
-
- if (serd_byte_sink_close(byte_sink)) {
- perror("serdi: write error");
- st = SERD_ERR_UNKNOWN;
- }
-
- serd_byte_sink_free(byte_sink);
-
- return (st > SERD_FAILURE) ? 1 : 0;
-}