From ea272b5e88c02117a2a3ef7e12d221bd196844dc Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sat, 9 Oct 2021 15:48:18 -0400 Subject: Isolate man pages and build HTML versions with mandoc if possible The HTML versions are mainly useful for generating the online documentation as part of a CI pipeline. They are installed anyway, which is redundant for systems with a man page reader, but handy elsewhere. --- doc/man/mandoc.css | 251 +++++++++++++++++++++++++++++++++ doc/man/meson.build | 39 ++++++ doc/man/serd-filter.1 | 188 +++++++++++++++++++++++++ doc/man/serd-pipe.1 | 359 ++++++++++++++++++++++++++++++++++++++++++++++++ doc/man/serd-sort.1 | 195 ++++++++++++++++++++++++++ doc/man/serd-validate.1 | 247 +++++++++++++++++++++++++++++++++ 6 files changed, 1279 insertions(+) create mode 100644 doc/man/mandoc.css create mode 100644 doc/man/meson.build create mode 100644 doc/man/serd-filter.1 create mode 100644 doc/man/serd-pipe.1 create mode 100644 doc/man/serd-sort.1 create mode 100644 doc/man/serd-validate.1 (limited to 'doc/man') diff --git a/doc/man/mandoc.css b/doc/man/mandoc.css new file mode 100644 index 00000000..cebb50bf --- /dev/null +++ b/doc/man/mandoc.css @@ -0,0 +1,251 @@ +/* Generic page style: base typography, links, and lists */ + +/* + Smaller sizes: 0.236em 0.271em 0.382em 0.438em 0.618em 0.708em + Larger sizes: 1.146em 1.618em 1.854em 2.618em 3em 4.236em +*/ + +html { + margin: 0 1.618em; + background: #FFF; + color: #000; +} + +body { + font-style: normal; + line-height: 1.618em; + margin: 0 auto auto; + padding: 0; + max-width: 60em; + font-family: "SF Pro Text", Verdana, "DejaVu Sans", sans-serif; + text-rendering: optimizeLegibility; +} + +h1 { + font-family: Helvetica, Arial, "DejaVu Sans Condensed", Verdana, sans-serif; + font-size: 1.854em; + font-weight: 600; + line-height: 114.6%; + margin: 1.146em 0; +} + +a { + text-decoration: none; +} + +h1 a, h2 a, h3 a, h4 a, h5 a, h6 a { + color: #222; +} + +a:hover { + text-decoration: underline; +} + +h1 a:link, h2 a:link, h3 
a:link, h4 a:link, h5 a:link, h6 a:link { + color: #222; +} + +h1 a:visited, h2 a:visited, h3 a:visited, h4 a:visited, h5 a:visited, h6 a:visited { + color: #222; +} + +pre, tt, code { /* code is never hyphenated; the prefixed properties precede the standard one */ + overflow: auto; + font-family: "SF Mono", Menlo, Consolas, "DejaVu Sans Mono", monospace, fixed; + -epub-hyphens: none; + -moz-hyphens: none; + -ms-hyphens: none; + -webkit-hyphens: none; + hyphens: none; +} + +ul, ol, dl { + margin: 0; + padding: 0; +} + +ul { + padding: 0; + hyphens: auto; +} + +dt { + font-weight: 600; + padding: 0.618em 0 0; +} + +dd { + margin: 0 0 0 2.618em; + hyphens: auto; +} + +dd > ul:only-child, dd > ol:only-child { + padding-left: 0; +} + +li { + margin-left: 2.618em; +} + +dt:empty { /* empty or blank terms and descriptions are hidden (this rule and the three below) */ + margin: 0; + display: none; +} + +dd:empty { + margin: 0; + display: none; +} + +dt:blank { + margin: 0; + display: none; +} + +dd:blank { + margin: 0; + display: none; +} + +/* Media-specific style */ + +/* Color links on screens */ +@media screen { + a { + color: #546E00; + } +} + +@media print { + body { + color: #000; + } + + a, h1 a, h2 a, h3 a, h4 a, h5 a, h6 a { + color: #000; + } + + a:link { + color: #000; + } + + a:visited { + color: #000; + } +} + +/* Mandoc specific style: head/foot tables and the classes named after mdoc macros (Nm, Fl, Ar, ...) */ + +table.head { + font-size: 0.708em; + margin: 0.438em 0 1.854em; + width: 100%; +} + +table.foot { + font-size: 0.708em; + margin: 2.618em 0 0.438em; + width: 100%; +} + +td.head-rtitle, td.foot-os { + text-align: right; +} + +td.head-vol { + text-align: center; +} + +div.Pp { + margin: 1ex 0; +} + +a.permalink { + color: #222; +} + +div.Nd, div.Bf, div.Op { + display: inline; +} + +span.Pa, span.Ad { + font-style: italic; +} + +span.Ms { + font-weight: bold; +} + +dl.Bl-diag > dt { + font-weight: bold; +} + +table.Nm tbody tr { + vertical-align: baseline; +} + +code.Nm, code.Fl, code.Cm, code.Ic, code.In, code.Fd, code.Fn, code.Cd { + font-weight: bold; + color: #444; +} + +code.Ev { + font-weight: bold; + color: #444; +} + +code.Li { + color: #333; +} + +var.Ar { + font-style: italic; 
+} + +/* Dark mode: recolor the page when the user prefers a dark color scheme */ +@media (prefers-color-scheme: dark) { + html { + background: #222; + color: #DDD; + } + + a { + color: #B4C342; + } + + a.permalink { + color: #DDD; + } + + h1 a, h2 a, h3 a, h4 a, h5 a, h6 a { + color: #DDD; + } + + h1 a:link, h2 a:link, h3 a:link, h4 a:link, h5 a:link, h6 a:link { + color: #DDD; + } + + h1 a:visited, h2 a:visited, h3 a:visited, h4 a:visited, h5 a:visited, h6 a:visited { + color: #DDD; + } + + code.Nm, code.Fl, code.Cm, code.Ic, code.In, code.Fd, code.Fn, code.Cd { + color: #AAA; + } + + code.Ev { + color: #AAA; + } + + code.Li { + color: #CCC; + } +} + +/* Hard black for dark mode on mobile (since it's likely to be an OLED screen) */ +@media only screen and (hover: none) and (pointer: coarse) and (prefers-color-scheme: dark) { + html { + background: #000; + color: #CCC; + } +} diff --git a/doc/man/meson.build b/doc/man/meson.build new file mode 100644 index 00000000..fe970889 --- /dev/null +++ b/doc/man/meson.build @@ -0,0 +1,39 @@ +if get_option('tools') + # Install tool man pages + install_man('serd-filter.1') + install_man('serd-pipe.1') + install_man('serd-sort.1') + install_man('serd-validate.1') + + # Build/install HTML man pages if mandoc is present + if mandoc.found() + configure_file(copy: true, + input: files('mandoc.css'), + output: 'mandoc.css', + # install: true, + install_dir: docdir / versioned_name / 'man') + + mandoc_html_command = [mandoc, + '-Thtml', + '-Wwarning', + '-Ostyle=mandoc.css,man=%N.html', + '@INPUT@'] + + page_names = [ + 'serd-filter', + 'serd-pipe', + 'serd-sort', + 'serd-validate', + ] + + foreach name : page_names + custom_target(name + '.1', + capture: true, + command: mandoc_html_command, + input: files(name + '.1'), + install: true, + install_dir: docdir / versioned_name / 'man', + output: name + '.html') + endforeach + endif +endif diff --git a/doc/man/serd-filter.1 b/doc/man/serd-filter.1 new file mode 100644 index 00000000..e05559e2 --- /dev/null +++ b/doc/man/serd-filter.1 @@ -0,0 
+1,188 @@ +.Dd October 21, 2021 +.Dt SERD-FILTER 1 +.Os Serd +.Sh NAME +.Nm serd-filter +.Nd print RDF statements that match a pattern +.Sh SYNOPSIS +.Nm serd-filter +.Op Fl hVv +.Op Fl B Ar base +.Op Fl I Ar syntax +.Op Fl O Ar syntax +.Op Fl b Ar bytes +.Op Fl f Ar pattern_file +.Op Fl k Ar bytes +.Op Fl o Ar filename +.Ar pattern +.Op Ar input ... +.Sh DESCRIPTION +.Nm +scans for statements in RDF data. +Its interface is similar to +.Xr grep 1 , +except patterns are structural: +instead of matching characters within a line, +.Nm +matches nodes within a statement. +.Pp +Data is read from files or standard input, +and only those statements that match the pattern +(or do not match the pattern, if +.Fl v +is given) are written. +By default, +the input syntax is guessed from the file extension, +and line-based output is written to standard output. +.Pp +Patterns are written in NTriples or NQuads with an extension that allows variables like +.Li ?some +or +.Li $thing . +.Pp +The +.Ar input +operands are processed in command-line order. +If +.Ar input +is +.Ar - +or absent, +.Nm +reads from standard input. +.Pp +The options are as follows: +.Pp +.Bl -tag -compact -width 3n +.It Fl B Ar base +Base URI, path, or +.Cm rebase +to use the output path. +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl I Ar syntax +Input syntax or option: +.Cm NQuads , +.Cm NTriples , +.Cm TriG , +.Cm Turtle , +.Cm lax , +.Cm variables , +.Cm relative , +or +.Cm labels . +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl O Ar syntax +Output syntax or option: +.Cm empty , +.Cm NQuads , +.Cm NTriples , +.Cm TriG , +.Cm Turtle , +.Cm ascii , +.Cm contextual , +.Cm expanded , +.Cm verbatim , +.Cm terse , +or +.Cm lax . +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl V +Display version information and exit. +.Pp +.It Fl b Ar bytes +I/O block size. +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl f Ar pattern_file +Load pattern from +.Ar pattern_file +instead of the first positional argument. 
+.Pp +.It Fl h +Print the command line options. +.Pp +.It Fl k Ar bytes +Parser stack size. +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl o Ar filename +Write output to the given +.Ar filename +instead of stdout. +.Pp +.It Fl v +Invert filter to only emit statements that do +.Em not +match the pattern. +.El +.Sh EXIT STATUS +.Nm +exits with a status of 0, or non-zero if an error occurred. +.Sh EXAMPLES +To print all type statements: +.Pp +.Dl $ serd-filter '?subject a ?type .' input.ttl +.Pp +To print every statement about http://example.org/subject: +.Pp +.Dl $ serd-filter '<http://example.org/subject> ?p ?o .' input.ttl +.Sh SEE ALSO +.Bl -item -compact +.It +.Xr serd-pipe 1 +.It +.Xr serd-sort 1 +.It +.Xr serd-validate 1 +.It +.Lk http://drobilla.net/software/serd/ +.El +.Sh STANDARDS +.Bl -item -compact +.It +.Rs +.%A W3C +.%T RDF 1.1 NQuads +.%D February 2014 +.Re +.Lk https://www.w3.org/TR/n-quads/ +.It +.Rs +.%A W3C +.%D February 2014 +.%T RDF 1.1 NTriples +.Re +.Lk https://www.w3.org/TR/n-triples/ +.It +.Rs +.%A W3C +.%T RDF 1.1 TriG +.%D February 2014 +.Re +.Lk https://www.w3.org/TR/trig/ +.It +.Rs +.%A W3C +.%D February 2014 +.%T RDF 1.1 Turtle +.Re +.Lk https://www.w3.org/TR/turtle/ +.El +.Sh AUTHORS +.Nm +is a part of serd, by +.An David Robillard +.Mt d@drobilla.net . diff --git a/doc/man/serd-pipe.1 b/doc/man/serd-pipe.1 new file mode 100644 index 00000000..d94d4445 --- /dev/null +++ b/doc/man/serd-pipe.1 @@ -0,0 +1,359 @@ +.Dd October 21, 2021 +.Dt SERD-PIPE 1 +.Os Serd +.Sh NAME +.Nm serd-pipe +.Nd read and write RDF data +.Sh SYNOPSIS +.Nm serd-pipe +.Op Fl ChV +.Op Fl B Ar base +.Op Fl I Ar syntax +.Op Fl O Ar syntax +.Op Fl R Ar root +.Op Fl b Ar bytes +.Op Fl k Ar bytes +.Op Fl o Ar filename +.Op Fl s Ar string +.Op Ar input ... +.Sh DESCRIPTION +.Nm +is a fast command-line utility for streaming RDF data. +It reads one or more files and writes the data again, +possibly in a different form. 
+By default, +the input syntax is guessed from the file extension, +and line-based output is written to standard output. +.Pp +.Nm +writes statements as they are read, in the same order. +It uses very little memory and can process arbitrarily large files, +either directly or as part of a pipeline. +It is useful for things like checking syntax, +converting to a different syntax, +pretty-printing documents, +merging files, +expanding URIs, +and so on. +.Pp +The simplest usage is to use files for both input and output. +This way, reasonable options are chosen by default based on the filename. +For example, most common tasks can be accomplished with simple commands like: +.Pp +.Dl $ serd-pipe -o pretty.ttl input.nt +.Pp +The +.Ar input +operands are processed in command-line order. +If +.Ar input +is +.Ar - +or absent, +.Nm +reads from standard input. +.Pp +The options are as follows: +.Pp +.Bl -tag -compact -width 3n +.It Fl B Ar base +Base URI, path, or +.Cm rebase +to use the output path. +This is used to resolve any relative URI references in the input. +.Pp +If the input is a file, +its URI is used as the base by default. +This causes relative references to be written just as they are in the input. +Note, however, that this may not be desired if the output is in a different directory. +For example, +.Li <file.ttl> +would not point to the same file from the new location. +.Pp +The special +.Cm rebase +argument will instead use the output filename set by the +.Fl o +option. +This will write references relative to the output file, +so that parsing it will produce the same absolute URIs as the original input. +For example, +the above may be written as +.Li <../file.ttl> +if the output is written to some sibling directory. +.Pp +Generally, the default is best when copying data along with other bundled files, +while +.Cm rebase +is best for writing data in a new location which still refers to the original paths. 
+.Pp +These options are intended to make the most common tasks as simple as possible. +An arbitrary base URI can also be given explicitly. +.Pp +.It Fl C +Convert literals to canonical form. +Literals with supported XSD datatypes will be parsed and rewritten canonically. +Invalid literals will cause an error. +All numeric datatypes are supported, as well as +.Vt boolean , +.Vt duration , +.Vt datetime , +.Vt time , +.Vt hexBinary , +and +.Vt base64Binary . +.Pp +.It Fl I Ar syntax +Set an input syntax or option. +May be given multiple times. +The case-insensitive +.Ar syntax +can be +.Cm NQuads , +.Cm NTriples , +.Cm TriG , +.Cm Turtle , +or one of the following options: +.Pp +.Bl -tag -width "QvariablesQ" -compact -offset indent +.It Cm lax +Tolerate invalid input where possible. +Warnings will be printed for syntax errors, +but parsing will attempt to continue. +Note that data may be lost when using this option! +.Pp +.It Cm variables +Support parsing variable nodes. +Variables can be written in SPARQL style, for example +.Li ?name +or +.Li $name . +.Pp +.It Cm relative +Read relative URI references exactly without resolving them. +Normally, all relative URIs are expanded against the base URI when reading. +This flag disables that, +so URI references will be passed through exactly as they are in the input. +.Pp +.It Cm global +Assume a clean global namespace for blank node labels, +and do not automatically add prefixes. +Normally, +a prefix like +.Li f1 +is added to blank node labels when reading multiple files, +to prevent labels in different files from clashing. +This option disables that, +so blank node labels will be passed through without any added prefix. +Note that this may corrupt the output by merging distinct blank nodes. +.Pp +.It Cm generated +Read seemingly generated blank node labels exactly without adjusting them. +Normally, blank node labels like +.Li b123 +are adapted to avoid potential clashes with generated ones. 
+This flag disables that, +so such labels will be passed through exactly as they are in the input. +Note that this may corrupt the output by merging distinct blank nodes. +.El +.Pp +.It Fl O Ar syntax +Set an output syntax or option. +May be given multiple times. +The case-insensitive +.Ar syntax +can be +.Cm empty , +.Cm NQuads , +.Cm NTriples , +.Cm TriG , +.Cm Turtle , +or one of the following options: +.Pp +.Bl -tag -width "QcontextualQ" -compact -offset indent +.It Cm ascii +Escape all non-ASCII characters. +Normally, text is written in UTF-8. +This flag will escape non-ASCII characters in text as Unicode code points like +.Li \eU00B7 or +.Li \eU0001F600 . +.Pp +.It Cm contextual +Suppress writing directives that describe the context. +Normally when writing Turtle or TriG, +a document will have a header that defines all the prefixes used in the input. +This flag will disable writing those directives, +so the output is a document fragment with an implicit context. +This can be useful for writing output intended for humans. +.Pp +.It Cm expanded +Write expanded URIs instead of prefixed names. +.Pp +.It Cm verbatim +Write URI references exactly as they are in the input. +This avoids resolving URIs and making them relative to the output base URI. +.Pp +.It Cm terse +Write terser output without newlines. +This can be useful for writing a line-based description of suitably structured data. +.Pp +.It Cm lax +Tolerate invalid UTF-8 by writing the replacement character when necessary. +Note that data may be lost when using this option! +.El +.Pp +The +.Cm empty +syntax suppresses the output, +so that only warnings and errors will be printed. +.Pp +.It Fl R Ar root +Keep relative URIs within a +.Ar root +URI. +This will avoid creating any relative URI references with leading path segments like +.Pa ../ +that enter a parent of +.Ar root . 
+.Pp +For example, +if +.Pa /home/you/file.ttl +is written to the file +.Pa /home/me/output.ttl +using +.Fl B Cm rebase , +then it will be written as +.Li <../you/file.ttl> . +Setting +.Fl R Pa /home/me/ +would prevent references from +.Dq escaping +like this, +so the above would instead be written as +.Li </home/you/file.ttl> . +.Pp +This is useful for making relocatable +.Dq bundles +of resources, +since it can keep all relative references within the bundle, +while still allowing up-references to be used. +.Pp +.It Fl V +Display version information and exit. +.Pp +.It Fl b Ar bytes +I/O block size. +This is the number of bytes in a file that will be read or written at once. +The default is 4096, which should perform well in most cases. +Note that this only applies to files; standard input and output are always processed one byte at a time. +.Pp +.It Fl h +Print the command line options. +.Pp +.It Fl k Ar bytes +Parser stack size. +For performance and security reasons, parsing is performed with a fixed-size stack. +This option sets a hard limit on the total amount of space used for parsing. +The default is 1 megabyte, which should be more than enough for most data. +This option can be used to reduce memory consumption, +or to enable parsing documents with extremely deep nesting or extremely large literal values. +.Pp +.It Fl o Ar filename +Write output to the given +.Ar filename +instead of stdout. +.Pp +.It Fl s Ar string +Parse +.Ar string +as input. +.El +.Sh ENVIRONMENT +Error messages and warnings are printed in color by default if the output is a terminal. +This can be controlled by common environment variables: +.Pp +.Bl -tag -compact -width 14n +.It Ev NO_COLOR +If present (regardless of value), color is disabled. +.It Ev CLICOLOR +If set to 0, color is disabled. +.It Ev CLICOLOR_FORCE +If set to anything other than 0, color is forced on. +.El +.Pp +See +.Lk http://no-color.org/ +and +.Lk https://bixense.com/clicolors/ +for details. 
+.Sh EXIT STATUS +.Nm +exits with a status of 0, or non-zero if an error occurred. +.Sh EXAMPLES +To print an NTriples file as Turtle: +.Pp +.Dl $ serd-pipe -O turtle input.nt +.Pp +To print only errors and discard the output: +.Pp +.Dl $ serd-pipe -O empty input.ttl +.Pp +To pretty-print a file: +.Pp +.Dl $ serd-pipe -o pretty.ttl input.ttl +.Pp +To expand all prefixed names into full URIs: +.Pp +.Dl $ serd-pipe -O expanded -o expanded.ttl input.ttl +.Pp +To merge two files: +.Pp +.Dl $ serd-pipe -o merged.ttl header.ttl body.ttl +.Sh SEE ALSO +.Bl -item -compact +.It +.Xr serd-filter 1 +.It +.Xr serd-sort 1 +.It +.Xr serd-validate 1 +.It +.Lk http://drobilla.net/software/serd/ +.El +.Sh STANDARDS +.Bl -item -compact +.It +.Rs +.%A W3C +.%T RDF 1.1 NQuads +.%D February 2014 +.Re +.Lk https://www.w3.org/TR/n-quads/ +.It +.Rs +.%A W3C +.%D February 2014 +.%T RDF 1.1 NTriples +.Re +.Lk https://www.w3.org/TR/n-triples/ +.It +.Rs +.%A W3C +.%T RDF 1.1 TriG +.%D February 2014 +.Re +.Lk https://www.w3.org/TR/trig/ +.It +.Rs +.%A W3C +.%D February 2014 +.%T RDF 1.1 Turtle +.Re +.Lk https://www.w3.org/TR/turtle/ +.El +.Sh AUTHORS +.Nm +is a part of serd, by +.An David Robillard +.Mt d@drobilla.net . diff --git a/doc/man/serd-sort.1 b/doc/man/serd-sort.1 new file mode 100644 index 00000000..7fb02978 --- /dev/null +++ b/doc/man/serd-sort.1 @@ -0,0 +1,195 @@ +.Dd October 21, 2021 +.Dt SERD-SORT 1 +.Os Serd +.Sh NAME +.Nm serd-sort +.Nd reorder RDF statements +.Sh SYNOPSIS +.Nm serd-sort +.Op Fl htV +.Op Fl B Ar base +.Op Fl I Ar syntax +.Op Fl O Ar syntax +.Op Fl b Ar bytes +.Op Fl c Ar collation +.Op Fl k Ar bytes +.Op Fl o Ar filename +.Op Ar input ... +.Sh DESCRIPTION +.Nm +reorders statements in RDF data by loading everything into memory then rewriting it. +By default, +a +.Dq pretty +ordering is used which is ideal for pretty-printing to Turtle or TriG. 
+The +.Fl c +option can be used to request a specific ordering, +which is mainly useful when emitting a line-based syntax like NTriples or NQuads in a pipeline. +.Pp +The +.Ar input +operands are processed in command-line order. +If +.Ar input +is +.Ar - +or absent, +.Nm +reads from standard input. +.Pp +The options are as follows: +.Pp +.Bl -tag -compact -width 3n +.It Fl B Ar base +Base URI, path, or +.Cm rebase +to use the output path. +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl I Ar syntax +Input syntax or option: +.Cm NQuads , +.Cm NTriples , +.Cm TriG , +.Cm Turtle , +.Cm lax , +.Cm variables , +.Cm relative , +or +.Cm labels . +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl O Ar syntax +Output syntax or option: +.Cm empty , +.Cm NQuads , +.Cm NTriples , +.Cm TriG , +.Cm Turtle , +.Cm ascii , +.Cm contextual , +.Cm expanded , +.Cm verbatim , +.Cm terse , +or +.Cm lax . +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl V +Display version information and exit. +.Pp +.It Fl b Ar bytes +I/O block size. +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl c Ar collation +A specific collation (statement ordering) to use. +This can be any ordering of the characters +.Dq SPO , +which stand for the subject, predicate, and object of statements. +Optionally, +.Dq G +can be added as the first character, +which will sort graph-first. +Concretely, the valid values are: +.Cm SPO , +.Cm SOP , +.Cm OPS , +.Cm OSP , +.Cm PSO , +.Cm POS , +.Cm GSPO , +.Cm GSOP , +.Cm GOPS , +.Cm GOSP , +.Cm GPSO , +and +.Cm GPOS . +.Pp +.It Fl h +Print the command line options. +.Pp +.It Fl k Ar bytes +Parser stack size. +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl o Ar filename +Write output to the given +.Ar filename +instead of stdout. +.Pp +.It Fl t +Do not write type as +.Dq a +before other properties. +Instead, rdf:type will be written in order like any other property. +.El +.Sh EXIT STATUS +.Nm +exits with a status of 0, or non-zero if an error occurred. 
+.Sh EXAMPLES +To pretty-print a file: +.Pp +.Dl $ serd-sort -o pretty.ttl input.ttl +.Pp +To print statements ordered by predicate, subject, then object: +.Pp +.Dl $ serd-sort -c PSO input.ttl +.Sh SEE ALSO +.Bl -item -compact +.It +.Xr serd-pipe 1 +.It +.Xr serd-filter 1 +.It +.Xr serd-validate 1 +.It +.Lk http://drobilla.net/software/serd/ +.El +.Sh STANDARDS +.Bl -item -compact +.It +.Rs +.%A W3C +.%T RDF 1.1 NQuads +.%D February 2014 +.Re +.Lk https://www.w3.org/TR/n-quads/ +.It +.Rs +.%A W3C +.%D February 2014 +.%T RDF 1.1 NTriples +.Re +.Lk https://www.w3.org/TR/n-triples/ +.It +.Rs +.%A W3C +.%T RDF 1.1 TriG +.%D February 2014 +.Re +.Lk https://www.w3.org/TR/trig/ +.It +.Rs +.%A W3C +.%D February 2014 +.%T RDF 1.1 Turtle +.Re +.Lk https://www.w3.org/TR/turtle/ +.El +.Sh AUTHORS +.Nm +is a part of serd, by +.An David Robillard +.Mt d@drobilla.net . diff --git a/doc/man/serd-validate.1 b/doc/man/serd-validate.1 new file mode 100644 index 00000000..3d8c0035 --- /dev/null +++ b/doc/man/serd-validate.1 @@ -0,0 +1,247 @@ +.Dd October 21, 2021 +.Dt SERD-VALIDATE 1 +.Os Serd +.Sh NAME +.Nm serd-validate +.Nd validate RDF data +.Sh SYNOPSIS +.Nm serd-validate +.Op Fl hVv +.Op Fl B Ar base +.Op Fl I Ar syntax +.Op Fl W Ar checks +.Op Fl X Ar checks +.Op Fl k Ar bytes +.Op Fl s Ar schema +.Ar input ... +.Sh DESCRIPTION +.Nm +validates RDF data against one or more RDFS or OWL schemas. +Unlike other serd tools, +.Nm +does not write any data, +it only prints messages and returns a status that indicates whether the data is valid. +.Pp +All inputs to check must be given as an +.Ar input . +Schemas can also be given as an +.Ar input , +in which case they will also be checked for issues. +Alternatively, schemas can be given using the +.Fl s +option, which will load the schema to check against, +but not initiate checks for schema statements themselves. 
+This roughly means that error messages will not be printed for the schema files, +although some may still appear if they are related to issues in the input files. +When using schemas in this way, it is highly recommended to ensure that the schemas are valid first, +otherwise the validator may not work correctly. +.Pp +The options are as follows: +.Pp +.Bl -tag -compact -width 3n +.It Fl B Ar base +Base URI or path. +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl I Ar syntax +Input syntax or option: +.Cm NQuads , +.Cm NTriples , +.Cm TriG , +.Cm Turtle , +.Cm lax , +.Cm variables , +.Cm relative , +or +.Cm labels . +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl W Ar checks +Warn about any issues found by the given +.Ar checks , +which is a regular expression that matches a set of check names to enable, +or a special name for standard sets of checks: +.Bl -tag -width 3n +.It Cm all +Enables all checks that should pass on typical well-formed data. +.It Cm everything +Enables all the checks of +.Cm all , +along with additional strict checks that are useful for making data simpler and more explicit. +.El +.Pp +See +.Sx CHECKS +below for a detailed list of all checks. +.Pp +.It Fl X Ar checks +Exclude +.Ar checks +from the set of checks enabled by a previous +.Fl W +option. +This is typically used after +.Fl W Cm all +or +.Fl W Cm everything +to suppress a few specific checks. +.Pp +.It Fl V +Display version information and exit. +.Pp +.It Fl h +Print the command line options. +.Pp +.It Fl k Ar bytes +Parser stack size. +See +.Xr serd-pipe 1 +for details. +.Pp +.It Fl s Ar schema +Schema input file. +The schema will be loaded like an input, +but into a separate graph that is not checked for issues. +.Pp +.It Fl v +Print verbose messages about loaded resources. +.El +.Sh CHECKS +.Bl -tag -compact -width 3n +.It Cm allValuesFrom +Checks that all properties with owl:allValuesFrom restrictions have valid value types. 
+.It Cm anyUri +Checks that the value of any property with range xsd:anyURI is a URI. +.It Cm cardinalityEqual +Checks that any instance of a class with an owl:cardinality property restriction has exactly that many values of that property. +.It Cm cardinalityMax +Checks that any instance of a class with an owl:maxCardinality property restriction has no more than that many values of that property. +.It Cm cardinalityMin +Checks that any instance of a class with an owl:minCardinality property restriction has at least that many values of that property. +.It Cm classCycle +Checks that no class is a sub-class of itself, recursively. +This ensures that the graph is acyclic with respect to rdfs:subClassOf. +If this check fails, all further checks are aborted. +.It Cm classLabel +Checks that every rdfs:Class has an rdfs:label. +.It Cm datatypeCycle +Checks that no datatype is a sub-datatype of itself, recursively. +This ensures that the graph is acyclic with respect to owl:onDatatype. +If this check fails, all further checks are aborted. +.It Cm datatypeProperty +Checks that datatype properties have literal (not instance) values. +.It Cm datatypeType +Checks that every datatype is defined as an rdfs:Datatype. +.It Cm deprecatedClass +Checks that there are no instances of deprecated classes. +.It Cm deprecatedProperty +Checks that there are no uses of deprecated properties. +.It Cm explicitInstanceType +Checks that every instance explicitly has every type required of it. +This is an (often overly) strict check that assumes a closed world and requires every instance to explicitly have the type(s) required of it. +.It Cm functionalProperty +Checks that no instance has several values of a functional property. +.It Cm instanceLiteral +Checks that there are no instances where a literal is expected. +.It Cm instanceType +Checks that every instance with an explicit type matches that type. 
+This is a broad check that triggers other type-related checks, but mainly it will check that every instance of a class conforms to any restrictions on that class. +.It Cm inverseFunctionalProperty +Checks that at most one instance has a given value of an inverse functional property. +.It Cm literalInstance +Checks that there are no literals where an instance is expected. +.It Cm literalMaxExclusive +Checks that literal values are not greater than or equal to any applicable xsd:maxExclusive datatype restrictions. +.It Cm literalMaxInclusive +Checks that literal values are not greater than any applicable xsd:maxInclusive datatype restrictions. +.It Cm literalMinExclusive +Checks that literal values are not less than or equal to any applicable xsd:minExclusive datatype restrictions. +.It Cm literalMinInclusive +Checks that literal values are not less than any applicable xsd:minInclusive datatype restrictions. +.It Cm literalPattern +Checks that literals with xsd:pattern restrictions match the regular expression pattern for their datatype. +.It Cm literalRestriction +Checks that literals with supported restrictions conform to those restrictions. +This is a high-level check that triggers the more specific individual literal restriction checks. +.It Cm literalValue +Checks that literals with supported XSD datatypes are valid. +The set of supported types is the same as when writing canonical forms. +.It Cm objectProperty +Checks that object properties have instance (not literal) values. +.It Cm plainLiteralDatatype +Checks that there are no typed literals where a plain literal is expected. +A plain literal may have an optional language tag, but not a datatype. +.It Cm predicateType +Checks that every predicate is defined as an rdf:Property. +.It Cm propertyCycle +Checks that no property is a sub-property of itself, recursively. +This ensures that the graph is acyclic with respect to rdfs:subPropertyOf. +If this check fails, all further checks are aborted. 
+.It Cm propertyDomain +Checks that any instance with a property with an rdfs:domain is in that domain. +.It Cm propertyLabel +Checks that every rdf:Property has an rdfs:label. +.It Cm propertyRange +Checks that the value for any property with an rdfs:range is in that range. +.It Cm someValuesFrom +Checks that instances of classes with owl:someValuesFrom property restrictions have at least one matching property value. +.El +.Sh EXIT STATUS +.Nm +exits with a status of 0 on success, +17 if the data is invalid, +or another non-zero status if an error occurred. +.Sh EXAMPLES +To run all checks on a file: +.Pp +.Dl $ serd-validate -W all input.ttl +.Pp +To run only the +.Li classLabel +check on a file: +.Pp +.Dl $ serd-validate -W classLabel input.ttl +.Pp +To run all checks +.Em except +.Li classLabel +on a file: +.Pp +.Dl $ serd-validate -W all -X classLabel input.ttl +.Sh SEE ALSO +.Bl -item -compact +.It +.Xr serd-pipe 1 +.It +.Xr serd-filter 1 +.It +.Xr serd-sort 1 +.It +.Lk http://drobilla.net/software/serd/ +.El +.Sh STANDARDS +.Bl -item -compact +.It +.Rs +.%A W3C +.%T OWL 2 Web Ontology Language +.%D December 2012 +.Re +.Lk https://www.w3.org/TR/owl2-syntax/ +.It +.Rs +.%A W3C +.%D February 2014 +.%T RDF Schema 1.1 +.Re +.Lk https://www.w3.org/TR/rdf-schema/ +.El +.Sh AUTHORS +.Nm +is a part of serd, by +.An David Robillard +.Mt d@drobilla.net . -- cgit v1.2.1