diff options
author | David Robillard <d@drobilla.net> | 2023-02-05 18:39:49 -0500 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2023-12-02 18:49:08 -0500 |
commit | 343124df71010055c2c1e6cdcadd13d23b2c013a (patch) | |
tree | 7c2de6a72021adaac89e9c4fa97e7cc5503e0657 /include | |
parent | 530edb265fbbed20e6d3a6fd7a36461ff83d9b46 (diff) | |
download | serd-343124df71010055c2c1e6cdcadd13d23b2c013a.tar.gz serd-343124df71010055c2c1e6cdcadd13d23b2c013a.tar.bz2 serd-343124df71010055c2c1e6cdcadd13d23b2c013a.zip |
[WIP] Add support for URI hex escape decoding
Diffstat (limited to 'include')
-rw-r--r-- | include/serd/reader.h | 9 | ||||
-rw-r--r-- | include/serd/writer.h | 16 |
2 files changed, 25 insertions, 0 deletions
diff --git a/include/serd/reader.h b/include/serd/reader.h index 78b51d00..57c8b2c3 100644 --- a/include/serd/reader.h +++ b/include/serd/reader.h @@ -101,6 +101,15 @@ typedef enum { from documents when the statements are sorted, such as in a model. */ SERD_READ_ORDERED = 1U << 5U, + + /** + Read URIs with percent-encoded UTF-8 characters decoded. + + Normally, percent-encoded octets in URIs are treated as plain text and + preserved. This flag enables UTF-8 decoding of URIs, so octet escapes + like "%7E" in URIs will be decoded to UTF-8 characters like "~". + */ + SERD_READ_DECODED = 1U << 6U, } SerdReaderFlag; /// Bitwise OR of SerdReaderFlag values diff --git a/include/serd/writer.h b/include/serd/writer.h index bf54c46c..e3915a86 100644 --- a/include/serd/writer.h +++ b/include/serd/writer.h @@ -42,6 +42,10 @@ typedef enum { Although all the supported syntaxes are UTF-8 by definition, this can be used to escape all non-ASCII characters so that data will survive transmission through ASCII-only channels. + + Non-printable-ASCII characters will be written as "U" escapes like + "\u007F" in string literals, and as hex-encoded UTF-8 bytes like "%7F" in + URIs. */ SERD_WRITE_ASCII = 1U << 0U, @@ -96,6 +100,18 @@ typedef enum { implicit context, so it will only be readable in a suitable environment. */ SERD_WRITE_CONTEXTUAL = 1U << 6U, + + /** + Escape additional characters in RDF Test Cases format. + + This writes "extended" characters as printable ASCII, using "U" escapes in + URIs instead of hex-encoding (escapes like "\u007F" instead of "%7F"). + This is the format used by the outputs in the Turtle test suite (which + predates RDF 1.1 NTriples). This style makes NTriples output + non-canonical, so it generally shouldn't be used except for compatibility + purposes. See <https://www.w3.org/TR/rdf-testcases/>. + */ + SERD_WRITE_ESCAPES = 1U << 7U, } SerdWriterFlag; /// Bitwise OR of #SerdWriterFlag values |