From f4365012b555699b916dbeec4d81425bf663579c Mon Sep 17 00:00:00 2001 From: David Robillard Date: Thu, 8 Mar 2012 15:57:20 +0000 Subject: Add serd_writer_get_env(). Add serd_node_new_uri_from_path() and serd_file_uri_parse() and implement proper URI to/from path hex escaping, etc. Add serd_uri_serialise_relative() for making URIs relative to a base where possible (by chopping a common prefix and adding dot segments). Make URIs serialised by the writer properly escape characters. git-svn-id: http://svn.drobilla.net/serd/trunk@330 490d8e77-9747-427b-9fa3-0b8f29cee8a0 --- ChangeLog | 6 ++ serd/serd.h | 48 ++++++++++- src/node.c | 62 ++++++++++++++ src/serd_internal.h | 7 ++ src/serdi.c | 3 +- src/uri.c | 235 +++++++++++++++++++++++++++++++++++++++------------- src/writer.c | 102 ++++++++++++++--------- tests/serd_test.c | 100 +++++++++++++++++++++- wscript | 2 +- 9 files changed, 464 insertions(+), 101 deletions(-) diff --git a/ChangeLog b/ChangeLog index b44938da..623a371f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -30,6 +30,12 @@ serd (UNRELEASED) unstable; urgency=low * Report read error if both "genid" and "docid" IDs are found in the same document, to prevent silent merging of distinct blank nodes. * Handle files and strings that start with a UTF-8 Byte Order Mark. + * Add serd_writer_get_env(). + * Add serd_node_new_uri_from_path() and serd_file_uri_parse() and implement + proper URI to/from path hex escaping, etc. + * Add serd_uri_serialise_relative() for making URIs relative to a base + where possible (by chopping a common prefix and adding dot segments). + * Make URIs serialised by the writer properly escape characters. -- David Robillard (UNRELEASED) diff --git a/serd/serd.h b/serd/serd.h index ba4e6fb5..3a2f9e3b 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -244,7 +244,7 @@ typedef enum { SERD_STYLE_ASCII = 1 << 1, /**< Escape all non-ASCII characters. */ SERD_STYLE_RESOLVED = 1 << 2, /**< Resolve URIs against base URI. */ SERD_STYLE_CURIED = 1 << 3, /**< Shorten URIs into CURIEs. */ - SERD_STYLE_BULK = 1 << 4, /**< Write output in pages. */ + SERD_STYLE_BULK = 1 << 4 /**< Write output in pages. */ } SerdStyle; /** @@ -305,11 +305,28 @@ static const SerdURI SERD_URI_NULL = {{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}}; /** Return the local path for @c uri, or NULL if @c uri is not a file URI. + Note this (inappropriately named) function only removes the file scheme if + necessary, and returns @c uri unmodified if it is an absolute path. Percent + encoding and other issues are not handled, to properly convert a file URI to + a path, use serd_file_uri_parse(). */ SERD_API const uint8_t* serd_uri_to_path(const uint8_t* uri); +/** + Get the unescaped path and hostname from a file URI. + @param uri A file URI. + @param hostname If non-NULL, set to the hostname, if present. + @return The path component of the URI. + + Both the returned path and @c hostname (if applicable) are owned by the + caller and must be freed with free(). +*/ +SERD_API +uint8_t* +serd_file_uri_parse(const uint8_t* uri, uint8_t** hostname); + /** Return true iff @c utf8 starts with a valid URI scheme. */ @@ -343,6 +360,16 @@ SERD_API size_t serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream); +/** + Serialise @c uri relative to @c base with a series of calls to @c sink. +*/ +SERD_API +size_t +serd_uri_serialise_relative(const SerdURI* uri, + const SerdURI* base, + SerdSink sink, + void* stream); + /** @} @name Node @@ -394,6 +421,17 @@ serd_node_new_uri_from_string(const uint8_t* str, const SerdURI* base, SerdURI* out); +/** + Create a new file URI node from a file system path. + If @c path is relative, @c hostname is ignored. + If @c out is not NULL, it will be set to the parsed URI. +*/ +SERD_API +SerdNode +serd_node_new_uri_from_path(const uint8_t* path, + const uint8_t* hostname, + SerdURI* out); + /** Create a new node by serialising @c uri into a new string. @@ -439,7 +477,6 @@ serd_node_new_integer(int64_t i); This function can be used to make a serialisable node out of arbitrary binary data, which can be decoded using serd_base64_decode(). - @param buf Raw binary input data. @param size Size of @c buf. @param wrap_lines Wrap lines at 76 characters to conform to RFC 2045. @@ -692,6 +729,13 @@ SERD_API void serd_writer_free(SerdWriter* writer); +/** + Return the env used by @c writer. +*/ +SERD_API +SerdEnv* +serd_writer_get_env(SerdWriter* writer); + /** A convenience sink function for writing to a FILE*. diff --git a/src/node.c b/src/node.c index 4d8c620f..dc352389 100644 --- a/src/node.c +++ b/src/node.c @@ -112,6 +112,68 @@ serd_node_new_uri_from_string(const uint8_t* str, return serd_node_new_uri(&uri, base, out); // Resolve/Serialise } +static inline bool +is_uri_path_char(const uint8_t c) +{ + if (is_alpha(c) || is_digit(c)) { + return true; + } + switch (c) { + case '-': case '.': case '_': case '~': // unreserved + case ':': case '@': // pchar + case '/': // separator + // sub-delims + case '!': case '$': case '&': case '\'': case '(': case ')': + case '*': case '+': case ',': case ';': case '=': + return true; + default: + return false; + } +} + +SERD_API +SerdNode +serd_node_new_uri_from_path(const uint8_t* path, + const uint8_t* hostname, + SerdURI* out) +{ + const size_t path_len = strlen((const char*)path); + const size_t hostname_len = hostname ? strlen((const char*)hostname) : 0; + const bool evil = is_windows_path(path); + size_t uri_len = 0; + uint8_t* uri = NULL; + + if (path[0] == '/' || is_windows_path(path)) { + uri_len = strlen("file://") + hostname_len + evil; + uri = (uint8_t*)malloc(uri_len + 1); + snprintf((char*)uri, uri_len + 1, "file://%s%s", + hostname ? (const char*)hostname : "", + evil ? "/" : ""); + } + + SerdChunk chunk = { uri, uri_len }; + for (size_t i = 0; i < path_len; ++i) { + if (evil && path[i] == '\\') { + serd_chunk_sink("/", 1, &chunk); + } else if (path[i] == '%') { + serd_chunk_sink("%%", 2, &chunk); + } else if (is_uri_path_char(path[i])) { + serd_chunk_sink(path + i, 1, &chunk); + } else { + char escape[4] = { '%', 0, 0, 0 }; + snprintf(escape + 1, sizeof(escape) - 1, "%X", path[i]); + serd_chunk_sink(escape, 3, &chunk); + } + } + serd_chunk_sink_finish(&chunk); + + if (out) { + serd_uri_parse(chunk.buf, out); + } + + return serd_node_from_string(SERD_URI, chunk.buf); +} + SERD_API SerdNode serd_node_new_uri(const SerdURI* uri, const SerdURI* base, SerdURI* out) diff --git a/src/serd_internal.h b/src/serd_internal.h index 6e535402..f0137f28 100644 --- a/src/serd_internal.h +++ b/src/serd_internal.h @@ -233,4 +233,11 @@ is_base64(const uint8_t c) return is_alpha(c) || is_digit(c) || c == '+' || c == '/' || c == '='; } +static inline bool +is_windows_path(const uint8_t* path) +{ + return is_alpha(path[0]) && (path[1] == ':' || path[1] == '|') + && (path[2] == '/' || path[2] == '\\'); +} + #endif // SERD_INTERNAL_H diff --git a/src/serdi.c b/src/serdi.c index 661b60e0..ff1f8d51 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -178,7 +178,8 @@ main(int argc, char** argv) } } - if (input_syntax != SERD_NTRIPLES) { // Base URI may change (@base) + if (input_syntax != SERD_NTRIPLES // Base URI may change (@base) + || (output_syntax == SERD_TURTLE)) { output_style |= SERD_STYLE_RESOLVED; } diff --git a/src/uri.c b/src/uri.c index df36564f..b67116f9 100644 --- a/src/uri.c +++ b/src/uri.c @@ -21,21 +21,12 @@ // #define URI_DEBUG 1 -static inline bool -is_windows_path(const uint8_t* path) -{ - return is_alpha(path[0]) && (path[1] == ':' || path[1] == '|') - && (path[2] == '/' || path[2] == '\\'); -} - SERD_API const uint8_t* serd_uri_to_path(const uint8_t* uri) { const uint8_t* path = uri; - if (uri[0] == '/' || is_windows_path(uri)) { - return uri; - } else if (serd_uri_string_has_scheme(uri)) { + if (!is_windows_path(uri) && serd_uri_string_has_scheme(uri)) { if (strncmp((const char*)uri, "file:", 5)) { fprintf(stderr, "Non-file URI `%s'\n", uri); return NULL; @@ -54,6 +45,56 @@ serd_uri_to_path(const uint8_t* uri) return path; } +SERD_API +uint8_t* +serd_file_uri_parse(const uint8_t* uri, uint8_t** hostname) +{ + const uint8_t* path = uri; + if (hostname) { + *hostname = NULL; + } + if (!strncmp((const char*)uri, "file://", 7)) { + const uint8_t* auth = uri + 7; + if (*auth == '/') { // No hostname + path = auth; + } else { // Has hostname + if (!(path = (const uint8_t*)strchr((const char*)auth, '/'))) { + return NULL; + } + if (hostname) { + *hostname = (uint8_t*)calloc(1, path - auth + 1); + memcpy(*hostname, auth, path - auth); + } + } + } + + if (is_windows_path(path + 1)) { + ++path; + } + + SerdChunk chunk = { NULL, 0 }; + for (const uint8_t* s = path; *s; ++s) { + if (*s == '%') { + if (*(s + 1) == '%') { + serd_chunk_sink("%", 1, &chunk); + ++s; + } else if (is_digit(*(s + 1)) && is_digit(*(s + 2))) { + const uint8_t code[3] = { *(s + 1), *(s + 2), 0 }; + uint32_t num; + sscanf((const char*)code, "%X", &num); + const uint8_t c = num; + serd_chunk_sink(&c, 1, &chunk); + s += 2; + } else { + s += 2; // Junk escape, ignore + } + } else { + serd_chunk_sink(s, 1, &chunk); + } + } + return serd_chunk_sink_finish(&chunk); +} + SERD_API bool serd_uri_string_has_scheme(const uint8_t* utf8) @@ -276,12 +317,11 @@ remove_dot_segments(const uint8_t* path, size_t len, size_t* up) return begin; } +/// See http://tools.ietf.org/html/rfc3986#section-5.2.2 SERD_API void serd_uri_resolve(const SerdURI* r, const SerdURI* base, SerdURI* t) { - // See http://tools.ietf.org/html/rfc3986#section-5.2.2 - t->path_base.buf = NULL; t->path_base.len = 0; if (r->scheme.len) { @@ -323,66 +363,147 @@ serd_uri_resolve(const SerdURI* r, const SerdURI* base, SerdURI* t) #endif } -SERD_API -size_t -serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream) +/** Write a relative path relative to a base path. */ +static size_t +write_rel_path(SerdSink sink, + void* stream, + const SerdChunk* base, + const SerdChunk* path) { - // See http://tools.ietf.org/html/rfc3986#section-5.3 - - size_t write_size = 0; -#define WRITE(buf, len) \ - write_size += len; \ - sink((const uint8_t*)buf, len, stream); + size_t up; + size_t len = 0; + const uint8_t* begin = remove_dot_segments(path->buf, path->len, &up); + const uint8_t* end = path->buf + path->len; + + if (base && base->buf) { + // Find the up'th last slash + const uint8_t* base_last = (base->buf + base->len - 1); + ++up; + do { + if (*base_last == '/') { + --up; + } + } while (up > 0 && (--base_last > base->buf)); - if (uri->scheme.buf) { - WRITE(uri->scheme.buf, uri->scheme.len); - WRITE(":", 1); + // Write base URI prefix + if (*base_last == '/') { + const size_t base_len = base_last - base->buf + 1; + len += sink(base->buf, base_len, stream); + } } - if (uri->authority.buf) { - WRITE("//", 2); - WRITE(uri->authority.buf, uri->authority.len); + + // Write URI suffix + len += sink(begin, end - begin, stream); + + return len; +} + +/** Write an absolute path relative to a base path. */ +static size_t +write_abs_path(SerdSink sink, + void* stream, + const SerdChunk* base, + const SerdChunk* path) +{ + size_t len = 0; + const size_t min_len = (path->len < base->len) ? path->len : base->len; + + // Find the last separator common to both paths + size_t last_shared_sep = 0; + size_t i = 0; + for (; i < min_len && path->buf[i] == base->buf[i]; ++i) { + if (path->buf[i] == '/') { + last_shared_sep = i; + } } - if (!uri->path.buf) { - WRITE(uri->path_base.buf, uri->path_base.len); - } else { - const uint8_t* begin = uri->path.buf; - const uint8_t* const end = uri->path.buf + uri->path.len; - size_t up; - begin = remove_dot_segments(uri->path.buf, uri->path.len, &up); + if (i == path->len && i == base->len) { // Paths are identical + return 0; + } else if (last_shared_sep == 0) { // No common components + return sink(path->buf, path->len, stream); + } - if (uri->path_base.buf) { - // Find the up'th last slash - const uint8_t* base_last = (uri->path_base.buf - + uri->path_base.len - 1); + // Find the number of up references ("..") required + size_t up = 0; + for (size_t i = last_shared_sep + 1; i < base->len; ++i) { + if (base->buf[i] == '/') { ++up; - do { - if (*base_last == '/') { - --up; - } - } while (up > 0 && (--base_last > uri->path_base.buf)); + } + } - // Write base URI prefix - if (*base_last == '/') { - const size_t base_len = base_last - uri->path_base.buf + 1; - WRITE(uri->path_base.buf, base_len); - } + // Write up references + for (size_t i = 0; i < up; ++i) { + len += sink("../", 3, stream); + } + + // Write suffix + const size_t suffix_len = path->len - last_shared_sep - 1; + len += sink(path->buf + last_shared_sep + 1, suffix_len, stream); + + return len; +} + +static inline bool +chunk_equals(const SerdChunk* a, const SerdChunk* b) +{ + return a->len == b->len + && !strncmp((const char*)a->buf, (const char*)b->buf, a->len); +} +/** Return true iff both are absolute URIs on the same host. */ +static inline bool +same_host(const SerdURI* base, const SerdURI* uri) +{ + return base && uri && base->scheme.len + && chunk_equals(&base->scheme, &uri->scheme) + && chunk_equals(&base->authority, &uri->authority); +} + +/// See http://tools.ietf.org/html/rfc3986#section-5.3 +SERD_API +size_t +serd_uri_serialise_relative(const SerdURI* uri, + const SerdURI* base, + SerdSink sink, + void* stream) +{ + size_t len = 0; + const bool relative = same_host(base, uri); + if (relative) { + len = write_abs_path(sink, stream, base ? &base->path : 0, &uri->path); + } + if (!relative || (!len && base->query.buf)) { + if (uri->scheme.buf) { + len += sink(uri->scheme.buf, uri->scheme.len, stream); + len += sink(":", 1, stream); + } + if (uri->authority.buf) { + len += sink("//", 2, stream); + len += sink(uri->authority.buf, uri->authority.len, stream); + } + if (uri->path.buf && uri->path_base.buf) { + len += write_rel_path(sink, stream, &uri->path_base, &uri->path); + } else if (uri->path.buf) { + len += write_rel_path(sink, stream, NULL, &uri->path); } else { - // Relative path is just query or fragment, append to base URI - WRITE(uri->path_base.buf, uri->path_base.len); + len += sink(uri->path_base.buf, uri->path_base.len, stream); } - - // Write URI suffix - WRITE(begin, end - begin); } if (uri->query.buf) { - WRITE("?", 1); - WRITE(uri->query.buf, uri->query.len); + len += sink("?", 1, stream); + len += sink(uri->query.buf, uri->query.len, stream); } if (uri->fragment.buf) { // Note uri->fragment.buf includes the leading `#' - WRITE(uri->fragment.buf, uri->fragment.len); + len += sink(uri->fragment.buf, uri->fragment.len, stream); } - return write_size; + return len; +} + +/// See http://tools.ietf.org/html/rfc3986#section-5.3 +SERD_API +size_t +serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream) +{ + return serd_uri_serialise_relative(uri, NULL, sink, stream); } diff --git a/src/writer.c b/src/writer.c index eb9c5b2d..849d4e7c 100644 --- a/src/writer.c +++ b/src/writer.c @@ -131,23 +131,24 @@ sink(const void* buf, size_t len, SerdWriter* writer) } } -static bool +static size_t write_text(SerdWriter* writer, TextContext ctx, - const uint8_t* utf8, size_t n_bytes, uint8_t terminator) + const uint8_t* utf8, size_t n_bytes) { - char escape[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + size_t len = 0; + char escape[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; for (size_t i = 0; i < n_bytes;) { // Fast bulk write for long strings of printable ASCII size_t j = i; for (; j < n_bytes; ++j) { - if (utf8[j] == terminator || utf8[j] == '\\' || utf8[j] == '"' + if (utf8[j] == '>' || utf8[j] == '\\' || utf8[j] == '"' || (!in_range(utf8[j], 0x20, 0x7E))) { break; } } if (j > i) { - sink(&utf8[i], j - i, writer); + len += sink(&utf8[i], j - i, writer); i = j; continue; } @@ -155,27 +156,29 @@ write_text(SerdWriter* writer, TextContext ctx, uint8_t in = utf8[i++]; if (ctx == WRITE_LONG_STRING) { if (in == '\\') { - sink("\\\\", 2, writer); continue; + len += sink("\\\\", 2, writer); continue; } else if (in == '\"' && i == n_bytes) { - sink("\\\"", 2, writer); continue; // '"' at end of string + len += sink("\\\"", 2, writer); continue; // '"' at string end } } else { switch (in) { - case '\\': sink("\\\\", 2, writer); continue; - case '\n': sink("\\n", 2, writer); continue; - case '\r': sink("\\r", 2, writer); continue; - case '\t': sink("\\t", 2, writer); continue; + case '\\': len += sink("\\\\", 2, writer); continue; + case '\n': len += sink("\\n", 2, writer); continue; + case '\r': len += sink("\\r", 2, writer); continue; + case '\t': len += sink("\\t", 2, writer); continue; case '"': - if (terminator == '"') { - sink("\\\"", 2, writer); + if (ctx == WRITE_STRING) { + len += sink("\\\"", 2, writer); continue; } // else fall-through default: break; } - if (in == terminator) { - snprintf(escape, sizeof(escape), "\\u%04X", terminator); - sink(escape, 6, writer); + if ((ctx == WRITE_STRING && in == '"') || + (ctx == WRITE_URI && in == '>')) { + snprintf(escape, sizeof(escape), "\\u%04X", + ctx == WRITE_STRING ? '"' : '>'); + len += sink(escape, 6, writer); continue; } } @@ -186,10 +189,10 @@ write_text(SerdWriter* writer, TextContext ctx, c = in & 0x7F; if (in_range(c, 0x20, 0x7E) || (is_space(c) && ctx == WRITE_LONG_STRING)) { - sink(&in, 1, writer); // Print ASCII character + len += sink(&in, 1, writer); // Print ASCII character } else { snprintf(escape, sizeof(escape), "\\u%04X", c); - sink(escape, 6, writer); // Escape ASCII control character + len += sink(escape, 6, writer); // ASCII control character } continue; } else if ((in & 0xE0) == 0xC0) { // Starts with `110' @@ -204,14 +207,14 @@ write_text(SerdWriter* writer, TextContext ctx, } else { fprintf(stderr, "Invalid UTF-8: %X\n", in); const uint8_t replacement_char[] = { 0xEF, 0xBF, 0xBD }; - sink(replacement_char, sizeof(replacement_char), writer); - return false; + len += sink(replacement_char, sizeof(replacement_char), writer); + return 0; } if (ctx != WRITE_URI && !(writer->style & SERD_STYLE_ASCII)) { // Write UTF-8 character directly to UTF-8 output // TODO: Always parse and validate character? - sink(utf8 + i - 1, size, writer); + len += sink(utf8 + i - 1, size, writer); i += size - 1; continue; } @@ -228,13 +231,19 @@ write_text(SerdWriter* writer, TextContext ctx, if (c < 0xFFFF) { snprintf(escape, sizeof(escape), "\\u%04X", c); - sink(escape, 6, writer); + len += sink(escape, 6, writer); } else { snprintf(escape, sizeof(escape), "\\U%08X", c); - sink(escape, 10, writer); + len += sink(escape, 10, writer); } } - return true; + return len; +} + +static size_t +uri_sink(const void* buf, size_t len, void* stream) +{ + return write_text((SerdWriter*)stream, WRITE_URI, buf, len); } static void @@ -299,6 +308,7 @@ write_node(SerdWriter* writer, { SerdChunk uri_prefix; SerdChunk uri_suffix; + bool has_scheme; switch (node->type) { case SERD_BLANK: if (writer->syntax != SERD_NTRIPLES @@ -343,8 +353,8 @@ write_node(SerdWriter* writer, return false; } sink("<", 1, writer); - write_text(writer, WRITE_URI, uri_prefix.buf, uri_prefix.len, '>'); - write_text(writer, WRITE_URI, uri_suffix.buf, uri_suffix.len, '>'); + write_text(writer, WRITE_URI, uri_prefix.buf, uri_prefix.len); + write_text(writer, WRITE_URI, uri_suffix.buf, uri_suffix.len); sink(">", 1, writer); break; case SERD_TURTLE: @@ -365,12 +375,11 @@ write_node(SerdWriter* writer, if (writer->syntax != SERD_NTRIPLES && (node->flags & (SERD_HAS_NEWLINE|SERD_HAS_QUOTE))) { sink("\"\"\"", 3, writer); - write_text(writer, WRITE_LONG_STRING, - node->buf, node->n_bytes, '\0'); + write_text(writer, WRITE_LONG_STRING, node->buf, node->n_bytes); sink("\"\"\"", 3, writer); } else { sink("\"", 1, writer); - write_text(writer, WRITE_STRING, node->buf, node->n_bytes, '"'); + write_text(writer, WRITE_STRING, node->buf, node->n_bytes); sink("\"", 1, writer); } if (lang && lang->buf) { @@ -382,6 +391,7 @@ write_node(SerdWriter* writer, } break; case SERD_URI: + has_scheme = serd_uri_string_has_scheme(node->buf); if ((writer->syntax == SERD_TURTLE) && !strcmp((const char*)node->buf, NS_RDF "type")) { sink("a", 1, writer); @@ -390,29 +400,38 @@ write_node(SerdWriter* writer, && !strcmp((const char*)node->buf, NS_RDF "nil")) { sink("()", 2, writer); break; - } else if ((writer->style & SERD_STYLE_CURIED) - && serd_uri_string_has_scheme(node->buf)) { + } else if (has_scheme && (writer->style & SERD_STYLE_CURIED)) { SerdNode prefix; SerdChunk suffix; if (serd_env_qualify(writer->env, node, &prefix, &suffix)) { - write_text(writer, WRITE_URI, prefix.buf, prefix.n_bytes, '>'); + write_text(writer, WRITE_URI, prefix.buf, prefix.n_bytes); sink(":", 1, writer); - write_text(writer, WRITE_URI, suffix.buf, suffix.len, '>'); + write_text(writer, WRITE_URI, suffix.buf, suffix.len); break; } - } else if ((writer->style & SERD_STYLE_RESOLVED) - && !serd_uri_string_has_scheme(node->buf)) { + } + if (!has_scheme && (writer->style & SERD_STYLE_RESOLVED)) { SerdURI uri; serd_uri_parse(node->buf, &uri); SerdURI abs_uri; serd_uri_resolve(&uri, &writer->base_uri, &abs_uri); sink("<", 1, writer); - serd_uri_serialise(&abs_uri, (SerdSink)sink, writer); + serd_uri_serialise(&abs_uri, uri_sink, writer); + sink(">", 1, writer); + break; + } else if (has_scheme && (writer->syntax == SERD_TURTLE) + && (writer->style & SERD_STYLE_RESOLVED)) { + SerdURI uri; + serd_uri_parse(node->buf, &uri); + sink("<", 1, writer); + serd_uri_serialise_relative( + &uri, &writer->base_uri, uri_sink, writer); sink(">", 1, writer); break; } + sink("<", 1, writer); - write_text(writer, WRITE_URI, node->buf, node->n_bytes, '>'); + write_text(writer, WRITE_URI, node->buf, node->n_bytes); sink(">", 1, writer); default: break; @@ -687,7 +706,7 @@ serd_writer_set_prefix(SerdWriter* writer, sink("@prefix ", 8, writer); sink(name->buf, name->n_bytes, writer); sink(": <", 3, writer); - write_text(writer, WRITE_URI, uri->buf, uri->n_bytes, '>'); + write_text(writer, WRITE_URI, uri->buf, uri->n_bytes); sink("> .\n", 4, writer); } return reset_context(writer, false); @@ -708,6 +727,13 @@ serd_writer_free(SerdWriter* writer) free(writer); } +SERD_API +SerdEnv* +serd_writer_get_env(SerdWriter* writer) +{ + return writer->env; +} + SERD_API size_t serd_file_sink(const void* buf, size_t len, void* stream) diff --git a/tests/serd_test.c b/tests/serd_test.c index 1474c37d..000415de 100644 --- a/tests/serd_test.c +++ b/tests/serd_test.c @@ -96,8 +96,7 @@ main() }; for (unsigned i = 0; i < sizeof(expt_test_nums) / sizeof(double); ++i) { - char* endptr; - const double num = serd_strtod(expt_test_strs[i], &endptr); + const double num = serd_strtod(expt_test_strs[i], NULL); const double delta = fabs(num - expt_test_nums[i]); if (delta > DBL_EPSILON) { return failure("Parsed `%s' %lf != %lf (delta %lf)\n", @@ -253,6 +252,99 @@ main() return failure("Bad path %s for %s\n", serd_uri_to_path(uri), uri); } + // Test serd_node_new_uri_from_path and serd_file_uri_parse + SerdURI furi; + const uint8_t* path_str = USTR("C:/My 100%"); + SerdNode file_node = serd_node_new_uri_from_path(path_str, 0, &furi); + uint8_t* hostname = NULL; + uint8_t* out_path = serd_file_uri_parse(file_node.buf, &hostname); + if (strcmp((const char*)file_node.buf, "file:///C:/My%20100%%")) { + return failure("Bad URI %s\n", file_node.buf); + } else if (hostname) { + return failure("hostname `%s' shouldn't exist\n", hostname); + } else if (strcmp((const char*)path_str, (const char*)out_path)) { + return failure("path=>URI=>path failure %s => %s => %s\n", + path_str, file_node.buf, out_path); + } + free(out_path); + serd_node_free(&file_node); + + path_str = USTR("C:\\Pointless Space"); + file_node = serd_node_new_uri_from_path(path_str, USTR("pwned"), 0); + hostname = NULL; + out_path = serd_file_uri_parse(file_node.buf, &hostname); + if (strcmp((const char*)file_node.buf, "file://pwned/C:/Pointless%20Space")) { + return failure("Bad URI %s\n", file_node.buf); + } else if (!hostname || strcmp((const char*)hostname, "pwned")) { + return failure("Bad hostname `%s'\n", hostname); + } else if (strcmp((const char*)out_path, "C:/Pointless Space")) { + return failure("path=>URI=>path failure %s => %s => %s\n", + path_str, file_node.buf, out_path); + } + free(hostname); + free(out_path); + serd_node_free(&file_node); + + path_str = USTR("/foo/bar"); + file_node = serd_node_new_uri_from_path(path_str, 0, 0); + hostname = NULL; + out_path = serd_file_uri_parse(file_node.buf, &hostname); + if (strcmp((const char*)file_node.buf, "file:///foo/bar")) { + return failure("Bad URI %s\n", file_node.buf); + } else if (hostname) { + return failure("hostname `%s' shouldn't exist\n", hostname); + } else if (strcmp((const char*)path_str, (const char*)out_path)) { + return failure("path=>URI=>path failure %s => %s => %s\n", + path_str, file_node.buf, out_path); + } + free(out_path); + serd_node_free(&file_node); + + path_str = USTR("/foo/bar"); + file_node = serd_node_new_uri_from_path(path_str, USTR("localhost"), 0); + out_path = serd_file_uri_parse(file_node.buf, &hostname); + if (strcmp((const char*)file_node.buf, "file://localhost/foo/bar")) { + return failure("Bad URI %s\n", file_node.buf); + } else if (strcmp((const char*)hostname, "localhost")) { + return failure("incorrect hostname `%s'\n", hostname); + } else if (strcmp((const char*)path_str, (const char*)out_path)) { + return failure("path=>URI=>path failure %s => %s => %s\n", + path_str, file_node.buf, out_path); + } + free(hostname); + free(out_path); + serd_node_free(&file_node); + + path_str = USTR("a/relative path"); + file_node = serd_node_new_uri_from_path(path_str, 0, 0); + out_path = serd_file_uri_parse(file_node.buf, &hostname); + if (strcmp((const char*)file_node.buf, "a/relative%20path")) { + return failure("Bad URI %s\n", file_node.buf); + } else if (hostname) { + return failure("hostname `%s' shouldn't exist\n", hostname); + } else if (strcmp((const char*)path_str, (const char*)out_path)) { + return failure("path=>URI=>path failure %s => %s => %s\n", + path_str, file_node.buf, out_path); + } + free(hostname); + free(out_path); + serd_node_free(&file_node); + + if (serd_file_uri_parse(USTR("file://invalid"), NULL)) { + return failure("successfully parsed bogus URI \n"); + } + + out_path = serd_file_uri_parse(USTR("file://host/foo/%XYbar"), NULL); + if (strcmp((const char*)out_path, "/foo/bar")) { + return failure("bad tolerance of junk escape: `%s'\n", out_path); + } + free(out_path); + out_path = serd_file_uri_parse(USTR("file://host/foo/%0Abar"), NULL); + if (strcmp((const char*)out_path, "/foo/bar")) { + return failure("bad tolerance of junk escape: `%s'\n", out_path); + } + free(out_path); + // Test serd_node_equals const uint8_t replacement_char_str[] = { 0xEF, 0xBF, 0xBD, 0 }; @@ -386,6 +478,10 @@ main() return failure("Ended non-existent anonymous node\n"); } + if (serd_writer_get_env(writer) != env) { + return failure("Writer has incorrect env\n"); + } + uint8_t buf[] = { 0x80, 0, 0, 0, 0 }; SerdNode s = serd_node_from_string(SERD_URI, USTR("")); SerdNode p = serd_node_from_string(SERD_URI, USTR("http://example.org/pred")); diff --git a/wscript b/wscript index c4c65a37..c0cd413b 100644 --- a/wscript +++ b/wscript @@ -9,7 +9,7 @@ from waflib.extras import autowaf as autowaf import waflib.Logs as Logs, waflib.Options as Options # Version of this package (even if built as a child) -SERD_VERSION = '0.11.0' +SERD_VERSION = '0.12.0' SERD_MAJOR_VERSION = '0' # Library version (UNIX style major, minor, micro) -- cgit v1.2.1