about | summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/node.c 2
-rw-r--r-- src/serd_internal.h 51
-rw-r--r-- src/serdi.c 41
-rw-r--r-- src/uri.c 150
-rw-r--r-- src/writer.c 59
5 files changed, 188 insertions, 115 deletions
diff --git a/src/node.c b/src/node.c
index fce2b4cb..575b2ff0 100644
--- a/src/node.c
+++ b/src/node.c
@@ -41,7 +41,7 @@ SERD_API
SerdNode
serd_node_copy(const SerdNode* node)
{
- if (!node) {
+ if (!node || !node->buf) {
return SERD_NODE_NULL;
}
diff --git a/src/serd_internal.h b/src/serd_internal.h
index f0137f28..08d68fd5 100644
--- a/src/serd_internal.h
+++ b/src/serd_internal.h
@@ -240,4 +240,55 @@ is_windows_path(const uint8_t* path)
&& (path[2] == '/' || path[2] == '\\');
}
+/* URI utilities */
+
+/**
+   Return true iff chunks @p a and @p b hold exactly the same bytes.
+
+   A chunk is a (buf, len) slice and is not necessarily NUL-terminated, so
+   the contents are compared with memcmp() over exactly `len` bytes rather
+   than with a string function that stops at the first NUL.  Two empty
+   chunks compare equal without touching `buf`, which may be NULL for an
+   absent URI component.
+
+   @param a First chunk (must not be NULL).
+   @param b Second chunk (must not be NULL).
+*/
+static inline bool
+chunk_equals(const SerdChunk* a, const SerdChunk* b)
+{
+    if (a->len != b->len) {
+        return false;
+    }
+
+    // Skip the compare entirely for empty chunks: buf may be NULL there
+    return a->len == 0 || !memcmp(a->buf, b->buf, a->len);
+}
+
+/** Return the total path length of @p uri: path_base.len plus path.len. */
+static inline size_t
+uri_path_len(const SerdURI* uri)
+{
+    const size_t prefix_len = uri->path_base.len;
+    const size_t suffix_len = uri->path.len;
+
+    return prefix_len + suffix_len;
+}
+
+/**
+   Return the byte at index @p i of the path of @p uri.
+
+   The path is treated as path_base immediately followed by path, so
+   indices below path_base.len read from path_base and the rest read from
+   path.  No bounds check is performed; callers keep @p i below
+   uri_path_len().
+*/
+static inline uint8_t
+uri_path_at(const SerdURI* uri, size_t i)
+{
+    const size_t prefix_len = uri->path_base.len;
+
+    return (i < prefix_len)
+        ? uri->path_base.buf[i]
+        : uri->path.buf[i - prefix_len];
+}
+
+/**
+   Return true iff @p uri is within the base of @p root.
+
+   Both URIs must be non-NULL, @p root must have a scheme, and the scheme
+   and authority of the two must be equal.  The paths are then compared
+   byte by byte over their common length: once they have diverged, meeting
+   a '/' in the root path means the root has a path segment that @p uri
+   does not share, so the result is false.
+*/
+static inline bool
+uri_is_under(const SerdURI* uri, const SerdURI* root)
+{
+    if (!root || !uri || !root->scheme.len) {
+        return false;
+    }
+    if (!chunk_equals(&root->scheme, &uri->scheme)) {
+        return false;
+    }
+    if (!chunk_equals(&root->authority, &uri->authority)) {
+        return false;
+    }
+
+    const size_t path_len = uri_path_len(uri);
+    const size_t root_len = uri_path_len(root);
+    const size_t shared   = (path_len < root_len) ? path_len : root_len;
+
+    bool diverged = false;
+    for (size_t i = 0; i < shared; ++i) {
+        const uint8_t root_c = uri_path_at(root, i);
+        if (!diverged) {
+            diverged = (uri_path_at(uri, i) != root_c);
+        }
+        if (diverged && root_c == '/') {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+
#endif // SERD_INTERNAL_H
diff --git a/src/serdi.c b/src/serdi.c
index ff1f8d51..0236156e 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -45,14 +45,15 @@ print_usage(const char* name, bool error)
fprintf(os, "Read and write RDF syntax.\n");
fprintf(os, "Use - for INPUT to read from standard input.\n\n");
fprintf(os, " -b Fast bulk output for large serialisations.\n");
- fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs\n");
+ fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n");
fprintf(os, " -f Keep full URIs in input (don't qualify).\n");
- fprintf(os, " -h Display this help and exit\n");
- fprintf(os, " -i SYNTAX Input syntax (`turtle' or `ntriples')\n");
- fprintf(os, " -o SYNTAX Output syntax (`turtle' or `ntriples')\n");
- fprintf(os, " -p PREFIX Add PREFIX to blank node IDs\n");
- fprintf(os, " -s INPUT Parse INPUT as string (terminates options)\n");
- fprintf(os, " -v Display version information and exit\n");
+ fprintf(os, " -h Display this help and exit.\n");
+ fprintf(os, " -i SYNTAX Input syntax (`turtle' or `ntriples').\n");
+ fprintf(os, " -o SYNTAX Output syntax (`turtle' or `ntriples').\n");
+ fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n");
+ fprintf(os, " -r ROOT_URI Keep relative URIs within ROOT_URI.\n");
+ fprintf(os, " -s INPUT Parse INPUT as string (terminates options).\n");
+ fprintf(os, " -v Display version information and exit.\n");
return error ? 1 : 0;
}
@@ -93,6 +94,7 @@ main(int argc, char** argv)
const uint8_t* in_name = NULL;
const uint8_t* add_prefix = NULL;
const uint8_t* chop_prefix = NULL;
+ const uint8_t* root_uri = NULL;
int a = 1;
for (; a < argc && argv[a][0] == '-'; ++a) {
if (argv[a][1] == '\0') {
@@ -130,6 +132,11 @@ main(int argc, char** argv)
return bad_arg(argv[0], 'c');
}
chop_prefix = (const uint8_t*)argv[a];
+ } else if (argv[a][1] == 'r') {
+ if (++a == argc) {
+ return bad_arg(argv[0], 'r');
+ }
+ root_uri = (const uint8_t*)argv[a];
} else {
fprintf(stderr, "%s: Unknown option `%s'\n", argv[0], argv[a]);
return print_usage(argv[0], true);
@@ -152,21 +159,17 @@ main(int argc, char** argv)
}
}
- const uint8_t* base_uri_str = NULL;
+ SerdURI base_uri = SERD_URI_NULL;
+ SerdNode base = SERD_NODE_NULL;
if (a < argc) { // Base URI given on command line
- base_uri_str = (const uint8_t*)argv[a];
+ base = serd_node_new_uri_from_string(
+ (const uint8_t*)argv[a], NULL, &base_uri);
} else if (from_file) { // Use input file URI
- base_uri_str = input;
- } else {
- base_uri_str = (const uint8_t*)"";
+ base = serd_node_new_file_uri(input, NULL, &base_uri, false);
}
- SerdURI base_uri = SERD_URI_NULL;
- SerdNode base_uri_node = serd_node_new_uri_from_string(
- base_uri_str, &base_uri, &base_uri);
-
FILE* out_fd = stdout;
- SerdEnv* env = serd_env_new(&base_uri_node);
+ SerdEnv* env = serd_env_new(&base);
int output_style = 0;
if (output_syntax == SERD_NTRIPLES) {
@@ -198,6 +201,8 @@ main(int argc, char** argv)
(SerdStatementSink)serd_writer_write_statement,
(SerdEndSink)serd_writer_end_anon);
+ SerdNode root = serd_node_from_string(SERD_URI, root_uri);
+ serd_writer_set_root_uri(writer, &root);
serd_writer_chop_blank_prefix(writer, chop_prefix);
serd_reader_add_blank_prefix(reader, add_prefix);
@@ -214,7 +219,7 @@ main(int argc, char** argv)
serd_writer_finish(writer);
serd_writer_free(writer);
serd_env_free(env);
- serd_node_free(&base_uri_node);
+ serd_node_free(&base);
return (status > SERD_FAILURE) ? 1 : 0;
}
diff --git a/src/uri.c b/src/uri.c
index 26f8ae97..781f9b68 100644
--- a/src/uri.c
+++ b/src/uri.c
@@ -130,12 +130,12 @@ serd_uri_dump(const SerdURI* uri, FILE* file)
fprintf(stderr, "\n"); \
}
- PRINT_PART(uri->scheme, "scheme");
+ PRINT_PART(uri->scheme, "scheme ");
PRINT_PART(uri->authority, "authority");
PRINT_PART(uri->path_base, "path_base");
- PRINT_PART(uri->path, "path");
- PRINT_PART(uri->query, "query");
- PRINT_PART(uri->fragment, "fragment");
+ PRINT_PART(uri->path, "path ");
+ PRINT_PART(uri->query, "query ");
+ PRINT_PART(uri->fragment, "fragment ");
}
#endif
@@ -317,6 +317,39 @@ remove_dot_segments(const uint8_t* path, size_t len, size_t* up)
return begin;
}
+/// Merge @p base and @p path in-place.
+///
+/// On return @p path starts at the pointer returned by
+/// remove_dot_segments(), and @p base (when it had a buffer) is either
+/// truncated so that it ends just after one of its slashes, or cleared to
+/// NULL/0 when the backwards scan does not end on a slash.
+/// NOTE(review): `up` is presumably the number of leading ".." segments
+/// reported by remove_dot_segments() -- confirm against that function.
+static void
+merge(SerdChunk* base, SerdChunk* path)
+{
+ size_t up;
+ const uint8_t* begin = remove_dot_segments(path->buf, path->len, &up);
+ const uint8_t* end = path->buf + path->len;
+
+ if (base->buf) {
+ // A non-NULL base must be non-empty, otherwise base_last below would
+ // point before the start of the buffer
+ assert(base->len > 0);
+ // Find the up'th last slash
+ const uint8_t* base_last = (base->buf + base->len - 1);
+ // One extra slash is counted on top of `up`, so with up == 0 only the
+ // part after the final slash of base is dropped
+ ++up;
+ // Walk backwards from the last byte; stops once enough slashes have
+ // been seen or the first byte of base is reached
+ do {
+ if (*base_last == '/') {
+ --up;
+ }
+ } while (up > 0 && (--base_last > base->buf));
+
+ // Set path prefix
+ if (*base_last == '/') {
+ // Keep everything up to and including the slash the scan ended on
+ base->len = base_last - base->buf + 1;
+ } else {
+ // Scan did not end on a slash: nothing of base is kept
+ base->len = 0;
+ base->buf = NULL;
+ }
+ }
+
+ // Set path suffix
+ path->buf = begin;
+ path->len = end - begin;
+}
+
/// See http://tools.ietf.org/html/rfc3986#section-5.2.2
SERD_API
void
@@ -344,6 +377,7 @@ serd_uri_resolve(const SerdURI* r, const SerdURI* base, SerdURI* t)
if (r->path.buf[0] != '/') {
t->path_base = base->path;
}
+ merge(&t->path_base, &t->path);
t->query = r->query;
}
t->authority = base->authority;
@@ -353,110 +387,77 @@ serd_uri_resolve(const SerdURI* r, const SerdURI* base, SerdURI* t)
}
#ifdef URI_DEBUG
- fprintf(stderr, "RESOLVE URI\nBASE:\n");
+ fprintf(stderr, "## RESOLVE URI\n# BASE\n");
serd_uri_dump(base, stderr);
- fprintf(stderr, "URI:\n");
+ fprintf(stderr, "# URI\n");
serd_uri_dump(r, stderr);
- fprintf(stderr, "RESULT:\n");
+ fprintf(stderr, "# RESULT\n");
serd_uri_dump(t, stderr);
fprintf(stderr, "\n");
#endif
}
-/** Write a relative path relative to a base path. */
+/** Write the path of @p uri starting at index @p i */
static size_t
-write_rel_path(SerdSink sink,
- void* stream,
- const SerdChunk* base,
- const SerdChunk* path)
+write_path_tail(SerdSink sink, void* stream, const SerdURI* uri, size_t i)
{
- size_t up;
- size_t len = 0;
- const uint8_t* begin = remove_dot_segments(path->buf, path->len, &up);
- const uint8_t* end = path->buf + path->len;
-
- if (base && base->buf) {
- // Find the up'th last slash
- const uint8_t* base_last = (base->buf + base->len - 1);
- ++up;
- do {
- if (*base_last == '/') {
- --up;
- }
- } while (up > 0 && (--base_last > base->buf));
-
- // Write base URI prefix
- if (*base_last == '/') {
- const size_t base_len = base_last - base->buf + 1;
- len += sink(base->buf, base_len, stream);
+ size_t len = 0;
+ if (i < uri->path_base.len) {
+ len += sink(uri->path_base.buf + i, uri->path_base.len - i, stream);
+ }
+ if (uri->path.buf) {
+ if (i < uri->path_base.len) {
+ len += sink(uri->path.buf, uri->path.len, stream);
+ } else {
+ const size_t j = (i - uri->path_base.len);
+ len += sink(uri->path.buf + j, uri->path.len - j, stream);
}
}
-
- // Write URI suffix
- len += sink(begin, end - begin, stream);
-
return len;
}
-/** Write an absolute path relative to a base path. */
+/** Write the path of @p uri relative to the path of @p base. */
static size_t
-write_abs_path(SerdSink sink,
- void* stream,
- const SerdChunk* base,
- const SerdChunk* path)
+write_rel_path(SerdSink sink,
+ void* stream,
+ const SerdURI* uri,
+ const SerdURI* base)
{
- size_t len = 0;
- const size_t min_len = (path->len < base->len) ? path->len : base->len;
+ const size_t path_len = uri_path_len(uri);
+ const size_t base_len = uri_path_len(base);
+ const size_t min_len = (path_len < base_len) ? path_len : base_len;
// Find the last separator common to both paths
size_t last_shared_sep = 0;
size_t i = 0;
- for (; i < min_len && path->buf[i] == base->buf[i]; ++i) {
- if (path->buf[i] == '/') {
+ for (; i < min_len && uri_path_at(uri, i) == uri_path_at(base, i); ++i) {
+ if (uri_path_at(uri, i) == '/') {
last_shared_sep = i;
}
}
- if (i == path->len && i == base->len) { // Paths are identical
+ if (i == path_len && i == base_len) { // Paths are identical
return 0;
} else if (last_shared_sep == 0) { // No common components
- return sink(path->buf, path->len, stream);
+ return write_path_tail(sink, stream, uri, 0);
}
// Find the number of up references ("..") required
size_t up = 0;
- for (size_t i = last_shared_sep + 1; i < base->len; ++i) {
- if (base->buf[i] == '/') {
+ for (size_t i = last_shared_sep + 1; i < base_len; ++i) {
+ if (uri_path_at(base, i) == '/') {
++up;
}
}
// Write up references
+ size_t len = 0;
for (size_t i = 0; i < up; ++i) {
len += sink("../", 3, stream);
}
// Write suffix
- const size_t suffix_len = path->len - last_shared_sep - 1;
- len += sink(path->buf + last_shared_sep + 1, suffix_len, stream);
-
- return len;
-}
-
-static inline bool
-chunk_equals(const SerdChunk* a, const SerdChunk* b)
-{
- return a->len == b->len
- && !strncmp((const char*)a->buf, (const char*)b->buf, a->len);
-}
-
-/** Return true iff both are absolute URIs on the same host. */
-static inline bool
-same_host(const SerdURI* base, const SerdURI* uri)
-{
- return base && uri && base->scheme.len
- && chunk_equals(&base->scheme, &uri->scheme)
- && chunk_equals(&base->authority, &uri->authority);
+ return len += write_path_tail(sink, stream, uri, last_shared_sep + 1);
}
/// See http://tools.ietf.org/html/rfc3986#section-5.3
@@ -464,13 +465,14 @@ SERD_API
size_t
serd_uri_serialise_relative(const SerdURI* uri,
const SerdURI* base,
+ const SerdURI* root,
SerdSink sink,
void* stream)
{
size_t len = 0;
- const bool relative = same_host(base, uri);
+ const bool relative = uri_is_under(uri, root ? root : base);
if (relative) {
- len = write_abs_path(sink, stream, base ? &base->path : 0, &uri->path);
+ len = write_rel_path(sink, stream, uri, base);
}
if (!relative || (!len && base->query.buf)) {
if (uri->scheme.buf) {
@@ -481,11 +483,7 @@ serd_uri_serialise_relative(const SerdURI* uri,
len += sink("//", 2, stream);
len += sink(uri->authority.buf, uri->authority.len, stream);
}
- if (uri->path.buf) {
- len += write_rel_path(sink, stream, &uri->path_base, &uri->path);
- } else {
- len += sink(uri->path_base.buf, uri->path_base.len, stream);
- }
+ len += write_path_tail(sink, stream, uri, 0);
}
if (uri->query.buf) {
len += sink("?", 1, stream);
@@ -503,5 +501,5 @@ SERD_API
size_t
serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream)
{
- return serd_uri_serialise_relative(uri, NULL, sink, stream);
+ return serd_uri_serialise_relative(uri, NULL, NULL, sink, stream);
}
diff --git a/src/writer.c b/src/writer.c
index f538e486..0870c785 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -77,6 +77,8 @@ struct SerdWriterImpl {
SerdSyntax syntax;
SerdStyle style;
SerdEnv* env;
+ SerdNode root_node;
+ SerdURI root_uri;
SerdURI base_uri;
SerdStack anon_stack;
SerdBulkSink bulk_sink;
@@ -411,28 +413,24 @@ write_node(SerdWriter* writer,
break;
}
}
- if (!has_scheme && (writer->style & SERD_STYLE_RESOLVED)) {
- SerdURI uri;
- serd_uri_parse(node->buf, &uri);
- SerdURI abs_uri;
- serd_uri_resolve(&uri, &writer->base_uri, &abs_uri);
- sink("<", 1, writer);
- serd_uri_serialise(&abs_uri, uri_sink, writer);
- sink(">", 1, writer);
- break;
- } else if (has_scheme && (writer->syntax == SERD_TURTLE)
- && (writer->style & SERD_STYLE_RESOLVED)) {
- SerdURI uri;
+ sink("<", 1, writer);
+ if (writer->style & SERD_STYLE_RESOLVED) {
+ SerdURI in_base_uri, uri, abs_uri;
+ serd_env_get_base_uri(writer->env, &in_base_uri);
serd_uri_parse(node->buf, &uri);
- sink("<", 1, writer);
- serd_uri_serialise_relative(
- &uri, &writer->base_uri, uri_sink, writer);
- sink(">", 1, writer);
- break;
+ serd_uri_resolve(&uri, &in_base_uri, &abs_uri);
+ bool rooted = uri_is_under(&writer->base_uri, &writer->root_uri);
+ SerdURI* root = rooted ? &writer->root_uri : & writer->base_uri;
+ if (!uri_is_under(&abs_uri, root) ||
+ writer->syntax == SERD_NTRIPLES) {
+ serd_uri_serialise(&abs_uri, uri_sink, writer);
+ } else {
+ serd_uri_serialise_relative(
+ &uri, &writer->base_uri, root, uri_sink, writer);
+ }
+ } else {
+ write_text(writer, WRITE_URI, node->buf, node->n_bytes);
}
-
- sink("<", 1, writer);
- write_text(writer, WRITE_URI, node->buf, node->n_bytes);
sink(">", 1, writer);
default:
break;
@@ -637,6 +635,8 @@ serd_writer_new(SerdSyntax syntax,
writer->syntax = syntax;
writer->style = style;
writer->env = env;
+ writer->root_node = SERD_NODE_NULL;
+ writer->root_uri = SERD_URI_NULL;
writer->base_uri = base_uri ? *base_uri : SERD_URI_NULL;
writer->anon_stack = serd_stack_new(sizeof(WriteContext));
writer->sink = sink;
@@ -687,6 +687,7 @@ serd_writer_set_base_uri(SerdWriter* writer,
sink(uri->buf, uri->n_bytes, writer);
sink("> .\n", 4, writer);
}
+ writer->indent = 0;
return reset_context(writer, false);
}
return SERD_ERR_UNKNOWN;
@@ -694,6 +695,22 @@ serd_writer_set_base_uri(SerdWriter* writer,
+/**
+   Set (or clear) the root URI of @p writer.
+
+   The writer keeps its own copy of @p uri in `root_node` and parses
+   `root_uri` from that copy, so the chunks of `root_uri` refer to memory
+   owned by the writer rather than to the caller's buffer, which the caller
+   may free or reuse after this call returns.
+   NOTE(review): this relies on serd_uri_parse() producing chunks that
+   point into the string it is given -- confirm against serd_uri_parse().
+
+   @param writer Writer to update (must not be NULL).
+   @param uri    New root URI node; NULL or a node with a NULL buffer
+                 clears the root URI.
+   @return SERD_SUCCESS, or SERD_ERR_UNKNOWN if the node could not be
+           copied (the root URI is left cleared in that case).
+*/
SERD_API
SerdStatus
+serd_writer_set_root_uri(SerdWriter* writer,
+                         const SerdNode* uri)
+{
+    // Drop any previous root first so no stale chunks survive a failure
+    serd_node_free(&writer->root_node);
+    writer->root_node = SERD_NODE_NULL;
+    writer->root_uri  = SERD_URI_NULL;
+
+    if (uri && uri->buf) {
+        writer->root_node = serd_node_copy(uri);
+        if (!writer->root_node.buf) {
+            return SERD_ERR_UNKNOWN;
+        }
+        // Parse the writer-owned copy, not the caller's buffer
+        serd_uri_parse(writer->root_node.buf, &writer->root_uri);
+    }
+
+    return SERD_SUCCESS;
+}
+
+SERD_API
+SerdStatus
serd_writer_set_prefix(SerdWriter* writer,
const SerdNode* name,
const SerdNode* uri)
@@ -710,6 +727,7 @@ serd_writer_set_prefix(SerdWriter* writer,
write_text(writer, WRITE_URI, uri->buf, uri->n_bytes);
sink("> .\n", 4, writer);
}
+ writer->indent = 0;
return reset_context(writer, false);
}
return SERD_ERR_UNKNOWN;
@@ -725,6 +743,7 @@ serd_writer_free(SerdWriter* writer)
if (writer->style & SERD_STYLE_BULK) {
serd_bulk_sink_free(&writer->bulk_sink);
}
+ serd_node_free(&writer->root_node);
free(writer);
}