about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/env.c 5
-rw-r--r-- src/reader.c 159
-rw-r--r-- src/serdi.c 94
-rw-r--r-- src/writer.c 52
4 files changed, 241 insertions, 69 deletions
diff --git a/src/env.c b/src/env.c
index 64fccaac..a9b8e474 100644
--- a/src/env.c
+++ b/src/env.c
@@ -35,13 +35,16 @@ struct SerdEnvImpl {
SERD_API
SerdEnv*
-serd_env_new()
+serd_env_new(const SerdNode* base_uri)
{
SerdEnv* env = malloc(sizeof(struct SerdEnvImpl));
env->prefixes = NULL;
env->n_prefixes = 0;
env->base_uri_node = SERD_NODE_NULL;
env->base_uri = SERD_URI_NULL;
+ if (base_uri) {
+ serd_env_set_base_uri(env, base_uri);
+ }
return env;
}
diff --git a/src/reader.c b/src/reader.c
index 44910ddf..12109ad3 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -65,6 +65,7 @@ static const Node INTERNAL_NODE_NULL = { 0, 0 };
struct SerdReaderImpl {
void* handle;
+ void (*free_handle)(void*);
SerdBaseSink base_sink;
SerdPrefixSink prefix_sink;
SerdStatementSink statement_sink;
@@ -75,9 +76,11 @@ struct SerdReaderImpl {
Node rdf_nil;
FILE* fd;
SerdStack stack;
+ SerdSyntax syntax;
Cursor cur;
uint8_t* buf;
- const uint8_t* blank_prefix;
+ uint8_t* bprefix;
+ size_t bprefix_len;
unsigned next_id;
int err;
uint8_t* read_buf;
@@ -241,6 +244,23 @@ push_byte(SerdReader* reader, Ref ref, const uint8_t c)
str->buf[str->n_bytes] = '\0';
}
+static inline void
+append_string(SerdReader* reader, Ref ref, const uint8_t* suffix)
+{
+ #ifdef SERD_STACK_CHECK
+ assert(stack_is_top_string(reader, ref));
+ #endif
+ size_t n_bytes;
+ uint32_t flags = 0;
+ size_t n_chars = serd_strlen(suffix, &n_bytes, &flags);
+ serd_stack_push(&reader->stack, n_bytes);
+ SerdString* const str = deref(reader, ref);
+ assert(str->n_bytes >= str->n_chars);
+ memcpy(str->buf + str->n_bytes, suffix, n_bytes + 1);
+ str->n_bytes += n_bytes;
+ str->n_chars += n_chars;
+}
+
static void
pop_string(SerdReader* reader, Ref ref)
{
@@ -968,20 +988,33 @@ read_nodeID(SerdReader* reader)
{
eat_byte(reader, '_');
eat_byte(reader, ':');
- Ref str = push_string(reader, "", 0);
- return read_name(reader, str, true);
+ Ref ref = push_string(reader, "", 0);
+ read_name(reader, ref, true);
+ SerdString* const str = deref(reader, ref);
+ if (reader->syntax == SERD_TURTLE
+ && !strncmp((const char*)str->buf, "genid", 5)) {
+ // Replace "genid" nodes with "docid" to prevent clashing
+ memcpy(str->buf, "docid", 5);
+ }
+ return ref;
}
static Ref
blank_id(SerdReader* reader)
{
- const char* prefix = reader->blank_prefix
- ? (const char*)reader->blank_prefix
- : "genid";
- char str[32]; // FIXME: ensure length of reader->blank_prefix is OK
- const int len = snprintf(str, sizeof(str), "%s%u",
- prefix, reader->next_id++);
- return push_string(reader, str, len);
+ Ref str;
+ if (reader->bprefix) {
+ str = push_string(reader,
+ (const char*)reader->bprefix,
+ reader->bprefix_len);
+ } else {
+ str = push_string(reader, "", 0);
+ }
+ char num[32];
+ snprintf(num, sizeof(num), "%u", reader->next_id++);
+ append_string(reader, str, (const uint8_t*)"genid");
+ append_string(reader, str, (const uint8_t*)num);
+ return str;
}
// Spec: [21] blank ::= nodeID | '[]'
@@ -1368,29 +1401,33 @@ SERD_API
SerdReader*
serd_reader_new(SerdSyntax syntax,
void* handle,
+ void (*free_handle)(void*),
SerdBaseSink base_sink,
SerdPrefixSink prefix_sink,
SerdStatementSink statement_sink,
SerdEndSink end_sink)
{
const Cursor cur = { NULL, 0, 0 };
- SerdReader* me = malloc(sizeof(struct SerdReaderImpl));
- me->handle = handle;
- me->base_sink = base_sink;
- me->prefix_sink = prefix_sink;
- me->statement_sink = statement_sink;
- me->end_sink = end_sink;
- me->fd = 0;
- me->stack = serd_stack_new(STACK_PAGE_SIZE);
- me->cur = cur;
- me->blank_prefix = NULL;
- me->next_id = 1;
- me->read_buf = 0;
- me->read_head = 0;
- me->eof = false;
+ SerdReader* me = malloc(sizeof(struct SerdReaderImpl));
+ me->handle = handle;
+ me->free_handle = free_handle;
+ me->base_sink = base_sink;
+ me->prefix_sink = prefix_sink;
+ me->statement_sink = statement_sink;
+ me->end_sink = end_sink;
+ me->fd = 0;
+ me->stack = serd_stack_new(STACK_PAGE_SIZE);
+ me->syntax = syntax;
+ me->cur = cur;
+ me->bprefix = NULL;
+ me->bprefix_len = 0;
+ me->next_id = 1;
+ me->read_buf = 0;
+ me->read_head = 0;
+ me->eof = false;
#ifdef SERD_STACK_CHECK
- me->alloc_stack = 0;
- me->n_allocs = 0;
+ me->alloc_stack = 0;
+ me->n_allocs = 0;
#endif
#define RDF_FIRST NS_RDF "first"
@@ -1415,20 +1452,84 @@ serd_reader_free(SerdReader* reader)
free(reader->alloc_stack);
#endif
free(reader->stack.buf);
+ free(reader->bprefix);
+ if (reader->free_handle) {
+ reader->free_handle(reader->handle);
+ }
free(reader);
}
SERD_API
+void*
+serd_reader_get_handle(const SerdReader* reader)
+{
+ return reader->handle;
+}
+
+SERD_API
void
-serd_reader_set_blank_prefix(SerdReader* reader,
+serd_reader_add_blank_prefix(SerdReader* reader,
const uint8_t* prefix)
{
- reader->blank_prefix = prefix;
+ if (reader->bprefix) {
+ free(reader->bprefix);
+ reader->bprefix_len = 0;
+ reader->bprefix = NULL;
+ }
+ if (prefix) {
+ reader->bprefix_len = strlen((const char*)prefix);
+ reader->bprefix = malloc(reader->bprefix_len + 1);
+ memcpy(reader->bprefix, prefix, reader->bprefix_len + 1);
+ }
+}
+
+static const uint8_t*
+file_uri_to_path(const uint8_t* uri)
+{
+ const uint8_t* filename = NULL;
+ if (serd_uri_string_has_scheme(uri)) {
+ // Absolute URI, ensure it is a file and chop scheme
+ if (strncmp((const char*)uri, "file:", 5)) {
+ fprintf(stderr, "Unsupported URI scheme `%s'\n", uri);
+ return NULL;
+#ifdef __WIN32__
+ } else if (!strncmp((const char*)uri, "file:///", 8)) {
+ filename = uri + 8;
+#else
+ } else if (!strncmp((const char*)uri, "file://", 7)) {
+ filename = uri + 7;
+#endif
+ } else {
+ filename = uri + 5;
+ }
+ } else {
+ filename = uri;
+ }
+ return filename;
+}
+
+SERD_API
+SerdStatus
+serd_reader_read_file(SerdReader* reader,
+ const uint8_t* uri)
+{
+ const uint8_t* path = file_uri_to_path(uri);
+ if (!path) {
+ return SERD_ERR_BAD_ARG;
+ }
+
+ FILE* fd = fopen((const char*)path, "r");
+ if (!fd) {
+ return SERD_ERR_UNKNOWN;
+ }
+ SerdStatus ret = serd_reader_read_file_handle(reader, fd, path);
+ fclose(fd);
+ return ret;
}
SERD_API
SerdStatus
-serd_reader_read_file(SerdReader* me, FILE* file, const uint8_t* name)
+serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name)
{
const Cursor cur = { name, 1, 1 };
me->fd = file;
diff --git a/src/serdi.c b/src/serdi.c
index bc7c9d18..833e92d2 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -45,8 +45,11 @@ print_usage(const char* name, bool error)
fprintf(os, "Read and write RDF syntax.\n");
fprintf(os, "Use - for INPUT to read from standard input.\n\n");
fprintf(os, " -h Display this help and exit\n");
+ fprintf(os, " -i SYNTAX Input syntax (`turtle' or `ntriples')\n");
fprintf(os, " -o SYNTAX Output syntax (`turtle' or `ntriples')\n");
fprintf(os, " -s INPUT Parse INPUT as string (terminates options)\n");
+ fprintf(os, " -p PREFIX Add PREFIX to blank node IDs\n");
+ fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs\n");
fprintf(os, " -v Display version information and exit\n");
return error ? 1 : 0;
}
@@ -58,6 +61,20 @@ file_sink(const void* buf, size_t len, void* stream)
return fwrite(buf, 1, len, file);
}
+bool
+set_syntax(SerdSyntax* syntax, const char* name)
+{
+ if (!strcmp(name, "turtle")) {
+ *syntax = SERD_TURTLE;
+ } else if (!strcmp(name, "ntriples")) {
+ *syntax = SERD_NTRIPLES;
+ } else {
+ fprintf(stderr, "Unknown input format `%s'\n", name);
+ return false;
+ }
+ return true;
+}
+
int
main(int argc, char** argv)
{
@@ -65,14 +82,17 @@ main(int argc, char** argv)
return print_usage(argv[0], true);
}
- FILE* in_fd = NULL;
- SerdSyntax output_syntax = SERD_NTRIPLES;
- bool from_file = true;
- const char* in_name = NULL;
+ FILE* in_fd = NULL;
+ SerdSyntax input_syntax = SERD_TURTLE;
+ SerdSyntax output_syntax = SERD_NTRIPLES;
+ bool from_file = true;
+ const uint8_t* in_name = NULL;
+ const uint8_t* add_prefix = NULL;
+ const uint8_t* chop_prefix = NULL;
int a = 1;
for (; a < argc && argv[a][0] == '-'; ++a) {
if (argv[a][1] == '\0') {
- in_name = "(stdin)";
+ in_name = (const uint8_t*)"(stdin)";
in_fd = stdin;
break;
} else if (argv[a][1] == 'h') {
@@ -80,23 +100,38 @@ main(int argc, char** argv)
} else if (argv[a][1] == 'v') {
return print_version();
} else if (argv[a][1] == 's') {
- in_name = "(string)";
+ in_name = (const uint8_t*)"(string)";
from_file = false;
++a;
break;
+ } else if (argv[a][1] == 'i') {
+ if (++a == argc) {
+ fprintf(stderr, "Missing value for -i\n");
+ return 1;
+ }
+ if (!set_syntax(&input_syntax, argv[a])) {
+ return 1;
+ }
} else if (argv[a][1] == 'o') {
if (++a == argc) {
fprintf(stderr, "Missing value for -o\n");
return 1;
}
- if (!strcmp(argv[a], "turtle")) {
- output_syntax = SERD_TURTLE;
- } else if (!strcmp(argv[a], "ntriples")) {
- output_syntax = SERD_NTRIPLES;
- } else {
- fprintf(stderr, "Unknown output format `%s'\n", argv[a]);
+ if (!set_syntax(&output_syntax, argv[a])) {
return 1;
}
+ } else if (argv[a][1] == 'p') {
+ if (++a == argc) {
+ fprintf(stderr, "Missing value for -p\n");
+ return 1;
+ }
+ add_prefix = (const uint8_t*)argv[a];
+ } else if (argv[a][1] == 'c') {
+ if (++a == argc) {
+ fprintf(stderr, "Missing value for -c\n");
+ return 1;
+ }
+ chop_prefix = (const uint8_t*)argv[a];
} else {
fprintf(stderr, "Unknown option `%s'\n", argv[a]);
return print_usage(argv[0], true);
@@ -105,7 +140,7 @@ main(int argc, char** argv)
const uint8_t* input = (const uint8_t*)argv[a++];
if (from_file) {
- in_name = in_name ? in_name : (const char*)input;
+ in_name = in_name ? in_name : input;
if (!in_fd) {
if (serd_uri_string_has_scheme(input)) {
// INPUT is an absolute URI, ensure it a file and chop scheme
@@ -132,27 +167,25 @@ main(int argc, char** argv)
}
const uint8_t* base_uri_str = NULL;
- SerdURI base_uri;
if (a < argc) { // Base URI given on command line
- const uint8_t* const in_base_uri = (const uint8_t*)argv[a];
- if (serd_uri_parse((const uint8_t*)in_base_uri, &base_uri)) {
- fprintf(stderr, "Invalid base URI <%s>\n", argv[2]);
- return 1;
- }
- base_uri_str = in_base_uri;
+ base_uri_str = (const uint8_t*)argv[a];
} else if (from_file) { // Use input file URI
base_uri_str = input;
} else {
base_uri_str = (const uint8_t*)"";
}
- if (serd_uri_parse(base_uri_str, &base_uri)) {
+ SerdURI base_uri = SERD_URI_NULL;
+ SerdNode base_uri_node = serd_node_new_uri_from_string(
+ base_uri_str, &base_uri, &base_uri);
+
+ if (!base_uri_node.buf) {
fprintf(stderr, "Invalid base URI <%s>\n", base_uri_str);
return 1;
}
FILE* out_fd = stdout;
- SerdEnv* env = serd_env_new();
+ SerdEnv* env = serd_env_new(&base_uri_node);
SerdStyle output_style = SERD_STYLE_RESOLVED;
if (output_syntax == SERD_NTRIPLES) {
@@ -161,24 +194,28 @@ main(int argc, char** argv)
output_style |= SERD_STYLE_ABBREVIATED;
}
- SerdNode base_uri_node = serd_node_from_string(SERD_URI, base_uri_str);
- serd_env_set_base_uri(env, &base_uri_node);
- serd_env_get_base_uri(env, &base_uri);
-
SerdWriter* writer = serd_writer_new(
output_syntax, output_style, env, &base_uri, file_sink, out_fd);
+ if (chop_prefix) {
+ serd_writer_chop_blank_prefix(writer, chop_prefix);
+ }
+
State state = { env, writer };
SerdReader* reader = serd_reader_new(
- SERD_TURTLE, state.writer,
+ input_syntax, state.writer, NULL,
(SerdBaseSink)serd_writer_set_base_uri,
(SerdPrefixSink)serd_writer_set_prefix,
(SerdStatementSink)serd_writer_write_statement,
(SerdEndSink)serd_writer_end_anon);
+ if (add_prefix) {
+ serd_reader_add_blank_prefix(reader, add_prefix);
+ }
+
const SerdStatus status = (from_file)
- ? serd_reader_read_file(reader, in_fd, (const uint8_t*)in_name)
+ ? serd_reader_read_file_handle(reader, in_fd, in_name)
: serd_reader_read_string(reader, input);
serd_reader_free(reader);
@@ -190,6 +227,7 @@ main(int argc, char** argv)
serd_writer_finish(state.writer);
serd_writer_free(state.writer);
serd_env_free(state.env);
+ serd_node_free(&base_uri_node);
return (status == SERD_SUCCESS) ? 0 : 1;
}
diff --git a/src/writer.c b/src/writer.c
index d0120cde..fafde07c 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -44,6 +44,8 @@ struct SerdWriterImpl {
SerdSink sink;
void* stream;
WriteContext context;
+ uint8_t* bprefix;
+ size_t bprefix_len;
unsigned indent;
bool empty;
};
@@ -214,7 +216,15 @@ write_node(SerdWriter* writer,
} // else fall through
case SERD_BLANK_ID:
writer->sink("_:", 2, writer->stream);
- writer->sink(node->buf, node->n_bytes, writer->stream);
+ if (writer->bprefix
+ && !strncmp((const char*)node->buf, (const char*)writer->bprefix,
+ writer->bprefix_len)) {
+ writer->sink(node->buf + writer->bprefix_len,
+ node->n_bytes - writer->bprefix_len,
+ writer->stream);
+ } else {
+ writer->sink(node->buf, node->n_bytes, writer->stream);
+ }
break;
case SERD_CURIE:
switch (writer->syntax) {
@@ -445,20 +455,39 @@ serd_writer_new(SerdSyntax syntax,
{
const WriteContext context = WRITE_CONTEXT_NULL;
SerdWriter* writer = malloc(sizeof(struct SerdWriterImpl));
- writer->syntax = syntax;
- writer->style = style;
- writer->env = env;
- writer->base_uri = base_uri ? *base_uri : SERD_URI_NULL;
- writer->anon_stack = serd_stack_new(sizeof(WriteContext));
- writer->sink = sink;
- writer->stream = stream;
- writer->context = context;
- writer->indent = 0;
- writer->empty = true;
+ writer->syntax = syntax;
+ writer->style = style;
+ writer->env = env;
+ writer->base_uri = base_uri ? *base_uri : SERD_URI_NULL;
+ writer->anon_stack = serd_stack_new(sizeof(WriteContext));
+ writer->sink = sink;
+ writer->stream = stream;
+ writer->context = context;
+ writer->bprefix = NULL;
+ writer->bprefix_len = 0;
+ writer->indent = 0;
+ writer->empty = true;
return writer;
}
SERD_API
+void
+serd_writer_chop_blank_prefix(SerdWriter* writer,
+ const uint8_t* prefix)
+{
+ if (writer->bprefix) {
+ free(writer->bprefix);
+ writer->bprefix_len = 0;
+ writer->bprefix = NULL;
+ }
+ if (prefix) {
+ writer->bprefix_len = strlen((const char*)prefix);
+ writer->bprefix = malloc(writer->bprefix_len + 1);
+ memcpy(writer->bprefix, prefix, writer->bprefix_len + 1);
+ }
+}
+
+SERD_API
SerdStatus
serd_writer_set_base_uri(SerdWriter* writer,
const SerdNode* uri)
@@ -512,5 +541,6 @@ serd_writer_free(SerdWriter* writer)
SerdWriter* const me = (SerdWriter*)writer;
serd_writer_finish(me);
serd_stack_free(&writer->anon_stack);
+ free(writer->bprefix);
free(me);
}