From b92d598a22fdad8c96a1167362d4bb79015af006 Mon Sep 17 00:00:00 2001
From: David Robillard <d@drobilla.net>
Date: Wed, 18 May 2011 02:00:03 +0000
Subject: Add test to ensure blank node IDs don't clash with generated IDs. Add
 handle destructor parameter to serd_reader_new. Add serd_reader_get_handle.
 Rename serd_reader_set_blank_prefix to serd_reader_add_blank_prefix. Rename
 serd_reader_read_file to serd_reader_read_file_handle. Add new
 serd_reader_read_file that takes a path/URI parameter. Add serdi -i option to
 select input syntax. Add serdi -p and -c options to add/chop a prefix to/from
 blank IDs. Add optional base_uri parameter to serd_env_new. Add
 serd_writer_chop_blank_prefix. Bump version to 0.3.0.

git-svn-id: http://svn.drobilla.net/serd/trunk@183 490d8e77-9747-427b-9fa3-0b8f29cee8a0
---
 src/env.c    |   5 +-
 src/reader.c | 159 ++++++++++++++++++++++++++++++++++++++++++++++++-----------
 src/serdi.c  |  94 ++++++++++++++++++++++++-----------
 src/writer.c |  52 ++++++++++++++-----
 4 files changed, 241 insertions(+), 69 deletions(-)

(limited to 'src')

diff --git a/src/env.c b/src/env.c
index 64fccaac..a9b8e474 100644
--- a/src/env.c
+++ b/src/env.c
@@ -35,13 +35,16 @@ struct SerdEnvImpl {
 
 SERD_API
 SerdEnv*
-serd_env_new()
+serd_env_new(const SerdNode* base_uri)
 {
 	SerdEnv* env = malloc(sizeof(struct SerdEnvImpl));
 	env->prefixes      = NULL;
 	env->n_prefixes    = 0;
 	env->base_uri_node = SERD_NODE_NULL;
 	env->base_uri      = SERD_URI_NULL;
+	if (base_uri) {  // Optional: NULL keeps the null base URI assigned above
+		serd_env_set_base_uri(env, base_uri);
+	}
 	return env;
 }
 
diff --git a/src/reader.c b/src/reader.c
index 44910ddf..12109ad3 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -65,6 +65,7 @@ static const Node INTERNAL_NODE_NULL = { 0, 0 };
 
 struct SerdReaderImpl {
 	void*             handle;
+	void              (*free_handle)(void*);
 	SerdBaseSink      base_sink;
 	SerdPrefixSink    prefix_sink;
 	SerdStatementSink statement_sink;
@@ -75,9 +76,11 @@ struct SerdReaderImpl {
 	Node              rdf_nil;
 	FILE*             fd;
 	SerdStack         stack;
+	SerdSyntax        syntax;
 	Cursor            cur;
 	uint8_t*          buf;
-	const uint8_t*    blank_prefix;
+	uint8_t*          bprefix;
+	size_t            bprefix_len;
 	unsigned          next_id;
 	int               err;
 	uint8_t*          read_buf;
@@ -241,6 +244,23 @@ push_byte(SerdReader* reader, Ref ref, const uint8_t c)
 	str->buf[str->n_bytes]     = '\0';
 }
 
+static inline void
+append_string(SerdReader* reader, Ref ref, const uint8_t* suffix)
+{  // Appends the string `suffix` to the stack string that `ref` refers to
+	#ifdef SERD_STACK_CHECK
+	assert(stack_is_top_string(reader, ref));
+	#endif
+	size_t   n_bytes;
+	uint32_t flags   = 0;  // Only passed to serd_strlen; never read here
+	size_t   n_chars = serd_strlen(suffix, &n_bytes, &flags);
+	serd_stack_push(&reader->stack, n_bytes);  // Presumably reserves room; confirm
+	SerdString* const str = deref(reader, ref);
+	assert(str->n_bytes >= str->n_chars);
+	memcpy(str->buf + str->n_bytes, suffix, n_bytes + 1);  // + 1: presumably the NUL
+	str->n_bytes += n_bytes;
+	str->n_chars += n_chars;
+}
+
 static void
 pop_string(SerdReader* reader, Ref ref)
 {
@@ -968,20 +988,33 @@ read_nodeID(SerdReader* reader)
 {
 	eat_byte(reader, '_');
 	eat_byte(reader, ':');
-	Ref str = push_string(reader, "", 0);
-	return read_name(reader, str, true);
+	Ref ref = push_string(reader, "", 0);
+	read_name(reader, ref, true);
+	SerdString* const str = deref(reader, ref);
+	if (reader->syntax == SERD_TURTLE
+	    && !strncmp((const char*)str->buf, "genid", 5)) {
+		// Replace "genid" nodes with "docid" to prevent clashing
+		memcpy(str->buf, "docid", 5);
+	}
+	return ref;
 }
 
 static Ref
 blank_id(SerdReader* reader)
 {
-	const char* prefix = reader->blank_prefix
-		? (const char*)reader->blank_prefix
-		: "genid";
-	char str[32];  // FIXME: ensure length of reader->blank_prefix is OK
-	const int len = snprintf(str, sizeof(str), "%s%u",
-	                         prefix, reader->next_id++);
-	return push_string(reader, str, len);
+	// Result is "<bprefix>genid<N>"; next_id is consumed for N
+	const Ref id = reader->bprefix
+		? push_string(reader,
+		              (const char*)reader->bprefix,
+		              reader->bprefix_len)
+		: push_string(reader, "", 0);
+
+	// The decimal counter is formatted separately and appended last
+	char digits[32];
+	snprintf(digits, sizeof(digits), "%u", reader->next_id++);
+	append_string(reader, id, (const uint8_t*)"genid");
+	append_string(reader, id, (const uint8_t*)digits);
+	return id;
 }
 
 // Spec: [21] blank ::= nodeID | '[]'
@@ -1368,29 +1401,33 @@ SERD_API
 SerdReader*
 serd_reader_new(SerdSyntax        syntax,
                 void*             handle,
+                void              (*free_handle)(void*),
                 SerdBaseSink      base_sink,
                 SerdPrefixSink    prefix_sink,
                 SerdStatementSink statement_sink,
                 SerdEndSink       end_sink)
 {
 	const Cursor cur = { NULL, 0, 0 };
-	SerdReader*   me = malloc(sizeof(struct SerdReaderImpl));
-	me->handle         = handle;
-	me->base_sink      = base_sink;
-	me->prefix_sink    = prefix_sink;
-	me->statement_sink = statement_sink;
-	me->end_sink       = end_sink;
-	me->fd             = 0;
-	me->stack          = serd_stack_new(STACK_PAGE_SIZE);
-	me->cur            = cur;
-	me->blank_prefix   = NULL;
-	me->next_id        = 1;
-	me->read_buf       = 0;
-	me->read_head      = 0;
-	me->eof            = false;
+	SerdReader*  me  = malloc(sizeof(struct SerdReaderImpl));
+	me->handle           = handle;
+	me->free_handle      = free_handle;
+	me->base_sink        = base_sink;
+	me->prefix_sink      = prefix_sink;
+	me->statement_sink   = statement_sink;
+	me->end_sink         = end_sink;
+	me->fd               = 0;
+	me->stack            = serd_stack_new(STACK_PAGE_SIZE);
+	me->syntax           = syntax;
+	me->cur              = cur;
+	me->bprefix          = NULL;
+	me->bprefix_len      = 0;
+	me->next_id          = 1;
+	me->read_buf         = 0;
+	me->read_head        = 0;
+	me->eof              = false;
 #ifdef SERD_STACK_CHECK
-	me->alloc_stack    = 0;
-	me->n_allocs       = 0;
+	me->alloc_stack      = 0;
+	me->n_allocs         = 0;
 #endif
 
 #define RDF_FIRST NS_RDF "first"
@@ -1415,20 +1452,84 @@ serd_reader_free(SerdReader* reader)
 	free(reader->alloc_stack);
 #endif
 	free(reader->stack.buf);
+	free(reader->bprefix);
+	if (reader->free_handle) {
+		reader->free_handle(reader->handle);
+	}
 	free(reader);
 }
 
+SERD_API
+void*
+serd_reader_get_handle(const SerdReader* reader)
+{
+	return reader->handle;  // The `handle` stored by serd_reader_new
+}
+
 SERD_API
 void
-serd_reader_set_blank_prefix(SerdReader*    reader,
+serd_reader_add_blank_prefix(SerdReader*    reader,
                              const uint8_t* prefix)
 {
-	reader->blank_prefix = prefix;
+	// Store a private copy of `prefix` for blank_id(); NULL clears it.
+	// Copy before freeing; a failed malloc just leaves the prefix cleared.
+	const size_t len  = prefix ? strlen((const char*)prefix) : 0;
+	uint8_t*     copy = prefix ? malloc(len + 1) : NULL;
+	if (copy) {
+		memcpy(copy, prefix, len + 1);
+	}
+	free(reader->bprefix);
+	reader->bprefix     = copy;
+	reader->bprefix_len = copy ? len : 0;
+}
+
+/** Return the path part of `uri`: a file URI or plain path (NULL on error). */
+static const uint8_t*
+file_uri_to_path(const uint8_t* uri)
+{
+	if (!serd_uri_string_has_scheme(uri)) {
+		return uri;  // No scheme, so treat the string as a path already
+	}
+	// Absolute URI, ensure it is a file URI and chop the scheme
+	if (strncmp((const char*)uri, "file:", 5)) {
+		fprintf(stderr, "Unsupported URI scheme `%s'\n", uri);
+		return NULL;
+	}
+#if defined(_WIN32) || defined(__WIN32__)
+	// _WIN32 is what MSVC predefines; "file:///C:/x" yields "C:/x"
+	if (!strncmp((const char*)uri, "file:///", 8)) {
+		return uri + 8;
+	}
+#else
+	if (!strncmp((const char*)uri, "file://", 7)) {
+		return uri + 7;
+	}
+#endif
+	return uri + 5;
+}
+
+/** Open the file at `uri` (file URI or path), read it, then close it. */
+SERD_API
+SerdStatus
+serd_reader_read_file(SerdReader*    reader,
+                      const uint8_t* uri)
+{
+	const uint8_t* const path = file_uri_to_path(uri);
+	if (path == NULL) {
+		return SERD_ERR_BAD_ARG;  // file_uri_to_path gave no usable path
+	}
+	FILE* const file = fopen((const char*)path, "r");
+	if (file == NULL) {
+		return SERD_ERR_UNKNOWN;
+	}
+	const SerdStatus status = serd_reader_read_file_handle(reader, file, path);
+	fclose(file);
+	return status;
 }
 
 SERD_API
 SerdStatus
-serd_reader_read_file(SerdReader* me, FILE* file, const uint8_t* name)
+serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name)
 {
 	const Cursor cur = { name, 1, 1 };
 	me->fd        = file;
diff --git a/src/serdi.c b/src/serdi.c
index bc7c9d18..833e92d2 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -45,8 +45,11 @@ print_usage(const char* name, bool error)
 	fprintf(os, "Read and write RDF syntax.\n");
 	fprintf(os, "Use - for INPUT to read from standard input.\n\n");
 	fprintf(os, "  -h           Display this help and exit\n");
+	fprintf(os, "  -i SYNTAX    Input syntax (`turtle' or `ntriples')\n");
 	fprintf(os, "  -o SYNTAX    Output syntax (`turtle' or `ntriples')\n");
 	fprintf(os, "  -s INPUT     Parse INPUT as string (terminates options)\n");
+	fprintf(os, "  -p PREFIX    Add PREFIX to blank node IDs\n");
+	fprintf(os, "  -c PREFIX    Chop PREFIX from matching blank node IDs\n");
 	fprintf(os, "  -v           Display version information and exit\n");
 	return error ? 1 : 0;
 }
@@ -58,6 +61,20 @@ file_sink(const void* buf, size_t len, void* stream)
 	return fwrite(buf, 1, len, file);
 }
 
+/** Parse the syntax name given to -i or -o; returns false if unknown. */
+bool
+set_syntax(SerdSyntax* syntax, const char* name)
+{
+	const bool turtle = !strcmp(name, "turtle");
+	if (!turtle && strcmp(name, "ntriples")) {
+		// Shared by -i and -o, so do not call the value an "input" format
+		fprintf(stderr, "Unknown syntax `%s'\n", name);
+		return false;
+	}
+	*syntax = turtle ? SERD_TURTLE : SERD_NTRIPLES;
+	return true;
+}
+
 int
 main(int argc, char** argv)
 {
@@ -65,14 +82,17 @@ main(int argc, char** argv)
 		return print_usage(argv[0], true);
 	}
 
-	FILE*       in_fd         = NULL;
-	SerdSyntax  output_syntax = SERD_NTRIPLES;
-	bool        from_file     = true;
-	const char* in_name       = NULL;
+	FILE*          in_fd         = NULL;
+	SerdSyntax     input_syntax  = SERD_TURTLE;
+	SerdSyntax     output_syntax = SERD_NTRIPLES;
+	bool           from_file     = true;
+	const uint8_t* in_name       = NULL;
+	const uint8_t* add_prefix    = NULL;
+	const uint8_t* chop_prefix   = NULL;
 	int a = 1;
 	for (; a < argc && argv[a][0] == '-'; ++a) {
 		if (argv[a][1] == '\0') {
-			in_name = "(stdin)";
+			in_name = (const uint8_t*)"(stdin)";
 			in_fd   = stdin;
 			break;
 		} else if (argv[a][1] == 'h') {
@@ -80,23 +100,38 @@ main(int argc, char** argv)
 		} else if (argv[a][1] == 'v') {
 			return print_version();
 		} else if (argv[a][1] == 's') {
-			in_name = "(string)";
+			in_name = (const uint8_t*)"(string)";
 			from_file = false;
 			++a;
 			break;
+		} else if (argv[a][1] == 'i') {
+			if (++a == argc) {
+				fprintf(stderr, "Missing value for -i\n");
+				return 1;
+			}
+			if (!set_syntax(&input_syntax, argv[a])) {
+				return 1;
+			}
 		} else if (argv[a][1] == 'o') {
 			if (++a == argc) {
 				fprintf(stderr, "Missing value for -o\n");
 				return 1;
 			}
-			if (!strcmp(argv[a], "turtle")) {
-				output_syntax = SERD_TURTLE;
-			} else if (!strcmp(argv[a], "ntriples")) {
-				output_syntax = SERD_NTRIPLES;
-			} else {
-				fprintf(stderr, "Unknown output format `%s'\n",  argv[a]);
+			if (!set_syntax(&output_syntax, argv[a])) {
 				return 1;
 			}
+		} else if (argv[a][1] == 'p') {
+			if (++a == argc) {
+				fprintf(stderr, "Missing value for -p\n");
+				return 1;
+			}
+			add_prefix = (const uint8_t*)argv[a];
+		} else if (argv[a][1] == 'c') {
+			if (++a == argc) {
+				fprintf(stderr, "Missing value for -c\n");
+				return 1;
+			}
+			chop_prefix = (const uint8_t*)argv[a];
 		} else {
 			fprintf(stderr, "Unknown option `%s'\n", argv[a]);
 			return print_usage(argv[0], true);
@@ -105,7 +140,7 @@ main(int argc, char** argv)
 
 	const uint8_t* input = (const uint8_t*)argv[a++];
 	if (from_file) {
-		in_name = in_name ? in_name : (const char*)input;
+		in_name = in_name ? in_name : input;
 		if (!in_fd) {
 			if (serd_uri_string_has_scheme(input)) {
 				// INPUT is an absolute URI, ensure it a file and chop scheme
@@ -132,27 +167,25 @@ main(int argc, char** argv)
 	}
 
 	const uint8_t* base_uri_str = NULL;
-	SerdURI        base_uri;
 	if (a < argc) {  // Base URI given on command line
-		const uint8_t* const in_base_uri = (const uint8_t*)argv[a];
-		if (serd_uri_parse((const uint8_t*)in_base_uri, &base_uri)) {
-			fprintf(stderr, "Invalid base URI <%s>\n", argv[2]);
-			return 1;
-		}
-		base_uri_str = in_base_uri;
+		base_uri_str = (const uint8_t*)argv[a];
 	} else if (from_file) {  // Use input file URI
 		base_uri_str = input;
 	} else {
 		base_uri_str = (const uint8_t*)"";
 	}
 
-	if (serd_uri_parse(base_uri_str, &base_uri)) {
+	SerdURI  base_uri = SERD_URI_NULL;
+	SerdNode base_uri_node = serd_node_new_uri_from_string(
+		base_uri_str, &base_uri, &base_uri);
+	
+	if (!base_uri_node.buf) {
 		fprintf(stderr, "Invalid base URI <%s>\n", base_uri_str);
 		return 1;
 	}
 
 	FILE*    out_fd = stdout;
-	SerdEnv* env    = serd_env_new();
+	SerdEnv* env    = serd_env_new(&base_uri_node);
 
 	SerdStyle output_style = SERD_STYLE_RESOLVED;
 	if (output_syntax == SERD_NTRIPLES) {
@@ -161,24 +194,28 @@ main(int argc, char** argv)
 		output_style |= SERD_STYLE_ABBREVIATED;
 	}
 
-	SerdNode base_uri_node = serd_node_from_string(SERD_URI, base_uri_str);
-	serd_env_set_base_uri(env, &base_uri_node);
-	serd_env_get_base_uri(env, &base_uri);
-
 	SerdWriter* writer = serd_writer_new(
 		output_syntax, output_style, env, &base_uri, file_sink, out_fd);
 
+	if (chop_prefix) {
+		serd_writer_chop_blank_prefix(writer, chop_prefix);
+	}
+
 	State state = { env, writer };
 
 	SerdReader* reader = serd_reader_new(
-		SERD_TURTLE, state.writer,
+		input_syntax, state.writer, NULL,
 		(SerdBaseSink)serd_writer_set_base_uri,
 		(SerdPrefixSink)serd_writer_set_prefix,
 		(SerdStatementSink)serd_writer_write_statement,
 		(SerdEndSink)serd_writer_end_anon);
 
+	if (add_prefix) {
+		serd_reader_add_blank_prefix(reader, add_prefix);
+	}
+
 	const SerdStatus status = (from_file)
-		? serd_reader_read_file(reader, in_fd, (const uint8_t*)in_name)
+		? serd_reader_read_file_handle(reader, in_fd, in_name)
 		: serd_reader_read_string(reader, input);
 
 	serd_reader_free(reader);
@@ -190,6 +227,7 @@ main(int argc, char** argv)
 	serd_writer_finish(state.writer);
 	serd_writer_free(state.writer);
 	serd_env_free(state.env);
+	serd_node_free(&base_uri_node);
 
 	return (status == SERD_SUCCESS) ? 0 : 1;
 }
diff --git a/src/writer.c b/src/writer.c
index d0120cde..fafde07c 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -44,6 +44,8 @@ struct SerdWriterImpl {
 	SerdSink     sink;
 	void*        stream;
 	WriteContext context;
+	uint8_t*     bprefix;
+	size_t       bprefix_len;
 	unsigned     indent;
 	bool         empty;
 };
@@ -214,7 +216,15 @@ write_node(SerdWriter*     writer,
 		}  // else fall through
 	case SERD_BLANK_ID:
 		writer->sink("_:", 2, writer->stream);
-		writer->sink(node->buf, node->n_bytes, writer->stream);
+		if (writer->bprefix
+		    && !strncmp((const char*)node->buf, (const char*)writer->bprefix,
+		                writer->bprefix_len)) {
+			writer->sink(node->buf + writer->bprefix_len,
+			             node->n_bytes - writer->bprefix_len,
+			             writer->stream);
+		} else {
+			writer->sink(node->buf, node->n_bytes, writer->stream);
+		}
 		break;
 	case SERD_CURIE:
 		switch (writer->syntax) {
@@ -445,19 +455,38 @@ serd_writer_new(SerdSyntax     syntax,
 {
 	const WriteContext context = WRITE_CONTEXT_NULL;
 	SerdWriter*        writer  = malloc(sizeof(struct SerdWriterImpl));
-	writer->syntax     = syntax;
-	writer->style      = style;
-	writer->env        = env;
-	writer->base_uri   = base_uri ? *base_uri : SERD_URI_NULL;
-	writer->anon_stack = serd_stack_new(sizeof(WriteContext));
-	writer->sink       = sink;
-	writer->stream     = stream;
-	writer->context    = context;
-	writer->indent     = 0;
-	writer->empty      = true;
+	writer->syntax      = syntax;
+	writer->style       = style;
+	writer->env         = env;
+	writer->base_uri    = base_uri ? *base_uri : SERD_URI_NULL;
+	writer->anon_stack  = serd_stack_new(sizeof(WriteContext));
+	writer->sink        = sink;
+	writer->stream      = stream;
+	writer->context     = context;
+	writer->bprefix     = NULL;
+	writer->bprefix_len = 0;
+	writer->indent      = 0;
+	writer->empty       = true;
 	return writer;
 }
 
+/** Keep a copy of `prefix` for write_node to chop from IDs (NULL clears). */
+SERD_API
+void
+serd_writer_chop_blank_prefix(SerdWriter*    writer,
+                              const uint8_t* prefix)
+{
+	// Copy before freeing; a failed malloc just leaves the prefix cleared
+	const size_t len  = prefix ? strlen((const char*)prefix) : 0;
+	uint8_t*     copy = prefix ? malloc(len + 1) : NULL;
+	if (copy) {
+		memcpy(copy, prefix, len + 1);
+	}
+	free(writer->bprefix);
+	writer->bprefix     = copy;
+	writer->bprefix_len = copy ? len : 0;
+}
+
 SERD_API
 SerdStatus
 serd_writer_set_base_uri(SerdWriter*     writer,
@@ -512,5 +541,6 @@ serd_writer_free(SerdWriter* writer)
 	SerdWriter* const me = (SerdWriter*)writer;
 	serd_writer_finish(me);
 	serd_stack_free(&writer->anon_stack);
+	free(writer->bprefix);
 	free(me);
 }
-- 
cgit v1.2.1