/*
  Copyright 2011-2020 David Robillard <http://drobilla.net>

  Permission to use, copy, modify, and/or distribute this software for any
  purpose with or without fee is hereby granted, provided that the above
  copyright notice and this permission notice appear in all copies.

  THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/

#define _XOPEN_SOURCE 600 /* for realpath */

#include "system.h"

#include "serd/serd.h"

#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN 1
#include <fcntl.h>
#include <io.h>
#include <windows.h>
#endif

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Print a fixed message to stderr prefixed with "serdi: ".  `msg` must be a
   string literal, and is used as part of the printf format string. */
#define SERDI_ERROR(msg)       fprintf(stderr, "serdi: " msg)
/* Print a formatted message to stderr prefixed with "serdi: ".  `fmt` must be
   a string literal, and at least one format argument must be given. */
#define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__)

/* Copies of the nodes of the statement read from a filter pattern string.
   Filled in by on_filter_event() and freed by parse_filter(). */
typedef struct {
	SerdNode* s; // Copy of the statement subject
	SerdNode* p; // Copy of the statement predicate
	SerdNode* o; // Copy of the statement object
	SerdNode* g; // Copy of the statement graph
} FilterPattern;

/**
   Resolve `path` to an absolute path.

   Uses GetFullPathName() on Windows and realpath() elsewhere.

   @param path Path to resolve, may be NULL.
   @return A newly allocated string that the caller must free(), or NULL if
   `path` is NULL, memory is exhausted, or the path can not be resolved.
*/
static char*
serd_realpath(const char* const path)
{
	if (!path) {
		return NULL;
	}

#ifdef _WIN32
	// First call returns the required size, including the terminating null
	const DWORD size = GetFullPathName(path, 0, NULL, NULL);
	if (size == 0) {
		return NULL;
	}

	char* const out = (char*)calloc(size, 1);
	if (!out) {
		return NULL;
	}

	// Pass the real buffer size: it may be smaller or larger than MAX_PATH
	const DWORD ret = GetFullPathName(path, size, out, NULL);
	if (ret == 0 || ret >= size) {
		free(out);
		return NULL;
	}

	return out;
#else
	return realpath(path, NULL);
#endif
}

/**
   Print the program version and licensing notice to stdout.

   @return 0 always, for use as the exit status.
*/
static int
print_version(void)
{
	static const char legal[] =
	    "Copyright 2011-2020 David Robillard <http://drobilla.net>.\n"
	    "License: <http://www.opensource.org/licenses/isc>\n"
	    "This is free software; you are free to change and redistribute it.\n"
	    "There is NO WARRANTY, to the extent permitted by law.\n";

	printf("serdi " SERD_VERSION " <http://drobilla.net/software/serd>\n");
	fputs(legal, stdout);

	return 0;
}

/**
   Print the command line usage summary.

   @param name Program name to show in the usage line.
   @param error If true, print to stderr (preceded by a blank line) and
   return a failing status, otherwise print to stdout.
   @return 1 if `error` is true, otherwise 0.
*/
static int
print_usage(const char* name, bool error)
{
	// Option summary, kept in the same order as it is displayed
	static const char options[] =
	    "  -I BASE_URI  Input base URI.\n"
	    "  -V           Validate inputs.\n"
	    "  -a           Write ASCII output if possible.\n"
	    "  -b           Fast bulk output for large serialisations.\n"
	    "  -c PREFIX    Chop PREFIX from matching blank node IDs.\n"
	    "  -e           Eat input one character at a time.\n"
	    "  -f           Keep full URIs in input (don't qualify).\n"
	    "  -g PATTERN   Grep statements matching PATTERN.\n"
	    "  -h           Display this help and exit.\n"
	    "  -i SYNTAX    Input syntax: turtle/ntriples/trig/nquads.\n"
	    "  -k BYTES     Parser stack size.\n"
	    "  -l           Lax (non-strict) parsing.\n"
	    "  -m           Build and serialise a model (no streaming).\n"
	    "  -n           Normalise literals.\n"
	    "  -o SYNTAX    Output syntax: turtle/ntriples/nquads.\n"
	    "  -p PREFIX    Add PREFIX to blank node IDs.\n"
	    "  -q           Suppress all output except data.\n"
	    "  -r ROOT_URI  Keep relative URIs within ROOT_URI.\n"
	    "  -S           Stream model quickly without inlining.\n"
	    "  -s INPUT     Parse INPUT as string.\n"
	    "  -t           Write terser output without newlines.\n"
	    "  -v           Display version information and exit.\n"
	    "  -w FILENAME  Write output to FILENAME instead of stdout.\n"
	    "  -x           Support parsing variable nodes like `?x'.\n";

	FILE* const os = error ? stderr : stdout;

	if (error) {
		// Separate the usage text from the error message printed before it
		fputs("\n", os);
	}

	fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name);
	fputs("Read and write RDF syntax.\n", os);
	fputs("Use - for INPUT to read from standard input.\n\n", os);
	fputs(options, os);

	return error ? 1 : 0;
}

/**
   Report that option `opt` was given without its required argument.

   Prints an error message followed by the usage summary to stderr.

   @param name Program name to show in the usage line.
   @param opt The option character that is missing its argument.
   @return The failing status returned by print_usage().
*/
static int
missing_arg(const char* name, char opt)
{
	fprintf(stderr, "serdi: option requires an argument -- '%c'\n", opt);

	const int status = print_usage(name, true);
	return status;
}

/**
   Sink event handler that captures the statement of a filter pattern.

   Copies the nodes of the first statement event into the FilterPattern.
   Events of any other type are ignored.

   @param handle Pointer to the FilterPattern to fill in.
   @param event Event received from the pattern reader.
   @return SERD_SUCCESS, or SERD_ERR_INVALID if a statement has already been
   captured (the pattern contains more than one statement).
*/
static SerdStatus
on_filter_event(void* handle, const SerdEvent* event)
{
	if (event->type != SERD_STATEMENT) {
		return SERD_SUCCESS;
	}

	FilterPattern* const pat = (FilterPattern*)handle;
	if (pat->s) {
		// A subject is already stored, so this is a second statement
		fprintf(stderr, "serdi: filter pattern has more than one statement\n");
		return SERD_ERR_INVALID;
	}

	const SerdStatement* const statement = event->statement.statement;

	pat->s = serd_node_copy(serd_statement_subject(statement));
	pat->p = serd_node_copy(serd_statement_predicate(statement));
	pat->o = serd_node_copy(serd_statement_object(statement));
	pat->g = serd_node_copy(serd_statement_graph(statement));

	return SERD_SUCCESS;
}

/**
   Create a filter sink from a pattern string.

   The pattern is read as NQuads with variables enabled, and the nodes of the
   statement it contains are passed to serd_filter_new().

   @param world World to create the temporary pattern reader in.
   @param sink Target sink passed to serd_filter_new().
   @param str Pattern string to parse.
   @return A new filter sink, or NULL if the pattern could not be read.
*/
static SerdSink*
parse_filter(SerdWorld* world, const SerdSink* sink, const char* str)
{
	FilterPattern   pat         = {NULL, NULL, NULL, NULL};
	SerdSink*       in_sink     = serd_sink_new(&pat, NULL);
	SerdByteSource* byte_source = serd_byte_source_new_string(str, NULL);
	SerdReader*     reader =
	    serd_reader_new(world, SERD_NQUADS, SERD_READ_VARIABLES, in_sink, 4096);

	serd_sink_set_event_func(in_sink, on_filter_event);

	SerdStatus st = serd_reader_start(reader, byte_source);
	if (!st) {
		st = serd_reader_read_document(reader);
	}

	serd_reader_free(reader);
	serd_byte_source_free(byte_source);
	serd_sink_free(in_sink);

	SerdSink* const filter =
	    st ? NULL : serd_filter_new(sink, pat.s, pat.p, pat.o, pat.g);

	// Release the captured nodes on every path, including after a read error
	serd_node_free(pat.s);
	serd_node_free(pat.p);
	serd_node_free(pat.o);
	serd_node_free(pat.g);

	return filter;
}

/**
   Read one input document and send its events to `sink`.

   @param world World to create the reader in.
   @param syntax Input syntax, or 0 to guess from `filename` (falling back to
   TriG if it can not be guessed).
   @param flags Reader flags.
   @param sink Sink that receives the events that are read.
   @param stack_size Reader stack size.
   @param filename Path of the file to read, or "-" to read standard input.
   @param add_prefix Prefix to add to blank node IDs.
   @param bulk_read If true, files are read with a page-sized block, otherwise
   a block size of 1 is used.
   @return SERD_ERR_UNKNOWN if the input can not be opened, otherwise the
   status of starting and reading the document.
*/
static SerdStatus
read_file(SerdWorld* const      world,
          SerdSyntax            syntax,
          const SerdReaderFlags flags,
          const SerdSink* const sink,
          const size_t          stack_size,
          const char*           filename,
          const char*           add_prefix,
          bool                  bulk_read)
{
	// Settle on a syntax: explicit, then guessed, then TriG
	if (!syntax) {
		syntax = serd_guess_syntax(filename);
	}
	if (!syntax) {
		syntax = SERD_TRIG;
	}

	SerdByteSource* source = NULL;
	if (strcmp(filename, "-") == 0) {
		// Read standard input, named "stdin"
		SerdNode* const stdin_name = serd_new_string("stdin");

		source = serd_byte_source_new_function(serd_file_read_byte,
		                                       (SerdStreamErrorFunc)ferror,
		                                       stdin,
		                                       stdin_name,
		                                       1);

		serd_node_free(stdin_name);
	} else {
		source = serd_byte_source_new_filename(
		    filename, bulk_read ? SERD_PAGE_SIZE : 1u);
	}

	if (!source) {
		SERDI_ERRORF("failed to open input file `%s'\n", filename);
		return SERD_ERR_UNKNOWN;
	}

	SerdReader* const reader =
	    serd_reader_new(world, syntax, flags, sink, stack_size);

	serd_reader_add_blank_prefix(reader, add_prefix);

	// Only read the document if the reader started successfully
	SerdStatus status = serd_reader_start(reader, source);
	if (!status) {
		status = serd_reader_read_document(reader);
	}

	serd_reader_free(reader);
	serd_byte_source_free(source);

	return status;
}

/**
   Entry point of the serdi command line tool.

   Parses the command line options, sets up the output (a writer, or a model
   inserter when a model is used, optionally behind a normaliser and/or a
   filter), reads the string input and/or the input files into it, then
   validates and serialises the model if one was built.

   @return 0 on success, or 1 on error.
*/
int
main(int argc, char** argv)
{
	if (argc < 2) {
		return print_usage(argv[0], true);
	}

	SerdNode*       base          = NULL;
	SerdSyntax      input_syntax  = SERD_SYNTAX_EMPTY;
	SerdSyntax      output_syntax = SERD_SYNTAX_EMPTY;
	SerdReaderFlags reader_flags  = 0;
	SerdWriterFlags writer_flags  = 0;
	bool            bulk_read     = true;
	bool            bulk_write    = false;
	bool            no_inline     = false;
	bool            osyntax_set   = false;
	bool            validate      = false;
	bool            use_model     = false;
	bool            normalise     = false;
	bool            quiet         = false;
	size_t          stack_size    = 4194304;
	const char*     input_string  = NULL;
	const char*     pattern       = NULL;
	const char*     add_prefix    = "";
	const char*     chop_prefix   = NULL;
	const char*     root_uri      = NULL;
	const char*     out_filename  = NULL;
	int             a             = 1;

	// Parse options, stopping at the first non-option or a lone "-" (stdin)
	for (; a < argc && argv[a][0] == '-'; ++a) {
		if (argv[a][1] == '\0') {
			break;
		} else if (argv[a][1] == 'I') {
			if (++a == argc) {
				return missing_arg(argv[0], 'I');
			}
			base = serd_new_uri(argv[a]);
		} else if (argv[a][1] == 'V') {
			validate = use_model = true;
		} else if (argv[a][1] == 'a') {
			writer_flags |= SERD_WRITE_ASCII;
		} else if (argv[a][1] == 'b') {
			bulk_write = true;
		} else if (argv[a][1] == 'e') {
			bulk_read = false;
		} else if (argv[a][1] == 'f') {
			writer_flags |= (SERD_WRITE_UNQUALIFIED | SERD_WRITE_UNRESOLVED);
		} else if (argv[a][1] == 'h') {
			return print_usage(argv[0], false);
		} else if (argv[a][1] == 'l') {
			reader_flags |= SERD_READ_LAX;
			writer_flags |= SERD_WRITE_LAX;
		} else if (argv[a][1] == 'm') {
			use_model = true;
		} else if (argv[a][1] == 'n') {
			normalise = true;
		} else if (argv[a][1] == 'g') {
			if (++a == argc) {
				return missing_arg(argv[0], 'g');
			}
			pattern = argv[a];
		} else if (argv[a][1] == 'q') {
			quiet = true;
		} else if (argv[a][1] == 'v') {
			return print_version();
		} else if (argv[a][1] == 'S') {
			no_inline = true;
		} else if (argv[a][1] == 's') {
			if (++a == argc) {
				return missing_arg(argv[0], 's');
			}
			input_string = argv[a];
		} else if (argv[a][1] == 't') {
			writer_flags |= SERD_WRITE_TERSE;
		} else if (argv[a][1] == 'i') {
			if (++a == argc) {
				return missing_arg(argv[0], 'i');
			} else if (!(input_syntax = serd_syntax_by_name(argv[a]))) {
				return print_usage(argv[0], true);
			}
		} else if (argv[a][1] == 'k') {
			if (++a == argc) {
				return missing_arg(argv[0], 'k');
			}
			char*      endptr = NULL;
			const long size   = strtol(argv[a], &endptr, 10);
			if (size <= 0 || size == LONG_MAX || *endptr != '\0') {
				SERDI_ERRORF("invalid stack size `%s'\n", argv[a]);
				return 1;
			}
			stack_size = (size_t)size;
		} else if (argv[a][1] == 'o') {
			osyntax_set = true;
			if (++a == argc) {
				return missing_arg(argv[0], 'o');
			} else if (!strcmp(argv[a], "empty")) {
				output_syntax = SERD_SYNTAX_EMPTY;
			} else if (!(output_syntax = serd_syntax_by_name(argv[a]))) {
				return print_usage(argv[0], true);
			}
		} else if (argv[a][1] == 'p') {
			if (++a == argc) {
				return missing_arg(argv[0], 'p');
			}
			add_prefix = argv[a];
		} else if (argv[a][1] == 'c') {
			if (++a == argc) {
				return missing_arg(argv[0], 'c');
			}
			chop_prefix = argv[a];
		} else if (argv[a][1] == 'r') {
			if (++a == argc) {
				return missing_arg(argv[0], 'r');
			}
			root_uri = argv[a];
		} else if (argv[a][1] == 'w') {
			if (++a == argc) {
				return missing_arg(argv[0], 'w');
			}
			out_filename = argv[a];
		} else if (argv[a][1] == 'x') {
			reader_flags |= SERD_READ_VARIABLES;
		} else {
			SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1);
			return print_usage(argv[0], true);
		}
	}

	if (a == argc && !input_string) {
		SERDI_ERROR("missing input\n");
		return 1;
	}

	// Everything after the options is an input
	char** inputs   = argv + a;
	int    n_inputs = argc - a;

	// Graphs are needed if the given or any guessed input syntax has them
	bool input_has_graphs = serd_syntax_has_graphs(input_syntax);
	for (int i = a; i < argc; ++i) {
		if (serd_syntax_has_graphs(serd_guess_syntax(argv[i]))) {
			input_has_graphs = true;
			break;
		}
	}

	if (!output_syntax && !osyntax_set) {
		output_syntax = input_has_graphs ? SERD_NQUADS : SERD_NTRIPLES;
	}

	if (!base && n_inputs == 1 &&
	    (output_syntax == SERD_NQUADS || output_syntax == SERD_NTRIPLES)) {
		// Choose base URI from the single input path
		char* const abs_path = serd_realpath(inputs[0]);
		if (abs_path) {
			base = serd_new_file_uri(abs_path, NULL);
			free(abs_path);
		}
	}

	SerdWorld* world  = serd_world_new();
	SerdEnv*   env    = serd_env_new(base);

#ifdef _WIN32
	_setmode(_fileno(stdin), _O_BINARY);
	if (!out_filename) {
		_setmode(_fileno(stdout), _O_BINARY);
	}
#endif

	const SerdSerialisationFlags serialisation_flags =
		no_inline ? SERD_NO_INLINE_OBJECTS : 0u;

	const size_t  block_size = bulk_write ? 4096u : 1u;
	SerdByteSink* byte_sink =
	    out_filename ? serd_byte_sink_new_filename(out_filename, block_size)
	                 : serd_byte_sink_new_function((SerdWriteFunc)fwrite,
	                                               stdout,
	                                               block_size);

	if (!byte_sink) {
		// Report first, since cleaning up may change errno
		perror("serdi: error opening output file");
		serd_env_free(env);
		serd_node_free(base);
		serd_world_free(world);
		return 1;
	}

	SerdWriter* writer = serd_writer_new(world,
	                                     output_syntax,
	                                     writer_flags,
	                                     env,
	                                     byte_sink);

	SerdModel*      model    = NULL;
	SerdSink*       inserter = NULL;
	const SerdSink* out_sink = NULL;
	if (use_model) {
		const SerdModelFlags flags =
		        SERD_INDEX_SPO | (input_has_graphs ? SERD_INDEX_GRAPHS : 0u) |
		        (no_inline ? 0u : SERD_INDEX_OPS) |
		        (validate ? SERD_STORE_CURSORS : 0u);

		model    = serd_model_new(world, flags);
		inserter = serd_inserter_new(model, env, NULL);
		out_sink = inserter;
	} else {
		out_sink = serd_writer_get_sink(writer);
	}

	const SerdSink* sink = out_sink;

	SerdSink* normaliser = NULL;
	if (normalise) {
		sink = normaliser = serd_normaliser_new(out_sink, env);
	}

	SerdStatus st     = SERD_SUCCESS;
	SerdSink*  filter = NULL;
	if (pattern) {
		filter = parse_filter(world, sink, pattern);
		if (filter) {
			sink = filter;
		} else {
			// Never read into a NULL sink, fail instead
			SERDI_ERRORF("failed to parse filter pattern `%s'\n", pattern);
			st = SERD_ERR_UNKNOWN;
		}
	}

	if (quiet) {
		serd_world_set_log_func(world, serd_quiet_error_func, NULL);
	}

	SerdNode* root = serd_new_uri(root_uri);
	serd_writer_set_root_uri(writer, root);
	serd_writer_chop_blank_prefix(writer, chop_prefix);
	serd_node_free(root);

	// Inputs are only read if everything above was set up successfully
	bool can_read = !st;

	if (can_read && input_string) {
		SerdByteSource* byte_source =
		    serd_byte_source_new_string(input_string, NULL);

		SerdReader* reader =
		        serd_reader_new(world,
		                        input_syntax ? input_syntax : SERD_TRIG,
		                        reader_flags,
		                        sink,
		                        stack_size);

		serd_reader_add_blank_prefix(reader, add_prefix);

		if (!(st = serd_reader_start(reader, byte_source))) {
			st = serd_reader_read_document(reader);
		}

		serd_reader_free(reader);
		serd_byte_source_free(byte_source);
	}

	// With several inputs, blank node IDs get a distinct prefix for each file
	size_t prefix_len = 0;
	char*  prefix     = NULL;
	if (can_read && n_inputs > 1) {
		prefix_len = 8 + strlen(add_prefix);
		prefix     = (char*)calloc(1, prefix_len);
		if (!prefix) {
			SERDI_ERROR("failed to allocate blank node prefix\n");
			st       = SERD_ERR_UNKNOWN;
			can_read = false;
		}
	}

	for (int i = 0; can_read && i < n_inputs; ++i) {
		if (!base) {
			SerdNode* file_uri = serd_new_file_uri(inputs[i], NULL);
			serd_env_set_base_uri(env, file_uri);
			serd_node_free(file_uri);
		}

		if (n_inputs > 1) {
			snprintf(prefix, prefix_len, "f%d%s", i, add_prefix);
		}

		if ((st = read_file(world,
		                    input_syntax,
		                    reader_flags,
		                    sink,
		                    stack_size,
		                    inputs[i],
		                    n_inputs > 1 ? prefix : add_prefix,
		                    bulk_read))) {
			break;
		}
	}
	free(prefix);

	if (!st && validate) {
		st = serd_validate(model);
	}

	// Write the model unless an error occurred
	if (st <= SERD_FAILURE && use_model) {
		const SerdSink* wsink = serd_writer_get_sink(writer);
		serd_env_write_prefixes(env, wsink);

		SerdRange* range = serd_model_all(model);
		st = serd_range_serialise(range, wsink, serialisation_flags);
		serd_range_free(range);
	}

	serd_sink_free(normaliser);
	serd_sink_free(filter);
	serd_sink_free(inserter);
	serd_model_free(model);
	serd_writer_free(writer);
	serd_env_free(env);
	serd_node_free(base);
	serd_world_free(world);

	if (serd_byte_sink_close(byte_sink) || (!out_filename && fclose(stdout))) {
		perror("serdi: write error");
		st = SERD_ERR_UNKNOWN;
	}

	serd_byte_sink_free(byte_sink);

	return (st > SERD_FAILURE) ? 1 : 0;
}