diff options
-rw-r--r-- | src/env.c | 8 | ||||
-rw-r--r-- | src/node.c | 8 | ||||
-rw-r--r-- | src/reader.c | 96 | ||||
-rw-r--r-- | src/serdi.c | 2 | ||||
-rw-r--r-- | src/writer.c | 33 | ||||
-rw-r--r-- | tests/bad-blank.ttl | 3 | ||||
-rw-r--r-- | tests/bad-eof-in-escape.ttl | 3 | ||||
-rw-r--r-- | tests/bad-eof-in-triple-quote.ttl | 3 | ||||
-rw-r--r-- | tests/bad-escape.ttl | 2 | ||||
-rw-r--r-- | tests/bad-hex-escape.ttl | 1 | ||||
-rw-r--r-- | tests/bad-prefix.ttl | 1 | ||||
-rw-r--r-- | tests/bad-uri-escape.ttl | 1 | ||||
-rw-r--r-- | tests/serd_test.c | 173 | ||||
-rw-r--r-- | tests/test-bad-utf8.out | 1 | ||||
-rw-r--r-- | tests/test-bad-utf8.ttl | 1 | ||||
-rw-r--r-- | tests/test-escapes.out | 2 | ||||
-rw-r--r-- | tests/test-escapes.ttl | 2 | ||||
-rw-r--r-- | tests/test-semi-dot.out | 1 | ||||
-rw-r--r-- | tests/test-semi-dot.ttl | 1 |
19 files changed, 255 insertions, 87 deletions
@@ -132,7 +132,9 @@ serd_env_set_prefix(SerdEnv* env, const SerdNode* name, const SerdNode* uri_node) { - if (serd_uri_string_has_scheme(uri_node->buf)) { + if (!name->buf || !uri_node->buf || uri_node->type != SERD_URI) { + return SERD_ERR_BAD_ARG; + } else if (serd_uri_string_has_scheme(uri_node->buf)) { // Set prefix to absolute URI serd_env_add(env, name, uri_node); } else { @@ -141,10 +143,6 @@ serd_env_set_prefix(SerdEnv* env, SerdNode abs_uri_node = serd_node_new_uri_from_node( uri_node, &env->base_uri, &abs_uri); - if (!abs_uri_node.buf) { - return SERD_ERR_BAD_ARG; - } - // Set prefix to resolved (absolute) URI serd_env_add(env, name, &abs_uri_node); serd_node_free(&abs_uri_node); @@ -134,11 +134,7 @@ serd_node_new_uri(const SerdURI* uri, const SerdURI* base, SerdURI* out) node.n_bytes = actual_len; node.n_chars = actual_len; - // FIXME: double parse - if (serd_uri_parse(buf, out)) { - fprintf(stderr, "Failed to parse URI <%s>\n", buf); - return SERD_NODE_NULL; - } + serd_uri_parse(buf, out); // TODO: cleverly avoid double parse return node; } @@ -201,7 +197,7 @@ serd_node_new_integer(long i) { long abs_i = labs(i); const long digits = (long)fmax(1.0, ceil(log10((double)abs_i + 1))); - char* buf = calloc(digits + 1, 1); + char* buf = calloc(digits + 2, 1); SerdNode node = { (const uint8_t*)buf, 0, 0, 0, SERD_LITERAL }; // Point s to the end diff --git a/src/reader.c b/src/reader.c index f99d5741..fbc7a711 100644 --- a/src/reader.c +++ b/src/reader.c @@ -490,26 +490,6 @@ read_character(SerdReader* reader, Ref dest) } } -// [39] echaracter ::= character | '\t' | '\n' | '\r' -static inline SerdStatus -read_echaracter(SerdReader* reader, Ref dest) -{ - SerdNodeFlags flags = 0; - uint8_t c = peek_byte(reader); - switch (c) { - case '\\': - eat_byte_safe(reader, '\\'); - if (read_echaracter_escape(reader, peek_byte(reader), &flags)) { - return SERD_SUCCESS; - } else { - error(reader, "illegal escape `\\%c'\n", peek_byte(reader)); - return SERD_ERR_BAD_SYNTAX; - } - default: - return read_character(reader, dest); - } -} - // [43] lcharacter ::= echaracter | '\"' | #x9 | #xA | #xD static inline SerdStatus read_lcharacter(SerdReader* reader, Ref dest, SerdNodeFlags* flags) @@ -544,7 +524,7 @@ read_lcharacter(SerdReader* reader, Ref dest, SerdNodeFlags* flags) push_byte(reader, dest, eat_byte_safe(reader, c)); return SERD_SUCCESS; default: - return read_echaracter(reader, dest); + return read_character(reader, dest); } } @@ -581,7 +561,8 @@ read_ucharacter(SerdReader* reader, Ref dest) if (read_ucharacter_escape(reader, dest)) { return SERD_SUCCESS; } else { - return error(reader, "illegal escape `\\%c'\n", peek_byte(reader)); + error(reader, "illegal escape `\\%c'\n", peek_byte(reader)); + return SERD_FAILURE; } case '>': return SERD_FAILURE; @@ -703,17 +684,13 @@ read_relativeURI(SerdReader* reader) // | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] // | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] static inline uchar -read_nameStartChar(SerdReader* reader, bool required) +read_nameStartChar(SerdReader* reader) { const uint8_t c = peek_byte(reader); if (c == '_' || is_alpha(c)) { // TODO: not strictly correct return eat_byte_safe(reader, c); - } else { - if (required) { - error(reader, "illegal character `%c'\n", c); - } - return 0; } + return 0; } // [31] nameChar ::= nameStartChar | '-' | [0-9] @@ -721,7 +698,7 @@ read_nameStartChar(SerdReader* reader, bool required) static inline uchar read_nameChar(SerdReader* reader) { - uchar c = read_nameStartChar(reader, false); + uchar c = read_nameStartChar(reader); if (c) return c; @@ -745,12 +722,12 @@ read_prefixName(SerdReader* reader, Ref dest) pop_node(reader, dest); return 0; } - TRY_RET(c = read_nameStartChar(reader, false)); + TRY_RET(c = read_nameStartChar(reader)); if (!dest) { dest = push_node(reader, SERD_CURIE, "", 0); } push_byte(reader, dest, c); - while ((c = read_nameChar(reader)) != 0) { + while ((c = read_nameChar(reader))) { push_byte(reader, dest, c); } return dest; @@ -760,7 +737,7 @@ read_prefixName(SerdReader* reader, Ref dest) static Ref read_name(SerdReader* reader, Ref dest, bool required) { - uchar c = read_nameStartChar(reader, required); + uchar c = read_nameStartChar(reader); if (!c) { if (required) { error(reader, "illegal character at start of name\n"); @@ -921,36 +898,26 @@ read_resource(SerdReader* reader, Ref* dest) return *dest != 0; } -// [14] literal ::= quotedString ( '@' language )? | datatypeString -// | integer | double | decimal | boolean static bool read_literal(SerdReader* reader, Ref* dest, Ref* datatype, Ref* lang, SerdNodeFlags* flags) { - Ref str = 0; - const uint8_t c = peek_byte(reader); - if (c == '-' || c == '+' || c == '.' || is_digit(c)) { - return read_number(reader, dest, datatype); - } else if (c == '\"') { - str = read_quotedString(reader, flags); - if (!str) { - return false; - } + Ref str = read_quotedString(reader, flags); + if (!str) { + return false; + } - switch (peek_byte(reader)) { - case '^': - eat_byte_safe(reader, '^'); - eat_byte_check(reader, '^'); - TRY_THROW(read_resource(reader, datatype)); - break; - case '@': - eat_byte_safe(reader, '@'); - TRY_THROW(*lang = read_language(reader)); - } - *dest = str; - } else { - return error(reader, "unknown literal type\n"); + switch (peek_byte(reader)) { + case '^': + eat_byte_safe(reader, '^'); + eat_byte_check(reader, '^'); + TRY_THROW(read_resource(reader, datatype)); + break; + case '@': + eat_byte_safe(reader, '@'); + TRY_THROW(*lang = read_language(reader)); } + *dest = str; return true; except: pop_node(reader, str); @@ -1126,9 +1093,11 @@ read_object(SerdReader* reader, ReadContext ctx) case '<': case ':': TRY_THROW(ret = read_resource(reader, &o)); break; - case '\"': case '+': case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': case '.': + case '+': case '-': case '.': case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': case '8': case '9': + TRY_THROW(ret = read_number(reader, &o, &datatype)); + break; + case '\"': TRY_THROW(ret = read_literal(reader, &o, &datatype, &lang, &flags)); break; default: @@ -1470,16 +1439,15 @@ serd_reader_get_handle(const SerdReader* reader) { return reader->handle; } + SERD_API void serd_reader_add_blank_prefix(SerdReader* reader, const uint8_t* prefix) { - if (reader->bprefix) { - free(reader->bprefix); - reader->bprefix_len = 0; - reader->bprefix = NULL; - } + free(reader->bprefix); + reader->bprefix_len = 0; + reader->bprefix = NULL; if (prefix) { reader->bprefix_len = strlen((const char*)prefix); reader->bprefix = malloc(reader->bprefix_len + 1); diff --git a/src/serdi.c b/src/serdi.c index 3fbc9165..e69b226d 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -149,7 +149,7 @@ main(int argc, char** argv) in_name = in_name ? in_name : input; if (!in_fd) { input = serd_uri_to_path(in_name); - if (!(in_fd = serd_fopen((const char*)input, "r"))) { + if (!input || !(in_fd = serd_fopen((const char*)input, "r"))) { return 1; } } diff --git a/src/writer.c b/src/writer.c index 96f8d1e4..c388188e 100644 --- a/src/writer.c +++ b/src/writer.c @@ -163,6 +163,8 @@ write_text(SerdWriter* writer, TextContext ctx, c = in & 0x07; } else { fprintf(stderr, "Invalid UTF-8 at offset %zu: %X\n", i, in); + const uint8_t replacement_char[] = { 0xEF, 0xBF, 0xBD }; + sink(replacement_char, sizeof(replacement_char), writer); return false; } @@ -249,8 +251,6 @@ write_node(SerdWriter* writer, SerdChunk uri_prefix; SerdChunk uri_suffix; switch (node->type) { - case SERD_NOTHING: - return false; case SERD_BLANK: if (writer->syntax != SERD_NTRIPLES && ((field == FIELD_SUBJECT && (flags & SERD_ANON_S_BEGIN)) @@ -350,11 +350,23 @@ write_node(SerdWriter* writer, sink("<", 1, writer); write_text(writer, WRITE_URI, node->buf, node->n_bytes, '>'); sink(">", 1, writer); - return true; + default: + break; } return true; } +static inline bool +is_resource(const SerdNode* node) +{ + switch (node->type) { + case SERD_URI: case SERD_CURIE: case SERD_BLANK: + return true; + default: + return false; + } +} + SERD_API SerdStatus serd_writer_write_statement(SerdWriter* writer, @@ -366,7 +378,12 @@ serd_writer_write_statement(SerdWriter* writer, const SerdNode* object_datatype, const SerdNode* object_lang) { - assert(subject && predicate && object); + if (!subject || !predicate || !object + || !subject->buf || !predicate->buf || !object->buf + || !is_resource(subject) || !is_resource(predicate)) { + return SERD_ERR_BAD_ARG; + } + switch (writer->syntax) { case SERD_NTRIPLES: write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags); @@ -536,11 +553,9 @@ void serd_writer_chop_blank_prefix(SerdWriter* writer, const uint8_t* prefix) { - if (writer->bprefix) { - free(writer->bprefix); - writer->bprefix_len = 0; - writer->bprefix = NULL; - } + free(writer->bprefix); + writer->bprefix_len = 0; + writer->bprefix = NULL; if (prefix) { writer->bprefix_len = strlen((const char*)prefix); writer->bprefix = malloc(writer->bprefix_len + 1); diff --git a/tests/bad-blank.ttl b/tests/bad-blank.ttl new file mode 100644 index 00000000..a6543f2d --- /dev/null +++ b/tests/bad-blank.ttl @@ -0,0 +1,3 @@ +@prefix eg: <http://example.org/> . + +_:.bad a eg:Thing .
\ No newline at end of file diff --git a/tests/bad-eof-in-escape.ttl b/tests/bad-eof-in-escape.ttl new file mode 100644 index 00000000..d60acd15 --- /dev/null +++ b/tests/bad-eof-in-escape.ttl @@ -0,0 +1,3 @@ +@prefix eg: <http://example.org> . + +<> eg:comment """Hello""
\ No newline at end of file diff --git a/tests/bad-eof-in-triple-quote.ttl b/tests/bad-eof-in-triple-quote.ttl new file mode 100644 index 00000000..d60acd15 --- /dev/null +++ b/tests/bad-eof-in-triple-quote.ttl @@ -0,0 +1,3 @@ +@prefix eg: <http://example.org> . + +<> eg:comment """Hello""
\ No newline at end of file diff --git a/tests/bad-escape.ttl b/tests/bad-escape.ttl index 0fd6c5bf..c03f395f 100644 --- a/tests/bad-escape.ttl +++ b/tests/bad-escape.ttl @@ -1 +1 @@ -<http://example.org/thing> <http://example.org/comment> "\!" . +<http://example.org/thing> <http://example.org/comment> """\!""" . diff --git a/tests/bad-hex-escape.ttl b/tests/bad-hex-escape.ttl new file mode 100644 index 00000000..ba6ff5b9 --- /dev/null +++ b/tests/bad-hex-escape.ttl @@ -0,0 +1 @@ +<http://example.org/thing> <http://example.org/comment> "\UFFFFFFFF" . diff --git a/tests/bad-prefix.ttl b/tests/bad-prefix.ttl new file mode 100644 index 00000000..6c286355 --- /dev/null +++ b/tests/bad-prefix.ttl @@ -0,0 +1 @@ +@prefix _invalid <http://example.org/> . diff --git a/tests/bad-uri-escape.ttl b/tests/bad-uri-escape.ttl new file mode 100644 index 00000000..16c63754 --- /dev/null +++ b/tests/bad-uri-escape.ttl @@ -0,0 +1 @@ +<http://example.org/thing> <http://example.org/comment> <http://example.org/\!> . diff --git a/tests/serd_test.c b/tests/serd_test.c index c5c650da..aca88127 100644 --- a/tests/serd_test.c +++ b/tests/serd_test.c @@ -16,11 +16,14 @@ #include <float.h> #include <math.h> +#include <stdio.h> #include <stdlib.h> #include <string.h> #include "serd/serd.h" +#define USTR(s) ((const uint8_t*)(s)) + static bool test_strtod(double dbl, double max_delta) { @@ -39,6 +42,27 @@ test_strtod(double dbl, double max_delta) return true; } +static SerdStatus +count_prefixes(void* handle, const SerdNode* name, const SerdNode* uri) +{ + ++*(int*)handle; + return SERD_SUCCESS; +} + +static SerdStatus +count_statements(void* handle, + SerdStatementFlags flags, + const SerdNode* graph, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* object_datatype, + const SerdNode* object_lang) +{ + ++*(int*)handle; + return SERD_SUCCESS; +} + int main() { @@ -186,7 +210,154 @@ main() node.buf, node.n_bytes, node.n_chars, node.flags, node.type); return 1; } - + + // Test SerdEnv + + SerdNode u = serd_node_from_string(SERD_URI, USTR("http://example.org/foo")); + SerdNode b = serd_node_from_string(SERD_CURIE, USTR("invalid")); + SerdNode c = serd_node_from_string(SERD_CURIE, USTR("eg:b")); + SerdEnv* env = serd_env_new(NULL); + serd_env_set_prefix_from_strings(env, USTR("eg"), USTR("http://example.org/")); + + SerdChunk prefix, suffix; + if (!serd_env_expand(env, &b, &prefix, &suffix)) { + fprintf(stderr, "Expanded invalid curie %s\n", b.buf); + return 1; + } + + SerdNode xu = serd_env_expand_node(env, &u); + if (strcmp((const char*)xu.buf, "http://example.org/foo")) { + fprintf(stderr, "Expanded %s to %s\n", c.buf, xu.buf); + return 1; + } + serd_node_free(&xu); + + SerdNode xc = serd_env_expand_node(env, &c); + if (strcmp((const char*)xc.buf, "http://example.org/b")) { + fprintf(stderr, "Expanded %s to %s\n", c.buf, xc.buf); + return 1; + } + serd_node_free(&xc); + + if (!serd_env_set_prefix(env, &SERD_NODE_NULL, &SERD_NODE_NULL)) { + fprintf(stderr, "Set NULL prefix\n"); + return 1; + } + + const SerdNode lit = serd_node_from_string(SERD_LITERAL, USTR("hello")); + if (!serd_env_set_prefix(env, &b, &lit)) { + fprintf(stderr, "Set prefix to literal\n"); + return 1; + } + + int n_prefixes = 0; + serd_env_set_prefix_from_strings(env, USTR("eg"), USTR("http://example.org/")); + serd_env_foreach(env, count_prefixes, &n_prefixes); + if (n_prefixes != 1) { + fprintf(stderr, "Bad prefix count %d\n", n_prefixes); + return 1; + } + + // Test SerdReader and SerdWriter + + const char* path = tmpnam(NULL); + FILE* fd = fopen(path, "w"); + if (!fd) { + fprintf(stderr, "Failed to open file %s\n", path); + return 1; + } + + int* n_statements = malloc(sizeof(int)); + *n_statements = 0; + + SerdWriter* writer = serd_writer_new( + SERD_TURTLE, 0, env, NULL, serd_file_sink, fd); + if (!writer) { + fprintf(stderr, "Failed to create writer\n"); + return 1; + } + + if (!serd_writer_end_anon(writer, NULL)) { + fprintf(stderr, "Ended non-existent anonymous node\n"); + return 1; + } + + uint8_t buf[] = { 0x80, 0, 0, 0, 0 }; + SerdNode s = serd_node_from_string(SERD_URI, USTR("")); + SerdNode p = serd_node_from_string(SERD_URI, USTR("http://example.org/pred")); + SerdNode o = serd_node_from_string(SERD_LITERAL, buf); + + // Write 3 invalid statements (should write nothing) + if (!serd_writer_write_statement(writer, 0, NULL, + &s, &p, NULL, NULL, NULL)) { + fprintf(stderr, "Successfully wrote junk statement 1\n"); + return 1; + } + if (!serd_writer_write_statement(writer, 0, NULL, + &s, &p, &SERD_NODE_NULL, NULL, NULL)) { + fprintf(stderr, "Successfully wrote junk statement 1\n"); + return 1; + } + if (!serd_writer_write_statement(writer, 0, NULL, + &s, &o, &o, NULL, NULL)) { + fprintf(stderr, "Successfully wrote junk statement 3\n"); + return 1; + } + + // Write 1 statement with bad UTF-8 (should be replaced) + if (serd_writer_write_statement(writer, 0, NULL, + &s, &p, &o, NULL, NULL)) { + fprintf(stderr, "Failed to write junk UTF-8\n"); + return 1; + } + + // Write 1 valid statement + o = serd_node_from_string(SERD_LITERAL, USTR("hello")); + if (serd_writer_write_statement(writer, 0, NULL, + &s, &p, &o, NULL, NULL)) { + fprintf(stderr, "Failed to write valid statement\n"); + return 1; + } + + serd_writer_free(writer); + fseek(fd, 0, SEEK_SET); + + SerdReader* reader = serd_reader_new( + SERD_TURTLE, n_statements, free, + NULL, NULL, count_statements, NULL); + if (!reader) { + fprintf(stderr, "Failed to create reader\n"); + return 1; + } + if (serd_reader_get_handle(reader) != n_statements) { + fprintf(stderr, "Corrupt reader handle\n"); + return 1; + } + + if (!serd_reader_read_file(reader, USTR("http://notafile"))) { + fprintf(stderr, "Apparently read an http URI\n"); + return 1; + } + if (!serd_reader_read_file(reader, USTR("file:///better/not/exist"))) { + fprintf(stderr, "Apprently read a non-existent file\n"); + return 1; + } + SerdStatus st = serd_reader_read_file(reader, USTR(path)); + if (st) { + fprintf(stderr, "Error reading file (%s)\n", serd_strerror(st)); + return 1; + } + + if (*n_statements != 2) { + fprintf(stderr, "Bad statement count %d\n", *n_statements); + return 1; + } + + serd_reader_free(reader); + fclose(fd); + + serd_env_free(env); + printf("Success\n"); return 0; } diff --git a/tests/test-bad-utf8.out b/tests/test-bad-utf8.out index df2ada0d..6d73993d 100644 --- a/tests/test-bad-utf8.out +++ b/tests/test-bad-utf8.out @@ -1,2 +1,3 @@ <http://example.org/thing> <http://example.org/comment> "Impossible bytes: \uFFFD \uFFFD" . <http://example.org/thing> <http://example.org/comment> "2 continuation bytes: \uFFFD" . +<http://example.org/thing> <http://example.org/comment> "Missing continuation: \uFFFD" . diff --git a/tests/test-bad-utf8.ttl b/tests/test-bad-utf8.ttl index e3c9ae81..2c105f5b 100644 --- a/tests/test-bad-utf8.ttl +++ b/tests/test-bad-utf8.ttl @@ -1,2 +1,3 @@ <http://example.org/thing> <http://example.org/comment> "Impossible bytes: � �" . <http://example.org/thing> <http://example.org/comment> "2 continuation bytes: ��" . +<http://example.org/thing> <http://example.org/comment> "Missing continuation: �" . diff --git a/tests/test-escapes.out b/tests/test-escapes.out new file mode 100644 index 00000000..725b832c --- /dev/null +++ b/tests/test-escapes.out @@ -0,0 +1,2 @@ +<http://example.org/thing> <http://example.org/comment> "\\\r\n\t" . +<http://example.org/thing> <http://example.org/comment> <http://example.org/\u003E> . diff --git a/tests/test-escapes.ttl b/tests/test-escapes.ttl new file mode 100644 index 00000000..ff306b15 --- /dev/null +++ b/tests/test-escapes.ttl @@ -0,0 +1,2 @@ +<http://example.org/thing> <http://example.org/comment> "\\\r\n\t" . +<http://example.org/thing> <http://example.org/comment> <http://example.org/\>> .
\ No newline at end of file diff --git a/tests/test-semi-dot.out b/tests/test-semi-dot.out new file mode 100644 index 00000000..aea1655b --- /dev/null +++ b/tests/test-semi-dot.out @@ -0,0 +1 @@ +<http://example.org/thing> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Thing> . diff --git a/tests/test-semi-dot.ttl b/tests/test-semi-dot.ttl new file mode 100644 index 00000000..6d4b4146 --- /dev/null +++ b/tests/test-semi-dot.ttl @@ -0,0 +1 @@ +<http://example.org/thing> a <http://example.org/Thing> ; .
\ No newline at end of file |