diff options
47 files changed, 1184 insertions, 780 deletions
@@ -2,10 +2,17 @@ serd (0.32.3) unstable; urgency=medium * Clean up enum declarations * Fix library current_version on MacOS + * Fix overly permissive parsing of syntax names on the command line * Fix parsing NQuads lines with no space before the final dot + * Fix rewriting special literals when datatype URIs are prefixed names + * Gracefully handle errors while writing the end of anonymous nodes + * Improve test suite coverage + * Simplify some character classification and comparison code * Support reading lone lists in lax mode + * Treat out of range unicode characters as errors + * Write blank lines between graphs and statements in TriG - -- David Robillard <d@drobilla.net> Fri, 29 Mar 2024 13:36:36 +0000 + -- David Robillard <d@drobilla.net> Tue, 25 Jun 2024 22:38:16 +0000 serd (0.32.2) stable; urgency=medium diff --git a/doc/.stylelintrc.json b/doc/.stylelintrc.json index cc4c675d..40db42c6 100644 --- a/doc/.stylelintrc.json +++ b/doc/.stylelintrc.json @@ -1,7 +1,3 @@ { - "extends": "stylelint-config-standard", - "rules": { - "color-hex-case": "upper", - "selector-list-comma-newline-after": "always-multi-line" - } + "extends": "stylelint-config-standard" } diff --git a/doc/man/mandoc.css b/doc/man/mandoc.css index 9d255992..aee69e5e 100644 --- a/doc/man/mandoc.css +++ b/doc/man/mandoc.css @@ -12,7 +12,7 @@ html { margin: 0 1.618em; - background: #FFF; + background: #fff; color: #000; } @@ -38,7 +38,12 @@ a { text-decoration: none; } -h1 a, h2 a, h3 a, h4 a, h5 a, h6 a { +h1 a, +h2 a, +h3 a, +h4 a, +h5 a, +h6 a { color: #222; } @@ -46,15 +51,27 @@ a:hover { text-decoration: underline; } -h1 a:link, h2 a:link, h3 a:link, h4 a:link, h5 a:link, h6 a:link { +h1 a:link, +h2 a:link, +h3 a:link, +h4 a:link, +h5 a:link, +h6 a:link { color: #222; } -h1 a:visited, h2 a:visited, h3 a:visited, h4 a:visited, h5 a:visited, h6 a:visited { +h1 a:visited, +h2 a:visited, +h3 a:visited, +h4 a:visited, +h5 a:visited, +h6 a:visited { color: #222; } -pre, tt, code { +pre, +tt, +code { overflow: auto; font-family: "SF Mono", Menlo, Consolas, "DejaVu Sans Mono", monospace, fixed; hyphens: none; @@ -68,7 +85,9 @@ pre, tt, code { /* stylelint-enable property-no-vendor-prefix */ } -ul, ol, dl { +ul, +ol, +dl { margin: 0; padding: 0; } @@ -88,7 +107,8 @@ dd { hyphens: auto; } -dd > ul:only-child, dd > ol:only-child { +dd > ul:only-child, +dd > ol:only-child { padding-left: 0; } @@ -121,7 +141,7 @@ dd:blank { /* Color links on screens */ @media screen { a { - color: #546E00; + color: #546e00; } } @@ -130,7 +150,13 @@ dd:blank { color: #000; } - a, h1 a, h2 a, h3 a, h4 a, h5 a, h6 a { + a, + h1 a, + h2 a, + h3 a, + h4 a, + h5 a, + h6 a { color: #000; } @@ -159,7 +185,8 @@ table.foot { width: 100%; } -td.head-rtitle, td.foot-os { +td.head-rtitle, +td.foot-os { text-align: right; } @@ -175,11 +202,14 @@ a.permalink { color: #222; } -div.Nd, div.Bf, div.Op { +div.Nd, +div.Bf, +div.Op { display: inline; } -span.Pa, span.Ad { +span.Pa, +span.Ad { font-style: italic; } @@ -195,7 +225,14 @@ table.Nm tbody tr { vertical-align: baseline; } -code.Nm, code.Fl, code.Cm, code.Ic, code.In, code.Fd, code.Fn, code.Cd { +code.Nm, +code.Fl, +code.Cm, +code.Ic, +code.In, +code.Fd, +code.Fn, +code.Cd { font-weight: bold; color: #444; } @@ -219,41 +256,63 @@ var.Ar { @media (prefers-color-scheme: dark) { html { background: #222; - color: #DDD; + color: #ddd; } a { - color: #B4C342; + color: #b4c342; } a.permalink { - color: #DDD; + color: #ddd; } - h1 a, h2 a, h3 a, h4 a, h5 a, h6 a { - color: #DDD; + h1 a, + h2 a, + h3 a, + h4 a, + h5 a, + h6 a { + color: #ddd; } - h1 a:link, h2 a:link, h3 a:link, h4 a:link, h5 a:link, h6 a:link { - color: #DDD; + h1 a:link, + h2 a:link, + h3 a:link, + h4 a:link, + h5 a:link, + h6 a:link { + color: #ddd; } - h1 a:visited, h2 a:visited, h3 a:visited, h4 a:visited, h5 a:visited, h6 a:visited { - color: #DDD; + h1 a:visited, + h2 a:visited, + h3 a:visited, + h4 a:visited, + h5 a:visited, + h6 a:visited { + color: #ddd; } /* stylelint-disable selector-class-pattern */ - code.Nm, code.Fl, code.Cm, code.Ic, code.In, code.Fd, code.Fn, code.Cd { - color: #AAA; + code.Nm, + code.Fl, + code.Cm, + code.Ic, + code.In, + code.Fd, + code.Fn, + code.Cd { + color: #aaa; } code.Ev { - color: #AAA; + color: #aaa; } code.Li { - color: #CCC; + color: #ccc; } /* stylelint-enable selector-class-pattern */ @@ -263,6 +322,6 @@ var.Ar { @media only screen and (hover: none) and (pointer: coarse) and (prefers-color-scheme: dark) { html { background: #000; - color: #CCC; + color: #ccc; } } diff --git a/doc/man/meson.build b/doc/man/meson.build index 323a8c4d..0ce65d01 100644 --- a/doc/man/meson.build +++ b/doc/man/meson.build @@ -8,6 +8,17 @@ if get_option('lint') if stylelint.found() test('stylelint', stylelint, args: [mandoc_css], suite: 'data') endif + + prettier = find_program('prettier', required: get_option('tests')) + if prettier.found() + test( + 'prettier', + prettier, + args: ['--parser', 'css', '-c', mandoc_css], + suite: 'data', + workdir: meson.current_source_dir(), + ) + endif endif if not get_option('tools').disabled() diff --git a/src/.clang-tidy b/src/.clang-tidy index b6882d38..52862fe4 100644 --- a/src/.clang-tidy +++ b/src/.clang-tidy @@ -4,11 +4,8 @@ Checks: > -*-magic-numbers, -bugprone-easily-swappable-parameters, - -bugprone-switch-missing-default-case, -cert-err33-c, -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling, - -clang-analyzer-valist.Uninitialized, - -clang-diagnostic-unused-function, -concurrency-mt-unsafe, -google-readability-todo, -hicpp-multiway-paths-covered, @@ -16,5 +13,9 @@ Checks: > -llvm-header-guard, -misc-no-recursion, -modernize-macro-to-enum, - -readability-function-cognitive-complexity, +CheckOptions: + - key: readability-function-cognitive-complexity.IgnoreMacros + value: 'true' + - key: readability-function-cognitive-complexity.Threshold + value: '91' InheritParentConfig: true diff --git a/src/byte_source.h b/src/byte_source.h index 3eafe4b8..02aab663 100644 --- a/src/byte_source.h +++ b/src/byte_source.h @@ -69,12 +69,10 @@ serd_byte_source_advance(SerdByteSource* source) { SerdStatus st = SERD_SUCCESS; - switch (serd_byte_source_peek(source)) { - case '\n': + if (serd_byte_source_peek(source) == '\n') { ++source->cur.line; source->cur.col = 0; - break; - default: + } else { ++source->cur.col; } @@ -140,6 +140,8 @@ serd_env_set_prefix(SerdEnv* const env, const SerdNode* const uri) { assert(env); + assert(name); + assert(uri); if (!name->buf || uri->type != SERD_URI) { return SERD_ERR_BAD_ARG; @@ -167,6 +169,10 @@ serd_env_set_prefix_from_strings(SerdEnv* const env, const uint8_t* const name, const uint8_t* const uri) { + assert(env); + assert(name); + assert(uri); + const SerdNode name_node = serd_node_from_string(SERD_LITERAL, name); const SerdNode uri_node = serd_node_from_string(SERD_URI, uri); @@ -179,6 +185,10 @@ serd_env_qualify(const SerdEnv* const env, SerdNode* const prefix, SerdChunk* const suffix) { + assert(uri); + assert(prefix); + assert(suffix); + if (!env) { return false; } @@ -205,6 +215,10 @@ serd_env_expand(const SerdEnv* const env, SerdChunk* const uri_prefix, SerdChunk* const uri_suffix) { + assert(curie); + assert(uri_prefix); + assert(uri_suffix); + if (!env) { return SERD_ERR_BAD_CURIE; } @@ -230,26 +244,24 @@ serd_env_expand(const SerdEnv* const env, SerdNode serd_env_expand_node(const SerdEnv* const env, const SerdNode* const node) { + assert(node); + if (!env) { return SERD_NODE_NULL; } - switch (node->type) { - case SERD_NOTHING: - case SERD_LITERAL: - break; - - case SERD_URI: { + if (node->type == SERD_URI) { SerdURI ignored; return serd_node_new_uri_from_node(node, &env->base_uri, &ignored); } - case SERD_CURIE: { + if (node->type == SERD_CURIE) { SerdChunk prefix; SerdChunk suffix; if (serd_env_expand(env, node, &prefix, &suffix)) { return SERD_NODE_NULL; } + const size_t len = prefix.len + suffix.len; uint8_t* buf = (uint8_t*)malloc(len + 1); SerdNode ret = {buf, len, 0, 0, SERD_URI}; @@ -258,10 +270,6 @@ serd_env_expand_node(const SerdEnv* const env, const SerdNode* const node) return ret; } - case SERD_BLANK: - break; - } - return SERD_NODE_NULL; } @@ -56,14 +56,11 @@ read_UCHAR(SerdReader* const reader, const Ref dest, uint32_t* const char_code) { const int b = peek_byte(reader); unsigned length = 0; - switch (b) { - case 'U': + if (b == 'U') { length = 8; - break; - case 'u': + } else if (b == 'u') { length = 4; - break; - default: + } else { return SERD_ERR_BAD_SYNTAX; } @@ -97,7 +94,7 @@ read_UCHAR(SerdReader* const reader, const Ref dest, uint32_t* const char_code) code); push_bytes(reader, dest, replacement_char, 3); *char_code = 0xFFFD; - return SERD_SUCCESS; + return reader->strict ? SERD_ERR_BAD_SYNTAX : SERD_SUCCESS; } // Build output in buf @@ -179,7 +176,7 @@ read_utf8_bytes(SerdReader* const reader, const uint8_t c) { *size = utf8_num_bytes(c); - if (*size <= 1 || *size > 4) { + if (*size <= 1) { return bad_char(reader, "invalid UTF-8 start 0x%X\n", c); } @@ -239,18 +236,12 @@ read_character(SerdReader* const reader, const uint8_t c) { if (!(c & 0x80)) { - switch (c) { - case 0xA: - case 0xD: + if (c == 0xA || c == 0xD) { *flags |= SERD_HAS_NEWLINE; - break; - case '"': - case '\'': + } else if (c == '"' || c == '\'') { *flags |= SERD_HAS_QUOTE; - break; - default: - break; } + return push_byte(reader, dest, c); } @@ -264,7 +255,7 @@ read_comment(SerdReader* const reader) skip_byte(reader, '#'); int c = 0; - while (((c = peek_byte(reader)) != 0xA) && c != 0xD && c != EOF && c) { + while (((c = peek_byte(reader)) > 0) && c != 0xA && c != 0xD) { skip_byte(reader, c); } } @@ -325,7 +316,7 @@ read_string_escape(SerdReader* const reader, uint32_t code = 0; if ((st = read_ECHAR(reader, ref, flags)) && (st = read_UCHAR(reader, ref, &code))) { - return r_err(reader, st, "invalid escape '\\%c'\n", peek_byte(reader)); + return r_err(reader, st, "expected string escape sequence\n"); } return st; @@ -537,48 +528,20 @@ read_PN_LOCAL_ESC(SerdReader* const reader, const Ref dest) skip_byte(reader, '\\'); const int c = peek_byte(reader); - switch (c) { - case '!': - case '#': - case '$': - case '%': - case '&': - case '\'': - case '(': - case ')': - case '*': - case '+': - case ',': - case '-': - case '.': - case '/': - case ';': - case '=': - case '?': - case '@': - case '_': - case '~': - push_byte(reader, dest, eat_byte_safe(reader, c)); - break; - default: - return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape\n"); - } - - return SERD_SUCCESS; + return ((c == '!') || in_range(c, '#', '/') || (c == ';') || (c == '=') || + (c == '?') || (c == '@') || (c == '_') || (c == '~')) + ? push_byte(reader, dest, eat_byte_safe(reader, c)) + : r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape\n"); } static SerdStatus read_PLX(SerdReader* const reader, const Ref dest) { const int c = peek_byte(reader); - switch (c) { - case '%': - return read_PERCENT(reader, dest); - case '\\': - return read_PN_LOCAL_ESC(reader, dest); - default: - return SERD_FAILURE; - } + + return (c == '%') ? read_PERCENT(reader, dest) + : (c == '\\') ? read_PN_LOCAL_ESC(reader, dest) + : SERD_FAILURE; } static SerdStatus @@ -612,7 +575,7 @@ read_PN_LOCAL(SerdReader* const reader, const Ref dest, bool* const ate_dot) } } - while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.' | ':')* + while ((c = peek_byte(reader)) > 0) { // Middle: (PN_CHARS | '.' | ':')* if (c == '.' || c == ':') { push_byte(reader, dest, eat_byte_safe(reader, c)); } else if ((st = read_PLX(reader, dest)) > SERD_FAILURE) { @@ -639,7 +602,7 @@ static SerdStatus read_PN_PREFIX_tail(SerdReader* const reader, const Ref dest) { int c = 0; - while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.')* + while ((c = peek_byte(reader)) > 0) { // Middle: (PN_CHARS | '.')* if (c == '.') { push_byte(reader, dest, eat_byte_safe(reader, c)); } else if (read_PN_CHARS(reader, dest)) { @@ -675,13 +638,13 @@ read_LANGTAG(SerdReader* const reader, Ref* const dest) SerdStatus st = SERD_SUCCESS; TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c))); - while ((c = peek_byte(reader)) && is_alpha(c)) { + while (((c = peek_byte(reader)) > 0) && is_alpha(c)) { TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c))); } while (peek_byte(reader) == '-') { TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, '-'))); - while ((c = peek_byte(reader)) && (is_alpha(c) || is_digit(c))) { + while (((c = peek_byte(reader)) > 0) && (is_alpha(c) || is_digit(c))) { TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c))); } } @@ -697,7 +660,7 @@ read_IRIREF_scheme(SerdReader* const reader, const Ref dest) return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad IRI scheme start '%c'\n", c); } - while ((c = peek_byte(reader)) != EOF) { + while ((c = peek_byte(reader)) > 0) { if (c == '>') { return r_err(reader, SERD_ERR_BAD_SYNTAX, "missing IRI scheme\n"); } @@ -753,18 +716,12 @@ read_IRIREF(SerdReader* const reader, Ref* const dest) return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n"); } - switch (code) { - case 0: - case ' ': - case '<': - case '>': + if (code == ' ' || code == '<' || code == '>') { *dest = pop_node(reader, *dest); return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escaped IRI character U+%04X\n", code); - default: - break; } break; @@ -886,13 +843,9 @@ read_number(SerdReader* const reader, if (c == 'e' || c == 'E') { // double push_byte(reader, *dest, eat_byte_safe(reader, c)); - switch ((c = peek_byte(reader))) { - case '+': - case '-': + c = peek_byte(reader); + if (c == '+' || c == '-') { push_byte(reader, *dest, eat_byte_safe(reader, c)); - break; - default: - break; } TRY(st, read_0_9(reader, *dest, true)); *datatype = push_node(reader, SERD_URI, XSD_DOUBLE, sizeof(XSD_DOUBLE) - 1); @@ -910,13 +863,12 @@ read_number(SerdReader* const reader, static SerdStatus read_iri(SerdReader* const reader, Ref* const dest, bool* const ate_dot) { - switch (peek_byte(reader)) { - case '<': + if (peek_byte(reader) == '<') { return read_IRIREF(reader, dest); - default: - *dest = push_node(reader, SERD_CURIE, "", 0); - return read_PrefixedName(reader, *dest, true, ate_dot); } + + *dest = push_node(reader, SERD_CURIE, "", 0); + return read_PrefixedName(reader, *dest, true, ate_dot); } static SerdStatus @@ -935,8 +887,8 @@ read_literal(SerdReader* const reader, return st; } - switch (peek_byte(reader)) { - case '@': + const int next = peek_byte(reader); + if (next == '@') { skip_byte(reader, '@'); if ((st = read_LANGTAG(reader, lang))) { *datatype = pop_node(reader, *datatype); @@ -944,8 +896,7 @@ read_literal(SerdReader* const reader, *dest = pop_node(reader, *dest); return r_err(reader, st, "bad language tag\n"); } - break; - case '^': + } else if (next == '^') { skip_byte(reader, '^'); if (!eat_byte_check(reader, '^')) { return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected '^'\n"); @@ -957,7 +908,6 @@ read_literal(SerdReader* const reader, *dest = pop_node(reader, *dest); return r_err(reader, st, "bad datatype\n"); } - break; } return SERD_SUCCESS; @@ -1020,7 +970,7 @@ read_BLANK_NODE_LABEL(SerdReader* const reader, return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid name start\n"); } - while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.')* + while ((c = peek_byte(reader)) > 0) { // Middle: (PN_CHARS | '.')* if (c == '.') { push_byte(reader, ref, eat_byte_safe(reader, c)); } else if (read_PN_CHARS(reader, ref)) { @@ -1117,7 +1067,7 @@ read_anon(SerdReader* const reader, read_ws_star(reader); if (reader->end_sink) { - reader->end_sink(reader->handle, deref(reader, *dest)); + st = reader->end_sink(reader->handle, deref(reader, *dest)); } *ctx.flags = old_flags; @@ -1154,13 +1104,7 @@ read_object(SerdReader* const reader, uint32_t flags = 0; const int c = peek_byte(reader); if (!fancy_syntax(reader)) { - switch (c) { - case '"': - case ':': - case '<': - case '_': - break; - default: + if (c != '"' && c != ':' && c != '<' && c != '_') { return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected: ':', '<', or '_'\n"); } } @@ -1282,14 +1226,16 @@ read_predicateObjectList(SerdReader* const reader, int c = 0; do { read_ws_star(reader); - switch (c = peek_byte(reader)) { - case EOF: + c = peek_byte(reader); + if (c < 0) { return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected end of file\n"); - case '.': - case ']': - case '}': + } + + if (c == '.' || c == ']' || c == '}') { return SERD_SUCCESS; - case ';': + } + + if (c == ';') { skip_byte(reader, c); ate_semi = true; } @@ -1443,13 +1389,16 @@ read_triples(SerdReader* const reader, ReadContext ctx, bool* const ate_dot) SerdStatus st = SERD_FAILURE; if (ctx.subject) { read_ws_star(reader); - switch (peek_byte(reader)) { - case '.': + const int c = peek_byte(reader); + if (c == '.') { *ate_dot = eat_byte_safe(reader, '.'); return SERD_FAILURE; - case '}': + } + + if (c == '}') { return SERD_FAILURE; } + st = read_predicateObjectList(reader, ctx, ate_dot); } @@ -1528,22 +1477,20 @@ read_directive(SerdReader* const reader) const bool sparql = peek_byte(reader) != '@'; if (!sparql) { skip_byte(reader, '@'); - switch (peek_byte(reader)) { - case 'B': - case 'P': + const int next = peek_byte(reader); + if (next == 'B' || next == 'P') { return r_err(reader, SERD_ERR_BAD_SYNTAX, "uppercase directive\n"); } } - switch (peek_byte(reader)) { - case 'B': - case 'b': + const int next = peek_byte(reader); + + if (next == 'B' || next == 'b') { return read_base(reader, sparql, true); - case 'P': - case 'p': + } + + if (next == 'P' || next == 'p') { return read_prefixID(reader, sparql, true); - default: - break; } return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid directive\n"); @@ -1587,18 +1534,25 @@ read_wrappedGraph(SerdReader* const reader, ReadContext* const ctx) return SERD_SUCCESS; } -static int -tokcmp(SerdReader* const reader, - const Ref ref, - const char* const tok, - const size_t n) +static bool +token_equals(SerdReader* const reader, + const Ref ref, + const char* const tok, + const size_t n) { - SerdNode* node = deref(reader, ref); + SerdNode* const node = deref(reader, ref); if (!node || node->n_bytes != n) { - return -1; + return false; + } + + const char* const node_string = (const char*)node->buf; + for (size_t i = 0U; i < n; ++i) { + if (serd_to_upper(node_string[i]) != serd_to_upper(tok[i])) { + return false; + } } - return serd_strncasecmp((const char*)node->buf, tok, n); + return tok[n] == '\0'; } SerdStatus @@ -1640,11 +1594,11 @@ read_n3_statement(SerdReader* const reader) default: TRY_FAILING(st, read_subject(reader, ctx, &ctx.subject, &s_type)); - if (!tokcmp(reader, ctx.subject, "base", 4)) { + if (token_equals(reader, ctx.subject, "base", 4)) { st = read_base(reader, true, false); - } else if (!tokcmp(reader, ctx.subject, "prefix", 6)) { + } else if (token_equals(reader, ctx.subject, "prefix", 6)) { st = read_prefixID(reader, true, false); - } else if (!tokcmp(reader, ctx.subject, "graph", 5)) { + } else if (token_equals(reader, ctx.subject, "graph", 5)) { ctx.subject = pop_node(reader, ctx.subject); read_ws_star(reader); TRY(st, read_labelOrSubject(reader, &ctx.graph)); @@ -1653,8 +1607,8 @@ read_n3_statement(SerdReader* const reader) pop_node(reader, ctx.graph); ctx.graph = 0; read_ws_star(reader); - } else if (!tokcmp(reader, ctx.subject, "true", 4) || - !tokcmp(reader, ctx.subject, "false", 5)) { + } else if (token_equals(reader, ctx.subject, "true", 4) || + token_equals(reader, ctx.subject, "false", 5)) { return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected subject\n"); } else if (read_ws_star(reader) && peek_byte(reader) == '{') { if (s_type == '(' || (s_type == '[' && !*ctx.flags)) { @@ -142,37 +142,7 @@ serd_node_new_uri_from_string(const uint8_t* const str, static bool is_uri_path_char(const uint8_t c) { - if (is_alpha(c) || is_digit(c)) { - return true; - } - - switch (c) { - // unreserved: - case '-': - case '.': - case '_': - case '~': - case ':': - - case '@': // pchar - case '/': // separator - - // sub-delimiters: - case '!': - case '$': - case '&': - case '\'': - case '(': - case ')': - case '*': - case '+': - case ',': - case ';': - case '=': - return true; - default: - return false; - } + return is_alpha(c) || is_digit(c) || strchr("!$&\'()*+,-./:;=@_~", c); } static bool diff --git a/src/reader.c b/src/reader.c index 1936808a..aa24a9ca 100644 --- a/src/reader.c +++ b/src/reader.c @@ -200,6 +200,7 @@ serd_reader_new(const SerdSyntax syntax, void serd_reader_set_strict(SerdReader* const reader, const bool strict) { + assert(reader); reader->strict = strict; } @@ -208,6 +209,7 @@ serd_reader_set_error_sink(SerdReader* const reader, const SerdErrorSink error_sink, void* const error_handle) { + assert(reader); reader->error_sink = error_sink; reader->error_handle = error_handle; } @@ -392,6 +394,9 @@ serd_reader_read_file_handle(SerdReader* const reader, FILE* const file, const uint8_t* const name) { + assert(reader); + assert(file); + return serd_reader_read_source(reader, (SerdSource)fread, (SerdStreamErrorFunc)ferror, diff --git a/src/serdi.c b/src/serdi.c index 2bea9e3a..9d0a8f44 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -44,7 +44,7 @@ static SerdSyntax get_syntax(const char* const name) { for (const Syntax* s = syntaxes; s->name; ++s) { - if (!serd_strncasecmp(s->name, name, strlen(name))) { + if (!serd_strcasecmp(s->name, name)) { return s->syntax; } } @@ -59,7 +59,7 @@ guess_syntax(const char* const filename) const char* ext = strrchr(filename, '.'); if (ext) { for (const Syntax* s = syntaxes; s->name; ++s) { - if (!serd_strncasecmp(s->extension, ext, strlen(ext))) { + if (!serd_strcasecmp(s->extension, ext)) { return s->syntax; } } diff --git a/src/stack.h b/src/stack.h index d118430a..388dd054 100644 --- a/src/stack.h +++ b/src/stack.h @@ -78,9 +78,7 @@ serd_stack_push_aligned(SerdStack* stack, size_t n_bytes, size_t align) // Push padding if necessary const size_t pad = align - stack->size % align; - if (pad > 0) { - serd_stack_push(stack, pad); - } + serd_stack_push(stack, pad); // Set top of stack to pad count so we can properly pop later assert(pad < UINT8_MAX); diff --git a/src/string.c b/src/string.c index 52fb4a91..936989c2 100644 --- a/src/string.c +++ b/src/string.c @@ -49,16 +49,10 @@ serd_strerror(const SerdStatus status) static void serd_update_flags(const uint8_t c, SerdNodeFlags* const flags) { - switch (c) { - case '\r': - case '\n': + if (c == '\r' || c == '\n') { *flags |= SERD_HAS_NEWLINE; - break; - case '"': + } else if (c == '"') { *flags |= SERD_HAS_QUOTE; - break; - default: - break; } } @@ -68,6 +62,9 @@ serd_substrlen(const uint8_t* const str, size_t* const n_bytes, SerdNodeFlags* const flags) { + assert(n_bytes); + assert(flags); + size_t n_chars = 0; size_t i = 0; SerdNodeFlags f = 0; @@ -77,12 +74,9 @@ serd_substrlen(const uint8_t* const str, serd_update_flags(str[i], &f); } } - if (n_bytes) { - *n_bytes = i; - } - if (flags) { - *flags = f; - } + + *n_bytes = i; + *flags = f; return n_chars; } @@ -91,6 +85,8 @@ serd_strlen(const uint8_t* const str, size_t* const n_bytes, SerdNodeFlags* const flags) { + assert(str); + size_t n_chars = 0; size_t i = 0; SerdNodeFlags f = 0; @@ -114,16 +110,11 @@ read_sign(const char** const sptr) { double sign = 1.0; - switch (**sptr) { - case '-': + if (**sptr == '-') { sign = -1.0; ++(*sptr); - break; - case '+': + } else if (**sptr == '+') { ++(*sptr); - break; - default: - break; } return sign; diff --git a/src/string_utils.h b/src/string_utils.h index 7770e1eb..2ce90ac9 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -51,17 +51,7 @@ is_xdigit(const int c) static inline bool is_space(const char c) { - switch (c) { - case ' ': - case '\f': - case '\n': - case '\r': - case '\t': - case '\v': - return true; - default: - return false; - } + return c == ' ' || (c >= '\t' && c <= '\r'); } static inline bool @@ -101,16 +91,20 @@ serd_to_upper(const char c) return (char)((c >= 'a' && c <= 'z') ? c - 32 : c); } -static inline int -serd_strncasecmp(const char* s1, const char* s2, size_t n) +SERD_PURE_FUNC static inline int +serd_strcasecmp(const char* s1, const char* s2) { - for (; n > 0 && *s2; s1++, s2++, --n) { - if (serd_to_upper(*s1) != serd_to_upper(*s2)) { - return ((*(const uint8_t*)s1 < *(const uint8_t*)s2) ? -1 : +1); + while (*s1 && *s2) { + const char c1 = serd_to_upper(*s1++); + const char c2 = serd_to_upper(*s2++); + if (c1 != c2) { + return (c1 < c2) ? -1 : +1; } } - return 0; + const char c1 = serd_to_upper(*s1); + const char c2 = serd_to_upper(*s2); + return (c1 == c2) ? 0 : (c1 < c2) ? -1 : +1; } static inline uint32_t @@ -430,7 +430,6 @@ serd_uri_serialise_relative(const SerdURI* const uri, { assert(uri); assert(sink); - assert(stream); size_t len = 0; const bool relative = @@ -484,7 +483,7 @@ serd_uri_serialise_relative(const SerdURI* const uri, size_t serd_uri_serialise(const SerdURI* const uri, SerdSink sink, void* const stream) { + assert(uri); assert(sink); - assert(stream); return serd_uri_serialise_relative(uri, NULL, NULL, sink, stream); } diff --git a/src/uri_utils.h b/src/uri_utils.h index e2f30edb..0d3bd74e 100644 --- a/src/uri_utils.h +++ b/src/uri_utils.h @@ -101,15 +101,8 @@ uri_is_under(const SerdURI* uri, const SerdURI* root) static inline bool is_uri_scheme_char(const int c) { - switch (c) { - case ':': - case '+': - case '-': - case '.': - return true; - default: - return is_alpha(c) || is_digit(c); - } + return c == '+' || c == '-' || c == '.' || c == ':' || is_alpha(c) || + is_digit(c); } #endif // SERD_SRC_URI_UTILS_H diff --git a/src/writer.c b/src/writer.c index 96e6135d..e4ef5651 100644 --- a/src/writer.c +++ b/src/writer.c @@ -148,12 +148,6 @@ supports_abbrev(const SerdWriter* writer) return writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG; } -SERD_NODISCARD static bool -supports_uriref(const SerdWriter* writer) -{ - return writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG; -} - static SerdStatus free_context(WriteContext* const ctx) { @@ -289,21 +283,8 @@ write_character(SerdWriter* writer, SERD_NODISCARD static bool uri_must_escape(const uint8_t c) { - switch (c) { - case ' ': - case '"': - case '<': - case '>': - case '\\': - case '^': - case '`': - case '{': - case '|': - case '}': - return true; - default: - return !in_range(c, 0x20, 0x7E); - } + return (c == '"') || (c == '<') || (c == '>') || (c == '\\') || (c == '^') || + (c == '`') || in_range(c, '{', '}') || !in_range(c, 0x21, 0x7E); } static size_t @@ -374,38 +355,17 @@ write_uri_from_node(SerdWriter* writer, const SerdNode* node) static bool lname_must_escape(const uint8_t c) { - /* This arbitrary list of characters, most of which have nothing to do with - Turtle, must be handled as special cases here because the RDF and SPARQL - WGs are apparently intent on making the once elegant Turtle a baroque - and inconsistent mess, throwing elegance and extensibility completely - out the window for no good reason. - - Note '-', '.', and '_' are also in PN_LOCAL_ESC, but are valid unescaped - in local names, so they are not escaped here. */ - - switch (c) { - case '\'': - case '!': - case '#': - case '$': - case '%': - case '&': - case '(': - case ')': - case '*': - case '+': - case ',': - case '/': - case ';': - case '=': - case '?': - case '@': - case '~': - return true; - default: - break; - } - return false; + /* Most of these characters have nothing to do with Turtle, but were taken + from SPARQL and mashed into the Turtle grammar (despite not being used) + with RDF 1.1. So now Turtle is a mess because the SPARQL grammar is + poorly designed and didn't use a leading character to distinguish things + like path patterns like it should have. + + Note that '-', '.', and '_' are also in PN_LOCAL_ESC, but are valid + unescaped in local names, so they are not escaped here. */ + + return (c == '!') || (c == '/') || (c == ';') || (c == '=') || (c == '?') || + (c == '@') || (c == '~') || in_range(c, '#', ','); } SERD_NODISCARD static SerdStatus @@ -653,6 +613,31 @@ reset_context(SerdWriter* writer, const unsigned flags) return SERD_SUCCESS; } +// Return the name of the XSD datatype referred to by `datatype`, if any +static const char* +get_xsd_name(const SerdEnv* const env, const SerdNode* const datatype) +{ + const char* const datatype_str = (const char*)datatype->buf; + + if (datatype->type == SERD_URI && + (!strncmp(datatype_str, NS_XSD, sizeof(NS_XSD) - 1))) { + return datatype_str + sizeof(NS_XSD) - 1U; + } + + if (datatype->type == SERD_CURIE) { + SerdChunk prefix = {NULL, 0}; + SerdChunk suffix = {NULL, 0}; + // We can be a bit lazy/presumptive here due to grammar limitations + if (!serd_env_expand(env, datatype, &prefix, &suffix)) { + if (!strcmp((const char*)prefix.buf, NS_XSD)) { + return (const char*)suffix.buf; + } + } + } + + return ""; +} + SERD_NODISCARD static SerdStatus write_literal(SerdWriter* writer, const SerdNode* node, @@ -663,21 +648,11 @@ write_literal(SerdWriter* writer, SerdStatus st = SERD_SUCCESS; if (supports_abbrev(writer) && datatype && datatype->buf) { - const char* type_uri = (const char*)datatype->buf; - if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1)) { - const char* const xsd_name = type_uri + sizeof(NS_XSD) - 1; - if (!strcmp(xsd_name, "boolean") || !strcmp(xsd_name, "integer")) { - return esink(node->buf, node->n_bytes, writer); - } - - if (!strcmp(xsd_name, "decimal") && strchr((const char*)node->buf, '.') && - node->buf[node->n_bytes - 1] != '.') { - /* xsd:decimal literals without trailing digits, e.g. "5.", can't be - written bare in Turtle. We could add a 0 which is prettier, but - changes the text and breaks round tripping. - */ - return esink(node->buf, node->n_bytes, writer); - } + const char* const xsd_name = get_xsd_name(writer->env, datatype); + if (!strcmp(xsd_name, "boolean") || !strcmp(xsd_name, "integer") || + (!strcmp(xsd_name, "decimal") && strchr((const char*)node->buf, '.') && + node->buf[node->n_bytes - 1] != '.')) { + return esink(node->buf, node->n_bytes, writer); } } @@ -746,7 +721,8 @@ write_uri_node(SerdWriter* const writer, } } - if (!has_scheme && !supports_uriref(writer) && + if (!has_scheme && + (writer->syntax == SERD_NTRIPLES || writer->syntax == SERD_NQUADS) && !serd_env_get_base_uri(writer->env, NULL)->buf) { return w_err(writer, SERD_ERR_BAD_ARG, @@ -859,26 +835,18 @@ write_node(SerdWriter* writer, Field field, SerdStatementFlags flags) { - switch (node->type) { - case SERD_NOTHING: - break; - case SERD_LITERAL: - return write_literal(writer, node, datatype, lang, flags); - case SERD_URI: - return write_uri_node(writer, node, field); - case SERD_CURIE: - return write_curie(writer, node); - case SERD_BLANK: - return write_blank(writer, node, field, flags); - } - - return SERD_SUCCESS; + return (node->type == SERD_LITERAL) + ? write_literal(writer, node, datatype, lang, flags) + : (node->type == SERD_URI) ? write_uri_node(writer, node, field) + : (node->type == SERD_CURIE) ? write_curie(writer, node) + : (node->type == SERD_BLANK) ? write_blank(writer, node, field, flags) + : SERD_SUCCESS; } static bool is_resource(const SerdNode* node) { - return node && node->buf && node->type > SERD_LITERAL; + return node->buf && node->type > SERD_LITERAL; } SERD_NODISCARD static SerdStatus @@ -946,11 +914,13 @@ serd_writer_write_statement(SerdWriter* writer, const SerdNode* lang) { assert(writer); + assert(subject); + assert(predicate); + assert(object); SerdStatus st = SERD_SUCCESS; - if (!is_resource(subject) || !is_resource(predicate) || !object || - !object->buf) { + if (!is_resource(subject) || !is_resource(predicate) || !object->buf) { return SERD_ERR_BAD_ARG; } @@ -984,8 +954,8 @@ serd_writer_write_statement(SerdWriter* writer, (!graph && writer->context.graph.type)) { TRY(st, terminate_context(writer)); reset_context(writer, RESET_GRAPH | RESET_INDENT); + TRY(st, write_newline(writer)); if (graph) { - TRY(st, write_newline(writer)); TRY(st, write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags)); TRY(st, write_sep(writer, SEP_GRAPH_BEGIN)); copy_node(&writer->context.graph, graph); @@ -1041,24 +1011,24 @@ serd_writer_write_statement(SerdWriter* writer, } else { // No abbreviation - if (serd_stack_is_empty(&writer->anon_stack)) { - if (writer->context.subject.type) { - TRY(st, write_sep(writer, SEP_END_S)); - } - if (writer->last_sep == SEP_END_S || writer->last_sep == SEP_END_DIRECT) { - TRY(st, write_newline(writer)); - } + if (!serd_stack_is_empty(&writer->anon_stack)) { + return SERD_ERR_BAD_ARG; + } - TRY(st, write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags)); - if ((flags & (SERD_ANON_S_BEGIN | SERD_LIST_S_BEGIN))) { - TRY(st, write_sep(writer, SEP_ANON_S_P)); - } else { - TRY(st, write_sep(writer, SEP_S_P)); - } + if (writer->context.subject.type) { + TRY(st, write_sep(writer, SEP_END_S)); + } - } else { + if (writer->last_sep == SEP_END_S || writer->last_sep == SEP_END_DIRECT) { + TRY(st, write_newline(writer)); + } + + TRY(st, write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags)); + if ((flags & (SERD_ANON_S_BEGIN | SERD_LIST_S_BEGIN))) { TRY(st, write_sep(writer, SEP_ANON_S_P)); + } else { + TRY(st, write_sep(writer, SEP_S_P)); } reset_context(writer, 0U); @@ -1145,7 +1115,7 @@ serd_writer_new(SerdSyntax syntax, void* stream) { assert(env); - assert(sink); + assert(ssink); const WriteContext context = WRITE_CONTEXT_NULL; SerdWriter* writer = (SerdWriter*)calloc(1, sizeof(SerdWriter)); @@ -1239,6 +1209,10 @@ serd_writer_set_prefix(SerdWriter* writer, const SerdNode* name, const SerdNode* uri) { + assert(writer); + assert(name); + assert(uri); + SerdStatus st = SERD_SUCCESS; TRY(st, serd_env_set_prefix(writer->env, name, uri)); diff --git a/test/.clang-tidy b/test/.clang-tidy index 75f5312d..457abcaf 100644 --- a/test/.clang-tidy +++ b/test/.clang-tidy @@ -11,5 +11,9 @@ Checks: > -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling, -concurrency-mt-unsafe, -hicpp-signed-bitwise, - -readability-function-cognitive-complexity, +CheckOptions: + - key: readability-function-cognitive-complexity.IgnoreMacros + value: 'true' + - key: readability-function-cognitive-complexity.Threshold + value: '6' InheritParentConfig: true diff --git a/test/extra/bad/bad-bom-1.ttl b/test/extra/bad/bad-bom-1.ttl new file mode 100644 index 00000000..7d7681d4 --- /dev/null +++ b/test/extra/bad/bad-bom-1.ttl @@ -0,0 +1,3 @@ +ï# This file starts with the first byte of the UTF-8 Byte Order Mark + +<http://example.org/thing> a <http://example.org/Thing> . diff --git a/test/extra/bad/bad-bom-2.ttl b/test/extra/bad/bad-bom-2.ttl new file mode 100644 index 00000000..9ce3f093 --- /dev/null +++ b/test/extra/bad/bad-bom-2.ttl @@ -0,0 +1,3 @@ +ï»# This file starts with the first two bytes of the UTF-8 Byte Order Mark + +<http://example.org/thing> a <http://example.org/Thing> . diff --git a/test/extra/bad/bad-bom-only-1.ttl b/test/extra/bad/bad-bom-only-1.ttl new file mode 100644 index 00000000..a4a063a1 --- /dev/null +++ b/test/extra/bad/bad-bom-only-1.ttl @@ -0,0 +1 @@ +ï
\ No newline at end of file diff --git a/test/extra/bad/bad-bom-only-2.ttl b/test/extra/bad/bad-bom-only-2.ttl new file mode 100644 index 00000000..022c50f1 --- /dev/null +++ b/test/extra/bad/bad-bom-only-2.ttl @@ -0,0 +1 @@ +ï»
\ No newline at end of file diff --git a/test/extra/bad/bad-prefix-dot.ttl b/test/extra/bad/bad-prefix-dot.ttl new file mode 100644 index 00000000..7b02211f --- /dev/null +++ b/test/extra/bad/bad-prefix-dot.ttl @@ -0,0 +1 @@ +@prefix dotted.: <http://example.org/> . diff --git a/test/extra/bad/manifest.ttl b/test/extra/bad/manifest.ttl index 6b6df540..64dbf05f 100644 --- a/test/extra/bad/manifest.ttl +++ b/test/extra/bad/manifest.ttl @@ -13,7 +13,10 @@ <#bad-blank-node-label> <#bad-blank-predicate> <#bad-blank-syntax> - <#bad-bom> + <#bad-bom-1> + <#bad-bom-2> + <#bad-bom-only-1> + <#bad-bom-only-2> <#bad-char-in-local> <#bad-char-in-prefix> <#bad-char-in-uri> @@ -113,8 +116,9 @@ <#bad-object2> <#bad-paths> <#bad-pn-escape> - <#bad-prefix-missing-colon> <#bad-prefix> + <#bad-prefix-dot> + <#bad-prefix-missing-colon> <#bad-quote-in-uri> <#bad-semicolon-after-subject> <#bad-string> @@ -164,10 +168,25 @@ mf:action <bad-blank-syntax.ttl> ; mf:name "bad-blank-syntax" . -<#bad-bom> +<#bad-bom-1> + a rdft:TestTurtleNegativeSyntax ; + mf:action <bad-bom-1.ttl> ; + mf:name "bad-bom-1" . + +<#bad-bom-2> + a rdft:TestTurtleNegativeSyntax ; + mf:action <bad-bom-2.ttl> ; + mf:name "bad-bom-2" . + +<#bad-bom-only-1> + a rdft:TestTurtleNegativeSyntax ; + mf:action <bad-bom-only-1.ttl> ; + mf:name "bad-bom-only-1" . + +<#bad-bom-only-2> a rdft:TestTurtleNegativeSyntax ; - mf:action <bad-bom.ttl> ; - mf:name "bad-bom" . + mf:action <bad-bom-only-2.ttl> ; + mf:name "bad-bom-only-2" . <#bad-char-in-local> a rdft:TestTurtleNegativeSyntax ; @@ -669,6 +688,11 @@ mf:action <bad-prefix.ttl> ; mf:name "bad-prefix" . +<#bad-prefix-dot> + a rdft:TestTurtleNegativeSyntax ; + mf:action <bad-prefix-dot.ttl> ; + mf:name "bad-prefix-dot" . + <#bad-prefix-missing-colon> a rdft:TestTurtleNegativeSyntax ; mf:action <bad-prefix-missing-colon.ttl> ; diff --git a/test/extra/good/manifest.ttl b/test/extra/good/manifest.ttl index befe9451..350d7d9c 100644 --- a/test/extra/good/manifest.ttl +++ b/test/extra/good/manifest.ttl @@ -28,12 +28,15 @@ <#test-id> <#test-list-in-blank> <#test-list-subject> + <#test-local-name-ends-with-dot> + <#test-local-name-escapes> + <#test-local-name-percent> <#test-long-utf8> <#test-no-spaces> <#test-non-curie-uri> <#test-nq-syntax-all-rules> <#test-nq-syntax-dot-end> - <#test-nq-syntax-eof-after-blank-dot> + <#test-nt-syntax-all-rules> <#test-nq-syntax-eol-cr> <#test-nq-syntax-eol-crlf> <#test-nq-syntax-eol-lf> @@ -46,13 +49,14 @@ <#test-nt-syntax-eol-crlf> <#test-nt-syntax-eol-lf> <#test-nt-syntax-eol-lfcr> - <#test-out-of-range-unicode> <#test-prefix> <#test-quote-escapes> <#test-rel> <#test-semi-dot> <#test-several-eaten-dots> <#test-string-escapes> + <#test-trig-syntax-all-rules> + <#test-ttl-syntax-all-rules> <#test-uri> ) . @@ -188,6 +192,24 @@ mf:name "test-list-subject" ; mf:result <test-list-subject.nt> . +<#test-local-name-ends-with-dot> + a rdft:TestTurtleEval ; + mf:action <test-local-name-ends-with-dot.ttl> ; + mf:name "test-local-name-ends-with-dot" ; + mf:result <test-local-name-ends-with-dot.nt> . + +<#test-local-name-escapes> + a rdft:TestTurtleEval ; + mf:action <test-local-name-escapes.ttl> ; + mf:name "test-local-name-escapes" ; + mf:result <test-local-name-escapes.nt> . + +<#test-local-name-percent> + a rdft:TestTurtleEval ; + mf:action <test-local-name-percent.ttl> ; + mf:name "test-local-name-percent" ; + mf:result <test-local-name-percent.nt> . + <#test-long-utf8> a rdft:TestTurtleEval ; mf:action <test-long-utf8.ttl> ; @@ -206,11 +228,6 @@ mf:name "test-non-curie-uri" ; mf:result <test-non-curie-uri.nt> . -<#test-nq-syntax-eof-after-blank-dot> - a rdft:TestNTriplesPositiveSyntax ; - mf:action <test-nq-syntax-eof-after-blank-dot.nq> ; - mf:name "test-nq-syntax-eof-after-blank-dot" . - <#test-nq-syntax-all-rules> a rdft:TestNQuadsPositiveSyntax ; mf:action <test-nq-syntax-all-rules.nq> ; @@ -221,6 +238,11 @@ mf:action <test-nq-syntax-dot-end.nq> ; mf:name "test-nq-syntax-dot-end" . +<#test-nq-syntax-eof-after-blank-dot> + a rdft:TestNTriplesPositiveSyntax ; + mf:action <test-nq-syntax-eof-after-blank-dot.nq> ; + mf:name "test-nq-syntax-eof-after-blank-dot" . + <#test-nq-syntax-eol-cr> a rdft:TestNTriplesPositiveSyntax ; mf:action <test-nq-syntax-eol-cr.nq> ; @@ -281,12 +303,6 @@ mf:action <test-nt-syntax-eol-lfcr.nt> ; mf:name "test-nt-syntax-eol-lfcr" . -<#test-out-of-range-unicode> - a rdft:TestTurtleEval ; - mf:action <test-out-of-range-unicode.ttl> ; - mf:name "test-out-of-range-unicode" ; - mf:result <test-out-of-range-unicode.nt> . - <#test-prefix> a rdft:TestTurtleEval ; mf:action <test-prefix.ttl> ; @@ -323,6 +339,16 @@ mf:name "test-string-escapes" ; mf:result <test-string-escapes.nt> . +<#test-trig-syntax-all-rules> + a rdft:TestTrigPositiveSyntax ; + mf:action <test-trig-syntax-all-rules.trig> ; + mf:name "test-trig-syntax-all-rules" . + +<#test-ttl-syntax-all-rules> + a rdft:TestTurtlePositiveSyntax ; + mf:action <test-ttl-syntax-all-rules.ttl> ; + mf:name "test-ttl-syntax-all-rules" . + <#test-uri> a rdft:TestTurtleEval ; mf:action <test-uri.ttl> ; diff --git a/test/extra/good/test-local-name-ends-with-dot.nt b/test/extra/good/test-local-name-ends-with-dot.nt new file mode 100644 index 00000000..3285348a --- /dev/null +++ b/test/extra/good/test-local-name-ends-with-dot.nt @@ -0,0 +1 @@ +<http://example.org/eg#s> <http://example.org/eg#p> <http://example.org/eg#foo.> . diff --git a/test/extra/good/test-local-name-escapes.nt b/test/extra/good/test-local-name-escapes.nt new file mode 100644 index 00000000..a6362d7a --- /dev/null +++ b/test/extra/good/test-local-name-escapes.nt @@ -0,0 +1,17 @@ +<http://example.org/s> <http://example.org/p> <http://example.org/o'> . +<http://example.org/s> <http://example.org/p> <http://example.org/o!> . +<http://example.org/s> <http://example.org/p> <http://example.org/o#> . +<http://example.org/s> <http://example.org/p> <http://example.org/o$> . +<http://example.org/s> <http://example.org/p> <http://example.org/o%> . +<http://example.org/s> <http://example.org/p> <http://example.org/o&> . +<http://example.org/s> <http://example.org/p> <http://example.org/o(> . +<http://example.org/s> <http://example.org/p> <http://example.org/o)> . +<http://example.org/s> <http://example.org/p> <http://example.org/o*> . +<http://example.org/s> <http://example.org/p> <http://example.org/o+> . +<http://example.org/s> <http://example.org/p> <http://example.org/o,> . +<http://example.org/s> <http://example.org/p> <http://example.org/o/> . +<http://example.org/s> <http://example.org/p> <http://example.org/o;> . +<http://example.org/s> <http://example.org/p> <http://example.org/o=> . +<http://example.org/s> <http://example.org/p> <http://example.org/o?> . +<http://example.org/s> <http://example.org/p> <http://example.org/o@> . +<http://example.org/s> <http://example.org/p> <http://example.org/o~> . diff --git a/test/extra/good/test-local-name-escapes.ttl b/test/extra/good/test-local-name-escapes.ttl new file mode 100644 index 00000000..8c5fce37 --- /dev/null +++ b/test/extra/good/test-local-name-escapes.ttl @@ -0,0 +1,19 @@ +@prefix eg: <http://example.org/> . + +eg:s eg:p eg:o\' . +eg:s eg:p eg:o\! . +eg:s eg:p eg:o\# . +eg:s eg:p eg:o\$ . +eg:s eg:p eg:o\% . +eg:s eg:p eg:o\& . +eg:s eg:p eg:o\( . +eg:s eg:p eg:o\) . +eg:s eg:p eg:o\* . +eg:s eg:p eg:o\+ . +eg:s eg:p eg:o\, . +eg:s eg:p eg:o\/ . +eg:s eg:p eg:o\; . +eg:s eg:p eg:o\= . +eg:s eg:p eg:o\? . +eg:s eg:p eg:o\@ . +eg:s eg:p eg:o\~ . diff --git a/test/extra/good/test-local-name-percent.nt b/test/extra/good/test-local-name-percent.nt new file mode 100644 index 00000000..e6330547 --- /dev/null +++ b/test/extra/good/test-local-name-percent.nt @@ -0,0 +1,2 @@ +<http://example.org/s> <http://example.org/p> <http://example.org/o%3E> . +<http://example.org/s> <http://example.org/p> <http://example.org/o%3f> . diff --git a/test/extra/good/test-local-name-percent.ttl b/test/extra/good/test-local-name-percent.ttl new file mode 100644 index 00000000..32fb63d5 --- /dev/null +++ b/test/extra/good/test-local-name-percent.ttl @@ -0,0 +1,4 @@ +@prefix eg: <http://example.org/> . + +eg:s eg:p eg:o%3E . +eg:s eg:p eg:o%3f . diff --git a/test/extra/lax/manifest.ttl b/test/extra/lax/manifest.ttl index b9890e14..f79e1984 100644 --- a/test/extra/lax/manifest.ttl +++ b/test/extra/lax/manifest.ttl @@ -8,14 +8,18 @@ mf:entries ( <#test-bad-string-nq> <#test-bad-string-nt> + <#test-bad-string-trig> <#test-bad-string-ttl> <#test-bad-uri-nq> <#test-bad-uri-nt> <#test-bad-uri-ttl> + <#test-bad-uri-trig> <#test-bad-utf8-nq> <#test-bad-utf8-nt> <#test-bad-utf8-ttl> + <#test-bad-utf8-trig> <#test-lone-list> + <#test-out-of-range-unicode> ) . <#test-bad-string-nq> @@ -36,6 +40,12 @@ mf:name "test-bad-string-ttl" ; mf:result <test-bad-string-out.nt> . +<#test-bad-string-trig> + a rdft:TestTrigNegativeSyntax ; + mf:action <test-bad-string.trig> ; + mf:name "test-bad-string-trig" ; + mf:result <test-bad-string-out.nt> . + <#test-bad-uri-nq> a rdft:TestNQuadsNegativeSyntax ; mf:action <test-bad-uri.nq> ; @@ -54,6 +64,12 @@ mf:name "test-bad-uri-ttl" ; mf:result <test-bad-uri-out.nt> . +<#test-bad-uri-trig> + a rdft:TestTurtleNegativeSyntax ; + mf:action <test-bad-uri.trig> ; + mf:name "test-bad-uri-trig" ; + mf:result <test-bad-uri-nq-out.nq> . + <#test-bad-utf8-nq> a rdft:TestNQuadsNegativeSyntax ; mf:action <test-bad-utf8.nq> ; @@ -72,8 +88,20 @@ mf:name "test-bad-utf8-ttl" ; mf:result <test-bad-utf8-ttl-out.nt> . +<#test-bad-utf8-trig> + a rdft:TestTurtleNegativeSyntax ; + mf:action <test-bad-utf8.trig> ; + mf:name "test-bad-utf8-trig" ; + mf:result <test-bad-utf8-ttl-out.nt> . + <#test-lone-list> a rdft:TestTurtleNegativeSyntax ; mf:action <test-lone-list.ttl> ; mf:name "test-lone-list" ; mf:result <test-lone-list.nt> . + +<#test-out-of-range-unicode> + a rdft:TestTurtleNegativeSyntax ; + mf:action <test-out-of-range-unicode.ttl> ; + mf:name "test-out-of-range-unicode" ; + mf:result <test-out-of-range-unicode.nt> . diff --git a/test/extra/lax/test-bad-string.trig b/test/extra/lax/test-bad-string.trig new file mode 100644 index 00000000..72eb9621 --- /dev/null +++ b/test/extra/lax/test-bad-string.trig @@ -0,0 +1,3 @@ +<http://example.org/s1> <http://example.org/p1> "Truncated line +<http://example.org/s1> <http://example.org/p1> "Bad escape \? " . +<http://example.org/s1> <http://example.org/p2> "Good" . diff --git a/test/extra/lax/test-bad-uri.trig b/test/extra/lax/test-bad-uri.trig new file mode 100644 index 00000000..ba852fef --- /dev/null +++ b/test/extra/lax/test-bad-uri.trig @@ -0,0 +1,8 @@ +<http://example.org/ÿÿbadg1> { + <http://example.org/s> <http://example.org/p> <http://example.org/goodo1> +} + +<http://example.org/s> + <http://example.org/p> <http://example.org/ bado1> ; + <http://example.org/p> <http://example.org/ÿÿbado2> ; + <http://example.org/p> <http://example.org/goodo2> . diff --git a/test/extra/lax/test-bad-utf8.trig b/test/extra/lax/test-bad-utf8.trig new file mode 100644 index 00000000..0e177366 --- /dev/null +++ b/test/extra/lax/test-bad-utf8.trig @@ -0,0 +1,6 @@ +<http://example.org/s> <http://example.org/p> "Impossible bytes: þ ÿ" . +<http://example.org/s> <http://example.org/p> "2 continuation bytes: €¿" . +<http://example.org/s> <http://example.org/p> "Missing continuation: À" . +<http://example.org/s> <http://example.org/p> """Impossible bytes: þ ÿ""" . +<http://example.org/s> <http://example.org/p> """2 continuation bytes: €¿""" . +<http://example.org/s> <http://example.org/p> """Missing continuation: À""" . diff --git a/test/extra/good/test-out-of-range-unicode.nt b/test/extra/lax/test-out-of-range-unicode.nt index 5def9e31..5def9e31 100644 --- a/test/extra/good/test-out-of-range-unicode.nt +++ b/test/extra/lax/test-out-of-range-unicode.nt diff --git a/test/extra/good/test-out-of-range-unicode.ttl b/test/extra/lax/test-out-of-range-unicode.ttl index 7e64785a..7e64785a 100644 --- a/test/extra/good/test-out-of-range-unicode.ttl +++ b/test/extra/lax/test-out-of-range-unicode.ttl diff --git a/test/extra/pretty/datatypes.ttl b/test/extra/pretty/datatypes.ttl index 721dfe4d..8f8b13f2 100644 --- a/test/extra/pretty/datatypes.ttl +++ b/test/extra/pretty/datatypes.ttl @@ -9,6 +9,8 @@ eg:s 1 , 2.3 , "4."^^xsd:decimal , + "5"^^xsd:decimal , + "6.7E8"^^xsd:float , false , true , "x"^^eg:datatype , diff --git a/test/extra/pretty/named-graph.trig b/test/extra/pretty/named-graph.trig index 5cd12f3b..29f1d970 100644 --- a/test/extra/pretty/named-graph.trig +++ b/test/extra/pretty/named-graph.trig @@ -1,8 +1,18 @@ @prefix eg: <http://example.org/> . -eg:g { - eg:s - eg:p [ +eg:g1 { + eg:s1 + eg:p1 [ a eg:Object ] . } + +eg:g2 { + eg:s2 + eg:p2 [ + a eg:Object + ] . +} + +eg:s3 + eg:p3 eg:o3 . diff --git a/test/headers/.clang-tidy b/test/headers/.clang-tidy index eaf6ac95..06ede334 100644 --- a/test/headers/.clang-tidy +++ b/test/headers/.clang-tidy @@ -1,4 +1,4 @@ -# Copyright 2020-2022 David Robillard <d@drobilla.net> +# Copyright 2020-2024 David Robillard <d@drobilla.net> # SPDX-License-Identifier: 0BSD OR ISC Checks: > @@ -6,6 +6,9 @@ Checks: > -altera-*, -llvmlibc-*, -readability-identifier-length, +CheckOptions: + - key: readability-function-cognitive-complexity.Threshold + value: '0' WarningsAsErrors: '*' HeaderFilterRegex: '.*' FormatStyle: file diff --git a/test/meson.build b/test/meson.build index f6f56c2b..33f86dfb 100644 --- a/test/meson.build +++ b/test/meson.build @@ -88,13 +88,12 @@ if get_option('lint') required: false, ) - pylint_scripts = simple_scripts + pylint_args = ['--disable', 'bad-option-value'] + simple_scripts if plot_py.found() - pylint_scripts += plot_scripts + pylint_args += plot_scripts endif - pylint_args = ['--disable', 'bad-option-value'] - test('pylint', pylint, args: pylint_args + pylint_scripts, suite: 'scripts') + test('pylint', pylint, args: pylint_args, suite: 'scripts') endif # Check Turtle formatting with serdi @@ -122,6 +121,7 @@ unit_tests = [ 'env', 'free_null', 'node', + 'reader', 'reader_writer', 'string', 'uri', @@ -154,14 +154,20 @@ simple_command_tests = { 'serdi': { 'bad': [ ['-c'], + ['-cx'], ['-fi'], ['-i', 'turtle'], - ['-i', 'unknown'], + ['-i', 'turt'], ['-i'], - ['-o', 'unknown'], + ['-ix'], + ['-o', '~unknown'], + ['-o', 'ntripleses'], ['-o'], + ['-ox'], ['-p'], + ['-px'], ['-r'], + ['-rx'], ['-z'], ], 'good': [ @@ -178,6 +184,7 @@ if is_variable('serdi') script_args = common_script_args + ['--serdi', serdi] serd_ttl = files('../serd.ttl')[0] bad_input_file = files('extra/bad/bad-base.ttl') + text_input_file = files('extra/bad/README.md') test('serd_ttl', serdi, args: [serd_ttl], suite: 'data') @@ -214,6 +221,7 @@ if is_variable('serdi') 'string': ['-s', '<foo> a <Bar> .'], 'no_such_file': ['no_such_file'], 'remote': ['ftp://example.org/unsupported.ttl'], + 'text': [text_input_file], } foreach name, args : bad_input_tests diff --git a/test/test_env.c b/test/test_env.c index 1de075f3..d51e0595 100644 --- a/test/test_env.c +++ b/test/test_env.c @@ -9,6 +9,7 @@ #include <stdint.h> #include <string.h> +#define NS_EG "http://example.org/" #define USTR(s) ((const uint8_t*)(s)) static SerdStatus @@ -24,12 +25,11 @@ count_prefixes(void* handle, const SerdNode* name, const SerdNode* uri) static void test_env(void) { - SerdNode u = serd_node_from_string(SERD_URI, USTR("http://example.org/foo")); - SerdNode b = serd_node_from_string(SERD_CURIE, USTR("invalid")); - SerdNode c = serd_node_from_string(SERD_CURIE, USTR("eg.2:b")); + SerdNode u = serd_node_from_string(SERD_URI, USTR(NS_EG "foo")); + SerdNode b = serd_node_from_string(SERD_CURIE, USTR("invalid")); + SerdNode c = serd_node_from_string(SERD_CURIE, USTR("eg.2:b")); SerdEnv* env = serd_env_new(NULL); - serd_env_set_prefix_from_strings( - env, USTR("eg.2"), USTR("http://example.org/")); + serd_env_set_prefix_from_strings(env, USTR("eg.2"), USTR(NS_EG "")); assert(!serd_env_set_base_uri(env, NULL)); assert(serd_env_set_base_uri(env, &SERD_NODE_NULL)); @@ -39,7 +39,8 @@ test_env(void) SerdChunk suffix; assert(!serd_env_qualify(NULL, &u, &u, &suffix)); assert(serd_env_expand(NULL, &c, &prefix, &suffix)); - assert(serd_env_expand(env, &b, &prefix, &suffix)); + assert(serd_env_expand(env, &b, &prefix, &suffix) == SERD_ERR_BAD_ARG); + assert(serd_env_expand(env, &u, &prefix, &suffix) == SERD_ERR_BAD_ARG); SerdNode nxnode = serd_env_expand_node(NULL, &c); assert(serd_node_equals(&nxnode, &SERD_NODE_NULL)); @@ -48,7 +49,7 @@ test_env(void) assert(serd_node_equals(&xnode, &SERD_NODE_NULL)); SerdNode xu = serd_env_expand_node(env, &u); - assert(!strcmp((const char*)xu.buf, "http://example.org/foo")); + assert(!strcmp((const char*)xu.buf, NS_EG "foo")); serd_node_free(&xu); SerdNode badpre = serd_node_from_string(SERD_CURIE, USTR("hm:what")); @@ -56,7 +57,7 @@ test_env(void) assert(serd_node_equals(&xbadpre, &SERD_NODE_NULL)); SerdNode xc = serd_env_expand_node(env, &c); - assert(!strcmp((const char*)xc.buf, "http://example.org/b")); + assert(!strcmp((const char*)xc.buf, NS_EG "b")); serd_node_free(&xc); assert(serd_env_set_prefix(env, &SERD_NODE_NULL, &SERD_NODE_NULL)); @@ -71,8 +72,7 @@ test_env(void) assert(serd_node_equals(&xblank, &SERD_NODE_NULL)); int n_prefixes = 0; - serd_env_set_prefix_from_strings( - env, USTR("eg.2"), USTR("http://example.org/")); + serd_env_set_prefix_from_strings(env, USTR("eg.2"), USTR(NS_EG)); serd_env_foreach(env, count_prefixes, &n_prefixes); assert(n_prefixes == 1); diff --git a/test/test_node.c b/test/test_node.c index f08363cb..af14171b 100644 --- a/test/test_node.c +++ b/test/test_node.c @@ -42,8 +42,14 @@ test_string_to_double(void) const double expt_test_nums[] = { 2.0E18, -5e19, +8e20, 2e+22, -5e-5, 8e0, 9e-0, 2e+0}; - const char* expt_test_strs[] = { - "02e18", "-5e019", "+8e20", "2E+22", "-5E-5", "8E0", "9e-0", " 2e+0"}; + const char* expt_test_strs[] = {"02e18", + "-5e019", + " +8e20", + "\f2E+22", + "\n-5E-5", + "\r8E0", + "\t9e-0", + "\v2e+0"}; for (size_t i = 0; i < sizeof(expt_test_nums) / sizeof(double); ++i) { const double num = serd_strtod(expt_test_strs[i], NULL); @@ -144,6 +150,58 @@ test_blob_to_node(void) } static void +test_base64_decode(void) +{ + static const char* const decoded = "test"; + static const size_t decoded_len = 4U; + + // Test decoding clean base64 + { + static const char* const encoded = "dGVzdA=="; + static const size_t encoded_len = 8U; + + size_t size = 0U; + void* const data = + serd_base64_decode((const uint8_t*)encoded, encoded_len, &size); + + assert(data); + assert(size == decoded_len); + assert(!strncmp((const char*)data, decoded, decoded_len)); + serd_free(data); + } + + // Test decoding equivalent dirty base64 with ignored junk characters + { + static const char* const encoded = "d-G#V!z*d(A$%=="; + static const size_t encoded_len = 13U; + + size_t size = 0U; + void* const data = + serd_base64_decode((const uint8_t*)encoded, encoded_len, &size); + + assert(data); + assert(size == decoded_len); + assert(!strncmp((const char*)data, decoded, decoded_len)); + serd_free(data); + } + + // Test decoding effectively nothing + { + static const char* const encoded = "@#$%"; + static const size_t encoded_len = 4U; + + size_t size = 0U; + void* const data = + serd_base64_decode((const uint8_t*)encoded, encoded_len, &size); + + assert(data); + assert(!size); + // Contents of data are undefined + serd_free(data); + } +} + +static void test_node_equals(void) { const uint8_t replacement_char_str[] = {0xEF, 0xBF, 0xBD, 0}; @@ -176,6 +234,8 @@ test_node_from_string(void) static void test_node_from_substring(void) { + static const uint8_t utf8_str[] = {'l', 0xC3, 0xB6, 'n', 'g', 0}; + SerdNode empty = serd_node_from_substring(SERD_LITERAL, NULL, 32); assert(!empty.buf && !empty.n_bytes && !empty.n_chars && !empty.flags && !empty.type); @@ -187,6 +247,30 @@ test_node_from_substring(void) a_b = serd_node_from_substring(SERD_LITERAL, USTR("a\"bc"), 10); assert(a_b.n_bytes == 4 && a_b.n_chars == 4 && a_b.flags == SERD_HAS_QUOTE && !strncmp((const char*)a_b.buf, "a\"bc", 4)); + + SerdNode utf8 = serd_node_from_substring(SERD_LITERAL, utf8_str, 5); + assert(utf8.n_bytes == 5 && utf8.n_chars == 4 && !utf8.flags && + !strncmp((const char*)utf8.buf, (const char*)utf8_str, 6)); +} + +static void +test_uri_node_from_node(void) +{ + const SerdNode string = serd_node_from_string(SERD_LITERAL, USTR("s")); + SerdNode string_node = serd_node_new_uri_from_node(&string, NULL, NULL); + assert(!string_node.n_bytes); + serd_node_free(&string_node); + + const SerdNode nouri = {NULL, 0U, 0U, 0U, SERD_URI}; + SerdNode nouri_node = serd_node_new_uri_from_node(&nouri, NULL, NULL); + assert(!nouri_node.n_bytes); + serd_node_free(&nouri_node); + + const SerdNode uri = + serd_node_from_string(SERD_URI, USTR("http://example.org/p")); + SerdNode uri_node = serd_node_new_uri_from_node(&uri, NULL, NULL); + assert(uri_node.n_bytes == 20U); + serd_node_free(&uri_node); } int @@ -196,9 +280,11 @@ main(void) test_double_to_node(); test_integer_to_node(); test_blob_to_node(); + test_base64_decode(); test_node_equals(); test_node_from_string(); test_node_from_substring(); + test_uri_node_from_node(); printf("Success\n"); return 0; diff --git a/test/test_reader.c b/test/test_reader.c new file mode 100644 index 00000000..b102f8a6 --- /dev/null +++ b/test/test_reader.c @@ -0,0 +1,437 @@ +// Copyright 2011-2024 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#undef NDEBUG + +#include "serd/serd.h" + +#ifdef _WIN32 +# include <windows.h> +#endif + +#include <assert.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define USTR(s) ((const uint8_t*)(s)) + +typedef struct { + int n_base; + int n_prefix; + int n_statement; + int n_end; +} ReaderTest; + +static SerdStatus +test_base_sink(void* const handle, const SerdNode* const uri) +{ + (void)uri; + + ReaderTest* const rt = (ReaderTest*)handle; + ++rt->n_base; + return SERD_SUCCESS; +} + +static SerdStatus +test_prefix_sink(void* const handle, + const SerdNode* const name, + const SerdNode* const uri) +{ + (void)name; + (void)uri; + + ReaderTest* const rt = (ReaderTest*)handle; + ++rt->n_prefix; + return SERD_SUCCESS; +} + +static SerdStatus +test_statement_sink(void* const handle, + SerdStatementFlags flags, + const SerdNode* const graph, + const SerdNode* const subject, + const SerdNode* const predicate, + const SerdNode* const object, + const SerdNode* const object_datatype, + const SerdNode* const object_lang) +{ + (void)flags; + (void)graph; + (void)subject; + (void)predicate; + (void)object; + (void)object_datatype; + (void)object_lang; + + ReaderTest* const rt = (ReaderTest*)handle; + ++rt->n_statement; + return SERD_SUCCESS; +} + +static SerdStatus +test_end_sink(void* const handle, const SerdNode* const node) +{ + (void)node; + + ReaderTest* const rt = (ReaderTest*)handle; + ++rt->n_end; + return SERD_SUCCESS; +} + +static void +test_read_string(void) +{ + ReaderTest rt = {0, 0, 0, 0}; + SerdReader* const reader = serd_reader_new(SERD_TURTLE, + &rt, + NULL, + test_base_sink, + test_prefix_sink, + test_statement_sink, + test_end_sink); + + assert(reader); + assert(serd_reader_get_handle(reader) == &rt); + + // Test reading a string that ends exactly at the end of input (no newline) + const SerdStatus st = serd_reader_read_string( + reader, + USTR("<http://example.org/s> <http://example.org/p> " + "<http://example.org/o> .")); + + assert(!st); + assert(rt.n_base == 0); + assert(rt.n_prefix == 0); + assert(rt.n_statement == 1); + assert(rt.n_end == 0); + + serd_reader_free(reader); +} + +/// Reads a null byte after a statement, then succeeds again (like a socket) +static size_t +eof_test_read(void* buf, size_t size, size_t nmemb, void* stream) +{ + assert(size == 1); + assert(nmemb == 1); + (void)size; + + static const char* const string = "_:s1 <http://example.org/p> _:o1 .\n" + "_:s2 <http://example.org/p> _:o2 .\n"; + + size_t* const count = (size_t*)stream; + + // Normal reading for the first statement + if (*count < 35) { + *(char*)buf = string[*count]; + ++*count; + return nmemb; + } + + // EOF for the first read at the start of the second statement + if (*count == 35) { + assert(string[*count] == '_'); + ++*count; + return 0; + } + + if (*count >= strlen(string)) { + return 0; + } + + // Normal reading after the EOF, adjusting for the skipped index 35 + *(char*)buf = string[*count - 1]; + ++*count; + return nmemb; +} + +static int +eof_test_error(void* stream) +{ + (void)stream; + return 0; +} + +/// A read of a big page hits EOF then fails to read chunks immediately +static void +test_read_eof_by_page(const char* const path) +{ + FILE* const f = fopen(path, "w+b"); + assert(f); + + fprintf(f, "_:s <http://example.org/p> _:o .\n"); + fflush(f); + fseek(f, 0L, SEEK_SET); + + ReaderTest rt = {0, 0, 0, 0}; + SerdReader* const reader = serd_reader_new(SERD_TURTLE, + &rt, + NULL, + test_base_sink, + test_prefix_sink, + test_statement_sink, + test_end_sink); + + serd_reader_start_stream(reader, f, (const uint8_t*)"test", true); + + assert(serd_reader_read_chunk(reader) == SERD_SUCCESS); + assert(serd_reader_read_chunk(reader) == SERD_FAILURE); + assert(serd_reader_read_chunk(reader) == SERD_FAILURE); + + serd_reader_end_stream(reader); + serd_reader_free(reader); + fclose(f); +} + +// A byte-wise reader hits EOF once then continues (like a socket) +static void +test_read_eof_by_byte(void) +{ + ReaderTest rt = {0, 0, 0, 0}; + SerdReader* const reader = serd_reader_new(SERD_TURTLE, + &rt, + NULL, + test_base_sink, + test_prefix_sink, + test_statement_sink, + test_end_sink); + + size_t n_reads = 0U; + serd_reader_start_source_stream(reader, + eof_test_read, + eof_test_error, + &n_reads, + (const uint8_t*)"test", + 1U); + + assert(serd_reader_read_chunk(reader) == SERD_SUCCESS); + assert(serd_reader_read_chunk(reader) == SERD_FAILURE); + assert(serd_reader_read_chunk(reader) == SERD_SUCCESS); + assert(serd_reader_read_chunk(reader) == SERD_FAILURE); + + serd_reader_free(reader); +} + +static void +test_read_nquads_chunks(const char* const path) +{ + static const char null = 0; + + FILE* const f = fopen(path, "w+b"); + + // Write two statements, a null separator, then another statement + + fprintf(f, + "<http://example.org/s> <http://example.org/p1> " + "<http://example.org/o1> .\n"); + + fprintf(f, + "<http://example.org/s> <http://example.org/p2> " + "<http://example.org/o2> .\n"); + + fwrite(&null, sizeof(null), 1, f); + + fprintf(f, + "<http://example.org/s> <http://example.org/p3> " + "<http://example.org/o3> .\n"); + + fseek(f, 0, SEEK_SET); + + ReaderTest rt = {0, 0, 0, 0}; + SerdReader* const reader = serd_reader_new(SERD_NQUADS, + &rt, + NULL, + test_base_sink, + test_prefix_sink, + test_statement_sink, + test_end_sink); + + assert(reader); + assert(serd_reader_get_handle(reader) == &rt); + assert(f); + + SerdStatus st = serd_reader_start_stream(reader, f, NULL, false); + assert(st == SERD_SUCCESS); + + // Read first statement + st = serd_reader_read_chunk(reader); + assert(st == SERD_SUCCESS); + assert(rt.n_base == 0); + assert(rt.n_prefix == 0); + assert(rt.n_statement == 1); + assert(rt.n_end == 0); + + // Read second statement + st = serd_reader_read_chunk(reader); + assert(st == SERD_SUCCESS); + assert(rt.n_base == 0); + assert(rt.n_prefix == 0); + assert(rt.n_statement == 2); + assert(rt.n_end == 0); + + // Read terminator + st = serd_reader_read_chunk(reader); + assert(st == SERD_FAILURE); + assert(rt.n_base == 0); + assert(rt.n_prefix == 0); + assert(rt.n_statement == 2); + assert(rt.n_end == 0); + + // Read last statement + st = serd_reader_read_chunk(reader); + assert(st == SERD_SUCCESS); + assert(rt.n_base == 0); + assert(rt.n_prefix == 0); + assert(rt.n_statement == 3); + assert(rt.n_end == 0); + + // EOF + st = serd_reader_read_chunk(reader); + assert(st == SERD_FAILURE); + assert(rt.n_base == 0); + assert(rt.n_prefix == 0); + assert(rt.n_statement == 3); + assert(rt.n_end == 0); + + assert(serd_reader_read_chunk(reader) == SERD_FAILURE); + + serd_reader_free(reader); + fclose(f); + remove(path); +} + +static void +test_read_turtle_chunks(const char* const path) +{ + static const char null = 0; + + FILE* const f = fopen(path, "w+b"); + + // Write two statements separated by null characters + fprintf(f, "@base <http://example.org/base/> .\n"); + fprintf(f, "@prefix eg: <http://example.org/> .\n"); + fprintf(f, "eg:s eg:p1 eg:o1 ;\n"); + fprintf(f, " eg:p2 eg:o2 .\n"); + fwrite(&null, sizeof(null), 1, f); + fprintf(f, "eg:s eg:p [ eg:sp eg:so ] .\n"); + fwrite(&null, sizeof(null), 1, f); + fseek(f, 0, SEEK_SET); + + ReaderTest rt = {0, 0, 0, 0}; + SerdReader* const reader = serd_reader_new(SERD_TURTLE, + &rt, + NULL, + test_base_sink, + test_prefix_sink, + test_statement_sink, + test_end_sink); + + assert(reader); + assert(serd_reader_get_handle(reader) == &rt); + assert(f); + + SerdStatus st = serd_reader_start_stream(reader, f, NULL, false); + assert(st == SERD_SUCCESS); + + // Read base + st = serd_reader_read_chunk(reader); + assert(st == SERD_SUCCESS); + assert(rt.n_base == 1); + assert(rt.n_prefix == 0); + assert(rt.n_statement == 0); + assert(rt.n_end == 0); + + // Read prefix + st = serd_reader_read_chunk(reader); + assert(st == SERD_SUCCESS); + assert(rt.n_base == 1); + assert(rt.n_prefix == 1); + assert(rt.n_statement == 0); + assert(rt.n_end == 0); + + // Read first two statements + st = serd_reader_read_chunk(reader); + assert(st == SERD_SUCCESS); + assert(rt.n_base == 1); + assert(rt.n_prefix == 1); + assert(rt.n_statement == 2); + assert(rt.n_end == 0); + + // Read terminator + st = serd_reader_read_chunk(reader); + assert(st == SERD_FAILURE); + assert(rt.n_base == 1); + assert(rt.n_prefix == 1); + assert(rt.n_statement == 2); + assert(rt.n_end == 0); + + // Read statements after null terminator + st = serd_reader_read_chunk(reader); + assert(st == SERD_SUCCESS); + assert(rt.n_base == 1); + assert(rt.n_prefix == 1); + assert(rt.n_statement == 4); + assert(rt.n_end == 1); + + // Read terminator + st = serd_reader_read_chunk(reader); + assert(st == SERD_FAILURE); + assert(rt.n_base == 1); + assert(rt.n_prefix == 1); + assert(rt.n_statement == 4); + assert(rt.n_end == 1); + + // EOF + st = serd_reader_read_chunk(reader); + assert(st == SERD_FAILURE); + assert(rt.n_base == 1); + assert(rt.n_prefix == 1); + assert(rt.n_statement == 4); + assert(rt.n_end == 1); + + assert(serd_reader_read_chunk(reader) == SERD_FAILURE); + + serd_reader_free(reader); + fclose(f); + remove(path); +} + +int +main(void) +{ +#ifdef _WIN32 + char tmp[MAX_PATH] = {0}; + const size_t tmp_len = (size_t)GetTempPath(sizeof(tmp), tmp); +#else + const char* const env_tmp = getenv("TMPDIR"); + const char* const tmp = env_tmp ? env_tmp : "/tmp"; + const size_t tmp_len = strlen(tmp); +#endif + + const char* const ttl_name = "serd_test_reader.ttl"; + const char* const nq_name = "serd_test_reader.nq"; + const size_t ttl_name_len = strlen(ttl_name); + const size_t nq_name_len = strlen(nq_name); + const size_t path_len = tmp_len + 1 + ttl_name_len; + char* const path = (char*)calloc(path_len + 1, 1); + + memcpy(path, tmp, tmp_len + 1); + path[tmp_len] = '/'; + + memcpy(path + tmp_len + 1, nq_name, nq_name_len + 1); + test_read_nquads_chunks(path); + + memcpy(path + tmp_len + 1, ttl_name, ttl_name_len + 1); + test_read_turtle_chunks(path); + + test_read_string(); + test_read_eof_by_page(path); + test_read_eof_by_byte(); + assert(!remove(path)); + + free(path); + return 0; +} diff --git a/test/test_reader_writer.c b/test/test_reader_writer.c index cd7ca408..c229d1c5 100644 --- a/test/test_reader_writer.c +++ b/test/test_reader_writer.c @@ -1,4 +1,4 @@ -// Copyright 2011-2023 David Robillard <d@drobilla.net> +// Copyright 2011-2024 David Robillard <d@drobilla.net> // SPDX-License-Identifier: ISC #undef NDEBUG @@ -11,7 +11,6 @@ #include <assert.h> #include <errno.h> -#include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -25,10 +24,7 @@ typedef struct { } ErrorContext; typedef struct { - int n_base; - int n_prefix; int n_statement; - int n_end; const SerdNode* graph; } ReaderTest; @@ -41,6 +37,8 @@ static const char* const doc_string = " \"lang\"@en ;\n" " eg:p <http://example.com/o> .\n" "}\n" + "@prefix other: <http://example.org/other> .\n" + "@base <http://drobilla.net/> .\n" "eg:s\n" " <http://example.org/p> [\n" " eg:p 3.0 ,\n" @@ -56,27 +54,6 @@ static const char* const doc_string = "( eg:o ) eg:t eg:u .\n"; static SerdStatus -test_base_sink(void* handle, const SerdNode* uri) -{ - (void)uri; - - ReaderTest* rt = (ReaderTest*)handle; - ++rt->n_base; - return SERD_SUCCESS; -} - -static SerdStatus -test_prefix_sink(void* handle, const SerdNode* name, const SerdNode* uri) -{ - (void)name; - (void)uri; - - ReaderTest* rt = (ReaderTest*)handle; - ++rt->n_prefix; - return SERD_SUCCESS; -} - -static SerdStatus test_statement_sink(void* handle, SerdStatementFlags flags, const SerdNode* graph, @@ -99,274 +76,6 @@ test_statement_sink(void* handle, return SERD_SUCCESS; } -static SerdStatus -test_end_sink(void* handle, const SerdNode* node) -{ - (void)node; - - ReaderTest* rt = (ReaderTest*)handle; - ++rt->n_end; - return SERD_SUCCESS; -} - -/// Reads a null byte after a statement, then succeeds again (like a socket) -static size_t -eof_test_read(void* buf, size_t size, size_t nmemb, void* stream) -{ - assert(size == 1); - assert(nmemb == 1); - (void)size; - - static const char* const string = "_:s1 <http://example.org/p> _:o1 .\n" - "_:s2 <http://example.org/p> _:o2 .\n"; - - size_t* const count = (size_t*)stream; - - // Normal reading for the first statement - if (*count < 35) { - *(char*)buf = string[*count]; - ++*count; - return nmemb; - } - - // EOF for the first read at the start of the second statement - if (*count == 35) { - assert(string[*count] == '_'); - ++*count; - return 0; - } - - if (*count >= strlen(string)) { - return 0; - } - - // Normal reading after the EOF, adjusting for the skipped index 35 - *(char*)buf = string[*count - 1]; - ++*count; - return nmemb; -} - -static int -eof_test_error(void* stream) -{ - (void)stream; - return 0; -} - -static void -test_read_nquads_chunks(const char* const path) -{ - static const char null = 0; - - FILE* const f = fopen(path, "w+b"); - - // Write two statements, a null separator, then another statement - - fprintf(f, - "<http://example.org/s> <http://example.org/p1> " - "<http://example.org/o1> .\n"); - - fprintf(f, - "<http://example.org/s> <http://example.org/p2> " - "<http://example.org/o2> .\n"); - - fwrite(&null, sizeof(null), 1, f); - - fprintf(f, - "<http://example.org/s> <http://example.org/p3> " - "<http://example.org/o3> .\n"); - - fseek(f, 0, SEEK_SET); - - ReaderTest* const rt = (ReaderTest*)calloc(1, sizeof(ReaderTest)); - SerdReader* const reader = serd_reader_new(SERD_NQUADS, - rt, - free, - test_base_sink, - test_prefix_sink, - test_statement_sink, - test_end_sink); - - assert(reader); - assert(serd_reader_get_handle(reader) == rt); - assert(f); - - SerdStatus st = serd_reader_start_stream(reader, f, NULL, false); - assert(st == SERD_SUCCESS); - - // Read first statement - st = serd_reader_read_chunk(reader); - assert(st == SERD_SUCCESS); - assert(rt->n_base == 0); - assert(rt->n_prefix == 0); - assert(rt->n_statement == 1); - assert(rt->n_end == 0); - - // Read second statement - st = serd_reader_read_chunk(reader); - assert(st == SERD_SUCCESS); - assert(rt->n_base == 0); - assert(rt->n_prefix == 0); - assert(rt->n_statement == 2); - assert(rt->n_end == 0); - - // Read terminator - st = serd_reader_read_chunk(reader); - assert(st == SERD_FAILURE); - assert(rt->n_base == 0); - assert(rt->n_prefix == 0); - assert(rt->n_statement == 2); - assert(rt->n_end == 0); - - // Read last statement - st = serd_reader_read_chunk(reader); - assert(st == SERD_SUCCESS); - assert(rt->n_base == 0); - assert(rt->n_prefix == 0); - assert(rt->n_statement == 3); - assert(rt->n_end == 0); - - // EOF - st = serd_reader_read_chunk(reader); - assert(st == SERD_FAILURE); - assert(rt->n_base == 0); - assert(rt->n_prefix == 0); - assert(rt->n_statement == 3); - assert(rt->n_end == 0); - - assert(serd_reader_read_chunk(reader) == SERD_FAILURE); - - serd_reader_free(reader); - fclose(f); - remove(path); -} - -static void -test_read_turtle_chunks(const char* const path) -{ - static const char null = 0; - - FILE* const f = fopen(path, "w+b"); - - // Write two statements separated by null characters - fprintf(f, "@base <http://example.org/base/> .\n"); - fprintf(f, "@prefix eg: <http://example.org/> .\n"); - fprintf(f, "eg:s eg:p1 eg:o1 ;\n"); - fprintf(f, " eg:p2 eg:o2 .\n"); - fwrite(&null, sizeof(null), 1, f); - fprintf(f, "eg:s eg:p [ eg:sp eg:so ] .\n"); - fwrite(&null, sizeof(null), 1, f); - fseek(f, 0, SEEK_SET); - - ReaderTest* const rt = (ReaderTest*)calloc(1, sizeof(ReaderTest)); - SerdReader* const reader = serd_reader_new(SERD_TURTLE, - rt, - free, - test_base_sink, - test_prefix_sink, - test_statement_sink, - test_end_sink); - - assert(reader); - assert(serd_reader_get_handle(reader) == rt); - assert(f); - - SerdStatus st = serd_reader_start_stream(reader, f, NULL, false); - assert(st == SERD_SUCCESS); - - // Read base - st = serd_reader_read_chunk(reader); - assert(st == SERD_SUCCESS); - assert(rt->n_base == 1); - assert(rt->n_prefix == 0); - assert(rt->n_statement == 0); - assert(rt->n_end == 0); - - // Read prefix - st = serd_reader_read_chunk(reader); - assert(st == SERD_SUCCESS); - assert(rt->n_base == 1); - assert(rt->n_prefix == 1); - assert(rt->n_statement == 0); - assert(rt->n_end == 0); - - // Read first two statements - st = serd_reader_read_chunk(reader); - assert(st == SERD_SUCCESS); - assert(rt->n_base == 1); - assert(rt->n_prefix == 1); - assert(rt->n_statement == 2); - assert(rt->n_end == 0); - - // Read terminator - st = serd_reader_read_chunk(reader); - assert(st == SERD_FAILURE); - assert(rt->n_base == 1); - assert(rt->n_prefix == 1); - assert(rt->n_statement == 2); - assert(rt->n_end == 0); - - // Read statements after null terminator - st = serd_reader_read_chunk(reader); - assert(st == SERD_SUCCESS); - assert(rt->n_base == 1); - assert(rt->n_prefix == 1); - assert(rt->n_statement == 4); - assert(rt->n_end == 1); - - // Read terminator - st = serd_reader_read_chunk(reader); - assert(st == SERD_FAILURE); - assert(rt->n_base == 1); - assert(rt->n_prefix == 1); - assert(rt->n_statement == 4); - assert(rt->n_end == 1); - - // EOF - st = serd_reader_read_chunk(reader); - assert(st == SERD_FAILURE); - assert(rt->n_base == 1); - assert(rt->n_prefix == 1); - assert(rt->n_statement == 4); - assert(rt->n_end == 1); - - assert(serd_reader_read_chunk(reader) == SERD_FAILURE); - - serd_reader_free(reader); - fclose(f); - remove(path); -} - -static void -test_read_string(void) -{ - ReaderTest* rt = (ReaderTest*)calloc(1, sizeof(ReaderTest)); - SerdReader* reader = serd_reader_new(SERD_TURTLE, - rt, - free, - test_base_sink, - test_prefix_sink, - test_statement_sink, - test_end_sink); - - assert(reader); - assert(serd_reader_get_handle(reader) == rt); - - // Test reading a string that ends exactly at the end of input (no newline) - const SerdStatus st = serd_reader_read_string( - reader, - USTR("<http://example.org/s> <http://example.org/p> " - "<http://example.org/o> .")); - - assert(!st); - assert(rt->n_base == 0); - assert(rt->n_prefix == 0); - assert(rt->n_statement == 1); - assert(rt->n_end == 0); - - serd_reader_free(reader); -} - static size_t faulty_sink(const void* const buf, const size_t len, void* const stream) { @@ -399,7 +108,7 @@ test_write_errors(void) ErrorContext ctx = {0U, 0U}; const SerdStyle style = (SerdStyle)(SERD_STYLE_STRICT | SERD_STYLE_CURIED); - const size_t max_offsets[] = {0, 386, 1911, 2003, 386}; + const size_t max_offsets[] = {0, 462, 1911, 2003, 462}; // Test errors at different offsets to hit different code paths for (unsigned s = 1; s <= (unsigned)SERD_TRIG; ++s) { @@ -558,13 +267,8 @@ static void test_reader(const char* path) { ReaderTest* rt = (ReaderTest*)calloc(1, sizeof(ReaderTest)); - SerdReader* reader = serd_reader_new(SERD_TURTLE, - rt, - free, - test_base_sink, - test_prefix_sink, - test_statement_sink, - test_end_sink); + SerdReader* reader = serd_reader_new( + SERD_TURTLE, rt, free, NULL, NULL, test_statement_sink, NULL); assert(reader); assert(serd_reader_get_handle(reader) == rt); @@ -590,44 +294,12 @@ test_reader(const char* path) const SerdStatus st = serd_reader_read_file(reader, USTR(path)); assert(!st); - assert(rt->n_base == 0); - assert(rt->n_prefix == 0); assert(rt->n_statement == 13); - assert(rt->n_end == 0); assert(rt->graph && rt->graph->buf && !strcmp((const char*)rt->graph->buf, "http://example.org/")); assert(serd_reader_read_string(reader, USTR("This isn't Turtle at all."))); - // A read of a big page hits EOF then fails to read chunks immediately - { - FILE* const in = fopen(path, "rb"); - serd_reader_start_stream(reader, in, (const uint8_t*)"test", true); - - assert(serd_reader_read_chunk(reader) == SERD_SUCCESS); - assert(serd_reader_read_chunk(reader) == SERD_FAILURE); - assert(serd_reader_read_chunk(reader) == SERD_FAILURE); - - serd_reader_end_stream(reader); - fclose(in); - } - - // A byte-wise reader that hits EOF once then continues (like a socket) - { - size_t n_reads = 0; - serd_reader_start_source_stream(reader, - (SerdSource)eof_test_read, - (SerdStreamErrorFunc)eof_test_error, - &n_reads, - NULL, - 1); - - assert(serd_reader_read_chunk(reader) == SERD_SUCCESS); - assert(serd_reader_read_chunk(reader) == SERD_FAILURE); - assert(serd_reader_read_chunk(reader) == SERD_SUCCESS); - assert(serd_reader_read_chunk(reader) == SERD_FAILURE); - } - serd_reader_free(reader); } @@ -644,22 +316,14 @@ main(void) #endif const char* const ttl_name = "serd_test_reader_writer.ttl"; - const char* const nq_name = "serd_test_reader_writer.nq"; const size_t ttl_name_len = strlen(ttl_name); - const size_t nq_name_len = strlen(nq_name); const size_t path_len = tmp_len + 1 + ttl_name_len; char* const path = (char*)calloc(path_len + 1, 1); memcpy(path, tmp, tmp_len + 1); path[tmp_len] = '/'; - - memcpy(path + tmp_len + 1, nq_name, nq_name_len + 1); - test_read_nquads_chunks(path); - memcpy(path + tmp_len + 1, ttl_name, ttl_name_len + 1); - test_read_turtle_chunks(path); - test_read_string(); test_write_errors(); test_writer(path); diff --git a/test/test_string.c b/test/test_string.c index 6767e5ae..23835ca9 100644 --- a/test/test_string.c +++ b/test/test_string.c @@ -11,19 +11,31 @@ #include <string.h> static void +check_strlen(const char* const str, + const size_t expected_n_bytes, + const size_t expected_n_chars, + const SerdNodeFlags expected_flags) +{ + size_t n_bytes = 0U; + SerdNodeFlags flags = 0U; + const size_t n_chars = serd_strlen((const uint8_t*)str, &n_bytes, &flags); + + assert(n_bytes == expected_n_bytes); + assert(n_chars == expected_n_chars); + assert(flags == expected_flags); +} + +static void test_strlen(void) { - const uint8_t str[] = {'"', '5', 0xE2, 0x82, 0xAC, '"', '\n', 0}; + static const uint8_t utf8[] = {'"', '5', 0xE2, 0x82, 0xAC, '"', '\n', 0}; - size_t n_bytes = 0; - SerdNodeFlags flags = 0; - size_t len = serd_strlen(str, &n_bytes, &flags); - assert(len == 5 && n_bytes == 7 && - flags == (SERD_HAS_QUOTE | SERD_HAS_NEWLINE)); - len = serd_strlen(str, NULL, &flags); - assert(len == 5); + check_strlen("\"quotes\"", 8U, 8U, SERD_HAS_QUOTE); + check_strlen("newline\n", 8U, 8U, SERD_HAS_NEWLINE); + check_strlen("\rreturn", 7U, 7U, SERD_HAS_NEWLINE); + check_strlen((const char*)utf8, 7U, 5U, SERD_HAS_QUOTE | SERD_HAS_NEWLINE); - assert(serd_strlen(str, &n_bytes, NULL) == 5); + assert(serd_strlen((const uint8_t*)"nulls", NULL, NULL) == 5U); } static void diff --git a/test/test_uri.c b/test/test_uri.c index cc81b40e..fc5eab71 100644 --- a/test/test_uri.c +++ b/test/test_uri.c @@ -16,6 +16,8 @@ static void test_uri_string_has_scheme(void) { + assert(!serd_uri_string_has_scheme(NULL)); + assert(!serd_uri_string_has_scheme(USTR("relative"))); assert(!serd_uri_string_has_scheme(USTR("http"))); assert(!serd_uri_string_has_scheme(USTR("5nostartdigit"))); @@ -99,6 +101,8 @@ test_uri_to_path(void) "C|/Windows/Sucks")); assert(!serd_uri_to_path((const uint8_t*)"http://example.org/path")); + + assert(!strcmp((const char*)serd_uri_to_path((const uint8_t*)"rel"), "rel")); } #if defined(__GNUC__) @@ -166,11 +170,20 @@ test_uri_parsing(void) "/C:\\Pointless Space"); #endif + // Test tolerance of NULL hostname parameter + uint8_t* const hosted = serd_file_uri_parse(USTR("file://host/path"), NULL); + assert(!strcmp((const char*)hosted, "/path")); + serd_free(hosted); + // Test tolerance of parsing junk URI escapes - uint8_t* out_path = serd_file_uri_parse(USTR("file:///foo/%0Xbar"), NULL); - assert(!strcmp((const char*)out_path, "/foo/bar")); - serd_free(out_path); + uint8_t* const junk1 = serd_file_uri_parse(USTR("file:///foo/%0Xbar"), NULL); + assert(!strcmp((const char*)junk1, "/foo/bar")); + serd_free(junk1); + + uint8_t* const junk2 = serd_file_uri_parse(USTR("file:///foo/%X0bar"), NULL); + assert(!strcmp((const char*)junk2, "/foo/bar")); + serd_free(junk2); } static void @@ -343,6 +356,22 @@ test_relative_uri(void) "http://example.org/a/b/c", "http://example.org/a/b", "http://example.org/a"); + + // Tolerance of NULL URI output parameter + { + SerdURI uri = SERD_URI_NULL; + assert(!serd_uri_parse(USTR("http://example.org/path"), &uri)); + + SerdURI base = SERD_URI_NULL; + assert(!serd_uri_parse(USTR("http://example.org/"), &base)); + + SerdNode result_node = serd_node_new_relative_uri(&uri, &base, NULL, NULL); + + assert(result_node.n_bytes == 4U); + assert(!strcmp((const char*)result_node.buf, "path")); + + serd_node_free(&result_node); + } } int diff --git a/test/test_writer.c b/test/test_writer.c index b02aba7e..6c765148 100644 --- a/test/test_writer.c +++ b/test/test_writer.c @@ -10,6 +10,8 @@ #include <stdio.h> #include <string.h> +#define NS_EG "http://example.org/" + #define USTR(s) ((const uint8_t*)(s)) static void @@ -22,8 +24,8 @@ test_write_long_literal(void) assert(writer); - SerdNode s = serd_node_from_string(SERD_URI, USTR("http://example.org/s")); - SerdNode p = serd_node_from_string(SERD_URI, USTR("http://example.org/p")); + SerdNode s = serd_node_from_string(SERD_URI, USTR(NS_EG "s")); + SerdNode p = serd_node_from_string(SERD_URI, USTR(NS_EG "p")); SerdNode o = serd_node_from_string(SERD_LITERAL, USTR("hello \"\"\"world\"\"\"!")); @@ -52,16 +54,16 @@ test_write_nested_anon(void) assert(writer); - SerdNode s0 = serd_node_from_string(SERD_URI, USTR("http://example.org/s0")); - SerdNode p0 = serd_node_from_string(SERD_URI, USTR("http://example.org/p0")); + SerdNode s0 = serd_node_from_string(SERD_URI, USTR(NS_EG "s0")); + SerdNode p0 = serd_node_from_string(SERD_URI, USTR(NS_EG "p0")); SerdNode b0 = serd_node_from_string(SERD_BLANK, USTR("b0")); - SerdNode p1 = serd_node_from_string(SERD_URI, USTR("http://example.org/p1")); + SerdNode p1 = serd_node_from_string(SERD_URI, USTR(NS_EG "p1")); SerdNode b1 = serd_node_from_string(SERD_BLANK, USTR("b1")); - SerdNode p2 = serd_node_from_string(SERD_URI, USTR("http://example.org/p2")); - SerdNode o2 = serd_node_from_string(SERD_URI, USTR("http://example.org/o2")); - SerdNode p3 = serd_node_from_string(SERD_URI, USTR("http://example.org/p3")); - SerdNode p4 = serd_node_from_string(SERD_URI, USTR("http://example.org/p4")); - SerdNode o4 = serd_node_from_string(SERD_URI, USTR("http://example.org/o4")); + SerdNode p2 = serd_node_from_string(SERD_URI, USTR(NS_EG "p2")); + SerdNode o2 = serd_node_from_string(SERD_URI, USTR(NS_EG "o2")); + SerdNode p3 = serd_node_from_string(SERD_URI, USTR(NS_EG "p3")); + SerdNode p4 = serd_node_from_string(SERD_URI, USTR(NS_EG "p4")); + SerdNode o4 = serd_node_from_string(SERD_URI, USTR(NS_EG "o4")); SerdNode nil = serd_node_from_string( SERD_URI, USTR("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")); @@ -132,9 +134,11 @@ test_writer_cleanup(void) SerdWriter* writer = serd_writer_new(SERD_TURTLE, (SerdStyle)0U, env, NULL, null_sink, NULL); - SerdNode s = serd_node_from_string(SERD_URI, USTR("http://example.org/s")); - SerdNode p = serd_node_from_string(SERD_URI, USTR("http://example.org/p")); - SerdNode o = serd_node_from_string(SERD_BLANK, USTR("http://example.org/o")); + SerdNode s = serd_node_from_string(SERD_URI, USTR(NS_EG "s")); + SerdNode p = serd_node_from_string(SERD_URI, USTR(NS_EG "p")); + + char o_buf[12] = {'b', '0', '\0'}; + SerdNode o = serd_node_from_string(SERD_BLANK, USTR(o_buf)); st = serd_writer_write_statement( writer, SERD_ANON_O_BEGIN, NULL, &s, &p, &o, NULL, NULL); @@ -142,16 +146,24 @@ test_writer_cleanup(void) assert(!st); // Write the start of several nested anonymous objects - for (unsigned i = 0U; !st && i < 8U; ++i) { - char buf[12] = {0}; - snprintf(buf, sizeof(buf), "b%u", i); + for (unsigned i = 1U; !st && i < 9U; ++i) { + char next_o_buf[12] = {'\0'}; + snprintf(next_o_buf, sizeof(next_o_buf), "b%u", i); + + SerdNode next_o = serd_node_from_string(SERD_BLANK, USTR(next_o_buf)); - SerdNode next_o = serd_node_from_string(SERD_BLANK, USTR(buf)); + st = serd_writer_write_statement(writer, + SERD_ANON_O_BEGIN | SERD_ANON_CONT, + NULL, + &o, + &p, + &next_o, + NULL, + NULL); - st = serd_writer_write_statement( - writer, SERD_ANON_O_BEGIN, NULL, &o, &p, &next_o, NULL, NULL); + assert(!st); - o = next_o; + memcpy(o_buf, next_o_buf, sizeof(o_buf)); } // Finish writing without terminating nodes @@ -166,6 +178,35 @@ test_writer_cleanup(void) } static void +test_write_bad_anon_stack(void) +{ + SerdStatus st = SERD_SUCCESS; + SerdEnv* env = serd_env_new(NULL); + SerdWriter* writer = + serd_writer_new(SERD_TURTLE, (SerdStyle)0U, env, NULL, null_sink, NULL); + + SerdNode s = serd_node_from_string(SERD_URI, USTR(NS_EG "s")); + SerdNode p = serd_node_from_string(SERD_URI, USTR(NS_EG "p")); + SerdNode b0 = serd_node_from_string(SERD_BLANK, USTR("b0")); + SerdNode b1 = serd_node_from_string(SERD_BLANK, USTR("b1")); + SerdNode b2 = serd_node_from_string(SERD_BLANK, USTR("b2")); + + assert(!(st = serd_writer_write_statement( + writer, SERD_ANON_O_BEGIN, NULL, &s, &p, &b0, NULL, NULL))); + + // (missing call to end the anonymous node here) + + st = serd_writer_write_statement( + writer, SERD_ANON_O_BEGIN, NULL, &b1, &p, &b2, NULL, NULL); + + assert(st == SERD_ERR_BAD_ARG); + + assert(!(st = serd_writer_finish(writer))); + serd_writer_free(writer); + serd_env_free(env); +} + +static void test_strict_write(void) { const char* const path = "serd_strict_write_test.ttl"; @@ -180,8 +221,8 @@ test_strict_write(void) const uint8_t bad_str[] = {0xFF, 0x90, 'h', 'i', 0}; - SerdNode s = serd_node_from_string(SERD_URI, USTR("http://example.org/s")); - SerdNode p = serd_node_from_string(SERD_URI, USTR("http://example.org/p")); + SerdNode s = serd_node_from_string(SERD_URI, USTR(NS_EG "s")); + SerdNode p = serd_node_from_string(SERD_URI, USTR(NS_EG "p")); SerdNode bad_lit = serd_node_from_string(SERD_LITERAL, bad_str); SerdNode bad_uri = serd_node_from_string(SERD_URI, bad_str); @@ -233,8 +274,11 @@ main(void) test_write_long_literal(); test_write_nested_anon(); test_writer_cleanup(); + test_write_bad_anon_stack(); test_strict_write(); test_write_error(); return 0; } + +#undef NS_EG |