diff options
-rw-r--r-- | serd/serd.h | 32 | ||||
-rw-r--r-- | src/namespaces.c | 4 | ||||
-rw-r--r-- | src/reader.c | 67 | ||||
-rw-r--r-- | src/serdi.c | 3 | ||||
-rw-r--r-- | src/uri.c | 68 | ||||
-rw-r--r-- | wscript | 16 |
6 files changed, 102 insertions, 88 deletions
diff --git a/serd/serd.h b/serd/serd.h index dd4b25b7..3c39e593 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -1,6 +1,6 @@ /* Serd, an RDF serialisation library. * Copyright 2011 David Robillard <d@drobilla.net> - * + * * Serd is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or @@ -15,6 +15,10 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +/* @file + * Public Serd API. + */ + #ifndef SERD_SERD_H #define SERD_SERD_H @@ -40,9 +44,9 @@ #define SERD_API #endif -/* @file - * Public Serd API. - */ +#ifdef __cplusplus +extern "C" { +#endif /** @defgroup serd Serd * @brief A lightweight RDF Serialisation Library. @@ -79,13 +83,13 @@ typedef struct { /* Parsed URI. */ typedef struct { - SerdRange scheme; ///< Scheme - SerdRange authority; ///< Authority - SerdRange path_base; ///< Path prefix if relative - SerdRange path; ///< Path suffix - SerdRange query; ///< Query - SerdRange fragment; ///< Fragment - bool base_uri_has_authority; ///< True iff base URI has authority + SerdRange scheme; ///< Scheme + SerdRange authority; ///< Authority + SerdRange path_base; ///< Path prefix if relative + SerdRange path; ///< Path suffix + SerdRange query; ///< Query + SerdRange fragment; ///< Fragment + bool base_uri_has_authority; ///< True iff base URI has authority } SerdURI; /** Return true iff @a utf8 is a relative URI string. */ @@ -137,7 +141,7 @@ serd_string_new(const uint8_t* utf8); /** Copy @a string. */ SERD_API SerdString* -serd_string_copy(const SerdString* string); +serd_string_copy(const SerdString* str); /** Serialise @a uri to a string. */ SERD_API @@ -240,4 +244,8 @@ serd_namespaces_expand(SerdNamespaces ns, /** @} */ +#ifdef __cplusplus +} /* extern "C" */ +#endif + #endif /* SERD_SERD_H */ diff --git a/src/namespaces.c b/src/namespaces.c index f18ebcff..a2526a5d 100644 --- a/src/namespaces.c +++ b/src/namespaces.c @@ -1,6 +1,6 @@ /* Serd, an RDF serialisation library. * Copyright 2011 David Robillard <d@drobilla.net> - * + * * Serd is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or @@ -102,7 +102,7 @@ serd_namespaces_expand(SerdNamespaces ns, if (!colon) { return false; // Illegal qname } - + SerdNamespace* const record = serd_namespaces_find(ns, qname->buf, colon - qname->buf); if (record) { uri_prefix->buf = record->uri->buf; diff --git a/src/reader.c b/src/reader.c index 58a4100c..e267b285 100644 --- a/src/reader.c +++ b/src/reader.c @@ -1,6 +1,6 @@ /* Serd, an RDF serialisation library. * Copyright 2011 David Robillard <d@drobilla.net> - * + * * Serd is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or @@ -76,7 +76,7 @@ struct SerdReaderImpl { unsigned next_id; int err; uint8_t* read_buf; - int32_t read_head; ///< Offset into read_buf + int32_t read_head; ///< Offset into read_buf bool eof; #ifdef STACK_DEBUG Ref* alloc_stack; ///< Stack of push offsets @@ -124,7 +124,6 @@ readahead(SerdReader parser, uint8_t* pre, int n) uint8_t* ptr = parser->read_buf + parser->read_head; for (int i = 0; i < n; ++i) { if (parser->read_head + i >= READ_BUF_LEN) { - //fprintf(stderr, "PAGE FAULT DURING READAHEAD\n"); if (!page(parser)) { return false; } @@ -276,7 +275,6 @@ pop_string(SerdReader parser, Ref ref) assert(stack_is_top_string(parser, ref)); --parser->n_allocs; #endif - //fprintf(stderr, " * POP `%s'\n", deref(parser, ref)->buf); parser->stack.size -= deref(parser, ref)->n_bytes; } } @@ -648,7 +646,11 @@ read_relativeURI(SerdReader parser) { Ref str = push_string(parser, "", 1); while (read_ucharacter(parser, str)) {} - return str; + if (!parser->err) { + return str; + } + pop_string(parser, str); + return 0; } // [30] nameStartChar ::= [A-Z] | "_" | [a-z] @@ -754,8 +756,11 @@ read_uriref(SerdReader parser) { eat_byte(parser, '<'); Ref const str = read_relativeURI(parser); - eat_byte(parser, '>'); - return str; + if (str) { + eat_byte(parser, '>'); + return str; + } + return 0; } // [27] qname ::= prefixName? ':' name? @@ -768,8 +773,10 @@ read_qname(SerdReader parser) } push_byte(parser, prefix, eat_byte(parser, ':')); Ref str = read_name(parser, prefix, false); - if (parser->err) + if (parser->err) { + pop_string(parser, prefix); return 0; + } return str ? str : prefix; } @@ -937,7 +944,7 @@ static Ref blank_id(SerdReader parser) { char str[32]; - const int len = snprintf(str, 32, "genid%u", parser->next_id++); + const int len = snprintf(str, sizeof(str), "genid%u", parser->next_id++); return push_string(parser, str, len + 1); } @@ -1097,7 +1104,6 @@ read_predicateObjectList(SerdReader parser, const Node* subject) read_ws_star(parser); } } - //pop_string(parser, predicate.value); return true; except: pop_string(parser, predicate.value); @@ -1139,13 +1145,13 @@ read_collection(SerdReader parser, Node* dest) *dest = parser->rdf_nil; return true; } - + *dest = make_node(BLANK, blank_id(parser), 0, 0); if (!read_object(parser, dest, &parser->rdf_first)) { pop_string(parser, dest->value); return error(parser, "unexpected end of collection\n"); } - + return read_collection_rec(parser, dest); } @@ -1262,7 +1268,7 @@ read_turtleDoc(SerdReader parser) while (!parser->err && !parser->eof) { TRY_RET(read_statement(parser)); } - return true;//!parser->err; + return !parser->err; } SERD_API @@ -1286,7 +1292,7 @@ serd_reader_new(SerdSyntax syntax, reader->cur = cur; reader->next_id = 1; reader->err = 0; - reader->read_buf = (uint8_t*)malloc(READ_BUF_LEN) + MAX_READAHEAD; + reader->read_buf = (uint8_t*)malloc(READ_BUF_LEN * 2); reader->read_head = 0; reader->eof = false; #ifdef STACK_DEBUG @@ -1294,10 +1300,30 @@ serd_reader_new(SerdSyntax syntax, reader->n_allocs = 0; #endif + memset(reader->read_buf, '\0', READ_BUF_LEN * 2); + + /* Read into the second page of the buffer. Occasionally readahead + will move the read_head to before this point when readahead causes + a page fault. + */ + reader->read_buf += READ_BUF_LEN; // Read 1 page in return reader; } SERD_API +void +serd_reader_free(SerdReader reader) +{ + SerdReader const me = (SerdReader)reader; +#ifdef STACK_DEBUG + free(me->alloc_stack); +#endif + free(me->stack.buf); + free(me->read_buf - READ_BUF_LEN); + free(me); +} + +SERD_API bool serd_reader_read_file(SerdReader reader, FILE* file, const uint8_t* name) { @@ -1320,16 +1346,3 @@ serd_reader_read_file(SerdReader reader, FILE* file, const uint8_t* name) me->cur = cur; return ret; } - -SERD_API -void -serd_reader_free(SerdReader reader) -{ - SerdReader const me = (SerdReader)reader; -#ifdef STACK_DEBUG - free(me->alloc_stack); -#endif - free(me->stack.buf); - free(me->read_buf - MAX_READAHEAD); - free(me); -} diff --git a/src/serdi.c b/src/serdi.c index 3f91e040..6b4e62b4 100644 --- a/src/serdi.c +++ b/src/serdi.c @@ -85,6 +85,7 @@ event_prefix(void* handle, SerdURI new_abs_uri; SerdString* abs_uri_string = serd_string_new_from_uri(&abs_uri, &new_abs_uri); serd_namespaces_add(state->ns, name, abs_uri_string); + free(abs_uri_string); } else { serd_namespaces_add(state->ns, name, uri_string); } @@ -135,7 +136,6 @@ main(int argc, char** argv) fprintf(stderr, "invalid base uri: %s\n", base_uri_str); return 1; } - } FILE* const in_fd = fopen((const char*)in_filename, "r"); @@ -146,7 +146,6 @@ main(int argc, char** argv) return 1; } - //SerdURI null_uri = {{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}}; State state = { out_fd, serd_namespaces_new(), serd_string_new(base_uri_str), base_uri }; SerdReader reader = serd_reader_new( @@ -1,6 +1,6 @@ /* Serd, an RDF serialisation library. * Copyright 2011 David Robillard <d@drobilla.net> - * + * * Serd is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or @@ -202,7 +202,7 @@ end: serd_uri_dump(uri, stderr); fprintf(stderr, "\n"); #endif - + return true; } @@ -212,11 +212,10 @@ serd_uri_resolve(const SerdURI* r, const SerdURI* base, SerdURI* t) { assert(!r->scheme.len); // r is relative - /** See http://tools.ietf.org/html/rfc3986#section-5.2.2 */ + // See http://tools.ietf.org/html/rfc3986#section-5.2.2 - t->path_base.buf = NULL; - t->path_base.len = 0; - t->base_uri_has_authority = base->authority.len; + t->path_base.buf = NULL; + t->path_base.len = 0; if (r->scheme.len) { t->scheme = r->scheme; t->authority = r->authority; @@ -264,7 +263,7 @@ SERD_API size_t serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream) { - /* See http://tools.ietf.org/html/rfc3986#section-5.3 */ + // See http://tools.ietf.org/html/rfc3986#section-5.3 size_t write_size = 0; #define WRITE(buf, len) \ @@ -293,39 +292,33 @@ serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream) /* Merge paths, removing dot components. See http://tools.ietf.org/html/rfc3986#section-5.2.3 */ - if (uri->base_uri_has_authority && !uri->path_base.len) { - WRITE("/", 1); - WRITE_COMPONENT("", uri->path, ""); - } else { - const uint8_t* uri_first = uri->path.buf; - const uint8_t* uri_end = uri_first; - size_t up = 1; - if (uri_first) { - // Count and skip leading dot components - uri_end = uri->path.buf + uri->path.len; - while (uri_first < uri_end) { - if (!memcmp((const char*)uri_first, "./", 2)) { - uri_first += 2; - } else if (!memcmp((const char*)uri_first, "../", 3)) { - ++up; - uri_first += 3; - } else if (!memcmp((const char*)uri_first, "..", 2)) { - ++up; - uri_first += 2; - } else if (!memcmp((const char*)uri_first, ".", 1)) { - ++uri_first; - } else if (!memcmp((const char*)uri_first, "//", 1)) { - ++uri_first; - } else { - break; - } + const uint8_t* uri_first = uri->path.buf; + const uint8_t* uri_end = uri_first; + size_t up = 1; + if (uri_first) { + // Count and skip leading dot components + uri_end = uri->path.buf + uri->path.len; + while (uri_first < uri_end) { + if (!memcmp((const char*)uri_first, "./", 2)) { + uri_first += 2; + } else if (!memcmp((const char*)uri_first, "../", 3)) { + ++up; + uri_first += 3; + } else if (!memcmp((const char*)uri_first, "..", 2)) { + ++up; + uri_first += 2; + } else if (!memcmp((const char*)uri_first, ".", 1)) { + ++uri_first; + } else if (!memcmp((const char*)uri_first, "//", 1)) { + ++uri_first; + } else { + break; } } if (uri->path.buf && uri->path_base.buf) { // Find the up'th last slash const uint8_t* base_last = uri->path_base.buf + uri->path_base.len - 1; - //for (; base_last > uri->path_base.buf; --base_last) { do { if (*base_last == '/') { --up; @@ -338,7 +331,7 @@ serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream) } else { // Relative path is just query or fragment, append it to full base URI - WRITE_COMPONENT("", uri->path_base, ""); + WRITE_COMPONENT("", uri->path_base, ""); } // Write URI suffix @@ -364,7 +357,7 @@ serd_uri_string_length(const SerdURI* uri) #define ADD_LEN(field, n_delims) \ if ((field).len) { len += (field).len + (n_delims); } - + ADD_LEN(uri->path, 1); // + possible leading `/' ADD_LEN(uri->scheme, 1); // + trailing `:' ADD_LEN(uri->authority, 2); // + leading `//' @@ -388,13 +381,14 @@ SerdString* serd_string_new_from_uri(const SerdURI* uri, SerdURI* out) { const size_t len = serd_uri_string_length(uri); + //SerdString* str = calloc(sizeof(SerdString) + len + 1, 1); SerdString* str = malloc(sizeof(SerdString) + len + 1); str->n_bytes = len + 1; str->n_chars = len; // FIXME: UTF-8 uint8_t* ptr = str->buf; const size_t actual_len = serd_uri_serialise(uri, string_sink, &ptr); - + str->buf[actual_len + 1] = '\0'; str->n_bytes = actual_len + 1; str->n_chars = str->n_bytes - 1; @@ -138,11 +138,11 @@ def test(ctx): Logs.pprint('GREEN', 'PASS: %s output is correct' % test) print -# commands = [] -# bad_tests = glob.glob('tests/bad-*.ttl') -# bad_tests.sort() -# for test in bad_tests: -# commands = commands + [ './serdi_static ../%s \'http://www.w3.org/2001/sw/DataAccess/df1/%s\' > %s.out' % (test, test, test) ] -# -# autowaf.run_tests(ctx, APPNAME, commands, 1) -# + commands = [] + bad_tests = glob.glob('tests/bad-*.ttl') + bad_tests.sort() + for test in bad_tests: + commands = commands + [ './serdi_static ../%s \'http://www.w3.org/2001/sw/DataAccess/df1/%s\' > %s.out' % (test, test, test) ] + + autowaf.run_tests(ctx, APPNAME, commands, 1) + |