From edf40906a3988a4daace075fc714533a0e778814 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Fri, 4 Feb 2011 05:23:31 +0000 Subject: Use uint8_t for API char type for UTF-8 friendliness. Fix literal languages. git-svn-id: http://svn.drobilla.net/sord/trunk@20 3d64ff67-21c5-427c-a301-fe4f08042e5a --- src/sord.c | 61 ++++++++++++++++++++++++++++------------------------- src/sord_internal.h | 5 ++++- src/sord_test.c | 42 ++++++++++++++++++------------------ src/sordi.c | 6 +++--- src/syntax.c | 9 ++++---- 5 files changed, 65 insertions(+), 58 deletions(-) (limited to 'src') diff --git a/src/sord.c b/src/sord.c index 3e98095..8519bb3 100644 --- a/src/sord.c +++ b/src/sord.c @@ -148,7 +148,7 @@ static unsigned sord_literal_hash(const void* n) { SordNode node = (SordNode)n; - return g_str_hash(node->buf) + g_str_hash(node->lang); + return g_str_hash(node->buf) + (node->lang ? g_str_hash(node->lang) : 0); } static gboolean @@ -170,10 +170,11 @@ sord_node_compare(const SordNode a, const SordNode b) switch ((SordNodeType)a->type) { case SORD_URI: case SORD_BLANK: - return strcmp(a->buf, b->buf); + return strcmp((const char*)a->buf, (const char*)b->buf); case SORD_LITERAL: // TODO: lang, type - return strcmp(sord_node_get_string(a), sord_node_get_string(b)); + return strcmp((const char*)sord_node_get_string(a), + (const char*)sord_node_get_string(b)); } assert(false); return 0; @@ -790,13 +791,13 @@ sord_find(Sord sord, const SordTuple pat) } static SordID -sord_lookup_name(Sord sord, const char* str, int str_len) +sord_lookup_name(Sord sord, const uint8_t* str, size_t str_len) { return g_hash_table_lookup(sord->names, str); } static SordNode -sord_new_node(SordNodeType type, const char* data, size_t n_bytes) +sord_new_node(SordNodeType type, const uint8_t* data, size_t n_bytes) { SordNode node = malloc(sizeof(struct _SordNode)); node->type = type; @@ -805,25 +806,25 @@ sord_new_node(SordNodeType type, const char* data, size_t n_bytes) node->user_data = 0; node->datatype = 0; node->lang = 0; - node->buf = g_strdup(data); // TODO: add no-copy option + node->buf = (uint8_t*)g_strdup((const char*)data); // TODO: no-copy return node; } static SordNode sord_new_literal_node(Sord sord, SordNode datatype, - const char* str, int str_len, - const char* lang, uint8_t lang_len) + const uint8_t* str, int str_len, + const char* lang, uint8_t lang_len) { SordNode node = sord_new_node(SORD_LITERAL, str, str_len + 1); node->datatype = datatype; - node->lang = g_intern_string(str); + node->lang = lang ? g_intern_string(lang) : NULL; return node; } static SordNode sord_lookup_literal(Sord sord, SordNode type, - const char* str, int str_len, - const char* lang, uint8_t lang_len) + const uint8_t* str, int str_len, + const char* lang, uint8_t lang_len) { SordNode node = sord_new_literal_node(sord, type, str, str_len, lang, lang_len); SordNode id = g_hash_table_lookup(sord->literals, node); @@ -847,13 +848,13 @@ sord_node_get_type(SordNode ref) return ref->type; } -const char* +const uint8_t* sord_node_get_string(SordNode ref) { - return ref->buf; + return (const uint8_t*)ref->buf; } -const char* +const uint8_t* sord_node_get_string_counted(SordNode ref, size_t* n_bytes) { *n_bytes = ref->n_bytes; @@ -892,53 +893,53 @@ sord_add_node(Sord sord, SordNode node) } SordID -sord_get_uri_counted(Sord sord, bool create, const char* str, int str_len) +sord_get_uri_counted(Sord sord, bool create, const uint8_t* str, int str_len) { SordID id = sord_lookup_name(sord, str, str_len); if (id || !create) return id; - id = sord_new_node(SORD_URI, str, str_len + 1); + id = sord_new_node(SORD_URI, (const uint8_t*)str, str_len + 1); assert(id); - g_hash_table_insert(sord->names, (void*)g_strdup(str), (void*)id); + g_hash_table_insert(sord->names, (void*)g_strdup((const char*)str), (void*)id); sord_add_node(sord, id); return id; } SordID -sord_get_uri(Sord sord, bool create, const char* str) +sord_get_uri(Sord sord, bool create, const uint8_t* str) { - return sord_get_uri_counted(sord, create, str, strlen(str)); + return sord_get_uri_counted(sord, create, str, strlen((const char*)str)); } SordID -sord_get_blank_counted(Sord sord, bool create, const char* str, int str_len) +sord_get_blank_counted(Sord sord, bool create, const uint8_t* str, int str_len) { SordID id = sord_lookup_name(sord, str, str_len); if (id || !create) return id; - id = sord_new_node(SORD_BLANK, str, str_len + 1); + id = sord_new_node(SORD_BLANK, (const uint8_t*)str, str_len + 1); assert(id); - g_hash_table_insert(sord->names, (void*)g_strdup(str), (void*)id); + g_hash_table_insert(sord->names, (void*)g_strdup((const char*)str), (void*)id); sord_add_node(sord, id); return id; } SordID -sord_get_blank(Sord sord, bool create, const char* str) +sord_get_blank(Sord sord, bool create, const uint8_t* str) { - return sord_get_blank_counted(sord, create, str, strlen(str)); + return sord_get_blank_counted(sord, create, str, strlen((const char*)str)); } SordID sord_get_literal_counted(Sord sord, bool create, SordID type, - const char* str, int str_len, - const char* lang, uint8_t lang_len) + const uint8_t* str, int str_len, + const char* lang, uint8_t lang_len) { SordID id = sord_lookup_literal(sord, type, str, str_len, lang, lang_len); if (id || !create) @@ -953,10 +954,12 @@ sord_get_literal_counted(Sord sord, bool create, SordID type, } SordID -sord_get_literal(Sord sord, bool create, SordID type, const char* str, const char* lang) +sord_get_literal(Sord sord, bool create, SordID type, + const uint8_t* str, const char* lang) { - return sord_get_literal_counted(sord, create, type, str, strlen(str), - lang, lang ? strlen(lang) : 0); + return sord_get_literal_counted(sord, create, type, + str, strlen((const char*)str), + lang, lang ? strlen(lang) : 0); } static inline bool diff --git a/src/sord_internal.h b/src/sord_internal.h index b7a3398..dc7ac26 100644 --- a/src/sord_internal.h +++ b/src/sord_internal.h @@ -18,6 +18,9 @@ #ifndef SORD_INTERNAL_H #define SORD_INTERNAL_H +#include +#include + #include "sord/sord.h" /** Node */ @@ -28,7 +31,7 @@ struct _SordNode { void* user_data; ///< Opaque user data SordNode datatype; ///< Literal data type (ID of a URI node, or 0) const char* lang; ///< Literal language (interned string) - char* buf; ///< Value (string) + uint8_t* buf; ///< Value (string) }; #endif // SORD_INTERNAL_H diff --git a/src/sord_test.c b/src/sord_test.c index 97b7b4d..4a3dda7 100644 --- a/src/sord_test.c +++ b/src/sord_test.c @@ -27,6 +27,8 @@ static const int MAX_NUM = 999; typedef struct { SordTuple query; int expected_num_results; } QueryTest; +#define USTR(s) ((const uint8_t*)(s)) + static SordID uri(Sord sord, int num) { @@ -35,9 +37,9 @@ uri(Sord sord, int num) char uri[] = "eg:000"; const size_t uri_len = 3 + DIGITS; - char* uri_num = uri + 3; // First `0' + char* uri_num = uri + 3; // First `0' snprintf(uri_num, DIGITS + 1, "%0*d", DIGITS, num); - return sord_get_uri_counted(sord, true, uri, uri_len); + return sord_get_uri_counted(sord, true, (const uint8_t*)uri, uri_len); } void @@ -64,16 +66,16 @@ generate(Sord sord, size_t n_tuples, size_t n_objects_per) SordTuple tup; tup[0] = uri(sord, 98); tup[1] = uri(sord, 4); - tup[2] = sord_get_literal(sord, true, 0, "hello", NULL); + tup[2] = sord_get_literal(sord, true, 0, (const uint8_t*)"hello", NULL); tup[3] = 0; sord_add(sord, tup); - tup[2] = sord_get_literal(sord, true, 0, "hi", NULL); + tup[2] = sord_get_literal(sord, true, 0, USTR("hi"), NULL); sord_add(sord, tup); tup[0] = uri(sord, 14); - tup[2] = sord_get_literal(sord, true, 0, "bonjour", "fr"); + tup[2] = sord_get_literal(sord, true, 0, USTR("bonjour"), "fr"); sord_add(sord, tup); - tup[2] = sord_get_literal(sord, true, 0, "salut", "fr"); + tup[2] = sord_get_literal(sord, true, 0, USTR("salut"), "fr"); sord_add(sord, tup); // Attempt to add some duplicates @@ -81,7 +83,7 @@ generate(Sord sord, size_t n_tuples, size_t n_objects_per) sord_add(sord, tup); // Add a blank node subject - tup[0] = sord_get_blank(sord, true, "ablank"); + tup[0] = sord_get_blank(sord, true, USTR("ablank")); sord_add(sord, tup); tup[1] = uri(sord, 6); @@ -99,11 +101,11 @@ test_fail() #define TUP_FMT "(%6s %6s %6s)" #define TUP_FMT_ARGS(t) \ (sord_node_load(sord, (t)[0]) \ - ? sord_node_get_string(sord_node_load(sord, (t)[0])) : "*"), \ + ? sord_node_get_string(sord_node_load(sord, (t)[0])) : USTR("*")), \ (sord_node_load(sord, (t)[1]) \ - ? sord_node_get_string(sord_node_load(sord, (t)[1])) : "*"), \ + ? sord_node_get_string(sord_node_load(sord, (t)[1])) : USTR("*")), \ (sord_node_load(sord, (t)[2]) \ - ? sord_node_get_string(sord_node_load(sord, (t)[2])) : "*") + ? sord_node_get_string(sord_node_load(sord, (t)[2])) : USTR("*")) int test_read(Sord sord, const size_t n_tuples, const int n_objects_per) @@ -169,7 +171,7 @@ test_read(Sord sord, const size_t n_tuples, const int n_objects_per) } // Query blank node subject - SordTuple pat = { sord_get_blank(sord, true, "ablank"), 0, 0 }; + SordTuple pat = { sord_get_blank(sord, true, USTR("ablank")), 0, 0 }; if (!pat[0]) { fprintf(stderr, "Blank node subject lost\n"); return test_fail(); @@ -277,13 +279,13 @@ main(int argc, char** argv) } // Check interning merges equivalent values - SordID uri_id = sord_get_uri(sord, true, "http://example.org"); - SordID blank_id = sord_get_uri(sord, true, "testblank"); - SordID lit_id = sord_get_literal(sord, true, uri_id, "hello", NULL); + SordID uri_id = sord_get_uri(sord, true, USTR("http://example.org")); + SordID blank_id = sord_get_uri(sord, true, USTR("testblank")); + SordID lit_id = sord_get_literal(sord, true, uri_id, USTR("hello"), NULL); //sord_clear_cache(write); - SordID uri_id2 = sord_get_uri(sord, false, "http://example.org"); - SordID blank_id2 = sord_get_uri(sord, false, "testblank"); - SordID lit_id2 = sord_get_literal(sord, false, uri_id, "hello", NULL); + SordID uri_id2 = sord_get_uri(sord, false, USTR("http://example.org")); + SordID blank_id2 = sord_get_uri(sord, false, USTR("testblank")); + SordID lit_id2 = sord_get_literal(sord, false, uri_id, USTR("hello"), NULL); if (uri_id2 != uri_id) { fprintf(stderr, "Fail: URI interning failed (duplicates)\n"); goto fail; @@ -296,9 +298,9 @@ main(int argc, char** argv) } // Check interning doesn't clash non-equivalent values - SordID uri_id3 = sord_get_uri(sord, false, "http://example.orgX"); - SordID blank_id3 = sord_get_uri(sord, false, "testblankX"); - SordID lit_id3 = sord_get_literal(sord, false, uri_id, "helloX", NULL); + SordID uri_id3 = sord_get_uri(sord, false, USTR("http://example.orgX")); + SordID blank_id3 = sord_get_uri(sord, false, USTR("testblankX")); + SordID lit_id3 = sord_get_literal(sord, false, uri_id, USTR("helloX"), NULL); if (uri_id3 == uri_id) { fprintf(stderr, "Fail: URI interning failed (clash)\n"); goto fail; diff --git a/src/sordi.c b/src/sordi.c index f304b9e..4a7454b 100644 --- a/src/sordi.c +++ b/src/sordi.c @@ -65,9 +65,9 @@ file_sink(const void* buf, size_t len, void* stream) static inline SerdNode serd_node_from_sord_node(const SordNode n) { - size_t n_bytes = 0; - const char* buf = sord_node_get_string_counted(n, &n_bytes); - SerdNode sn = { SERD_NOTHING, n_bytes, n_bytes - 1, (const uint8_t*)buf }; + size_t n_bytes = 0; + const uint8_t* buf = sord_node_get_string_counted(n, &n_bytes); + SerdNode sn = { SERD_NOTHING, n_bytes, n_bytes - 1, (const uint8_t*)buf }; // FIXME: UTF-8 switch (sord_node_get_type(n)) { case SORD_URI: diff --git a/src/syntax.c b/src/syntax.c index e7560c5..3afda17 100644 --- a/src/syntax.c +++ b/src/syntax.c @@ -101,8 +101,7 @@ sord_node_from_serd_node(ReadState* state, const SerdNode* sn) case SERD_NOTHING: return NULL; case SERD_LITERAL: - return sord_get_literal(state->sord, true, NULL, - (const char*)sn->buf, NULL); + return sord_get_literal(state->sord, true, NULL, sn->buf, NULL); case SERD_URI: { SerdURI uri; if (!serd_uri_parse(sn->buf, &uri)) { @@ -114,7 +113,7 @@ sord_node_from_serd_node(ReadState* state, const SerdNode* sn) } SerdURI ignored; SerdNode abs_uri_node = serd_node_new_uri(&abs_uri, &ignored); - SordID ret = sord_get_uri(state->sord, true, (const char*)abs_uri_node.buf); + SordID ret = sord_get_uri(state->sord, true, abs_uri_node.buf); serd_node_free(&abs_uri_node); return ret; } @@ -126,7 +125,7 @@ sord_node_from_serd_node(ReadState* state, const SerdNode* sn) return NULL; } const size_t uri_len = uri_prefix.len + uri_suffix.len; - char* buf = malloc(uri_len + 1); + uint8_t* buf = malloc(uri_len + 1); memcpy(buf, uri_prefix.buf, uri_prefix.len); memcpy(buf + uri_prefix.len, uri_suffix.buf, uri_suffix.len); buf[uri_len] = '\0'; @@ -138,7 +137,7 @@ sord_node_from_serd_node(ReadState* state, const SerdNode* sn) case SERD_BLANK_ID: case SERD_ANON_BEGIN: case SERD_ANON: - return sord_get_blank(state->sord, true, (const char*)sn->buf); + return sord_get_blank(state->sord, true, sn->buf); } return NULL; } -- cgit v1.2.1