aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2019-10-27 19:29:30 +0100
committerDavid Robillard <d@drobilla.net>2019-10-27 22:40:45 +0100
commitcd6d4569c1c8819cc8e54eefdc0ac389d8efb4ea (patch)
tree9be604e9ceb648652b1b4dd28202b7bf896817d2
parent9c3aa95c0ed18ecc20517458bf1c0e4180e8b2e2 (diff)
downloadserd-cd6d4569c1c8819cc8e54eefdc0ac389d8efb4ea.tar.gz
serd-cd6d4569c1c8819cc8e54eefdc0ac389d8efb4ea.tar.bz2
serd-cd6d4569c1c8819cc8e54eefdc0ac389d8efb4ea.zip
Use int as internal character type so it can represent EOF
-rw-r--r--src/n3.c131
-rw-r--r--src/reader.h31
-rw-r--r--src/serd_internal.h12
3 files changed, 94 insertions, 80 deletions
diff --git a/src/n3.c b/src/n3.c
index a1335945..187f371c 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -44,9 +44,9 @@ read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot);
static inline uint8_t
read_HEX(SerdReader* reader)
{
- const uint8_t c = peek_byte(reader);
+ const int c = peek_byte(reader);
if (is_xdigit(c)) {
- return eat_byte_safe(reader, c);
+ return (uint8_t)eat_byte_safe(reader, c);
}
return (uint8_t)r_err(reader, SERD_ERR_BAD_SYNTAX,
@@ -57,8 +57,8 @@ read_HEX(SerdReader* reader)
static inline bool
read_UCHAR(SerdReader* reader, Ref dest, uint32_t* char_code)
{
- const uint8_t b = peek_byte(reader);
- unsigned length = 0;
+ const int b = peek_byte(reader);
+ unsigned length = 0;
switch (b) {
case 'U':
length = 8;
@@ -131,7 +131,7 @@ read_UCHAR(SerdReader* reader, Ref dest, uint32_t* char_code)
static inline bool
read_ECHAR(SerdReader* reader, Ref dest, SerdNodeFlags* flags)
{
- const uint8_t c = peek_byte(reader);
+ const int c = peek_byte(reader);
switch (c) {
case 't':
eat_byte_safe(reader, 't');
@@ -167,7 +167,7 @@ static inline SerdStatus
bad_char(SerdReader* reader, const char* fmt, uint8_t c)
{
// Skip bytes until the next start byte
- for (uint8_t b = peek_byte(reader); (b & 0x80);) {
+ for (int b = peek_byte(reader); (b & 0x80);) {
eat_byte_safe(reader, b);
b = peek_byte(reader);
}
@@ -186,11 +186,14 @@ read_utf8_bytes(SerdReader* reader, uint8_t bytes[4], uint32_t* size, uint8_t c)
bytes[0] = c;
for (unsigned i = 1; i < *size; ++i) {
- if (((bytes[i] = peek_byte(reader)) & 0x80) == 0) {
+ const int b = peek_byte(reader);
+ if ((b & 0x80) == 0) {
return bad_char(reader, "invalid UTF-8 continuation 0x%X\n",
- bytes[i]);
+ (uint8_t)b);
}
- eat_byte_safe(reader, bytes[i]);
+
+ eat_byte_safe(reader, b);
+ bytes[i] = (uint8_t)b;
}
return SERD_SUCCESS;
@@ -251,7 +254,7 @@ static void
read_comment(SerdReader* reader)
{
eat_byte_safe(reader, '#');
- uint8_t c;
+ int c;
while (((c = peek_byte(reader)) != 0xA) && (c != 0xD) && c) {
eat_byte_safe(reader, c);
}
@@ -261,7 +264,7 @@ read_comment(SerdReader* reader)
static inline bool
read_ws(SerdReader* reader)
{
- const uint8_t c = peek_byte(reader);
+ const int c = peek_byte(reader);
switch (c) {
case 0x9: case 0xA: case 0xD: case 0x20:
eat_byte_safe(reader, c);
@@ -306,7 +309,7 @@ read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q)
Ref ref = push_node(reader, SERD_LITERAL, "", 0);
SerdStatus st = SERD_SUCCESS;
while (!reader->status && !(st && reader->strict)) {
- const uint8_t c = peek_byte(reader);
+ const int c = peek_byte(reader);
if (c == '\\') {
eat_byte_safe(reader, c);
uint32_t code;
@@ -318,17 +321,18 @@ read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q)
}
} else if (c == q) {
eat_byte_safe(reader, q);
- const uint8_t q2 = eat_byte_safe(reader, peek_byte(reader));
- const uint8_t q3 = peek_byte(reader);
+ const int q2 = eat_byte_safe(reader, peek_byte(reader));
+ const int q3 = peek_byte(reader);
if (q2 == q && q3 == q) { // End of string
eat_byte_safe(reader, q3);
- return ref;
+ break;
}
*flags |= SERD_HAS_QUOTE;
push_byte(reader, ref, c);
- read_character(reader, ref, flags, q2);
+ read_character(reader, ref, flags, (uint8_t)q2);
} else {
- st = read_character(reader, ref, flags, eat_byte_safe(reader, c));
+ st = read_character(
+ reader, ref, flags, (uint8_t)eat_byte_safe(reader, c));
}
}
return ref;
@@ -342,8 +346,8 @@ read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q)
Ref ref = push_node(reader, SERD_LITERAL, "", 0);
SerdStatus st = SERD_SUCCESS;
while (!reader->status && !(st && reader->strict)) {
- const uint8_t c = peek_byte(reader);
- uint32_t code = 0;
+ const int c = peek_byte(reader);
+ uint32_t code = 0;
switch (c) {
case '\n': case '\r':
r_err(reader, SERD_ERR_BAD_SYNTAX, "line end in short string\n");
@@ -362,7 +366,8 @@ read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q)
eat_byte_check(reader, q);
return ref;
} else {
- st = read_character(reader, ref, flags, eat_byte_safe(reader, c));
+ st = read_character(
+ reader, ref, flags, (uint8_t)eat_byte_safe(reader, c));
}
}
}
@@ -373,16 +378,16 @@ read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q)
static Ref
read_String(SerdReader* reader, SerdNodeFlags* flags)
{
- const uint8_t q1 = peek_byte(reader);
+ const int q1 = peek_byte(reader);
eat_byte_safe(reader, q1);
- const uint8_t q2 = peek_byte(reader);
+ const int q2 = peek_byte(reader);
if (q2 != q1) { // Short string (not triple quoted)
- return read_STRING_LITERAL(reader, flags, q1);
+ return read_STRING_LITERAL(reader, flags, (uint8_t)q1);
}
eat_byte_safe(reader, q2);
- const uint8_t q3 = peek_byte(reader);
+ const int q3 = peek_byte(reader);
if (q3 != q1) { // Empty short string ("" or '')
return push_node(reader, SERD_LITERAL, "", 0);
}
@@ -393,7 +398,7 @@ read_String(SerdReader* reader, SerdNodeFlags* flags)
}
eat_byte_safe(reader, q3);
- return read_STRING_LITERAL_LONG(reader, flags, q1);
+ return read_STRING_LITERAL_LONG(reader, flags, (uint8_t)q1);
}
static inline bool
@@ -410,15 +415,15 @@ is_PN_CHARS_BASE(const uint32_t c)
static SerdStatus
read_PN_CHARS_BASE(SerdReader* reader, Ref dest)
{
- uint32_t code;
- const uint8_t c = peek_byte(reader);
- SerdStatus st = SERD_SUCCESS;
+ uint32_t code;
+ const int c = peek_byte(reader);
+ SerdStatus st = SERD_SUCCESS;
if (is_alpha(c)) {
push_byte(reader, dest, eat_byte_safe(reader, c));
} else if (!(c & 0x80)) {
return SERD_FAILURE;
} else if ((st = read_utf8_code(reader, dest, &code,
- eat_byte_safe(reader, c)))) {
+ (uint8_t)eat_byte_safe(reader, c)))) {
return st;
} else if (!is_PN_CHARS_BASE(code)) {
r_err(reader, SERD_ERR_BAD_SYNTAX,
@@ -440,15 +445,15 @@ is_PN_CHARS(const uint32_t c)
static SerdStatus
read_PN_CHARS(SerdReader* reader, Ref dest)
{
- uint32_t code;
- const uint8_t c = peek_byte(reader);
- SerdStatus st = SERD_SUCCESS;
+ uint32_t code;
+ const int c = peek_byte(reader);
+ SerdStatus st = SERD_SUCCESS;
if (is_alpha(c) || is_digit(c) || c == '_' || c == '-') {
push_byte(reader, dest, eat_byte_safe(reader, c));
} else if (!(c & 0x80)) {
return SERD_FAILURE;
} else if ((st = read_utf8_code(reader, dest, &code,
- eat_byte_safe(reader, c)))) {
+ (uint8_t)eat_byte_safe(reader, c)))) {
return st;
} else if (!is_PN_CHARS(code)) {
r_err(reader, (st = SERD_ERR_BAD_SYNTAX),
@@ -474,7 +479,7 @@ read_PERCENT(SerdReader* reader, Ref dest)
static SerdStatus
read_PLX(SerdReader* reader, Ref dest)
{
- uint8_t c = peek_byte(reader);
+ int c = peek_byte(reader);
switch (c) {
case '%':
if (!read_PERCENT(reader, dest)) {
@@ -498,7 +503,7 @@ read_PLX(SerdReader* reader, Ref dest)
static SerdStatus
read_PN_LOCAL(SerdReader* reader, Ref dest, bool* ate_dot)
{
- uint8_t c = peek_byte(reader);
+ int c = peek_byte(reader);
SerdStatus st = SERD_SUCCESS;
bool trailing_unescaped_dot = false;
switch (c) {
@@ -540,7 +545,7 @@ read_PN_LOCAL(SerdReader* reader, Ref dest, bool* ate_dot)
static SerdStatus
read_PN_PREFIX_tail(SerdReader* reader, Ref dest)
{
- uint8_t c;
+ int c;
while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.')*
if (c == '.') {
push_byte(reader, dest, eat_byte_safe(reader, c));
@@ -570,10 +575,11 @@ read_PN_PREFIX(SerdReader* reader, Ref dest)
static Ref
read_LANGTAG(SerdReader* reader)
{
- uint8_t c = peek_byte(reader);
+ int c = peek_byte(reader);
if (!is_alpha(c)) {
return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected `%c'\n", c);
}
+
Ref ref = push_node(reader, SERD_LITERAL, "", 0);
push_byte(reader, ref, eat_byte_safe(reader, c));
while ((c = peek_byte(reader)) && is_alpha(c)) {
@@ -591,7 +597,7 @@ read_LANGTAG(SerdReader* reader)
static bool
read_IRIREF_scheme(SerdReader* reader, Ref dest)
{
- uint8_t c = peek_byte(reader);
+ int c = peek_byte(reader);
if (!isalpha(c)) {
return r_err(reader, SERD_ERR_BAD_SYNTAX,
"bad IRI scheme start `%c'\n", c);
@@ -626,7 +632,7 @@ read_IRIREF(SerdReader* reader)
uint32_t code = 0;
while (!reader->status && !(st && reader->strict)) {
- const uint8_t c = eat_byte_safe(reader, peek_byte(reader));
+ const int c = eat_byte_safe(reader, peek_byte(reader));
switch (c) {
case '"': case '<': case '^': case '`': case '{': case '|': case '}':
r_err(reader, SERD_ERR_BAD_SYNTAX,
@@ -664,7 +670,7 @@ read_IRIREF(SerdReader* reader)
push_byte(reader, ref, c);
} else if (!(c & 0x80)) {
push_byte(reader, ref, c);
- } else if ((st = read_utf8_character(reader, ref, c))) {
+ } else if ((st = read_utf8_character(reader, ref, (uint8_t)c))) {
if (reader->strict) {
reader->status = SERD_FAILURE;
return pop_node(reader, ref);
@@ -692,7 +698,7 @@ static bool
read_0_9(SerdReader* reader, Ref str, bool at_least_one)
{
unsigned count = 0;
- for (uint8_t c; is_digit((c = peek_byte(reader))); ++count) {
+ for (int c; is_digit((c = peek_byte(reader))); ++count) {
push_byte(reader, str, eat_byte_safe(reader, c));
}
if (at_least_one && count == 0) {
@@ -707,9 +713,10 @@ read_number(SerdReader* reader, Ref* dest, Ref* datatype, bool* ate_dot)
#define XSD_DECIMAL NS_XSD "decimal"
#define XSD_DOUBLE NS_XSD "double"
#define XSD_INTEGER NS_XSD "integer"
- Ref ref = push_node(reader, SERD_LITERAL, "", 0);
- uint8_t c = peek_byte(reader);
- bool has_decimal = false;
+
+ Ref ref = push_node(reader, SERD_LITERAL, "", 0);
+ int c = peek_byte(reader);
+ bool has_decimal = false;
if (c == '-' || c == '+') {
push_byte(reader, ref, eat_byte_safe(reader, c));
}
@@ -821,9 +828,9 @@ read_verb(SerdReader* reader, Ref* dest)
const SerdStatus st = read_PN_PREFIX(reader, *dest);
bool ate_dot = false;
SerdNode* node = deref(reader, *dest);
- const uint8_t next = peek_byte(reader);
+ const int next = peek_byte(reader);
if (!st && node->n_bytes == 1 && node->buf[0] == 'a' &&
- next != ':' && !is_PN_CHARS_BASE(next)) {
+ next != ':' && !is_PN_CHARS_BASE((uint32_t)next)) {
pop_node(reader, *dest);
return (*dest = push_node(reader, SERD_URI, NS_RDF "type", 47));
} else if (st > SERD_FAILURE ||
@@ -845,7 +852,7 @@ read_BLANK_NODE_LABEL(SerdReader* reader, bool* ate_dot)
reader->bprefix ? (char*)reader->bprefix : "",
reader->bprefix_len);
- uint8_t c = peek_byte(reader); // First: (PN_CHARS | '_' | [0-9])
+ int c = peek_byte(reader); // First: (PN_CHARS | '_' | [0-9])
if (is_digit(c) || c == '_') {
push_byte(reader, ref, eat_byte_safe(reader, c));
} else if (read_PN_CHARS(reader, ref)) {
@@ -958,14 +965,14 @@ read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot)
const size_t orig_stack_size = reader->stack.size;
#endif
- bool ret = false;
- bool simple = (ctx->subject != 0);
- SerdNode* node = NULL;
- Ref o = 0;
- Ref datatype = 0;
- Ref lang = 0;
- uint32_t flags = 0;
- const uint8_t c = peek_byte(reader);
+ bool ret = false;
+ bool simple = (ctx->subject != 0);
+ SerdNode* node = NULL;
+ Ref o = 0;
+ Ref datatype = 0;
+ Ref lang = 0;
+ uint32_t flags = 0;
+ const int c = peek_byte(reader);
if (!fancy_syntax(reader)) {
switch (c) {
case '"': case ':': case '<': case '_': break;
@@ -1069,8 +1076,8 @@ read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot)
return true;
}
- bool ate_semi = false;
- uint8_t c;
+ bool ate_semi = false;
+ int c;
do {
read_ws_star(reader);
switch (c = peek_byte(reader)) {
@@ -1162,7 +1169,7 @@ read_collection(SerdReader* reader, ReadContext ctx, Ref* dest)
}
static Ref
-read_subject(SerdReader* reader, ReadContext ctx, Ref* dest, char* s_type)
+read_subject(SerdReader* reader, ReadContext ctx, Ref* dest, int* s_type)
{
bool ate_dot = false;
switch ((*s_type = peek_byte(reader))) {
@@ -1313,7 +1320,7 @@ read_wrappedGraph(SerdReader* reader, ReadContext* ctx)
read_ws_star(reader);
while (peek_byte(reader) != '}') {
bool ate_dot = false;
- char s_type = 0;
+ int s_type = 0;
ctx->subject = 0;
Ref subj = read_subject(reader, *ctx, &ctx->subject, &s_type);
if (!subj && ctx->subject) {
@@ -1351,7 +1358,7 @@ read_n3_statement(SerdReader* reader)
ReadContext ctx = { 0, 0, 0, 0, 0, 0, &flags };
Ref subj = 0;
bool ate_dot = false;
- char s_type = 0;
+ int s_type = 0;
bool ret = true;
read_ws_star(reader);
switch (peek_byte(reader)) {
@@ -1419,7 +1426,7 @@ read_n3_statement(SerdReader* reader)
static void
skip_until(SerdReader* reader, uint8_t byte)
{
- for (uint8_t c = 0; (c = peek_byte(reader)) && c != byte;) {
+ for (int c = 0; (c = peek_byte(reader)) && c != byte;) {
eat_byte_safe(reader, c);
}
}
@@ -1446,7 +1453,7 @@ read_nquadsDoc(SerdReader* reader)
SerdStatementFlags flags = 0;
ReadContext ctx = { 0, 0, 0, 0, 0, 0, &flags };
bool ate_dot = false;
- char s_type = false;
+ int s_type = 0;
read_ws_star(reader);
if (peek_byte(reader) == '\0') {
reader->source.eof = true;
diff --git a/src/reader.h b/src/reader.h
index f6cbc7ad..f7cdba02 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -16,16 +16,21 @@
#include "serd_internal.h"
-static inline uint8_t
+#include <assert.h>
+#include <stdio.h>
+
+static inline int
peek_byte(SerdReader* reader)
{
- return serd_byte_source_peek(&reader->source);
+ SerdByteSource* source = &reader->source;
+
+ return source->eof ? EOF : (int)source->read_buf[source->read_head];
}
-static inline uint8_t
+static inline int
eat_byte(SerdReader* reader)
{
- const uint8_t c = peek_byte(reader);
+ const int c = peek_byte(reader);
const SerdStatus st = serd_byte_source_advance(&reader->source);
if (st) {
reader->status = st;
@@ -33,20 +38,20 @@ eat_byte(SerdReader* reader)
return c;
}
-static inline uint8_t
-eat_byte_safe(SerdReader* reader, const uint8_t byte)
+static inline int
+eat_byte_safe(SerdReader* reader, const int byte)
{
(void)byte;
- const uint8_t c = eat_byte(reader);
+ const int c = eat_byte(reader);
assert(c == byte);
return c;
}
-static inline uint8_t
-eat_byte_check(SerdReader* reader, const uint8_t byte)
+static inline int
+eat_byte_check(SerdReader* reader, const int byte)
{
- const uint8_t c = peek_byte(reader);
+ const int c = peek_byte(reader);
if (c != byte) {
return r_err(reader, SERD_ERR_BAD_SYNTAX,
"expected `%c', not `%c'\n", byte, c);
@@ -65,16 +70,18 @@ eat_string(SerdReader* reader, const char* str, unsigned n)
}
static inline SerdStatus
-push_byte(SerdReader* reader, Ref ref, const uint8_t c)
+push_byte(SerdReader* reader, Ref ref, const int c)
{
+ assert(c != EOF);
SERD_STACK_ASSERT_TOP(reader, ref);
+
uint8_t* const s = serd_stack_push(&reader->stack, 1);
SerdNode* const node = (SerdNode*)(reader->stack.buf + ref);
++node->n_bytes;
if (!(c & 0x80)) { // Starts with 0 bit, start of new character
++node->n_chars;
}
- *(s - 1) = c;
+ *(s - 1) = (uint8_t)c;
*s = '\0';
return SERD_SUCCESS;
}
diff --git a/src/serd_internal.h b/src/serd_internal.h
index d1c0a3a5..eeb64687 100644
--- a/src/serd_internal.h
+++ b/src/serd_internal.h
@@ -328,35 +328,35 @@ serd_byte_sink_write(const void* buf, size_t len, SerdByteSink* bsink)
/** Return true if `c` lies within [`min`...`max`] (inclusive) */
static inline bool
-in_range(const uint8_t c, const uint8_t min, const uint8_t max)
+in_range(const int c, const int min, const int max)
{
return (c >= min && c <= max);
}
/** RFC2234: ALPHA ::= %x41-5A / %x61-7A ; A-Z / a-z */
static inline bool
-is_alpha(const uint8_t c)
+is_alpha(const int c)
{
return in_range(c, 'A', 'Z') || in_range(c, 'a', 'z');
}
/** RFC2234: DIGIT ::= %x30-39 ; 0-9 */
static inline bool
-is_digit(const uint8_t c)
+is_digit(const int c)
{
return in_range(c, '0', '9');
}
/* RFC2234: HEXDIG ::= DIGIT / "A" / "B" / "C" / "D" / "E" / "F" */
static inline bool
-is_hexdig(const uint8_t c)
+is_hexdig(const int c)
{
return is_digit(c) || in_range(c, 'A', 'F');
}
/* Turtle / JSON / C: XDIGIT ::= DIGIT / A-F / a-f */
static inline bool
-is_xdigit(const uint8_t c)
+is_xdigit(const int c)
{
return is_hexdig(c) || in_range(c, 'a', 'f');
}
@@ -518,7 +518,7 @@ uri_is_under(const SerdURI* uri, const SerdURI* root)
}
static inline bool
-is_uri_scheme_char(const uint8_t c)
+is_uri_scheme_char(const int c)
{
switch (c) {
case ':': case '+': case '-': case '.':