about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2021-07-08 16:15:46 -0400
committer David Robillard <d@drobilla.net> 2023-12-02 16:27:02 -0500
commit fb95ceca3b744e47e973585aa682515365ac9eb0 (patch)
tree 264e30801d634ea580478a10095938448b4962c8 /src
parent 8978501e5cf06f366eb14f6ef5f5f7f2f8e34986 (diff)
download serd-fb95ceca3b744e47e973585aa682515365ac9eb0.tar.gz
serd-fb95ceca3b744e47e973585aa682515365ac9eb0.tar.bz2
serd-fb95ceca3b744e47e973585aa682515365ac9eb0.zip
Use a fixed-size reader stack
Diffstat (limited to 'src')
-rw-r--r-- src/n3.c 261
-rw-r--r-- src/node.c 2
-rw-r--r-- src/node.h 3
-rw-r--r-- src/reader.c 84
-rw-r--r-- src/reader.h 62
-rw-r--r-- src/serdi.c 19
-rw-r--r-- src/stack.h 12
-rw-r--r-- src/writer.c 29
8 files changed, 273 insertions, 199 deletions
diff --git a/src/n3.c b/src/n3.c
index cd7e5a03..e35b8940 100644
--- a/src/n3.c
+++ b/src/n3.c
@@ -39,7 +39,7 @@ fancy_syntax(const SerdReader* const reader)
}
static SerdStatus
-read_collection(SerdReader* reader, ReadContext ctx, Ref* dest);
+read_collection(SerdReader* reader, ReadContext ctx, SerdNode** dest);
static SerdStatus
read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot);
@@ -58,7 +58,9 @@ read_HEX(SerdReader* const reader)
// Read UCHAR escape, initial \ is already eaten by caller
static SerdStatus
-read_UCHAR(SerdReader* const reader, const Ref dest, uint32_t* const char_code)
+read_UCHAR(SerdReader* const reader,
+ SerdNode* const dest,
+ uint32_t* const char_code)
{
const int b = peek_byte(reader);
unsigned length = 0;
@@ -99,9 +101,9 @@ read_UCHAR(SerdReader* const reader, const Ref dest, uint32_t* const char_code)
} else {
r_err(
reader, SERD_BAD_SYNTAX, "unicode character 0x%X out of range\n", code);
- push_bytes(reader, dest, replacement_char, 3);
- *char_code = 0xFFFD;
- return SERD_SUCCESS;
+ *char_code = 0xFFFD;
+ const SerdStatus st = push_bytes(reader, dest, replacement_char, 3);
+ return st ? st : SERD_SUCCESS;
}
// Build output in buf
@@ -130,14 +132,15 @@ read_UCHAR(SerdReader* const reader, const Ref dest, uint32_t* const char_code)
break;
}
- push_bytes(reader, dest, buf, size);
*char_code = code;
- return SERD_SUCCESS;
+ return push_bytes(reader, dest, buf, size);
}
// Read ECHAR escape, initial \ is already eaten by caller
static SerdStatus
-read_ECHAR(SerdReader* const reader, const Ref dest, SerdNodeFlags* const flags)
+read_ECHAR(SerdReader* const reader,
+ SerdNode* const dest,
+ SerdNodeFlags* const flags)
{
SerdStatus st = SERD_SUCCESS;
const int c = peek_byte(reader);
@@ -201,23 +204,24 @@ read_utf8_bytes(SerdReader* const reader,
}
static SerdStatus
-read_utf8_character(SerdReader* const reader, const Ref dest, const uint8_t c)
+read_utf8_character(SerdReader* const reader,
+ SerdNode* const dest,
+ const uint8_t c)
{
uint32_t size = 0;
uint8_t bytes[4] = {0, 0, 0, 0};
SerdStatus st = read_utf8_bytes(reader, bytes, &size, c);
if (st) {
push_bytes(reader, dest, replacement_char, 3);
- } else {
- push_bytes(reader, dest, bytes, size);
+ return st;
}
- return st;
+ return push_bytes(reader, dest, bytes, size);
}
static SerdStatus
read_utf8_code(SerdReader* const reader,
- const Ref dest,
+ SerdNode* const dest,
uint32_t* const code,
const uint8_t c)
{
@@ -229,8 +233,10 @@ read_utf8_code(SerdReader* const reader,
return st;
}
- push_bytes(reader, dest, bytes, size);
- *code = parse_counted_utf8_char(bytes, size);
+ if (!(st = push_bytes(reader, dest, bytes, size))) {
+ *code = parse_counted_utf8_char(bytes, size);
+ }
+
return st;
}
@@ -238,7 +244,7 @@ read_utf8_code(SerdReader* const reader,
// The first byte, c, has already been eaten by caller
static SerdStatus
read_character(SerdReader* const reader,
- const Ref dest,
+ SerdNode* const dest,
SerdNodeFlags* const flags,
const uint8_t c)
{
@@ -255,9 +261,9 @@ read_character(SerdReader* const reader,
default:
break;
}
+
return push_byte(reader, dest, c);
}
-
return read_utf8_character(reader, dest, c);
}
@@ -322,7 +328,7 @@ eat_delim(SerdReader* const reader, const uint8_t delim)
static SerdStatus
read_string_escape(SerdReader* const reader,
- const Ref ref,
+ SerdNode* const ref,
SerdNodeFlags* const flags)
{
SerdStatus st = SERD_SUCCESS;
@@ -339,7 +345,7 @@ read_string_escape(SerdReader* const reader,
// Initial triple quotes are already eaten by caller
static SerdStatus
read_STRING_LITERAL_LONG(SerdReader* const reader,
- const Ref ref,
+ SerdNode* const ref,
SerdNodeFlags* const flags,
const uint8_t q)
{
@@ -382,7 +388,7 @@ read_STRING_LITERAL_LONG(SerdReader* const reader,
// Initial quote is already eaten by caller
static SerdStatus
read_STRING_LITERAL(SerdReader* const reader,
- const Ref ref,
+ SerdNode* const ref,
SerdNodeFlags* const flags,
const uint8_t q)
{
@@ -415,7 +421,7 @@ read_STRING_LITERAL(SerdReader* const reader,
static SerdStatus
read_String(SerdReader* const reader,
- const Ref node,
+ SerdNode* const node,
SerdNodeFlags* const flags)
{
const int q1 = eat_byte_safe(reader, peek_byte(reader));
@@ -459,7 +465,7 @@ is_PN_CHARS_BASE(const uint32_t c)
}
static SerdStatus
-read_PN_CHARS_BASE(SerdReader* const reader, const Ref dest)
+read_PN_CHARS_BASE(SerdReader* const reader, SerdNode* const dest)
{
uint32_t code = 0;
const int c = peek_byte(reader);
@@ -494,7 +500,7 @@ is_PN_CHARS(const uint32_t c)
}
static SerdStatus
-read_PN_CHARS(SerdReader* const reader, const Ref dest)
+read_PN_CHARS(SerdReader* const reader, SerdNode* const dest)
{
uint32_t code = 0;
const int c = peek_byte(reader);
@@ -520,7 +526,7 @@ read_PN_CHARS(SerdReader* const reader, const Ref dest)
}
static SerdStatus
-read_PERCENT(SerdReader* const reader, const Ref dest)
+read_PERCENT(SerdReader* const reader, SerdNode* const dest)
{
push_byte(reader, dest, eat_byte_safe(reader, '%'));
const uint8_t h1 = read_HEX(reader);
@@ -534,7 +540,7 @@ read_PERCENT(SerdReader* const reader, const Ref dest)
}
static SerdStatus
-read_PN_LOCAL_ESC(SerdReader* const reader, const Ref dest)
+read_PN_LOCAL_ESC(SerdReader* const reader, SerdNode* const dest)
{
skip_byte(reader, '\\');
@@ -570,7 +576,7 @@ read_PN_LOCAL_ESC(SerdReader* const reader, const Ref dest)
}
static SerdStatus
-read_PLX(SerdReader* const reader, const Ref dest)
+read_PLX(SerdReader* const reader, SerdNode* const dest)
{
const int c = peek_byte(reader);
switch (c) {
@@ -584,7 +590,9 @@ read_PLX(SerdReader* const reader, const Ref dest)
}
static SerdStatus
-read_PN_LOCAL(SerdReader* const reader, const Ref dest, bool* const ate_dot)
+read_PN_LOCAL(SerdReader* const reader,
+ SerdNode* const dest,
+ bool* const ate_dot)
{
int c = peek_byte(reader);
SerdStatus st = SERD_SUCCESS;
@@ -625,10 +633,9 @@ read_PN_LOCAL(SerdReader* const reader, const Ref dest, bool* const ate_dot)
trailing_unescaped_dot = (c == '.');
}
- SerdNode* const n = deref(reader, dest);
if (trailing_unescaped_dot) {
// Ate trailing dot, pop it from stack/node and inform caller
- --n->length;
+ --dest->length;
serd_stack_pop(&reader->stack, 1);
*ate_dot = true;
}
@@ -638,28 +645,29 @@ read_PN_LOCAL(SerdReader* const reader, const Ref dest, bool* const ate_dot)
// Read the remainder of a PN_PREFIX after some initial characters
static SerdStatus
-read_PN_PREFIX_tail(SerdReader* const reader, const Ref dest)
+read_PN_PREFIX_tail(SerdReader* const reader, SerdNode* const dest)
{
- int c = 0;
+ SerdStatus st = SERD_SUCCESS;
+ int c = 0;
while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.')*
if (c == '.') {
- push_byte(reader, dest, eat_byte_safe(reader, c));
- } else if (read_PN_CHARS(reader, dest)) {
+ st = push_byte(reader, dest, eat_byte_safe(reader, c));
+ } else if ((st = read_PN_CHARS(reader, dest))) {
break;
}
}
- const SerdNode* const n = deref(reader, dest);
- if (serd_node_string(n)[serd_node_length(n) - 1] == '.' &&
+ if (st <= SERD_FAILURE &&
+ serd_node_string(dest)[serd_node_length(dest) - 1] == '.' &&
read_PN_CHARS(reader, dest)) {
return r_err(reader, SERD_BAD_SYNTAX, "prefix ends with '.'\n");
}
- return SERD_SUCCESS;
+ return st > SERD_FAILURE ? st : SERD_SUCCESS;
}
static SerdStatus
-read_PN_PREFIX(SerdReader* const reader, const Ref dest)
+read_PN_PREFIX(SerdReader* const reader, SerdNode* const dest)
{
const SerdStatus st = read_PN_CHARS_BASE(reader, dest);
@@ -667,33 +675,33 @@ read_PN_PREFIX(SerdReader* const reader, const Ref dest)
}
static SerdStatus
-read_LANGTAG(SerdReader* const reader, Ref* const dest)
+read_LANGTAG(SerdReader* const reader, SerdNode** const dest)
{
int c = peek_byte(reader);
if (!is_alpha(c)) {
return r_err(reader, SERD_BAD_SYNTAX, "unexpected '%c'\n", c);
}
- *dest = push_node(reader, SERD_LITERAL, "", 0);
+ if (!(*dest = push_node(reader, SERD_LITERAL, "", 0))) {
+ return SERD_BAD_STACK;
+ }
SerdStatus st = SERD_SUCCESS;
TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c)));
while ((c = peek_byte(reader)) && is_alpha(c)) {
TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c)));
}
-
while (peek_byte(reader) == '-') {
TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, '-')));
while ((c = peek_byte(reader)) && (is_alpha(c) || is_digit(c))) {
TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c)));
}
}
-
return SERD_SUCCESS;
}
static SerdStatus
-read_IRIREF_scheme(SerdReader* const reader, const Ref dest)
+read_IRIREF_scheme(SerdReader* const reader, SerdNode* const dest)
{
int c = peek_byte(reader);
if (!is_alpha(c)) {
@@ -723,7 +731,7 @@ read_IRIREF_scheme(SerdReader* const reader, const Ref dest)
}
static SerdStatus
-read_IRIREF(SerdReader* const reader, Ref* const dest)
+read_IRIREF(SerdReader* const reader, SerdNode** const dest)
{
if (!eat_byte_check(reader, '<')) {
return SERD_BAD_SYNTAX;
@@ -807,7 +815,7 @@ read_IRIREF(SerdReader* const reader, Ref* const dest)
static SerdStatus
read_PrefixedName(SerdReader* const reader,
- const Ref dest,
+ SerdNode* const dest,
const bool read_prefix,
bool* const ate_dot)
{
@@ -827,7 +835,7 @@ read_PrefixedName(SerdReader* const reader,
}
static SerdStatus
-read_0_9(SerdReader* const reader, const Ref str, const bool at_least_one)
+read_0_9(SerdReader* const reader, SerdNode* const str, const bool at_least_one)
{
unsigned count = 0;
SerdStatus st = SERD_SUCCESS;
@@ -844,8 +852,8 @@ read_0_9(SerdReader* const reader, const Ref str, const bool at_least_one)
static SerdStatus
read_number(SerdReader* const reader,
- Ref* const dest,
- Ref* const datatype,
+ SerdNode** const dest,
+ SerdNode** const datatype,
SerdNodeFlags* const flags,
bool* const ate_dot)
{
@@ -858,9 +866,14 @@ read_number(SerdReader* const reader,
SerdStatus st = SERD_SUCCESS;
int c = peek_byte(reader);
bool has_decimal = false;
+ if (!*dest) {
+ return SERD_BAD_STACK;
+ }
+
if (c == '-' || c == '+') {
push_byte(reader, *dest, eat_byte_safe(reader, c));
}
+
if ((c = peek_byte(reader)) == '.') {
has_decimal = true;
// decimal case 2 (e.g. '.0' or '-.0' or '+.0')
@@ -914,22 +927,24 @@ read_number(SerdReader* const reader,
}
static SerdStatus
-read_iri(SerdReader* const reader, Ref* const dest, bool* const ate_dot)
+read_iri(SerdReader* const reader, SerdNode** const dest, bool* const ate_dot)
{
switch (peek_byte(reader)) {
case '<':
return read_IRIREF(reader, dest);
default:
- *dest = push_node(reader, SERD_CURIE, "", 0);
+ if (!(*dest = push_node(reader, SERD_CURIE, "", 0))) {
+ return SERD_BAD_STACK;
+ }
return read_PrefixedName(reader, *dest, true, ate_dot);
}
}
static SerdStatus
read_literal(SerdReader* const reader,
- Ref* const dest,
- Ref* const datatype,
- Ref* const lang,
+ SerdNode** const dest,
+ SerdNode** const datatype,
+ SerdNode** const lang,
SerdNodeFlags* const flags,
bool* const ate_dot)
{
@@ -971,7 +986,7 @@ read_literal(SerdReader* const reader,
}
static SerdStatus
-read_verb(SerdReader* const reader, Ref* const dest)
+read_verb(SerdReader* const reader, SerdNode** const dest)
{
if (peek_byte(reader) == '<') {
return read_IRIREF(reader, dest);
@@ -980,11 +995,13 @@ read_verb(SerdReader* const reader, Ref* const dest)
/* Either a qname, or "a". Read the prefix first, and if it is in fact
"a", produce that instead.
*/
- *dest = push_node(reader, SERD_CURIE, "", 0);
+ if (!(*dest = push_node(reader, SERD_CURIE, "", 0))) {
+ return SERD_BAD_STACK;
+ }
SerdStatus st = read_PN_PREFIX(reader, *dest);
bool ate_dot = false;
- SerdNode* node = deref(reader, *dest);
+ SerdNode* node = *dest;
const int next = peek_byte(reader);
if (!st && node->length == 1 && serd_node_string(node)[0] == 'a' &&
next != ':' && !is_PN_CHARS_BASE((uint32_t)next)) {
@@ -1005,7 +1022,7 @@ read_verb(SerdReader* const reader, Ref* const dest)
static SerdStatus
read_BLANK_NODE_LABEL(SerdReader* const reader,
- Ref* const dest,
+ SerdNode** const dest,
bool* const ate_dot)
{
skip_byte(reader, '_');
@@ -1013,30 +1030,36 @@ read_BLANK_NODE_LABEL(SerdReader* const reader,
return SERD_BAD_SYNTAX;
}
- const Ref ref = *dest = push_node(reader,
- SERD_BLANK,
- reader->bprefix ? reader->bprefix : "",
- reader->bprefix_len);
+ SerdStatus st = SERD_SUCCESS;
+
+ SerdNode* n = *dest = push_node(reader,
+ SERD_BLANK,
+ reader->bprefix ? reader->bprefix : "",
+ reader->bprefix_len);
int c = peek_byte(reader); // First: (PN_CHARS | '_' | [0-9])
if (is_digit(c) || c == '_') {
- push_byte(reader, ref, eat_byte_safe(reader, c));
- } else if (read_PN_CHARS(reader, ref)) {
- *dest = pop_node(reader, *dest);
- return r_err(reader, SERD_BAD_SYNTAX, "invalid name start\n");
+ push_byte(reader, n, eat_byte_safe(reader, c));
+ } else if ((st = read_PN_CHARS(reader, n))) {
+ st = st > SERD_FAILURE ? st : SERD_BAD_SYNTAX;
+ *dest = pop_node(reader, n);
+ return r_err(reader, st, "invalid name start\n");
}
while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.')*
if (c == '.') {
- push_byte(reader, ref, eat_byte_safe(reader, c));
- } else if (read_PN_CHARS(reader, ref)) {
+ TRY(st, push_byte(reader, n, eat_byte_safe(reader, c)));
+ } else if ((st = read_PN_CHARS(reader, n))) {
break;
}
}
- SerdNode* n = deref(reader, ref);
- char* buf = serd_node_buffer(n);
- if (buf[n->length - 1] == '.' && read_PN_CHARS(reader, ref)) {
+ if (st > SERD_FAILURE) {
+ return st;
+ }
+
+ char* buf = serd_node_buffer(n);
+ if (buf[n->length - 1] == '.' && read_PN_CHARS(reader, n)) {
// Ate trailing dot, pop it from stack/node and inform caller
--n->length;
serd_stack_pop(&reader->stack, 1);
@@ -1049,13 +1072,14 @@ read_BLANK_NODE_LABEL(SerdReader* const reader,
buf[reader->bprefix_len] = 'B'; // Prevent clash
reader->seen_genid = true;
} else if (reader->seen_genid && buf[reader->bprefix_len] == 'B') {
- *dest = pop_node(reader, *dest);
+ *dest = pop_node(reader, n);
return r_err(reader,
SERD_BAD_LABEL,
"found both 'b' and 'B' blank IDs, prefix required\n");
}
}
}
+
return SERD_SUCCESS;
}
@@ -1063,7 +1087,7 @@ static SerdStatus
read_anon(SerdReader* const reader,
ReadContext ctx,
const bool subject,
- Ref* const dest)
+ SerdNode** const dest)
{
skip_byte(reader, '[');
@@ -1100,7 +1124,7 @@ read_anon(SerdReader* const reader,
return r_err(reader, SERD_BAD_SYNTAX, "'.' inside blank\n");
}
read_ws_star(reader);
- serd_sink_write_end(reader->sink, deref(reader, *dest));
+ serd_sink_write_end(reader->sink, *dest);
*ctx.flags = old_flags;
}
@@ -1128,10 +1152,9 @@ read_object(SerdReader* const reader,
SerdStatus st = SERD_FAILURE;
bool simple = (ctx->subject != 0);
- SerdNode* node = NULL;
- Ref o = 0;
- Ref datatype = 0;
- Ref lang = 0;
+ SerdNode* o = 0;
+ SerdNode* datatype = 0;
+ SerdNode* lang = 0;
uint32_t flags = 0;
const int c = peek_byte(reader);
if (!fancy_syntax(reader)) {
@@ -1187,16 +1210,18 @@ read_object(SerdReader* const reader,
/* Either a boolean literal, or a qname. Read the prefix first, and if
it is in fact a "true" or "false" literal, produce that instead.
*/
- o = push_node(reader, SERD_CURIE, "", 0);
+ if (!(o = push_node(reader, SERD_CURIE, "", 0))) {
+ return SERD_BAD_STACK;
+ }
+
while (!read_PN_CHARS_BASE(reader, o)) {
}
- node = deref(reader, o);
- if ((node->length == 4 && !memcmp(serd_node_string(node), "true", 4)) ||
- (node->length == 5 && !memcmp(serd_node_string(node), "false", 5))) {
- flags = flags | SERD_HAS_DATATYPE;
- node->type = SERD_LITERAL;
- datatype = push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN);
- st = SERD_SUCCESS;
+ if ((o->length == 4 && !memcmp(serd_node_string(o), "true", 4)) ||
+ (o->length == 5 && !memcmp(serd_node_string(o), "false", 5))) {
+ flags = flags | SERD_HAS_DATATYPE;
+ o->type = SERD_LITERAL;
+ datatype = push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN);
+ st = SERD_SUCCESS;
} else if (read_PN_PREFIX_tail(reader, o) > SERD_FAILURE) {
st = SERD_BAD_SYNTAX;
} else {
@@ -1209,7 +1234,7 @@ read_object(SerdReader* const reader,
}
if (!st && simple && o) {
- deref(reader, o)->flags = flags;
+ o->flags = flags;
}
if (!st && emit && simple) {
@@ -1289,8 +1314,8 @@ read_predicateObjectList(SerdReader* const reader,
static SerdStatus
end_collection(SerdReader* const reader,
const ReadContext ctx,
- const Ref n1,
- const Ref n2,
+ SerdNode* const n1,
+ SerdNode* const n2,
const SerdStatus st)
{
pop_node(reader, n2);
@@ -1305,7 +1330,9 @@ end_collection(SerdReader* const reader,
}
static SerdStatus
-read_collection(SerdReader* const reader, ReadContext ctx, Ref* const dest)
+read_collection(SerdReader* const reader,
+ ReadContext ctx,
+ SerdNode** const dest)
{
SerdStatus st = SERD_SUCCESS;
@@ -1329,10 +1356,15 @@ read_collection(SerdReader* const reader, ReadContext ctx, Ref* const dest)
/* The order of node allocation here is necessarily not in stack order,
so we create two nodes and recycle them throughout. */
- Ref n1 = push_node_padded(reader, genid_size(reader), SERD_BLANK, "", 0);
- Ref n2 = 0;
- Ref node = n1;
- Ref rest = 0;
+ SerdNode* n1 =
+ push_node_padded(reader, genid_size(reader), SERD_BLANK, "", 0);
+ SerdNode* n2 = 0;
+ SerdNode* node = n1;
+ SerdNode* rest = 0;
+
+ if (!n1) {
+ return SERD_BAD_STACK;
+ }
ctx.subject = *dest;
while (!peek_delim(reader, ')')) {
@@ -1368,8 +1400,8 @@ read_collection(SerdReader* const reader, ReadContext ctx, Ref* const dest)
static SerdStatus
read_subject(SerdReader* const reader,
- const ReadContext ctx,
- Ref* const dest,
+ ReadContext ctx,
+ SerdNode** const dest,
int* const s_type)
{
SerdStatus st = SERD_SUCCESS;
@@ -1397,7 +1429,7 @@ read_subject(SerdReader* const reader,
}
static SerdStatus
-read_labelOrSubject(SerdReader* const reader, Ref* const dest)
+read_labelOrSubject(SerdReader* const reader, SerdNode** const dest)
{
bool ate_dot = false;
switch (peek_byte(reader)) {
@@ -1450,9 +1482,9 @@ read_base(SerdReader* const reader, const bool sparql, const bool token)
read_ws_star(reader);
- Ref uri = 0;
+ SerdNode* uri = NULL;
TRY(st, read_IRIREF(reader, &uri));
- TRY(st, serd_sink_write_base(reader->sink, deref(reader, uri)));
+ TRY(st, serd_sink_write_base(reader->sink, uri));
pop_node(reader, uri);
read_ws_star(reader);
@@ -1476,7 +1508,11 @@ read_prefixID(SerdReader* const reader, const bool sparql, const bool token)
}
read_ws_star(reader);
- Ref name = push_node(reader, SERD_LITERAL, "", 0);
+ SerdNode* name = push_node(reader, SERD_LITERAL, "", 0);
+ if (!name) {
+ return SERD_BAD_STACK;
+ }
+
TRY_FAILING(st, read_PN_PREFIX(reader, name));
if (eat_byte_check(reader, ':') != ':') {
@@ -1485,11 +1521,10 @@ read_prefixID(SerdReader* const reader, const bool sparql, const bool token)
}
read_ws_star(reader);
- Ref uri = 0;
+ SerdNode* uri = NULL;
TRY(st, read_IRIREF(reader, &uri));
- st = serd_sink_write_prefix(
- reader->sink, deref(reader, name), deref(reader, uri));
+ st = serd_sink_write_prefix(reader->sink, name, uri);
pop_node(reader, uri);
pop_node(reader, name);
@@ -1566,17 +1601,11 @@ read_wrappedGraph(SerdReader* const reader, ReadContext* const ctx)
}
static int
-tokcmp(SerdReader* const reader,
- const Ref ref,
- const char* const tok,
- const size_t n)
+tokcmp(SerdNode* const node, const char* const tok, const size_t n)
{
- SerdNode* node = deref(reader, ref);
- if (!node || node->length != n) {
- return -1;
- }
-
- return serd_strncasecmp(serd_node_string(node), tok, n);
+ return ((!node || node->length != n)
+ ? -1
+ : serd_strncasecmp(serd_node_string(node), tok, n));
}
SerdStatus
@@ -1617,11 +1646,11 @@ read_n3_statement(SerdReader* const reader)
default:
TRY_FAILING(st, read_subject(reader, ctx, &ctx.subject, &s_type));
- if (!tokcmp(reader, ctx.subject, "base", 4)) {
+ if (!tokcmp(ctx.subject, "base", 4)) {
st = read_base(reader, true, false);
- } else if (!tokcmp(reader, ctx.subject, "prefix", 6)) {
+ } else if (!tokcmp(ctx.subject, "prefix", 6)) {
st = read_prefixID(reader, true, false);
- } else if (!tokcmp(reader, ctx.subject, "graph", 5)) {
+ } else if (!tokcmp(ctx.subject, "graph", 5)) {
ctx.subject = pop_node(reader, ctx.subject);
read_ws_star(reader);
TRY(st, read_labelOrSubject(reader, &ctx.graph));
@@ -1630,8 +1659,8 @@ read_n3_statement(SerdReader* const reader)
pop_node(reader, ctx.graph);
ctx.graph = 0;
read_ws_star(reader);
- } else if (!tokcmp(reader, ctx.subject, "true", 4) ||
- !tokcmp(reader, ctx.subject, "false", 5)) {
+ } else if (!tokcmp(ctx.subject, "true", 4) ||
+ !tokcmp(ctx.subject, "false", 5)) {
return r_err(reader, SERD_BAD_SYNTAX, "expected subject\n");
} else if (read_ws_star(reader) && peek_byte(reader) == '{') {
if (s_type == '(' || (s_type == '[' && !*ctx.flags)) {
diff --git a/src/node.c b/src/node.c
index 5b5b754f..ec14e611 100644
--- a/src/node.c
+++ b/src/node.c
@@ -49,8 +49,6 @@ DEFINE_XSD_NODE(boolean)
DEFINE_XSD_NODE(decimal)
DEFINE_XSD_NODE(integer)
-static const size_t serd_node_align = 2 * sizeof(uint64_t);
-
static const SerdNodeFlags meta_mask = (SERD_HAS_DATATYPE | SERD_HAS_LANGUAGE);
static SerdNode*
diff --git a/src/node.h b/src/node.h
index 41cf0d82..a87871b4 100644
--- a/src/node.h
+++ b/src/node.h
@@ -10,6 +10,7 @@
#include "serd/uri.h"
#include <stddef.h>
+#include <stdint.h>
struct SerdNodeImpl {
size_t length; ///< Length in bytes (not including null)
@@ -17,6 +18,8 @@ struct SerdNodeImpl {
SerdNodeType type; ///< Node type
};
+static const size_t serd_node_align = 2 * sizeof(uint64_t);
+
static inline char* SERD_NONNULL
serd_node_buffer(SerdNode* SERD_NONNULL node)
{
diff --git a/src/reader.c b/src/reader.c
index 944c8d09..3b4dfd2b 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -34,9 +34,10 @@ r_err(SerdReader* const reader, const SerdStatus st, const char* const fmt, ...)
}
void
-set_blank_id(SerdReader* const reader, const Ref ref, const size_t buf_size)
+set_blank_id(SerdReader* const reader,
+ SerdNode* const node,
+ const size_t buf_size)
{
- SerdNode* node = deref(reader, ref);
char* buf = (char*)(node + 1);
const char* prefix = reader->bprefix ? (const char*)reader->bprefix : "";
@@ -50,15 +51,18 @@ genid_size(const SerdReader* const reader)
return reader->bprefix_len + 1 + 10 + 1; // + "b" + UINT32_MAX + \0
}
-Ref
+SerdNode*
blank_id(SerdReader* const reader)
{
- Ref ref = push_node_padded(reader, genid_size(reader), SERD_BLANK, "", 0);
- set_blank_id(reader, ref, genid_size(reader));
+ SerdNode* ref =
+ push_node_padded(reader, genid_size(reader), SERD_BLANK, "", 0);
+ if (ref) {
+ set_blank_id(reader, ref, genid_size(reader));
+ }
return ref;
}
-Ref
+SerdNode*
push_node_padded(SerdReader* const reader,
const size_t maxlen,
const SerdNodeType type,
@@ -68,6 +72,10 @@ push_node_padded(SerdReader* const reader,
void* mem = serd_stack_push_aligned(
&reader->stack, sizeof(SerdNode) + maxlen + 1, sizeof(SerdNode));
+ if (!mem) {
+ return NULL;
+ }
+
SerdNode* const node = (SerdNode*)mem;
node->length = length;
@@ -78,14 +86,15 @@ push_node_padded(SerdReader* const reader,
memcpy(buf, str, length + 1);
#ifdef SERD_STACK_CHECK
- reader->allocs = (Ref*)realloc(reader->allocs,
- sizeof(reader->allocs) * (++reader->n_allocs));
- reader->allocs[reader->n_allocs - 1] = ((char*)mem - reader->stack.buf);
+ reader->allocs = (SerdNode**)realloc(
+ reader->allocs, sizeof(reader->allocs) * (++reader->n_allocs));
+ reader->allocs[reader->n_allocs - 1] =
+ (SerdNode*)((char*)mem - reader->stack.buf);
#endif
- return (Ref)((char*)node - reader->stack.buf);
+ return node;
}
-Ref
+SerdNode*
push_node(SerdReader* const reader,
const SerdNodeType type,
const char* const str,
@@ -94,42 +103,33 @@ push_node(SerdReader* const reader,
return push_node_padded(reader, length, type, str, length);
}
-SERD_PURE_FUNC SerdNode*
-deref(SerdReader* const reader, const Ref ref)
-{
- return ref ? (SerdNode*)(reader->stack.buf + ref) : NULL;
-}
-
-Ref
-pop_node(SerdReader* const reader, const Ref ref)
+SerdNode*
+pop_node(SerdReader* const reader, const SerdNode* const node)
{
- if (ref && ref != reader->rdf_first && ref != reader->rdf_rest &&
- ref != reader->rdf_nil) {
+ if (node && node != reader->rdf_first && node != reader->rdf_rest &&
+ node != reader->rdf_nil) {
#ifdef SERD_STACK_CHECK
- SERD_STACK_ASSERT_TOP(reader, ref);
+ SERD_STACK_ASSERT_TOP(reader, node);
--reader->n_allocs;
#endif
- SerdNode* const node = deref(reader, ref);
- char* const top = reader->stack.buf + reader->stack.size;
+ char* const top = reader->stack.buf + reader->stack.size;
serd_stack_pop_aligned(&reader->stack, (size_t)(top - (char*)node));
}
- return 0;
+ return NULL;
}
SerdStatus
-emit_statement(SerdReader* const reader, const ReadContext ctx, const Ref o)
+emit_statement(SerdReader* const reader,
+ const ReadContext ctx,
+ SerdNode* const o)
{
- SerdNode* graph = deref(reader, ctx.graph);
+ SerdNode* graph = ctx.graph;
if (!graph && reader->default_graph) {
graph = reader->default_graph;
}
- const SerdStatus st = serd_sink_write(reader->sink,
- *ctx.flags,
- deref(reader, ctx.subject),
- deref(reader, ctx.predicate),
- deref(reader, o),
- graph);
+ const SerdStatus st = serd_sink_write(
+ reader->sink, *ctx.flags, ctx.subject, ctx.predicate, o, graph);
*ctx.flags &= SERD_ANON_CONT | SERD_LIST_CONT; // Preserve only cont flags
return st;
@@ -150,21 +150,35 @@ serd_reader_read_document(SerdReader* const reader)
}
SerdReader*
-serd_reader_new(const SerdSyntax syntax, const SerdSink* const sink)
+serd_reader_new(const SerdSyntax syntax,
+ const SerdSink* const sink,
+ const size_t stack_size)
{
+ if (stack_size < 3 * sizeof(SerdNode) + 192 + serd_node_align) {
+ return NULL;
+ }
+
SerdReader* me = (SerdReader*)calloc(1, sizeof(SerdReader));
me->sink = sink;
me->default_graph = NULL;
- me->stack = serd_stack_new(SERD_PAGE_SIZE);
+ me->stack = serd_stack_new(stack_size);
me->syntax = syntax;
me->next_id = 1;
me->strict = true;
+ // Reserve a bit of space at the end of the stack to zero pad nodes
+ me->stack.buf_size -= serd_node_align;
+
me->rdf_first = push_node(me, SERD_URI, NS_RDF "first", 48);
me->rdf_rest = push_node(me, SERD_URI, NS_RDF "rest", 47);
me->rdf_nil = push_node(me, SERD_URI, NS_RDF "nil", 46);
+ // The initial stack size check should cover this
+ assert(me->rdf_first);
+ assert(me->rdf_rest);
+ assert(me->rdf_nil);
+
return me;
}
diff --git a/src/reader.h b/src/reader.h
index db749fe8..a39fc278 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -29,18 +29,13 @@
# define SERD_STACK_ASSERT_TOP(reader, ref)
#endif
-/* Reference to a node in the stack (we can not use pointers since the
- stack may be reallocated, invalidating any pointers to elements).
-*/
-typedef size_t Ref;
-
typedef struct {
- Ref graph;
- Ref subject;
- Ref predicate;
- Ref object;
- Ref datatype;
- Ref lang;
+ SerdNode* graph;
+ SerdNode* subject;
+ SerdNode* predicate;
+ SerdNode* object;
+ SerdNode* datatype;
+ SerdNode* lang;
SerdStatementFlags* flags;
} ReadContext;
@@ -48,9 +43,9 @@ struct SerdReaderImpl {
const SerdSink* sink;
SerdErrorFunc error_func;
void* error_handle;
- Ref rdf_first;
- Ref rdf_rest;
- Ref rdf_nil;
+ SerdNode* rdf_first;
+ SerdNode* rdf_rest;
+ SerdNode* rdf_nil;
SerdNode* default_graph;
SerdByteSource source;
SerdStack stack;
@@ -62,8 +57,8 @@ struct SerdReaderImpl {
bool strict; ///< True iff strict parsing
bool seen_genid;
#ifdef SERD_STACK_CHECK
- Ref* allocs; ///< Stack of push offsets
- size_t n_allocs; ///< Number of stack pushes
+ SerdNode** allocs; ///< Stack of push offsets
+ size_t n_allocs; ///< Number of stack pushes
#endif
};
@@ -71,14 +66,14 @@ SERD_LOG_FUNC(3, 4)
SerdStatus
r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...);
-Ref
+SerdNode*
push_node_padded(SerdReader* reader,
size_t maxlen,
SerdNodeType type,
const char* str,
size_t length);
-Ref
+SerdNode*
push_node(SerdReader* reader,
SerdNodeType type,
const char* str,
@@ -87,20 +82,17 @@ push_node(SerdReader* reader,
SERD_PURE_FUNC size_t
genid_size(const SerdReader* reader);
-Ref
+SerdNode*
blank_id(SerdReader* reader);
void
-set_blank_id(SerdReader* reader, Ref ref, size_t buf_size);
+set_blank_id(SerdReader* reader, SerdNode* node, size_t buf_size);
SerdNode*
-deref(SerdReader* reader, Ref ref);
-
-Ref
-pop_node(SerdReader* reader, Ref ref);
+pop_node(SerdReader* reader, const SerdNode* node);
SerdStatus
-emit_statement(SerdReader* reader, ReadContext ctx, Ref o);
+emit_statement(SerdReader* reader, ReadContext ctx, SerdNode* o);
SerdStatus
read_n3_statement(SerdReader* reader);
@@ -166,13 +158,15 @@ eat_string(SerdReader* reader, const char* str, unsigned n)
}
static inline SerdStatus
-push_byte(SerdReader* reader, Ref ref, const int c)
+push_byte(SerdReader* reader, SerdNode* node, const int c)
{
assert(c != EOF);
SERD_STACK_ASSERT_TOP(reader, ref);
- char* const s = (char*)serd_stack_push(&reader->stack, 1);
- SerdNode* const node = (SerdNode*)(reader->stack.buf + ref);
+ char* const s = (char*)serd_stack_push(&reader->stack, 1);
+ if (!s) {
+ return SERD_BAD_STACK;
+ }
*(s - 1) = (char)c;
*s = '\0';
@@ -181,12 +175,20 @@ push_byte(SerdReader* reader, Ref ref, const int c)
return SERD_SUCCESS;
}
-static inline void
-push_bytes(SerdReader* reader, Ref ref, const uint8_t* bytes, unsigned len)
+static inline SerdStatus
+push_bytes(SerdReader* reader,
+ SerdNode* ref,
+ const uint8_t* bytes,
+ unsigned len)
{
+ if (reader->stack.buf_size < reader->stack.size + len) {
+ return SERD_BAD_STACK;
+ }
+
for (unsigned i = 0; i < len; ++i) {
push_byte(reader, ref, bytes[i]);
}
+ return SERD_SUCCESS;
}
#endif // SERD_SRC_READER_H
diff --git a/src/serdi.c b/src/serdi.c
index 8ed02dfc..aece6d9d 100644
--- a/src/serdi.c
+++ b/src/serdi.c
@@ -24,8 +24,10 @@
# include <io.h>
#endif
+#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
#define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg)
@@ -100,6 +102,7 @@ print_usage(const char* const name, const bool error)
" -f Fast and loose URI pass-through.\n"
" -h Display this help and exit.\n"
" -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"
+ " -k BYTES Parser stack size.\n"
" -l Lax (non-strict) parsing.\n"
" -o SYNTAX Output syntax: turtle/ntriples/nquads.\n"
" -p PREFIX Add PREFIX to blank node IDs.\n"
@@ -180,6 +183,7 @@ main(int argc, char** argv)
bool full_uris = false;
bool lax = false;
bool quiet = false;
+ size_t stack_size = 1048576U;
const char* add_prefix = NULL;
const char* chop_prefix = NULL;
const char* root_uri = NULL;
@@ -236,6 +240,19 @@ main(int argc, char** argv)
return print_usage(prog, true);
}
break;
+ } else if (opt == 'k') {
+ if (argv[a][o + 1] || ++a == argc) {
+ return missing_arg(prog, 'k');
+ }
+
+ char* endptr = NULL;
+ const long size = strtol(argv[a], &endptr, 10);
+ if (size <= 0 || size == LONG_MAX || *endptr != '\0') {
+ SERDI_ERRORF("invalid stack size '%s'\n", argv[a]);
+ return 1;
+ }
+ stack_size = (size_t)size;
+ break;
} else if (opt == 'o') {
if (argv[a][o + 1] || ++a == argc) {
return missing_arg(prog, 'o');
@@ -307,7 +324,7 @@ main(int argc, char** argv)
output_syntax, writer_flags, env, (SerdWriteFunc)fwrite, out_fd);
SerdReader* const reader =
- serd_reader_new(input_syntax, serd_writer_sink(writer));
+ serd_reader_new(input_syntax, serd_writer_sink(writer), stack_size);
serd_reader_set_strict(reader, !lax);
if (quiet) {
diff --git a/src/stack.h b/src/stack.h
index 7cd40a2f..e6e46372 100644
--- a/src/stack.h
+++ b/src/stack.h
@@ -53,9 +53,9 @@ serd_stack_push(SerdStack* stack, size_t n_bytes)
{
const size_t new_size = stack->size + n_bytes;
if (stack->buf_size < new_size) {
- stack->buf_size += (stack->buf_size >> 1); // *= 1.5
- stack->buf = (char*)realloc(stack->buf, stack->buf_size);
+ return NULL;
}
+
char* const ret = (stack->buf + stack->size);
stack->size = new_size;
return ret;
@@ -72,12 +72,16 @@ static inline void*
serd_stack_push_aligned(SerdStack* stack, size_t n_bytes, size_t align)
{
// Push one byte to ensure space for a pad count
- serd_stack_push(stack, 1);
+ if (!serd_stack_push(stack, 1)) {
+ return NULL;
+ }
// Push padding if necessary
const size_t pad = align - stack->size % align;
if (pad > 0) {
- serd_stack_push(stack, pad);
+ if (!serd_stack_push(stack, pad)) {
+ return NULL;
+ }
}
// Set top of stack to pad count so we can properly pop later
diff --git a/src/writer.c b/src/writer.c
index e674f2b1..5c05e244 100644
--- a/src/writer.c
+++ b/src/writer.c
@@ -209,7 +209,7 @@ ctx(SerdWriter* writer, const Field field)
return node && node->type ? node : NULL;
}
-static void
+SERD_NODISCARD static SerdStatus
push_context(SerdWriter* const writer,
const ContextType type,
const SerdNode* const graph,
@@ -218,6 +218,10 @@ push_context(SerdWriter* const writer,
{
// Push the current context to the stack
void* const top = serd_stack_push(&writer->anon_stack, sizeof(WriteContext));
+ if (!top) {
+ return SERD_BAD_STACK;
+ }
+
*(WriteContext*)top = writer->context;
// Update the current context
@@ -230,6 +234,7 @@ push_context(SerdWriter* const writer,
0U};
writer->context = current;
+ return SERD_SUCCESS;
}
static void
@@ -1079,20 +1084,22 @@ serd_writer_write_statement(SerdWriter* const writer,
if (flags & (SERD_ANON_S_BEGIN | SERD_LIST_S_BEGIN)) {
// Push context for anonymous or list subject
const bool is_list = (flags & SERD_LIST_S_BEGIN);
- push_context(writer,
- is_list ? CTX_LIST : CTX_BLANK,
- graph,
- subject,
- is_list ? NULL : predicate);
+ TRY(st,
+ push_context(writer,
+ is_list ? CTX_LIST : CTX_BLANK,
+ graph,
+ subject,
+ is_list ? NULL : predicate));
}
if (flags & (SERD_ANON_O_BEGIN | SERD_LIST_O_BEGIN)) {
// Push context for anonymous or list object if necessary
- push_context(writer,
- (flags & SERD_LIST_O_BEGIN) ? CTX_LIST : CTX_BLANK,
- graph,
- object,
- NULL);
+ TRY(st,
+ push_context(writer,
+ (flags & SERD_LIST_O_BEGIN) ? CTX_LIST : CTX_BLANK,
+ graph,
+ object,
+ NULL));
}
return st;