aboutsummaryrefslogtreecommitdiffstats
path: root/src/reader.c
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2013-03-30 16:37:21 +0000
committer David Robillard <d@drobilla.net> 2013-03-30 16:37:21 +0000
commit 09c4bb6a3031d2951ff3d285936a91a4f87dca0f (patch)
tree 32b72c0618d2aca379ab8b00cf6bd687e53b0ab0 /src/reader.c
parent 99a57ab1bc5878001f639d1cbfc4ab323848f884 (diff)
download serd-09c4bb6a3031d2951ff3d285936a91a4f87dca0f.tar.gz
serd-09c4bb6a3031d2951ff3d285936a91a4f87dca0f.tar.bz2
serd-09c4bb6a3031d2951ff3d285936a91a4f87dca0f.zip
Add more tests from the new W3C Turtle test suite.
Support crazy escaped NULL characters in literals.
Fix incorrect round-trip serialization test command.

git-svn-id: http://svn.drobilla.net/serd/trunk@446 490d8e77-9747-427b-9fa3-0b8f29cee8a0
Diffstat (limited to 'src/reader.c')
-rw-r--r-- src/reader.c 28
1 files changed, 19 insertions, 9 deletions
diff --git a/src/reader.c b/src/reader.c
index 6233cf30..f86bb630 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -280,8 +280,8 @@ read_HEX(SerdReader* reader)
}
// Read UCHAR escape, initial \ is already eaten by caller
-static inline uint32_t
-read_UCHAR(SerdReader* reader, Ref dest)
+static inline bool
+read_UCHAR(SerdReader* reader, Ref dest, uint32_t* char_code)
{
const uint8_t b = peek_byte(reader);
unsigned length = 0;
@@ -293,14 +293,14 @@ read_UCHAR(SerdReader* reader, Ref dest)
length = 4;
break;
default:
- return 0;
+ return false;
}
eat_byte_safe(reader, b);
uint8_t buf[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
for (unsigned i = 0; i < length; ++i) {
if (!(buf[i] = read_HEX(reader))) {
- return 0;
+ return false;
}
}
@@ -320,7 +320,8 @@ read_UCHAR(SerdReader* reader, Ref dest)
r_err(reader, SERD_ERR_BAD_SYNTAX,
"unicode character 0x%X out of range\n", code);
push_replacement(reader, dest);
- return 0xFFFD;
+ *char_code = 0xFFFD;
+ return true;
}
// Build output in buf
@@ -346,7 +347,8 @@ read_UCHAR(SerdReader* reader, Ref dest)
for (unsigned i = 0; i < size; ++i) {
push_byte(reader, dest, buf[i]);
}
- return code;
+ *char_code = code;
+ return true;
}
// Read ECHAR escape, initial \ is already eaten by caller
@@ -521,10 +523,12 @@ read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q)
Ref ref = push_node(reader, SERD_LITERAL, "", 0);
while (true) {
const uint8_t c = peek_byte(reader);
+ uint32_t code;
switch (c) {
case '\\':
eat_byte_safe(reader, c);
- if (!read_ECHAR(reader, ref, flags) && !read_UCHAR(reader, ref)) {
+ if (!read_ECHAR(reader, ref, flags) &&
+ !read_UCHAR(reader, ref, &code)) {
r_err(reader, SERD_ERR_BAD_SYNTAX,
"invalid escape `\\%c'\n", peek_byte(reader));
return pop_node(reader, ref);
@@ -559,13 +563,15 @@ read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q)
Ref ref = push_node(reader, SERD_LITERAL, "", 0);
while (true) {
const uint8_t c = peek_byte(reader);
+ uint32_t code;
switch (c) {
case '\n': case '\r':
r_err(reader, SERD_ERR_BAD_SYNTAX, "line end in short string\n");
return pop_node(reader, ref);
case '\\':
eat_byte_safe(reader, c);
- if (!read_ECHAR(reader, ref, flags) && !read_UCHAR(reader, ref)) {
+ if (!read_ECHAR(reader, ref, flags) &&
+ !read_UCHAR(reader, ref, &code)) {
r_err(reader, SERD_ERR_BAD_SYNTAX,
"invalid escape `\\%c'\n", peek_byte(reader));
return pop_node(reader, ref);
@@ -775,7 +781,11 @@ read_IRIREF(SerdReader* reader)
return ref;
case '\\':
eat_byte_safe(reader, c);
- switch (code = read_UCHAR(reader, ref)) {
+ if (!read_UCHAR(reader, ref, &code)) {
+ r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n");
+ return pop_node(reader, ref);
+ }
+ switch (code) {
case 0: case ' ': case '<': case '>':
r_err(reader, SERD_ERR_BAD_SYNTAX,
"invalid escaped IRI character %X %c\n", code, code);