diff options
author | David Robillard <d@drobilla.net> | 2013-03-30 16:37:21 +0000 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2013-03-30 16:37:21 +0000 |
commit | 09c4bb6a3031d2951ff3d285936a91a4f87dca0f (patch) | |
tree | 32b72c0618d2aca379ab8b00cf6bd687e53b0ab0 /src/reader.c | |
parent | 99a57ab1bc5878001f639d1cbfc4ab323848f884 (diff) | |
download | serd-09c4bb6a3031d2951ff3d285936a91a4f87dca0f.tar.gz serd-09c4bb6a3031d2951ff3d285936a91a4f87dca0f.tar.bz2 serd-09c4bb6a3031d2951ff3d285936a91a4f87dca0f.zip |
Add more tests from the new W3C Turtle test suite.
Support crazy escaped NUL (U+0000) characters in literals.
Fix incorrect round-trip serialization test command.
git-svn-id: http://svn.drobilla.net/serd/trunk@446 490d8e77-9747-427b-9fa3-0b8f29cee8a0
Diffstat (limited to 'src/reader.c')
-rw-r--r-- | src/reader.c | 28 |
1 file changed, 19 insertions, 9 deletions
diff --git a/src/reader.c b/src/reader.c index 6233cf30..f86bb630 100644 --- a/src/reader.c +++ b/src/reader.c @@ -280,8 +280,8 @@ read_HEX(SerdReader* reader) } // Read UCHAR escape, initial \ is already eaten by caller -static inline uint32_t -read_UCHAR(SerdReader* reader, Ref dest) +static inline bool +read_UCHAR(SerdReader* reader, Ref dest, uint32_t* char_code) { const uint8_t b = peek_byte(reader); unsigned length = 0; @@ -293,14 +293,14 @@ read_UCHAR(SerdReader* reader, Ref dest) length = 4; break; default: - return 0; + return false; } eat_byte_safe(reader, b); uint8_t buf[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; for (unsigned i = 0; i < length; ++i) { if (!(buf[i] = read_HEX(reader))) { - return 0; + return false; } } @@ -320,7 +320,8 @@ read_UCHAR(SerdReader* reader, Ref dest) r_err(reader, SERD_ERR_BAD_SYNTAX, "unicode character 0x%X out of range\n", code); push_replacement(reader, dest); - return 0xFFFD; + *char_code = 0xFFFD; + return true; } // Build output in buf @@ -346,7 +347,8 @@ read_UCHAR(SerdReader* reader, Ref dest) for (unsigned i = 0; i < size; ++i) { push_byte(reader, dest, buf[i]); } - return code; + *char_code = code; + return true; } // Read ECHAR escape, initial \ is already eaten by caller @@ -521,10 +523,12 @@ read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) Ref ref = push_node(reader, SERD_LITERAL, "", 0); while (true) { const uint8_t c = peek_byte(reader); + uint32_t code; switch (c) { case '\\': eat_byte_safe(reader, c); - if (!read_ECHAR(reader, ref, flags) && !read_UCHAR(reader, ref)) { + if (!read_ECHAR(reader, ref, flags) && + !read_UCHAR(reader, ref, &code)) { r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape `\\%c'\n", peek_byte(reader)); return pop_node(reader, ref); @@ -559,13 +563,15 @@ read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) Ref ref = push_node(reader, SERD_LITERAL, "", 0); while (true) { const uint8_t c = peek_byte(reader); + uint32_t code; switch (c) { case 
'\n': case '\r': r_err(reader, SERD_ERR_BAD_SYNTAX, "line end in short string\n"); return pop_node(reader, ref); case '\\': eat_byte_safe(reader, c); - if (!read_ECHAR(reader, ref, flags) && !read_UCHAR(reader, ref)) { + if (!read_ECHAR(reader, ref, flags) && + !read_UCHAR(reader, ref, &code)) { r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape `\\%c'\n", peek_byte(reader)); return pop_node(reader, ref); @@ -775,7 +781,11 @@ read_IRIREF(SerdReader* reader) return ref; case '\\': eat_byte_safe(reader, c); - switch (code = read_UCHAR(reader, ref)) { + if (!read_UCHAR(reader, ref, &code)) { + r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n"); + return pop_node(reader, ref); + } + switch (code) { case 0: case ' ': case '<': case '>': r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escaped IRI character %X %c\n", code, code); |