diff options
author | David Robillard <d@drobilla.net> | 2012-02-29 23:26:28 +0000 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2012-02-29 23:26:28 +0000 |
commit | 79545ea09d2d07012ab0a88fb0a7457999d97afa (patch) | |
tree | 0791473fe7232e6cd89c4d1379647a7cc3d1ee68 | |
parent | 11a87df2845c4cd577786d2b4df83ab44421f546 (diff) | |
download | serd-79545ea09d2d07012ab0a88fb0a7457999d97afa.tar.gz serd-79545ea09d2d07012ab0a88fb0a7457999d97afa.tar.bz2 serd-79545ea09d2d07012ab0a88fb0a7457999d97afa.zip |
Handle files and strings that start with a UTF-8 Byte Order Mark (fix #814).
git-svn-id: http://svn.drobilla.net/serd/trunk@326 490d8e77-9747-427b-9fa3-0b8f29cee8a0
-rw-r--r-- | ChangeLog | 1 | ||||
-rw-r--r-- | src/reader.c | 16 | ||||
-rw-r--r-- | tests/test-bom.out | 1 | ||||
-rw-r--r-- | tests/test-bom.ttl | 3 |
4 files changed, 20 insertions, 1 deletions
@@ -28,6 +28,7 @@ serd (UNRELEASED) unstable; urgency=low
 * Parse collections iteratively in O(1) space.
 * Report read error if both "genid" and "docid" IDs are found in the same document, to prevent silent merging of distinct blank nodes.
+ * Handle files and strings that start with a UTF-8 Byte Order Mark.
 -- David Robillard <d@drobilla.net> (UNRELEASED)
diff --git a/src/reader.c b/src/reader.c
index c0cb522a..41e0fb7d 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -1438,6 +1438,15 @@ serd_reader_read_file(SerdReader* reader,
 	return ret;
 }
+static void
+skip_bom(SerdReader* me)
+{
+	const uint8_t* const b = me->read_buf;
+	if (b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) {
+		me->read_head += 3;
+	}
+}
+
 SERD_API
 SerdStatus
 serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name)
@@ -1452,7 +1461,11 @@ serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name)
 	memset(me->read_buf, '\0', SERD_PAGE_SIZE);
-	const bool ret = !page(me) || read_turtleDoc(me);
+	bool ret = page(me);
+	if (ret) {
+		skip_bom(me);
+		ret = read_turtleDoc(me);
+	}
 	free(me->read_buf);
 	me->fd = 0;
@@ -1472,6 +1485,7 @@ serd_reader_read_string(SerdReader* me, const uint8_t* utf8)
 	me->from_file = false;
 	me->eof = false;
+	skip_bom(me);
 	const bool ret = read_turtleDoc(me);
 	me->read_buf = NULL;
diff --git a/tests/test-bom.out b/tests/test-bom.out
new file mode 100644
index 00000000..aea1655b
--- /dev/null
+++ b/tests/test-bom.out
@@ -0,0 +1 @@
+<http://example.org/thing> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Thing> .
diff --git a/tests/test-bom.ttl b/tests/test-bom.ttl
new file mode 100644
index 00000000..8d6534e1
--- /dev/null
+++ b/tests/test-bom.ttl
@@ -0,0 +1,3 @@
+# This file starts with a UTF-8 Byte Order Mark
+
+<http://example.org/thing> a <http://example.org/Thing> .
\ No newline at end of file |