From 79545ea09d2d07012ab0a88fb0a7457999d97afa Mon Sep 17 00:00:00 2001
From: David Robillard
Date: Wed, 29 Feb 2012 23:26:28 +0000
Subject: Handle files and strings that start with a UTF-8 Byte Order Mark (fix #814).

git-svn-id: http://svn.drobilla.net/serd/trunk@326 490d8e77-9747-427b-9fa3-0b8f29cee8a0
---
 ChangeLog          |  1 +
 src/reader.c       | 16 +++++++++++++++-
 tests/test-bom.out |  1 +
 tests/test-bom.ttl |  3 +++
 4 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 tests/test-bom.out
 create mode 100644 tests/test-bom.ttl

diff --git a/ChangeLog b/ChangeLog
index 4bc5a77c..5aee313c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -28,6 +28,7 @@ serd (UNRELEASED) unstable; urgency=low
   * Parse collections iteratively in O(1) space.
   * Report read error if both "genid" and "docid" IDs are found in the same
     document, to prevent silent merging of distinct blank nodes.
+  * Handle files and strings that start with a UTF-8 Byte Order Mark.
 
  -- David Robillard (UNRELEASED)
 
diff --git a/src/reader.c b/src/reader.c
index c0cb522a..41e0fb7d 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -1438,6 +1438,15 @@ serd_reader_read_file(SerdReader* reader,
 	return ret;
 }
 
+static void
+skip_bom(SerdReader* me)
+{
+	const uint8_t* const b = me->read_buf;
+	if (b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) {
+		me->read_head += 3;
+	}
+}
+
 SERD_API
 SerdStatus
 serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name)
@@ -1452,7 +1461,11 @@ serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name)
 
 	memset(me->read_buf, '\0', SERD_PAGE_SIZE);
 
-	const bool ret = !page(me) || read_turtleDoc(me);
+	bool ret = page(me);
+	if (ret) {
+		skip_bom(me);
+		ret = read_turtleDoc(me);
+	}
 
 	free(me->read_buf);
 	me->fd = 0;
@@ -1472,6 +1485,7 @@ serd_reader_read_string(SerdReader* me, const uint8_t* utf8)
 	me->from_file = false;
 	me->eof = false;
 
+	skip_bom(me);
 	const bool ret = read_turtleDoc(me);
 
 	me->read_buf = NULL;
diff --git a/tests/test-bom.out b/tests/test-bom.out
new file mode 100644
index 00000000..aea1655b
--- /dev/null
+++ b/tests/test-bom.out
@@ -0,0 +1 @@
+ .
diff --git a/tests/test-bom.ttl b/tests/test-bom.ttl
new file mode 100644
index 00000000..8d6534e1
--- /dev/null
+++ b/tests/test-bom.ttl
@@ -0,0 +1,3 @@
+# This file starts with a UTF-8 Byte Order Mark
+
+ a .
\ No newline at end of file
-- 
cgit v1.2.1