diff options
-rw-r--r-- | ChangeLog | 1 | ||||
-rw-r--r-- | src/reader.c | 16 | ||||
-rw-r--r-- | tests/test-bom.out | 1 | ||||
-rw-r--r-- | tests/test-bom.ttl | 3 |
4 files changed, 20 insertions, 1 deletions
@@ -28,6 +28,7 @@ serd (UNRELEASED) unstable; urgency=low
   * Parse collections iteratively in O(1) space.
   * Report read error if both "genid" and "docid" IDs are found in the same
     document, to prevent silent merging of distinct blank nodes.
+  * Handle files and strings that start with a UTF-8 Byte Order Mark.
 
  -- David Robillard <d@drobilla.net> (UNRELEASED)
 
diff --git a/src/reader.c b/src/reader.c
index c0cb522a..41e0fb7d 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -1438,6 +1438,15 @@ serd_reader_read_file(SerdReader* reader,
 	return ret;
 }
 
+static void
+skip_bom(SerdReader* me)
+{
+	const uint8_t* const b = me->read_buf;
+	if (b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) {
+		me->read_head += 3;
+	}
+}
+
 SERD_API
 SerdStatus
 serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name)
@@ -1452,7 +1461,11 @@ serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name)
 
 	memset(me->read_buf, '\0', SERD_PAGE_SIZE);
 
-	const bool ret = !page(me) || read_turtleDoc(me);
+	bool ret = page(me);
+	if (ret) {
+		skip_bom(me);
+		ret = read_turtleDoc(me);
+	}
 
 	free(me->read_buf);
 	me->fd = 0;
@@ -1472,6 +1485,7 @@ serd_reader_read_string(SerdReader* me, const uint8_t* utf8)
 	me->from_file = false;
 	me->eof = false;
 
+	skip_bom(me);
 	const bool ret = read_turtleDoc(me);
 
 	me->read_buf = NULL;
diff --git a/tests/test-bom.out b/tests/test-bom.out
new file mode 100644
index 00000000..aea1655b
--- /dev/null
+++ b/tests/test-bom.out
@@ -0,0 +1 @@
+<http://example.org/thing> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Thing> .
diff --git a/tests/test-bom.ttl b/tests/test-bom.ttl
new file mode 100644
index 00000000..8d6534e1
--- /dev/null
+++ b/tests/test-bom.ttl
@@ -0,0 +1,3 @@
+# This file starts with a UTF-8 Byte Order Mark
+
+<http://example.org/thing> a <http://example.org/Thing> .
\ No newline at end of file |