aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David Robillard <d@drobilla.net> 2012-02-29 23:26:28 +0000
committer David Robillard <d@drobilla.net> 2012-02-29 23:26:28 +0000
commit 79545ea09d2d07012ab0a88fb0a7457999d97afa (patch)
tree 0791473fe7232e6cd89c4d1379647a7cc3d1ee68
parent 11a87df2845c4cd577786d2b4df83ab44421f546 (diff)
download serd-79545ea09d2d07012ab0a88fb0a7457999d97afa.tar.gz
serd-79545ea09d2d07012ab0a88fb0a7457999d97afa.tar.bz2
serd-79545ea09d2d07012ab0a88fb0a7457999d97afa.zip
Handle files and strings that start with a UTF-8 Byte Order Mark (fix #814).
git-svn-id: http://svn.drobilla.net/serd/trunk@326 490d8e77-9747-427b-9fa3-0b8f29cee8a0
-rw-r--r-- ChangeLog 1
-rw-r--r-- src/reader.c 16
-rw-r--r-- tests/test-bom.out 1
-rw-r--r-- tests/test-bom.ttl 3
4 files changed, 20 insertions, 1 deletion
diff --git a/ChangeLog b/ChangeLog
index 4bc5a77c..5aee313c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -28,6 +28,7 @@ serd (UNRELEASED) unstable; urgency=low
* Parse collections iteratively in O(1) space.
* Report read error if both "genid" and "docid" IDs are found in the same
document, to prevent silent merging of distinct blank nodes.
+ * Handle files and strings that start with a UTF-8 Byte Order Mark.
-- David Robillard <d@drobilla.net> (UNRELEASED)
diff --git a/src/reader.c b/src/reader.c
index c0cb522a..41e0fb7d 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -1438,6 +1438,15 @@ serd_reader_read_file(SerdReader* reader,
return ret;
}
+static void
+skip_bom(SerdReader* me)
+{
+ const uint8_t* const b = me->read_buf;
+ if (b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) {
+ me->read_head += 3;
+ }
+}
+
SERD_API
SerdStatus
serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name)
@@ -1452,7 +1461,11 @@ serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name)
memset(me->read_buf, '\0', SERD_PAGE_SIZE);
- const bool ret = !page(me) || read_turtleDoc(me);
+ bool ret = page(me);
+ if (ret) {
+ skip_bom(me);
+ ret = read_turtleDoc(me);
+ }
free(me->read_buf);
me->fd = 0;
@@ -1472,6 +1485,7 @@ serd_reader_read_string(SerdReader* me, const uint8_t* utf8)
me->from_file = false;
me->eof = false;
+ skip_bom(me);
const bool ret = read_turtleDoc(me);
me->read_buf = NULL;
diff --git a/tests/test-bom.out b/tests/test-bom.out
new file mode 100644
index 00000000..aea1655b
--- /dev/null
+++ b/tests/test-bom.out
@@ -0,0 +1 @@
+<http://example.org/thing> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Thing> .
diff --git a/tests/test-bom.ttl b/tests/test-bom.ttl
new file mode 100644
index 00000000..8d6534e1
--- /dev/null
+++ b/tests/test-bom.ttl
@@ -0,0 +1,3 @@
+# This file starts with a UTF-8 Byte Order Mark
+
+<http://example.org/thing> a <http://example.org/Thing> .
\ No newline at end of file