aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 1
-rw-r--r-- src/reader.c 16
-rw-r--r-- tests/test-bom.out 1
-rw-r--r-- tests/test-bom.ttl 3
4 files changed, 20 insertions, 1 deletion
diff --git a/ChangeLog b/ChangeLog
index 4bc5a77c..5aee313c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -28,6 +28,7 @@ serd (UNRELEASED) unstable; urgency=low
* Parse collections iteratively in O(1) space.
* Report read error if both "genid" and "docid" IDs are found in the same
document, to prevent silent merging of distinct blank nodes.
+ * Handle files and strings that start with a UTF-8 Byte Order Mark.
-- David Robillard <d@drobilla.net> (UNRELEASED)
diff --git a/src/reader.c b/src/reader.c
index c0cb522a..41e0fb7d 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -1438,6 +1438,15 @@ serd_reader_read_file(SerdReader* reader,
return ret;
}
+static void
+skip_bom(SerdReader* me)
+{
+ const uint8_t* const b = me->read_buf;
+ if (b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) {
+ me->read_head += 3;
+ }
+}
+
SERD_API
SerdStatus
serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name)
@@ -1452,7 +1461,11 @@ serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name)
memset(me->read_buf, '\0', SERD_PAGE_SIZE);
- const bool ret = !page(me) || read_turtleDoc(me);
+ bool ret = page(me);
+ if (ret) {
+ skip_bom(me);
+ ret = read_turtleDoc(me);
+ }
free(me->read_buf);
me->fd = 0;
@@ -1472,6 +1485,7 @@ serd_reader_read_string(SerdReader* me, const uint8_t* utf8)
me->from_file = false;
me->eof = false;
+ skip_bom(me);
const bool ret = read_turtleDoc(me);
me->read_buf = NULL;
diff --git a/tests/test-bom.out b/tests/test-bom.out
new file mode 100644
index 00000000..aea1655b
--- /dev/null
+++ b/tests/test-bom.out
@@ -0,0 +1 @@
+<http://example.org/thing> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Thing> .
diff --git a/tests/test-bom.ttl b/tests/test-bom.ttl
new file mode 100644
index 00000000..8d6534e1
--- /dev/null
+++ b/tests/test-bom.ttl
@@ -0,0 +1,3 @@
+# This file starts with a UTF-8 Byte Order Mark
+
+<http://example.org/thing> a <http://example.org/Thing> .
\ No newline at end of file