diff options
-rw-r--r-- | serd/serd.h | 18 | ||||
-rw-r--r-- | src/node.c | 38 | ||||
-rw-r--r-- | src/reader.c | 22 | ||||
-rw-r--r-- | src/serd_internal.h | 32 |
4 files changed, 61 insertions, 49 deletions
diff --git a/serd/serd.h b/serd/serd.h index 1e7da13c..5c802a79 100644 --- a/serd/serd.h +++ b/serd/serd.h @@ -191,13 +191,18 @@ typedef enum { } SerdNodeFlag; /** + Bitwise OR of SerdNodeFlag values. +*/ +typedef uint32_t SerdNodeFlags; + +/** A syntactic RDF node. */ typedef struct { const uint8_t* buf; /**< Value string */ size_t n_bytes; /**< Size in bytes (including null) */ size_t n_chars; /**< Length in characters */ - uint32_t flags; /**< Bitwise OR of SerdNodeFlag values */ + SerdNodeFlags flags; /**< String properties */ SerdType type; /**< Node type */ } SerdNode; @@ -241,6 +246,17 @@ typedef enum { } SerdStyle; /** + UTF-8 strlen. + @return Length of @c str in characters. + @param str A null-terminated UTF-8 string. + @param n_bytes (Output) Set to the size of @a str in bytes (incl. NULL). + @param flags (Output) Set to the applicable flags. +*/ +SERD_API +size_t +serd_strlen(const uint8_t* str, size_t* n_bytes, SerdNodeFlags* flags); + +/** @name URI @{ */ @@ -20,6 +20,32 @@ #include "serd_internal.h" SERD_API +size_t +serd_strlen(const uint8_t* str, size_t* n_bytes, SerdNodeFlags* flags) +{ + size_t n_chars = 0; + size_t i = 0; + for (; str[i]; ++i) { + if ((str[i] & 0xC0) != 0x80) { + // Does not start with `10', start of a new character + ++n_chars; + switch (str[i]) { + case '\r': + case '\n': + *flags |= SERD_HAS_NEWLINE; + break; + case '"': + *flags |= SERD_HAS_QUOTE; + } + } + } + if (n_bytes) { + *n_bytes = i + 1; + } + return n_chars; +} + +SERD_API SerdNode serd_node_from_string(SerdType type, const uint8_t* buf) { @@ -34,12 +60,14 @@ SERD_API SerdNode serd_node_copy(const SerdNode* node) { - SerdNode copy = *node; - if (node->buf) { - uint8_t* buf = malloc(copy.n_bytes); - memcpy(buf, node->buf, copy.n_bytes); - copy.buf = buf; + if (!node || !node->buf) { + return SERD_NODE_NULL; } + + SerdNode copy = *node; + uint8_t* buf = malloc(copy.n_bytes); + memcpy(buf, node->buf, copy.n_bytes); + copy.buf = buf; return copy; } diff --git a/src/reader.c b/src/reader.c index b0ad02e5..e956c3df 100644 --- a/src/reader.c +++ b/src/reader.c @@ -390,7 +390,7 @@ read_character_escape(SerdReader* reader, Ref dest) } static inline bool -read_echaracter_escape(SerdReader* reader, Ref dest, uint32_t* flags) +read_echaracter_escape(SerdReader* reader, Ref dest, SerdNodeFlags* flags) { switch (peek_byte(reader)) { case 't': @@ -413,7 +413,7 @@ read_echaracter_escape(SerdReader* reader, Ref dest, uint32_t* flags) } static inline bool -read_scharacter_escape(SerdReader* reader, Ref dest, uint32_t* flags) +read_scharacter_escape(SerdReader* reader, Ref dest, SerdNodeFlags* flags) { switch (peek_byte(reader)) { case '"': @@ -428,7 +428,7 @@ read_scharacter_escape(SerdReader* reader, Ref dest, uint32_t* flags) static inline bool read_ucharacter_escape(SerdReader* reader, Ref dest) { - uint32_t flags = 0; + SerdNodeFlags flags = 0; switch (peek_byte(reader)) { case '>': push_byte(reader, dest, eat_byte(reader, '>')); @@ -482,8 +482,8 @@ read_character(SerdReader* reader, Ref dest) static inline SerdStatus read_echaracter(SerdReader* reader, Ref dest) { - uint32_t flags = 0; - uint8_t c = peek_byte(reader); + SerdNodeFlags flags = 0; + uint8_t c = peek_byte(reader); switch (c) { case '\\': eat_byte(reader, '\\'); @@ -500,7 +500,7 @@ read_echaracter(SerdReader* reader, Ref dest) // [43] lcharacter ::= echaracter | '\"' | #x9 | #xA | #xD static inline SerdStatus -read_lcharacter(SerdReader* reader, Ref dest, uint32_t* flags) +read_lcharacter(SerdReader* reader, Ref dest, SerdNodeFlags* flags) { const uint8_t c = peek_byte(reader); uint8_t pre[3]; @@ -537,7 +537,7 @@ read_lcharacter(SerdReader* reader, Ref dest, uint32_t* flags) // [42] scharacter ::= ( echaracter - #x22 ) | '\"' static inline SerdStatus -read_scharacter(SerdReader* reader, Ref dest, uint32_t* flags) +read_scharacter(SerdReader* reader, Ref dest, SerdNodeFlags* flags) { uint8_t c = peek_byte(reader); switch (c) { @@ -621,7 +621,7 @@ read_ws_plus(SerdReader* reader) // [37] longString ::= #x22 #x22 #x22 lcharacter* #x22 #x22 #x22 static Ref -read_longString(SerdReader* reader, uint32_t* flags) +read_longString(SerdReader* reader, SerdNodeFlags* flags) { eat_string(reader, "\"\"\"", 3); Ref str = push_string(reader, "", 1); @@ -636,7 +636,7 @@ read_longString(SerdReader* reader, uint32_t* flags) // [36] string ::= #x22 scharacter* #x22 static Ref -read_string(SerdReader* reader, uint32_t* flags) +read_string(SerdReader* reader, SerdNodeFlags* flags) { eat_byte(reader, '\"'); Ref str = push_string(reader, "", 1); @@ -652,7 +652,7 @@ read_string(SerdReader* reader, uint32_t* flags) // [35] quotedString ::= string | longString static Ref -read_quotedString(SerdReader* reader, uint32_t* flags) +read_quotedString(SerdReader* reader, SerdNodeFlags* flags) { uint8_t pre[3]; peek_string(reader, pre, 3); @@ -903,7 +903,7 @@ read_resource(SerdReader* reader, Node* dest) // | integer | double | decimal | boolean static bool read_literal(SerdReader* reader, Node* dest, - Node* datatype, Ref* lang, uint32_t* flags) + Node* datatype, Ref* lang, SerdNodeFlags* flags) { Ref str = 0; const uint8_t c = peek_byte(reader); diff --git a/src/serd_internal.h b/src/serd_internal.h index e573a806..6c00fea1 100644 --- a/src/serd_internal.h +++ b/src/serd_internal.h @@ -98,36 +98,4 @@ is_digit(const uint8_t c) return in_range(c, '0', '9'); } -/** - UTF-8 strlen. - @return Length of @c str in characters. - @param str A null-terminated UTF-8 string. - @param n_bytes (Output) Set to the size of @a str in bytes (incl. NULL). -*/ -static inline size_t -serd_strlen(const uint8_t* str, size_t* n_bytes, uint32_t* flags) -{ - size_t n_chars = 0; - size_t i = 0; - for (; str[i]; ++i) { - if ((str[i] & 0xC0) != 0x80) { - // Does not start with `10', start of a new character - ++n_chars; - switch (str[i]) { - case '\r': - case '\n': - *flags |= SERD_HAS_NEWLINE; - break; - case '"': - *flags |= SERD_HAS_QUOTE; - } - } - } - if (n_bytes) { - *n_bytes = i + 1; - } - return n_chars; -} - - #endif // SERD_INTERNAL_H |