/* Copyright 2011-2021 David Robillard Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #ifndef SERD_READ_NTRIPLES_H #define SERD_READ_NTRIPLES_H #include "serd/serd.h" #include #include // Utilities static inline bool codepoint_in_range(const uint32_t c, const uint32_t min, const uint32_t max) { return c >= min && c <= max; } static inline bool is_PN_CHARS_BASE(const uint32_t c) { return (codepoint_in_range(c, 'A', 'Z') || codepoint_in_range(c, 'a', 'z') || codepoint_in_range(c, 0x000C0u, 0x000D6u) || codepoint_in_range(c, 0x000D8u, 0x000F6u) || codepoint_in_range(c, 0x000F8u, 0x002FFu) || codepoint_in_range(c, 0x00370u, 0x0037Du) || codepoint_in_range(c, 0x0037Fu, 0x01FFFu) || codepoint_in_range(c, 0x0200Cu, 0x0200Du) || codepoint_in_range(c, 0x02070u, 0x0218Fu) || codepoint_in_range(c, 0x02C00u, 0x02FEFu) || codepoint_in_range(c, 0x03001u, 0x0D7FFu) || codepoint_in_range(c, 0x0F900u, 0x0FDCFu) || codepoint_in_range(c, 0x0FDF0u, 0x0FFFDu) || codepoint_in_range(c, 0x10000u, 0xEFFFFu)); } /** Read one (possibly multi-byte) character (possibly multi-byte). The caller must have already eaten the first byte, `c`. */ SerdStatus read_character(SerdReader* reader, SerdNode* dest, uint8_t c); // Terminals /** Read a language tag starting after the '@'. RDF 1.1 NTriples: [144s] LANGTAG */ SerdStatus read_LANGTAG(SerdReader* reader); /** Read an end of line. RDF 1.1 NTriples: [7] EOL */ SerdStatus read_EOL(SerdReader* reader); /** Read an IRI reference suffix into an existing node. RDF 1.1 NTriples: [8] IRIREF */ SerdStatus read_IRIREF_suffix(SerdReader* reader, SerdNode* node); /** Read a string that is single-quoted with the given character. RDF 1.1 NTriples: [9] STRING_LITERAL_QUOTE RDF 1.1 Turtle: [23] STRING_LITERAL_SINGLE_QUOTE */ SerdStatus read_STRING_LITERAL(SerdReader* reader, SerdNode* ref, uint8_t q); /** Read a blank node label that comes after "_:". RDF 1.1 NTriples: [141s] BLANK_NODE_LABEL */ SerdStatus read_BLANK_NODE_LABEL(SerdReader* reader, SerdNode** dest, bool* ate_dot); /** Read an escape like "u201C", starting after the initial backslash. RDF 1.1 NTriples: [10] UCHAR */ SerdStatus read_UCHAR(SerdReader* reader, SerdNode* node, uint32_t* code_point); /** Read an escape like "n", starting after the initial backslash. RDF 1.1 NTriples: [153s] ECHAR */ SerdStatus read_ECHAR(SerdReader* reader, SerdNode* dest); /** Read a basic prefixed name character. RDF 1.1 NTriples: [157s] PN_CHARS_BASE */ SerdStatus read_PN_CHARS_BASE(SerdReader* reader, SerdNode* dest); /** Read an initial prefixed name character. RDF 1.1 NTriples: [158s] PN_CHARS_U */ SerdStatus read_PN_CHARS_U(SerdReader* reader, SerdNode* dest); /** Read any prefixed name character. RDF 1.1 NTriples: [160s] PN_CHARS */ SerdStatus read_PN_CHARS(SerdReader* reader, SerdNode* dest); /** Read a single hexadecimal digit. RDF 1.1 NTriples: [162s] HEX */ uint8_t read_HEX(SerdReader* reader); /** Read a variable name, starting after the '?' or '$'. This is an extension that serd uses in certain contexts to support patterns. Restricted version of SPARQL 1.1: [166] VARNAME */ SerdStatus read_VARNAME(SerdReader* reader, SerdNode** dest); // Nonterminals /** Read a comment that starts with '#' and ends with the line. Not described by a rule in the grammar since RDF 1.1. */ SerdStatus read_comment(SerdReader* reader); /** Read a subject (IRI or blank). RDF 1.1 NTriples: [3] subject */ SerdStatus read_nt_subject(SerdReader* reader, SerdNode** dest); /** Read a predicate (IRI). RDF 1.1 NTriples: [4] predicate */ SerdStatus read_nt_predicate(SerdReader* reader, SerdNode** dest); /** Read an object (IRI or blank or literal). RDF 1.1 NTriples: [5] object */ SerdStatus read_nt_object(SerdReader* reader, SerdNode** dest, bool* ate_dot); /** Read a variable that starts with '?' or '$'. This is an extension that serd uses in certain contexts to support patterns. Restricted version of SPARQL 1.1: [108] Var */ SerdStatus read_Var(SerdReader* reader, SerdNode** dest); /** Read a complete NTriples document. RDF 1.1 NTriples: [1] ntriplesDoc */ SerdStatus read_ntriplesDoc(SerdReader* reader); #endif // SERD_READ_NTRIPLES_H