From d22653dfe356e3da1354cdb0f7915e29c4a33e3b Mon Sep 17 00:00:00 2001
From: David Robillard
Date: Fri, 31 Mar 2023 10:50:12 -0400
Subject: Factor out NTriples reader

---
 src/read_ntriples.h | 186 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 186 insertions(+)
 create mode 100644 src/read_ntriples.h

(limited to 'src/read_ntriples.h')

diff --git a/src/read_ntriples.h b/src/read_ntriples.h
new file mode 100644
index 00000000..bc76fed6
--- /dev/null
+++ b/src/read_ntriples.h
@@ -0,0 +1,186 @@
+// Copyright 2011-2021 David Robillard
+// SPDX-License-Identifier: ISC
+
+#ifndef SERD_SRC_READ_NTRIPLES_H
+#define SERD_SRC_READ_NTRIPLES_H
+
+#include "serd/node.h"
+#include "serd/reader.h"
+#include "serd/status.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+
+// Utilities
+
+/**
+   Return true if the codepoint `c` is a valid PN_CHARS_BASE character.
+
+   RDF 1.1 NTriples: [157s] PN_CHARS_BASE
+*/
+bool
+is_PN_CHARS_BASE(uint32_t c);
+
+/**
+   Read one (possibly multi-byte) character.
+
+   The caller must have already eaten the first byte, `c`.
+*/
+SerdStatus
+read_character(SerdReader* reader, SerdNode* dest, uint8_t c);
+
+/**
+   Read one string literal escape.
+
+   The caller must have already eaten the first byte, a backslash.
+*/
+SerdStatus
+read_string_escape(SerdReader* reader, SerdNode* ref);
+
+// Terminals
+
+/**
+   Read a language tag starting after the '@'.
+
+   RDF 1.1 NTriples: [144s] LANGTAG
+*/
+SerdStatus
+read_LANGTAG(SerdReader* reader);
+
+/**
+   Read an end of line.
+
+   RDF 1.1 NTriples: [7] EOL
+*/
+SerdStatus
+read_EOL(SerdReader* reader);
+
+/**
+   Read an absolute IRI.
+
+   This is a stricter subset of [8] IRIREF in the NTriples grammar, since a
+   scheme is required.  Handling this in the parser results in better error
+   messages.
+*/
+SerdStatus
+read_IRI(SerdReader* reader, SerdNode** dest);
+
+/**
+   Read an IRI reference suffix into an existing node.
+
+   RDF 1.1 NTriples: [8] IRIREF
+*/
+SerdStatus
+read_IRIREF_suffix(SerdReader* reader, SerdNode* node);
+
+/**
+   Read a string that is single-quoted with the given character.
+
+   RDF 1.1 NTriples: [9] STRING_LITERAL_QUOTE
+   RDF 1.1 Turtle: [23] STRING_LITERAL_SINGLE_QUOTE
+*/
+SerdStatus
+read_STRING_LITERAL(SerdReader* reader, SerdNode* ref, uint8_t q);
+
+/**
+   Read a blank node label that comes after "_:".
+
+   RDF 1.1 NTriples: [141s] BLANK_NODE_LABEL
+*/
+SerdStatus
+read_BLANK_NODE_LABEL(SerdReader* reader, SerdNode** dest, bool* ate_dot);
+
+/**
+   Read an escape like "u201C", starting after the initial backslash.
+
+   RDF 1.1 NTriples: [10] UCHAR
+*/
+SerdStatus
+read_UCHAR(SerdReader* reader, SerdNode* node, uint32_t* code_point);
+
+/**
+   Read an escape like "n", starting after the initial backslash.
+
+   RDF 1.1 NTriples: [153s] ECHAR
+*/
+SerdStatus
+read_ECHAR(SerdReader* reader, SerdNode* dest);
+
+/**
+   Read a basic prefixed name character.
+
+   RDF 1.1 NTriples: [157s] PN_CHARS_BASE
+*/
+SerdStatus
+read_PN_CHARS_BASE(SerdReader* reader, SerdNode* dest);
+
+/**
+   Read any prefixed name character.
+
+   RDF 1.1 NTriples: [160s] PN_CHARS
+*/
+SerdStatus
+read_PN_CHARS(SerdReader* reader, SerdNode* dest);
+
+/**
+   Read a single hexadecimal digit.
+
+   RDF 1.1 NTriples: [162s] HEX
+*/
+uint8_t
+read_HEX(SerdReader* reader);
+
+// Nonterminals
+
+/**
+   Read a comment that starts with '#' and ends with the line.
+
+   Not described by a rule in the grammar since RDF 1.1.
+*/
+SerdStatus
+read_comment(SerdReader* reader);
+
+/**
+   Read a subject (IRI or blank).
+
+   RDF 1.1 NTriples: [3] subject
+*/
+SerdStatus
+read_nt_subject(SerdReader* reader, SerdNode** dest);
+
+/**
+   Read a predicate (IRI).
+
+   RDF 1.1 NTriples: [4] predicate
+*/
+SerdStatus
+read_nt_predicate(SerdReader* reader, SerdNode** dest);
+
+/**
+   Read an object (IRI or blank or literal).
+
+   RDF 1.1 NTriples: [5] object
+*/
+SerdStatus
+read_nt_object(SerdReader* reader, SerdNode** dest, bool* ate_dot);
+
+/**
+   Read a variable that starts with '?' or '$'.
+
+   This is an extension that serd uses in certain contexts to support
+   patterns.
+
+   Restricted version of SPARQL 1.1: [108] Var
+*/
+SerdStatus
+read_Var(SerdReader* reader, SerdNode** dest);
+
+/**
+   Read a complete NTriples document.
+
+   RDF 1.1 NTriples: [1] ntriplesDoc
+*/
+SerdStatus
+read_ntriplesDoc(SerdReader* reader);
+
+#endif // SERD_SRC_READ_NTRIPLES_H
-- 
cgit v1.2.1