// Copyright 2011-2022 David Robillard // SPDX-License-Identifier: ISC #ifndef SERD_READER_H #define SERD_READER_H #include "serd/attributes.h" #include "serd/env.h" #include "serd/input_stream.h" #include "serd/node.h" #include "serd/sink.h" #include "serd/status.h" #include "serd/syntax.h" #include "serd/world.h" #include "zix/attributes.h" #include #include SERD_BEGIN_DECLS /** @defgroup serd_reader Reader @ingroup serd_reading_writing @{ */ /// Streaming parser that reads a text stream and writes to a statement sink typedef struct SerdReaderImpl SerdReader; /// Reader options typedef enum { /** Tolerate invalid input where possible. This will attempt to ignore invalid input and continue reading. Invalid Unicode characters will be replaced with the replacement character, and various other syntactic problems will be ignored. If there are more severe problems, the reader will try to skip the statement and continue parsing. This should work reasonably well for line-based syntaxes like NTriples and NQuads, but abbreviated Turtle or TriG may not recover. Note that this flag should be used carefully, since it can result in data loss. */ SERD_READ_LAX = 1U << 0U, /** Support reading variable nodes. As an extension, serd supports reading variables nodes with SPARQL-like syntax, for example "?foo" or "$bar". This can be used for storing graph patterns and templates. */ SERD_READ_VARIABLES = 1U << 1U, /** Read URIs and blank node labels exactly. Normally, the reader expands all relative URIs, and may adjust blank node labels to avoid clashing with generated ones. This flag disables all of this processing, so that URI references and blank nodes are passed to the sink exactly as they are in the input. Note that this does not apply to CURIEs, since serd deliberately does not have a way to represent CURIE nodes. A bad namespace prefix is considered a syntax error. */ SERD_READ_VERBATIM = 1U << 2U, } SerdReaderFlag; /// Bitwise OR of SerdReaderFlag values typedef uint32_t SerdReaderFlags; /// Create a new RDF reader SERD_API SerdReader* ZIX_ALLOCATED serd_reader_new(SerdWorld* ZIX_NONNULL world, SerdSyntax syntax, SerdReaderFlags flags, SerdEnv* ZIX_NONNULL env, const SerdSink* ZIX_NONNULL sink); /** Set a prefix to be added to all blank node identifiers. This is useful when multiple files are to be parsed into the same output (a model or a file). Since Serd preserves blank node IDs, this could cause conflicts where two non-equivalent blank nodes are merged, resulting in corrupt data. By setting a unique blank node prefix for each parsed file, this can be avoided, while preserving blank node names. */ SERD_API void serd_reader_add_blank_prefix(SerdReader* ZIX_NONNULL reader, const char* ZIX_NULLABLE prefix); /** Prepare to read some input. This sets up the reader to read from the given input, but will not read any bytes from it. This should be followed by serd_reader_read_chunk() or serd_reader_read_document() to actually read the input. @param reader The reader. @param input An opened input stream to read from. @param input_name The name of the input stream for error messages. @param block_size The number of bytes to read from the stream at once. */ SERD_API SerdStatus serd_reader_start(SerdReader* ZIX_NONNULL reader, SerdInputStream* ZIX_NONNULL input, const SerdNode* ZIX_NULLABLE input_name, size_t block_size); /** Read a single "chunk" of data during an incremental read. This function will read a single top level description, and return. This may be a directive, statement, or several statements; essentially it reads until a '.' is encountered. This is particularly useful for reading directly from a pipe or socket. */ SERD_API SerdStatus serd_reader_read_chunk(SerdReader* ZIX_NONNULL reader); /** Read a complete document from the source. This function will continue pulling from the source until a complete document has been read. Note that this may block when used with streams, for incremental reading use serd_reader_read_chunk(). */ SERD_API SerdStatus serd_reader_read_document(SerdReader* ZIX_NONNULL reader); /** Finish reading from the source. This should be called before starting to read from another source. */ SERD_API SerdStatus serd_reader_finish(SerdReader* ZIX_NONNULL reader); /** Skip over bytes in the input until a specific byte is encountered. Typically used for recording from errors in a line-based syntax by skipping ahead to the next newline. @return #SERD_SUCCESS if the given byte was reached, or #SERD_FAILURE if the end of input is reached. */ SERD_API SerdStatus serd_reader_skip_until_byte(SerdReader* ZIX_NONNULL reader, uint8_t byte); /** Free `reader`. The reader will be finished via `serd_reader_finish()` if necessary. */ SERD_API void serd_reader_free(SerdReader* ZIX_NULLABLE reader); /** @} */ SERD_END_DECLS #endif // SERD_READER_H