// Copyright 2011-2022 David Robillard // SPDX-License-Identifier: ISC #ifndef SERD_READER_H #define SERD_READER_H #include "serd/attributes.h" #include "serd/node.h" #include "serd/sink.h" #include "serd/status.h" #include "serd/stream.h" #include "serd/syntax.h" #include "serd/world.h" #include #include #include SERD_BEGIN_DECLS /** @defgroup serd_reader Reader @ingroup serd_reading_writing @{ */ /// Streaming parser that reads a text stream and writes to a statement sink typedef struct SerdReaderImpl SerdReader; /// Create a new RDF reader SERD_API SerdReader* SERD_ALLOCATED serd_reader_new(SerdWorld* SERD_NONNULL world, SerdSyntax syntax, const SerdSink* SERD_NONNULL sink, size_t stack_size); /** Enable or disable strict parsing. The reader is non-strict (lax) by default, which will tolerate URIs with invalid characters. Setting strict will fail when parsing such files. An error is printed for invalid input in either case. */ SERD_API void serd_reader_set_strict(SerdReader* SERD_NONNULL reader, bool strict); /** Set a prefix to be added to all blank node identifiers. This is useful when multiple files are to be parsed into the same output (a model or a file). Since Serd preserves blank node IDs, this could cause conflicts where two non-equivalent blank nodes are merged, resulting in corrupt data. By setting a unique blank node prefix for each parsed file, this can be avoided, while preserving blank node names. */ SERD_API void serd_reader_add_blank_prefix(SerdReader* SERD_NONNULL reader, const char* SERD_NULLABLE prefix); /** Set the URI of the default graph. If this is set, the reader will emit quads with the graph set to the given node for any statements that are not in a named graph (which is currently all of them since Serd currently does not support any graph syntaxes). */ SERD_API void serd_reader_set_default_graph(SerdReader* SERD_NONNULL reader, const SerdNode* SERD_NULLABLE graph); /// Prepare to read from the file at a local file `uri` SERD_API SerdStatus serd_reader_start_file(SerdReader* SERD_NONNULL reader, const char* SERD_NONNULL uri, bool bulk); /** Prepare to read from a stream. The `read_func` is guaranteed to only be called for `page_size` elements with size 1 (i.e. `page_size` bytes). */ SERD_API SerdStatus serd_reader_start_stream(SerdReader* SERD_NONNULL reader, SerdReadFunc SERD_NONNULL read_func, SerdStreamErrorFunc SERD_NONNULL error_func, void* SERD_NONNULL stream, const char* SERD_NULLABLE name, size_t page_size); /// Prepare to read from a string SERD_API SerdStatus serd_reader_start_string(SerdReader* SERD_NONNULL reader, const char* SERD_NONNULL utf8); /** Read a single "chunk" of data during an incremental read. This function will read a single top level description, and return. This may be a directive, statement, or several statements; essentially it reads until a '.' is encountered. This is particularly useful for reading directly from a pipe or socket. */ SERD_API SerdStatus serd_reader_read_chunk(SerdReader* SERD_NONNULL reader); /** Read a complete document from the source. This function will continue pulling from the source until a complete document has been read. Note that this may block when used with streams, for incremental reading use serd_reader_read_chunk(). */ SERD_API SerdStatus serd_reader_read_document(SerdReader* SERD_NONNULL reader); /** Finish reading from the source. This should be called before starting to read from another source. */ SERD_API SerdStatus serd_reader_finish(SerdReader* SERD_NONNULL reader); /** Skip over bytes in the input until a specific byte is encountered. Typically used for recording from errors in a line-based syntax by skipping ahead to the next newline. @return #SERD_SUCCESS if the given byte was reached, or #SERD_FAILURE if the end of input is reached. */ SERD_API SerdStatus serd_reader_skip_until_byte(SerdReader* SERD_NONNULL reader, uint8_t byte); /** Free `reader`. The reader will be finished via `serd_reader_finish()` if necessary. */ SERD_API void serd_reader_free(SerdReader* SERD_NULLABLE reader); /** @} */ SERD_END_DECLS #endif // SERD_READER_H