From 67bcd63bda9d7b095489a09b9880aa730ddb5488 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Fri, 7 Oct 2022 12:07:51 -0400 Subject: Port sord_validate to pcre2 --- src/sord_config.h | 14 ++++++------ src/sord_validate.c | 61 ++++++++++++++++++++++++++++++++--------------------- 2 files changed, 44 insertions(+), 31 deletions(-) (limited to 'src') diff --git a/src/sord_config.h b/src/sord_config.h index 7853b4a..5b7a341 100644 --- a/src/sord_config.h +++ b/src/sord_config.h @@ -20,11 +20,11 @@ #if !defined(SORD_NO_DEFAULT_CONFIG) -// The validator uses PCRE for literal pattern matching -# ifndef HAVE_PCRE +// The validator uses PCRE2 for literal pattern matching +# ifndef HAVE_PCRE2 # ifdef __has_include -# if __has_include() -# define HAVE_PCRE 1 +# if __has_include() +# define HAVE_PCRE2 1 # endif # endif # endif @@ -39,10 +39,10 @@ if the build system defines them all. */ -#ifdef HAVE_PCRE -# define USE_PCRE 1 +#ifdef HAVE_PCRE2 +# define USE_PCRE2 1 #else -# define USE_PCRE 0 +# define USE_PCRE2 0 #endif #endif // SORD_CONFIG_H diff --git a/src/sord_validate.c b/src/sord_validate.c index d899eb6..6d40138 100644 --- a/src/sord_validate.c +++ b/src/sord_validate.c @@ -8,8 +8,9 @@ #include "sord/sord.h" #include "sord_config.h" -#if USE_PCRE -# include +#if USE_PCRE2 +# define PCRE2_CODE_UNIT_WIDTH 8 +# include #endif #ifdef _WIN32 @@ -176,31 +177,43 @@ is_descendant_of(SordModel* model, } static bool -regexp_match(const uint8_t* pat, const char* str) +regexp_match(const uint8_t* const pattern, const char* const str) { -#if USE_PCRE - // Append a $ to the pattern so we only match if the entire string matches - const size_t len = strlen((const char*)pat); - char* const regx = (char*)malloc(len + 2); - memcpy(regx, pat, len); - regx[len] = '$'; - regx[len + 1] = '\0'; - - const char* err; - int erroffset; - pcre* re = pcre_compile(regx, PCRE_ANCHORED, &err, &erroffset, NULL); - free(regx); +#if USE_PCRE2 + static const uint32_t options = PCRE2_ANCHORED | PCRE2_ENDANCHORED; + + int err = 0; + size_t erroffset = 0U; + + pcre2_code* const re = pcre2_compile( + pattern, PCRE2_ZERO_TERMINATED, options, &err, &erroffset, NULL); + if (!re) { - fprintf( - stderr, "Error in pattern `%s' at offset %d (%s)\n", pat, erroffset, err); + fprintf(stderr, + "Error in pattern `%s' at offset %lu (%d)\n", + pattern, + erroffset, + err); return false; } - const bool ret = - pcre_exec(re, NULL, str, (int)strlen(str), 0, 0, NULL, 0) >= 0; - pcre_free(re); - return ret; -#endif // USE_PCRE + pcre2_match_data* const match_data = + pcre2_match_data_create_from_pattern(re, NULL); + + const int rc = pcre2_match(re, + (const uint8_t*)str, + PCRE2_ZERO_TERMINATED, + 0, + options, + match_data, + NULL); + + pcre2_match_data_free(match_data); + + pcre2_code_free(re); + return rc > 0; +#endif // USE_PCRE2 + return true; } @@ -776,8 +789,8 @@ main(int argc, char** argv) URI(xsd, pattern); URI(xsd, string); -#if !USE_PCRE - fprintf(stderr, "warning: Built without PCRE, datatypes not checked.\n"); +#if !USE_PCRE2 + fprintf(stderr, "warning: Built without PCRE2, datatypes not checked.\n"); #endif const int prop_st = check_properties(model, &uris); -- cgit v1.2.1