diff options
author | David Robillard <d@drobilla.net> | 2022-10-23 13:41:15 -0400 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2022-10-23 13:46:58 -0400 |
commit | c886d489576cd0bc33d7d22d81981c794067946f (patch) | |
tree | f43c8d872401ed80b37f974516d9e9f2fee9d765 /src | |
parent | f95a698b94069ed03f6a8f2d0f7eb089d66c91ef (diff) | |
download | zix-c886d489576cd0bc33d7d22d81981c794067946f.tar.gz zix-c886d489576cd0bc33d7d22d81981c794067946f.tar.bz2 zix-c886d489576cd0bc33d7d22d81981c794067946f.zip |
Add path API
Diffstat (limited to 'src')
-rw-r--r-- | src/index_range.h | 28 | ||||
-rw-r--r-- | src/path.c | 712 | ||||
-rw-r--r-- | src/path_iter.h | 31 |
3 files changed, 771 insertions, 0 deletions
diff --git a/src/index_range.h b/src/index_range.h new file mode 100644 index 0000000..1e06e90 --- /dev/null +++ b/src/index_range.h @@ -0,0 +1,28 @@ +// Copyright 2022 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef ZIX_INDEX_RANGE_H +#define ZIX_INDEX_RANGE_H + +#include <stdbool.h> +#include <stddef.h> + +typedef struct { + size_t begin; ///< Index to the first character + size_t end; ///< Index one past the last character +} ZixIndexRange; + +static inline ZixIndexRange +zix_make_range(const size_t begin, const size_t end) +{ + const ZixIndexRange result = {begin, end}; + return result; +} + +static inline bool +zix_is_empty_range(const ZixIndexRange range) +{ + return range.begin == range.end; +} + +#endif // ZIX_INDEX_RANGE_H diff --git a/src/path.c b/src/path.c new file mode 100644 index 0000000..aa787b3 --- /dev/null +++ b/src/path.c @@ -0,0 +1,712 @@ +// Copyright 2007-2022 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#include "zix/path.h" + +#include "index_range.h" +#include "path_iter.h" + +#include "zix/string_view.h" + +#include <stdbool.h> +#include <stddef.h> +#include <string.h> + +static const ZixIndexRange two_range = {0U, 2U}; +static const ZixIndexRange one_range = {0U, 1U}; + +#ifdef _WIN32 + +# define ZIX_DIR_SEP '\\' // Backslash is preferred on Windows + +static inline bool +is_dir_sep(const char c) +{ + return c == '/' || c == '\\'; +} + +static inline bool +is_any_sep(const char c) +{ + return c == '/' || c == ':' || c == '\\'; +} + +static bool +is_root_name_char(const char c) +{ + return c && !is_dir_sep(c); +} + +static ZixIndexRange +zix_path_root_name_range(const char* const path) +{ + ZixIndexRange result = {0U, 0U}; + + if (path) { + if (((path[0] >= 'A' && path[0] <= 'Z') || + (path[0] >= 'a' && path[0] <= 'z')) && + path[1] == ':') { + // A root that starts with a letter then ':' has a name like "C:" + result.end = 2U; + + } else if (is_dir_sep(path[0]) && is_dir_sep(path[1]) && + is_root_name_char(path[2])) { + // A root that starts with two separators has a name like "\\host" + result.end = 2U; + while (is_root_name_char(path[result.end])) { + ++result.end; + } + } + } + + return result; +} + +#else + +# define ZIX_DIR_SEP '/' // Slash is the only separator on other platforms + +static inline bool +is_dir_sep(const char c) +{ + return c == '/'; +} + +static inline bool +is_any_sep(const char c) +{ + return c == '/'; +} + +static inline ZixIndexRange +zix_path_root_name_range(const char* const path) +{ + (void)path; + return zix_make_range(0U, 0U); +} + +#endif + +typedef struct { + ZixIndexRange name; + ZixIndexRange dir; +} ZixRootSlices; + +static ZixRootSlices +zix_path_root_slices(const char* const path) +{ + // A root name not trailed by a separator has no root directory + const ZixIndexRange name = zix_path_root_name_range(path); + const size_t dir_len = (size_t)(path && is_dir_sep(path[name.end])); + if (!dir_len) { + const ZixRootSlices result = {name, {name.end, name.end}}; + return result; + } + + // Skip redundant root directory separators + ZixIndexRange dir = {name.end, name.end + dir_len}; + while (is_dir_sep(path[dir.end])) { + dir.begin = dir.end++; + } + + const ZixRootSlices result = {name, dir}; + return result; +} + +static bool +zix_string_ranges_equal(const char* const lhs, + const ZixIndexRange lhs_range, + const char* const rhs, + const ZixIndexRange rhs_range) +{ + const size_t lhs_len = lhs_range.end - lhs_range.begin; + const size_t rhs_len = rhs_range.end - rhs_range.begin; + + return lhs_len == rhs_len && + (lhs_len == 0 || + !strncmp(lhs + lhs_range.begin, rhs + rhs_range.begin, lhs_len)); +} + +static ZixIndexRange +zix_path_root_path_range(const char* const path) +{ + const ZixRootSlices root = zix_path_root_slices(path); + const size_t dir_len = (size_t)!zix_is_empty_range(root.dir); + + return zix_is_empty_range(root.name) + ? root.dir + : zix_make_range(root.name.begin, root.name.end + dir_len); +} + +static ZixIndexRange +zix_path_parent_path_range(const ZixStringView path) +{ + if (!path.length) { + return zix_make_range(0U, 0U); // Empty paths have no parent + } + + // Find the first relevant character (skip leading root path if any) + const ZixIndexRange root = zix_path_root_path_range(path.data); + const size_t p = root.begin; + + // Scan backwards starting at the last character + size_t l = path.length - 1U; + if (is_dir_sep(path.data[l])) { + // Rewind to the last relevant separator (skip trailing redundant ones) + while (l > p && is_dir_sep(path.data[l - 1U])) { + --l; + } + } else { + // Rewind to the last relevant character (skip trailing name) + while (l > p && !is_dir_sep(path.data[l])) { + --l; + } + } + + if (l <= root.end) { + return root; + } + + // Drop trailing separators + while (l > p && is_dir_sep(path.data[l])) { + --l; + } + + return zix_make_range(root.begin, root.begin + l + 1U - p); +} + +static ZixIndexRange +zix_path_filename_range(const ZixStringView path) +{ + // Find the first filename character (skip leading root path if any) + const size_t begin = zix_path_root_path_range(path.data).end; + if (begin == path.length || is_dir_sep(path.data[path.length - 1U])) { + return zix_make_range(0U, 0U); + } + + // Scan backwards to find the first character after the last separator + size_t f = path.length - 1U; + while (f > begin && !is_dir_sep(path.data[f - 1])) { + --f; + } + + return zix_make_range(f, path.length); +} + +static ZixIndexRange +zix_path_stem_range(const ZixStringView path) +{ + const ZixIndexRange name = zix_path_filename_range(path); + + ZixIndexRange stem = name; + if (!zix_is_empty_range(stem) && + !zix_string_ranges_equal(path.data, stem, ".", one_range) && + !zix_string_ranges_equal(path.data, stem, "..", two_range)) { + // Scan backwards to the last "." after the last directory separator + --stem.end; + while (stem.end > stem.begin && path.data[stem.end] != '.') { + --stem.end; + } + } + + return zix_is_empty_range(stem) ? name : stem; +} + +static ZixIndexRange +zix_path_extension_range(const ZixStringView path) +{ + const ZixIndexRange stem = zix_path_stem_range(path); + + return zix_is_empty_range(stem) + ? stem + : zix_make_range(stem.end, stem.end + path.length - stem.end); +} + +char* +zix_path_join(ZixAllocator* const allocator, + const char* const a, + const char* const b) +{ + const ZixStringView b_view = zix_optional_string(b); + if (!a || !a[0]) { + return zix_string_view_copy(allocator, b_view); + } + + const ZixStringView a_view = zix_optional_string(a); + const ZixRootSlices a_root = zix_path_root_slices(a); + const ZixRootSlices b_root = zix_path_root_slices(b); + +#ifdef _WIN32 + // If the RHS has a different root name, just copy it + if (b_root.name.end && + !zix_string_ranges_equal(a, a_root.name, b, b_root.name)) { + return zix_string_view_copy(allocator, b_view); + } +#endif + + // Determine how to join paths + const bool a_has_root_dir = !zix_is_empty_range(a_root.dir); + const bool a_has_filename = zix_path_has_filename(a_view.data); + size_t prefix_len = a_view.length; + bool add_sep = false; + if (!zix_is_empty_range(b_root.dir)) { + prefix_len = a_root.name.end; // Omit any path from a + } else if (a_has_filename || (!a_has_root_dir && zix_path_is_absolute(a))) { + add_sep = true; // Add a preferred separator after a + } + + const size_t path_len = prefix_len + (size_t)add_sep + b_view.length; + char* const path = (char*)zix_calloc(allocator, path_len + 1U, 1U); + + if (path) { + memcpy(path, a, prefix_len); + + size_t p = prefix_len; + if (add_sep) { + path[p++] = ZIX_DIR_SEP; + } + + if (b_view.length > b_root.name.end) { + memcpy(path + p, b + b_root.name.end, b_view.length - b_root.name.end); + path[p + b_view.length] = '\0'; + } + } + + return path; +} + +char* +zix_path_preferred(ZixAllocator* const allocator, const char* const path) +{ + const ZixStringView path_view = zix_string(path); + char* const result = (char*)zix_calloc(allocator, path_view.length + 1U, 1U); + + if (result) { + for (size_t i = 0U; i < path_view.length; ++i) { + result[i] = (char)(is_dir_sep(path[i]) ? ZIX_DIR_SEP : path[i]); + } + } + + return result; +} + +char* +zix_path_lexically_normal(ZixAllocator* const allocator, const char* const path) +{ + static const char sep = ZIX_DIR_SEP; + + /* Loosely following the normalization algorithm from + <https://en.cppreference.com/w/cpp/filesystem/path>, but in such a way + that only a single buffer (the result) is needed. This means that + dot-dot segments are relatively expensive, but it beats a stack in + everyday common cases. */ + + if (!path[0]) { + return (char*)zix_calloc(allocator, 1U, 1U); // The empty path is normal + } + + // Allocate a result buffer, with space for one additional character + const ZixStringView path_view = zix_string(path); + const size_t path_len = path_view.length; + char* const result = (char*)zix_calloc(allocator, path_len + 2U, 1); + size_t r = 0U; + + // Copy root, normalizing separators as we go + const ZixIndexRange root = zix_path_root_path_range(path); + const size_t root_len = root.end - root.begin; + for (size_t i = 0; i < root_len; ++i) { + result[r++] = (char)(is_dir_sep(path[i]) ? sep : path[i]); + } + + // Copy path, removing dot entries and collapsing separators as we go + for (size_t i = root.end; i < path_len; ++i) { + if (is_dir_sep(path[i])) { + if ((i >= root.end) && ((r == root.end + 1U && result[r - 1] == '.') || + (r >= root.end + 2U && result[r - 2] == sep && + result[r - 1] == '.'))) { + // Remove dot entry and any immediately following separators + result[--r] = '\0'; + + } else { + // Replace separators with a single preferred separator + result[r++] = sep; + } + + // Collapse redundant separators + while (is_dir_sep(path[i + 1])) { + ++i; + } + + } else { + result[r++] = path[i]; + } + } + + // Collapse any dot-dot entries following a directory name + size_t last = r; + size_t next = 0; + for (size_t i = root_len; i < r;) { + if (last < r && i > 2 && i + 1 <= r && result[i - 2] == sep && + result[i - 1] == '.' && result[i] == '.' && + (!result[i + 1] || is_dir_sep(result[i + 1]))) { + if (i < r && result[i + 1] == sep) { + ++i; + } + + const size_t suffix_len = r - i - 1U; + memmove(result + last, result + i + 1, suffix_len); + r = r - ((r - last) - suffix_len); + result[r] = '\0'; + i = 0; + last = r; + next = 0; + } else { + if (i >= 1 && result[i - 1] == sep) { + next = i; + } + + if (result[i] != sep && result[i] != '.') { + last = next; + } + ++i; + } + } + + // Remove any dot-dot entries following the root directory + if (root_len && is_dir_sep(result[root_len - 1U])) { + size_t start = root_len; + while (start < r && result[start] == '.' && result[start + 1] == '.' && + (result[start + 2] == sep || result[start + 2] == '\0')) { + start += (result[start + 2] == sep) ? 3U : 2U; + } + + if (start > root_len) { + if (start < r) { + memmove(result + root_len, result + start, r - start); + r = root_len + r - start; + } else { + r = root_len; + } + + result[r] = '\0'; + return result; + } + } + + // Remove trailing dot entry + if (r >= 2U && is_any_sep(result[r - 2]) && result[r - 1] == '.') { + result[r - 1] = '\0'; + } + + // Remove trailing dot-dot entry + if (r >= 3U && result[r - 3] == '.' && result[r - 2] == '.' && + is_any_sep(result[r - 1])) { + result[r - 1] = '\0'; + } + + // If the path is empty, add a dot + if (!result[0]) { + result[0] = '.'; + result[1] = '\0'; + } + + return result; +} + +static ZixPathIter +make_iter(const ZixPathIterState state, const ZixIndexRange range) +{ + const ZixPathIter result = {range, state}; + return result; +} + +ZixPathIter +zix_path_begin(const char* const path) +{ + const ZixPathIter iter = {zix_path_root_name_range(path), ZIX_PATH_ROOT_NAME}; + + return (iter.range.end > iter.range.begin) ? iter + : path ? zix_path_next(path, iter) + : zix_path_next("", iter); +} + +ZixPathIter +zix_path_next(const char* const path, ZixPathIter iter) +{ + if (iter.state == ZIX_PATH_ROOT_NAME) { + if (is_dir_sep(path[iter.range.end])) { + return make_iter(ZIX_PATH_ROOT_DIRECTORY, + zix_make_range(iter.range.end, iter.range.end + 1U)); + } + } + + if (iter.state <= ZIX_PATH_ROOT_DIRECTORY) { + iter.range.begin = iter.range.end; + iter.state = ZIX_PATH_FILE_NAME; + } + + if (iter.state == ZIX_PATH_FILE_NAME) { + // If the last range end was the end of the path, we're done + iter.range.begin = iter.range.end; + if (!path[iter.range.begin]) { + return make_iter(ZIX_PATH_END, iter.range); + } + + // Skip any leading separators + while (is_dir_sep(path[iter.range.begin])) { + iter.range.begin = ++iter.range.end; + } + + // Advance end to the next separator or path end + while (path[iter.range.end] && !is_dir_sep(path[iter.range.end])) { + ++iter.range.end; + } + } + + return iter; +} + +static size_t +zix_path_append(char* const buf, + const size_t offset, + const char* const string, + const size_t length) +{ + size_t o = offset; + if (offset) { + buf[o++] = ZIX_DIR_SEP; + } + + memcpy(buf + o, string, length); + return o + length; +} + +char* +zix_path_lexically_relative(ZixAllocator* const allocator, + const char* const path, + const char* const base) +{ + // Mismatched roots can't be expressed in relative form + const ZixRootSlices path_root = zix_path_root_slices(path); + const ZixRootSlices base_root = zix_path_root_slices(base); + const bool path_has_root_dir = !zix_is_empty_range(path_root.dir); + const bool base_has_root_dir = !zix_is_empty_range(base_root.dir); + if (!zix_string_ranges_equal(path, path_root.name, base, base_root.name) || + (zix_path_is_absolute(path) != zix_path_is_absolute(base)) || + (!path_has_root_dir && base_has_root_dir)) { + return NULL; + } + + // Find the first mismatching element in the paths (or the end) + ZixPathIter a = zix_path_begin(path); + ZixPathIter b = zix_path_begin(base); + while (a.state != ZIX_PATH_END && b.state != ZIX_PATH_END && + a.state == b.state && + zix_string_ranges_equal(path, a.range, base, b.range)) { + a = zix_path_next(path, a); + b = zix_path_next(base, b); + } + + // Matching paths reduce to "." + if ((a.state == ZIX_PATH_END && b.state == ZIX_PATH_END) || + (zix_is_empty_range(a.range) && b.state == ZIX_PATH_END)) { + return zix_string_view_copy(allocator, zix_string(".")); + } + + // Count the trailing non-matching entries in base + size_t n_base_up = 0U; + size_t n_non_empty = 0U; + for (; b.state < ZIX_PATH_END; b = zix_path_next(base, b)) { + if (!zix_is_empty_range(b.range)) { + if (zix_string_ranges_equal(base, b.range, "..", two_range)) { + ++n_base_up; + } else if (!zix_string_ranges_equal(base, b.range, ".", one_range)) { + ++n_non_empty; + } + } + } + + // Base can't have too many up-references + if (n_base_up > n_non_empty) { + return NULL; + } + + const size_t n_up = + (a.state == ZIX_PATH_ROOT_DIRECTORY) ? 0U : (n_non_empty - n_base_up); + + // A result with no up-references or names reduces to "." + if (n_up == 0 && a.state == ZIX_PATH_END) { + return zix_string_view_copy(allocator, zix_string(".")); + } + + // Allocate buffer for relative path result + const size_t path_len = strlen(path); + const size_t rel_len = (n_up * 3U) + path_len - a.range.begin; + char* const rel = (char*)zix_calloc(allocator, rel_len + 1U, 1U); + + // Write leading up-references + size_t offset = 0U; + for (size_t i = 0U; i < n_up; ++i) { + offset = zix_path_append(rel, offset, "..", 2U); + } + + const char path_last = path[path_len - 1U]; + if (a.range.begin < path_len) { + // Copy suffix from path (from `a` to the end) + const size_t suffix_len = path_len - a.range.begin; + offset = zix_path_append(rel, offset, path + a.range.begin, suffix_len); + } else if (n_up && path_len > 1 && is_dir_sep(path_last)) { + // Copy trailing directory separator from path + rel[offset++] = path_last; + } + + rel[offset++] = '\0'; + return rel; +} + +// Decomposition + +static ZixStringView +range_string_view(const char* const path, const ZixIndexRange range) +{ + return zix_substring(path + range.begin, range.end - range.begin); +} + +#ifdef _WIN32 + +ZixStringView +zix_path_root_name(const char* const path) +{ + return range_string_view(path, zix_path_root_name_range(path)); +} + +#else + +ZixStringView +zix_path_root_name(const char* const path) +{ + (void)path; + return zix_empty_string(); +} + +#endif + +ZixStringView +zix_path_root_directory(const char* const path) +{ + return range_string_view(path, zix_path_root_slices(path).dir); +} + +ZixStringView +zix_path_root_path(const char* const path) +{ + return range_string_view(path, zix_path_root_path_range(path)); +} + +ZixStringView +zix_path_relative_path(const char* const path) +{ + const ZixStringView path_view = zix_string(path); + const size_t path_len = path_view.length; + const ZixIndexRange root = zix_path_root_path_range(path); + + return range_string_view(path, zix_make_range(root.end, path_len)); +} + +ZixStringView +zix_path_parent_path(const char* const path) +{ + return range_string_view(path, zix_path_parent_path_range(zix_string(path))); +} + +ZixStringView +zix_path_filename(const char* const path) +{ + return range_string_view(path, zix_path_filename_range(zix_string(path))); +} + +ZixStringView +zix_path_stem(const char* const path) +{ + return range_string_view(path, zix_path_stem_range(zix_string(path))); +} + +ZixStringView +zix_path_extension(const char* const path) +{ + return range_string_view(path, + zix_path_extension_range(zix_optional_string(path))); +} + +// Queries + +bool +zix_path_has_root_path(const char* const path) +{ + return !zix_is_empty_range(zix_path_root_path_range(path)); +} + +bool +zix_path_has_root_name(const char* const path) +{ + return !zix_is_empty_range(zix_path_root_name_range(path)); +} + +bool +zix_path_has_root_directory(const char* const path) +{ + return !zix_is_empty_range(zix_path_root_slices(path).dir); +} + +bool +zix_path_has_relative_path(const char* const path) +{ + return path && path[zix_path_root_path_range(path).end]; +} + +bool +zix_path_has_parent_path(const char* const path) +{ + return !zix_is_empty_range( + zix_path_parent_path_range(zix_optional_string(path))); +} + +bool +zix_path_has_filename(const char* const path) +{ + return !zix_is_empty_range( + zix_path_filename_range(zix_optional_string(path))); +} + +bool +zix_path_has_stem(const char* const path) +{ + return !zix_is_empty_range(zix_path_stem_range(zix_optional_string(path))); +} + +bool +zix_path_has_extension(const char* const path) +{ + return !zix_is_empty_range( + zix_path_extension_range(zix_optional_string(path))); +} + +bool +zix_path_is_absolute(const char* const path) +{ +#ifdef _WIN32 + const ZixRootSlices root = zix_path_root_slices(path); + return (!zix_is_empty_range(root.name) && + (!zix_is_empty_range(root.dir) || + (is_dir_sep(path[0]) && is_dir_sep(path[1])))); + +#else + return path && is_dir_sep(path[0]); +#endif +} + +bool +zix_path_is_relative(const char* const path) +{ + return !zix_path_is_absolute(path); +} diff --git a/src/path_iter.h b/src/path_iter.h new file mode 100644 index 0000000..b3b23e8 --- /dev/null +++ b/src/path_iter.h @@ -0,0 +1,31 @@ +// Copyright 2022 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef ZIX_PATH_ITER_H +#define ZIX_PATH_ITER_H + +#include "index_range.h" + +#include "zix/attributes.h" + +typedef enum { + ZIX_PATH_ROOT_NAME, + ZIX_PATH_ROOT_DIRECTORY, + ZIX_PATH_FILE_NAME, + ZIX_PATH_END, +} ZixPathIterState; + +typedef struct { + ZixIndexRange range; + ZixPathIterState state; +} ZixPathIter; + +ZIX_PURE_FUNC +ZixPathIter +zix_path_begin(const char* ZIX_NULLABLE path); + +ZIX_PURE_FUNC +ZixPathIter +zix_path_next(const char* ZIX_NONNULL path, ZixPathIter iter); + +#endif // ZIX_PATH_ITER_H |