summaryrefslogtreecommitdiffstats
path: root/src/path.c
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2022-10-23 13:41:15 -0400
committerDavid Robillard <d@drobilla.net>2022-10-23 13:46:58 -0400
commitc886d489576cd0bc33d7d22d81981c794067946f (patch)
treef43c8d872401ed80b37f974516d9e9f2fee9d765 /src/path.c
parentf95a698b94069ed03f6a8f2d0f7eb089d66c91ef (diff)
downloadzix-c886d489576cd0bc33d7d22d81981c794067946f.tar.gz
zix-c886d489576cd0bc33d7d22d81981c794067946f.tar.bz2
zix-c886d489576cd0bc33d7d22d81981c794067946f.zip
Add path API
Diffstat (limited to 'src/path.c')
-rw-r--r--src/path.c712
1 files changed, 712 insertions, 0 deletions
diff --git a/src/path.c b/src/path.c
new file mode 100644
index 0000000..aa787b3
--- /dev/null
+++ b/src/path.c
@@ -0,0 +1,712 @@
+// Copyright 2007-2022 David Robillard <d@drobilla.net>
+// SPDX-License-Identifier: ISC
+
+#include "zix/path.h"
+
+#include "index_range.h"
+#include "path_iter.h"
+
+#include "zix/string_view.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+
+static const ZixIndexRange two_range = {0U, 2U};
+static const ZixIndexRange one_range = {0U, 1U};
+
+#ifdef _WIN32
+
+# define ZIX_DIR_SEP '\\' // Backslash is preferred on Windows
+
+static inline bool
+is_dir_sep(const char c)
+{
+ return c == '/' || c == '\\';
+}
+
+static inline bool
+is_any_sep(const char c)
+{
+ return c == '/' || c == ':' || c == '\\';
+}
+
+static bool
+is_root_name_char(const char c)
+{
+ return c && !is_dir_sep(c);
+}
+
+static ZixIndexRange
+zix_path_root_name_range(const char* const path)
+{
+ ZixIndexRange result = {0U, 0U};
+
+ if (path) {
+ if (((path[0] >= 'A' && path[0] <= 'Z') ||
+ (path[0] >= 'a' && path[0] <= 'z')) &&
+ path[1] == ':') {
+ // A root that starts with a letter then ':' has a name like "C:"
+ result.end = 2U;
+
+ } else if (is_dir_sep(path[0]) && is_dir_sep(path[1]) &&
+ is_root_name_char(path[2])) {
+ // A root that starts with two separators has a name like "\\host"
+ result.end = 2U;
+ while (is_root_name_char(path[result.end])) {
+ ++result.end;
+ }
+ }
+ }
+
+ return result;
+}
+
+#else
+
+# define ZIX_DIR_SEP '/' // Slash is the only separator on other platforms
+
+static inline bool
+is_dir_sep(const char c)
+{
+ return c == '/';
+}
+
+static inline bool
+is_any_sep(const char c)
+{
+ return c == '/';
+}
+
+static inline ZixIndexRange
+zix_path_root_name_range(const char* const path)
+{
+ (void)path;
+ return zix_make_range(0U, 0U);
+}
+
+#endif
+
+typedef struct {
+ ZixIndexRange name;
+ ZixIndexRange dir;
+} ZixRootSlices;
+
+static ZixRootSlices
+zix_path_root_slices(const char* const path)
+{
+ // A root name not trailed by a separator has no root directory
+ const ZixIndexRange name = zix_path_root_name_range(path);
+ const size_t dir_len = (size_t)(path && is_dir_sep(path[name.end]));
+ if (!dir_len) {
+ const ZixRootSlices result = {name, {name.end, name.end}};
+ return result;
+ }
+
+ // Skip redundant root directory separators
+ ZixIndexRange dir = {name.end, name.end + dir_len};
+ while (is_dir_sep(path[dir.end])) {
+ dir.begin = dir.end++;
+ }
+
+ const ZixRootSlices result = {name, dir};
+ return result;
+}
+
+static bool
+zix_string_ranges_equal(const char* const lhs,
+ const ZixIndexRange lhs_range,
+ const char* const rhs,
+ const ZixIndexRange rhs_range)
+{
+ const size_t lhs_len = lhs_range.end - lhs_range.begin;
+ const size_t rhs_len = rhs_range.end - rhs_range.begin;
+
+ return lhs_len == rhs_len &&
+ (lhs_len == 0 ||
+ !strncmp(lhs + lhs_range.begin, rhs + rhs_range.begin, lhs_len));
+}
+
+static ZixIndexRange
+zix_path_root_path_range(const char* const path)
+{
+ const ZixRootSlices root = zix_path_root_slices(path);
+ const size_t dir_len = (size_t)!zix_is_empty_range(root.dir);
+
+ return zix_is_empty_range(root.name)
+ ? root.dir
+ : zix_make_range(root.name.begin, root.name.end + dir_len);
+}
+
+static ZixIndexRange
+zix_path_parent_path_range(const ZixStringView path)
+{
+ if (!path.length) {
+ return zix_make_range(0U, 0U); // Empty paths have no parent
+ }
+
+ // Find the first relevant character (skip leading root path if any)
+ const ZixIndexRange root = zix_path_root_path_range(path.data);
+ const size_t p = root.begin;
+
+ // Scan backwards starting at the last character
+ size_t l = path.length - 1U;
+ if (is_dir_sep(path.data[l])) {
+ // Rewind to the last relevant separator (skip trailing redundant ones)
+ while (l > p && is_dir_sep(path.data[l - 1U])) {
+ --l;
+ }
+ } else {
+ // Rewind to the last relevant character (skip trailing name)
+ while (l > p && !is_dir_sep(path.data[l])) {
+ --l;
+ }
+ }
+
+ if (l <= root.end) {
+ return root;
+ }
+
+ // Drop trailing separators
+ while (l > p && is_dir_sep(path.data[l])) {
+ --l;
+ }
+
+ return zix_make_range(root.begin, root.begin + l + 1U - p);
+}
+
+static ZixIndexRange
+zix_path_filename_range(const ZixStringView path)
+{
+ // Find the first filename character (skip leading root path if any)
+ const size_t begin = zix_path_root_path_range(path.data).end;
+ if (begin == path.length || is_dir_sep(path.data[path.length - 1U])) {
+ return zix_make_range(0U, 0U);
+ }
+
+ // Scan backwards to find the first character after the last separator
+ size_t f = path.length - 1U;
+ while (f > begin && !is_dir_sep(path.data[f - 1])) {
+ --f;
+ }
+
+ return zix_make_range(f, path.length);
+}
+
+static ZixIndexRange
+zix_path_stem_range(const ZixStringView path)
+{
+ const ZixIndexRange name = zix_path_filename_range(path);
+
+ ZixIndexRange stem = name;
+ if (!zix_is_empty_range(stem) &&
+ !zix_string_ranges_equal(path.data, stem, ".", one_range) &&
+ !zix_string_ranges_equal(path.data, stem, "..", two_range)) {
+ // Scan backwards to the last "." after the last directory separator
+ --stem.end;
+ while (stem.end > stem.begin && path.data[stem.end] != '.') {
+ --stem.end;
+ }
+ }
+
+ return zix_is_empty_range(stem) ? name : stem;
+}
+
+static ZixIndexRange
+zix_path_extension_range(const ZixStringView path)
+{
+ const ZixIndexRange stem = zix_path_stem_range(path);
+
+ return zix_is_empty_range(stem)
+ ? stem
+ : zix_make_range(stem.end, stem.end + path.length - stem.end);
+}
+
+char*
+zix_path_join(ZixAllocator* const allocator,
+ const char* const a,
+ const char* const b)
+{
+ const ZixStringView b_view = zix_optional_string(b);
+ if (!a || !a[0]) {
+ return zix_string_view_copy(allocator, b_view);
+ }
+
+ const ZixStringView a_view = zix_optional_string(a);
+ const ZixRootSlices a_root = zix_path_root_slices(a);
+ const ZixRootSlices b_root = zix_path_root_slices(b);
+
+#ifdef _WIN32
+ // If the RHS has a different root name, just copy it
+ if (b_root.name.end &&
+ !zix_string_ranges_equal(a, a_root.name, b, b_root.name)) {
+ return zix_string_view_copy(allocator, b_view);
+ }
+#endif
+
+ // Determine how to join paths
+ const bool a_has_root_dir = !zix_is_empty_range(a_root.dir);
+ const bool a_has_filename = zix_path_has_filename(a_view.data);
+ size_t prefix_len = a_view.length;
+ bool add_sep = false;
+ if (!zix_is_empty_range(b_root.dir)) {
+ prefix_len = a_root.name.end; // Omit any path from a
+ } else if (a_has_filename || (!a_has_root_dir && zix_path_is_absolute(a))) {
+ add_sep = true; // Add a preferred separator after a
+ }
+
+ const size_t path_len = prefix_len + (size_t)add_sep + b_view.length;
+ char* const path = (char*)zix_calloc(allocator, path_len + 1U, 1U);
+
+ if (path) {
+ memcpy(path, a, prefix_len);
+
+ size_t p = prefix_len;
+ if (add_sep) {
+ path[p++] = ZIX_DIR_SEP;
+ }
+
+ if (b_view.length > b_root.name.end) {
+ memcpy(path + p, b + b_root.name.end, b_view.length - b_root.name.end);
+ path[p + b_view.length] = '\0';
+ }
+ }
+
+ return path;
+}
+
+char*
+zix_path_preferred(ZixAllocator* const allocator, const char* const path)
+{
+ const ZixStringView path_view = zix_string(path);
+ char* const result = (char*)zix_calloc(allocator, path_view.length + 1U, 1U);
+
+ if (result) {
+ for (size_t i = 0U; i < path_view.length; ++i) {
+ result[i] = (char)(is_dir_sep(path[i]) ? ZIX_DIR_SEP : path[i]);
+ }
+ }
+
+ return result;
+}
+
+char*
+zix_path_lexically_normal(ZixAllocator* const allocator, const char* const path)
+{
+ static const char sep = ZIX_DIR_SEP;
+
+ /* Loosely following the normalization algorithm from
+ <https://en.cppreference.com/w/cpp/filesystem/path>, but in such a way
+ that only a single buffer (the result) is needed. This means that
+ dot-dot segments are relatively expensive, but it beats a stack in
+ everyday common cases. */
+
+ if (!path[0]) {
+ return (char*)zix_calloc(allocator, 1U, 1U); // The empty path is normal
+ }
+
+ // Allocate a result buffer, with space for one additional character
+ const ZixStringView path_view = zix_string(path);
+ const size_t path_len = path_view.length;
+ char* const result = (char*)zix_calloc(allocator, path_len + 2U, 1);
+ size_t r = 0U;
+
+ // Copy root, normalizing separators as we go
+ const ZixIndexRange root = zix_path_root_path_range(path);
+ const size_t root_len = root.end - root.begin;
+ for (size_t i = 0; i < root_len; ++i) {
+ result[r++] = (char)(is_dir_sep(path[i]) ? sep : path[i]);
+ }
+
+ // Copy path, removing dot entries and collapsing separators as we go
+ for (size_t i = root.end; i < path_len; ++i) {
+ if (is_dir_sep(path[i])) {
+ if ((i >= root.end) && ((r == root.end + 1U && result[r - 1] == '.') ||
+ (r >= root.end + 2U && result[r - 2] == sep &&
+ result[r - 1] == '.'))) {
+ // Remove dot entry and any immediately following separators
+ result[--r] = '\0';
+
+ } else {
+ // Replace separators with a single preferred separator
+ result[r++] = sep;
+ }
+
+ // Collapse redundant separators
+ while (is_dir_sep(path[i + 1])) {
+ ++i;
+ }
+
+ } else {
+ result[r++] = path[i];
+ }
+ }
+
+ // Collapse any dot-dot entries following a directory name
+ size_t last = r;
+ size_t next = 0;
+ for (size_t i = root_len; i < r;) {
+ if (last < r && i > 2 && i + 1 <= r && result[i - 2] == sep &&
+ result[i - 1] == '.' && result[i] == '.' &&
+ (!result[i + 1] || is_dir_sep(result[i + 1]))) {
+ if (i < r && result[i + 1] == sep) {
+ ++i;
+ }
+
+ const size_t suffix_len = r - i - 1U;
+ memmove(result + last, result + i + 1, suffix_len);
+ r = r - ((r - last) - suffix_len);
+ result[r] = '\0';
+ i = 0;
+ last = r;
+ next = 0;
+ } else {
+ if (i >= 1 && result[i - 1] == sep) {
+ next = i;
+ }
+
+ if (result[i] != sep && result[i] != '.') {
+ last = next;
+ }
+ ++i;
+ }
+ }
+
+ // Remove any dot-dot entries following the root directory
+ if (root_len && is_dir_sep(result[root_len - 1U])) {
+ size_t start = root_len;
+ while (start < r && result[start] == '.' && result[start + 1] == '.' &&
+ (result[start + 2] == sep || result[start + 2] == '\0')) {
+ start += (result[start + 2] == sep) ? 3U : 2U;
+ }
+
+ if (start > root_len) {
+ if (start < r) {
+ memmove(result + root_len, result + start, r - start);
+ r = root_len + r - start;
+ } else {
+ r = root_len;
+ }
+
+ result[r] = '\0';
+ return result;
+ }
+ }
+
+ // Remove trailing dot entry
+ if (r >= 2U && is_any_sep(result[r - 2]) && result[r - 1] == '.') {
+ result[r - 1] = '\0';
+ }
+
+ // Remove trailing dot-dot entry
+ if (r >= 3U && result[r - 3] == '.' && result[r - 2] == '.' &&
+ is_any_sep(result[r - 1])) {
+ result[r - 1] = '\0';
+ }
+
+ // If the path is empty, add a dot
+ if (!result[0]) {
+ result[0] = '.';
+ result[1] = '\0';
+ }
+
+ return result;
+}
+
+static ZixPathIter
+make_iter(const ZixPathIterState state, const ZixIndexRange range)
+{
+ const ZixPathIter result = {range, state};
+ return result;
+}
+
+ZixPathIter
+zix_path_begin(const char* const path)
+{
+ const ZixPathIter iter = {zix_path_root_name_range(path), ZIX_PATH_ROOT_NAME};
+
+ return (iter.range.end > iter.range.begin) ? iter
+ : path ? zix_path_next(path, iter)
+ : zix_path_next("", iter);
+}
+
+ZixPathIter
+zix_path_next(const char* const path, ZixPathIter iter)
+{
+ if (iter.state == ZIX_PATH_ROOT_NAME) {
+ if (is_dir_sep(path[iter.range.end])) {
+ return make_iter(ZIX_PATH_ROOT_DIRECTORY,
+ zix_make_range(iter.range.end, iter.range.end + 1U));
+ }
+ }
+
+ if (iter.state <= ZIX_PATH_ROOT_DIRECTORY) {
+ iter.range.begin = iter.range.end;
+ iter.state = ZIX_PATH_FILE_NAME;
+ }
+
+ if (iter.state == ZIX_PATH_FILE_NAME) {
+ // If the last range end was the end of the path, we're done
+ iter.range.begin = iter.range.end;
+ if (!path[iter.range.begin]) {
+ return make_iter(ZIX_PATH_END, iter.range);
+ }
+
+ // Skip any leading separators
+ while (is_dir_sep(path[iter.range.begin])) {
+ iter.range.begin = ++iter.range.end;
+ }
+
+ // Advance end to the next separator or path end
+ while (path[iter.range.end] && !is_dir_sep(path[iter.range.end])) {
+ ++iter.range.end;
+ }
+ }
+
+ return iter;
+}
+
+static size_t
+zix_path_append(char* const buf,
+ const size_t offset,
+ const char* const string,
+ const size_t length)
+{
+ size_t o = offset;
+ if (offset) {
+ buf[o++] = ZIX_DIR_SEP;
+ }
+
+ memcpy(buf + o, string, length);
+ return o + length;
+}
+
+char*
+zix_path_lexically_relative(ZixAllocator* const allocator,
+ const char* const path,
+ const char* const base)
+{
+ // Mismatched roots can't be expressed in relative form
+ const ZixRootSlices path_root = zix_path_root_slices(path);
+ const ZixRootSlices base_root = zix_path_root_slices(base);
+ const bool path_has_root_dir = !zix_is_empty_range(path_root.dir);
+ const bool base_has_root_dir = !zix_is_empty_range(base_root.dir);
+ if (!zix_string_ranges_equal(path, path_root.name, base, base_root.name) ||
+ (zix_path_is_absolute(path) != zix_path_is_absolute(base)) ||
+ (!path_has_root_dir && base_has_root_dir)) {
+ return NULL;
+ }
+
+ // Find the first mismatching element in the paths (or the end)
+ ZixPathIter a = zix_path_begin(path);
+ ZixPathIter b = zix_path_begin(base);
+ while (a.state != ZIX_PATH_END && b.state != ZIX_PATH_END &&
+ a.state == b.state &&
+ zix_string_ranges_equal(path, a.range, base, b.range)) {
+ a = zix_path_next(path, a);
+ b = zix_path_next(base, b);
+ }
+
+ // Matching paths reduce to "."
+ if ((a.state == ZIX_PATH_END && b.state == ZIX_PATH_END) ||
+ (zix_is_empty_range(a.range) && b.state == ZIX_PATH_END)) {
+ return zix_string_view_copy(allocator, zix_string("."));
+ }
+
+ // Count the trailing non-matching entries in base
+ size_t n_base_up = 0U;
+ size_t n_non_empty = 0U;
+ for (; b.state < ZIX_PATH_END; b = zix_path_next(base, b)) {
+ if (!zix_is_empty_range(b.range)) {
+ if (zix_string_ranges_equal(base, b.range, "..", two_range)) {
+ ++n_base_up;
+ } else if (!zix_string_ranges_equal(base, b.range, ".", one_range)) {
+ ++n_non_empty;
+ }
+ }
+ }
+
+ // Base can't have too many up-references
+ if (n_base_up > n_non_empty) {
+ return NULL;
+ }
+
+ const size_t n_up =
+ (a.state == ZIX_PATH_ROOT_DIRECTORY) ? 0U : (n_non_empty - n_base_up);
+
+ // A result with no up-references or names reduces to "."
+ if (n_up == 0 && a.state == ZIX_PATH_END) {
+ return zix_string_view_copy(allocator, zix_string("."));
+ }
+
+ // Allocate buffer for relative path result
+ const size_t path_len = strlen(path);
+ const size_t rel_len = (n_up * 3U) + path_len - a.range.begin;
+ char* const rel = (char*)zix_calloc(allocator, rel_len + 1U, 1U);
+
+ // Write leading up-references
+ size_t offset = 0U;
+ for (size_t i = 0U; i < n_up; ++i) {
+ offset = zix_path_append(rel, offset, "..", 2U);
+ }
+
+ const char path_last = path[path_len - 1U];
+ if (a.range.begin < path_len) {
+ // Copy suffix from path (from `a` to the end)
+ const size_t suffix_len = path_len - a.range.begin;
+ offset = zix_path_append(rel, offset, path + a.range.begin, suffix_len);
+ } else if (n_up && path_len > 1 && is_dir_sep(path_last)) {
+ // Copy trailing directory separator from path
+ rel[offset++] = path_last;
+ }
+
+ rel[offset++] = '\0';
+ return rel;
+}
+
+// Decomposition
+
+static ZixStringView
+range_string_view(const char* const path, const ZixIndexRange range)
+{
+ return zix_substring(path + range.begin, range.end - range.begin);
+}
+
+#ifdef _WIN32
+
+ZixStringView
+zix_path_root_name(const char* const path)
+{
+ return range_string_view(path, zix_path_root_name_range(path));
+}
+
+#else
+
+ZixStringView
+zix_path_root_name(const char* const path)
+{
+ (void)path;
+ return zix_empty_string();
+}
+
+#endif
+
+ZixStringView
+zix_path_root_directory(const char* const path)
+{
+ return range_string_view(path, zix_path_root_slices(path).dir);
+}
+
+ZixStringView
+zix_path_root_path(const char* const path)
+{
+ return range_string_view(path, zix_path_root_path_range(path));
+}
+
+ZixStringView
+zix_path_relative_path(const char* const path)
+{
+ const ZixStringView path_view = zix_string(path);
+ const size_t path_len = path_view.length;
+ const ZixIndexRange root = zix_path_root_path_range(path);
+
+ return range_string_view(path, zix_make_range(root.end, path_len));
+}
+
+ZixStringView
+zix_path_parent_path(const char* const path)
+{
+ return range_string_view(path, zix_path_parent_path_range(zix_string(path)));
+}
+
+ZixStringView
+zix_path_filename(const char* const path)
+{
+ return range_string_view(path, zix_path_filename_range(zix_string(path)));
+}
+
+ZixStringView
+zix_path_stem(const char* const path)
+{
+ return range_string_view(path, zix_path_stem_range(zix_string(path)));
+}
+
+ZixStringView
+zix_path_extension(const char* const path)
+{
+ return range_string_view(path,
+ zix_path_extension_range(zix_optional_string(path)));
+}
+
+// Queries
+
+bool
+zix_path_has_root_path(const char* const path)
+{
+ return !zix_is_empty_range(zix_path_root_path_range(path));
+}
+
+bool
+zix_path_has_root_name(const char* const path)
+{
+ return !zix_is_empty_range(zix_path_root_name_range(path));
+}
+
+bool
+zix_path_has_root_directory(const char* const path)
+{
+ return !zix_is_empty_range(zix_path_root_slices(path).dir);
+}
+
+bool
+zix_path_has_relative_path(const char* const path)
+{
+ return path && path[zix_path_root_path_range(path).end];
+}
+
+bool
+zix_path_has_parent_path(const char* const path)
+{
+ return !zix_is_empty_range(
+ zix_path_parent_path_range(zix_optional_string(path)));
+}
+
+bool
+zix_path_has_filename(const char* const path)
+{
+ return !zix_is_empty_range(
+ zix_path_filename_range(zix_optional_string(path)));
+}
+
+bool
+zix_path_has_stem(const char* const path)
+{
+ return !zix_is_empty_range(zix_path_stem_range(zix_optional_string(path)));
+}
+
+bool
+zix_path_has_extension(const char* const path)
+{
+ return !zix_is_empty_range(
+ zix_path_extension_range(zix_optional_string(path)));
+}
+
+bool
+zix_path_is_absolute(const char* const path)
+{
+#ifdef _WIN32
+ const ZixRootSlices root = zix_path_root_slices(path);
+ return (!zix_is_empty_range(root.name) &&
+ (!zix_is_empty_range(root.dir) ||
+ (is_dir_sep(path[0]) && is_dir_sep(path[1]))));
+
+#else
+ return path && is_dir_sep(path[0]);
+#endif
+}
+
+bool
+zix_path_is_relative(const char* const path)
+{
+ return !zix_path_is_absolute(path);
+}