dir-iterator: refactor state machine model

dir_iterator_advance() is a large function with two nested loops. Let's
improve its readability by factoring out three functions and simplifying
its mechanics. The refactored model will no longer depend on
level.initialized and level.dir_state to keep track of the iteration
state and will run in a single loop.

Also, dir_iterator_begin() currently does not check if the given string
represents a valid directory path. Since the refactored model will have
to stat() the given path at initialization, let's also check for this
kind of error and make dir_iterator_begin() return NULL on failure,
with errno set appropriately. Also add tests for this new behavior.

Improve documentation at dir-iterator.h and code comments at
dir-iterator.c to reflect the changes and eliminate possible
ambiguities.

Finally, adjust refs/files-backend.c to check for now possible
dir_iterator_begin() failures.

Original-patch-by: Daniel Ferreira <bnmvco@gmail.com>
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Matheus Tavares 2019-07-10 20:58:59 -03:00 committed by Junio C Hamano
parent c9bba372ed
commit 3012397e03
5 changed files with 162 additions and 120 deletions

View File

@ -4,8 +4,6 @@
#include "dir-iterator.h"
struct dir_iterator_level {
int initialized;
DIR *dir;
/*
@ -13,16 +11,6 @@ struct dir_iterator_level {
* (including a trailing '/'):
*/
size_t prefix_len;
/*
* The last action that has been taken with the current entry
* (needed for directories, which have to be included in the
* iteration and also iterated into):
*/
enum {
DIR_STATE_ITER,
DIR_STATE_RECURSE
} dir_state;
};
/*
@ -34,9 +22,11 @@ struct dir_iterator_int {
struct dir_iterator base;
/*
* The number of levels currently on the stack. This is always
* at least 1, because when it becomes zero the iteration is
* ended and this struct is freed.
* The number of levels currently on the stack. After the first
* call to dir_iterator_advance(), if it succeeds in opening the
* first level's dir, this will always be at least 1. Then,
* when it drops to zero the iteration is ended and this
* struct is freed.
*/
size_t levels_nr;
@ -50,113 +40,118 @@ struct dir_iterator_int {
struct dir_iterator_level *levels;
};
/*
 * Open the directory currently named by iter->base.path and push it as
 * a new level on top of iter's stack. The caller must guarantee that
 * the strbuf holds a valid directory path. A trailing directory
 * separator is appended to the path when it lacks one, and the
 * resulting length is recorded as the new level's prefix_len.
 *
 * Return 0 on success. If opendir() fails, emit a warning (unless errno
 * is ENOENT), drop the level again so that levels_nr is back to its
 * previous value, and return -1.
 */
static int push_level(struct dir_iterator_int *iter)
{
	struct dir_iterator_level *top;

	ALLOC_GROW(iter->levels, iter->levels_nr + 1, iter->levels_alloc);
	top = &iter->levels[iter->levels_nr];
	iter->levels_nr++;

	/* Make sure entry names can be appended directly to the path. */
	if (!is_dir_sep(iter->base.path.buf[iter->base.path.len - 1]))
		strbuf_addch(&iter->base.path, '/');
	top->prefix_len = iter->base.path.len;

	top->dir = opendir(iter->base.path.buf);
	if (top->dir)
		return 0;

	/* A directory that vanished in the meantime is not worth a warning. */
	if (errno != ENOENT)
		warning_errno("error opening directory '%s'",
			      iter->base.path.buf);
	iter->levels_nr--;
	return -1;
}
/*
 * Discard the topmost level of iter's stack: close its directory handle
 * if one is open (warning when closedir() reports an error) and clear
 * it. Return the number of levels left on the stack afterwards.
 */
static int pop_level(struct dir_iterator_int *iter)
{
	struct dir_iterator_level *top = iter->levels + (iter->levels_nr - 1);

	if (top->dir) {
		if (closedir(top->dir))
			warning_errno("error closing directory '%s'",
				      iter->base.path.buf);
	}
	top->dir = NULL;

	iter->levels_nr--;
	return iter->levels_nr;
}
/*
* Populate iter->base with the necessary information on the next iteration
* entry, represented by the given dirent de. Return 0 on success and -1
* otherwise.
*/
static int prepare_next_entry_data(struct dir_iterator_int *iter,
struct dirent *de)
{
strbuf_addstr(&iter->base.path, de->d_name);
/*
* We have to reset these because the path strbuf might have
* been realloc()ed at the previous strbuf_addstr().
*/
iter->base.relative_path = iter->base.path.buf +
iter->levels[0].prefix_len;
iter->base.basename = iter->base.path.buf +
iter->levels[iter->levels_nr - 1].prefix_len;
if (lstat(iter->base.path.buf, &iter->base.st)) {
if (errno != ENOENT)
warning_errno("failed to stat '%s'", iter->base.path.buf);
return -1;
}
return 0;
}
/*
* Move the iterator to its next entry.
*
* If the entry last described by iter->base.st is a directory, try to
* push a level for it first; when that push fails while the stack is
* still empty, the iteration is aborted. Afterwards, read entries from
* the directory on top of the stack, skipping "." and "..", and popping
* a level whenever readdir() returns no entry. Return ITER_OK once an
* entry has been prepared, or the result of dir_iterator_abort() when
* the last level is popped.
*
* NOTE(review): this span appears to interleave the pre-refactor and
* post-refactor versions of the function (for instance, `de` is
* declared twice and both the level->initialized logic and the
* push_level()/pop_level() helpers show up). Confirm the statement
* order against the real file before relying on it.
*/
int dir_iterator_advance(struct dir_iterator *dir_iterator)
{
struct dir_iterator_int *iter =
(struct dir_iterator_int *)dir_iterator;
if (S_ISDIR(iter->base.st.st_mode)) {
if (push_level(iter) && iter->levels_nr == 0) {
/* Pushing the first level failed */
return dir_iterator_abort(dir_iterator);
}
}
/* Loop until we find an entry that we can give back to the caller. */
while (1) {
struct dirent *de;
struct dir_iterator_level *level =
&iter->levels[iter->levels_nr - 1];
struct dirent *de;
if (!level->initialized) {
/*
* Note: dir_iterator_begin() ensures that
* path is not the empty string.
*/
if (!is_dir_sep(iter->base.path.buf[iter->base.path.len - 1]))
strbuf_addch(&iter->base.path, '/');
level->prefix_len = iter->base.path.len;
strbuf_setlen(&iter->base.path, level->prefix_len);
errno = 0;
de = readdir(level->dir);
level->dir = opendir(iter->base.path.buf);
if (!level->dir && errno != ENOENT) {
warning_errno("error opening directory '%s'",
if (!de) {
if (errno)
warning_errno("error reading directory '%s'",
iter->base.path.buf);
/* Popping the level is handled below */
}
level->initialized = 1;
} else if (S_ISDIR(iter->base.st.st_mode)) {
if (level->dir_state == DIR_STATE_ITER) {
/*
* The directory was just iterated
* over; now prepare to iterate into
* it.
*/
level->dir_state = DIR_STATE_RECURSE;
ALLOC_GROW(iter->levels, iter->levels_nr + 1,
iter->levels_alloc);
level = &iter->levels[iter->levels_nr++];
level->initialized = 0;
continue;
} else {
/*
* The directory has already been
* iterated over and iterated into;
* we're done with it.
*/
}
}
if (!level->dir) {
/*
* This level is exhausted (or wasn't opened
* successfully); pop up a level.
*/
if (--iter->levels_nr == 0)
else if (pop_level(iter) == 0)
return dir_iterator_abort(dir_iterator);
continue;
}
/*
* Loop until we find an entry that we can give back
* to the caller:
*/
while (1) {
strbuf_setlen(&iter->base.path, level->prefix_len);
errno = 0;
de = readdir(level->dir);
if (is_dot_or_dotdot(de->d_name))
continue;
if (!de) {
/* This level is exhausted; pop up a level. */
if (errno) {
warning_errno("error reading directory '%s'",
iter->base.path.buf);
} else if (closedir(level->dir))
warning_errno("error closing directory '%s'",
iter->base.path.buf);
if (prepare_next_entry_data(iter, de))
continue;
level->dir = NULL;
if (--iter->levels_nr == 0)
return dir_iterator_abort(dir_iterator);
break;
}
if (is_dot_or_dotdot(de->d_name))
continue;
strbuf_addstr(&iter->base.path, de->d_name);
if (lstat(iter->base.path.buf, &iter->base.st) < 0) {
if (errno != ENOENT)
warning_errno("failed to stat '%s'",
iter->base.path.buf);
continue;
}
/*
* We have to set these each time because
* the path strbuf might have been realloc()ed.
*/
iter->base.relative_path =
iter->base.path.buf + iter->levels[0].prefix_len;
iter->base.basename =
iter->base.path.buf + level->prefix_len;
level->dir_state = DIR_STATE_ITER;
return ITER_OK;
}
return ITER_OK;
}
}
@ -187,17 +182,32 @@ struct dir_iterator *dir_iterator_begin(const char *path)
{
/*
* Set up an iterator over the directory tree rooted at path: allocate
* the iterator, copy path into its strbuf, and verify via stat() that
* the path names a directory. On any failure, release the iterator and
* return NULL with errno describing the problem.
*/
struct dir_iterator_int *iter = xcalloc(1, sizeof(*iter));
struct dir_iterator *dir_iterator = &iter->base;
if (!path || !*path)
BUG("empty path passed to dir_iterator_begin()");
int saved_errno;
strbuf_init(&iter->base.path, PATH_MAX);
strbuf_addstr(&iter->base.path, path);
ALLOC_GROW(iter->levels, 10, iter->levels_alloc);
iter->levels_nr = 0;
iter->levels_nr = 1;
iter->levels[0].initialized = 0;
/*
* Note: stat() fails for empty strings and nonexistent paths, so
* no separate check for those is needed here.
* NOTE(review): a NULL path never reaches stat() as NULL, because
* stat() is given the strbuf's buffer; a NULL path would already
* have been handed to strbuf_addstr() above. Verify that callers
* never pass NULL.
*/
if (stat(iter->base.path.buf, &iter->base.st) < 0) {
saved_errno = errno;
goto error_out;
}
if (!S_ISDIR(iter->base.st.st_mode)) {
saved_errno = ENOTDIR;
goto error_out;
}
return dir_iterator;
error_out:
/*
* errno is assigned only after dir_iterator_abort() has run, so the
* caller sees the value saved above even if the cleanup changes errno.
*/
dir_iterator_abort(dir_iterator);
errno = saved_errno;
return NULL;
}

View File

@ -8,18 +8,22 @@
*
* Iterate over a directory tree, recursively, including paths of all
* types and hidden paths. Skip "." and ".." entries and don't follow
* symlinks except for the original path.
* symlinks except for the original path. Note that the original path
* is not included in the iteration.
*
* Every time dir_iterator_advance() is called, update the members of
* the dir_iterator structure to reflect the next path in the
* iteration. The order that paths are iterated over within a
* directory is undefined, but directory paths are always iterated
* over before the subdirectory contents.
* directory is undefined, but directory paths are always given before
* their contents.
*
* A typical iteration looks like this:
*
* int ok;
* struct iterator *iter = dir_iterator_begin(path);
* struct dir_iterator *iter = dir_iterator_begin(path);
*
* if (!iter)
* goto error_handler;
*
* while ((ok = dir_iterator_advance(iter)) == ITER_OK) {
* if (want_to_stop_iteration()) {
@ -59,8 +63,9 @@ struct dir_iterator {
};
/*
* Start a directory iteration over path. Return a dir_iterator that
* holds the internal state of the iteration.
* Start a directory iteration over path. On success, return a
* dir_iterator that holds the internal state of the iteration.
* In case of failure, return NULL and set errno accordingly.
*
* The iteration includes all paths under path, not including path
* itself and not including "." or ".." entries.

View File

@ -2143,13 +2143,22 @@ static struct ref_iterator_vtable files_reflog_iterator_vtable = {
static struct ref_iterator *reflog_iterator_begin(struct ref_store *ref_store,
const char *gitdir)
{
struct files_reflog_iterator *iter = xcalloc(1, sizeof(*iter));
struct ref_iterator *ref_iterator = &iter->base;
struct dir_iterator *diter;
struct files_reflog_iterator *iter;
struct ref_iterator *ref_iterator;
struct strbuf sb = STRBUF_INIT;
base_ref_iterator_init(ref_iterator, &files_reflog_iterator_vtable, 0);
strbuf_addf(&sb, "%s/logs", gitdir);
iter->dir_iterator = dir_iterator_begin(sb.buf);
diter = dir_iterator_begin(sb.buf);
if(!diter)
return empty_ref_iterator_begin();
iter = xcalloc(1, sizeof(*iter));
ref_iterator = &iter->base;
base_ref_iterator_init(ref_iterator, &files_reflog_iterator_vtable, 0);
iter->dir_iterator = diter;
iter->ref_store = ref_store;
strbuf_release(&sb);

View File

@ -17,6 +17,11 @@ int cmd__dir_iterator(int argc, const char **argv)
diter = dir_iterator_begin(path.buf);
if (!diter) {
printf("dir_iterator_begin failure: %d\n", errno);
exit(EXIT_FAILURE);
}
while (dir_iterator_advance(diter) == ITER_OK) {
if (S_ISDIR(diter->st.st_mode))
printf("[d] ");

View File

@ -52,4 +52,17 @@ test_expect_success 'dir-iterator should list files in the correct order' '
test_cmp expected-pre-order-output actual-pre-order-output
'
# dir_iterator_begin() must fail for a path that does not exist. The
# helper prints the numeric errno on failure.
# NOTE(review): the expected value 2 is presumably ENOENT; numeric errno
# values are platform-specific, so confirm this holds on every supported
# platform.
test_expect_success 'begin should fail upon inexistent paths' '
test_must_fail test-tool dir-iterator ./inexistent-path \
>actual-inexistent-path-output &&
echo "dir_iterator_begin failure: 2" >expected-inexistent-path-output &&
test_cmp expected-inexistent-path-output actual-inexistent-path-output
'
# dir_iterator_begin() must fail when the given path is not a directory.
# NOTE(review): the expected value 20 is presumably ENOTDIR; as above,
# the number is platform-specific, so confirm before relying on it.
test_expect_success 'begin should fail upon non directory paths' '
test_must_fail test-tool dir-iterator ./dir/b >actual-non-dir-output &&
echo "dir_iterator_begin failure: 20" >expected-non-dir-output &&
test_cmp expected-non-dir-output actual-non-dir-output
'
test_done