fsck: use oidset instead of oid_array for skipList

Change the implementation of the skipList feature to use oidset
instead of oid_array to store SHA-1s for later lookup.

This list is parsed once on startup by fsck, fetch-pack or
receive-pack depending on the *.skipList config in use. I.e. only once
per invocation, but note that for "clone --recurse-submodules" each
submodule will re-parse the list, in addition to the main project, and
it will be re-parsed when checking .gitmodules blobs, see
fb16287719 ("fsck: check skiplist for object in fsck_blob()",
2018-06-27).

Memory usage is a bit higher, but we don't need to keep track of the
sort order anymore. Embed the oidset into struct fsck_options to make
its ownership clear (no hidden sharing) and avoid unnecessary pointer
indirection.

The cumulative impact on performance of this & the preceding change,
using the test setup described in the previous commit:

    Test                                             HEAD~2            HEAD~                   HEAD
    ----------------------------------------------------------------------------------------------------------------
    1450.3: fsck with 0 skipped bad commits          7.70(7.31+0.38)   7.72(7.33+0.38) +0.3%   7.70(7.30+0.40) +0.0%
    1450.5: fsck with 1 skipped bad commits          7.84(7.47+0.37)   7.69(7.32+0.36) -1.9%   7.71(7.29+0.41) -1.7%
    1450.7: fsck with 10 skipped bad commits         7.81(7.40+0.40)   7.94(7.57+0.36) +1.7%   7.92(7.55+0.37) +1.4%
    1450.9: fsck with 100 skipped bad commits        7.81(7.42+0.38)   7.95(7.53+0.41) +1.8%   7.83(7.42+0.41) +0.3%
    1450.11: fsck with 1000 skipped bad commits      7.99(7.62+0.36)   7.90(7.50+0.40) -1.1%   7.86(7.49+0.37) -1.6%
    1450.13: fsck with 10000 skipped bad commits     7.98(7.57+0.40)   7.94(7.53+0.40) -0.5%   7.90(7.45+0.44) -1.0%
    1450.15: fsck with 100000 skipped bad commits    7.97(7.57+0.39)   8.03(7.67+0.36) +0.8%   7.84(7.43+0.41) -1.6%
    1450.17: fsck with 1000000 skipped bad commits   7.72(7.22+0.50)   7.28(7.07+0.20) -5.7%   7.13(6.87+0.25) -7.6%

Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Rene Scharfe <l.s.r@web.de>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
René Scharfe 2018-09-03 14:49:27 +00:00 committed by Junio C Hamano
parent fb8952077d
commit 3b41fb0cb2
3 changed files with 13 additions and 29 deletions

View File

@ -1731,11 +1731,12 @@ all three of them they must all set to the same values.
+
Older versions of Git (before 2.20) documented that the object names
list should be sorted. This was never a requirement, the object names
can appear in any order, but when reading the list we track whether
the list is sorted for the purposes of an internal binary search
implementation, which can save itself some work with an already sorted
list. Unless you have a humongous list there's no reason to go out of
your way to pre-sort the list.
could appear in any order, but when reading the list we tracked whether
the list was sorted for the purposes of an internal binary search
implementation, which could save itself some work with an already sorted
list. Unless you had a humongous list there was no reason to go out of
your way to pre-sort the list. After Git version 2.20 a hash implementation
is used instead, so there's now no reason to pre-sort the list.
gc.aggressiveDepth::
The depth parameter used in the delta compression

23
fsck.c
View File

@ -10,7 +10,6 @@
#include "fsck.h"
#include "refs.h"
#include "utf8.h"
#include "sha1-array.h"
#include "decorate.h"
#include "oidset.h"
#include "packfile.h"
@ -182,19 +181,10 @@ static int fsck_msg_type(enum fsck_msg_id msg_id,
static void init_skiplist(struct fsck_options *options, const char *path)
{
static struct oid_array skiplist = OID_ARRAY_INIT;
int sorted;
FILE *fp;
struct strbuf sb = STRBUF_INIT;
struct object_id oid;
if (options->skiplist)
sorted = options->skiplist->sorted;
else {
sorted = 1;
options->skiplist = &skiplist;
}
fp = fopen(path, "r");
if (!fp)
die("Could not open skip list: %s", path);
@ -202,19 +192,12 @@ static void init_skiplist(struct fsck_options *options, const char *path)
const char *p;
if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0')
die("Invalid SHA-1: %s", sb.buf);
oid_array_append(&skiplist, &oid);
if (sorted && skiplist.nr > 1 &&
oidcmp(&skiplist.oid[skiplist.nr - 2],
&oid) > 0)
sorted = 0;
oidset_insert(&options->skiplist, &oid);
}
if (ferror(fp))
die_errno("Could not read '%s'", path);
fclose(fp);
strbuf_release(&sb);
if (sorted)
skiplist.sorted = 1;
}
static int parse_msg_type(const char *str)
@ -319,9 +302,7 @@ static void append_msg_id(struct strbuf *sb, const char *msg_id)
static int object_on_skiplist(struct fsck_options *opts, struct object *obj)
{
if (opts && opts->skiplist && obj)
return oid_array_lookup(opts->skiplist, &obj->oid) >= 0;
return 0;
return opts && obj && oidset_contains(&opts->skiplist, &obj->oid);
}
__attribute__((format (printf, 4, 5)))

8
fsck.h
View File

@ -1,6 +1,8 @@
#ifndef GIT_FSCK_H
#define GIT_FSCK_H
#include "oidset.h"
#define FSCK_ERROR 1
#define FSCK_WARN 2
#define FSCK_IGNORE 3
@ -35,12 +37,12 @@ struct fsck_options {
fsck_error error_func;
unsigned strict:1;
int *msg_type;
struct oid_array *skiplist;
struct oidset skiplist;
struct decoration *object_names;
};
#define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL }
#define FSCK_OPTIONS_STRICT { NULL, fsck_error_function, 1, NULL }
#define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL, OIDSET_INIT }
#define FSCK_OPTIONS_STRICT { NULL, fsck_error_function, 1, NULL, OIDSET_INIT }
/* descend in all linked child objects
* the return value is: