Skip to content

Commit

Permalink
fsck: use oidset instead of oid_array for skipList
Browse files Browse the repository at this point in the history
Change the implementation of the skipList feature to use oidset
instead of oid_array to store SHA-1s for later lookup.

This list is parsed once on startup by fsck, fetch-pack or
receive-pack depending on the *.skipList config in use. I.e. only once
per invocation, but note that for "clone --recurse-submodules" each
submodule will re-parse the list, in addition to the main project, and
it will be re-parsed when checking .gitmodules blobs, see
fb16287 ("fsck: check skiplist for object in fsck_blob()",
2018-06-27).

Memory usage is a bit higher, but we don't need to keep track of the
sort order anymore. Embed the oidset into struct fsck_options to make
its ownership clear (no hidden sharing) and avoid unnecessary pointer
indirection.

The cumulative impact on performance of this & the preceding change,
using the test setup described in the previous commit:

    Test                                             HEAD~2            HEAD~                   HEAD
    ----------------------------------------------------------------------------------------------------------------
    1450.3: fsck with 0 skipped bad commits          7.70(7.31+0.38)   7.72(7.33+0.38) +0.3%   7.70(7.30+0.40) +0.0%
    1450.5: fsck with 1 skipped bad commits          7.84(7.47+0.37)   7.69(7.32+0.36) -1.9%   7.71(7.29+0.41) -1.7%
    1450.7: fsck with 10 skipped bad commits         7.81(7.40+0.40)   7.94(7.57+0.36) +1.7%   7.92(7.55+0.37) +1.4%
    1450.9: fsck with 100 skipped bad commits        7.81(7.42+0.38)   7.95(7.53+0.41) +1.8%   7.83(7.42+0.41) +0.3%
    1450.11: fsck with 1000 skipped bad commits      7.99(7.62+0.36)   7.90(7.50+0.40) -1.1%   7.86(7.49+0.37) -1.6%
    1450.13: fsck with 10000 skipped bad commits     7.98(7.57+0.40)   7.94(7.53+0.40) -0.5%   7.90(7.45+0.44) -1.0%
    1450.15: fsck with 100000 skipped bad commits    7.97(7.57+0.39)   8.03(7.67+0.36) +0.8%   7.84(7.43+0.41) -1.6%
    1450.17: fsck with 1000000 skipped bad commits   7.72(7.22+0.50)   7.28(7.07+0.20) -5.7%   7.13(6.87+0.25) -7.6%

Helped-by: Ævar Arnfjörð Bjarmason <[email protected]>
Signed-off-by: Rene Scharfe <[email protected]>
Signed-off-by: Ævar Arnfjörð Bjarmason <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
  • Loading branch information
rscharfe authored and gitster committed Sep 12, 2018
1 parent fb89520 commit 3b41fb0
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 29 deletions.
11 changes: 6 additions & 5 deletions Documentation/config.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1731,11 +1731,12 @@ all three of them they must all set to the same values.
+
Older versions of Git (before 2.20) documented that the object names
list should be sorted. This was never a requirement, the object names
can appear in any order, but when reading the list we track whether
the list is sorted for the purposes of an internal binary search
implementation, which can save itself some work with an already sorted
list. Unless you have a humongous list there's no reason to go out of
your way to pre-sort the list.
could appear in any order, but when reading the list we tracked whether
the list was sorted for the purposes of an internal binary search
implementation, which could save itself some work with an already sorted
list. Unless you had a humongous list there was no reason to go out of
your way to pre-sort the list. After Git version 2.20 a hash implementation
is used instead, so there's now no reason to pre-sort the list.

gc.aggressiveDepth::
The depth parameter used in the delta compression
Expand Down
23 changes: 2 additions & 21 deletions fsck.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include "fsck.h"
#include "refs.h"
#include "utf8.h"
#include "sha1-array.h"
#include "decorate.h"
#include "oidset.h"
#include "packfile.h"
Expand Down Expand Up @@ -182,39 +181,23 @@ static int fsck_msg_type(enum fsck_msg_id msg_id,

static void init_skiplist(struct fsck_options *options, const char *path)
{
static struct oid_array skiplist = OID_ARRAY_INIT;
int sorted;
FILE *fp;
struct strbuf sb = STRBUF_INIT;
struct object_id oid;

if (options->skiplist)
sorted = options->skiplist->sorted;
else {
sorted = 1;
options->skiplist = &skiplist;
}

fp = fopen(path, "r");
if (!fp)
die("Could not open skip list: %s", path);
while (!strbuf_getline(&sb, fp)) {
const char *p;
if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0')
die("Invalid SHA-1: %s", sb.buf);
oid_array_append(&skiplist, &oid);
if (sorted && skiplist.nr > 1 &&
oidcmp(&skiplist.oid[skiplist.nr - 2],
&oid) > 0)
sorted = 0;
oidset_insert(&options->skiplist, &oid);
}
if (ferror(fp))
die_errno("Could not read '%s'", path);
fclose(fp);
strbuf_release(&sb);

if (sorted)
skiplist.sorted = 1;
}

static int parse_msg_type(const char *str)
Expand Down Expand Up @@ -319,9 +302,7 @@ static void append_msg_id(struct strbuf *sb, const char *msg_id)

static int object_on_skiplist(struct fsck_options *opts, struct object *obj)
{
if (opts && opts->skiplist && obj)
return oid_array_lookup(opts->skiplist, &obj->oid) >= 0;
return 0;
return opts && obj && oidset_contains(&opts->skiplist, &obj->oid);
}

__attribute__((format (printf, 4, 5)))
Expand Down
8 changes: 5 additions & 3 deletions fsck.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#ifndef GIT_FSCK_H
#define GIT_FSCK_H

#include "oidset.h"

#define FSCK_ERROR 1
#define FSCK_WARN 2
#define FSCK_IGNORE 3
Expand Down Expand Up @@ -35,12 +37,12 @@ struct fsck_options {
fsck_error error_func;
unsigned strict:1;
int *msg_type;
struct oid_array *skiplist;
struct oidset skiplist;
struct decoration *object_names;
};

#define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL }
#define FSCK_OPTIONS_STRICT { NULL, fsck_error_function, 1, NULL }
#define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL, OIDSET_INIT }
#define FSCK_OPTIONS_STRICT { NULL, fsck_error_function, 1, NULL, OIDSET_INIT }

/* descend in all linked child objects
* the return value is:
Expand Down

0 comments on commit 3b41fb0

Please sign in to comment.