diff --git a/Documentation/technical/pack-format.txt b/Documentation/technical/pack-format.txt
index 96d2fc589f..8833b71c8b 100644
--- a/Documentation/technical/pack-format.txt
+++ b/Documentation/technical/pack-format.txt
@@ -274,6 +274,26 @@ Pack file entry: <+
 
     Index checksum of all of the above.
 
+== pack-*.rev files have the format:
+
+  - A 4-byte magic number '0x52494458' ('RIDX').
+
+  - A 4-byte version identifier (= 1).
+
+  - A 4-byte hash function identifier (= 1 for SHA-1, 2 for SHA-256).
+
+  - A table of index positions (one per packed object, num_objects in
+    total, each a 4-byte unsigned integer in network order), sorted by
+    their corresponding offsets in the packfile.
+
+  - A trailer, containing:
+
+    a checksum of the corresponding packfile, and
+
+    a checksum of all of the above.
+
+All 4-byte numbers are in network order.
+
 == multi-pack-index (MIDX) files have the following format:
 
 The multi-pack-index files refer to multiple pack-files and loose objects.
diff --git a/builtin/repack.c b/builtin/repack.c
index 2158b48f4c..01440de2d5 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -209,6 +209,7 @@ static struct {
 } exts[] = {
 	{".pack"},
 	{".idx"},
+	{".rev", 1},
 	{".bitmap", 1},
 	{".promisor", 1},
 };
diff --git a/object-store.h b/object-store.h
index c4fc9dd74e..541dab0858 100644
--- a/object-store.h
+++ b/object-store.h
@@ -85,6 +85,14 @@ struct packed_git {
 		 multi_pack_index:1;
 	unsigned char hash[GIT_MAX_RAWSZ];
 	struct revindex_entry *revindex;
+	/*
+	 * When a ".rev" file is mapped, revindex_map is the start of the
+	 * mapping (revindex_size bytes long) and revindex_data points at
+	 * the table of index positions just past its header.
+	 */
+	const uint32_t *revindex_data;
+	const uint32_t *revindex_map;
+	size_t revindex_size;
 	/* something like ".git/objects/pack/xxxxx.pack" */
 	char pack_name[FLEX_ARRAY]; /* more */
 };
diff --git a/pack-revindex.c b/pack-revindex.c
index 5e69bc7372..a174fa5388 100644
--- a/pack-revindex.c
+++ b/pack-revindex.c
@@ -164,14 +164,155 @@ static void create_pack_revindex(struct packed_git *p)
 	sort_revindex(p->revindex, num_ent, p->pack_size);
 }
 
+/*
+ * Build the reverse index in memory from the pack's index. Returns 0 on
+ * success, or -1 if the pack index could not be opened.
+ */
+static int create_pack_revindex_in_memory(struct packed_git *p)
+{
+	if (open_pack_index(p))
+		return -1;
+	create_pack_revindex(p);
+	return 0;
+}
+
+/*
+ * Return a newly allocated name for the ".rev" file corresponding to the
+ * given pack. The caller is responsible for freeing it.
+ */
+static char *pack_revindex_filename(struct packed_git *p)
+{
+	size_t len;
+	if (!strip_suffix(p->pack_name, ".pack", &len))
+		BUG("pack_name does not end in .pack");
+	return xstrfmt("%.*s.rev", (int)len, p->pack_name);
+}
+
+/*
+ * The header holds three 4-byte fields (see struct revindex_header). The
+ * smallest valid file is a header followed by the two trailing checksums.
+ */
+#define RIDX_HEADER_SIZE (12)
+#define RIDX_MIN_SIZE (RIDX_HEADER_SIZE + (2 * the_hash_algo->rawsz))
+
+struct revindex_header {
+	uint32_t signature;
+	uint32_t version;
+	uint32_t hash_id;
+};
+
+/*
+ * Map the reverse-index file "revindex_name" and check its size and
+ * header, given that the pack holds "num_objects" objects. On success,
+ * return 0 and store the mapping and its length in "*data_p" and
+ * "*len_p"; the caller must munmap() it. On failure, return non-zero and
+ * leave both outputs untouched.
+ */
+static int load_revindex_from_disk(char *revindex_name,
+				   uint32_t num_objects,
+				   const uint32_t **data_p, size_t *len_p)
+{
+	int fd, ret = 0;
+	struct stat st;
+	void *data = NULL;
+	size_t revindex_size;
+	struct revindex_header *hdr;
+
+	fd = git_open(revindex_name);
+
+	if (fd < 0) {
+		ret = -1;
+		goto cleanup;
+	}
+	if (fstat(fd, &st)) {
+		ret = error_errno(_("failed to read %s"), revindex_name);
+		goto cleanup;
+	}
+
+	revindex_size = xsize_t(st.st_size);
+
+	if (revindex_size < RIDX_MIN_SIZE) {
+		ret = error(_("reverse-index file %s is too small"), revindex_name);
+		goto cleanup;
+	}
+
+	if (revindex_size - RIDX_MIN_SIZE != st_mult(sizeof(uint32_t), num_objects)) {
+		ret = error(_("reverse-index file %s is corrupt"), revindex_name);
+		goto cleanup;
+	}
+
+	data = xmmap(NULL, revindex_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	hdr = data;
+
+	if (ntohl(hdr->signature) != RIDX_SIGNATURE) {
+		ret = error(_("reverse-index file %s has unknown signature"), revindex_name);
+		goto cleanup;
+	}
+	if (ntohl(hdr->version) != RIDX_VERSION) {
+		ret = error(_("reverse-index file %s has unsupported version %"PRIu32),
+			    revindex_name, ntohl(hdr->version));
+		goto cleanup;
+	}
+	if (!(ntohl(hdr->hash_id) == 1 || ntohl(hdr->hash_id) == 2)) {
+		ret = error(_("reverse-index file %s has unsupported hash id %"PRIu32),
+			    revindex_name, ntohl(hdr->hash_id));
+		goto cleanup;
+	}
+
+cleanup:
+	if (ret) {
+		if (data)
+			munmap(data, revindex_size);
+	} else {
+		*len_p = revindex_size;
+		*data_p = (const uint32_t *)data;
+	}
+
+	/* git_open() may have failed, leaving nothing to close */
+	if (fd >= 0)
+		close(fd);
+	return ret;
+}
+
+/*
+ * Load the on-disk reverse index for "p", if there is one. On success,
+ * "p->revindex_map" is the start of the mapping and "p->revindex_data"
+ * is the table of index positions just past the header.
+ */
+static int load_pack_revindex_from_disk(struct packed_git *p)
+{
+	char *revindex_name;
+	int ret;
+	if (open_pack_index(p))
+		return -1;
+
+	revindex_name = pack_revindex_filename(p);
+
+	ret = load_revindex_from_disk(revindex_name,
+				      p->num_objects,
+				      &p->revindex_map,
+				      &p->revindex_size);
+	if (ret)
+		goto cleanup;
+
+	p->revindex_data = (const uint32_t *)((const char *)p->revindex_map + RIDX_HEADER_SIZE);
+
+cleanup:
+	free(revindex_name);
+	return ret;
+}
+
 int load_pack_revindex(struct packed_git *p)
 {
-	if (!p->revindex) {
-		if (open_pack_index(p))
-			return -1;
-		create_pack_revindex(p);
-	}
-	return 0;
+	if (p->revindex || p->revindex_data)
+		return 0;
+
+	/* prefer the on-disk ".rev" file, falling back to the in-memory index */
+	if (!load_pack_revindex_from_disk(p))
+		return 0;
+	else if (!create_pack_revindex_in_memory(p))
+		return 0;
+	return -1;
 }
 
 int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos)
@@ -203,18 +344,28 @@ int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos)
 
 uint32_t pack_pos_to_index(struct packed_git *p, uint32_t pos)
 {
-	if (!p->revindex)
+	if (!(p->revindex || p->revindex_data))
 		BUG("pack_pos_to_index: reverse index not yet loaded");
 	if (p->num_objects <= pos)
 		BUG("pack_pos_to_index: out-of-bounds object at %"PRIu32, pos);
-	return p->revindex[pos].nr;
+
+	if (p->revindex)
+		return p->revindex[pos].nr;
+	else
+		return get_be32(p->revindex_data + pos);
 }
 
 off_t pack_pos_to_offset(struct packed_git *p, uint32_t pos)
 {
-	if (!p->revindex)
-		BUG("pack_pos_to_index: reverse index not yet loaded");
+	if (!(p->revindex || p->revindex_data))
+		BUG("pack_pos_to_offset: reverse index not yet loaded");
 	if (p->num_objects < pos)
 		BUG("pack_pos_to_offset: out-of-bounds object at %"PRIu32, pos);
-	return p->revindex[pos].offset;
+
+	if (p->revindex)
+		return p->revindex[pos].offset;
+	else if (pos == p->num_objects)
+		return p->pack_size - the_hash_algo->rawsz;
+	else
+		return nth_packed_object_offset(p, pack_pos_to_index(p, pos));
 }
diff --git a/pack-revindex.h b/pack-revindex.h
index 6e0320b08b..61b2f3ab75 100644
--- a/pack-revindex.h
+++ b/pack-revindex.h
@@ -16,11 +16,17 @@
  * can be found
  */
 
+#define RIDX_SIGNATURE 0x52494458 /* "RIDX" */
+#define RIDX_VERSION 1
+
 struct packed_git;
 
 /*
  * load_pack_revindex populates the revindex's internal data-structures for the
  * given pack, returning zero on success and a negative value otherwise.
+ *
+ * If a '.rev' file is present it is mmap'd, and pointers are assigned into it
+ * (instead of using the in-memory variant).
  */
 int load_pack_revindex(struct packed_git *p);
 
@@ -55,7 +61,9 @@ uint32_t pack_pos_to_index(struct packed_git *p, uint32_t pos);
  * If the reverse index has not yet been loaded, or the position is out of
  * bounds, this function aborts.
  *
- * This function runs in constant time.
+ * This function runs in constant time under both in-memory and on-disk reverse
+ * indexes, but an additional step is taken to consult the corresponding .idx
+ * file when using the on-disk format.
  */
 off_t pack_pos_to_offset(struct packed_git *p, uint32_t pos);
 
diff --git a/packfile.c b/packfile.c
index 4b938b4372..1fec12ac5f 100644
--- a/packfile.c
+++ b/packfile.c
@@ -324,11 +324,23 @@ void close_pack_index(struct packed_git *p)
 	}
 }
 
+/* Unmap the on-disk reverse index of "p", if one is currently mapped. */
+void close_pack_revindex(struct packed_git *p)
+{
+	if (!p->revindex_map)
+		return;
+
+	munmap((void *)p->revindex_map, p->revindex_size);
+	p->revindex_map = NULL;
+	p->revindex_data = NULL;
+}
+
 void close_pack(struct packed_git *p)
 {
 	close_pack_windows(p);
 	close_pack_fd(p);
 	close_pack_index(p);
+	close_pack_revindex(p);
 }
 
 void close_object_store(struct raw_object_store *o)
@@ -351,7 +363,7 @@ void close_object_store(struct raw_object_store *o)
 
 void unlink_pack_path(const char *pack_name, int force_delete)
 {
-	static const char *exts[] = {".pack", ".idx", ".keep", ".bitmap", ".promisor"};
+	static const char *exts[] = {".pack", ".idx", ".rev", ".keep", ".bitmap", ".promisor"};
 	int i;
 	struct strbuf buf = STRBUF_INIT;
 	size_t plen;
@@ -853,6 +865,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
 	if (!strcmp(file_name, "multi-pack-index"))
 		return;
 	if (ends_with(file_name, ".idx") ||
+	    ends_with(file_name, ".rev") ||
 	    ends_with(file_name, ".pack") ||
 	    ends_with(file_name, ".bitmap") ||
 	    ends_with(file_name, ".keep") ||
diff --git a/packfile.h b/packfile.h
index a58fc738e0..4cfec9e8d3 100644
--- a/packfile.h
+++ b/packfile.h
@@ -90,6 +90,7 @@ uint32_t get_pack_fanout(struct packed_git *p, uint32_t value);
 
 unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *);
 void close_pack_windows(struct packed_git *);
+void close_pack_revindex(struct packed_git *);
 void close_pack(struct packed_git *);
 void close_object_store(struct raw_object_store *o);
 void unuse_pack(struct pack_window **);
diff --git a/tmp-objdir.c b/tmp-objdir.c
index 42ed4db5d3..b8d880e362 100644
--- a/tmp-objdir.c
+++ b/tmp-objdir.c
@@ -185,9 +185,11 @@ static int pack_copy_priority(const char *name)
 		return 1;
 	if (ends_with(name, ".pack"))
 		return 2;
-	if (ends_with(name, ".idx"))
+	if (ends_with(name, ".rev"))
 		return 3;
-	return 4;
+	if (ends_with(name, ".idx"))
+		return 4;
+	return 5;
 }
 
 static int pack_copy_cmp(const char *a, const char *b)