pack-mtimes: support reading .mtimes files

To store the individual mtimes of objects in a cruft pack, introduce a
new `.mtimes` format that can optionally accompany a single pack in the
repository.

The format is defined in Documentation/technical/pack-format.txt, and
stores a 4-byte network order timestamp for each object in name (index)
order.

This patch prepares for cruft packs by defining the `.mtimes` format,
and introducing a basic API that callers can use to read out individual
mtimes.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Taylor Blau 2022-05-20 19:17:35 -04:00 committed by Junio C Hamano
parent 3d89a8c118
commit 94cd775a6c
7 changed files with 202 additions and 3 deletions

View File

@ -294,6 +294,25 @@ Pack file entry: <+
All 4-byte numbers are in network order.
== pack-*.mtimes files have the format:
All 4-byte numbers are in network byte order.
- A 4-byte magic number '0x4d544d45' ('MTME').
- A 4-byte version identifier (= 1).
- A 4-byte hash function identifier (= 1 for SHA-1, 2 for SHA-256).
- A table of 4-byte unsigned integers. The ith value is the
modification time (mtime) of the ith object in the corresponding
pack by lexicographic (index) order. Each mtime is expressed in
seconds since the Unix epoch.
- A trailer, containing a checksum of the corresponding packfile,
and a checksum of all of the above (each having length according
to the specified hash function).
== multi-pack-index (MIDX) files have the following format:
The multi-pack-index files refer to multiple pack-files and loose objects.

View File

@ -993,6 +993,7 @@ LIB_OBJS += oidtree.o
LIB_OBJS += pack-bitmap-write.o
LIB_OBJS += pack-bitmap.o
LIB_OBJS += pack-check.o
LIB_OBJS += pack-mtimes.o
LIB_OBJS += pack-objects.o
LIB_OBJS += pack-revindex.o
LIB_OBJS += pack-write.o

View File

@ -217,6 +217,7 @@ static struct {
} exts[] = {
{".pack"},
{".rev", 1},
{".mtimes", 1},
{".bitmap", 1},
{".promisor", 1},
{".idx"},

View File

@ -115,12 +115,20 @@ struct packed_git {
freshened:1,
do_not_close:1,
pack_promisor:1,
multi_pack_index:1;
multi_pack_index:1,
is_cruft:1;
unsigned char hash[GIT_MAX_RAWSZ];
struct revindex_entry *revindex;
const uint32_t *revindex_data;
const uint32_t *revindex_map;
size_t revindex_size;
/*
* mtimes_map points at the beginning of the memory mapped region of
* this pack's corresponding .mtimes file, and mtimes_size is the size
* of that .mtimes file
*/
const uint32_t *mtimes_map;
size_t mtimes_size;
/* something like ".git/objects/pack/xxxxx.pack" */
char pack_name[FLEX_ARRAY]; /* more */
};

129
pack-mtimes.c Normal file
View File

@ -0,0 +1,129 @@
#include "git-compat-util.h"
#include "pack-mtimes.h"
#include "object-store.h"
#include "packfile.h"
/*
 * Build the path of the .mtimes file that accompanies pack "p" by
 * replacing the trailing ".pack" of p->pack_name with ".mtimes".
 *
 * The returned string comes from xstrfmt(); the caller is expected to
 * free() it. It is a BUG() for the pack name not to end in ".pack".
 */
static char *pack_mtimes_filename(struct packed_git *p)
{
	size_t base_len;
	int has_pack_suffix = strip_suffix(p->pack_name, ".pack", &base_len);

	if (!has_pack_suffix)
		BUG("pack_name does not end in .pack");

	return xstrfmt("%.*s.mtimes", (int)base_len, p->pack_name);
}
/* Size in bytes of the fixed .mtimes header: three 4-byte fields. */
#define MTIMES_HEADER_SIZE (12)
/*
 * Host-byte-order copy of the three header words of a .mtimes file:
 *   signature - magic number, compared against MTIMES_SIGNATURE
 *   version   - format version; the loader only accepts 1
 *   hash_id   - hash function identifier; the loader accepts 1 or 2
 */
struct mtimes_header {
uint32_t signature;
uint32_t version;
uint32_t hash_id;
};
/*
 * Open, map and validate the .mtimes file at path "mtimes_file".
 *
 * mtimes_file: path of the .mtimes file to load.
 * num_objects: number of objects in the corresponding pack; used to
 *              compute the exact size the file must have.
 * data_p:      on success, receives the start of the mapped file.
 * len_p:       on success, receives the size of the mapping in bytes.
 *
 * The file is accepted only if its signature, version and hash id are
 * recognized and its size is exactly: the header, plus one 4-byte entry
 * per object, plus two trailing checksums sized by the hash function.
 *
 * Returns zero on success. On failure a non-zero value is returned,
 * any mapping created here is released, and *data_p / *len_p are left
 * untouched.
 */
static int load_pack_mtimes_file(char *mtimes_file,
				 uint32_t num_objects,
				 const uint32_t **data_p, size_t *len_p)
{
	int fd, ret = 0;
	struct stat st;
	uint32_t *data = NULL;
	size_t mtimes_size, expected_size;
	struct mtimes_header header;

	fd = git_open(mtimes_file);
	if (fd < 0) {
		ret = -1;
		goto cleanup;
	}
	if (fstat(fd, &st)) {
		ret = error_errno(_("failed to read %s"), mtimes_file);
		goto cleanup;
	}

	mtimes_size = xsize_t(st.st_size);
	if (mtimes_size < MTIMES_HEADER_SIZE) {
		ret = error(_("mtimes file %s is too small"), mtimes_file);
		goto cleanup;
	}

	data = xmmap(NULL, mtimes_size, PROT_READ, MAP_PRIVATE, fd, 0);

	/* the size check above guarantees the three header words exist */
	header.signature = ntohl(data[0]);
	header.version = ntohl(data[1]);
	header.hash_id = ntohl(data[2]);

	if (header.signature != MTIMES_SIGNATURE) {
		ret = error(_("mtimes file %s has unknown signature"), mtimes_file);
		goto cleanup;
	}
	if (header.version != 1) {
		ret = error(_("mtimes file %s has unsupported version %"PRIu32),
			    mtimes_file, header.version);
		goto cleanup;
	}
	if (!(header.hash_id == 1 || header.hash_id == 2)) {
		ret = error(_("mtimes file %s has unsupported hash id %"PRIu32),
			    mtimes_file, header.hash_id);
		goto cleanup;
	}

	/* header + one entry per object + two trailing checksums */
	expected_size = MTIMES_HEADER_SIZE;
	expected_size = st_add(expected_size, st_mult(sizeof(uint32_t), num_objects));
	expected_size = st_add(expected_size, 2 * (header.hash_id == 1 ? GIT_SHA1_RAWSZ : GIT_SHA256_RAWSZ));

	if (mtimes_size != expected_size) {
		ret = error(_("mtimes file %s is corrupt"), mtimes_file);
		goto cleanup;
	}

cleanup:
	if (ret) {
		if (data)
			munmap(data, mtimes_size);
	} else {
		*len_p = mtimes_size;
		*data_p = data;
	}

	/*
	 * We also get here when git_open() failed; do not hand a negative
	 * descriptor to close().
	 */
	if (fd >= 0)
		close(fd);
	return ret;
}
/*
 * Load the .mtimes file for pack "p" into p->mtimes_map and
 * p->mtimes_size.
 *
 * Returns zero immediately when "p" is not marked as a cruft pack or
 * when its .mtimes data is already loaded. Otherwise the pack index is
 * opened first (the loader needs p->num_objects); a negative result
 * from that step is returned as-is, and in all other cases the result
 * of loading the .mtimes file is returned.
 */
int load_pack_mtimes(struct packed_git *p)
{
	char *path;
	int status;

	/* nothing to do: not a cruft pack, or already loaded */
	if (!p->is_cruft || p->mtimes_map)
		return 0;

	status = open_pack_index(p);
	if (status < 0)
		return status;

	path = pack_mtimes_filename(p);
	status = load_pack_mtimes_file(path, p->num_objects,
				       &p->mtimes_map, &p->mtimes_size);
	free(path);

	return status;
}
/*
 * Return the mtime recorded for the object at position "pos" of pack
 * "p", decoded with get_be32().
 *
 * It is a BUG() to call this before the pack's .mtimes data has been
 * loaded, or with "pos" not smaller than p->num_objects.
 */
uint32_t nth_packed_mtime(struct packed_git *p, uint32_t pos)
{
	const uint32_t *entry;

	if (!p->mtimes_map)
		BUG("pack .mtimes file not loaded for %s", p->pack_name);
	if (pos >= p->num_objects)
		BUG("pack .mtimes out-of-bounds (%"PRIu32" vs %"PRIu32")",
		    pos, p->num_objects);

	/* the table starts after the three 4-byte header words */
	entry = p->mtimes_map + pos + 3;
	return get_be32(entry);
}

26
pack-mtimes.h Normal file
View File

@ -0,0 +1,26 @@
#ifndef PACK_MTIMES_H
#define PACK_MTIMES_H
#include "git-compat-util.h"
/* Magic number expected in the first 4 bytes of a .mtimes file. */
#define MTIMES_SIGNATURE 0x4d544d45 /* "MTME" */
/* Version of the .mtimes format. */
#define MTIMES_VERSION 1
struct packed_git;
/*
 * Loads the .mtimes file corresponding to "p", if any, returning zero
 * on success and a non-zero value on failure.
 *
 * Zero is also returned, without loading anything, when "p" is not a
 * cruft pack or when its .mtimes file has already been loaded.
 */
int load_pack_mtimes(struct packed_git *p);
/*
 * Returns the mtime associated with the object at position "pos" (in
 * lexicographic/index order) in pack "p".
 *
 * Note that it is a BUG() to call this function if either (a) "p" does
 * not have a corresponding .mtimes file, (b) it does, but it hasn't
 * been loaded, or (c) "pos" is not smaller than the number of objects
 * in "p".
 */
uint32_t nth_packed_mtime(struct packed_git *p, uint32_t pos);
#endif

View File

@ -334,12 +334,22 @@ static void close_pack_revindex(struct packed_git *p)
p->revindex_data = NULL;
}
/*
 * Unmap the .mtimes data of pack "p", if any is mapped, and reset
 * p->mtimes_map so that a later call is a no-op.
 */
static void close_pack_mtimes(struct packed_git *p)
{
	if (p->mtimes_map) {
		munmap((void *)p->mtimes_map, p->mtimes_size);
		p->mtimes_map = NULL;
	}
}
/*
 * Tear down the per-pack state of "p" by running each close_pack_*()
 * helper in turn and then clearing the pack's set of bad objects.
 */
void close_pack(struct packed_git *p)
{
close_pack_windows(p);
close_pack_fd(p);
close_pack_index(p);
close_pack_revindex(p);
/* unmaps the pack's .mtimes data, if any was loaded */
close_pack_mtimes(p);
oidset_clear(&p->bad_objects);
}
@ -363,7 +373,7 @@ void close_object_store(struct raw_object_store *o)
void unlink_pack_path(const char *pack_name, int force_delete)
{
static const char *exts[] = {".pack", ".idx", ".rev", ".keep", ".bitmap", ".promisor"};
static const char *exts[] = {".pack", ".idx", ".rev", ".keep", ".bitmap", ".promisor", ".mtimes"};
int i;
struct strbuf buf = STRBUF_INIT;
size_t plen;
@ -718,6 +728,10 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
if (!access(p->pack_name, F_OK))
p->pack_promisor = 1;
xsnprintf(p->pack_name + path_len, alloc - path_len, ".mtimes");
if (!access(p->pack_name, F_OK))
p->is_cruft = 1;
xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack");
if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {
free(p);
@ -869,7 +883,8 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
ends_with(file_name, ".pack") ||
ends_with(file_name, ".bitmap") ||
ends_with(file_name, ".keep") ||
ends_with(file_name, ".promisor"))
ends_with(file_name, ".promisor") ||
ends_with(file_name, ".mtimes"))
string_list_append(data->garbage, full_name);
else
report_garbage(PACKDIR_FILE_GARBAGE, full_name);