chunk-format: create read chunk API

Add the capability to read the table of contents, then pair the chunks
with necessary logic using read_chunk_fn pointers. Callers will be added
in future changes, but the typical outline will be:

 1. initialize a 'struct chunkfile' with init_chunkfile(NULL).
 2. call read_table_of_contents().
 3. for each chunk to parse,
    a. call pair_chunk() to assign a pointer with the chunk position, or
    b. call read_chunk() to run a callback on the chunk start and size.
 4. call free_chunkfile() to clear the 'struct chunkfile' data.

We are re-using the anonymous 'struct chunkfile' data, as it is internal
to the chunk-format API. This gives it essentially two modes: write and
read. If the same struct instance were used for both reads and writes,
then there would be failures.

Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Derrick Stolee 2021-02-18 14:07:34 +00:00 committed by Junio C Hamano
parent 63a8f0e9b9
commit 5f0879f54b
2 changed files with 127 additions and 0 deletions

View File

@ -11,6 +11,8 @@ struct chunk_info {
uint32_t id;
uint64_t size;
chunk_write_fn write_fn;
const void *start;
};
struct chunkfile {
@ -88,3 +90,81 @@ int write_chunkfile(struct chunkfile *cf, void *data)
return 0;
}
/*
 * Parse the table of contents located at 'mfile + toc_offset' and record
 * one entry (id, start pointer, size) in 'cf' for each of the 'toc_length'
 * chunks it describes.
 *
 * Every TOC entry is CHUNK_TOC_ENTRY_SIZE bytes wide: a 32-bit chunk id
 * read with get_be32() followed by a 64-bit offset read with get_be64().
 * The offset is relative to 'mfile'. A chunk's size is the distance from
 * its own offset to the offset stored in the following entry, so the
 * table must hold 'toc_length' + 1 entries; the extra, final entry must
 * carry a zero id.
 *
 * Chunk data may not extend into the last 'the_hash_algo->rawsz' bytes of
 * the file (presumably the trailing checksum -- confirm against the file
 * format in use).
 *
 * Returns 0 on success. On malformed input an error is reported and a
 * non-zero value is returned: 1 when a zero id shows up before
 * 'toc_length' entries were read, -1 for inconsistent offsets or a
 * non-zero id in the final entry. Entries parsed before the failure stay
 * recorded in 'cf'.
 *
 * NOTE(review): the extent of the table itself is not checked against
 * 'mfile_size' here; presumably callers validate that before calling.
 */
int read_table_of_contents(struct chunkfile *cf,
			   const unsigned char *mfile,
			   size_t mfile_size,
			   uint64_t toc_offset,
			   int toc_length)
{
	uint32_t chunk_id;
	const unsigned char *table_of_contents = mfile + toc_offset;

	/*
	 * New entries are appended at cf->chunks_nr, so the array has to
	 * hold the entries already present plus the ones about to be added.
	 */
	ALLOC_GROW(cf->chunks, cf->chunks_nr + toc_length, cf->chunks_alloc);

	while (toc_length--) {
		uint64_t chunk_offset, next_chunk_offset;

		chunk_id = get_be32(table_of_contents);
		chunk_offset = get_be64(table_of_contents + 4);

		if (!chunk_id) {
			error(_("terminating chunk id appears earlier than expected"));
			return 1;
		}

		/* The next entry's offset marks where this chunk ends. */
		table_of_contents += CHUNK_TOC_ENTRY_SIZE;
		next_chunk_offset = get_be64(table_of_contents + 4);

		/*
		 * Check 'mfile_size < rawsz' first: the subtraction below is
		 * unsigned and would wrap around for a too-small file,
		 * letting any offset pass the upper-bound test.
		 */
		if (next_chunk_offset < chunk_offset ||
		    mfile_size < the_hash_algo->rawsz ||
		    next_chunk_offset > mfile_size - the_hash_algo->rawsz) {
			error(_("improper chunk offset(s) %"PRIx64" and %"PRIx64""),
			      chunk_offset, next_chunk_offset);
			return -1;
		}

		cf->chunks[cf->chunks_nr].id = chunk_id;
		cf->chunks[cf->chunks_nr].start = mfile + chunk_offset;
		cf->chunks[cf->chunks_nr].size = next_chunk_offset - chunk_offset;
		cf->chunks_nr++;
	}

	/* The entry following the last chunk must be the zero-id terminator. */
	chunk_id = get_be32(table_of_contents);
	if (chunk_id) {
		error(_("final chunk has non-zero id %"PRIx32""), chunk_id);
		return -1;
	}

	return 0;
}
/*
 * chunk_read_fn callback used by pair_chunk(): 'data' points at a
 * 'const unsigned char *' which is set to the start of the chunk.
 * The chunk size is ignored. Always returns 0.
 */
static int pair_chunk_fn(const unsigned char *chunk_start,
			 size_t chunk_size,
			 void *data)
{
	*(const unsigned char **)data = chunk_start;

	return 0;
}
/*
 * Look up 'chunk_id' in 'cf' and, when it is present, store the address
 * of the chunk's first byte in '*p'. The lookup is delegated to
 * read_chunk() with pair_chunk_fn() as the callback, so when no such
 * chunk exists '*p' is left untouched and CHUNK_NOT_FOUND is returned.
 */
int pair_chunk(struct chunkfile *cf,
	       uint32_t chunk_id,
	       const unsigned char **p)
{
	int result = read_chunk(cf, chunk_id, pair_chunk_fn, p);

	return result;
}
/*
 * Scan the chunks recorded in 'cf' in order and invoke 'fn' on the first
 * one whose id equals 'chunk_id', passing the chunk's start pointer, its
 * size and the caller's 'data'. The callback's return value is returned
 * as-is. Returns CHUNK_NOT_FOUND when no recorded chunk has that id; 'fn'
 * is not called in that case.
 */
int read_chunk(struct chunkfile *cf,
	       uint32_t chunk_id,
	       chunk_read_fn fn,
	       void *data)
{
	int idx = 0;

	while (idx < cf->chunks_nr) {
		if (cf->chunks[idx].id != chunk_id) {
			idx++;
			continue;
		}
		return fn(cf->chunks[idx].start, cf->chunks[idx].size, data);
	}

	return CHUNK_NOT_FOUND;
}

View File

@ -8,6 +8,20 @@ struct chunkfile;
#define CHUNK_TOC_ENTRY_SIZE (sizeof(uint32_t) + sizeof(uint64_t))
/*
* Initialize a 'struct chunkfile' for writing _or_ reading a file
* with the chunk format.
*
* If writing a file, supply a non-NULL 'struct hashfile *' that will
* be used to write.
*
* If reading a file, use a NULL 'struct hashfile *' and then call
* read_table_of_contents() with the memory-mapped data. Afterwards, use
* the pair_chunk() or read_chunk() methods, as appropriate, to locate
* the individual chunks.
*
* DO NOT MIX THESE MODES. Use different 'struct chunkfile' instances
* for reading and writing.
*/
struct chunkfile *init_chunkfile(struct hashfile *f);
void free_chunkfile(struct chunkfile *cf);
int get_num_chunks(struct chunkfile *cf);
@ -18,4 +32,37 @@ void add_chunk(struct chunkfile *cf,
chunk_write_fn fn);
int write_chunkfile(struct chunkfile *cf, void *data);
/*
* Parse the table of contents that starts 'toc_offset' bytes into the
* 'mfile' buffer of 'mfile_size' bytes, and record the id, start and
* size of each of the 'toc_length' chunks in 'cf'. The table must be
* followed by a terminating entry whose chunk id is zero.
*
* Returns 0 on success and a non-zero value (after reporting an error)
* if the table is malformed.
*/
int read_table_of_contents(struct chunkfile *cf,
const unsigned char *mfile,
size_t mfile_size,
uint64_t toc_offset,
int toc_length);
/*
* Returned by pair_chunk() and read_chunk() when the chunkfile has no
* chunk with the requested id.
*/
#define CHUNK_NOT_FOUND (-2)
/*
* Find 'chunk_id' in the given chunkfile and assign the
* given pointer to the position in the mmap'd file where
* that chunk begins. The size of the chunk is not reported;
* use read_chunk() when the size is needed.
*
* Returns 0 if the chunk was found. Returns CHUNK_NOT_FOUND
* if the chunk does not exist, in which case '*p' is not
* modified.
*/
int pair_chunk(struct chunkfile *cf,
uint32_t chunk_id,
const unsigned char **p);
/*
* Callback type for read_chunk(). It receives a pointer to the first
* byte of the chunk, the chunk's size in bytes, and the 'data' pointer
* that was handed to read_chunk(). Its return value becomes the return
* value of read_chunk().
*/
typedef int (*chunk_read_fn)(const unsigned char *chunk_start,
size_t chunk_size, void *data);
/*
* Find 'chunk_id' in the given chunkfile and call the
* given chunk_read_fn method with the information for
* that chunk (its start, its size, and 'data').
*
* Returns the value returned by 'fn' if the chunk exists.
* Returns CHUNK_NOT_FOUND if the chunk does not exist; 'fn'
* is not called in that case.
*/
int read_chunk(struct chunkfile *cf,
uint32_t chunk_id,
chunk_read_fn fn,
void *data);
#endif