Merge branch 'jt/reftable-geometric-compaction'

The strategy to compact multiple tables of reftables after many
operations accumulate many entries has been improved to avoid
accumulating too many tables uncollected.

* jt/reftable-geometric-compaction:
  reftable/stack: use geometric table compaction
  reftable/stack: add env to disable autocompaction
  reftable/stack: expose option to disable auto-compaction
This commit is contained in:
Junio C Hamano 2024-04-16 14:50:30 -07:00
commit 82a31ec324
6 changed files with 148 additions and 141 deletions

View File

@ -18,6 +18,7 @@
#include "../reftable/reftable-merged.h"
#include "../setup.h"
#include "../strmap.h"
#include "parse.h"
#include "refs-internal.h"
/*
@ -247,6 +248,8 @@ static struct ref_store *reftable_be_init(struct repository *repo,
refs->write_options.block_size = 4096;
refs->write_options.hash_id = repo->hash_algo->format_id;
refs->write_options.default_permissions = calc_shared_perm(0666 & ~mask);
refs->write_options.disable_auto_compact =
!git_env_bool("GIT_TEST_REFTABLE_AUTOCOMPACTION", 1);
/*
* Set up the main reftable stack that is hosted in GIT_COMMON_DIR.

View File

@ -46,6 +46,9 @@ struct reftable_write_options {
* is a single line, and add '\n' if missing.
*/
unsigned exact_log_message : 1;
/* boolean: Prevent auto-compaction of tables. */
unsigned disable_auto_compact : 1;
};
/* reftable_block_stats holds statistics for a single block type */

View File

@ -680,7 +680,7 @@ int reftable_addition_commit(struct reftable_addition *add)
if (err)
goto done;
if (!add->stack->disable_auto_compact) {
if (!add->stack->config.disable_auto_compact) {
/*
* Auto-compact the stack to keep the number of tables in
* control. It is possible that a concurrent writer is already
@ -1216,75 +1216,76 @@ static int segment_size(struct segment *s)
return s->end - s->start;
}
int fastlog2(uint64_t sz)
{
int l = 0;
if (sz == 0)
return 0;
for (; sz; sz /= 2) {
l++;
}
return l - 1;
}
struct segment *sizes_to_segments(size_t *seglen, uint64_t *sizes, size_t n)
{
struct segment *segs = reftable_calloc(n, sizeof(*segs));
struct segment cur = { 0 };
size_t next = 0, i;
if (n == 0) {
*seglen = 0;
return segs;
}
for (i = 0; i < n; i++) {
int log = fastlog2(sizes[i]);
if (cur.log != log && cur.bytes > 0) {
struct segment fresh = {
.start = i,
};
segs[next++] = cur;
cur = fresh;
}
cur.log = log;
cur.end = i + 1;
cur.bytes += sizes[i];
}
segs[next++] = cur;
*seglen = next;
return segs;
}
struct segment suggest_compaction_segment(uint64_t *sizes, size_t n)
{
struct segment min_seg = {
.log = 64,
};
struct segment *segs;
size_t seglen = 0, i;
struct segment seg = { 0 };
uint64_t bytes;
size_t i;
segs = sizes_to_segments(&seglen, sizes, n);
for (i = 0; i < seglen; i++) {
if (segment_size(&segs[i]) == 1)
continue;
/*
* If there are no tables or only a single one then we don't have to
* compact anything. The sequence is geometric by definition already.
*/
if (n <= 1)
return seg;
if (segs[i].log < min_seg.log)
min_seg = segs[i];
}
while (min_seg.start > 0) {
size_t prev = min_seg.start - 1;
if (fastlog2(min_seg.bytes) < fastlog2(sizes[prev]))
/*
* Find the ending table of the compaction segment needed to restore the
* geometric sequence. Note that the segment end is exclusive.
*
* To do so, we iterate backwards starting from the most recent table
* until a valid segment end is found. If the preceding table is smaller
* than the current table multiplied by the geometric factor (2), the
* compaction segment end has been identified.
*
* Tables after the ending point are not added to the byte count because
* they are already valid members of the geometric sequence. Due to the
* properties of a geometric sequence, it is not possible for the sum of
* these tables to exceed the value of the ending point table.
*
* Example table size sequence requiring no compaction:
* 64, 32, 16, 8, 4, 2, 1
*
* Example table size sequence where compaction segment end is set to
* the last table. Since the segment end is exclusive, the last table is
* excluded during subsequent compaction and the table with size 3 is
* the final table included:
* 64, 32, 16, 8, 4, 3, 1
*/
for (i = n - 1; i > 0; i--) {
if (sizes[i - 1] < sizes[i] * 2) {
seg.end = i + 1;
bytes = sizes[i];
break;
min_seg.start = prev;
min_seg.bytes += sizes[prev];
}
}
reftable_free(segs);
return min_seg;
/*
* Find the starting table of the compaction segment by iterating
* through the remaining tables and keeping track of the accumulated
* size of all tables seen from the segment end table. The previous
* table is compared to the accumulated size because the tables from the
* segment end are merged backwards recursively.
*
* Note that we keep iterating even after we have found the first
* starting point. This is because there may be tables in the stack
* preceding that first starting point which violate the geometric
* sequence.
*
* Example compaction segment start set to table with size 32:
* 128, 32, 16, 8, 4, 3, 1
*/
for (; i > 0; i--) {
uint64_t curr = bytes;
bytes += sizes[i - 1];
if (sizes[i - 1] < curr * 2) {
seg.start = i - 1;
seg.bytes = bytes;
}
}
return seg;
}
static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st)

View File

@ -19,7 +19,6 @@ struct reftable_stack {
int list_fd;
char *reftable_dir;
int disable_auto_compact;
struct reftable_write_options config;
@ -33,12 +32,9 @@ int read_lines(const char *filename, char ***lines);
struct segment {
size_t start, end;
int log;
uint64_t bytes;
};
int fastlog2(uint64_t sz);
struct segment *sizes_to_segments(size_t *seglen, uint64_t *sizes, size_t n);
struct segment suggest_compaction_segment(uint64_t *sizes, size_t n);
#endif

View File

@ -325,7 +325,7 @@ static void test_reftable_stack_transaction_api_performs_auto_compaction(void)
* we can ensure that we indeed honor this setting and have
* better control over when exactly auto compaction runs.
*/
st->disable_auto_compact = i != n;
st->config.disable_auto_compact = i != n;
err = reftable_stack_new_addition(&add, st);
EXPECT_ERR(err);
@ -497,6 +497,7 @@ static void test_reftable_stack_add(void)
struct reftable_write_options cfg = {
.exact_log_message = 1,
.default_permissions = 0660,
.disable_auto_compact = 1,
};
struct reftable_stack *st = NULL;
char *dir = get_tmp_dir(__LINE__);
@ -508,7 +509,6 @@ static void test_reftable_stack_add(void)
err = reftable_new_stack(&st, dir, cfg);
EXPECT_ERR(err);
st->disable_auto_compact = 1;
for (i = 0; i < N; i++) {
char buf[256];
@ -770,59 +770,13 @@ static void test_reftable_stack_hash_id(void)
clear_dir(dir);
}
static void test_log2(void)
{
EXPECT(1 == fastlog2(3));
EXPECT(2 == fastlog2(4));
EXPECT(2 == fastlog2(5));
}
static void test_sizes_to_segments(void)
{
uint64_t sizes[] = { 2, 3, 4, 5, 7, 9 };
/* .................0 1 2 3 4 5 */
size_t seglen = 0;
struct segment *segs =
sizes_to_segments(&seglen, sizes, ARRAY_SIZE(sizes));
EXPECT(segs[2].log == 3);
EXPECT(segs[2].start == 5);
EXPECT(segs[2].end == 6);
EXPECT(segs[1].log == 2);
EXPECT(segs[1].start == 2);
EXPECT(segs[1].end == 5);
reftable_free(segs);
}
static void test_sizes_to_segments_empty(void)
{
size_t seglen = 0;
struct segment *segs = sizes_to_segments(&seglen, NULL, 0);
EXPECT(seglen == 0);
reftable_free(segs);
}
static void test_sizes_to_segments_all_equal(void)
{
uint64_t sizes[] = { 5, 5 };
size_t seglen = 0;
struct segment *segs =
sizes_to_segments(&seglen, sizes, ARRAY_SIZE(sizes));
EXPECT(seglen == 1);
EXPECT(segs[0].start == 0);
EXPECT(segs[0].end == 2);
reftable_free(segs);
}
static void test_suggest_compaction_segment(void)
{
uint64_t sizes[] = { 128, 64, 17, 16, 9, 9, 9, 16, 16 };
/* .................0 1 2 3 4 5 6 */
uint64_t sizes[] = { 512, 64, 17, 16, 9, 9, 9, 16, 2, 16 };
struct segment min =
suggest_compaction_segment(sizes, ARRAY_SIZE(sizes));
EXPECT(min.start == 2);
EXPECT(min.end == 7);
EXPECT(min.start == 1);
EXPECT(min.end == 10);
}
static void test_suggest_compaction_segment_nothing(void)
@ -933,9 +887,21 @@ static void test_empty_add(void)
reftable_stack_destroy(st2);
}
static int fastlog2(uint64_t sz)
{
int l = 0;
if (sz == 0)
return 0;
for (; sz; sz /= 2)
l++;
return l - 1;
}
static void test_reftable_stack_auto_compaction(void)
{
struct reftable_write_options cfg = { 0 };
struct reftable_write_options cfg = {
.disable_auto_compact = 1,
};
struct reftable_stack *st = NULL;
char *dir = get_tmp_dir(__LINE__);
@ -945,7 +911,6 @@ static void test_reftable_stack_auto_compaction(void)
err = reftable_new_stack(&st, dir, cfg);
EXPECT_ERR(err);
st->disable_auto_compact = 1; /* call manually below for coverage. */
for (i = 0; i < N; i++) {
char name[100];
struct reftable_ref_record ref = {
@ -994,7 +959,7 @@ static void test_reftable_stack_add_performs_auto_compaction(void)
* we can ensure that we indeed honor this setting and have
* better control over when exactly auto compaction runs.
*/
st->disable_auto_compact = i != n;
st->config.disable_auto_compact = i != n;
strbuf_reset(&refname);
strbuf_addf(&refname, "branch-%04d", i);
@ -1121,7 +1086,6 @@ static void test_reftable_stack_compaction_concurrent_clean(void)
int stack_test_main(int argc, const char *argv[])
{
RUN_TEST(test_empty_add);
RUN_TEST(test_log2);
RUN_TEST(test_names_equal);
RUN_TEST(test_parse_names);
RUN_TEST(test_read_file);
@ -1142,9 +1106,6 @@ int stack_test_main(int argc, const char *argv[])
RUN_TEST(test_reftable_stack_update_index_check);
RUN_TEST(test_reftable_stack_uptodate);
RUN_TEST(test_reftable_stack_validate_refname);
RUN_TEST(test_sizes_to_segments);
RUN_TEST(test_sizes_to_segments_all_equal);
RUN_TEST(test_sizes_to_segments_empty);
RUN_TEST(test_suggest_compaction_segment);
RUN_TEST(test_suggest_compaction_segment_nothing);
return 0;

View File

@ -121,7 +121,7 @@ test_expect_reftable_perms () {
umask $umask &&
git init --shared=$shared repo &&
test_commit -C repo A &&
test_line_count = 3 repo/.git/reftable/tables.list &&
test_line_count = 2 repo/.git/reftable/tables.list &&
git -C repo pack-refs
) &&
test_expect_perms "$expect" repo/.git/reftable/tables.list &&
@ -324,12 +324,46 @@ test_expect_success 'ref transaction: writes cause auto-compaction' '
test_line_count = 1 repo/.git/reftable/tables.list &&
test_commit -C repo --no-tag A &&
test_line_count = 2 repo/.git/reftable/tables.list &&
test_line_count = 1 repo/.git/reftable/tables.list &&
test_commit -C repo --no-tag B &&
test_line_count = 1 repo/.git/reftable/tables.list
'
test_expect_success 'ref transaction: env var disables compaction' '
test_when_finished "rm -rf repo" &&
git init repo &&
test_commit -C repo A &&
start=$(wc -l <repo/.git/reftable/tables.list) &&
iterations=5 &&
expected=$((start + iterations)) &&
for i in $(test_seq $iterations)
do
GIT_TEST_REFTABLE_AUTOCOMPACTION=false \
git -C repo update-ref branch-$i HEAD || return 1
done &&
test_line_count = $expected repo/.git/reftable/tables.list &&
git -C repo update-ref foo HEAD &&
test_line_count -lt $expected repo/.git/reftable/tables.list
'
test_expect_success 'ref transaction: alternating table sizes are compacted' '
test_when_finished "rm -rf repo" &&
git init repo &&
test_commit -C repo A &&
for i in $(test_seq 5)
do
git -C repo branch -f foo &&
git -C repo branch -d foo || return 1
done &&
test_line_count = 2 repo/.git/reftable/tables.list
'
check_fsync_events () {
local trace="$1" &&
shift &&
@ -355,7 +389,7 @@ test_expect_success 'ref transaction: writes are synced' '
git -C repo -c core.fsync=reference \
-c core.fsyncMethod=fsync update-ref refs/heads/branch HEAD &&
check_fsync_events trace2.txt <<-EOF
"name":"hardware-flush","count":2
"name":"hardware-flush","count":4
EOF
'
@ -387,7 +421,7 @@ test_expect_success 'ref transaction: fails gracefully when auto compaction fail
done ||
exit 1
done &&
test_line_count = 13 .git/reftable/tables.list
test_line_count = 10 .git/reftable/tables.list
)
'
@ -397,8 +431,8 @@ test_expect_success 'pack-refs: compacts tables' '
test_commit -C repo A &&
ls -1 repo/.git/reftable >table-files &&
test_line_count = 4 table-files &&
test_line_count = 3 repo/.git/reftable/tables.list &&
test_line_count = 3 table-files &&
test_line_count = 2 repo/.git/reftable/tables.list &&
git -C repo pack-refs &&
ls -1 repo/.git/reftable >table-files &&
@ -439,7 +473,7 @@ test_expect_success "$command: auto compaction" '
# The tables should have been auto-compacted, and thus auto
# compaction should not have to do anything.
ls -1 .git/reftable >tables-expect &&
test_line_count = 4 tables-expect &&
test_line_count = 3 tables-expect &&
git $command --auto &&
ls -1 .git/reftable >tables-actual &&
test_cmp tables-expect tables-actual &&
@ -457,7 +491,7 @@ test_expect_success "$command: auto compaction" '
git branch B &&
git branch C &&
rm .git/reftable/*.lock &&
test_line_count = 5 .git/reftable/tables.list &&
test_line_count = 4 .git/reftable/tables.list &&
git $command --auto &&
test_line_count = 1 .git/reftable/tables.list
@ -837,12 +871,16 @@ test_expect_success 'worktree: pack-refs in main repo packs main refs' '
test_when_finished "rm -rf repo worktree" &&
git init repo &&
test_commit -C repo A &&
git -C repo worktree add ../worktree &&
test_line_count = 3 repo/.git/worktrees/worktree/reftable/tables.list &&
test_line_count = 4 repo/.git/reftable/tables.list &&
GIT_TEST_REFTABLE_AUTOCOMPACTION=false \
git -C repo worktree add ../worktree &&
GIT_TEST_REFTABLE_AUTOCOMPACTION=false \
git -C worktree update-ref refs/worktree/per-worktree HEAD &&
test_line_count = 4 repo/.git/worktrees/worktree/reftable/tables.list &&
test_line_count = 3 repo/.git/reftable/tables.list &&
git -C repo pack-refs &&
test_line_count = 3 repo/.git/worktrees/worktree/reftable/tables.list &&
test_line_count = 4 repo/.git/worktrees/worktree/reftable/tables.list &&
test_line_count = 1 repo/.git/reftable/tables.list
'
@ -850,13 +888,17 @@ test_expect_success 'worktree: pack-refs in worktree packs worktree refs' '
test_when_finished "rm -rf repo worktree" &&
git init repo &&
test_commit -C repo A &&
git -C repo worktree add ../worktree &&
test_line_count = 3 repo/.git/worktrees/worktree/reftable/tables.list &&
test_line_count = 4 repo/.git/reftable/tables.list &&
GIT_TEST_REFTABLE_AUTOCOMPACTION=false \
git -C repo worktree add ../worktree &&
GIT_TEST_REFTABLE_AUTOCOMPACTION=false \
git -C worktree update-ref refs/worktree/per-worktree HEAD &&
test_line_count = 4 repo/.git/worktrees/worktree/reftable/tables.list &&
test_line_count = 3 repo/.git/reftable/tables.list &&
git -C worktree pack-refs &&
test_line_count = 1 repo/.git/worktrees/worktree/reftable/tables.list &&
test_line_count = 4 repo/.git/reftable/tables.list
test_line_count = 3 repo/.git/reftable/tables.list
'
test_expect_success 'worktree: creating shared ref updates main stack' '
@ -870,6 +912,7 @@ test_expect_success 'worktree: creating shared ref updates main stack' '
test_line_count = 1 repo/.git/worktrees/worktree/reftable/tables.list &&
test_line_count = 1 repo/.git/reftable/tables.list &&
GIT_TEST_REFTABLE_AUTOCOMPACTION=false \
git -C worktree update-ref refs/heads/shared HEAD &&
test_line_count = 1 repo/.git/worktrees/worktree/reftable/tables.list &&
test_line_count = 2 repo/.git/reftable/tables.list