Re-compute active_defrag_running after adjusting defrag configurations (#13020)

Currently, once active defrag starts, we can not adjust active_defrag_running downwards. This is because active_defrag_running will be dynamically compute based on the fragmentation, we think we should not lower the effort when the fragmentation drops. However, we need to note that active_defrag_running will also be dynamically computed based on configurations. In this case, we are not respecting cycle-min or cycle-max. Some people may realize halfway through that defrag consumes a lot and want to adjust it. Previously we could only turn off activedefrag and then turn it on again to adjust active_defrag_running downwards. So in this PR, when a active defrag configuration change is made, we will re-compute it. These configuration items are: - active-defrag-cycle-min - active-defrag-cycle-max - active-defrag-threshold-upper
2024-02-06 19:39:07 +08:00 · 2024-02-06 19:39:07 +08:00 · 13bd3643c2
parent 87eaf119cd
commit 13bd3643c2
5 changed files with 41 additions and 8 deletions
--- a/src/config.c
+++ b/src/config.c
@ -2478,6 +2478,12 @@ static int updatePort(const char **err) {
    return 1;
 }

+static int updateDefragConfiguration(const char **err) {
+    UNUSED(err);
+    server.active_defrag_configuration_changed = 1;
+    return 1;
+}
+
 static int updateJemallocBgThread(const char **err) {
    UNUSED(err);
    set_jemalloc_bg_thread(server.jemalloc_bg_thread);
@ -3164,10 +3170,10 @@ standardConfig static_configs[] = {
    createIntConfig("list-max-listpack-size", "list-max-ziplist-size", MODIFIABLE_CONFIG, INT_MIN, INT_MAX, server.list_max_listpack_size, -2, INTEGER_CONFIG, NULL, NULL),
    createIntConfig("tcp-keepalive", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.tcpkeepalive, 300, INTEGER_CONFIG, NULL, NULL),
    createIntConfig("cluster-migration-barrier", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.cluster_migration_barrier, 1, INTEGER_CONFIG, NULL, NULL),
-    createIntConfig("active-defrag-cycle-min", NULL, MODIFIABLE_CONFIG, 1, 99, server.active_defrag_cycle_min, 1, INTEGER_CONFIG, NULL, NULL), /* Default: 1% CPU min (at lower threshold) */
-    createIntConfig("active-defrag-cycle-max", NULL, MODIFIABLE_CONFIG, 1, 99, server.active_defrag_cycle_max, 25, INTEGER_CONFIG, NULL, NULL), /* Default: 25% CPU max (at upper threshold) */
+    createIntConfig("active-defrag-cycle-min", NULL, MODIFIABLE_CONFIG, 1, 99, server.active_defrag_cycle_min, 1, INTEGER_CONFIG, NULL, updateDefragConfiguration), /* Default: 1% CPU min (at lower threshold) */
+    createIntConfig("active-defrag-cycle-max", NULL, MODIFIABLE_CONFIG, 1, 99, server.active_defrag_cycle_max, 25, INTEGER_CONFIG, NULL, updateDefragConfiguration), /* Default: 25% CPU max (at upper threshold) */
    createIntConfig("active-defrag-threshold-lower", NULL, MODIFIABLE_CONFIG, 0, 1000, server.active_defrag_threshold_lower, 10, INTEGER_CONFIG, NULL, NULL), /* Default: don't defrag when fragmentation is below 10% */
-    createIntConfig("active-defrag-threshold-upper", NULL, MODIFIABLE_CONFIG, 0, 1000, server.active_defrag_threshold_upper, 100, INTEGER_CONFIG, NULL, NULL), /* Default: maximum defrag force at 100% fragmentation */
+    createIntConfig("active-defrag-threshold-upper", NULL, MODIFIABLE_CONFIG, 0, 1000, server.active_defrag_threshold_upper, 100, INTEGER_CONFIG, NULL, updateDefragConfiguration), /* Default: maximum defrag force at 100% fragmentation */
    createIntConfig("lfu-log-factor", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.lfu_log_factor, 10, INTEGER_CONFIG, NULL, NULL),
    createIntConfig("lfu-decay-time", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.lfu_decay_time, 1, INTEGER_CONFIG, NULL, NULL),
    createIntConfig("replica-priority", "slave-priority", MODIFIABLE_CONFIG, 0, INT_MAX, server.slave_priority, 100, INTEGER_CONFIG, NULL, NULL),
--- a/src/defrag.c
+++ b/src/defrag.c
@ -898,7 +898,8 @@ void computeDefragCycles(void) {
            return;
    }

-    /* Calculate the adaptive aggressiveness of the defrag */
+    /* Calculate the adaptive aggressiveness of the defrag based on the current
+     * fragmentation and configurations. */
    int cpu_pct = INTERPOLATE(frag_pct,
            server.active_defrag_threshold_lower,
            server.active_defrag_threshold_upper,
@ -907,10 +908,15 @@ void computeDefragCycles(void) {
    cpu_pct = LIMIT(cpu_pct,
            server.active_defrag_cycle_min,
            server.active_defrag_cycle_max);
-     /* We allow increasing the aggressiveness during a scan, but don't
-      * reduce it. */
-    if (cpu_pct > server.active_defrag_running) {
+
+    /* Normally we allow increasing the aggressiveness during a scan, but don't
+     * reduce it, since we should not lower the aggressiveness when fragmentation
+     * drops. But when a configuration is made, we should reconsider it. */
+    if (cpu_pct > server.active_defrag_running ||
+        server.active_defrag_configuration_changed)
+    {
        server.active_defrag_running = cpu_pct;
+        server.active_defrag_configuration_changed = 0;
        serverLog(LL_VERBOSE,
            "Starting active defrag, frag=%.0f%%, frag_bytes=%zu, cpu=%d%%",
            frag_pct, frag_bytes, cpu_pct);
@ -940,6 +946,7 @@ void activeDefragCycle(void) {
        if (server.active_defrag_running) {
            /* if active defrag was disabled mid-run, start from fresh next time. */
            server.active_defrag_running = 0;
+            server.active_defrag_configuration_changed = 0;
            if (db)
                listEmpty(db->defrag_later);
            defrag_later_current_key = NULL;
@ -963,6 +970,14 @@ void activeDefragCycle(void) {
    run_with_period(1000) {
        computeDefragCycles();
    }
+
+    /* Normally it is checked once a second, but when there is a configuration
+     * change, we want to check it as soon as possible. */
+    if (server.active_defrag_configuration_changed) {
+        computeDefragCycles();
+        server.active_defrag_configuration_changed = 0;
+    }
+
    if (!server.active_defrag_running)
        return;

--- a/src/server.c
+++ b/src/server.c
@ -1556,7 +1556,7 @@ void whileBlockedCron(void) {
     * make sure it was done. */
    serverAssert(server.blocked_last_cron);

-    /* In case we where called too soon, leave right away. This way one time
+    /* In case we were called too soon, leave right away. This way one time
     * jobs after the loop below don't need an if. and we don't bother to start
     * latency monitor if this function is called too often. */
    if (server.blocked_last_cron >= server.mstime)
@ -2066,6 +2066,7 @@ void initServerConfig(void) {
    server.aof_last_incr_size = 0;
    server.aof_last_incr_fsync_offset = 0;
    server.active_defrag_running = 0;
+    server.active_defrag_configuration_changed = 0;
    server.notify_keyspace_events = 0;
    server.blocked_clients = 0;
    memset(server.blocked_clients_by_type,0,
--- a/src/server.h
+++ b/src/server.h
@ -1737,6 +1737,8 @@ struct redisServer {
    int sanitize_dump_payload;      /* Enables deep sanitization for ziplist and listpack in RDB and RESTORE. */
    int skip_checksum_validation;   /* Disable checksum validation for RDB and RESTORE payload. */
    int jemalloc_bg_thread;         /* Enable jemalloc background thread */
+    int active_defrag_configuration_changed; /* defrag configuration has been changed and need to reconsider
+                                              * active_defrag_running in computeDefragCycles. */
    size_t active_defrag_ignore_bytes; /* minimum amount of fragmentation waste to start active defrag */
    int active_defrag_threshold_lower; /* minimum percentage of fragmentation to start active defrag */
    int active_defrag_threshold_upper; /* maximum percentage of fragmentation at which we use maximum effort */
--- a/tests/unit/memefficiency.tcl
+++ b/tests/unit/memefficiency.tcl
@ -80,6 +80,15 @@ run_solo {defrag} {
                    fail "defrag not started."
                }

+                # This test usually runs for a while, during this interval, we test the range.
+                assert_range [s active_defrag_running] 65 75
+                r config set active-defrag-cycle-min 1
+                r config set active-defrag-cycle-max 1
+                after 120 ;# serverCron only updates the info once in 100ms
+                assert_range [s active_defrag_running] 1 1
+                r config set active-defrag-cycle-min 65
+                r config set active-defrag-cycle-max 75
+
                # Wait for the active defrag to stop working.
                wait_for_condition 2000 100 {
                    [s active_defrag_running] eq 0