Added INFO LATENCYSTATS section: latency by percentile distribution/latency by cumulative distribution of latencies (#9462)

# Short description The Redis extended latency stats track per command latencies and enables: - exporting the per-command percentile distribution via the `INFO LATENCYSTATS` command. **( percentile distribution is not mergeable between cluster nodes ).** - exporting the per-command cumulative latency distributions via the `LATENCY HISTOGRAM` command. Using the cumulative distribution of latencies we can merge several stats from different cluster nodes to calculate aggregate metrics . By default, the extended latency monitoring is enabled since the overhead of keeping track of the command latency is very small. If you don't want to track extended latency metrics, you can easily disable it at runtime using the command: - `CONFIG SET latency-tracking no` By default, the exported latency percentiles are the p50, p99, and p999. You can alter them at runtime using the command: - `CONFIG SET latency-tracking-info-percentiles "0.0 50.0 100.0"` ## Some details: - The total size per histogram should sit around 40 KiB. We only allocate those 40KiB when a command was called for the first time. - With regards to the WRITE overhead As seen below, there is no measurable overhead on the achievable ops/sec or full latency spectrum on the client. Including also the measured redis-benchmark for unstable vs this branch. - We track from 1 nanosecond to 1 second ( everything above 1 second is considered +Inf ) ## `INFO LATENCYSTATS` exposition format - Format: `latency_percentiles_usec_<CMDNAME>:p0=XX,p50....` ## `LATENCY HISTOGRAM [command ...]` exposition format Return a cumulative distribution of latencies in the format of a histogram for the specified command names. The histogram is composed of a map of time buckets: - Each representing a latency range, between 1 nanosecond and roughly 1 second. - Each bucket covers twice the previous bucket's range. - Empty buckets are not printed. - Everything above 1 sec is considered +Inf. - At max there will be log2(1000000000)=30 buckets We reply a map for each command in the format: `<command name> : { `calls`: <total command calls> , `histogram` : { <bucket 1> : latency , < bucket 2> : latency, ... } }` Co-authored-by: Oran Agra <oran@redislabs.com>
2022-01-05 12:01:05 +00:00 · 2022-01-05 12:01:05 +00:00 · 5dd15443ac
parent 4d3c4cfac7
commit 5dd15443ac
24 changed files with 568 additions and 6 deletions
--- a/.gitignore
+++ b/.gitignore
@ -28,6 +28,7 @@ src/nodes.conf
 deps/lua/src/lua
 deps/lua/src/luac
 deps/lua/src/liblua.a
+deps/hdr_histogram/libhdrhistogram.a
 tests/tls/*
 .make-*
 .prerequisites
--- a/deps/hdr_histogram/Makefile
+++ b/deps/hdr_histogram/Makefile
@ -9,12 +9,21 @@ DEBUG= -g
 R_CC=$(CC) $(R_CFLAGS)
 R_LD=$(CC) $(R_LDFLAGS)

-hdr_histogram.o: hdr_histogram.h hdr_histogram.c 
+AR= ar
+ARFLAGS= rcs
+
+libhdrhistogram.a: hdr_histogram.o hdr_alloc.o
+	$(AR) $(ARFLAGS) $@ $+
+
+hdr_alloc.o: hdr_alloc.h hdr_alloc.c
+
+hdr_histogram.o: hdr_alloc.o hdr_histogram.h hdr_histogram.c

 .c.o:
 	$(R_CC) -c  $< 

 clean:
 	rm -f *.o
+	rm -f *.a


--- a/deps/hdr_histogram/hdr_alloc.c
+++ b/deps/hdr_histogram/hdr_alloc.c
@ -0,0 +1,34 @@
+/**
+ * hdr_alloc.c
+ * Written by Filipe Oliveira and released to the public domain,
+ * as explained at http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+#include "hdr_alloc.h"
+#include <stdlib.h>
+
+hdrAllocFuncs hdrAllocFns = {
+    .mallocFn = malloc,
+    .callocFn = calloc,
+    .reallocFn = realloc,
+    .freeFn = free,
+};
+
+/* Override hdr' allocators with ones supplied by the user */
+hdrAllocFuncs hdrSetAllocators(hdrAllocFuncs *override) {
+    hdrAllocFuncs orig = hdrAllocFns;
+
+    hdrAllocFns = *override;
+
+    return orig;
+}
+
+/* Reset allocators to use build time defaults */
+void hdrResetAllocators(void) {
+    hdrAllocFns = (hdrAllocFuncs){
+        .mallocFn = malloc,
+        .callocFn = calloc,
+        .reallocFn = realloc,
+        .freeFn = free,
+    };
+}
--- a/deps/hdr_histogram/hdr_alloc.h
+++ b/deps/hdr_histogram/hdr_alloc.h
@ -0,0 +1,47 @@
+/**
+ * hdr_alloc.h
+ * Written by Filipe Oliveira and released to the public domain,
+ * as explained at http://creativecommons.org/publicdomain/zero/1.0/
+ *
+ * Allocator selection.
+ *
+ * This file is used in order to change the HdrHistogram allocator at run
+ * time. */
+
+#ifndef HDR_ALLOC_H
+#define HDR_ALLOC_H
+
+#include <stddef.h> /* for size_t */
+#include <stdint.h>
+
+/* Structure pointing to our actually configured allocators */
+typedef struct hdrAllocFuncs {
+    void *(*mallocFn)(size_t);
+    void *(*callocFn)(size_t, size_t);
+    void *(*reallocFn)(void *, size_t);
+    void (*freeFn)(void *);
+} hdrAllocFuncs;
+
+/* hdr' configured allocator function pointer struct */
+extern hdrAllocFuncs hdrAllocFns;
+
+hdrAllocFuncs hdrSetAllocators(hdrAllocFuncs *ha);
+void hdrResetAllocators(void);
+
+static inline void *hdr_malloc(size_t size) {
+    return hdrAllocFns.mallocFn(size);
+}
+
+static inline void *hdr_calloc(size_t nmemb, size_t size) {
+    return hdrAllocFns.callocFn(nmemb, size);
+}
+
+static inline void *hdr_realloc(void *ptr, size_t size) {
+    return hdrAllocFns.reallocFn(ptr, size);
+}
+
+static inline void hdr_free(void *ptr) {
+    hdrAllocFns.freeFn(ptr);
+}
+
+#endif /* HDR_ALLOC_H */
--- a/deps/hdr_histogram/hdr_histogram.c
+++ b/deps/hdr_histogram/hdr_histogram.c
@ -15,6 +15,12 @@

 #include "hdr_histogram.h"
 #include "hdr_atomic.h"
+#include "hdr_alloc.h"
+
+#define malloc hdr_malloc
+#define calloc hdr_calloc
+#define free hdr_free
+#define realloc hdr_realloc

 /*  ######   #######  ##     ## ##    ## ########  ######  */
 /* ##    ## ##     ## ##     ## ###   ##    ##    ##    ## */
--- a/deps/hdr_histogram/hdr_histogram.h
+++ b/deps/hdr_histogram/hdr_histogram.h
@ -13,7 +13,6 @@
 #include <stdint.h>
 #include <stdbool.h>
 #include <stdio.h>
-
 struct hdr_histogram
 {
    int64_t lowest_trackable_value;
--- a/redis.conf
+++ b/redis.conf
@ -1769,6 +1769,20 @@ slowlog-max-len 128
 # "CONFIG SET latency-monitor-threshold <milliseconds>" if needed.
 latency-monitor-threshold 0

+################################ LATENCY TRACKING ##############################
+
+# The Redis extended latency monitoring tracks the per command latencies and enables
+# exporting the percentile distribution via the INFO latencystats command,
+# and cumulative latency distributions (histograms) via the LATENCY command.
+#
+# By default, the extended latency monitoring is enabled since the overhead
+# of keeping track of the command latency is very small.
+# latency-tracking yes
+
+# By default the exported latency percentiles via the INFO latencystats command
+# are the p50, p99, and p999.
+# latency-tracking-info-percentiles 50.0 99.0 99.9
+
 ############################# EVENT NOTIFICATION ##############################

 # Redis can notify Pub/Sub clients about events happening in the key space.
--- a/src/Makefile
+++ b/src/Makefile
@ -365,7 +365,7 @@ endif

 # redis-server
 $(REDIS_SERVER_NAME): $(REDIS_SERVER_OBJ)
-	$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/lua/src/liblua.a $(FINAL_LIBS)
+	$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/lua/src/liblua.a ../deps/hdr_histogram/libhdrhistogram.a $(FINAL_LIBS)

 # redis-sentinel
 $(REDIS_SENTINEL_NAME): $(REDIS_SERVER_NAME)
@ -385,7 +385,7 @@ $(REDIS_CLI_NAME): $(REDIS_CLI_OBJ)

 # redis-benchmark
 $(REDIS_BENCHMARK_NAME): $(REDIS_BENCHMARK_OBJ)
-	$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/hdr_histogram/hdr_histogram.o $(FINAL_LIBS)
+	$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/hdr_histogram/libhdrhistogram.a $(FINAL_LIBS)

 DEP = $(REDIS_SERVER_OBJ:%.o=%.d) $(REDIS_CLI_OBJ:%.o=%.d) $(REDIS_BENCHMARK_OBJ:%.o=%.d)
 -include $(DEP)
--- a/src/blocked.c
+++ b/src/blocked.c
@ -111,7 +111,8 @@ void blockClient(client *c, int btype) {
 void updateStatsOnUnblock(client *c, long blocked_us, long reply_us){
    const ustime_t total_cmd_duration = c->duration + blocked_us + reply_us;
    c->lastcmd->microseconds += total_cmd_duration;
-
+    if (server.latency_tracking_enabled)
+        updateCommandLatencyHistogram(&(c->lastcmd->latency_histogram), total_cmd_duration*1000);
    /* Log the command into the Slow log if needed. */
    slowlogPushCurrentCommand(c, c->lastcmd, total_cmd_duration);
    /* Log the reply duration event. */
--- a/src/commands.c
+++ b/src/commands.c
@ -4105,6 +4105,20 @@ struct redisCommandArg LATENCY_GRAPH_Args[] = {
 /* LATENCY HELP hints */
 #define LATENCY_HELP_Hints NULL

+/********** LATENCY HISTOGRAM ********************/
+
+/* LATENCY HISTOGRAM history */
+#define LATENCY_HISTOGRAM_History NULL
+
+/* LATENCY HISTOGRAM hints */
+#define LATENCY_HISTOGRAM_Hints NULL
+
+/* LATENCY HISTOGRAM argument table */
+struct redisCommandArg LATENCY_HISTOGRAM_Args[] = {
+{"command",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE},
+{0}
+};
+
 /********** LATENCY HISTORY ********************/

 /* LATENCY HISTORY history */
@ -4146,6 +4160,7 @@ struct redisCommand LATENCY_Subcommands[] = {
 {"doctor","Return a human readable latency analysis report.","O(1)","2.8.13",CMD_DOC_NONE,NULL,NULL,COMMAND_GROUP_SERVER,LATENCY_DOCTOR_History,LATENCY_DOCTOR_Hints,latencyCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0},
 {"graph","Return a latency graph for the event.","O(1)","2.8.13",CMD_DOC_NONE,NULL,NULL,COMMAND_GROUP_SERVER,LATENCY_GRAPH_History,LATENCY_GRAPH_Hints,latencyCommand,3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,.args=LATENCY_GRAPH_Args},
 {"help","Show helpful text about the different subcommands.","O(1)","2.8.13",CMD_DOC_NONE,NULL,NULL,COMMAND_GROUP_SERVER,LATENCY_HELP_History,LATENCY_HELP_Hints,latencyCommand,2,CMD_LOADING|CMD_STALE,0},
+{"histogram","Return the cumulative distribution of latencies of a subset of commands or all.","O(N) where N is the number of commands with latency information being retrieved.","7.0.0",CMD_DOC_NONE,NULL,NULL,COMMAND_GROUP_SERVER,LATENCY_HISTOGRAM_History,LATENCY_HISTOGRAM_Hints,latencyCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,.args=LATENCY_HISTOGRAM_Args},
 {"history","Return timestamp-latency samples for the event.","O(1)","2.8.13",CMD_DOC_NONE,NULL,NULL,COMMAND_GROUP_SERVER,LATENCY_HISTORY_History,LATENCY_HISTORY_Hints,latencyCommand,3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,.args=LATENCY_HISTORY_Args},
 {"latest","Return the latest latency samples for all events.","O(1)","2.8.13",CMD_DOC_NONE,NULL,NULL,COMMAND_GROUP_SERVER,LATENCY_LATEST_History,LATENCY_LATEST_Hints,latencyCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0},
 {"reset","Reset latency data for one or more events.","O(1)","2.8.13",CMD_DOC_NONE,NULL,NULL,COMMAND_GROUP_SERVER,LATENCY_RESET_History,LATENCY_RESET_Hints,latencyCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,.args=LATENCY_RESET_Args},
--- a/src/commands/latency-histogram.json
+++ b/src/commands/latency-histogram.json
@ -0,0 +1,25 @@
+{
+    "HISTOGRAM": {
+        "summary": "Return the cumulative distribution of latencies of a subset of commands or all.",
+        "complexity": "O(N) where N is the number of commands with latency information being retrieved.",
+        "group": "server",
+        "since": "7.0.0",
+        "arity": -2,
+        "container": "LATENCY",
+        "function": "latencyCommand",
+        "command_flags": [
+            "ADMIN",
+            "NOSCRIPT",
+            "LOADING",
+            "STALE"
+        ],
+        "arguments": [
+            {
+                "name": "COMMAND",
+                "type": "string",
+                "optional": true,
+                "multiple": true
+            }
+        ]
+    }
+}
--- a/src/config.c
+++ b/src/config.c
@ -2660,6 +2660,71 @@ int allowProtectedAction(int config, client *c) {
           (config == PROTECTED_ACTION_ALLOWED_LOCAL && islocalClient(c));
 }

+
+static int setConfigLatencyTrackingInfoPercentilesOutputOption(typeData data, sds *argv, int argc, const char **err) {
+    UNUSED(data);
+    zfree(server.latency_tracking_info_percentiles);
+    server.latency_tracking_info_percentiles = NULL;
+    server.latency_tracking_info_percentiles_len = argc;
+
+    /* Special case: treat single arg "" as zero args indicating empty percentile configuration */
+    if (argc == 1 && sdslen(argv[0]) == 0)
+        server.latency_tracking_info_percentiles_len = 0;
+    else
+        server.latency_tracking_info_percentiles = zmalloc(sizeof(double)*argc);
+
+    for (int j = 0; j < server.latency_tracking_info_percentiles_len; j++) {
+        double percentile;
+        if (!string2d(argv[j], sdslen(argv[j]), &percentile)) {
+            *err = "Invalid latency-tracking-info-percentiles parameters";
+            goto configerr;
+        }
+        if (percentile > 100.0 || percentile < 0.0) {
+            *err = "latency-tracking-info-percentiles parameters should sit between [0.0,100.0]";
+            goto configerr;
+        }
+        server.latency_tracking_info_percentiles[j] = percentile;
+    }
+
+    return 1;
+configerr:
+    zfree(server.latency_tracking_info_percentiles);
+    server.latency_tracking_info_percentiles = NULL;
+    server.latency_tracking_info_percentiles_len = 0;
+    return 0;
+}
+
+static sds getConfigLatencyTrackingInfoPercentilesOutputOption(typeData data) {
+    UNUSED(data);
+    sds buf = sdsempty();
+    for (int j = 0; j < server.latency_tracking_info_percentiles_len; j++) {
+        buf = sdscatprintf(buf,"%f",
+                        server.latency_tracking_info_percentiles[j]);
+        if (j != server.latency_tracking_info_percentiles_len-1)
+            buf = sdscatlen(buf," ",1);
+    }
+    return buf;
+}
+
+/* Rewrite the latency-tracking-info-percentiles option. */
+void rewriteConfigLatencyTrackingInfoPercentilesOutputOption(typeData data, const char *name, struct rewriteConfigState *state) {
+    UNUSED(data);
+    sds line = sdsnew(name);
+    /* Rewrite latency-tracking-info-percentiles parameters,
+     * or an empty 'latency-tracking-info-percentiles ""' line to avoid the
+     * defaults from being used.
+     */
+    if (!server.latency_tracking_info_percentiles_len) {
+        line = sdscat(line," \"\"");
+    } else {
+        for (int j = 0; j < server.latency_tracking_info_percentiles_len; j++) {
+            line = sdscatprintf(line," %f",
+                server.latency_tracking_info_percentiles[j]);
+        }
+    }
+    rewriteConfigRewriteLine(state,name,line,1);
+}
+
 standardConfig configs[] = {
    /* Bool configs */
    createBoolConfig("rdbchecksum", NULL, IMMUTABLE_CONFIG, server.rdb_checksum, 1, NULL, NULL),
@ -2705,6 +2770,7 @@ standardConfig configs[] = {
    createBoolConfig("disable-thp", NULL, MODIFIABLE_CONFIG, server.disable_thp, 1, NULL, NULL),
    createBoolConfig("cluster-allow-replica-migration", NULL, MODIFIABLE_CONFIG, server.cluster_allow_replica_migration, 1, NULL, NULL),
    createBoolConfig("replica-announced", NULL, MODIFIABLE_CONFIG, server.replica_announced, 1, NULL, NULL),
+    createBoolConfig("latency-tracking", NULL, MODIFIABLE_CONFIG, server.latency_tracking_enabled, 1, NULL, NULL),
    createBoolConfig("aof-disable-auto-gc", NULL, MODIFIABLE_CONFIG, server.aof_disable_auto_gc, 0, NULL, updateAofAutoGCEnabled),
    
    /* String Configs */
@ -2857,6 +2923,7 @@ standardConfig configs[] = {
    createSpecialConfig("notify-keyspace-events", NULL, MODIFIABLE_CONFIG, setConfigNotifyKeyspaceEventsOption, getConfigNotifyKeyspaceEventsOption, rewriteConfigNotifyKeyspaceEventsOption, NULL),
    createSpecialConfig("bind", NULL, MODIFIABLE_CONFIG | MULTI_ARG_CONFIG, setConfigBindOption, getConfigBindOption, rewriteConfigBindOption, applyBind),
    createSpecialConfig("replicaof", "slaveof", IMMUTABLE_CONFIG | MULTI_ARG_CONFIG, setConfigReplicaOfOption, getConfigReplicaOfOption, rewriteConfigReplicaOfOption, NULL),
+    createSpecialConfig("latency-tracking-info-percentiles", NULL, MODIFIABLE_CONFIG | MULTI_ARG_CONFIG, setConfigLatencyTrackingInfoPercentilesOutputOption, getConfigLatencyTrackingInfoPercentilesOutputOption, rewriteConfigLatencyTrackingInfoPercentilesOutputOption, NULL),

    /* NULL Terminator */
    {NULL}
--- a/src/latency.c
+++ b/src/latency.c
@ -490,6 +490,78 @@ sds createLatencyReport(void) {

 /* ---------------------- Latency command implementation -------------------- */

+/* latencyCommand() helper to produce a map of time buckets,
+ * each representing a latency range,
+ * between 1 nanosecond and roughly 1 second.
+ * Each bucket covers twice the previous bucket's range.
+ * Empty buckets are not printed.
+ * Everything above 1 sec is considered +Inf.
+ * At max there will be log2(1000000000)=30 buckets */
+void fillCommandCDF(client *c, struct hdr_histogram* histogram) {
+    addReplyMapLen(c,2);
+    addReplyBulkCString(c,"calls");
+    addReplyLongLong(c,(long long) histogram->total_count);
+    addReplyBulkCString(c,"histogram_usec");
+    void *replylen = addReplyDeferredLen(c);
+    int samples = 0;
+    struct hdr_iter iter;
+    hdr_iter_log_init(&iter,histogram,1024,2);
+    int64_t previous_count = 0;
+    while (hdr_iter_next(&iter)) {
+        const int64_t micros = iter.highest_equivalent_value / 1000;
+        const int64_t cumulative_count = iter.cumulative_count;
+        if(cumulative_count > previous_count){
+            addReplyLongLong(c,(long long) micros);
+            addReplyLongLong(c,(long long) cumulative_count);
+            samples++;
+        }
+        previous_count = cumulative_count;
+    }
+    setDeferredMapLen(c,replylen,samples);
+}
+
+/* latencyCommand() helper to produce for all commands,
+ * a per command cumulative distribution of latencies. */
+void latencyAllCommandsFillCDF(client *c) {
+    dictIterator *di = dictGetSafeIterator(server.commands);
+    dictEntry *de;
+    struct redisCommand *cmd;
+    void *replylen = addReplyDeferredLen(c);
+    int command_with_data = 0;
+    while((de = dictNext(di)) != NULL) {
+        cmd = (struct redisCommand *) dictGetVal(de);
+        if (!cmd->latency_histogram)
+            continue;
+        addReplyBulkCString(c,cmd->name);
+        fillCommandCDF(c, cmd->latency_histogram);
+        command_with_data++;
+    }
+    dictReleaseIterator(di);
+    setDeferredMapLen(c,replylen,command_with_data);
+}
+
+/* latencyCommand() helper to produce for a specific command set,
+ * a per command cumulative distribution of latencies. */
+void latencySpecificCommandsFillCDF(client *c) {
+    void *replylen = addReplyDeferredLen(c);
+    int command_with_data = 0;
+    for (int j = 2; j < c->argc; j++){
+        struct redisCommand *cmd = dictFetchValue(server.commands, c->argv[j]->ptr);
+        /* If the command does not exist we skip the reply */
+        if (cmd == NULL) {
+            continue;
+        }
+        /* If no latency info we reply with the same format as non empty histograms */
+        if (!cmd->latency_histogram) {
+            continue;
+        }
+        addReplyBulkCString(c,c->argv[j]->ptr);
+        fillCommandCDF(c, cmd->latency_histogram);
+        command_with_data++;
+    }
+    setDeferredMapLen(c,replylen,command_with_data);
+}
+
 /* latencyCommand() helper to produce a time-delay reply for all the samples
 * in memory for the specified time series. */
 void latencyCommandReplyWithSamples(client *c, struct latencyTimeSeries *ts) {
@ -582,6 +654,7 @@ sds latencyCommandGenSparkeline(char *event, struct latencyTimeSeries *ts) {
 * LATENCY DOCTOR: returns a human readable analysis of instance latency.
 * LATENCY GRAPH: provide an ASCII graph of the latency of the specified event.
 * LATENCY RESET: reset data of a specified event or all the data if no event provided.
+ * LATENCY HISTOGRAM: return a cumulative distribution of latencies in the format of an histogram for the specified command names.
 */
 void latencyCommand(client *c) {
    struct latencyTimeSeries *ts;
@ -628,6 +701,13 @@ void latencyCommand(client *c) {
                resets += latencyResetEvent(c->argv[j]->ptr);
            addReplyLongLong(c,resets);
        }
+    } else if (!strcasecmp(c->argv[1]->ptr,"histogram") && c->argc >= 2) {
+        /* LATENCY HISTOGRAM*/
+        if (c->argc == 2) {
+            latencyAllCommandsFillCDF(c);
+        } else {
+            latencySpecificCommandsFillCDF(c);
+        }
    } else if (!strcasecmp(c->argv[1]->ptr,"help") && c->argc == 2) {
        const char *help[] = {
 "DOCTOR",
@ -641,6 +721,9 @@ void latencyCommand(client *c) {
 "RESET [<event> ...]",
 "    Reset latency data of one or more <event> classes.",
 "    (default: reset all data for all event classes)",
+"HISTOGRAM [COMMAND ...]",
+"    Return a cumulative distribution of latencies in the format of a histogram for the specified command names.",
+"    If no commands are specified then all histograms are replied.",
 NULL
        };
        addReplyHelp(c, help);
--- a/src/module.c
+++ b/src/module.c
@ -9589,6 +9589,10 @@ void moduleUnregisterCommands(struct RedisModule *module) {
                sdsfree((sds)cmd->summary);
                sdsfree((sds)cmd->since);
                sdsfree((sds)cmd->complexity);
+                if (cmd->latency_histogram) {
+                    hdr_close(cmd->latency_histogram);
+                    cmd->latency_histogram = NULL;
+                }
                zfree(cmd->args);
                zfree(cmd);
                zfree(cp);
--- a/src/server.c
+++ b/src/server.c
@ -36,6 +36,7 @@
 #include "atomicvar.h"
 #include "mt19937-64.h"
 #include "functions.h"
+#include "hdr_alloc.h"

 #include <time.h>
 #include <signal.h>
@ -1781,6 +1782,13 @@ void initServerConfig(void) {
    server.page_size = sysconf(_SC_PAGESIZE);
    server.pause_cron = 0;

+    server.latency_tracking_enabled = 1;
+    server.latency_tracking_info_percentiles_len = 3;
+    server.latency_tracking_info_percentiles = zmalloc(sizeof(double)*(server.latency_tracking_info_percentiles_len));
+    server.latency_tracking_info_percentiles[0] = 50.0;  /* p50 */
+    server.latency_tracking_info_percentiles[1] = 99.0;  /* p99 */
+    server.latency_tracking_info_percentiles[2] = 99.9;  /* p999 */
+
    unsigned int lruclock = getLRUClock();
    atomicSet(server.lruclock,lruclock);
    resetServerSaveParams();
@ -1789,6 +1797,15 @@ void initServerConfig(void) {
    appendServerSaveParams(300,100);  /* save after 5 minutes and 100 changes */
    appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */

+    /* Specify the allocation function for the hdr histogram */
+    hdrAllocFuncs hdrallocfn = {
+        .mallocFn = zmalloc,
+        .callocFn = zcalloc_num,
+        .reallocFn = zrealloc,
+        .freeFn = zfree,
+    };
+    hdrSetAllocators(&hdrallocfn);
+
    /* Replication related */
    server.masterhost = NULL;
    server.masterport = 6379;
@ -2610,6 +2627,10 @@ void populateCommandStructure(struct redisCommand *c) {
    c->key_specs = c->key_specs_static;
    c->key_specs_max = STATIC_KEY_SPECS_NUM;

+    /* We start with an unallocated histogram and only allocate memory when a command
+     * has been issued for the first time */
+    c->latency_histogram = NULL;
+
    for (int i = 0; i < STATIC_KEY_SPECS_NUM; i++) {
        if (c->key_specs[i].begin_search_type == KSPEC_BS_INVALID)
            break;
@ -2683,6 +2704,10 @@ void resetCommandTableStats(dict* commands) {
        c->calls = 0;
        c->rejected_calls = 0;
        c->failed_calls = 0;
+        if(c->latency_histogram) {
+            hdr_close(c->latency_histogram);
+            c->latency_histogram = NULL;
+        }
        if (c->subcommands_dict)
            resetCommandTableStats(c->subcommands_dict);
    }
@ -2918,6 +2943,19 @@ void slowlogPushCurrentCommand(client *c, struct redisCommand *cmd, ustime_t dur
    slowlogPushEntryIfNeeded(c,argv,argc,duration);
 }

+/* This function is called in order to update the total command histogram duration.
+ * The latency unit is nano-seconds.
+ * If needed it will allocate the histogram memory and trim the duration to the upper/lower tracking limits*/
+void updateCommandLatencyHistogram(struct hdr_histogram **latency_histogram, int64_t duration_hist){
+    if (duration_hist < LATENCY_HISTOGRAM_MIN_VALUE)
+        duration_hist=LATENCY_HISTOGRAM_MIN_VALUE;
+    if (duration_hist>LATENCY_HISTOGRAM_MAX_VALUE)
+        duration_hist=LATENCY_HISTOGRAM_MAX_VALUE;
+    if (*latency_histogram==NULL)
+        hdr_init(LATENCY_HISTOGRAM_MIN_VALUE,LATENCY_HISTOGRAM_MAX_VALUE,LATENCY_HISTOGRAM_PRECISION,latency_histogram);
+    hdr_record_value(*latency_histogram,duration_hist);
+}
+
 /* Handle the alsoPropagate() API to handle commands that want to propagate
 * multiple separated commands. Note that alsoPropagate() is not affected
 * by CLIENT_PREVENT_PROP flag. */
@ -3104,6 +3142,9 @@ void call(client *c, int flags) {
    if (flags & CMD_CALL_STATS) {
        real_cmd->microseconds += duration;
        real_cmd->calls++;
+        /* If the client is blocked we will handle latency stats when it is unblocked. */
+        if (server.latency_tracking_enabled && !(c->flags & CLIENT_BLOCKED))
+            updateCommandLatencyHistogram(&(real_cmd->latency_histogram), duration*1000);
    }

    /* Propagate the command into the AOF and replication link.
@ -4573,6 +4614,19 @@ void bytesToHuman(char *s, unsigned long long n) {
    }
 }

+/* Fill percentile distribution of latencies. */
+sds fillPercentileDistributionLatencies(sds info, const char* histogram_name, struct hdr_histogram* histogram) {
+    info = sdscatfmt(info,"latency_percentiles_usec_%s:",histogram_name);
+    for (int j = 0; j < server.latency_tracking_info_percentiles_len; j++) {
+        info = sdscatprintf(info,"p%f=%.3f", server.latency_tracking_info_percentiles[j],
+            ((double)hdr_value_at_percentile(histogram,server.latency_tracking_info_percentiles[j]))/1000.0f);
+        if (j != server.latency_tracking_info_percentiles_len-1)
+            info = sdscatlen(info,",",1);
+        }
+    info = sdscatprintf(info,"\r\n");
+    return info;
+}
+
 sds getFullCommandName(struct redisCommand *cmd) {
    if (!cmd->parent) {
        return sdsnew(cmd->name);
@ -5346,6 +5400,27 @@ sds genRedisInfoString(const char *section) {
        raxStop(&ri);
    }

+    /* Latency by percentile distribution per command */
+    if (allsections || !strcasecmp(section,"latencystats")) {
+        if (sections++) info = sdscat(info,"\r\n");
+        info = sdscatprintf(info, "# Latencystats\r\n");
+        if (server.latency_tracking_enabled) {
+            struct redisCommand *c;
+            dictEntry *de;
+            dictIterator *di;
+            di = dictGetSafeIterator(server.commands);
+            while((de = dictNext(di)) != NULL) {
+                char *tmpsafe;
+                c = (struct redisCommand *) dictGetVal(de);
+                if (!c->latency_histogram)
+                    continue;
+                info = fillPercentileDistributionLatencies(info,getSafeInfoString(c->name, strlen(c->name), &tmpsafe),c->latency_histogram);
+                if (tmpsafe != NULL) zfree(tmpsafe);
+            }
+            dictReleaseIterator(di);
+        }
+    }
+
    /* Cluster */
    if (allsections || defsections || !strcasecmp(section,"cluster")) {
        if (sections++) info = sdscat(info,"\r\n");
--- a/src/server.h
+++ b/src/server.h
@ -50,6 +50,7 @@
 #include <sys/socket.h>
 #include <lua.h>
 #include <signal.h>
+#include "hdr_histogram.h"

 #ifdef HAVE_LIBSYSTEMD
 #include <systemd/sd-daemon.h>
@ -571,6 +572,13 @@ typedef enum {
 #define serverAssert(_e) ((_e)?(void)0 : (_serverAssert(#_e,__FILE__,__LINE__),redis_unreachable()))
 #define serverPanic(...) _serverPanic(__FILE__,__LINE__,__VA_ARGS__),redis_unreachable()

+/* latency histogram per command init settings */
+#define LATENCY_HISTOGRAM_MIN_VALUE 1L        /* >= 1 nanosec */
+#define LATENCY_HISTOGRAM_MAX_VALUE 1000000000L  /* <= 1 secs */
+#define LATENCY_HISTOGRAM_PRECISION 2  /* Maintain a value precision of 2 significant digits across LATENCY_HISTOGRAM_MIN_VALUE and LATENCY_HISTOGRAM_MAX_VALUE range.
+                                        * Value quantization within the range will thus be no larger than 1/100th (or 1%) of any value.
+                                        * The total size per histogram should sit around 40 KiB Bytes. */
+
 /*-----------------------------------------------------------------------------
 * Data types
 *----------------------------------------------------------------------------*/
@ -1583,6 +1591,9 @@ struct redisServer {
    char *proc_title_template;      /* Process title template format */
    clientBufferLimitsConfig client_obuf_limits[CLIENT_TYPE_OBUF_COUNT];
    int pause_cron;                 /* Don't run cron tasks (debug) */
+    int latency_tracking_enabled;   /* 1 if extended latency tracking is enabled, 0 otherwise. */
+    double *latency_tracking_info_percentiles; /* Extended latency tracking info output percentile list configuration. */
+    int latency_tracking_info_percentiles_len;
    /* AOF persistence */
    int aof_enabled;                /* AOF configuration */
    int aof_state;                  /* AOF_(ON|OFF|WAIT_REWRITE) */
@ -2175,6 +2186,7 @@ struct redisCommand {
                   ACLs. A connection is able to execute a given command if
                   the user associated to the connection has this command
                   bit set in the bitmap of allowed commands. */
+    struct hdr_histogram* latency_histogram; /*points to the command latency command histogram (unit of time nanosecond) */
    keySpec *key_specs;
    keySpec legacy_range_key_spec; /* The legacy (first,last,step) key spec is
                                     * still maintained (if applicable) so that
@ -2782,6 +2794,7 @@ void preventCommandPropagation(client *c);
 void preventCommandAOF(client *c);
 void preventCommandReplication(client *c);
 void slowlogPushCurrentCommand(client *c, struct redisCommand *cmd, ustime_t duration);
+void updateCommandLatencyHistogram(struct hdr_histogram** latency_histogram, int64_t duration_hist);
 int prepareForShutdown(int flags);
 void replyToClientsBlockedOnShutdown(void);
 int abortShutdown(void);
--- a/src/zmalloc.c
+++ b/src/zmalloc.c
@ -181,6 +181,20 @@ void *ztrycalloc_usable(size_t size, size_t *usable) {
 #endif
 }

+/* Allocate memory and zero it or panic.
+ * We need this wrapper to have a calloc compatible signature */
+void *zcalloc_num(size_t num, size_t size) {
+    /* Ensure that the arguments to calloc(), when multiplied, do not wrap.
+     * Division operations are susceptible to divide-by-zero errors so we also check it. */
+    if ((size == 0) || (num > SIZE_MAX/size)) {
+        zmalloc_oom_handler(SIZE_MAX);
+        return NULL;
+    }
+    void *ptr = ztrycalloc_usable(num*size, NULL);
+    if (!ptr) zmalloc_oom_handler(num*size);
+    return ptr;
+}
+
 /* Allocate memory and zero it or panic */
 void *zcalloc(size_t size) {
    void *ptr = ztrycalloc_usable(size, NULL);
--- a/src/zmalloc.h
+++ b/src/zmalloc.h
@ -98,6 +98,7 @@

 void *zmalloc(size_t size);
 void *zcalloc(size_t size);
+void *zcalloc_num(size_t num, size_t size);
 void *zrealloc(void *ptr, size_t size);
 void *ztrymalloc(size_t size);
 void *ztrycalloc(size_t size);
--- a/tests/support/util.tcl
+++ b/tests/support/util.tcl
@ -631,6 +631,12 @@ proc errorrstat {cmd r} {
    }
 }

+proc latencyrstat_percentiles {cmd r} {
+    if {[regexp "\r\nlatency_percentiles_usec_$cmd:(.*?)\r\n" [$r info latencystats] _ value]} {
+        set _ $value
+    }
+}
+
 proc generate_fuzzy_traffic_on_key {key duration} {
    # Commands per type, blocking commands removed
    # TODO: extract these from help.h or elsewhere, and improve to include other types
--- a/tests/unit/info.tcl
+++ b/tests/unit/info.tcl
@ -6,9 +6,90 @@ proc errorstat {cmd} {
    return [errorrstat $cmd r]
 }

+proc latency_percentiles_usec {cmd} {
+    return [latencyrstat_percentiles $cmd r]
+}
+
 start_server {tags {"info" "external:skip"}} {
    start_server {} {

+        test {latencystats: disable/enable} {
+            r config resetstat
+            r CONFIG SET latency-tracking no
+            r set a b
+            assert_match {} [latency_percentiles_usec set]
+            r CONFIG SET latency-tracking yes
+            r set a b
+            assert_match {*p50.000000=*,p99.000000=*,p99.900000=*} [latency_percentiles_usec set]
+            r config resetstat
+            assert_match {} [latency_percentiles_usec set]
+        }
+
+        test {latencystats: configure percentiles} {
+            r config resetstat
+            assert_match {} [latency_percentiles_usec set]
+            r CONFIG SET latency-tracking yes
+            r SET a b
+            r GET a
+            assert_match {*p50.000000=*,p99.000000=*,p99.900000=*} [latency_percentiles_usec set]
+            assert_match {*p50.000000=*,p99.000000=*,p99.900000=*} [latency_percentiles_usec get]
+            r CONFIG SET latency-tracking-info-percentiles "0.0 50.0 100.0"
+            assert_match [r config get latency-tracking-info-percentiles] {latency-tracking-info-percentiles {0.000000 50.000000 100.000000}}
+            assert_match {*p0.000000=*,p50.000000=*,p100.000000=*} [latency_percentiles_usec set]
+            assert_match {*p0.000000=*,p50.000000=*,p100.000000=*} [latency_percentiles_usec get]
+            r config resetstat
+            assert_match {} [latency_percentiles_usec set]
+        }
+
+        test {latencystats: bad configure percentiles} {
+            r config resetstat
+            set configlatencyline [r config get latency-tracking-info-percentiles]
+            catch {r CONFIG SET latency-tracking-info-percentiles "10.0 50.0 a"} e
+            assert_match {ERR CONFIG SET failed*} $e
+            assert_equal [s total_error_replies] 1
+            assert_match [r config get latency-tracking-info-percentiles] $configlatencyline
+            catch {r CONFIG SET latency-tracking-info-percentiles "10.0 50.0 101.0"} e
+            assert_match {ERR CONFIG SET failed*} $e
+            assert_equal [s total_error_replies] 2
+            assert_match [r config get latency-tracking-info-percentiles] $configlatencyline
+            r config resetstat
+            assert_match {} [errorstat ERR]
+        }
+
+        test {latencystats: blocking commands} {
+            r config resetstat
+            r CONFIG SET latency-tracking yes
+            r CONFIG SET latency-tracking-info-percentiles "50.0 99.0 99.9"
+            set rd [redis_deferring_client]
+            r del list1{t}
+
+            $rd blpop list1{t} 0
+            wait_for_blocked_client
+            r lpush list1{t} a
+            assert_equal [$rd read] {list1{t} a}
+            $rd blpop list1{t} 0
+            wait_for_blocked_client
+            r lpush list1{t} b
+            assert_equal [$rd read] {list1{t} b}
+            assert_match {*p50.000000=*,p99.000000=*,p99.900000=*} [latency_percentiles_usec blpop]
+            $rd close
+        }
+
+        test {latencystats: measure latency} {
+            r config resetstat
+            r CONFIG SET latency-tracking yes
+            r CONFIG SET latency-tracking-info-percentiles "50.0"
+            r DEBUG sleep 0.05
+            r SET k v
+            set latencystatline_debug [latency_percentiles_usec debug]
+            set latencystatline_set [latency_percentiles_usec set]
+            regexp "p50.000000=(.+\..+)" $latencystatline_debug -> p50_debug
+            regexp "p50.000000=(.+\..+)" $latencystatline_set -> p50_set
+            assert {$p50_debug >= 50000}
+            assert {$p50_set >= 0}
+            assert {$p50_debug >= $p50_set}
+        } {} {needs:debug}
+
        test {errorstats: failed call authentication error} {
            r config resetstat
            assert_match {} [errorstat ERR]
--- a/tests/unit/latency-monitor.tcl
+++ b/tests/unit/latency-monitor.tcl
@ -1,8 +1,59 @@
+proc latency_histogram {cmd} {
+    return [lindex [r latency histogram $cmd] 1]
+}
+
 start_server {tags {"latency-monitor needs:latency"}} {
    # Set a threshold high enough to avoid spurious latency events.
    r config set latency-monitor-threshold 200
    r latency reset

+    test {LATENCY HISTOGRAM with empty histogram} {
+        r config resetstat
+        assert_match {} [latency_histogram set]
+        assert {[llength [r latency histogram]] == 0}
+    }
+
+    test {LATENCY HISTOGRAM all commands} {
+        r config resetstat
+        r set a b
+        r set c d
+        assert_match {calls 2 histogram_usec *} [latency_histogram set]
+    }
+
+    test {LATENCY HISTOGRAM with a subset of commands} {
+        r config resetstat
+        r set a b
+        r set c d
+        r get a
+        r hset f k v
+        r hgetall f
+        assert_match {calls 2 histogram_usec *} [latency_histogram set]
+        assert_match {calls 1 histogram_usec *} [latency_histogram hset]
+        assert_match {calls 1 histogram_usec *} [latency_histogram hgetall]
+        assert_match {calls 1 histogram_usec *} [latency_histogram get]
+        assert {[llength [r latency histogram]] == 8}
+        assert {[llength [r latency histogram set get]] == 4}
+    }
+
+    test {LATENCY HISTOGRAM command} {
+        r config resetstat
+        r set a b
+        r get a
+        assert {[llength [r latency histogram]] == 4}
+        assert {[llength [r latency histogram set get]] == 4}
+    }
+
+    test {LATENCY HISTOGRAM with wrong command name skips the invalid one} {
+        r config resetstat
+        assert {[llength [r latency histogram blabla]] == 0}
+        assert {[llength [r latency histogram blabla blabla2 set get]] == 0}
+        r set a b
+        r get a
+        assert_match {calls 1 histogram_usec *} [lindex [r latency histogram blabla blabla2 set get] 1]
+        assert_match {calls 1 histogram_usec *} [lindex [r latency histogram blabla blabla2 set get] 3]
+        assert {[string length [r latency histogram blabla set get]] > 0}
+    }
+
    test {Test latency events logging} {
        r debug sleep 0.3
        after 1100
--- a/tests/unit/maxmemory.tcl
+++ b/tests/unit/maxmemory.tcl
@ -326,6 +326,10 @@ proc test_slave_buffers {test_name cmd_count payload_len limit_memory pipeline}
            $master config set client-output-buffer-limit "replica 100000000 100000000 300"
            $master config set repl-backlog-size [expr {10*1024}]

+            # disable latency tracking
+            $master config set latency-tracking no
+            $slave config set latency-tracking no
+
            $slave slaveof $master_host $master_port
            wait_for_condition 50 100 {
                [s 0 master_link_status] eq {up}
@ -412,6 +416,7 @@ test_slave_buffers "replica buffer don't induce eviction" 100000 100 1 0

 start_server {tags {"maxmemory external:skip"}} {
    test {Don't rehash if used memory exceeds maxmemory after rehash} {
+        r config set latency-tracking no
        r config set maxmemory 0
        r config set maxmemory-policy allkeys-random

@ -432,6 +437,7 @@ start_server {tags {"maxmemory external:skip"}} {

 start_server {tags {"maxmemory external:skip"}} {
    test {client tracking don't cause eviction feedback loop} {
+        r config set latency-tracking no
        r config set maxmemory 0
        r config set maxmemory-policy allkeys-lru
        r config set maxmemory-eviction-tenacity 100
--- a/tests/unit/moduleapi/blockonbackground.tcl
+++ b/tests/unit/moduleapi/blockonbackground.tcl
@ -2,6 +2,10 @@ set testmodule [file normalize tests/modules/blockonbackground.so]

 source tests/support/util.tcl

+proc latency_percentiles_usec {cmd} {
+    return [latencyrstat_percentiles $cmd r]
+}
+
 start_server {tags {"modules"}} {
    r module load $testmodule

@ -16,16 +20,21 @@ start_server {tags {"modules"}} {
            assert_equal [r slowlog len] 0
        }
        r config resetstat
+        r config set latency-tracking yes
+        r config set latency-tracking-info-percentiles "50.0"
        r block.debug 200 10000
        if {!$::no_latency} {
            assert_equal [r slowlog len] 1
        }

        set cmdstatline [cmdrstat block.debug r]
+        set latencystatline_debug [latency_percentiles_usec block.debug]

-        regexp "calls=1,usec=(.*?),usec_per_call=(.*?),rejected_calls=0,failed_calls=0" $cmdstatline usec usec_per_call
+        regexp "calls=1,usec=(.*?),usec_per_call=(.*?),rejected_calls=0,failed_calls=0" $cmdstatline -> usec usec_per_call
+        regexp "p50.000000=(.+\..+)" $latencystatline_debug  -> p50
        assert {$usec >= 100000}
        assert {$usec_per_call >= 100000}
+        assert {$p50 >= 100000}
    }

    test { blocked clients time tracking - check blocked command that uses RedisModule_BlockedClientMeasureTimeStart() is tracking background time even in timeout } {
--- a/tests/unit/obuf-limits.tcl
+++ b/tests/unit/obuf-limits.tcl
@ -115,6 +115,7 @@ start_server {tags {"obuf-limits external:skip"}} {
    }

    test {No response for single command if client output buffer hard limit is enforced} {
+        r config set latency-tracking no
        r config set client-output-buffer-limit {normal 100000 0 0}
        # Total size of all items must be more than 100k
        set item [string repeat "x" 1000]