Lua eval scripts first in first out LRU eviction (#13108)

In some cases, users will abuse lua eval. Each EVAL call generates
a new lua script, which is added to the lua interpreter and cached
to redis-server, consuming a large amount of memory over time.

Since EVAL is mostly the one that abuses the lua cache, and these
won't have pipeline issues (i.e. the script won't disappear
unexpectedly,
and cause errors like it would with SCRIPT LOAD and EVALSHA),
we implement a plain FIFO LRU eviction only for these (not for
scripts loaded with SCRIPT LOAD).

### Implementation notes:
When not abused we'll probably have less than 100 scripts, and when
abused we'll have many thousands. So we use a hard coded value of 500
scripts. And considering that we don't have many scripts, then unlike
keys, we don't need to worry about the memory usage of keeping a true
sorted LRU linked list. We compute the SHA of each script anyway,
and put the script in a dict, we can store a listNode there, and use
it for quick removal and re-insertion into an LRU list each time the
script is used.

### New interfaces:
At the same time, a new `evicted_scripts` field is added to
INFO, which represents the number of evicted eval scripts. Users
can check it to see if they are abusing EVAL.

### benchmark:
`./src/redis-benchmark -P 10 -n 1000000 -r 10000000000 eval "return
__rand_int__" 0`

The simple abuse of eval benchmark test that will create 1 million EVAL
scripts. The performance has been improved by 50%, and the max latency
has dropped from 500ms to 13ms (this may be caused by table expansion
inside Lua when the number of scripts is large). And in the INFO memory,
it used to consume 120MB (server cache) + 310MB (lua engine), but now
it only consumes 70KB (server cache) + 210KB (lua_engine) because of
the scripts eviction.

For non-abusive case of about 100 EVAL scripts, there's no noticeable
change in performance or memory usage.

### unlikely potentially breaking change:
In theory, a user could load a script with EVAL and then call it with
EVALSHA (by calculating the SHA1 value on the client side). A careful
reading of the docs may show that this is a valid scenario, but we
suppose it is extremely rare. It may therefore happen that EVALSHA
targets a script created by EVAL, the script has been evicted in the
meantime, and EVALSHA returns a NOSCRIPT error. This requires more than
500 scripts being used in the same transaction / pipeline.

This solves the second point in #13102.
This commit is contained in:
Binbin 2024-03-13 14:27:41 +08:00 committed by GitHub
parent a8e745117f
commit ad28d222ed
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 194 additions and 24 deletions

View File

@ -46,6 +46,7 @@ void ldbDisable(client *c);
void ldbEnable(client *c);
void evalGenericCommandWithDebugging(client *c, int evalsha);
sds ldbCatStackValue(sds s, lua_State *lua, int idx);
listNode *luaScriptsLRUAdd(client *c, sds sha, int evalsha);
static void dictLuaScriptDestructor(dict *d, void *val) {
UNUSED(d);
@ -58,7 +59,7 @@ static uint64_t dictStrCaseHash(const void *key) {
return dictGenCaseHashFunction((unsigned char*)key, strlen((char*)key));
}
/* server.lua_scripts sha (as sds string) -> scripts (as luaScript) cache. */
/* lctx.lua_scripts sha (as sds string) -> scripts (as luaScript) cache. */
dictType shaScriptObjectDictType = {
dictStrCaseHash, /* hash function */
NULL, /* key dup */
@ -74,6 +75,7 @@ struct luaCtx {
lua_State *lua; /* The Lua interpreter. We use just one for all clients */
client *lua_client; /* The "fake client" to query Redis from Lua */
dict *lua_scripts; /* A dictionary of SHA1 -> Lua scripts */
list *lua_scripts_lru_list; /* A list of SHA1, first in first out LRU eviction. */
unsigned long long lua_scripts_mem; /* Cached scripts' memory + oh */
} lctx;
@ -190,9 +192,10 @@ void scriptingInit(int setup) {
}
/* Initialize a dictionary we use to map SHAs to scripts.
* This is useful for replication, as we need to replicate EVALSHA
* as EVAL, so we need to remember the associated script. */
* Initialize a list we use for lua script evictions, it shares the
* sha with the dictionary, so free fn is not set. */
lctx.lua_scripts = dictCreate(&shaScriptObjectDictType);
lctx.lua_scripts_lru_list = listCreate();
lctx.lua_scripts_mem = 0;
luaRegisterRedisAPI(lua);
@ -265,8 +268,9 @@ void scriptingInit(int setup) {
}
/* Free lua_scripts dict and close lua interpreter. */
void freeLuaScriptsSync(dict *lua_scripts, lua_State *lua) {
void freeLuaScriptsSync(dict *lua_scripts, list *lua_scripts_lru_list, lua_State *lua) {
dictRelease(lua_scripts);
listRelease(lua_scripts_lru_list);
lua_close(lua);
#if !defined(USE_LIBC)
@ -286,9 +290,9 @@ void freeLuaScriptsSync(dict *lua_scripts, lua_State *lua) {
* This function is used in order to reset the scripting environment. */
void scriptingRelease(int async) {
if (async)
freeLuaScriptsAsync(lctx.lua_scripts, lctx.lua);
freeLuaScriptsAsync(lctx.lua_scripts, lctx.lua_scripts_lru_list, lctx.lua);
else
freeLuaScriptsSync(lctx.lua_scripts, lctx.lua);
freeLuaScriptsSync(lctx.lua_scripts, lctx.lua_scripts_lru_list, lctx.lua);
}
void scriptingReset(int async) {
@ -434,8 +438,11 @@ uint64_t evalGetCommandFlags(client *c, uint64_t cmd_flags) {
* exists, and in such a case, it behaves like in the success case.
*
* If 'c' is not NULL, on error the client is informed with an appropriate
* error describing the nature of the problem and the Lua interpreter error. */
sds luaCreateFunction(client *c, robj *body) {
* error describing the nature of the problem and the Lua interpreter error.
*
* 'evalsha' indicating whether the lua function is created from the EVAL context
* or from the SCRIPT LOAD. */
sds luaCreateFunction(client *c, robj *body, int evalsha) {
char funcname[43];
dictEntry *de;
uint64_t script_flags;
@ -480,6 +487,7 @@ sds luaCreateFunction(client *c, robj *body) {
l->body = body;
l->flags = script_flags;
sds sha = sdsnewlen(funcname+2,40);
l->node = luaScriptsLRUAdd(c, sha, evalsha);
int retval = dictAdd(lctx.lua_scripts,sha,l);
serverAssertWithInfo(c ? c : lctx.lua_client,NULL,retval == DICT_OK);
lctx.lua_scripts_mem += sdsZmallocSize(sha) + getStringObjectSdsUsedMemory(body);
@ -487,6 +495,63 @@ sds luaCreateFunction(client *c, robj *body) {
return sha;
}
/* Remove the cached Lua script identified by 'sha'.
 *
 * The script is dropped from two places: the Lua interpreter registry
 * (where it lives under the name "f_<sha>") and the server side script
 * dictionary together with its LRU list node. The cached scripts memory
 * counter is decreased accordingly.
 *
 * 'c' is only used to give context to the assertion; when it is NULL the
 * Lua fake client is reported instead. */
void luaDeleteFunction(client *c, sds sha) {
    /* Build the registry name: "f_" followed by the 40 chars of the sha. */
    char regname[43] = "f_";
    memcpy(regname + 2, sha, 40);
    regname[42] = '\0';

    /* Assigning nil to the registry field removes the function from the
     * interpreter. */
    lua_pushnil(lctx.lua);
    lua_setfield(lctx.lua, LUA_REGISTRYINDEX, regname);

    /* Detach the entry from the dictionary without freeing it yet, since
     * the script value is still needed below. */
    dictEntry *de = dictUnlink(lctx.lua_scripts, sha);
    serverAssertWithInfo(c ? c : lctx.lua_client, NULL, de);
    luaScript *l = dictGetVal(de);

    /* Only `EVAL` scripts are ever deleted, and those always own a node
     * in the LRU list. */
    serverAssert(l->node);
    listDelNode(lctx.lua_scripts_lru_list, l->node);

    /* Account for the released memory while 'sha' and the body are still
     * valid, then finally free the unlinked entry. */
    lctx.lua_scripts_mem -= sdsZmallocSize(sha) + getStringObjectSdsUsedMemory(l->body);
    dictFreeUnlinkedEntry(lctx.lua_scripts, de);
}
/* Track a newly created script in the eviction list.
 *
 * A client abusing EVAL creates a new Lua script on every call, and the
 * cache would otherwise grow without bound. Scripts created via EVAL can be
 * evicted safely (the caller always ships the body, so nothing disappears
 * from under an EVALSHA the way it could for SCRIPT LOAD), hence only those
 * are tracked here. The number of scripts is small, so keeping a real,
 * fully ordered LRU linked list is affordable, unlike for keys.
 *
 * 'evalsha': when non-zero the script is not tracked and NULL is returned
 * (SCRIPT LOAD passes a non-zero value); when zero the script comes from the
 * EVAL context and is appended to the list.
 *
 * Before appending, the oldest entries are evicted until the list holds
 * fewer than LRU_LIST_LENGTH scripts; every eviction bumps
 * server.stat_evictedscripts.
 *
 * Returns the list node created for 'sha'. The caller stores it in the
 * luaScript so the script can be unlinked and re-linked at the tail quickly
 * every time it is used. */
#define LRU_LIST_LENGTH 500
listNode *luaScriptsLRUAdd(client *c, sds sha, int evalsha) {
    /* SCRIPT LOAD scripts never take part in eviction. */
    if (evalsha) return NULL;

    /* Make room: the head of the list is the least recently used script. */
    while (listLength(lctx.lua_scripts_lru_list) >= LRU_LIST_LENGTH) {
        sds victim = listNodeValue(listFirst(lctx.lua_scripts_lru_list));
        luaDeleteFunction(c, victim);
        server.stat_evictedscripts++;
    }

    /* The new script is the most recently used one, so it goes last. */
    listAddNodeTail(lctx.lua_scripts_lru_list, sha);
    return listLast(lctx.lua_scripts_lru_list);
}
void evalGenericCommand(client *c, int evalsha) {
lua_State *lua = lctx.lua;
char funcname[43];
@ -525,7 +590,7 @@ void evalGenericCommand(client *c, int evalsha) {
addReplyErrorObject(c, shared.noscripterr);
return;
}
if (luaCreateFunction(c,c->argv[1]) == NULL) {
if (luaCreateFunction(c, c->argv[1], evalsha) == NULL) {
lua_pop(lua,1); /* remove the error handler from the stack. */
/* The error is sent to the client by luaCreateFunction()
* itself when it returns NULL. */
@ -554,6 +619,13 @@ void evalGenericCommand(client *c, int evalsha) {
luaCallFunction(&rctx, lua, c->argv+3, numkeys, c->argv+3+numkeys, c->argc-3-numkeys, ldb.active);
lua_pop(lua,1); /* Remove the error handler. */
scriptResetRun(&rctx);
if (l->node) {
/* Quick removal and re-insertion after the script is called to
* maintain the LRU list. */
listUnlinkNode(lctx.lua_scripts_lru_list, l->node);
listLinkNodeTail(lctx.lua_scripts_lru_list, l->node);
}
}
void evalCommand(client *c) {
@ -639,7 +711,7 @@ NULL
addReply(c,shared.czero);
}
} else if (c->argc == 3 && !strcasecmp(c->argv[1]->ptr,"load")) {
sds sha = luaCreateFunction(c,c->argv[2]);
sds sha = luaCreateFunction(c, c->argv[2], 1);
if (sha == NULL) return; /* The error was sent by luaCreateFunction(). */
addReplyBulkCBuffer(c,sha,40);
} else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"kill")) {
@ -679,7 +751,8 @@ dict* evalScriptsDict(void) {
unsigned long evalScriptsMemory(void) {
return lctx.lua_scripts_mem +
dictMemUsage(lctx.lua_scripts) +
dictSize(lctx.lua_scripts) * sizeof(luaScript);
dictSize(lctx.lua_scripts) * sizeof(luaScript) +
listLength(lctx.lua_scripts_lru_list) * sizeof(listNode);
}
/* ---------------------------------------------------------------------------

View File

@ -42,9 +42,10 @@ void lazyFreeTrackingTable(void *args[]) {
/* Release the lua_scripts dict. */
void lazyFreeLuaScripts(void *args[]) {
dict *lua_scripts = args[0];
lua_State *lua = args[1];
list *lua_scripts_lru_list = args[1];
lua_State *lua = args[2];
long long len = dictSize(lua_scripts);
freeLuaScriptsSync(lua_scripts, lua);
freeLuaScriptsSync(lua_scripts, lua_scripts_lru_list, lua);
atomicDecr(lazyfree_objects,len);
atomicIncr(lazyfreed_objects,len);
}
@ -196,14 +197,14 @@ void freeTrackingRadixTreeAsync(rax *tracking) {
}
}
/* Free lua_scripts dict, if the dict is huge enough, free it in async way.
/* Free lua_scripts dict and lru list, if the dict is huge enough, free them in async way.
* Close lua interpreter, if there are a lot of lua scripts, close it in async way. */
void freeLuaScriptsAsync(dict *lua_scripts, lua_State *lua) {
void freeLuaScriptsAsync(dict *lua_scripts, list *lua_scripts_lru_list, lua_State *lua) {
if (dictSize(lua_scripts) > LAZYFREE_THRESHOLD) {
atomicIncr(lazyfree_objects,dictSize(lua_scripts));
bioCreateLazyFreeJob(lazyFreeLuaScripts,2,lua_scripts,lua);
bioCreateLazyFreeJob(lazyFreeLuaScripts,3,lua_scripts,lua_scripts_lru_list,lua);
} else {
freeLuaScriptsSync(lua_scripts, lua);
freeLuaScriptsSync(lua_scripts, lua_scripts_lru_list, lua);
}
}

View File

@ -2524,6 +2524,7 @@ void resetServerStats(void) {
server.stat_expire_cycle_time_used = 0;
server.stat_evictedkeys = 0;
server.stat_evictedclients = 0;
server.stat_evictedscripts = 0;
server.stat_total_eviction_exceeded_time = 0;
server.stat_last_eviction_exceeded_time = 0;
server.stat_keyspace_misses = 0;
@ -5826,6 +5827,7 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
"expire_cycle_cpu_milliseconds:%lld\r\n", server.stat_expire_cycle_time_used/1000,
"evicted_keys:%lld\r\n", server.stat_evictedkeys,
"evicted_clients:%lld\r\n", server.stat_evictedclients,
"evicted_scripts:%lld\r\n", server.stat_evictedscripts,
"total_eviction_exceeded_time:%lld\r\n", (server.stat_total_eviction_exceeded_time + current_eviction_exceeded_time) / 1000,
"current_eviction_exceeded_time:%lld\r\n", current_eviction_exceeded_time / 1000,
"keyspace_hits:%lld\r\n", server.stat_keyspace_hits,

View File

@ -1660,6 +1660,7 @@ struct redisServer {
long long stat_expire_cycle_time_used; /* Cumulative microseconds used. */
long long stat_evictedkeys; /* Number of evicted keys (maxmemory) */
long long stat_evictedclients; /* Number of evicted clients */
long long stat_evictedscripts; /* Number of evicted lua scripts. */
long long stat_total_eviction_exceeded_time; /* Total time over the memory limit, unit us */
monotime stat_last_eviction_exceeded_time; /* Timestamp of current eviction start, unit us */
long long stat_keyspace_hits; /* Number of successful lookups of keys */
@ -3384,10 +3385,9 @@ void scriptingInit(int setup);
int ldbRemoveChild(pid_t pid);
void ldbKillForkedSessions(void);
int ldbPendingChildren(void);
sds luaCreateFunction(client *c, robj *body);
void luaLdbLineHook(lua_State *lua, lua_Debug *ar);
void freeLuaScriptsSync(dict *lua_scripts, lua_State *lua);
void freeLuaScriptsAsync(dict *lua_scripts, lua_State *lua);
void freeLuaScriptsSync(dict *lua_scripts, list *lua_scripts_lru_list, lua_State *lua);
void freeLuaScriptsAsync(dict *lua_scripts, list *lua_scripts_lru_list, lua_State *lua);
void freeFunctionsAsync(functionsLibCtx *lib_ctx);
int ldbIsEnabled(void);
void ldbLog(sds entry);
@ -3403,6 +3403,7 @@ int isInsideYieldingLongCommand(void);
/* A cached Lua script, stored as the value of the sha -> script dictionary. */
typedef struct luaScript {
uint64_t flags; /* Script flags, set when the function is created. */
robj *body; /* The script body object. */
listNode *node; /* Node in the lua_scripts_lru_list list, NULL when the script
                 * is not tracked for eviction (e.g. added by SCRIPT LOAD). */
} luaScript;
/* Cache of recently used small arguments to avoid malloc calls. */
#define LUA_CMD_OBJCACHE_SIZE 32

View File

@ -576,13 +576,19 @@ start_server {tags {"scripting"}} {
# script command is only relevant for is_eval Lua
test {SCRIPTING FLUSH - is able to clear the scripts cache?} {
r set mykey myval
r script load {return redis.call('get',KEYS[1])}
set v [r evalsha fd758d1589d044dd850a6f05d52f2eefd27f033f 1 mykey]
assert_equal $v myval
set e ""
r script flush
catch {r evalsha fd758d1589d044dd850a6f05d52f2eefd27f033f 1 mykey} e
set e
} {NOSCRIPT*}
assert_error {NOSCRIPT*} {r evalsha fd758d1589d044dd850a6f05d52f2eefd27f033f 1 mykey}
r eval {return redis.call('get',KEYS[1])} 1 mykey
set v [r evalsha fd758d1589d044dd850a6f05d52f2eefd27f033f 1 mykey]
assert_equal $v myval
r script flush
assert_error {NOSCRIPT*} {r evalsha fd758d1589d044dd850a6f05d52f2eefd27f033f 1 mykey}
}
test {SCRIPTING FLUSH ASYNC} {
for {set j 0} {$j < 100} {incr j} {
@ -1514,6 +1520,93 @@ start_server {tags {"scripting needs:debug external:skip"}} {
assert_equal [r ping] {PONG}
}
}
start_server {tags {"scripting external:skip"}} {
test {Lua scripts eviction does not generate many scripts} {
# Checks that the number of cached EVAL / EVAL_RO scripts stays at 500:
# after creating 1000 distinct scripts, 500 are cached and 500 were evicted.
r script flush
r config resetstat
# "return 1" sha is: e0e1f9fabfc9d4800c877a703b823ac0578ff8db
# "return 500" sha is: 98fe65896b61b785c5ed328a5a0a1421f4f1490c
# Create the first 500 scripts, half with EVAL and half with EVAL_RO.
for {set j 1} {$j <= 250} {incr j} {
r eval "return $j" 0
}
for {set j 251} {$j <= 500} {incr j} {
r eval_ro "return $j" 0
}
# Nothing is evicted yet: the first and the last script are both callable.
assert_equal [s number_of_cached_scripts] 500
assert_equal 1 [r evalsha e0e1f9fabfc9d4800c877a703b823ac0578ff8db 0]
assert_equal 1 [r evalsha_ro e0e1f9fabfc9d4800c877a703b823ac0578ff8db 0]
assert_equal 500 [r evalsha 98fe65896b61b785c5ed328a5a0a1421f4f1490c 0]
assert_equal 500 [r evalsha_ro 98fe65896b61b785c5ed328a5a0a1421f4f1490c 0]
# Create 500 more scripts: every script from "return 1" to "return 500"
# gets evicted to make room for them.
for {set j 501} {$j <= 750} {incr j} {
r eval "return $j" 0
}
for {set j 751} {$j <= 1000} {incr j} {
r eval "return $j" 0
}
# The original scripts are gone, both for EVALSHA and EVALSHA_RO.
assert_error {NOSCRIPT*} {r evalsha e0e1f9fabfc9d4800c877a703b823ac0578ff8db 0}
assert_error {NOSCRIPT*} {r evalsha_ro e0e1f9fabfc9d4800c877a703b823ac0578ff8db 0}
assert_error {NOSCRIPT*} {r evalsha 98fe65896b61b785c5ed328a5a0a1421f4f1490c 0}
assert_error {NOSCRIPT*} {r evalsha_ro 98fe65896b61b785c5ed328a5a0a1421f4f1490c 0}
assert_equal [s evicted_scripts] 500
assert_equal [s number_of_cached_scripts] 500
}
test {Lua scripts eviction is plain LRU} {
    # Checks that using a cached EVAL script, either through EVAL or through
    # EVALSHA, moves it to the tail of the LRU list, so that the next
    # eviction removes the least recently used script instead.
    r script flush
    r config resetstat

    # "return 1" sha is: e0e1f9fabfc9d4800c877a703b823ac0578ff8db
    # "return 2" sha is: 7f923f79fe76194c868d7e1d0820de36700eb649
    # "return 3" sha is: 09d3822de862f46d784e6a36848b4f0736dda47a
    # "return 500" sha is: 98fe65896b61b785c5ed328a5a0a1421f4f1490c
    # "return 1000" sha is: 94f1a7bc9f985a1a1d5a826a85579137d9d840c8

    # Fill the LRU list with 500 scripts.
    for {set j 1} {$j <= 500} {incr j} {
        r eval "return $j" 0
    }

    # Call "return 1" to move it to the tail.
    r eval "return 1" 0

    # Call "return 2" to move it to the tail.
    r evalsha 7f923f79fe76194c868d7e1d0820de36700eb649 0

    # Create a new script, "return 3" will be evicted.
    r eval "return 1000" 0

    # "return 1" is ok since it was moved to tail.
    assert_equal 1 [r evalsha e0e1f9fabfc9d4800c877a703b823ac0578ff8db 0]

    # "return 2" is ok since it was moved to tail. This must use the
    # "return 2" sha, otherwise the EVALSHA promotion is never verified.
    assert_equal 2 [r evalsha 7f923f79fe76194c868d7e1d0820de36700eb649 0]

    # "return 3" was evicted.
    assert_error {NOSCRIPT*} {r evalsha 09d3822de862f46d784e6a36848b4f0736dda47a 0}

    # Others are ok.
    assert_equal 500 [r evalsha 98fe65896b61b785c5ed328a5a0a1421f4f1490c 0]
    assert_equal 1000 [r evalsha 94f1a7bc9f985a1a1d5a826a85579137d9d840c8 0]

    assert_equal [s evicted_scripts] 1
    assert_equal [s number_of_cached_scripts] 500
}
test {Lua scripts eviction does not affect script load} {
# Interleaves SCRIPT LOAD and EVAL, each with 'num' distinct scripts, and
# checks that only the EVAL scripts are subject to the 500 scripts limit.
r script flush
r config resetstat
# NOTE(review): randomRange presumably yields a value within 500..1000;
# the assertions below need num >= 500 - confirm against the helper.
set num [randomRange 500 1000]
for {set j 1} {$j <= $num} {incr j} {
# The two bodies differ, so each iteration adds two distinct scripts.
r script load "return $j"
r eval "return 'str_$j'" 0
}
set evicted [s evicted_scripts]
set cached [s number_of_cached_scripts]
# evicted = num eval scripts - 500 eval scripts
assert_equal $evicted [expr $num-500]
# cached = num load scripts + 500 eval scripts
assert_equal $cached [expr $num+500]
}
}
} ;# is_eval
start_server {tags {"scripting needs:debug"}} {