Fix possible int overflow when hashing an sds. (#9916)

This caused a crash when adding elements larger than 2GB to a set (same goes for hash keys). See #8455.

Details:
* The fix makes the dict hash functions receive a `size_t` instead of an `int`. In practice the dict hash functions
  call siphash, which already takes a `size_t`, and the callers of the hash functions already pass a `size_t`, so the
  fix is trivial (a small illustrative sketch follows this list).
* The issue was reproduced by attempting to add a >2GB value to a set. Appropriate tests were added where I create
  a set with large elements and check basic functionality on it (SADD, SCARD, SPOP, etc.).
* When I added the tests I also slightly refactored all the test code that runs under the `--large-memory` flag.
  This removed code duplication around the test framework's `write_big_bulk` and `read_big_bulk` helpers, and also makes
  sure the framework's huge helper string used by these tests is not allocated when not running under `--large-memory`.
* I also added the _violations.tcl_ unit tests to the full test suite and cleaned up non-relevant list-related
  tests that were in there. This was done in this PR because most of the _violations_ tests are "large memory" tests.
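
Conceptually, the overflow looks like the sketch below. This is not the Redis source, just a minimal illustration
(`hash_with_int_len` and `element_len` are made-up names): `sdslen()` returns a `size_t`, so the length of a >2GB
element no longer fits in an `int`; the implicit narrowing at the old call sites yields a negative length, and since
siphash takes a `size_t`, that negative value is then reinterpreted as an enormous one.

```c
#include <limits.h>
#include <stdint.h>
#include <stdio.h>

/* Old-style prototype: callers pass a size_t length, which silently narrows to int. */
static uint64_t hash_with_int_len(const void *key, int len) {
    (void)key;
    /* siphash itself takes a size_t, so a negative int turns into a huge
     * size_t here, and the hash would read far beyond the buffer. */
    return (uint64_t)(size_t)len;
}

int main(void) {
    size_t element_len = (size_t)INT_MAX + 1;   /* a >2GB set element */

    /* Implementation-defined narrowing: on common platforms this is INT_MIN,
     * i.e. a negative length reaches the hash function. */
    int narrowed = (int)element_len;
    printf("size_t length: %zu, as int: %d\n", element_len, narrowed);

    /* Converted back to size_t inside a siphash-like function it becomes an
     * enormous length, which is how the crash described above can happen. */
    printf("length the hash would use: %zu\n",
           (size_t)hash_with_int_len(NULL, narrowed));
    return 0;
}
```
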
yoav-steinberg 2021-12-13 20:16:25 +01:00 committed by GitHub
parent c40d23b89f
commit c7dc17fc0f
7 changed files with 125 additions and 107 deletions


@@ -83,11 +83,11 @@ uint8_t *dictGetHashFunctionSeed(void) {
uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k);
uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k);
uint64_t dictGenHashFunction(const void *key, int len) {
uint64_t dictGenHashFunction(const void *key, size_t len) {
return siphash(key,len,dict_hash_function_seed);
}
uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len) {
uint64_t dictGenCaseHashFunction(const unsigned char *buf, size_t len) {
return siphash_nocase(buf,len,dict_hash_function_seed);
}


@@ -192,8 +192,8 @@ dictEntry *dictGetRandomKey(dict *d);
dictEntry *dictGetFairRandomKey(dict *d);
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);
void dictGetStats(char *buf, size_t bufsize, dict *d);
uint64_t dictGenHashFunction(const void *key, int len);
uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len);
uint64_t dictGenHashFunction(const void *key, size_t len);
uint64_t dictGenCaseHashFunction(const unsigned char *buf, size_t len);
void dictEmpty(dict *d, void(callback)(dict*));
void dictEnableResize(void);
void dictDisableResize(void);


@@ -907,3 +907,74 @@ proc delete_lines_with_pattern {filename tmpfilename pattern} {
close $fh_out
file rename -force $tmpfilename $filename
}
# The following functions and variables are used only when running large-memory
# tests. We avoid defining them when not running large-memory tests because the
# global variables take up lots of memory.
proc init_large_mem_vars {} {
if {![info exists ::str500]} {
set ::str500 [string repeat x 500000000] ;# 500mb
set ::str500_len [string length $::str500]
}
}
# Utility function to write big argument into redis client connection
proc write_big_bulk {size {prefix ""} {skip_read no}} {
init_large_mem_vars
assert {[string length $prefix] <= $size}
r write "\$$size\r\n"
r write $prefix
incr size -[string length $prefix]
while {$size >= 500000000} {
r write $::str500
incr size -500000000
}
if {$size > 0} {
r write [string repeat x $size]
}
r write "\r\n"
if {!$skip_read} {
r flush
r read
}
}
# Utility to read big bulk response (work around Tcl limitations)
proc read_big_bulk {code {compare no} {prefix ""}} {
init_large_mem_vars
r readraw 1
set resp_len [uplevel 1 $code] ;# get the first line of the RESP response
assert_equal [string range $resp_len 0 0] "$"
set resp_len [string range $resp_len 1 end]
set prefix_len [string length $prefix]
if {$compare} {
assert {$prefix_len <= $resp_len}
assert {$prefix_len <= $::str500_len}
}
set remaining $resp_len
while {$remaining > 0} {
set l $remaining
if {$l > $::str500_len} {set l $::str500_len} ; # can't read more than 2gb at a time, so read 500mb so we can easily verify read data
set read_data [r rawread $l]
set nbytes [string length $read_data]
if {$compare} {
set comp_len $nbytes
# Compare prefix part
if {$remaining == $resp_len} {
assert_equal $prefix [string range $read_data 0 [expr $prefix_len - 1]]
set read_data [string range $read_data $prefix_len $nbytes]
incr comp_len -$prefix_len
}
# Compare rest of data, evaluate and then assert to avoid huge print in case of failure
set data_equal [expr {$read_data == [string range $::str500 0 [expr $comp_len - 1]]}]
assert $data_equal
}
incr remaining -$nbytes
}
assert_equal [r rawread 2] "\r\n"
r readraw 0
return $resp_len
}


@@ -85,6 +85,7 @@ set ::all_tests {
unit/networking
unit/cluster
unit/client-eviction
unit/violations
}
# Index to the next test to run in the ::all_tests list.
set ::next_test 0


@@ -1,38 +1,3 @@
set ::str500 [string repeat x 500000000] ;# 500mb
# Utility function to write big argument into redis client connection
proc write_big_bulk {size} {
r write "\$$size\r\n"
while {$size >= 500000000} {
r write $::str500
incr size -500000000
}
if {$size > 0} {
r write [string repeat x $size]
}
r write "\r\n"
r flush
r read
}
# Utility to read big bulk response (work around Tcl limitations)
proc read_big_bulk {code} {
r readraw 1
set resp_len [uplevel 1 $code] ;# get the first line of the RESP response
assert_equal [string range $resp_len 0 0] "$"
set resp_len [string range $resp_len 1 end]
set remaining $resp_len
while {$remaining > 0} {
set l $remaining
if {$l > 2147483647} {set l 2147483647}
set nbytes [string length [r rawread $l]]
incr remaining [expr {- $nbytes}]
}
assert_equal [r rawread 2] "\r\n"
r readraw 0
return $resp_len
}
# check functionality compression of plain and zipped nodes
start_server [list overrides [list save ""] ] {
r config set list-compress-depth 2


@@ -934,3 +934,38 @@ start_server {
}
}
}
start_server [list overrides [list save ""] ] {
# test if the server supports such large configs (avoid 32 bit builds)
catch {
r config set proto-max-bulk-len 10000000000 ;#10gb
r config set client-query-buffer-limit 10000000000 ;#10gb
}
if {[lindex [r config get proto-max-bulk-len] 1] == 10000000000} {
set str_length 4400000000 ;#~4.4GB
test {SADD, SCARD, SISMEMBER - large data} {
r flushdb
r write "*3\r\n\$4\r\nSADD\r\n\$5\r\nmyset\r\n"
assert_equal 1 [write_big_bulk $str_length "aaa"]
r write "*3\r\n\$4\r\nSADD\r\n\$5\r\nmyset\r\n"
assert_equal 1 [write_big_bulk $str_length "bbb"]
r write "*3\r\n\$4\r\nSADD\r\n\$5\r\nmyset\r\n"
assert_equal 0 [write_big_bulk $str_length "aaa"]
assert_encoding hashtable myset
set s0 [s used_memory]
assert {$s0 > [expr $str_length * 2]}
assert_equal 2 [r scard myset]
r write "*3\r\n\$9\r\nSISMEMBER\r\n\$5\r\nmyset\r\n"
assert_equal 1 [write_big_bulk $str_length "aaa"]
r write "*3\r\n\$9\r\nSISMEMBER\r\n\$5\r\nmyset\r\n"
assert_equal 0 [write_big_bulk $str_length "ccc"]
r write "*3\r\n\$4\r\nSREM\r\n\$5\r\nmyset\r\n"
assert_equal 1 [write_big_bulk $str_length "bbb"]
assert_equal [read_big_bulk {r spop myset} yes "aaa"] $str_length
} {} {large-memory}
} ;# skip 32bit builds
}


@@ -1,20 +1,3 @@
# These tests consume massive amounts of memory, and are not
# suitable to be executed as part of the normal test suite
set ::str500 [string repeat x 500000000] ;# 500mb
# Utility function to write big argument into redis client connection
proc write_big_bulk {size} {
r write "\$$size\r\n"
while {$size >= 500000000} {
r write $::str500
incr size -500000000
}
if {$size > 0} {
r write [string repeat x $size]
}
r write "\r\n"
}
# One XADD with one huge 5GB field
# Expected to fail resulting in an empty stream
start_server [list overrides [list save ""] ] {
@@ -23,12 +6,12 @@ start_server [list overrides [list save ""] ] {
r config set client-query-buffer-limit 10000000000 ;#10gb
r write "*5\r\n\$4\r\nXADD\r\n\$2\r\nS1\r\n\$1\r\n*\r\n"
r write "\$1\r\nA\r\n"
write_big_bulk 5000000000 ;#5gb
r flush
catch {r read} err
catch {
write_big_bulk 5000000000 ;#5gb
} err
assert_match {*too large*} $err
r xlen S1
} {0}
} {0} {large-memory}
}
# One XADD with one huge (nearly exactly 4GB) field
@@ -40,12 +23,12 @@ start_server [list overrides [list save ""] ] {
r config set client-query-buffer-limit 10000000000 ;#10gb
r write "*5\r\n\$4\r\nXADD\r\n\$2\r\nS1\r\n\$1\r\n*\r\n"
r write "\$1\r\nA\r\n"
write_big_bulk 4294967295 ;#4gb-1
r flush
catch {r read} err
catch {
write_big_bulk 4294967295 ;#4gb-1
} err
assert_match {*too large*} $err
r xlen S1
} {0}
} {0} {large-memory}
}
# Gradually add big stream fields using repeated XADD calls
@@ -57,7 +40,7 @@ start_server [list overrides [list save ""] ] {
}
r ping
r xlen stream
} {10}
} {10} {large-memory}
}
# Add over 4GB to a single stream listpack (one XADD command)
@@ -67,13 +50,13 @@ start_server [list overrides [list save ""] ] {
r write "*23\r\n\$4\r\nXADD\r\n\$1\r\nS\r\n\$1\r\n*\r\n"
for {set j 0} {$j<10} {incr j} {
r write "\$1\r\n$j\r\n"
write_big_bulk 500000000 ;#500mb
write_big_bulk 500000000 "" yes ;#500mb
}
r flush
catch {r read} err
assert_match {*too large*} $err
r xlen S
} {0}
} {0} {large-memory}
}
# Gradually add big hash fields using repeated HSET calls
@@ -86,7 +69,7 @@ start_server [list overrides [list save ""] ] {
r hset h $j $::str500
}
r object encoding h
} {hashtable}
} {hashtable} {large-memory}
}
# Add over 4GB to a single hash field (one HSET command)
@@ -99,47 +82,10 @@ start_server [list overrides [list save ""] ] {
r write "*4\r\n\$4\r\nHSET\r\n\$2\r\nH1\r\n"
r write "\$1\r\nA\r\n"
write_big_bulk 5000000000 ;#5gb
r flush
r read
r object encoding H1
} {hashtable}
} {hashtable} {large-memory}
}
# Add over 4GB to a single list member (one LPUSH command)
# Currently unsupported, and expected to fail rather than being truncated
# Expected to fail resulting in a non-existing list
start_server [list overrides [list save ""] ] {
test {list with one huge field} {
r config set proto-max-bulk-len 10000000000 ;#10gb
r config set client-query-buffer-limit 10000000000 ;#10gb
r write "*3\r\n\$5\r\nLPUSH\r\n\$2\r\nL1\r\n"
write_big_bulk 5000000000 ;#5gb
r flush
catch {r read} err
assert_match {*too large*} $err
r exists L1
} {0}
}
# SORT which attempts to store an element larger than 4GB into a list.
# Currently unsupported and results in an assertion instead of truncation
start_server [list overrides [list save ""] ] {
test {SORT adds huge field to list} {
r config set proto-max-bulk-len 10000000000 ;#10gb
r config set client-query-buffer-limit 10000000000 ;#10gb
r write "*3\r\n\$3\r\nSET\r\n\$2\r\nS1\r\n"
write_big_bulk 5000000000 ;#5gb
r flush
r read
assert_equal [r strlen S1] 5000000000
r set S2 asdf
r sadd myset 1 2
r mset D1 1 D2 2
catch {r sort myset by D* get S* store mylist}
assert_equal [count_log_message 0 "crashed by signal"] 0
assert_equal [count_log_message 0 "ASSERTION FAILED"] 1
}
}
# SORT which stores an integer encoded element into a list.
# Just for coverage, no news here.
@@ -152,5 +98,5 @@ start_server [list overrides [list save ""] ] {
r mset D1 1 D2 2
r sort myset by D* get S* store mylist
r llen mylist
} {2}
} {2} {cluster:skip}
}