Replace regular array with array of structs to get rid of false sharing (#10892)

There are a lot of false-sharing cache misses at line 4013, inside the getIOPendingCount function.
The reason is that the elements of the io_threads_pending array share the same cache line while being accessed from different
threads, so it is better to represent it as an array of structures, each aligned to the cache line size.
This change should improve performance (in particular, it affects the latency metric; we saw up to a 3% improvement).
This commit is contained in:
Elvina Yakubova 2022-06-26 08:39:30 +03:00 committed by GitHub
parent a64b29485d
commit e2cf386765
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 8 additions and 3 deletions

View File

@ -3997,10 +3997,15 @@ void processEventsWhileBlocked(void) {
* ========================================================================== */
/* Upper bound on the number of I/O threads; used as the length of the
 * per-thread arrays declared in this section. */
#define IO_THREADS_MAX_NUM 128
/* Alignment, in bytes, applied to each threads_pending element (presumably
 * the CPU cache line size on the target platforms -- confirm). */
#define CACHE_LINE_SIZE 64
/* A single atomic pending counter wrapped in a struct that is aligned to
 * CACHE_LINE_SIZE bytes. The intent is that consecutive elements of an array
 * of these start on different cache lines, so that different threads touching
 * different elements do not contend on the same line (false sharing). */
typedef struct __attribute__((aligned(CACHE_LINE_SIZE))) threads_pending {
redisAtomic unsigned long value;
} threads_pending;
/* Per-thread state: each array holds one slot per possible I/O thread
 * (IO_THREADS_MAX_NUM slots). */
pthread_t io_threads[IO_THREADS_MAX_NUM];
pthread_mutex_t io_threads_mutex[IO_THREADS_MAX_NUM];
/* NOTE(review): io_threads_pending is declared twice below. This text looks
 * like a diff rendering whose +/- markers were lost: the plain
 * `redisAtomic unsigned long` array is presumably the removed line and the
 * `threads_pending` array its replacement -- confirm against the real file. */
redisAtomic unsigned long io_threads_pending[IO_THREADS_MAX_NUM];
threads_pending io_threads_pending[IO_THREADS_MAX_NUM];
int io_threads_op; /* IO_THREADS_OP_IDLE, IO_THREADS_OP_READ or IO_THREADS_OP_WRITE. */ // TODO: should access to this be atomic??!
/* This is the list of clients each thread will serve when threaded I/O is
@ -4010,12 +4015,12 @@ list *io_threads_list[IO_THREADS_MAX_NUM];
/* Return the pending counter stored in slot `i` of io_threads_pending.
 * The value is read through atomicGetWithSync (defined outside this view)
 * into a local and returned by value. `i` is not range-checked here. */
static inline unsigned long getIOPendingCount(int i) {
unsigned long count = 0;
/* NOTE(review): the two reads below look like the before/after pair of a
 * diff whose +/- markers were lost; the `.value` form is the one that
 * matches the threads_pending struct element type -- confirm. */
atomicGetWithSync(io_threads_pending[i], count);
atomicGetWithSync(io_threads_pending[i].value, count);
return count;
}
/* Store `count` into the pending counter in slot `i` of io_threads_pending,
 * using atomicSetWithSync (defined outside this view). `i` is not
 * range-checked here. */
static inline void setIOPendingCount(int i, unsigned long count) {
/* NOTE(review): the two stores below look like the before/after pair of a
 * diff whose +/- markers were lost; the `.value` form is the one that
 * matches the threads_pending struct element type -- confirm. */
atomicSetWithSync(io_threads_pending[i], count);
atomicSetWithSync(io_threads_pending[i].value, count);
}
void *IOThreadMain(void *myid) {